desperadoxhy/SAM-Adapt: trian_hrf.py (8.82 KB, committed 2024-05-19, "add hrf")
#!/usr/bin/env python3
# train.py
""" train network using pytorch
Junde Wu
"""
import os
import sys
import argparse
from datetime import datetime
from collections import OrderedDict
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.metrics import roc_auc_score, accuracy_score, confusion_matrix
import torchvision
import torchvision.transforms as transforms
from skimage import io
from sklearn.model_selection import KFold
from torch.utils.data import DataLoader, DistributedSampler, ConcatDataset, random_split
from torch.autograd import Variable
from PIL import Image
# from tensorboardX import SummaryWriter
#from models.discriminatorlayer import discriminator
from dataset import *
from conf import settings
import time
import cfg
from tqdm import tqdm
from utils import *
import function
# NOTE (assumption): args, rank and device are used throughout this script but
# are never defined in it. The setup below is a minimal sketch inferred from
# the cfg module and the DistributedSampler / DistributedDataParallel usage
# further down; the real entry point may differ.
args = cfg.parse_args()
torch.distributed.init_process_group(backend='nccl')
rank = torch.distributed.get_rank()
torch.cuda.set_device(args.local_rank)
device = torch.device('cuda', args.local_rank)

'''Image preprocessing transforms applied to inputs and masks during training and testing.'''
transform_train = transforms.Compose([
    transforms.Resize((args.image_size, args.image_size)),
    transforms.ToTensor(),
])
transform_train_seg = transforms.Compose([
    transforms.Resize((args.out_size, args.out_size)),
    transforms.ToTensor(),
])
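# Note: transforms.Resize defaults to bilinear interpolation, so resized binary
# masks can contain intermediate values; downstream code presumably thresholds
# them back to {0, 1}.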
# Auxiliary dataset (defined here but not used further in this script).
random_dataset = RandomDataset('DualModal2019/RGB/Training')
if args.net == 'sam':
    prompt = 'click'
elif args.net == 'sam_lite':
    prompt = 'noprompt'
elif args.net == 'sam_self':
    prompt = 'noprompt'
train_dataset = HRFRGB(args, data_path="HRFdatabaseslice/train", transform=transform_train, transform_msk=transform_train_seg)
test_dataset = HRFRGB(args, data_path="HRFdatabaseslice/test", transform=transform_train, transform_msk=transform_train_seg)
train_sampler = DistributedSampler(train_dataset)
test_sampler = DistributedSampler(test_dataset)
nice_train_loader = DataLoader(train_dataset, sampler=train_sampler, batch_size=args.b, num_workers=4, pin_memory=True)
nice_test_loader = DataLoader(test_dataset, sampler=test_sampler, batch_size=args.b, num_workers=4, pin_memory=True)
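# With a DistributedSampler each process draws a disjoint shard of the dataset,
# so args.b is the per-GPU batch size and the effective batch size is
# args.b * world_size.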
net = get_network(args, args.net, use_gpu=args.gpu, gpu_device=device, distribution=args.distributed)
# Move the model to the GPU and wrap it with DistributedDataParallel.
num_gpus = torch.cuda.device_count()
for n, value in net.image_encoder.named_parameters():
    # if "Adapter" not in n and "patch_embed" not in n:
    #     value.requires_grad = False
    # Freeze every image-encoder parameter except the Adapter layers, so that
    # only the adapters are fine-tuned.
    if "Adapter" not in n:
        value.requires_grad = False
if num_gpus > 1:
    net = torch.nn.parallel.DistributedDataParallel(net, device_ids=[args.local_rank])
# net = net.to(device=device)
if args.pretrain:
    weights = torch.load(args.pretrain)  # load the pretrained weights
    net.load_state_dict(weights, strict=False)  # apply them to the network
# Set up an Adam optimizer and a StepLR scheduler that multiplies the learning
# rate by 0.3 every 10 epochs.
"""
net.parameters(): the model parameters to optimize, i.e. all learnable weights and biases.
lr=args.lr: learning rate, controlling the size of each parameter update.
betas=(0.9, 0.999): exponential decay rates for the first- and second-moment estimates of the gradients.
eps=1e-08: small constant added to the denominator for numerical stability, preventing division by zero.
weight_decay=0: L2 regularization coefficient, used to control model complexity.
amsgrad=False: whether to use the AMSGrad variant; usually left at False.
"""
optimizer = optim.Adam(net.parameters(), lr=args.lr, betas=(0.9, 0.999), eps=1e-08, weight_decay=0, amsgrad=False)
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.3)
# scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.1, patience=5, verbose=True,
#                                                  threshold=0.002, threshold_mode='rel', cooldown=0, min_lr=0,
#                                                  eps=1e-08)
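# For reference, with scheduler.step() called once per epoch,
# StepLR(step_size=10, gamma=0.3) gives:
#   epochs  1-10: lr = args.lr
#   epochs 11-20: lr = args.lr * 0.3
#   epochs 21-30: lr = args.lr * 0.09, and so on.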
start_epoch = 0
# Resume from a saved checkpoint if one was provided.
if args.weights != 0:
    print(f'=> resuming from {args.weights}')
    checkpoint_file = args.weights
    assert os.path.exists(checkpoint_file)
    loc = 'cuda:{}'.format(args.gpu_device)
    checkpoint = torch.load(checkpoint_file, map_location=loc)
    start_epoch = checkpoint['epoch']
    best_tol = checkpoint['best_tol']
    state_dict = checkpoint['state_dict']
    if args.distributed != 'none':
        # The checkpoint stores the unwrapped model, so prefix every key with
        # 'module.' to match the DistributedDataParallel wrapper.
        new_state_dict = OrderedDict()
        for k, v in state_dict.items():
            # name = k[7:]  # remove `module.`
            name = 'module.' + k
            new_state_dict[name] = v
    else:
        new_state_dict = state_dict
    net.load_state_dict(new_state_dict)
    print(f'=> loaded checkpoint {checkpoint_file} (epoch {start_epoch})')
# Create the log directory (rank 0 only).
if rank == 0:
    args.path_helper = set_log_dir('logs', args.exp_name)
    logger = create_logger(args.path_helper['log_path'])
    logger.info(args)
'''tensorboard'''
if rank == 0:
    # Create the run directory used by TensorBoard.
    if not os.path.exists(settings.LOG_DIR):
        os.mkdir(settings.LOG_DIR)
    log_dir = os.path.join(settings.LOG_DIR, args.net, settings.TIME_NOW)
    # os.makedirs rather than os.mkdir, since the intermediate
    # settings.LOG_DIR/args.net directory may not exist yet.
    os.makedirs(log_dir)
    # writer_t_loss = SummaryWriter(log_dir=os.path.join(log_dir, 'train_loss'))
    # writer_t_iou = SummaryWriter(log_dir=os.path.join(log_dir, 'train_iou'))
    # writer_t_dice = SummaryWriter(log_dir=os.path.join(log_dir, 'train_dice'))
    # writer_v_loss = SummaryWriter(log_dir=os.path.join(log_dir, 'val_loss'))
    # writer_v_iou = SummaryWriter(log_dir=os.path.join(log_dir, 'val_iou'))
    # writer_v_dice = SummaryWriter(log_dir=os.path.join(log_dir, 'val_dice'))
# The SummaryWriter is unused for now; every rank passes writer=None around.
writer = None
'''begin training'''
best_acc = 0.0
best_tol = 1e4
# Load the fake prompt from a local file.
fake_prompt = np.load('fake_prompt.npz')
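# np.load on an .npz archive returns a lazy NpzFile; the arrays inside are
# accessed by key (the key names depend on how fake_prompt.npz was saved).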
for epoch in range(start_epoch + 1, start_epoch + settings.EPOCH + 1):
    if args.mod == 'sam_adpt':
        # Reseed the DistributedSampler so each epoch sees a different shuffle.
        train_sampler.set_epoch(epoch)
    time_start = time.time()
    if args.net == 'sam':
        avg_loss, (iou, dice) = function.train_sam(args, net, optimizer, nice_train_loader, epoch, writer, schedulers=scheduler, vis=args.vis, device=device)
    elif args.net in ('sam_lite', 'sam_self', 'sam_self_with_prompt'):
        avg_loss = function.train_mult_sam_lite(args, net, optimizer, nice_train_loader, epoch, writer, schedulers=scheduler, vis=args.vis, device=device, fake_prompt=fake_prompt)
    if rank == 0:
        logger.info(f'train set avg loss: {avg_loss} || @ epoch {epoch}.')
    scheduler.step()
    # scheduler.step(avg_loss)
    # if rank == 0:
    #     writer_t_loss.add_scalar('loss', avg_loss, epoch)
    #     writer_t_iou.add_scalar('loss', iou, epoch)
    #     writer_t_dice.add_scalar('loss', dice, epoch)
    time_end = time.time()

    net.eval()
    if (epoch and epoch % args.val_freq == 0) or epoch == start_epoch + settings.EPOCH:
        if args.net == 'sam':
            tol, (eiou, edice) = function.validation_sam(args, nice_test_loader, epoch, net, writer)
            if rank == 0:
                logger.info(f'test set avg loss: {tol}, mean IoU: {eiou}, mean Dice: {edice} || @ epoch {epoch}.')
        elif args.net in ('sam_lite', 'sam_self', 'sam_self_with_prompt'):
            tol, (eiou1, edice1), (eiou2, edice2), (eiou3, edice3) = function.validation_mult_sam_lite(args, nice_test_loader, epoch, net, writer, fake_prompt=fake_prompt)
            if rank == 0:
                logger.info(f'test set avg loss: {tol}, mean IoU: {eiou1}, {eiou2}, {eiou3}, mean Dice: {edice1}, {edice2}, {edice3} || @ epoch {epoch}.')
        # if rank == 0:
        #     writer_v_loss.add_scalar('loss', tol, epoch)
        #     writer_v_iou.add_scalar('loss', eiou, epoch)
        #     writer_v_dice.add_scalar('loss', edice, epoch)

        # Unwrap the DDP module before saving so the checkpoint keys carry no
        # 'module.' prefix.
        if args.distributed != 'none':
            sd = net.module.state_dict()
        else:
            sd = net.state_dict()
        if rank == 0 and tol < best_tol:
            best_tol = tol
            is_best = True
            print('saving best checkpoint')
            save_checkpoint({
                'epoch': epoch + 1,
                'model': args.net,
                'state_dict': sd,
                'optimizer': optimizer.state_dict(),
                'best_tol': best_tol,
                'path_helper': args.path_helper,
            }, is_best, args.path_helper['ckpt_path'], filename="best_checkpoint")
        else:
            is_best = False
# writer.close()
# if rank == 0:
#     writer_t_loss.close()
#     writer_t_iou.close()
#     writer_t_dice.close()
#     writer_v_loss.close()
#     writer_v_iou.close()
#     writer_v_dice.close()
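# Launch sketch (assumption: the flag names below come from cfg.py, which is
# not shown here; adjust them to the actual argument parser):
#   python -m torch.distributed.launch --nproc_per_node=2 trian_hrf.py \
#       -net sam_lite -mod sam_adpt -exp_name hrf -b 2 -lr 1e-4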