
 pipeline

import torch
import torch.nn as nn
from tqdm import tqdm

trainloader = torch.utils.data.DataLoader(trainset, batch_size=batch_size, shuffle=True, num_workers=4, pin_memory=True)
valloader = torch.utils.data.DataLoader(valset, batch_size=4, shuffle=False, pin_memory=True)

optimizer = torch.optim.AdamW(model.parameters(), lr=lr, weight_decay=weight_decay)
# optimizer = torch.optim.SGD(model.parameters(), lr=0.01, momentum=0.9)
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=50, gamma=0.1)
criterion = nn.CrossEntropyLoss(ignore_index=11)  # label 11 is excluded from the loss

for epoch in range(num_epochs):
    losses = []
    loop = tqdm(trainloader)
    model.train()
    for data, target in loop:
        prediction = model(data)
        loss = criterion(prediction, target)

        losses.append(loss.item())
        loop.set_description('Epoch {}/{}'.format(epoch + 1, num_epochs))
        # set_postfix overwrites the previous postfix, so pass everything in one call
        loop.set_postfix(loss=loss.item(), lr=scheduler.get_last_lr()[0])

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    # step the scheduler once per epoch, after the optimizer
    scheduler.step()

    avg_loss = sum(losses) / len(losses)

    torch.save(model.state_dict(), savename)
    torch.save(optimizer.state_dict(), savename2)

    model.eval()

optimizer 

optimizer = torch.optim.SGD([{'params': model.base.parameters()},
                             {'params': model.classifier.parameters(), 'lr': 1e-3}],
                            lr=1e-2, momentum=0.9)
# manually adjust lr (poly decay)
lr = base_lr * ((1 - float(iter_num) / max_iter) ** power)
optimizer.param_groups[0]['lr'] = lr

optimizer.param_groups is a list, here of length 2, with one dict per parameter group:
optimizer.param_groups[0].keys()
# ['amsgrad', 'params', 'lr', 'betas', 'weight_decay', 'eps']  (keys shown for an Adam-style optimizer)
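
As a sketch, the poly decay above can be wrapped in a small helper that updates every param group once per iteration (base_lr, max_iter and power are assumed to come from the training config):

# hypothetical helper: apply poly decay to all param groups
def poly_lr(optimizer, base_lr, iter_num, max_iter, power=0.9):
    lr = base_lr * ((1 - float(iter_num) / max_iter) ** power)
    for group in optimizer.param_groups:
        group['lr'] = lr
    return lr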

loss


torch.nn.KLDivLoss
log here is the natural logarithm (ln).

https://pytorch.org/docs/stable/nn.html?highlight=torch%20nn%20kldivloss#torch.nn.KLDivLoss

import torch
import torch.nn.functional as F

a = torch.Tensor(list(range(1, 19))).reshape([2, 3, 3])  # shape: b, c, d
t = a * 10

af = F.softmax(a, dim=-1)
tf = F.softmax(t, dim=-1)
KLdiv = torch.sum(tf * (torch.log(tf) - torch.log(af)))  # 2.4429

torch.nn.KLDivLoss(reduction='sum')(torch.log(af), tf)        # 2.4429
torch.nn.KLDivLoss(reduction='mean')(torch.log(af), tf)       # 0.1357
torch.nn.KLDivLoss(reduction='batchmean')(torch.log(af), tf)  # 1.2215, sum within the batch, mean across batches = sum() / batch_size
torch.nn.KLDivLoss(reduction='none')(torch.log(af), tf)       # shape: 2,3,3, element-wise
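
A quick sanity check of how the reductions relate, reusing af and tf from above:

sum_loss = torch.nn.KLDivLoss(reduction='sum')(torch.log(af), tf)
# 'mean' divides by the number of elements, 'batchmean' by the batch size
assert torch.isclose(sum_loss / tf.numel(),
                     torch.nn.KLDivLoss(reduction='mean')(torch.log(af), tf))
assert torch.isclose(sum_loss / tf.shape[0],
                     torch.nn.KLDivLoss(reduction='batchmean')(torch.log(af), tf))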

save / load model
https://zhuanlan.zhihu.com/p/38056115 

# save the entire network
torch.save(net, PATH)
model = torch.load(PATH)

# save only the parameters: faster and uses less disk space
torch.save(net.state_dict(), PATH)
model.load_state_dict(torch.load(PATH))

torch.save({
    'epoch': epochID + 1,
    'state_dict': model.state_dict(),
    'optimizer': optimizer.state_dict(),
    'best_loss': lossMIN,
    'alpha': loss.alpha,
    'gamma': loss.gamma
}, 'PATH.pth.tar')

def load_checkpoint(model, optimizer, checkpoint_PATH):
    if checkpoint_PATH is not None:
        model_CKPT = torch.load(checkpoint_PATH)
        print('loading checkpoint!')
        model.load_state_dict(model_CKPT['state_dict'])
        optimizer.load_state_dict(model_CKPT['optimizer'])
    return model, optimizer
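
A minimal resume sketch built on the checkpoint dict saved above (same 'epoch' / 'state_dict' / 'optimizer' keys):

ckpt = torch.load('PATH.pth.tar')
model.load_state_dict(ckpt['state_dict'])
optimizer.load_state_dict(ckpt['optimizer'])
start_epoch = ckpt['epoch']  # continue training from this epoch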

Filtering out parameters that do not exist in the model when loading

def load_checkpoint(model, checkpoint, optimizer, loadOptimizer):
    if checkpoint != 'None':
        print("loading checkpoint...")
        model_dict = model.state_dict()

        modelCheckpoint = torch.load(checkpoint)
        pretrained_dict = modelCheckpoint['state_dict']

        # keep only the keys that also exist in the current model
        new_dict = {k: v for k, v in pretrained_dict.items() if k in model_dict}
        model_dict.update(new_dict)
        model.load_state_dict(model_dict)

        # report how many parameters were actually updated
        print('Total : {}, update: {}'.format(len(pretrained_dict), len(new_dict)))
        print("loading finished!")

        # set loadOptimizer=False if the optimizer state should not be restored
        if loadOptimizer:
            optimizer.load_state_dict(modelCheckpoint['optimizer'])
            print('loaded optimizer')
        else:
            print('did not load optimizer')
    else:
        print('No checkpoint is included')
    return model, optimizer

Freezing parameters
https://blog.csdn.net/lingzhou33/article/details/88977700 

# freeze everything directly
for p in net.parameters():
    p.requires_grad = False

class RESNET_attention(nn.Module):
    def __init__(self, model, pretrained):
        super(RESNET_attention, self).__init__()
        self.resnet = model(pretrained)
        # freeze every parameter defined above this loop; the layers
        # added below (f, g, h, fc, ...) stay trainable
        for p in self.parameters():
            p.requires_grad = False
        self.f = nn.Conv2d(2048, 512, 1)
        self.g = nn.Conv2d(2048, 512, 1)
        self.h = nn.Conv2d(2048, 2048, 1)
        self.softmax = nn.Softmax(-1)
        self.gamma = nn.Parameter(torch.FloatTensor([0.0]))
        self.avgpool = nn.AvgPool2d(7, stride=1)
        self.resnet.fc = nn.Linear(2048, 10)

# freeze parameters whose name contains word1
word1 = 'seg'
for name, p in decode_net.named_parameters():
    # a = [m.start() for m in re.finditer(word1, name)]
    # if a:  # if list a is non-empty, disable gradients
    if word1 in name:
        p.requires_grad = False
    else:
        p.requires_grad = True
    # if p.requires_grad: print(name)

# the optimizer should filter out parameters that will not be updated
optimizer = optim.Adam(
    filter(lambda p: p.requires_grad, model.parameters()),
    lr=0.001, betas=(0.9, 0.999), eps=1e-08, weight_decay=1e-5)

Freeze by parameter id

base_params_id = (
    list(map(id, net.conv1.parameters())) +
    list(map(id, net.bn1.parameters())) +
    list(map(id, net.layer1.parameters())) +
    list(map(id, net.layer2.parameters())) +
    list(map(id, net.layer3.parameters())) +
    list(map(id, net.layer4.parameters()))
)

new_params = filter(lambda p: id(p) not in base_params_id and p.requires_grad,
                    netG.parameters())
base_params = filter(lambda p: id(p) in base_params_id,
                     netG.parameters())

optimizerG = optim.SGD([{'params': base_params, 'lr': 1e-4},
                        {'params': new_params}],
                       lr=opt.lr, momentum=0.9, weight_decay=0.0005)

Counting parameters

https://github.com/Lyken17/pytorch-OpCounter   computes MACs and params

params = list(model.parameters())
k = 0
for i in params:
    l = 1
    # print("shape of this layer: " + str(list(i.size())))
    for j in i.size():
        l *= j
    # print("parameter count of this layer: " + str(l))
    k = k + l
print("total number of parameters: " + str(k))

Debug

RuntimeError: Expected to have finished reduction in the prior iteration

Fix:

# pass find_unused_parameters=True when wrapping the model
model = torch.nn.parallel.DistributedDataParallel(model, find_unused_parameters=True)
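
For context, a minimal wrapping sketch; it assumes the process group is already initialized and local_rank comes from the launcher (e.g. torchrun):

# assumes torch.distributed.init_process_group() was already called
model = model.to(local_rank)
model = torch.nn.parallel.DistributedDataParallel(
    model,
    device_ids=[local_rank],
    find_unused_parameters=True,  # needed when some parameters receive no gradient
)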

