代码学习

代码学习

AI艺术鉴赏挑战赛的亚军、季军代码学习

基于Resnext50,eff-b3

  • 这个网络主要是基于Resnext50和efficientnet-b3 网络
  • 网络提供了很多可选参数,可以很方便地调整网络的一些基本参数配置
  • 最后就是有一个投票部分,来做一个选择

网络部分(主要是包含一些相关功能模块的实现,这里也做一下记录)

def forward(self, x):
    """Compute classifier outputs for a batch of inputs.

    The input goes through the backbone, is pooled, and the pooled
    features are passed through a hidden linear layer whose output is
    scaled element-wise by the SE gate before the ReLU and the final
    metric head.

    NOTE(review): presumably a method of ``BaseModel``; it is shown here
    outside its class. ``self.down`` is not applied in this path.

    Args:
        x: input batch fed to the backbone.

    Returns:
        Whatever ``self.metric`` returns for the gated hidden features.
    """
    # EfficientNet exposes its convolutional trunk via `extract_features`;
    # every other backbone is called directly.
    if self.model_name == 'eff-b3':
        trunk = self.backbone.extract_features
    else:
        trunk = self.backbone

    pooled = self.pool(trunk(x))
    batch_size = pooled.size(0)

    # Per-channel gate, flattened to one row per sample.
    gate = self.se(pooled).view(batch_size, -1)
    flat = pooled.view(batch_size, -1)
    gated = self.relu(self.hidden(flat) * gate)

    return self.metric(gated)
class SELayer(nn.Module):
    """Squeeze-and-excitation gate producing one weight per channel.

    The input is average-pooled to a single value per channel, passed
    through a two-layer bottleneck MLP (no biases) and squashed with a
    sigmoid. Only the gate is returned; the caller multiplies it into
    its own features.

    Args:
        channel: number of input (and output) channels.
        reduction: bottleneck ratio; the hidden width is
            ``channel // reduction``.
    """

    def __init__(self, channel, reduction=16):
        super(SELayer, self).__init__()
        bottleneck = channel // reduction
        self.avg_pool = nn.AdaptiveAvgPool2d(1)
        self.fc = nn.Sequential(
            nn.Linear(channel, bottleneck, bias=False),
            nn.ReLU(inplace=True),
            nn.Linear(bottleneck, channel, bias=False),
            nn.Sigmoid(),
        )

    def forward(self, x):
        """Return the gate with shape ``(batch, channels, 1, 1)`` for a 4-D input."""
        batch, channels, _height, _width = x.size()
        squeezed = self.avg_pool(x).view(batch, channels)
        gate = self.fc(squeezed)
        return gate.view(batch, channels, 1, 1)


class AdaptiveConcatPool2d(nn.Module):
    """Concatenate adaptive max pooling and adaptive average pooling.

    The output has twice as many channels as the input: the max-pooled
    channels come first, followed by the average-pooled ones.

    Args:
        sz: target spatial size handed to both pooling layers.
    """

    def __init__(self, sz=(1,1)):
        super().__init__()
        self.ap = nn.AdaptiveAvgPool2d(sz)
        self.mp = nn.AdaptiveMaxPool2d(sz)

    def forward(self, x):
        """Pool ``x`` both ways and stack the results along dim 1."""
        max_pooled = self.mp(x)
        avg_pooled = self.ap(x)
        return torch.cat((max_pooled, avg_pooled), dim=1)


class GeneralizedMeanPooling(nn.Module):
    """Generalized-mean (GeM) pooling with a fixed exponent.

    Each value is clamped to at least ``eps``, raised to the power ``p``,
    adaptively average-pooled to ``output_size`` and finally raised to
    ``1 / p``.

    Args:
        norm: the exponent ``p``; must be positive.
        output_size: target size for the adaptive average pooling.
        eps: lower bound applied to the input before exponentiation.
    """

    def __init__(self, norm=3, output_size=1, eps=1e-6):
        super().__init__()
        assert norm > 0
        self.p = float(norm)
        self.output_size = output_size
        self.eps = eps

    def forward(self, x):
        """Apply GeM pooling to ``x``."""
        powered = torch.clamp(x, min=self.eps) ** self.p
        averaged = torch.nn.functional.adaptive_avg_pool2d(powered, self.output_size)
        return averaged ** (1. / self.p)

    def __repr__(self):
        return '{}({}, output_size={})'.format(
            self.__class__.__name__, self.p, self.output_size)



class BaseModel(nn.Module):
    """Configurable classifier: backbone + pooling + SE-gated hidden layer + head.

    Args:
        model_name: ``'eff-b3'`` (EfficientNet-B3, feature width 1536) or
            ``'resnext50'`` (ResNeXt50-32x4d without its last two children,
            feature width 2048).
        num_classes: number of outputs of the metric head.
        pretrained: whether the backbone is initialised from pretrained
            weights. This is honoured for both backbones.
        pool_type: ``'avg'``, ``'max'``, ``'cat'`` (max and avg concatenated)
            or ``'gem'``. Any other value leaves ``self.pool`` as ``None``.
        down: whether to build the ``self.down`` projection; forced on when
            ``pool_type == 'cat'``. Otherwise ``self.down`` is an identity.
        metric: ``'linear'`` for a plain linear head or ``'am'`` for
            ``AddMarginProduct``. Any other value leaves ``self.metric`` as
            ``None``.

    Raises:
        ValueError: if ``model_name`` is not one of the supported backbones.
    """

    def __init__(self, model_name, num_classes, pretrained=True, pool_type='max', down=True, metric='linear'):
        super().__init__()
        self.model_name = model_name

        if model_name == 'eff-b3':
            # Respect `pretrained`: `from_name` builds the same architecture
            # with random initialisation and without downloading weights.
            if pretrained:
                backbone = EfficientNet.from_pretrained('efficientnet-b3')
            else:
                backbone = EfficientNet.from_name('efficientnet-b3')
            plane = 1536
        elif model_name == 'resnext50':
            # Drop the last two children so the backbone returns a feature map.
            backbone = nn.Sequential(*list(models.resnext50_32x4d(pretrained=pretrained).children())[:-2])
            plane = 2048
        else:
            # Without a known feature width none of the layers below can be
            # built, so fail here with a clear message.
            raise ValueError(
                "unsupported model_name %r; expected 'eff-b3' or 'resnext50'" % (model_name,))

        self.backbone = backbone

        if pool_type == 'avg':
            self.pool = nn.AdaptiveAvgPool2d((1, 1))
        elif pool_type == 'cat':
            self.pool = AdaptiveConcatPool2d()
            # Concatenated pooling doubles the width, so the projection back
            # to `plane` is always built.
            down = True
        elif pool_type == 'max':
            self.pool = nn.AdaptiveMaxPool2d((1, 1))
        elif pool_type == 'gem':
            self.pool = GeneralizedMeanPooling()
        else:
            self.pool = None

        if down:
            down_in = plane * 2 if pool_type == 'cat' else plane
            self.down = nn.Sequential(
                nn.Linear(down_in, plane),
                nn.BatchNorm1d(plane),
                nn.Dropout(0.2),
                nn.ReLU(True)
            )
        else:
            self.down = nn.Identity()

        self.se = SELayer(plane)
        self.hidden = nn.Linear(plane, plane)
        self.relu = nn.ReLU(True)

        if metric == 'linear':
            self.metric = nn.Linear(plane, num_classes)
        elif metric == 'am':
            self.metric = AddMarginProduct(plane, num_classes)
        else:
            self.metric = None

在网络训练之前,作者把 train、val 的数据列表记录在了 txt 文件中,并对样本数量做了计数。同时,网络训练和测试的收敛情况(loss、acc 曲线)也做了记录,最终保存模型时也一并记录了准确率。

def plot(d, mode='train', best_acc_=None):
    """Save the loss and accuracy curves side by side as ``<mode>.jpg``.

    Args:
        d: mapping with per-epoch sequences under the keys ``'loss'`` and
            ``'acc'``.
        mode: used in the figure title and in the output file name.
        best_acc_: optional pair ``(x, y)`` drawn as a red dot on the
            accuracy panel.

    The image is written into the module-level ``savepath`` directory.
    """
    import matplotlib.pyplot as plt

    plt.figure(figsize=(10, 4))
    plt.suptitle('%s_curve' % mode)
    plt.subplots_adjust(wspace=0.2, hspace=0.2)
    epoch_axis = np.arange(len(d['acc']))

    # Left panel: loss, right panel: accuracy.
    for panel, key in enumerate(('loss', 'acc'), start=1):
        plt.subplot(1, 2, panel)
        plt.plot(epoch_axis, d[key], label=key)
        if key == 'acc' and best_acc_ is not None:
            plt.scatter(best_acc_[0], best_acc_[1], c='r')
        plt.xlabel('epoch')
        plt.ylabel(key)
        plt.legend(loc='upper left')

    target = os.path.join(savepath, '%s.jpg' % mode)
    plt.savefig(target, bbox_inches='tight')
    plt.close()

加载数据集和数据增强部分

# Images are resized to `size / 0.875` per side and then cropped back to
# `size`: a random crop for training, a centre crop for validation.
_resize_hw = (int(size / 0.875), int(size / 0.875))
_norm_mean = [0.485, 0.456, 0.406]
_norm_std = [0.229, 0.224, 0.225]

_train_steps = [
    transforms.RandomHorizontalFlip(),
    # Augmentations left disabled:
    # transforms.RandomVerticalFlip(),
    # transforms.ColorJitter(brightness=0.126, saturation=0.5),
    # transforms.RandomAffine(degrees=30, translate=(0.2, 0.2), fillcolor=0, scale=(0.8, 1.2), shear=None),
    transforms.Resize(_resize_hw),
    transforms.RandomCrop((size, size)),
    transforms.ToTensor(),
    transforms.Normalize(mean=_norm_mean, std=_norm_std),
    # Applied after ToTensor/Normalize, i.e. on the tensor.
    transforms.RandomErasing(p=0.5, scale=(0.02, 0.33), ratio=(0.3, 3.3)),
]

_val_steps = [
    transforms.Resize(_resize_hw),
    transforms.CenterCrop((size, size)),
    transforms.ToTensor(),
    transforms.Normalize(mean=_norm_mean, std=_norm_std),
]

# Preprocessing pipelines keyed by dataset split.
trans = {
    'train': transforms.Compose(_train_steps),
    'val': transforms.Compose(_val_steps),
}
class Dataset(dataset.Dataset):
    """Image classification dataset backed by an index file.

    The index is read from ``data/<mode>.txt``; each non-empty line has the
    form ``<image path>,<integer label>``. The label is taken from after the
    last comma, so image paths may themselves contain commas.

    Args:
        mode: ``'train'`` or ``'val'``; selects both the index file and the
            entry of the module-level ``trans`` dict used as transform.

    Raises:
        ValueError: if a non-empty line has no comma or its label is not an
            integer.
    """

    def __init__(self, mode):
        assert mode in ['train', 'val'], "mode must be 'train' or 'val'"
        txt = 'data/%s.txt' % mode

        fpath = []
        labels = []
        with open(txt, 'r') as f:
            # Iterate the file lazily instead of materialising every line.
            for line in f:
                line = line.strip()
                if not line:
                    # Tolerate blank lines, e.g. at the end of the file.
                    continue
                # Split on the last comma only so that a path containing
                # commas is kept intact.
                fp, label = line.rsplit(',', 1)
                fpath.append(fp)
                labels.append(int(label))

        self.fpath = fpath
        self.labels = labels
        self.mode = mode
        self.trans = trans[mode]

    def __getitem__(self, index):
        """Return ``(image, label)`` for ``index``; the image is RGB and transformed if a transform is set."""
        fp = self.fpath[index]
        label = self.labels[index]
        # `convert` returns an independent image, so the source file can be
        # closed as soon as the conversion is done.
        with Image.open(fp) as src:
            img = src.convert('RGB')
        if self.trans is not None:
            img = self.trans(img)

        return img, label

    def __len__(self):
        """Return the number of indexed samples."""
        return len(self.labels)

# dataloader
trainset = Dataset(mode='train')
valset = Dataset(mode='val')

# Training batches are shuffled and the last incomplete batch is dropped.
trainloader = DataLoader(
    dataset=trainset,
    batch_size=args.batch_size,
    shuffle=True,
    num_workers=args.num_workers,
    pin_memory=True,
    drop_last=True,
)

# Validation keeps the file order and uses a fixed batch size of 128.
valloader = DataLoader(
    dataset=valset,
    batch_size=128,
    shuffle=False,
    num_workers=args.num_workers,
    pin_memory=True,
)

网络的训练和测试部分代码都比较常规,所以记录一下别的部分的相关操作

# model
model = BaseModel(model_name=args.model_name, num_classes=args.num_classes, pretrained=args.pretrained, pool_type=args.pool_type, down=args.down, metric=args.metric)
if args.resume:
    # Load onto the CPU first: a checkpoint saved from a GPU can then be
    # resumed on any machine, including the CPU fallback below. The weights
    # are moved to the target device by `model.to(device)` afterwards.
    state = torch.load(args.resume, map_location='cpu')
    print('best_epoch:{}, best_acc:{}'.format(state['epoch'], state['acc']))
    model.load_state_dict(state['net'])

if torch.cuda.device_count() > 1 and args.multi_gpus:
    print('use multi-gpus...')
    # NOTE(review): this is set after torch.cuda.device_count() has already
    # been queried -- confirm it still takes effect at this point.
    os.environ['CUDA_VISIBLE_DEVICES'] = '0,1'
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    # Single-process group (rank 0, world size 1) on a fixed local port.
    torch.distributed.init_process_group(backend="nccl", init_method='tcp://localhost:23456', rank=0, world_size=1)
    model = model.to(device)
    model = nn.parallel.DistributedDataParallel(model)
else:
    # Single device: the GPU selected by args.gpu, or the CPU when CUDA is
    # unavailable.
    device = ('cuda:%d'%args.gpu if torch.cuda.is_available() else 'cpu')
    model = model.to(device)
print('device:', device)

# optim
# Only parameters that still require gradients are handed to SGD.
optimizer = torch.optim.SGD(
    [{
        'params': [p for p in model.parameters() if p.requires_grad],
        'lr': args.lr,
    }],
    weight_decay=args.weight_decay,
    momentum=args.momentum,
)

print('init_lr={}, weight_decay={}, momentum={}'.format(args.lr, args.weight_decay, args.momentum))

if args.scheduler == 'step':
    scheduler = lr_scheduler.StepLR(optimizer, step_size=args.lr_step, gamma=args.lr_gamma, last_epoch=-1)
elif args.scheduler == 'multi':
    scheduler = lr_scheduler.MultiStepLR(optimizer, milestones=[150, 225], gamma=args.lr_gamma, last_epoch=-1)
elif args.scheduler == 'cos':
    warm_up_step = 10

    def lambda_(epoch):
        """Learning-rate factor: linear warm-up, then half-cosine decay."""
        if epoch < warm_up_step:
            return (epoch + 1) / warm_up_step
        progress = (epoch - warm_up_step) / (args.total_epoch - warm_up_step)
        return 0.5 * (np.cos(progress * np.pi) + 1)

    scheduler = torch.optim.lr_scheduler.LambdaLR(optimizer, lambda_)

最后的投票部分

# Weighted vote over several prediction files. Each CSV has no header and
# holds rows of (sample index, predicted class).
files = ['1.csv', '2.csv', '3.csv', '4.csv']
weights = [1, 1, 1, 1]

# results[i, c] accumulates the total weight voting for class c on sample i.
results = np.zeros((800, 6))
for csv_path, weight in zip(files, weights):
    print(weight)
    rows = pd.read_csv(csv_path, header=None).values
    for sample_idx, cls in rows:
        results[sample_idx, cls] += weight

print(results[0])

# The class with the highest accumulated weight wins for each sample.
submit = {
    'name': list(range(800)),
    'pred': results.argmax(axis=1).tolist(),
}

for column, values in submit.items():
    print(column, values)

df = pd.DataFrame(submit)
df.to_csv('vote.csv', header=False, index=False)

ResNet200

不知道是研习社的问题还是作者代码的问题,这个代码的排版不太对,main函数有一部分被放到了最后面,而且感觉代码不太完整。代码部分还是比较常规的。

有一部分是写了个类来计算准确率等

class AverageMeter(object):
    """Keep the most recent value and the running weighted mean of a metric.

    Attributes are ``val`` (last value passed to ``update``), ``sum``
    (weighted sum of all values), ``count`` (total weight) and ``avg``
    (``sum / count``).
    """

    def __init__(self):
        self.reset()

    def reset(self):
        """Set every statistic back to zero."""
        self.val = self.avg = self.sum = self.count = 0

    def update(self, val, n=1):
        """Record ``val`` as observed ``n`` times and refresh the mean."""
        self.val = val
        self.sum += n * val
        self.count += n
        self.avg = self.sum / self.count
posted @ 2020-11-02 17:47  hyzs1220  阅读(113)  评论(0)  编辑  收藏  举报