代码学习
代码学习
AI艺术鉴赏挑战赛的亚军、季军代码学习
基于Resnext50,eff-b3
- 这个网络主要是基于Resnext50和efficientnet-b3 网络
- 网络提供了很多可选参数,可以很方便地调整网络的一些基本参数配置
- 最后就是有一个投票部分,来做一个选择
网络部分(主要是包含一些相关功能模块的实现,这里也做一下记录)
def forward(self, x):
    """Extract features, pool them, gate them and return the head output.

    The pooled feature map feeds two branches: ``self.se`` (reshaped to one
    row per sample) and the ``self.hidden`` linear layer applied to the
    flattened features.  Their elementwise product passes through
    ``self.relu`` and is finally handed to ``self.metric``.
    """
    # The 'eff-b3' backbone is queried through ``extract_features``;
    # every other backbone is simply called.
    if self.model_name == 'eff-b3':
        feature_map = self.backbone.extract_features(x)
    else:
        feature_map = self.backbone(x)

    pooled = self.pool(feature_map)
    batch = pooled.size(0)

    gate = self.se(pooled).view(batch, -1)
    embedding = self.hidden(pooled.view(batch, -1))
    return self.metric(self.relu(embedding * gate))
class SELayer(nn.Module):
    """Squeeze-and-excitation style gate.

    Global-average-pools the input to one value per channel, passes it
    through a bias-free bottleneck MLP (``channel -> channel // reduction
    -> channel``) ending in a sigmoid, and returns the resulting weights
    shaped ``(batch, channel, 1, 1)``.  The layer returns only the gate;
    multiplying it with features is left to the caller.
    """

    def __init__(self, channel, reduction=16):
        super().__init__()
        bottleneck = channel // reduction
        self.avg_pool = nn.AdaptiveAvgPool2d(1)
        self.fc = nn.Sequential(
            nn.Linear(channel, bottleneck, bias=False),
            nn.ReLU(inplace=True),
            nn.Linear(bottleneck, channel, bias=False),
            nn.Sigmoid(),
        )

    def forward(self, x):
        # Unpacking four dimensions enforces a 4-D input.
        batch, channels, _height, _width = x.size()
        squeezed = self.avg_pool(x).view(batch, channels)
        weights = self.fc(squeezed)
        return weights.view(batch, channels, 1, 1)
class AdaptiveConcatPool2d(nn.Module):
    """Concatenate adaptive max pooling and adaptive average pooling.

    Both pools use the output size ``sz``; the results are joined along
    dimension 1 with the max-pooled channels first, so the output has twice
    as many channels as the input.
    """

    def __init__(self, sz=(1, 1)):
        super().__init__()
        self.ap = nn.AdaptiveAvgPool2d(sz)
        self.mp = nn.AdaptiveMaxPool2d(sz)

    def forward(self, x):
        max_pooled = self.mp(x)
        avg_pooled = self.ap(x)
        return torch.cat((max_pooled, avg_pooled), dim=1)
class GeneralizedMeanPooling(nn.Module):
    """Generalized-mean (GeM) pooling over the spatial dimensions.

    The input is clamped to at least ``eps``, raised to the power ``p``,
    adaptively average-pooled to ``output_size`` and then raised to
    ``1 / p``.

    Args:
        norm: Exponent ``p``; must be positive.
        output_size: Target size passed to ``adaptive_avg_pool2d``.
        eps: Lower clamp bound applied before the power.
    """

    def __init__(self, norm=3, output_size=1, eps=1e-6):
        super().__init__()
        assert norm > 0
        self.p = float(norm)
        self.output_size = output_size
        self.eps = eps

    def forward(self, x):
        powered = x.clamp(min=self.eps).pow(self.p)
        pooled = torch.nn.functional.adaptive_avg_pool2d(powered, self.output_size)
        return pooled.pow(1. / self.p)

    def __repr__(self):
        return '{}({}, output_size={})'.format(
            self.__class__.__name__, self.p, self.output_size)
class BaseModel(nn.Module):
    """Configurable classifier built on an EfficientNet-b3 or ResNeXt50 backbone.

    Args:
        model_name: ``'eff-b3'`` or ``'resnext50'``.  Any other value leaves
            the backbone and feature width as ``None``.
        num_classes: Output size of the ``metric`` head.
        pretrained: Whether the backbone is created with pretrained weights.
        pool_type: ``'avg'``, ``'cat'``, ``'max'`` or ``'gem'``; any other
            value sets ``self.pool`` to ``None``.
        down: Whether to build the ``down`` projection (forced on for
            ``pool_type='cat'``); otherwise ``self.down`` is an identity.
        metric: ``'linear'`` for ``nn.Linear`` or ``'am'`` for
            ``AddMarginProduct``; any other value sets ``self.metric`` to
            ``None``.
    """

    def __init__(self, model_name, num_classes, pretrained=True, pool_type='max', down=True, metric='linear'):
        super().__init__()
        self.model_name = model_name

        # --- backbone -------------------------------------------------
        if model_name == 'eff-b3':
            # Honour ``pretrained``: ``from_pretrained`` loads weights, while
            # ``from_name`` only builds the architecture.
            if pretrained:
                backbone = EfficientNet.from_pretrained('efficientnet-b3')
            else:
                backbone = EfficientNet.from_name('efficientnet-b3')
            plane = 1536
        elif model_name == 'resnext50':
            # Drop the last two children of the torchvision model so the
            # backbone ends before its own pooling/classifier.
            resnext = models.resnext50_32x4d(pretrained=pretrained)
            backbone = nn.Sequential(*list(resnext.children())[:-2])
            plane = 2048
        else:
            backbone = None
            plane = None
        self.backbone = backbone

        # --- pooling --------------------------------------------------
        if pool_type == 'avg':
            self.pool = nn.AdaptiveAvgPool2d((1, 1))
        elif pool_type == 'cat':
            self.pool = AdaptiveConcatPool2d()
            # Concatenated pooling doubles the channels, so the projection
            # back down to ``plane`` is always built.
            down = 1
        elif pool_type == 'max':
            self.pool = nn.AdaptiveMaxPool2d((1, 1))
        elif pool_type == 'gem':
            self.pool = GeneralizedMeanPooling()
        else:
            self.pool = None

        # --- optional projection ---------------------------------------
        if down:
            down_in = plane * 2 if pool_type == 'cat' else plane
            self.down = nn.Sequential(
                nn.Linear(down_in, plane),
                nn.BatchNorm1d(plane),
                nn.Dropout(0.2),
                nn.ReLU(True),
            )
        else:
            self.down = nn.Identity()

        # --- gating and embedding ---------------------------------------
        self.se = SELayer(plane)
        self.hidden = nn.Linear(plane, plane)
        self.relu = nn.ReLU(True)

        # --- classification head ----------------------------------------
        if metric == 'linear':
            self.metric = nn.Linear(plane, num_classes)
        elif metric == 'am':
            self.metric = AddMarginProduct(plane, num_classes)
        else:
            self.metric = None
在网络训练之前,作者先对 train、val 的数据做了计数,并把数据列表记录在 txt 文件中。同时,网络训练和测试的收敛情况也做了记录,最终保存模型时还记录了对应的准确率。
def plot(d, mode='train', best_acc_=None):
    """Save side-by-side loss and accuracy curves as ``<mode>.jpg``.

    Args:
        d: Mapping with ``'loss'`` and ``'acc'`` sequences, one value per
            epoch.
        mode: Used in the figure title (``'<mode>_curve'``) and the output
            file name.
        best_acc_: Optional pair ``(x, y)`` drawn as a red marker on the
            accuracy panel.

    The image is written into the module-level ``savepath`` directory.
    """
    import matplotlib.pyplot as plt

    epoch_axis = np.arange(len(d['acc']))

    fig, (loss_ax, acc_ax) = plt.subplots(1, 2, figsize=(10, 4))
    fig.suptitle('%s_curve' % mode)
    fig.subplots_adjust(wspace=0.2, hspace=0.2)

    # Left panel: loss per epoch.
    loss_ax.plot(epoch_axis, d['loss'], label='loss')
    loss_ax.set_xlabel('epoch')
    loss_ax.set_ylabel('loss')
    loss_ax.legend(loc='upper left')

    # Right panel: accuracy per epoch, optionally marking the best point.
    acc_ax.plot(epoch_axis, d['acc'], label='acc')
    if best_acc_ is not None:
        acc_ax.scatter(best_acc_[0], best_acc_[1], c='r')
    acc_ax.set_xlabel('epoch')
    acc_ax.set_ylabel('acc')
    acc_ax.legend(loc='upper left')

    fig.savefig(os.path.join(savepath, '%s.jpg' % mode), bbox_inches='tight')
    plt.close(fig)
加载数据集和数据增强部分
# Both pipelines first resize to ``size / 0.875`` and then crop back to
# ``size``: a random crop for training, a centre crop for validation.
_resize_edge = int(size / 0.875)
# Presumably the usual ImageNet channel statistics -- confirm against the
# pretrained weights in use.
_norm_mean = [0.485, 0.456, 0.406]
_norm_std = [0.229, 0.224, 0.225]

trans = {
    'train': transforms.Compose([
        transforms.RandomHorizontalFlip(),
        # Disabled augmentations, kept for reference:
        # transforms.RandomVerticalFlip(),
        # transforms.ColorJitter(brightness=0.126, saturation=0.5),
        # transforms.RandomAffine(degrees=30, translate=(0.2, 0.2), fillcolor=0, scale=(0.8, 1.2), shear=None),
        transforms.Resize((_resize_edge, _resize_edge)),
        transforms.RandomCrop((size, size)),
        transforms.ToTensor(),
        transforms.Normalize(mean=_norm_mean, std=_norm_std),
        # Random erasing operates on the normalized tensor.
        transforms.RandomErasing(p=0.5, scale=(0.02, 0.33), ratio=(0.3, 3.3)),
    ]),
    'val': transforms.Compose([
        transforms.Resize((_resize_edge, _resize_edge)),
        transforms.CenterCrop((size, size)),
        transforms.ToTensor(),
        transforms.Normalize(mean=_norm_mean, std=_norm_std),
    ]),
}
class Dataset(dataset.Dataset):
    """Image classification dataset driven by ``data/<mode>.txt``.

    Each non-empty line of the index file is ``<image path>,<integer label>``.
    The transform pipeline is taken from the module-level ``trans`` mapping
    using ``mode`` as the key.

    Args:
        mode: ``'train'`` or ``'val'``.
    """

    def __init__(self, mode):
        assert mode in ['train', 'val']
        txt = 'data/%s.txt' % mode
        fpath = []
        labels = []
        with open(txt, 'r') as f:
            for line in f:
                line = line.strip()
                if not line:
                    # Tolerate blank lines (e.g. a trailing newline at EOF).
                    continue
                # Split on the LAST comma so paths containing commas work.
                fp, label = line.rsplit(',', 1)
                fpath.append(fp)
                labels.append(int(label))
        self.fpath = fpath
        self.labels = labels
        self.mode = mode
        self.trans = trans[mode]

    def __getitem__(self, index):
        """Return ``(image, label)`` for ``index``; the image is transformed if a pipeline is set."""
        fp = self.fpath[index]
        label = self.labels[index]
        # ``convert`` returns a new in-memory image, so the underlying file
        # can be closed right away and does not wait for garbage collection.
        with Image.open(fp) as raw:
            img = raw.convert('RGB')
        if self.trans is not None:
            img = self.trans(img)
        return img, label

    def __len__(self):
        return len(self.labels)
# dataloader
# Options common to both loaders.
_loader_kwargs = dict(num_workers=args.num_workers, pin_memory=True)

trainset = Dataset(mode='train')
valset = Dataset(mode='val')

# Training batches are shuffled and the last incomplete batch is dropped.
trainloader = DataLoader(
    dataset=trainset,
    batch_size=args.batch_size,
    shuffle=True,
    drop_last=True,
    **_loader_kwargs
)
# Validation keeps file order and uses a fixed batch size of 128.
valloader = DataLoader(
    dataset=valset,
    batch_size=128,
    shuffle=False,
    **_loader_kwargs
)
网络的训练和测试部分代码都比较常规,所以记录一下别的部分的相关操作
# model
model = BaseModel(
    model_name=args.model_name,
    num_classes=args.num_classes,
    pretrained=args.pretrained,
    pool_type=args.pool_type,
    down=args.down,
    metric=args.metric,
)
if args.resume:
    # Load onto the CPU first so a checkpoint saved on one GPU can be
    # resumed on a different device (or a CPU-only machine); the model is
    # moved to its target device below.
    state = torch.load(args.resume, map_location='cpu')
    print('best_epoch:{}, best_acc:{}'.format(state['epoch'], state['acc']))
    model.load_state_dict(state['net'])

if torch.cuda.device_count() > 1 and args.multi_gpus:
    print('use multi-gpus...')
    # NOTE(review): this is set after torch.cuda has already been queried
    # above, so it may have no effect on which devices are visible --
    # confirm, and consider exporting it before the process starts.
    os.environ['CUDA_VISIBLE_DEVICES'] = '0,1'
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    # Single-process group (rank 0, world size 1) on a fixed local port.
    torch.distributed.init_process_group(backend="nccl", init_method='tcp://localhost:23456', rank=0, world_size=1)
    model = model.to(device)
    model = nn.parallel.DistributedDataParallel(model)
else:
    device = ('cuda:%d' % args.gpu if torch.cuda.is_available() else 'cpu')
    model = model.to(device)
print('device:', device)

# optim
# Only parameters that require gradients are handed to the optimizer.
trainable_params = [p for p in model.parameters() if p.requires_grad]
optimizer = torch.optim.SGD(
    [{'params': trainable_params, 'lr': args.lr}],
    weight_decay=args.weight_decay, momentum=args.momentum)
print('init_lr={}, weight_decay={}, momentum={}'.format(args.lr, args.weight_decay, args.momentum))

if args.scheduler == 'step':
    scheduler = lr_scheduler.StepLR(optimizer, step_size=args.lr_step, gamma=args.lr_gamma, last_epoch=-1)
elif args.scheduler == 'multi':
    scheduler = lr_scheduler.MultiStepLR(optimizer, milestones=[150, 225], gamma=args.lr_gamma, last_epoch=-1)
elif args.scheduler == 'cos':
    warm_up_step = 10

    def lambda_(epoch):
        """LR multiplier: linear warm-up for ``warm_up_step`` epochs, then half-cosine decay."""
        if epoch < warm_up_step:
            return (epoch + 1) / warm_up_step
        progress = (epoch - warm_up_step) / (args.total_epoch - warm_up_step)
        return 0.5 * (np.cos(progress * np.pi) + 1)

    scheduler = torch.optim.lr_scheduler.LambdaLR(optimizer, lambda_)
else:
    # Fail here with a clear message instead of a NameError later on.
    raise ValueError('unknown scheduler: {!r}'.format(args.scheduler))
最后的投票部分
# Weighted voting over several prediction files.  Each CSV has no header and
# holds (sample index, predicted class) rows; every file adds its weight to
# the cell of the class it predicted, and the highest-scoring class wins.
files = ['1.csv', '2.csv', '3.csv', '4.csv']
weights = [1, 1, 1, 1]
results = np.zeros((800, 6))

for csv_path, weight in zip(files, weights):
    print(weight)
    votes = pd.read_csv(csv_path, header=None).values
    for sample_idx, class_idx in votes:
        results[sample_idx, class_idx] += weight
print(results[0])

submit = {
    'name': list(range(800)),
    'pred': results.argmax(axis=1).tolist(),
}
for column, values in submit.items():
    print(column, values)

df = pd.DataFrame(submit)
df.to_csv('vote.csv', header=False, index=False)
ResNet200
不知道是研习社的问题还是作者代码的问题,这份代码的排版不太对:main 函数有一部分被放到了最后面,而且感觉代码不太完整。代码部分还是比较常规的。
有一部分是写了个类来计算准确率等
class AverageMeter(object):
    """Computes and stores the average and current value.

    Attributes set by ``reset``/``update``:
        val: Most recent value passed to ``update``.
        sum: Running sum of ``val * n`` over all updates.
        count: Running total of ``n``.
        avg: ``sum / count``, or 0 while ``count`` is 0.
    """

    def __init__(self):
        self.reset()

    def reset(self):
        """Clear all statistics back to zero."""
        self.val = 0
        self.avg = 0
        self.sum = 0
        self.count = 0

    def update(self, val, n=1):
        """Record ``val`` observed ``n`` times and refresh the running average."""
        self.val = val
        self.sum += val * n
        self.count += n
        # Guard against a zero count (e.g. an empty batch reported with n=0)
        # so the meter does not raise ZeroDivisionError.
        self.avg = self.sum / self.count if self.count else 0