Week 4 Study Notes

AlexNet

This section covers the AlexNet architecture. Compared with LeNet it is noticeably deeper and more complex, and it replaces LeNet's average pooling with max pooling, which improves results considerably.
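
A minimal sketch of the pooling difference (my own addition, not from the original notes):

import torch
from torch import nn

# Max pooling keeps the strongest activation in each window,
# while average pooling smooths activations together
x = torch.tensor([[[[0., 1.], [2., 3.]]]])  # shape (1, 1, 2, 2)
print(nn.MaxPool2d(kernel_size=2)(x))  # tensor([[[[3.]]]])
print(nn.AvgPool2d(kernel_size=2)(x))  # tensor([[[[1.5]]]])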

import torch
from torch import nn
from d2l import torch as d2l

# AlexNet network structure
net = nn.Sequential(
    nn.Conv2d(1, 96, kernel_size=11, stride=4, padding=1), nn.ReLU(),
    nn.MaxPool2d(kernel_size=3, stride=2),
    nn.Conv2d(96, 256, kernel_size=5, padding=2), nn.ReLU(),
    nn.MaxPool2d(kernel_size=3, stride=2),
    nn.Conv2d(256, 384, kernel_size=3, padding=1), nn.ReLU(),
    nn.Conv2d(384, 384, kernel_size=3, padding=1), nn.ReLU(),
    nn.Conv2d(384, 256, kernel_size=3, padding=1), nn.ReLU(),
    nn.MaxPool2d(kernel_size=3, stride=2), nn.Flatten(),
    nn.Linear(6400, 4096), nn.ReLU(), nn.Dropout(p=0.5),  # 6400 = 256 * 5 * 5
    nn.Linear(4096, 4096), nn.ReLU(), nn.Dropout(p=0.5),
    nn.Linear(4096, 10))

# Quick shape check of the network
X = torch.randn(1, 1, 224, 224)
for layer in net:
    X = layer(X)
    print(layer.__class__.__name__, 'output shape:\t', X.shape)

# Load the dataset
batch_size=128
train_iter, test_iter = d2l.load_data_fashion_mnist(batch_size, resize=224)

# Train the network
lr, num_epochs = 0.01, 10
d2l.train_ch6(net, train_iter, test_iter, num_epochs, lr, d2l.try_gpu())

VGG

This section covers the VGG architecture. VGG organizes the network into repeated blocks and introduces the VGG block, laying the groundwork for the block-based designs of later networks.

import torch
from torch import nn
from d2l import torch as d2l

# Define a VGG block
def vgg_block(num_convs, in_channels, out_channels):
    layers = []
    for _ in range(num_convs):
        layers.append(
            nn.Conv2d(in_channels, out_channels, kernel_size=3, padding=1)
        )
        layers.append(nn.ReLU())
        in_channels = out_channels
    layers.append(nn.MaxPool2d(kernel_size=2, stride=2))
    return nn.Sequential(*layers)
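
A quick usage check of vgg_block (my own addition): each 3x3 convolution with padding=1 preserves height and width, and the final max pooling halves them, so one block maps (H, W) to (H/2, W/2). Five blocks take a 224x224 input down to 7x7, which is where the out_channels * 7 * 7 in the classifier below comes from.

blk = vgg_block(2, 3, 16)
print(blk(torch.randn(1, 3, 32, 32)).shape)  # torch.Size([1, 16, 16, 16])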

# VGG network structure
conv_arch = ((1, 64), (1, 128), (2, 256), (2, 512), (2, 512))

def vgg(conv_arch):
    conv_blks = []
    in_channels = 1
    for (num_convs, out_channels) in conv_arch:
        conv_blks.append(vgg_block(num_convs, in_channels, out_channels))
        in_channels = out_channels
    return nn.Sequential(*conv_blks, nn.Flatten(),
                nn.Linear(out_channels * 7 * 7, 4096), nn.ReLU(),
                nn.Dropout(0.5), nn.Linear(4096, 4096), nn.ReLU(),
                nn.Dropout(0.5), nn.Linear(4096, 10))

net = vgg(conv_arch)

X = torch.randn(size=(1, 1, 224, 224))
for blk in net:
    X = blk(X)
    print(blk.__class__.__name__, 'output shape:\t', X.shape)

# Build a smaller network: divide every channel count by 4 to reduce computation
ratio = 4
small_conv_arch = [(pair[0], pair[1] // ratio) for pair in conv_arch]
net = vgg(small_conv_arch)

lr, num_epochs, batch_size = 0.05, 10, 128
train_iter, test_iter = d2l.load_data_fashion_mnist(batch_size, resize=224)
d2l.train_ch6(net, train_iter, test_iter, num_epochs, lr, d2l.try_gpu())

NiN

This section covers NiN. Its main contributions are the NiN block and the use of a global average pooling layer in place of fully connected layers, which sharply reduces the number of parameters; both ideas were adopted by many later networks.

import torch
from torch import nn
from d2l import torch as d2l

# NiN block
def nin_block(in_channels, out_channels, kernel_size, strides, padding):
    return nn.Sequential(
        nn.Conv2d(in_channels, out_channels, kernel_size, strides, padding),
        nn.ReLU(), nn.Conv2d(out_channels, out_channels, kernel_size=1),
        nn.ReLU(), nn.Conv2d(out_channels, out_channels, kernel_size=1),
        nn.ReLU())
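
The 1x1 convolutions in the block act as fully connected layers applied independently at every pixel position. A small check of this equivalence (my own addition):

# Copy the 1x1 conv's weights into a Linear layer and compare outputs
x = torch.randn(2, 8, 4, 4)
conv = nn.Conv2d(8, 16, kernel_size=1)
fc = nn.Linear(8, 16)
fc.weight.data = conv.weight.data.view(16, 8)
fc.bias.data = conv.bias.data
y_conv = conv(x)
y_fc = fc(x.permute(0, 2, 3, 1)).permute(0, 3, 1, 2)
print(torch.allclose(y_conv, y_fc, atol=1e-6))  # True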

# NiN network structure
net = nn.Sequential(
    nin_block(1, 96, kernel_size=11, strides=4, padding=0),
    nn.MaxPool2d(3, stride=2),
    nin_block(96, 256, kernel_size=5, strides=1, padding=2),
    nn.MaxPool2d(3, stride=2),
    nin_block(256, 384, kernel_size=3, strides=1, padding=1),
    nn.MaxPool2d(3, stride=2), nn.Dropout(0.5),
    nin_block(384, 10, kernel_size=3, strides=1, padding=1),
    nn.AdaptiveAvgPool2d((1, 1)),
    nn.Flatten())
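
The AdaptiveAvgPool2d((1, 1)) layer is the global average pooling step: it reduces each channel map to its mean, so the ten output channels of the last NiN block serve directly as the ten class scores. A small check (my own addition):

g = nn.AdaptiveAvgPool2d((1, 1))
t = torch.randn(1, 10, 5, 5)
print(torch.allclose(g(t).flatten(), t.mean(dim=(2, 3)).flatten(), atol=1e-6))  # True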

X = torch.rand(size=(1, 1, 224, 224))
for layer in net:
    X = layer(X)
    print(layer.__class__.__name__, 'output shape:\t', X.shape) 
    
lr, num_epochs, batch_size = 0.1, 10, 128
train_iter, test_iter = d2l.load_data_fashion_mnist(batch_size, resize=224)
d2l.train_ch6(net, train_iter, test_iter, num_epochs, lr, d2l.try_gpu())

GoogLeNet

GoogLeNet, proposed by Google, combines convolution kernels of several different sizes in parallel inside an Inception block; stacking these blocks allowed the network to reach considerable depth.

import torch
from torch import nn
from torch.nn import functional as F
from d2l import torch as d2l

# Inception block
class Inception(nn.Module):
    # c1--c4 are the output channel counts of the four parallel branches
    def __init__(self, in_channels, c1, c2, c3, c4, **kwargs):
        super(Inception, self).__init__(**kwargs)
        # Branch 1: 1x1 convolution
        self.p1_1 = nn.Conv2d(in_channels, c1, kernel_size=1)
        # Branch 2: 1x1 convolution followed by a 3x3 convolution
        self.p2_1 = nn.Conv2d(in_channels, c2[0], kernel_size=1)
        self.p2_2 = nn.Conv2d(c2[0], c2[1], kernel_size=3, padding=1)
        # Branch 3: 1x1 convolution followed by a 5x5 convolution
        self.p3_1 = nn.Conv2d(in_channels, c3[0], kernel_size=1)
        self.p3_2 = nn.Conv2d(c3[0], c3[1], kernel_size=5, padding=2)
        # Branch 4: 3x3 max pooling followed by a 1x1 convolution
        self.p4_1 = nn.MaxPool2d(kernel_size=3, stride=1, padding=1)
        self.p4_2 = nn.Conv2d(in_channels, c4, kernel_size=1)
    def forward(self, x):
        p1 = F.relu(self.p1_1(x))
        p2 = F.relu(self.p2_2(F.relu(self.p2_1(x))))
        p3 = F.relu(self.p3_2(F.relu(self.p3_1(x))))
        p4 = F.relu(self.p4_2(self.p4_1(x)))
        return torch.cat((p1, p2, p3, p4), dim=1)
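
Every branch preserves height and width, so the four outputs can be concatenated along the channel dimension; the block's output channel count is simply c1 + c2[1] + c3[1] + c4. A quick check (my own addition):

blk = Inception(192, 64, (96, 128), (16, 32), 32)
print(blk(torch.randn(1, 192, 28, 28)).shape)  # torch.Size([1, 256, 28, 28]), 256 = 64 + 128 + 32 + 32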
    
# GoogLeNet network structure
b1 = nn.Sequential(nn.Conv2d(1, 64, kernel_size=7, stride=2, padding=3),
        nn.ReLU(), nn.MaxPool2d(kernel_size=3, stride=2,padding=1))

b2 = nn.Sequential(nn.Conv2d(64, 64, kernel_size=1), nn.ReLU(),
        nn.Conv2d(64, 192, kernel_size=3, padding=1),
        nn.MaxPool2d(kernel_size=3, stride=2, padding=1))

b3 = nn.Sequential(Inception(192, 64, (96, 128), (16, 32), 32),
        Inception(256, 128, (128, 192), (32, 96), 64),
        nn.MaxPool2d(kernel_size=3, stride=2, padding=1))

b4 = nn.Sequential(Inception(480, 192, (96, 208), (16, 48), 64),
        Inception(512, 160, (112, 224), (24, 64), 64),
        Inception(512, 128, (128, 256), (24, 64), 64),
        Inception(512, 112, (144, 288), (32, 64), 64),
        Inception(528, 256, (160, 320), (32, 128), 128),
        nn.MaxPool2d(kernel_size=3, stride=2, padding=1))

b5 = nn.Sequential(Inception(832, 256, (160, 320), (32, 128), 128),
        Inception(832, 384, (192, 384), (48, 128), 128),
        nn.AdaptiveAvgPool2d((1, 1)), nn.Flatten())

net = nn.Sequential(b1, b2, b3, b4, b5, nn.Linear(1024, 10))

X = torch.rand(size=(1, 1, 96, 96))
for layer in net:
    X = layer(X)
    print(layer.__class__.__name__, 'output shape:\t', X.shape)
lr, num_epochs, batch_size = 0.1, 10, 64
train_iter, test_iter = d2l.load_data_fashion_mnist(batch_size, resize=96)
d2l.train_ch6(net, train_iter, test_iter, num_epochs, lr, d2l.try_gpu())

Batch Normalization

This section covers batch normalization, which consistently accelerates the convergence of deep networks. During training each mini-batch is standardized, x̂ = (x − μ_B) / √(σ²_B + ε), and then rescaled with the learned parameters γ and β: y = γ·x̂ + β.

import torch
from torch import nn
from d2l import torch as d2l

# Define the batch normalization operation
def batch_norm(X, gamma, beta, moving_mean, moving_var, eps, momentum):
    if not torch.is_grad_enabled():
        # Prediction mode: use the running (moving-average) mean and variance
        X_hat = (X - moving_mean) / torch.sqrt(moving_var + eps)
    else:
        # Training mode: compute mean and variance from the current batch
        assert len(X.shape) in (2, 4)
        if len(X.shape) == 2:
            # Fully connected layer: statistics per feature, over the batch
            mean = X.mean(dim=0)
            var = ((X - mean)**2).mean(dim=0)
        else:
            # Convolutional layer: statistics per channel, over the batch and
            # spatial dimensions; keepdim=True preserves X's shape so the
            # broadcasting below works
            mean = X.mean(dim=(0, 2, 3), keepdim=True)
            var = ((X - mean)**2).mean(dim=(0, 2, 3), keepdim=True)
        # Standardize with the current batch statistics
        X_hat = (X - mean) / torch.sqrt(var + eps)
        # Update the running mean and variance
        moving_mean = momentum * moving_mean + (1.0 - momentum) * mean
        moving_var = momentum * moving_var + (1.0 - momentum) * var
    Y = gamma * X_hat + beta  # scale and shift
    return Y, moving_mean.data, moving_var.data
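
As a sanity check (my own addition): in training mode the function should agree with PyTorch's nn.BatchNorm2d at its default initialization (gamma = 1, beta = 0, eps = 1e-5):

X = torch.randn(4, 3, 5, 5)
Y_ref = nn.BatchNorm2d(3)(X)  # a freshly constructed module is in training mode
Y_ours, _, _ = batch_norm(X, torch.ones(1, 3, 1, 1), torch.zeros(1, 3, 1, 1),
                          torch.zeros(1, 3, 1, 1), torch.ones(1, 3, 1, 1),
                          eps=1e-5, momentum=0.9)
print(torch.allclose(Y_ref, Y_ours, atol=1e-5))  # True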

# Define a BatchNorm module wrapping the batch_norm function above
class BatchNorm(nn.Module):
    def __init__(self, num_features, num_dims):
        super().__init__()
        if num_dims == 2:
            shape = (1, num_features)
        else:
            shape = (1, num_features, 1, 1)
        self.gamma = nn.Parameter(torch.ones(shape))
        self.beta = nn.Parameter(torch.zeros(shape))
        self.moving_mean = torch.zeros(shape)
        self.moving_var = torch.ones(shape)

    def forward(self, X):
        if self.moving_mean.device != X.device:
            self.moving_mean = self.moving_mean.to(X.device)
            self.moving_var = self.moving_var.to(X.device)
        Y, self.moving_mean, self.moving_var = batch_norm(
            X, self.gamma, self.beta, self.moving_mean, self.moving_var, 
            eps=1e-5, momentum=0.9)
        return Y

# Build a LeNet-style network using the custom BatchNorm layers
net = nn.Sequential(nn.Conv2d(1, 6, kernel_size=5), BatchNorm(6, num_dims=4),
            nn.Sigmoid(), nn.MaxPool2d(kernel_size=2, stride=2),
            nn.Conv2d(6, 16, kernel_size=5), BatchNorm(16, num_dims=4),
            nn.Sigmoid(), nn.MaxPool2d(kernel_size=2, stride=2),
            nn.Flatten(), nn.Linear(16 * 4 * 4, 120),
            BatchNorm(120, num_dims=2), nn.Sigmoid(),
            nn.Linear(120, 84), BatchNorm(84, num_dims=2),
            nn.Sigmoid(), nn.Linear(84, 10))

lr, num_epochs, batch_size = 1.0, 10, 256
train_iter, test_iter = d2l.load_data_fashion_mnist(batch_size)
d2l.train_ch6(net, train_iter, test_iter, num_epochs, lr, d2l.try_gpu())

# Build the same network using PyTorch's built-in batch normalization layers
net = nn.Sequential(nn.Conv2d(1, 6, kernel_size=5), nn.BatchNorm2d(6),
            nn.Sigmoid(), nn.MaxPool2d(kernel_size=2, stride=2),
            nn.Conv2d(6, 16, kernel_size=5), nn.BatchNorm2d(16),
            nn.Sigmoid(), nn.MaxPool2d(kernel_size=2, stride=2),
            nn.Flatten(), nn.Linear(256, 120), nn.BatchNorm1d(120),
            nn.Sigmoid(), nn.Linear(120, 84), nn.BatchNorm1d(84),
            nn.Sigmoid(), nn.Linear(84, 10))

d2l.train_ch6(net, train_iter, test_iter, num_epochs, lr, d2l.try_gpu())

ResNet

The introduction of ResNet allowed networks to grow much deeper still: each residual block adds its input to the output of a small stack of layers (y = f(x) + x), so extra layers can always fall back to the identity mapping. Most modern networks adopt this design.

import torch
from torch import nn
from torch.nn import functional as F
from d2l import torch as d2l

# Residual block
class Residual(nn.Module):
    def __init__(self, input_channels, num_channels, use_1x1conv=False, strides=1):
        super().__init__()
        self.conv1 = nn.Conv2d(input_channels, num_channels, kernel_size=3, padding=1, stride=strides)
        self.conv2 = nn.Conv2d(num_channels, num_channels, kernel_size=3, padding=1)
        if use_1x1conv:
            # 1x1 convolution on the shortcut path, used to match the channel
            # count and stride of the main path so the two can be added
            self.conv3 = nn.Conv2d(input_channels, num_channels, kernel_size=1, stride=strides)
        else:
            self.conv3 = None
        self.bn1 = nn.BatchNorm2d(num_channels)
        self.bn2 = nn.BatchNorm2d(num_channels)
        self.relu = nn.ReLU(inplace=True)

    def forward(self, X):
        Y = F.relu(self.bn1(self.conv1(X)))
        Y = self.bn2(self.conv2(Y))
        if self.conv3:
            X = self.conv3(X)
        Y += X
        return F.relu(Y)
    
blk = Residual(3, 3)
X = torch.rand(4, 3, 6, 6)
Y = blk(X)
print(Y.shape)  # torch.Size([4, 3, 6, 6]): shape preserved

blk = Residual(3, 6, use_1x1conv=True, strides=2)
print(blk(X).shape)  # torch.Size([4, 6, 3, 3]): channels changed, H and W halved

# ResNet stem (initial block)
b1 = nn.Sequential(nn.Conv2d(1, 64, kernel_size=7, stride=2, padding=3),
        nn.BatchNorm2d(64), nn.ReLU(),
        nn.MaxPool2d(kernel_size=3, stride=2, padding=1))

# A stage of residual blocks
def resnet_block(input_channels, num_channels, num_residuals,
                 first_block=False):
    blk = []
    for i in range(num_residuals):
        if i == 0 and not first_block:
            # The first residual of every stage after the first halves the
            # height/width and changes the channel count, so it needs the
            # 1x1 convolution on the shortcut
            blk.append(Residual(input_channels, num_channels,
                                use_1x1conv=True, strides=2))
        else:
            # The remaining residuals leave the shape unchanged
            blk.append(Residual(num_channels, num_channels))
    return blk

# Assemble the four residual stages
b2 = nn.Sequential(*resnet_block(64, 64, 2, first_block=True))
b3 = nn.Sequential(*resnet_block(64, 128, 2))
b4 = nn.Sequential(*resnet_block(128, 256, 2))
b5 = nn.Sequential(*resnet_block(256, 512, 2))

# Full network structure (the ResNet-18 layout)
net = nn.Sequential(b1, b2, b3, b4, b5,
        nn.AdaptiveAvgPool2d((1,1)), nn.Flatten(), nn.Linear(512, 10))

X = torch.rand(size=(1, 1, 224, 224))
for layer in net:
    X = layer(X)
    print(layer.__class__.__name__,'output shape:\t', X.shape)
    
lr, num_epochs, batch_size = 0.05, 10, 256
train_iter, test_iter = d2l.load_data_fashion_mnist(batch_size, resize=96)
d2l.train_ch6(net, train_iter, test_iter, num_epochs, lr, d2l.try_gpu())

Cats vs. Dogs

Using transfer learning: a pretrained ResNet50 is fine-tuned (only a new classification head is trained), which works remarkably well.

!unzip '/content/drive/MyDrive/cat_dog.zip' # Unzip the data; be sure to extract it to the root directory, otherwise reading is very slow

import numpy as np
import matplotlib.pyplot as plt
import os
import torch
import torch.nn as nn
from torch.nn import functional as F
import torchvision as tv
from torchvision import models,transforms,datasets
import time
import json
import csv
from d2l import torch as d2l

# Check whether a GPU device is available
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print('Using gpu: %s ' % torch.cuda.is_available())

# Load the dataset; preprocessing only center-crops to the 224x224 input size ResNet expects and applies ImageNet normalization
normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])

resnet_format = transforms.Compose([
                transforms.CenterCrop(224),
                transforms.ToTensor(),
                normalize,
            ])

data_dir = '/content/cat_dog'
data_test_dir = '/content/cat_dog/test'
dsets = {x: datasets.ImageFolder(os.path.join(data_dir, x), resnet_format)
         for x in ['train', 'val']}

dsets_test = {'test': datasets.ImageFolder(data_test_dir, resnet_format)}
dset_sizes = {x: len(dsets[x]) for x in ['train', 'val']}
dset_sizes['test'] = len(dsets_test['test'])
dset_classes = dsets['train'].classes
print(dsets['train'].classes)
print(dsets['train'].class_to_idx)
print('dset_sizes: ', dset_sizes)
batch_size = 25
loader_train = torch.utils.data.DataLoader(dsets['train'], batch_size=batch_size, shuffle=True, num_workers=2)
loader_val = torch.utils.data.DataLoader(dsets['val'], batch_size=batch_size, shuffle=False, num_workers=2)
loader_test = torch.utils.data.DataLoader(dsets_test['test'], batch_size=batch_size, shuffle=False, num_workers=2)

# Accuracy evaluation function (runs on the GPU)
def evaluate_accuracy_gpu(net, data_iter, device=None):
    if isinstance(net, torch.nn.Module):
        net.eval()
        if not device:
            device = next(iter(net.parameters())).device

    metric = d2l.Accumulator(2)
    for X, y in data_iter:
        if isinstance(X, list):
            X = [x.to(device) for x in X]
        else:
            X = X.to(device)
        y = y.to(device)
        metric.add(d2l.accuracy(net(X), y), y.numel())
    return metric[0] / metric[1]

# Training function; only the parameters of the new net.fc head are optimized
def train(net, train_iter, test_iter, num_epochs, lr, device, name):
    print('train on', device)
    net.to(device)
    optimizer = torch.optim.SGD(net.fc.parameters(), lr=lr)
    loss = nn.CrossEntropyLoss()
    for epoch in range(num_epochs):
        metric = d2l.Accumulator(3)
        net.train()
        print(epoch)
        for i, (X, y) in enumerate(train_iter):
            optimizer.zero_grad()
            X, y = X.to(device), y.to(device)
            y_hat = net(X)
            l = loss(y_hat, y)
            l.backward()
            optimizer.step()
            with torch.no_grad():
                metric.add(l * X.shape[0], d2l.accuracy(y_hat, y), X.shape[0])
            train_l = metric[0] / metric[2]
            train_acc = metric[1] / metric[2]
        test_acc = evaluate_accuracy_gpu(net, test_iter)
        # Save the weights every five epochs
        if (epoch+1) % 5 == 0:
            torch.save(net.state_dict(), f'/content/drive/MyDrive/params/{name}{epoch+1}.params')
        print(f'epoch_{epoch}: loss {train_l:.3f}, train acc {train_acc:.3f}, '
            f'test acc {test_acc:.3f}')
        
# Load the pretrained model
model = tv.models.resnet50(pretrained=True)

# Freeze the pretrained parameters
for param in model.parameters():
    param.requires_grad = False
    
# Replace the final layer so the output matches the number of classes (2)
model.fc = nn.Sequential(
        nn.Linear(2048, 512),
        nn.Linear(512, 2))
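
A quick check (my own addition) that the backbone is frozen and only the new head will be trained:

trainable = sum(p.numel() for p in model.parameters() if p.requires_grad)
total = sum(p.numel() for p in model.parameters())
print(f'trainable parameters: {trainable} / {total}')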

# Train for a total of ten epochs
lr, num_epochs = 0.045, 10
net = model
train(net, loader_train, loader_val, num_epochs, lr, d2l.try_gpu(), 'ResNet')

# Load the saved weights for prediction
path = '/content/drive/MyDrive/params/ResNet10.params'
net = model
net.load_state_dict(torch.load(path))

# Prediction function
def predict(net, data_iter, size, device=None):
    predictions = np.zeros(size)
    i = 0
    
    if isinstance(net, torch.nn.Module):
        net.eval()
        if not device:
            device = next(iter(net.parameters())).device
    net.to(device)
    for X, y in data_iter:
        if isinstance(X, list):
            X = [x.to(device) for x in X]
        else:
            X = X.to(device)
        predictions[i: i+len(X)] = d2l.argmax(net(X), dim=1).cpu().numpy()
        i += len(X)
    return predictions

# Run prediction on the test set
predictions = predict(net, loader_test, dset_sizes['test'], d2l.try_gpu())

# Save the prediction results to a CSV file
def save_file(inputs, set_test, file):
    with open(file, 'w') as f:
        writer = csv.writer(f)
        for index, input in enumerate(inputs):
            img_name = set_test['test'].imgs[index][0].split('/')[-1]
            order = int(img_name.split('.')[0])
            writer.writerow([order, int(input)])

file = '/content/drive/MyDrive/result/catVSdog_ResNet_pretrained.csv'
save_file(predictions, dsets_test, file)

Results
