J6、ResNeXt-50实战解析
- 🍨 本文为🔗365天深度学习训练营 中的学习记录博客
- 🍖 原作者:K同学啊|接辅导、项目定制
学习要求¶
- 阅读ResNeXt论文,了解作者的构建思路
- 对比我们之前介绍的ResNet50V2、DenseNet算法
- 使用ResNeXt-50算法完成猴痘病识别
一、模型介绍¶
ResNeXt是由何恺明团队在2017年CVPR会议上提出来的新型图像分类网络。ResNeXt是ResNet的升级版,在ResNet的基础上引入了cardinality(基数)的概念。与ResNet类似,ResNeXt也有ResNeXt-50、ResNeXt-101等版本。
ResNeXt的主要创新点就在于block的设计上,ResNeXt的block设计目的旨在于解决当时存在的一个问题:要提高模型的准确率,往往采取加深网络或者加宽网络的方法。虽然这种方法是有效的,但是随之而来的,是网络设计的难度和计算开销的增加。为了一点精度的提升往往需要付出更大的代价。因此,需要一个更好的策略,在不额外增加计算代价的情况下,提升网络的精度。
下图是ResNet(左)与ResNeXt(右)block的差异。在ResNet中,输入的具有256个通道的特征经过1×1卷积压缩4倍到64个通道,之后3×3的卷积核用于处理特征,经1×1卷积扩大通道数与原特征残差连接后输出。ResNeXt也是相同的处理策略,但在ResNeXt中,输入的具有256个通道的特征被分为32个组,每组被压缩64倍到4个通道后进行处理。32个组相加后与原特征残差连接后输出。这里cardinality指的是一个block中所具有的相同分支的数目。
二、原理说明¶
ResNeXt中采用的分组卷积简单来说就是将特征图分为不同的组,再对每组特征图分别进行卷积,这个操作可以有效地降低计算量。
在分组卷积中,每个卷积核只处理部分通道,比如下图中,红色卷积核只处理红色的通道,绿色卷积核只处理绿色通道,黄色卷积核只处理黄色通道。此时每个卷积核有2个通道,每个卷积核生成一张特征图。
对于ResNeXt-50的网络结构对比ResNet-50如下所示:
可以看到每个block的卷积核个数不一样,每个block的前两层convolution kernel是resnet50的2倍,最后一层一样。参数几乎一样但是效果相当于resnet-101
作者在文章中其实一共提出了三种等价的模型结构,如下所示:
最后的ResNeXt用了C的结构来构建我们的ResNeXt,用相同的拓扑结构,并在保持参数量的情况下提高了准确率。这个构建基于两个准则
- 同stage中的block使用相同的width和filter size;
- spatial size减小时,增加channel的数量。
除此之外,ResNeXt 的这种等价变换只有在 block 的 depth≥3 时才有意义。如果 block 的 depth=2,则只会得到一个宽而密集的模块,这也是作者没有对 ResNet-18 和 ResNet-34 做相应修改的原因。
三、PyTorch实现ResNeXt-50模型¶
import torch
import torch.nn as nn
import torchvision.transforms as transforms
import torchvision
import torch.nn.functional as F
from torchvision import transforms, datasets
from sklearn.model_selection import KFold
from torch.optim.lr_scheduler import StepLR, MultiStepLR, LambdaLR, ExponentialLR, CosineAnnealingLR, ReduceLROnPlateau
import os,PIL,pathlib,random
# Use the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
device
device(type='cuda')
1、分组卷积模块¶
class Bottleneck(nn.Module):
    """Residual block made of a 1x1, a grouped 3x3 and a 1x1 convolution.

    The block emits ``out_channel * expansion`` channels. The inner width is
    ``int(out_channel * (width_per_group / 64.)) * groups``, so with the
    defaults (``groups=1``, ``width_per_group=64``) it equals ``out_channel``.

    Args:
        in_channel: channel count of the incoming feature map.
        out_channel: base width of the block.
        stride: stride of the 3x3 convolution.
        downsample: optional module applied to the input to build the
            shortcut branch; when ``None`` the input itself is the shortcut.
        groups: number of groups used by the 3x3 convolution.
        width_per_group: scales the inner width relative to a base of 64.
    """

    expansion = 4

    def __init__(self, in_channel, out_channel, stride=1, downsample=None,
                 groups=1, width_per_group=64):
        super().__init__()

        inner = int(out_channel * (width_per_group / 64.)) * groups
        outer = out_channel * self.expansion

        # 1x1: squeeze the channels down to the inner width.
        self.conv1 = nn.Conv2d(in_channel, inner,
                               kernel_size=1, stride=1, bias=False)
        self.bn1 = nn.BatchNorm2d(inner)

        # 3x3: grouped convolution; this is the layer that carries the stride.
        self.conv2 = nn.Conv2d(inner, inner,
                               kernel_size=3, stride=stride, padding=1,
                               groups=groups, bias=False)
        self.bn2 = nn.BatchNorm2d(inner)

        # 1x1: expand back to the block's output width.
        self.conv3 = nn.Conv2d(inner, outer,
                               kernel_size=1, stride=1, bias=False)
        self.bn3 = nn.BatchNorm2d(outer)

        self.relu = nn.ReLU(inplace=True)
        self.downsample = downsample

    def forward(self, x):
        """Return ``relu(main_branch(x) + shortcut(x))``."""
        shortcut = x if self.downsample is None else self.downsample(x)

        y = self.relu(self.bn1(self.conv1(x)))
        y = self.relu(self.bn2(self.conv2(y)))
        y = self.bn3(self.conv3(y))

        y += shortcut
        return self.relu(y)
2、构建ResNet框架,只需要导入Bottleneck即可变为ResNeXt¶
class ResNet(nn.Module):
    """ResNet-style backbone assembled from a caller-supplied block type.

    The network is a 7x7 stem, a max-pool, four stages built by
    ``_make_layer`` (base widths 64/128/256/512, strides 1/2/2/2) and,
    when ``include_top`` is true, global average pooling plus a linear
    classifier.

    Args:
        block: block class/factory; must expose an ``expansion`` attribute
            and accept ``(in_channel, channel, stride=, downsample=,
            groups=, width_per_group=)``.
        blocks_num: number of blocks in each of the four stages.
        num_classes: output size of the final linear layer.
        include_top: whether to build and apply the pooling + classifier head.
        groups: forwarded to every block.
        width_per_group: forwarded to every block.
    """

    def __init__(self,
                 block,
                 blocks_num,
                 num_classes=1000,
                 include_top=True,
                 groups=1,
                 width_per_group=64):
        super().__init__()
        self.include_top = include_top
        self.in_channel = 64  # running channel count; advanced by _make_layer
        self.groups = groups
        self.width_per_group = width_per_group

        # Stem.
        self.conv1 = nn.Conv2d(3, self.in_channel, kernel_size=7, stride=2,
                               padding=3, bias=False)
        self.bn1 = nn.BatchNorm2d(self.in_channel)
        self.relu = nn.ReLU(inplace=True)
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)

        # Stages layer1..layer4, registered under the same attribute names.
        stage_plan = ((64, 1), (128, 2), (256, 2), (512, 2))
        for index, (channel, stride) in enumerate(stage_plan):
            stage = self._make_layer(block, channel, blocks_num[index],
                                     stride=stride)
            setattr(self, f"layer{index + 1}", stage)

        if self.include_top:
            self.avgpool = nn.AdaptiveAvgPool2d((1, 1))  # output size = (1, 1)
            self.fc = nn.Linear(512 * block.expansion, num_classes)

        # Re-initialise every convolution weight (Kaiming normal, fan-out).
        conv_modules = [m for m in self.modules() if isinstance(m, nn.Conv2d)]
        for conv in conv_modules:
            nn.init.kaiming_normal_(conv.weight, mode='fan_out',
                                    nonlinearity='relu')

    def _make_layer(self, block, channel, block_num, stride=1):
        """Build one stage of ``block_num`` blocks and advance ``in_channel``.

        Only the first block receives ``stride``; it also receives a
        1x1-conv + batch-norm shortcut when the stride is not 1 or the
        channel count changes.
        """
        stage_out = channel * block.expansion

        downsample = None
        if stride != 1 or self.in_channel != stage_out:
            downsample = nn.Sequential(
                nn.Conv2d(self.in_channel, stage_out, kernel_size=1,
                          stride=stride, bias=False),
                nn.BatchNorm2d(stage_out))

        shared = dict(groups=self.groups,
                      width_per_group=self.width_per_group)
        head = block(self.in_channel, channel,
                     downsample=downsample, stride=stride, **shared)
        self.in_channel = stage_out

        tail = [block(self.in_channel, channel, **shared)
                for _ in range(1, block_num)]
        return nn.Sequential(head, *tail)

    def forward(self, x):
        """Run the stem and the four stages, then the head if it was built."""
        x = self.maxpool(self.relu(self.bn1(self.conv1(x))))

        for stage in (self.layer1, self.layer2, self.layer3, self.layer4):
            x = stage(x)

        if not self.include_top:
            return x

        x = self.avgpool(x)
        x = torch.flatten(x, 1)
        return self.fc(x)
3、定义ResNeXt模型,并打印模型结构¶
def resnext50_32x4d(num_classes=1000, include_top=True):
    """Build a ResNeXt-50 (32x4d): ``ResNet`` with ``Bottleneck`` blocks.

    Uses stage depths ``[3, 4, 6, 3]``, 32 groups and a per-group width of 4.

    Args:
        num_classes: output size of the classifier head.
        include_top: whether the pooling + classifier head is built.
    """
    # Pretrained weights: https://download.pytorch.org/models/resnext50_32x4d-7cdf4587.pth
    cardinality_config = dict(groups=32, width_per_group=4)
    return ResNet(Bottleneck, [3, 4, 6, 3],
                  num_classes=num_classes,
                  include_top=include_top,
                  **cardinality_config)
# Build the ResNeXt-50 with a 4-way classifier head and move it to `device`.
model = resnext50_32x4d(num_classes=4, include_top=True)
model.to(device)
# Print the model's parameter count and related per-layer statistics.
import torchsummary as summary
summary.summary(model, (3, 224, 224)) # input shape (3, 224, 224), i.e. a 3 x 224 x 224 input image
----------------------------------------------------------------
Layer (type) Output Shape Param #
================================================================
Conv2d-1 [-1, 64, 112, 112] 9,408
BatchNorm2d-2 [-1, 64, 112, 112] 128
ReLU-3 [-1, 64, 112, 112] 0
MaxPool2d-4 [-1, 64, 56, 56] 0
Conv2d-5 [-1, 256, 56, 56] 16,384
BatchNorm2d-6 [-1, 256, 56, 56] 512
Conv2d-7 [-1, 128, 56, 56] 8,192
BatchNorm2d-8 [-1, 128, 56, 56] 256
ReLU-9 [-1, 128, 56, 56] 0
Conv2d-10 [-1, 128, 56, 56] 4,608
BatchNorm2d-11 [-1, 128, 56, 56] 256
ReLU-12 [-1, 128, 56, 56] 0
Conv2d-13 [-1, 256, 56, 56] 32,768
BatchNorm2d-14 [-1, 256, 56, 56] 512
ReLU-15 [-1, 256, 56, 56] 0
Bottleneck-16 [-1, 256, 56, 56] 0
Conv2d-17 [-1, 128, 56, 56] 32,768
BatchNorm2d-18 [-1, 128, 56, 56] 256
ReLU-19 [-1, 128, 56, 56] 0
Conv2d-20 [-1, 128, 56, 56] 4,608
BatchNorm2d-21 [-1, 128, 56, 56] 256
ReLU-22 [-1, 128, 56, 56] 0
Conv2d-23 [-1, 256, 56, 56] 32,768
BatchNorm2d-24 [-1, 256, 56, 56] 512
ReLU-25 [-1, 256, 56, 56] 0
Bottleneck-26 [-1, 256, 56, 56] 0
Conv2d-27 [-1, 128, 56, 56] 32,768
BatchNorm2d-28 [-1, 128, 56, 56] 256
ReLU-29 [-1, 128, 56, 56] 0
Conv2d-30 [-1, 128, 56, 56] 4,608
BatchNorm2d-31 [-1, 128, 56, 56] 256
ReLU-32 [-1, 128, 56, 56] 0
Conv2d-33 [-1, 256, 56, 56] 32,768
BatchNorm2d-34 [-1, 256, 56, 56] 512
ReLU-35 [-1, 256, 56, 56] 0
Bottleneck-36 [-1, 256, 56, 56] 0
Conv2d-37 [-1, 512, 28, 28] 131,072
BatchNorm2d-38 [-1, 512, 28, 28] 1,024
Conv2d-39 [-1, 256, 56, 56] 65,536
BatchNorm2d-40 [-1, 256, 56, 56] 512
ReLU-41 [-1, 256, 56, 56] 0
Conv2d-42 [-1, 256, 28, 28] 18,432
BatchNorm2d-43 [-1, 256, 28, 28] 512
ReLU-44 [-1, 256, 28, 28] 0
Conv2d-45 [-1, 512, 28, 28] 131,072
BatchNorm2d-46 [-1, 512, 28, 28] 1,024
ReLU-47 [-1, 512, 28, 28] 0
Bottleneck-48 [-1, 512, 28, 28] 0
Conv2d-49 [-1, 256, 28, 28] 131,072
BatchNorm2d-50 [-1, 256, 28, 28] 512
ReLU-51 [-1, 256, 28, 28] 0
Conv2d-52 [-1, 256, 28, 28] 18,432
BatchNorm2d-53 [-1, 256, 28, 28] 512
ReLU-54 [-1, 256, 28, 28] 0
Conv2d-55 [-1, 512, 28, 28] 131,072
BatchNorm2d-56 [-1, 512, 28, 28] 1,024
ReLU-57 [-1, 512, 28, 28] 0
Bottleneck-58 [-1, 512, 28, 28] 0
Conv2d-59 [-1, 256, 28, 28] 131,072
BatchNorm2d-60 [-1, 256, 28, 28] 512
ReLU-61 [-1, 256, 28, 28] 0
Conv2d-62 [-1, 256, 28, 28] 18,432
BatchNorm2d-63 [-1, 256, 28, 28] 512
ReLU-64 [-1, 256, 28, 28] 0
Conv2d-65 [-1, 512, 28, 28] 131,072
BatchNorm2d-66 [-1, 512, 28, 28] 1,024
ReLU-67 [-1, 512, 28, 28] 0
Bottleneck-68 [-1, 512, 28, 28] 0
Conv2d-69 [-1, 256, 28, 28] 131,072
BatchNorm2d-70 [-1, 256, 28, 28] 512
ReLU-71 [-1, 256, 28, 28] 0
Conv2d-72 [-1, 256, 28, 28] 18,432
BatchNorm2d-73 [-1, 256, 28, 28] 512
ReLU-74 [-1, 256, 28, 28] 0
Conv2d-75 [-1, 512, 28, 28] 131,072
BatchNorm2d-76 [-1, 512, 28, 28] 1,024
ReLU-77 [-1, 512, 28, 28] 0
Bottleneck-78 [-1, 512, 28, 28] 0
Conv2d-79 [-1, 1024, 14, 14] 524,288
BatchNorm2d-80 [-1, 1024, 14, 14] 2,048
Conv2d-81 [-1, 512, 28, 28] 262,144
BatchNorm2d-82 [-1, 512, 28, 28] 1,024
ReLU-83 [-1, 512, 28, 28] 0
Conv2d-84 [-1, 512, 14, 14] 73,728
BatchNorm2d-85 [-1, 512, 14, 14] 1,024
ReLU-86 [-1, 512, 14, 14] 0
Conv2d-87 [-1, 1024, 14, 14] 524,288
BatchNorm2d-88 [-1, 1024, 14, 14] 2,048
ReLU-89 [-1, 1024, 14, 14] 0
Bottleneck-90 [-1, 1024, 14, 14] 0
Conv2d-91 [-1, 512, 14, 14] 524,288
BatchNorm2d-92 [-1, 512, 14, 14] 1,024
ReLU-93 [-1, 512, 14, 14] 0
Conv2d-94 [-1, 512, 14, 14] 73,728
BatchNorm2d-95 [-1, 512, 14, 14] 1,024
ReLU-96 [-1, 512, 14, 14] 0
Conv2d-97 [-1, 1024, 14, 14] 524,288
BatchNorm2d-98 [-1, 1024, 14, 14] 2,048
ReLU-99 [-1, 1024, 14, 14] 0
Bottleneck-100 [-1, 1024, 14, 14] 0
Conv2d-101 [-1, 512, 14, 14] 524,288
BatchNorm2d-102 [-1, 512, 14, 14] 1,024
ReLU-103 [-1, 512, 14, 14] 0
Conv2d-104 [-1, 512, 14, 14] 73,728
BatchNorm2d-105 [-1, 512, 14, 14] 1,024
ReLU-106 [-1, 512, 14, 14] 0
Conv2d-107 [-1, 1024, 14, 14] 524,288
BatchNorm2d-108 [-1, 1024, 14, 14] 2,048
ReLU-109 [-1, 1024, 14, 14] 0
Bottleneck-110 [-1, 1024, 14, 14] 0
Conv2d-111 [-1, 512, 14, 14] 524,288
BatchNorm2d-112 [-1, 512, 14, 14] 1,024
ReLU-113 [-1, 512, 14, 14] 0
Conv2d-114 [-1, 512, 14, 14] 73,728
BatchNorm2d-115 [-1, 512, 14, 14] 1,024
ReLU-116 [-1, 512, 14, 14] 0
Conv2d-117 [-1, 1024, 14, 14] 524,288
BatchNorm2d-118 [-1, 1024, 14, 14] 2,048
ReLU-119 [-1, 1024, 14, 14] 0
Bottleneck-120 [-1, 1024, 14, 14] 0
Conv2d-121 [-1, 512, 14, 14] 524,288
BatchNorm2d-122 [-1, 512, 14, 14] 1,024
ReLU-123 [-1, 512, 14, 14] 0
Conv2d-124 [-1, 512, 14, 14] 73,728
BatchNorm2d-125 [-1, 512, 14, 14] 1,024
ReLU-126 [-1, 512, 14, 14] 0
Conv2d-127 [-1, 1024, 14, 14] 524,288
BatchNorm2d-128 [-1, 1024, 14, 14] 2,048
ReLU-129 [-1, 1024, 14, 14] 0
Bottleneck-130 [-1, 1024, 14, 14] 0
Conv2d-131 [-1, 512, 14, 14] 524,288
BatchNorm2d-132 [-1, 512, 14, 14] 1,024
ReLU-133 [-1, 512, 14, 14] 0
Conv2d-134 [-1, 512, 14, 14] 73,728
BatchNorm2d-135 [-1, 512, 14, 14] 1,024
ReLU-136 [-1, 512, 14, 14] 0
Conv2d-137 [-1, 1024, 14, 14] 524,288
BatchNorm2d-138 [-1, 1024, 14, 14] 2,048
ReLU-139 [-1, 1024, 14, 14] 0
Bottleneck-140 [-1, 1024, 14, 14] 0
Conv2d-141 [-1, 2048, 7, 7] 2,097,152
BatchNorm2d-142 [-1, 2048, 7, 7] 4,096
Conv2d-143 [-1, 1024, 14, 14] 1,048,576
BatchNorm2d-144 [-1, 1024, 14, 14] 2,048
ReLU-145 [-1, 1024, 14, 14] 0
Conv2d-146 [-1, 1024, 7, 7] 294,912
BatchNorm2d-147 [-1, 1024, 7, 7] 2,048
ReLU-148 [-1, 1024, 7, 7] 0
Conv2d-149 [-1, 2048, 7, 7] 2,097,152
BatchNorm2d-150 [-1, 2048, 7, 7] 4,096
ReLU-151 [-1, 2048, 7, 7] 0
Bottleneck-152 [-1, 2048, 7, 7] 0
Conv2d-153 [-1, 1024, 7, 7] 2,097,152
BatchNorm2d-154 [-1, 1024, 7, 7] 2,048
ReLU-155 [-1, 1024, 7, 7] 0
Conv2d-156 [-1, 1024, 7, 7] 294,912
BatchNorm2d-157 [-1, 1024, 7, 7] 2,048
ReLU-158 [-1, 1024, 7, 7] 0
Conv2d-159 [-1, 2048, 7, 7] 2,097,152
BatchNorm2d-160 [-1, 2048, 7, 7] 4,096
ReLU-161 [-1, 2048, 7, 7] 0
Bottleneck-162 [-1, 2048, 7, 7] 0
Conv2d-163 [-1, 1024, 7, 7] 2,097,152
BatchNorm2d-164 [-1, 1024, 7, 7] 2,048
ReLU-165 [-1, 1024, 7, 7] 0
Conv2d-166 [-1, 1024, 7, 7] 294,912
BatchNorm2d-167 [-1, 1024, 7, 7] 2,048
ReLU-168 [-1, 1024, 7, 7] 0
Conv2d-169 [-1, 2048, 7, 7] 2,097,152
BatchNorm2d-170 [-1, 2048, 7, 7] 4,096
ReLU-171 [-1, 2048, 7, 7] 0
Bottleneck-172 [-1, 2048, 7, 7] 0
AdaptiveAvgPool2d-173 [-1, 2048, 1, 1] 0
Linear-174 [-1, 4] 8,196
================================================================
Total params: 22,988,100
Trainable params: 22,988,100
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.57
Forward/backward pass size (MB): 361.77
Params size (MB): 87.69
Estimated Total Size (MB): 450.04
----------------------------------------------------------------
四、训练模型¶
1、数据集分割与处理¶
data_dir = './data/7-data'
# Wrap the location in a Path object.
data_dir = pathlib.Path(data_dir)
# Collect every entry directly under the dataset root.
paths = list(data_dir.glob('*'))
# The class label is the last path component of each entry. Path.name is used
# instead of splitting str(path) on "\\" and indexing a fixed position, which
# only works with Windows separators and a root exactly two levels deep.
# NOTE: the upstream tutorial spells this variable with an extra 'e'.
classNames = [path.name for path in paths]
classNames
['Dark', 'Green', 'Light', 'Medium']
# Preprocessing pipeline intended for training images.
train_transforms = transforms.Compose([
transforms.Resize([224, 224]), # resize every input image to a common 224x224 size
# transforms.RandomHorizontalFlip(), # random horizontal flip (currently disabled)
transforms.ToTensor(), # convert a PIL Image or numpy.ndarray to a tensor scaled to [0, 1]
transforms.Normalize( # per-channel normalisation with the mean/std below
mean=[0.485, 0.456, 0.406],
std=[0.229, 0.224, 0.225]) # NOTE(review): these values look like the commonly used ImageNet channel statistics, not numbers measured on this dataset — confirm
])
# Preprocessing pipeline intended for evaluation images (same steps as above).
test_transform = transforms.Compose([
transforms.Resize([224, 224]), # resize every input image to a common 224x224 size
transforms.ToTensor(), # convert a PIL Image or numpy.ndarray to a tensor scaled to [0, 1]
transforms.Normalize( # per-channel normalisation with the mean/std below
mean=[0.485, 0.456, 0.406],
std=[0.229, 0.224, 0.225]) # NOTE(review): these values look like the commonly used ImageNet channel statistics, not numbers measured on this dataset — confirm
])
# The whole folder is loaded with train_transforms; test_transform is not used in this statement.
total_data = datasets.ImageFolder("./data/7-data/",transform=train_transforms)
total_data
Dataset ImageFolder
Number of datapoints: 1200
Root location: ./data/7-data/
StandardTransform
Transform: Compose(
Resize(size=[224, 224], interpolation=bilinear, max_size=None, antialias=None)
ToTensor()
Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
)
# Display the class-name -> integer-label mapping held by the dataset.
total_data.class_to_idx
{'Dark': 0, 'Green': 1, 'Light': 2, 'Medium': 3}
# 80% of the samples go to training; whatever remains becomes the test split.
train_size = int(0.8 * len(total_data))
test_size = len(total_data) - train_size
# NOTE(review): no generator/seed is passed here, so the partition presumably differs between runs — confirm if reproducibility matters.
train_dataset, test_dataset = torch.utils.data.random_split(total_data, [train_size, test_size])
train_dataset, test_dataset
(<torch.utils.data.dataset.Subset at 0x1c7cf0f9a80>,
<torch.utils.data.dataset.Subset at 0x1c7cf0f9fc0>)
batch_size = 64
# Number of loader workers: the smallest of the CPU count, the batch size
# (0 when batch_size <= 1) and 8. os.cpu_count() may return None when the
# count cannot be determined, so fall back to 1 to keep min() well-defined.
nw = min([os.cpu_count() or 1, batch_size if batch_size > 1 else 0, 8])  # number of workers
# Training batches are reshuffled every epoch.
train_dl = torch.utils.data.DataLoader(train_dataset,
                                       batch_size=batch_size,
                                       shuffle=True,
                                       num_workers=nw)
# Evaluation gains nothing from shuffling; a fixed order keeps the batch
# composition, and therefore the per-batch mean loss, the same on every run.
test_dl = torch.utils.data.DataLoader(test_dataset,
                                      batch_size=batch_size,
                                      shuffle=False,
                                      num_workers=nw)
# 训练循环
def train(dataloader, model, loss_fn, optimizer):
    """Run one optimisation pass over ``dataloader``.

    Every batch is moved to the module-level ``device``, pushed through
    ``model``, and used for one ``optimizer`` step. This function does not
    switch the model's train/eval mode itself.

    Returns:
        ``(accuracy, mean_loss)``: correct predictions divided by
        ``len(dataloader.dataset)``, and the sum of per-batch losses divided
        by ``len(dataloader)``.
    """
    sample_count = len(dataloader.dataset)  # number of samples in the set
    batch_count = len(dataloader)           # number of batches per pass

    correct_total = 0
    loss_total = 0

    for inputs, labels in dataloader:
        inputs = inputs.to(device)
        labels = labels.to(device)

        # Forward pass and loss against the ground-truth labels.
        logits = model(inputs)
        batch_loss = loss_fn(logits, labels)

        # Backward pass: clear old gradients, back-propagate, update weights.
        optimizer.zero_grad()
        batch_loss.backward()
        optimizer.step()

        # Accumulate the running metrics.
        hits = (logits.argmax(1) == labels).type(torch.float)
        correct_total += hits.sum().item()
        loss_total += batch_loss.item()

    return correct_total / sample_count, loss_total / batch_count
def test(dataloader, model, loss_fn):
    """Evaluate ``model`` on ``dataloader`` with gradient tracking disabled.

    Every batch is moved to the module-level ``device``. This function does
    not switch the model's train/eval mode itself.

    Returns:
        ``(accuracy, mean_loss)``: correct predictions divided by
        ``len(dataloader.dataset)``, and the sum of per-batch losses divided
        by ``len(dataloader)``.
    """
    sample_count = len(dataloader.dataset)  # number of samples in the set
    batch_count = len(dataloader)           # number of batches per pass

    correct_total = 0
    loss_total = 0

    # No parameters are updated here, so skip building the autograd graph.
    with torch.no_grad():
        for images, labels in dataloader:
            images = images.to(device)
            labels = labels.to(device)

            logits = model(images)
            loss_total += loss_fn(logits, labels).item()

            hits = (logits.argmax(1) == labels).type(torch.float)
            correct_total += hits.sum().item()

    return correct_total / sample_count, loss_total / batch_count
import copy

learn_rate = 1e-4  # initial learning rate
# Build the optimizer exactly once, from learn_rate, so that changing the
# value above actually changes the learning rate used for training.
optimizer = torch.optim.Adam(model.parameters(), lr=learn_rate)
loss_fn = nn.CrossEntropyLoss()  # loss function
epochs = 10

# Per-epoch metric history.
train_loss = []
train_acc = []
test_loss = []
test_acc = []
best_acc = 0  # best test accuracy so far; decides which model copy is kept

# Progress-line format; it does not change between epochs.
template = ('Epoch:{:2d}, Train_acc:{:.1f}%, Train_loss:{:.3f}, Test_acc:{:.1f}%, Test_loss:{:.3f}, Lr:{:.2E}')

for epoch in range(epochs):
    model.train()
    epoch_train_acc, epoch_train_loss = train(train_dl, model, loss_fn, optimizer)

    model.eval()
    epoch_test_acc, epoch_test_loss = test(test_dl, model, loss_fn)

    # Keep a snapshot of the model whenever the test accuracy improves.
    if epoch_test_acc > best_acc:
        best_acc = epoch_test_acc
        best_model = copy.deepcopy(model)

    train_acc.append(epoch_train_acc)
    train_loss.append(epoch_train_loss)
    test_acc.append(epoch_test_acc)
    test_loss.append(epoch_test_loss)

    # Read back the learning rate currently held by the optimizer.
    lr = optimizer.state_dict()['param_groups'][0]['lr']
    print(template.format(epoch+1, epoch_train_acc*100, epoch_train_loss,
                          epoch_test_acc*100, epoch_test_loss, lr))

# Optional: persist weights to disk. As written, the lines below would save
# `model` (the state after the last epoch), not the `best_model` snapshot.
# PATH = './best_model.pth'  # file name for the saved parameters
# torch.save(model.state_dict(), PATH)
print('Done')
Epoch: 1, Train_acc:99.9%, Train_loss:0.005, Test_acc:98.8%, Test_loss:0.027, Lr:1.00E-04
Epoch: 2, Train_acc:98.9%, Train_loss:0.031, Test_acc:77.9%, Test_loss:5.040, Lr:1.00E-04
Epoch: 3, Train_acc:99.1%, Train_loss:0.036, Test_acc:80.8%, Test_loss:1.116, Lr:1.00E-04
Epoch: 4, Train_acc:98.6%, Train_loss:0.049, Test_acc:82.9%, Test_loss:0.810, Lr:1.00E-04
Epoch: 5, Train_acc:99.4%, Train_loss:0.014, Test_acc:95.0%, Test_loss:0.261, Lr:1.00E-04
Epoch: 6, Train_acc:99.0%, Train_loss:0.030, Test_acc:92.1%, Test_loss:0.369, Lr:1.00E-04
Epoch: 7, Train_acc:98.5%, Train_loss:0.050, Test_acc:94.6%, Test_loss:0.296, Lr:1.00E-04
Epoch: 8, Train_acc:99.1%, Train_loss:0.027, Test_acc:98.8%, Test_loss:0.042, Lr:1.00E-04
Epoch: 9, Train_acc:99.7%, Train_loss:0.011, Test_acc:98.8%, Test_loss:0.031, Lr:1.00E-04
Epoch:10, Train_acc:99.7%, Train_loss:0.010, Test_acc:95.8%, Test_loss:0.072, Lr:1.00E-04
Done