[课堂笔记][PyTorch学习][5] CNN应用:图片分类
import os

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms

# torchvision is a companion library, distributed separately from PyTorch,
# that provides convenience utilities for working with images.
# Package details: https://pypi.org/project/torchvision/0.1.8/
# Its main subpackages are:
#   vision.datasets   : several common vision datasets that can be downloaded
#                       and loaded
#   vision.models     : popular models such as AlexNet, VGG and ResNet,
#                       together with pre-trained weights
#   vision.transforms : common image operations such as random cropping and
#                       rotation
#   vision.utils      : saves tensors shaped like (3 x H x W) to disk; given a
#                       mini-batch of images it can produce an image grid
定义CNN模型
class Net(nn.Module):
    """Small CNN for 10-class classification of single-channel images.

    Two 5x5 convolutions, each followed by ReLU and 2x2 max pooling, feed two
    fully connected layers. ``forward`` returns per-class log-probabilities.
    The fully connected input size is computed for 28x28 inputs (e.g.
    handwritten digits); other resolutions need a different value.
    """

    # Number of features per sample after the second pooling layer:
    # 50 channels of 4x4 for a 28x28 input. Depends on the input resolution.
    _FLAT_FEATURES = 4 * 4 * 50

    def __init__(self):
        super().__init__()
        # nn.Conv2d(in_channels, out_channels, kernel_size, stride):
        #   in_channels  - channels of the input image (1 for grayscale
        #                  digits, 3 for colour images)
        #   out_channels - number of output channels, i.e. number of kernels
        #   kernel_size  - side length of the convolution kernel
        #   stride       - step of the sliding window
        self.conv1 = nn.Conv2d(1, 20, 5, 1)
        # in_channels must equal the previous layer's out_channels (20);
        # this layer uses 50 kernels.
        self.conv2 = nn.Conv2d(20, 50, 5, 1)
        # nn.Linear(in_features, out_features)
        self.fc1 = nn.Linear(self._FLAT_FEATURES, 500)
        # 10 outputs, one per class.
        self.fc2 = nn.Linear(500, 10)

    def forward(self, x):
        """Return log-probabilities of shape (N, 10) for a batch ``x``.

        The shape comments below assume ``x`` is (N, 1, 28, 28), where N is
        the batch size.
        """
        x = F.relu(self.conv1(x))             # (N, 20, 24, 24)
        x = F.max_pool2d(x, 2, 2)             # (N, 20, 12, 12)
        x = F.relu(self.conv2(x))             # (N, 50, 8, 8)
        x = F.max_pool2d(x, 2, 2)             # (N, 50, 4, 4)
        x = x.view(-1, self._FLAT_FEATURES)   # (N, 4*4*50)
        x = F.relu(self.fc1(x))               # (N, 500)
        x = self.fc2(x)                       # (N, 10)
        # log-softmax over the class dimension: 10 log-probabilities per image
        return F.log_softmax(x, dim=1)
定义train和test函数
def train(model, device, train_loader, optimizer, epoch, log_interval=100):
    """Run one training pass of ``model`` over ``train_loader``.

    Args:
        model: Network whose output is passed to ``F.nll_loss``; it should
            therefore return log-probabilities (e.g. from ``log_softmax``).
        device: Device each batch is moved to before the forward pass.
        train_loader: Iterable of ``(data, target)`` batches. ``len()`` of the
            loader and of its ``dataset`` attribute are used for the progress
            message.
        optimizer: Optimizer that updates the parameters of ``model``.
        epoch: Epoch number; only used in the progress message.
        log_interval: A progress line is printed every ``log_interval``
            batches (including batch 0).
    """
    model.train()  # switch to training mode
    for batch_idx, (data, target) in enumerate(train_loader):
        data, target = data.to(device), target.to(device)

        optimizer.zero_grad()  # clear gradients left from the previous step
        output = model(data)   # ``data`` is the ``x`` argument of forward()

        # nll_loss uses its default reduction here, so this is the mean loss
        # over the batch. nll_loss applied to log_softmax output is the
        # single-label cross-entropy (exactly one class per image); a
        # multi-label setup would use sigmoid outputs instead.
        loss = F.nll_loss(output, target)
        loss.backward()
        optimizer.step()

        if batch_idx % log_interval == 0:
            # Progress is reported in samples (batch_idx * batch size out of
            # the dataset size) and as a whole-number percentage of batches,
            # e.g. 1875 batches for 60000 samples at batch size 32.
            print("Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}".format(
                epoch,
                batch_idx * len(data),
                len(train_loader.dataset),
                100. * batch_idx / len(train_loader),
                loss.item(),
            ))
def test(model, device, test_loader):
    """Evaluate ``model`` on ``test_loader`` and print loss and accuracy.

    Args:
        model: Network whose output is passed to ``F.nll_loss``; it should
            therefore return log-probabilities of shape (N, num_classes).
        device: Device each batch is moved to before the forward pass.
        test_loader: Iterable of ``(data, target)`` batches with a ``dataset``
            attribute.

    The average loss and the accuracy are computed over the samples that were
    actually evaluated, so the figures stay correct when the loader does not
    visit the whole dataset (``drop_last=True``, subset samplers, ...).
    """
    model.eval()  # switch to evaluation mode
    test_loss = 0
    correct = 0
    num_seen = 0
    with torch.no_grad():
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)
            output = model(data)

            # reduction='sum' adds up the per-sample losses of the batch (the
            # default would average them); the total is divided once below.
            test_loss += F.nll_loss(output, target, reduction='sum').item()

            # Index of the largest log-probability. keepdim=True makes pred
            # (N, 1), so target is viewed with the same shape before the
            # element-wise comparison.
            pred = output.argmax(dim=1, keepdim=True)
            correct += pred.eq(target.view_as(pred)).sum().item()
            num_seen += pred.size(0)

    # If the loader yielded nothing, fall back to the dataset size so the
    # degenerate case behaves as it did before samples were counted.
    total = num_seen or len(test_loader.dataset)
    test_loss /= total

    print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
        test_loss, correct, total,
        100. * correct / total))


# The name matches pytest's default ``test*`` collection pattern; mark the
# helper so it is not collected as a test case when this module is imported
# from a test file.
test.__test__ = False
把训练集和验证集分batch转换成迭代器
现在我们知道了模型输入的size,我们就可以把数据预处理成相应的格式。
# Builds the preprocessing pipelines, the image datasets and the batch
# iterators for the training and validation splits, then selects the device.
#
# `input_size`, `data_dir` and `batch_size` must already be defined; their
# definitions are not part of this section. `os` is needed for the dataset
# paths below.
#
# NOTE(review): Normalize is given three per-channel values, while the Net
# class defined earlier takes 1-channel input - confirm which model these
# loaders are meant to feed.

# Per-channel mean and standard deviation used to normalise both splits.
# NOTE(review): these look like the usual ImageNet statistics - confirm.
_NORM_MEAN = [0.485, 0.456, 0.406]
_NORM_STD = [0.229, 0.224, 0.225]

# Sub-directory names under `data_dir`, also used as dictionary keys.
_SPLITS = ['train', 'val']

data_transforms = {
    # Training: random crop/flip augmentation, then tensor conversion.
    "train": transforms.Compose([
        transforms.RandomResizedCrop(input_size),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize(_NORM_MEAN, _NORM_STD),
    ]),
    # Validation: resize and centre crop, no random augmentation.
    "val": transforms.Compose([
        transforms.Resize(input_size),
        transforms.CenterCrop(input_size),
        transforms.ToTensor(),
        transforms.Normalize(_NORM_MEAN, _NORM_STD),
    ]),
}

print("Initializing Datasets and Dataloaders...")

# Create training and validation datasets, one image folder per split.
image_datasets = {
    split: datasets.ImageFolder(os.path.join(data_dir, split), data_transforms[split])
    for split in _SPLITS
}

# Create training and validation dataloaders. The iterators are stored in a
# dict keyed by split name, so later code can look them up by 'train' / 'val'.
dataloaders_dict = {
    split: torch.utils.data.DataLoader(
        image_datasets[split],
        batch_size=batch_size,
        shuffle=True,
        num_workers=4,
    )
    for split in _SPLITS
}

# Detect if we have a GPU available
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")