PyTorch MNIST Handwritten Digit Recognition
import torch
import torch.nn as nn
import torchvision
import torchvision.transforms as transforms

# Device configuration
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Hyper-parameters
num_epochs = 5
num_classes = 10
batch_size = 100
learning_rate = 0.001

# MNIST dataset
train_dataset = torchvision.datasets.MNIST(root='../../data/',
                                           train=True,
                                           transform=transforms.ToTensor(),
                                           download=True)

test_dataset = torchvision.datasets.MNIST(root='../../data/',
                                          train=False,
                                          transform=transforms.ToTensor())

# Data loader
# The DataLoader prepends a batch dimension to each sample, so every batch holds
# batch_size images, and the number of batches times batch_size equals the
# dataset size. num_epochs is the number of full passes over the training data:
# here we iterate 5 times, and each pass walks through all of train_dataset in
# batches of 100 images.
train_loader = torch.utils.data.DataLoader(dataset=train_dataset,
                                           batch_size=batch_size,
                                           shuffle=True)

test_loader = torch.utils.data.DataLoader(dataset=test_dataset,
                                          batch_size=batch_size,
                                          shuffle=False)

# Convolutional neural network (two convolutional layers)
class ConvNet(nn.Module):
    def __init__(self, num_classes=10):
        super(ConvNet, self).__init__()
        self.layer1 = nn.Sequential(
            # 1 input channel (grayscale), 16 output channels, i.e. 16 kernels,
            # each of size 5x5 with stride 1. To keep the 28x28 size with a 5x5
            # kernel and stride 1 we need F - 1 = 4 padded zeros per dimension,
            # i.e. 2 on each side, hence padding=2.
            # Convolving a 1-channel image with 16 kernels yields a 16-channel
            # feature map; the channel counts play the role of input_size and
            # output_size, except each "unit" is a whole 2-D tensor.
            nn.Conv2d(1, 16, kernel_size=5, stride=1, padding=2),
            nn.BatchNorm2d(16),  # batch-normalize the 16 feature maps
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2))  # halves height and width: 14x14
        self.layer2 = nn.Sequential(
            nn.Conv2d(16, 32, kernel_size=5, stride=1, padding=2),  # 16 input channels, 32 output channels
            nn.BatchNorm2d(32),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2))  # 7x7, but now with 32 channels
        # Final fully connected layer: takes the flattened 7*7*32 features as
        # input and outputs one score per class for the ten classes.
        self.fc = nn.Linear(7*7*32, num_classes)

    def forward(self, x):
        out = self.layer1(x)
        out = self.layer2(out)
        # out has size (100, 32, 7, 7); the first dimension is the batch size.
        # Flatten each 32-channel feature map into one vector per image: the
        # spatial layout matters for convolution but not for the linear layer
        # that follows.
        out = out.reshape(out.size(0), -1)
        out = self.fc(out)
        return out

model = ConvNet(num_classes).to(device)

# Loss and optimizer
# Cross entropy first normalizes the raw outputs into a probability
# distribution (typically with softmax), then measures how far that
# distribution is from the true label.
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

# Train the model
total_step = len(train_loader)
for epoch in range(num_epochs):
    for i, (images, labels) in enumerate(train_loader):
        images = images.to(device)
        labels = labels.to(device)

        # Forward pass
        outputs = model(images)
        # Cross entropy needs one score per class for every image in the batch:
        # outputs has size 100x10 (a 10-way score vector per image) and labels
        # has size 100 (one class index per image).
        loss = criterion(outputs, labels)

        # Backward and optimize
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if (i+1) % 100 == 0:
            print('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}'
                  .format(epoch+1, num_epochs, i+1, total_step, loss.item()))

# Test the model
model.eval()  # eval mode (batchnorm uses moving mean/variance instead of mini-batch mean/variance)
# torch.no_grad() turns off gradient tracking: without it every forward pass
# would build a computation graph and store gradients we never use, wasting
# memory during evaluation.
with torch.no_grad():
    correct = 0
    total = 0
    for images, labels in test_loader:
        images = images.to(device)
        labels = labels.to(device)
        outputs = model(images)
        # The index (column) of the largest score is the predicted class.
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

    print('Test Accuracy of the model on the 10000 test images: {} %'.format(100 * correct / total))

# Save the model checkpoint
torch.save(model.state_dict(), 'model.ckpt')
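The 28 → 14 → 7 size chain in the comments above follows the standard convolution/pooling formula output = floor((W - F + 2P) / S) + 1. A quick check (conv_out_size is a hypothetical helper written here for illustration, not part of the script):

def conv_out_size(w, f, s, p):
    # Standard formula: floor((W - F + 2P) / S) + 1
    return (w - f + 2 * p) // s + 1

w = 28                               # MNIST images are 28x28
w = conv_out_size(w, f=5, s=1, p=2)  # layer1 conv: 28 (same padding keeps the size)
w = conv_out_size(w, f=2, s=2, p=0)  # layer1 max-pool: 14
w = conv_out_size(w, f=5, s=1, p=2)  # layer2 conv: 14
w = conv_out_size(w, f=2, s=2, p=0)  # layer2 max-pool: 7
print(w * w * 32)                    # 1568 = 7*7*32, the in_features of self.fc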
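To make the cross-entropy comment concrete: PyTorch's nn.CrossEntropyLoss combines log-softmax normalization with the negative log-likelihood loss, which is why the script feeds it raw scores rather than probabilities. A small illustration with random stand-in tensors shaped like one batch above:

import torch
import torch.nn as nn
import torch.nn.functional as F

outputs = torch.randn(100, 10)         # raw scores, as produced by model(images)
labels = torch.randint(0, 10, (100,))  # one class index per image

loss_a = nn.CrossEntropyLoss()(outputs, labels)
# Equivalent: normalize with log-softmax, then take the negative log-likelihood
loss_b = F.nll_loss(F.log_softmax(outputs, dim=1), labels)
print(torch.allclose(loss_a, loss_b))  # True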
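The saved state_dict stores only the weights, so the architecture must be rebuilt before loading. A minimal inference sketch, assuming the ConvNet class, device, and test_dataset from the script above are still in scope:

# Rebuild the architecture, then load the trained weights into it
model = ConvNet(num_classes=10).to(device)
model.load_state_dict(torch.load('model.ckpt', map_location=device))
model.eval()

# Predict the class of a single test image
with torch.no_grad():
    image, label = test_dataset[0]                 # image: 1x28x28 tensor
    output = model(image.unsqueeze(0).to(device))  # add the batch dimension: 1x1x28x28
    predicted = output.argmax(dim=1).item()
    print('predicted: {}, actual: {}'.format(predicted, label))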
Studying this code is very helpful for understanding how to build a convolutional neural network.