PyTorch basics
from __future__ import print_function
import torch

#---------------------------------------------
# Basics

# Defining tensors
# Constant initialization
# dtypes: torch.long, torch.float, torch.double (torch.float64)
x = torch.tensor([5.5, 3])
torch.empty(size, dtype=torch.long)   # uninitialized tensor with shape size
torch.zeros(size)                     # tensor of all zeros
torch.ones(size)                      # tensor of all ones
torch.full(size, fill_value)          # tensor filled with fill_value
torch.zeros_like(x)                   # all-zeros tensor with the same size as the input tensor
torch.ones_like(x)                    # all-ones tensor with the same size as the input tensor
torch.arange(start=0, end, step=1)    # sequence from start to end; with only end given it behaves like range()

# Random initialization
torch.rand(size)                      # uniform random numbers in [0, 1)
torch.rand_like(input)                # uniform [0, 1) random numbers with the same size as input
torch.randn(size)                     # samples from the standard normal distribution N(0, 1)
torch.normal(mean, std, out=None)     # normal distribution; note that mean and std are tensors, defaulting to 0 and 1

# Tensor attributes
x.size()
x.shape

# Tensor operations
# Slicing, concatenation, reshaping and selection
x[:, 1]
x.view(-1, 8)
x.item()                                            # extract the Python scalar from a one-element tensor
torch.cat(seq, dim=0, out=None)                     # concatenate; dim=0 concatenates along rows
torch.cat((x, x, x), 0)
torch.chunk(tensor, chunks, dim=0)                  # split into the number of pieces given by chunks
torch.chunk(torch.arange(10), 4)
torch.split(tensor, split_size_or_sections, dim=0)  # split into pieces of the given size(s)
torch.index_select(input, dim, index, out=None)     # select along dim by index
torch.masked_select(input, mask, out=None)          # select by boolean mask
torch.squeeze(input)                                # remove all dimensions of size 1; note that the result shares storage with the input
torch.reshape(input, shape)                         # change shape
tensor.view(shape)                                  # change shape

# Arithmetic
x + y
torch.add(x, y)                         # torch.add(input, value, out=None)
y.add_(x)                               # every torch operation whose name ends in "_" is in-place
x.copy_(y)
x.data.norm()
torch.mul(input, other, out=None)       # multiplication
torch.div(input, other, out=None)       # division
torch.pow(input, exponent, out=None)    # exponentiation
torch.sqrt(input, out=None)
torch.round(input, out=None)            # round to the nearest integer
torch.argmax(input, dim=None, keepdim=False)  # index of the maximum
torch.sigmoid(input, out=None)          # sigmoid
torch.tanh(input, out=None)             # tanh
torch.abs(input, out=None)              # absolute value
torch.ceil(input, out=None)             # round up
torch.clamp(input, min, max, out=None)  # clamp the input into [min, max]; out-of-range values are replaced by min/max

# Converting between tensors and common Python data types
# Tensor <-> NumPy; both directions share the underlying memory
x.numpy()
torch.from_numpy(x)

# Autograd
# x must be a floating-point tensor to support gradients
# backward() is only defined for a scalar output (a scalar w.r.t. a scalar, vector or matrix)
x = torch.ones(2, 2, requires_grad=True)  # requires_grad defaults to False
x.requires_grad         # read the attribute
x.requires_grad_(True)  # in-place method that changes the attribute
y = x + 2
y.grad_fn
y.backward()            # equivalent to y.backward(torch.tensor(1.))
x.data
x.grad
with torch.no_grad():   # disables gradient tracking, even for tensors with requires_grad=True
    print((x ** 2).requires_grad)
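A minimal worked example of the autograd calls above; the values are illustrative (they follow the usual tutorial pattern and are not from the original notes), and the printed gradient can be checked against the analytic derivative:

x = torch.ones(2, 2, requires_grad=True)
y = x + 2
z = (y * y * 3).mean()  # z = mean(3 * (x + 2)^2), a scalar, so backward() needs no argument
z.backward()            # dz/dx = 6 * (x + 2) / 4, i.e. 4.5 at x = 1
print(x.grad)           # tensor([[4.5000, 4.5000], [4.5000, 4.5000]])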
#---------------------------------------------
# Feedforward neural network
import torch
import torch.nn as nn
import torch.nn.functional as F

class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        # 1 input image channel, 6 output channels, 5x5 square convolution kernel
        self.conv1 = nn.Conv2d(1, 6, 5)
        self.conv2 = nn.Conv2d(6, 16, 5)
        # an affine operation: y = Wx + b
        self.fc1 = nn.Linear(16 * 5 * 5, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)

    def forward(self, x):
        # Max pooling over a (2, 2) window
        x = F.max_pool2d(F.relu(self.conv1(x)), (2, 2))
        # If the size is a square you can only specify a single number
        x = F.max_pool2d(F.relu(self.conv2(x)), 2)
        x = x.view(-1, self.num_flat_features(x))
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x

    def num_flat_features(self, x):
        size = x.size()[1:]  # all dimensions except the batch dimension
        num_features = 1
        for s in size:
            num_features *= s
        return num_features

net = Net()
print(net)
'''
Net(
  (conv1): Conv2d(1, 6, kernel_size=(5, 5), stride=(1, 1))
  (conv2): Conv2d(6, 16, kernel_size=(5, 5), stride=(1, 1))
  (fc1): Linear(in_features=400, out_features=120, bias=True)
  (fc2): Linear(in_features=120, out_features=84, bias=True)
  (fc3): Linear(in_features=84, out_features=10, bias=True)
)
'''

# Inspecting the trainable parameters
params = list(net.parameters())
print(len(params))       # 10
print(params[0].size())  # conv1's .weight

# Forward pass
input = torch.randn(1, 1, 32, 32)
out = net(input)
print(out)

# Zero the gradient buffers of all parameters and backprop with random gradients
# retain_graph=True keeps the graph alive after the gradients are computed, at the cost of extra memory
net.zero_grad()
out.backward(torch.randn(1, 10))

# Defining a loss
output = net(input)
target = torch.randn(10)     # a dummy target, for example
target = target.view(1, -1)  # make it the same shape as output
criterion = nn.MSELoss()
loss = criterion(output, target)
print(loss)

# The backward path can be traced through the .grad_fn attribute
print(loss.grad_fn)  # MSELoss
print(loss.grad_fn.next_functions[0][0])  # Linear
print(loss.grad_fn.next_functions[0][0].next_functions[0][0])  # ReLU
'''
input -> conv2d -> relu -> maxpool2d -> conv2d -> relu -> maxpool2d
      -> view -> linear -> relu -> linear -> relu -> linear
      -> MSELoss
      -> loss
'''

# Backpropagating the loss
net.zero_grad()  # zeroes the gradient buffers of all parameters
print('conv1.bias.grad before backward')
print(net.conv1.bias.grad)  # zero gradients
loss.backward()
print('conv1.bias.grad after backward')
print(net.conv1.bias.grad)

# Manual parameter update after backpropagation
# weight = weight - learning_rate * gradient
learning_rate = 0.01
for f in net.parameters():
    f.data.sub_(f.grad.data * learning_rate)

# Choosing an optimizer
# SGD, Nesterov-SGD, Adam, RMSProp
import torch.optim as optim
optimizer = optim.SGD(net.parameters(), lr=0.01)  # create your optimizer
optimizer.zero_grad()  # zero the gradient buffers
output = net(input)
loss = criterion(output, target)
loss.backward()
optimizer.step()  # does the update
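Any of the optimizers named above can be dropped into the same zero_grad / forward / backward / step loop; a sketch of the alternatives (the learning rates are illustrative, not from the original notes):

optimizer = optim.SGD(net.parameters(), lr=0.01, momentum=0.9, nesterov=True)  # Nesterov-SGD
optimizer = optim.Adam(net.parameters(), lr=0.001)                             # Adam
optimizer = optim.RMSprop(net.parameters(), lr=0.01)                           # RMSProp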
#---------------------------------------------
# Training a CIFAR10 image classifier
import torch
import torchvision
import torchvision.transforms as transforms

# Download the dataset and normalize the data into [-1, 1]
transform = transforms.Compose(
    [transforms.ToTensor(),
     transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])
trainset = torchvision.datasets.CIFAR10(root='./data', train=True,
                                        download=True, transform=transform)
testset = torchvision.datasets.CIFAR10(root='./data', train=False,
                                       download=True, transform=transform)

# Load the data in batches
trainloader = torch.utils.data.DataLoader(trainset, batch_size=4,
                                          shuffle=True, num_workers=2)
testloader = torch.utils.data.DataLoader(testset, batch_size=4,
                                         shuffle=False, num_workers=2)
classes = ('plane', 'car', 'bird', 'cat', 'deer',
           'dog', 'frog', 'horse', 'ship', 'truck')

import matplotlib.pyplot as plt
import numpy as np

# function to show an image
def imshow(img):
    img = img / 2 + 0.5  # unnormalize
    npimg = img.numpy()
    plt.imshow(np.transpose(npimg, (1, 2, 0)))
    plt.show()

# get some random training images
dataiter = iter(trainloader)
images, labels = next(dataiter)

# show images
imshow(torchvision.utils.make_grid(images))  # tiles the batch into one grid image; padding sets the spacing between tiles
# print labels
print(' '.join('%5s' % classes[labels[j]] for j in range(4)))

import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(3, 6, 5)
        self.pool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(6, 16, 5)
        self.fc1 = nn.Linear(16 * 5 * 5, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)

    def forward(self, x):
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = x.view(-1, 16 * 5 * 5)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x

net = Net()
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9)

for epoch in range(2):  # loop over the dataset multiple times
    running_loss = 0.0
    for i, data in enumerate(trainloader, 0):
        # get the inputs
        inputs, labels = data
        # zero the parameter gradients
        optimizer.zero_grad()
        # forward + backward + optimize
        outputs = net(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        # print statistics
        running_loss += loss.item()
        if i % 2000 == 1999:  # print every 2000 mini-batches
            print('[%d, %5d] loss: %.3f' %
                  (epoch + 1, i + 1, running_loss / 2000))
            running_loss = 0.0

print('Finished Training')

outputs = net(images)
_, predicted = torch.max(outputs, 1)  # index of the most likely class
print('Predicted: ', ' '.join('%5s' % classes[predicted[j]] for j in range(4)))

# Model evaluation
correct = 0
total = 0
with torch.no_grad():
    for data in testloader:
        images, labels = data
        outputs = net(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print('Accuracy of the network on the 10000 test images: %d %%' % (100 * correct / total))

# Per-class accuracy
class_correct = list(0. for i in range(10))
class_total = list(0. for i in range(10))
with torch.no_grad():
    for data in testloader:
        images, labels = data
        outputs = net(images)
        _, predicted = torch.max(outputs, 1)
        c = (predicted == labels).squeeze()
        for i in range(4):
            label = labels[i]
            class_correct[label] += c[i].item()
            class_total[label] += 1

for i in range(10):
    print('Accuracy of %5s : %2d %%' % (classes[i], 100 * class_correct[i] / class_total[i]))

# GPU
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
# Assuming we are on a CUDA machine, this should print a CUDA device:
print(device)
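To actually train the classifier on that device, both the network and every mini-batch have to be moved; a short sketch following the usual tutorial pattern (these lines are not part of the original notes):

net.to(device)  # moves all parameters and buffers to the selected device
inputs, labels = inputs.to(device), labels.to(device)  # move each mini-batch inside the training loop
outputs = net(inputs)  # the forward pass now runs on that device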
# ================
# .to(device) can target either the CPU or a GPU
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
# single GPU or CPU
model.to(device)
# multiple GPUs
if torch.cuda.device_count() > 1:
    model = nn.DataParallel(model, device_ids=[0, 1, 2])
model.to(device)

# ================
# .cuda() can only target GPUs
# select a specific GPU
import os
os.environ['CUDA_VISIBLE_DEVICES'] = '1'
model.cuda()
# multiple GPUs
os.environ['CUDA_VISIBLE_DEVICES'] = '0,1,2,3'
device_ids = [0, 1, 2, 3]
net = torch.nn.DataParallel(net, device_ids=device_ids)
net = torch.nn.DataParallel(net)  # uses all visible devices by default
net = net.cuda()

# ================
# Recommended: create the device once at the start of the script
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
...
# Then, whenever you get a new Tensor or Module, move it with .to(device);
# if it is already on the target device, no copy is performed
input = data.to(device)
model = MyModule(...).to(device)
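A closing sketch of the recommended device-agnostic style in runnable form (the variable names are illustrative, not from the original notes):

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
x = torch.ones(2, 2, device=device)  # create the tensor directly on the target device
y = x.to(device)                     # already on the target device, so no copy is performed
z = (x + y).to("cpu")                # .to() can also move results back to the CPU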