pytorch基础

pytorch基础

from __future__ import print_function
import torch

#---------------------------------------------
#基础知识

#定义张量
#常数初始化
#torch.long, float, double, float64,
x = torch.tensor([5.5, 3])
torch.empty(size, dtype=torch.long)			返回形状为size的空tensor
torch.zeros(size)			全部是0的tensor
torch.ones(size)			全部是1的tensor
torch.full(size, fill_value)	全fill_value的tensor
torch.zeros_like(x)		返回跟input的tensor一个size的全零tensor
torch.ones_like(x)		返回跟input的tensor一个size的全一tensor
torch.arange(start=0, end, step=1)	返回一个从start到end的序列,只指定end则类似range()

#随机初始化
torch.rand(size) 			[0,1)内的均匀分布随机数
torch.rand_like(input)		返回跟input的tensor一样size的0-1随机数
torch.randn(size)			返回标准正态分布N(0,1)的随机数
torch.normal(mean, std, out=None)	正态分布。注意,mean和std都是tensor,默认0,1

#张量属性
x.size()
x.shape

#张量操作
#tensor切片、合并、变形、抽取操作
x[:,1]
x.view(-1, 8)
x.item()  #获得标量值
torch.cat(seq, dim=0, out=None)   	#拼接, 0=行拼接
torch.cat((x,x,x),0)
torch.chunk(tensor, chunks, dim=0)	#切块,数量由chunks指定。
torch.chunk(torch.arange(10),4)
torch.split(tensor, split_size_or_sections, dim=0)  #切块
torch.index_select(input, dim, index, out=None)		#按index选择
torch.masked_select(input, mask, out=None)			#按mask选择
torch.squeeze(input)		#去掉所有长度为1的维度(并不是压缩成1维)。注意,压缩后的tensor和原来的tensor共享地址
torch.reshape(input, shape)		#改变形状
tensor.view(shape)				#改变形状


#运算
x+y
torch.add(x,y)   #torch.add(input, value, out=None)
y.add_(x) 		#Torch里面所有带"_"的操作,都是in-place的
x.copy_(y)
x.data.norm()
torch.mul(input, other, out=None)		#乘法
torch.div(input, other, out=None)		#除法
torch.pow(input, exponent, out=None)	#指数
torch.sqrt(input, out=None)
torch.round(input, out=None)	#四舍五入到整数
torch.argmax(input, dim=None, keepdim=False)	#argmax函数
torch.sigmoid(input, out=None)	#sigmoid函数
torch.tanh(input, out=None)		#tanh函数
torch.abs(input, out=None)		#取绝对值
torch.ceil(input, out=None)		#向上取整
torch.clamp(input, min, max, out=None)	#截断函数,把输入数据规范在min-max区间,超过范围的用min、max代替


#张量与Python常用数据类型转换
#tensor与numpy互转
x.numpy()	#共址
torch.from_numpy(x)  #共址


#自动微分
#要想使x支持求导,必须让x为浮点类型
#求导只能是标量对标量,或标量对向量/矩阵求导
# Autograd walkthrough on a 2x2 tensor of ones.
x = torch.ones(2, 2, requires_grad=True)  # requires_grad defaults to False
x.requires_grad         # read the attribute
x.requires_grad_(True)  # in-place method that sets the attribute
y = x+2
y.grad_fn
# y is a 2x2 (non-scalar) tensor, so backward() cannot create the implicit
# gradient of 1 on its own; pass an explicit gradient with the same shape as y.
y.backward(torch.ones_like(y))
x.data
x.grad


with torch.no_grad():
    # Gradient tracking is switched off inside this context, so the result
    # does not require grad even though x does; this prints False.
    print((x ** 2).requires_grad)


#---------------------------------------------
#前馈神经网络
import torch
import torch.nn as nn
import torch.nn.functional as F
class Net(nn.Module):
    """Two convolution layers followed by three linear layers."""

    def __init__(self):
        super(Net, self).__init__()
        # 1 input image channel -> 6 output channels, 5x5 square kernel,
        # then 6 -> 16 channels with another 5x5 kernel.
        self.conv1 = nn.Conv2d(1, 6, 5)
        self.conv2 = nn.Conv2d(6, 16, 5)
        # Affine operations (y = Wx + b).
        self.fc1 = nn.Linear(16 * 5 * 5, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)

    def forward(self, x):
        """Apply conv/ReLU/max-pool twice, flatten, then the linear layers."""
        # Max pooling over a (2, 2) window.
        pooled = F.max_pool2d(F.relu(self.conv1(x)), (2, 2))
        # A single number is enough when the pooling window is square.
        pooled = F.max_pool2d(F.relu(self.conv2(pooled)), 2)
        flat = pooled.view(-1, self.num_flat_features(pooled))
        hidden = F.relu(self.fc1(flat))
        hidden = F.relu(self.fc2(hidden))
        return self.fc3(hidden)

    def num_flat_features(self, x):
        """Return the product of every dimension of ``x`` except the first."""
        count = 1
        for extent in x.size()[1:]:  # skip the leading (batch) dimension
            count = count * extent
        return count

net = Net()
print(net)
'''
Net(
(conv1): Conv2d(1, 6, kernel_size=(5, 5), stride=(1, 1))
(conv2): Conv2d(6, 16, kernel_size=(5, 5), stride=(1, 1))
(fc1): Linear(in_features=400, out_features=120, bias=True)
(fc2): Linear(in_features=120, out_features=84, bias=True)
(fc3): Linear(in_features=84, out_features=10, bias=True)
)
'''

#查看可训练参数
params = list(net.parameters())
print(len(params))	#10
print(params[0].size()) # conv1's .weight

#前向传播
input = torch.randn(1, 1, 32, 32)
out = net(input)
print(out)

#把所有参数梯度缓存器置零,用随机的梯度来反向传播
##retain_graph=True,计算完梯度不销毁子图,但是得带计算时耗内存
net.zero_grad()
out.backward(torch.randn(1, 10))

#定义损失
output = net(input)
target = torch.randn(10) # a dummy target, for example
target = target.view(1, -1) # make it the same shape as output
criterion = nn.MSELoss()
loss = criterion(output, target)
print(loss)

#跟踪反向传播路径,可以使用它的 .grad_fn 属性
print(loss.grad_fn) # MSELoss
print(loss.grad_fn.next_functions[0][0]) # Linear
print(loss.grad_fn.next_functions[0][0].next_functions[0][0]) # ReLU
'''
input -> conv2d -> relu -> maxpool2d -> conv2d -> relu -> maxpool2d
	-> view -> linear -> relu -> linear -> relu -> linear
	-> MSELoss
	-> loss
'''

#损失反向传播
net.zero_grad() # zeroes the gradient buffers of all parameters
print('conv1.bias.grad before backward')
print(net.conv1.bias.grad)  #0梯度
loss.backward()
print('conv1.bias.grad after backward')
print(net.conv1.bias.grad)


#自定义参数反向传播
#weight = weight - learning_rate *gradient
learning_rate = 0.01
for f in net.parameters():
	f.data.sub_(f.grad.data * learning_rate)


#参数优化方法选择
#SGD, Nesterov-SGD, Adam, RMSProp
import torch.optim as optim
optimizer = optim.SGD(net.parameters(), lr=0.01)  # create your optimizer
optimizer.zero_grad()  # zero the gradient buffers
output = net(input)
loss = criterion(output, target)
loss.backward()
optimizer.step() # Does the update


#---------------------------------------------
#CIFAR10图像分类器训练
import torch
import torchvision
import torchvision.transforms as transforms

#数据集下载,并将数据归一化[-1,1]之间
transform = transforms.Compose(
    [transforms.ToTensor(),
     transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))]
)

trainset = torchvision.datasets.CIFAR10(root='./data', train=True,
                                        download=True, transform=transform)
testset = torchvision.datasets.CIFAR10(root='./data', train=False,
                                       download=True, transform=transform)

#数据批次加载
trainloader = torch.utils.data.DataLoader(trainset, batch_size=4,
                                          shuffle=True, num_workers=2)
testloader = torch.utils.data.DataLoader(testset, batch_size=4,
                                         shuffle=False, num_workers=2)
classes = ('plane', 'car', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck')


import matplotlib.pyplot as plt
import numpy as np
# functions to show an image
def imshow(img):
    """Undo the normalization on ``img`` and display it with matplotlib."""
    restored = img / 2 + 0.5  # unnormalize
    # Move axis 0 to the end (axes reordered as (1, 2, 0)) before plotting.
    reordered = np.transpose(restored.numpy(), (1, 2, 0))
    plt.imshow(reordered)
    plt.show()
# get some random training images
dataiter = iter(trainloader)
# Use the built-in next(): DataLoader iterators in current PyTorch releases
# follow the Python 3 iterator protocol and have no .next() method.
images, labels = next(dataiter)
# show images
imshow(torchvision.utils.make_grid(images))  # stitch the batch into one image
# print labels (one per image actually in the batch, not a fixed 4)
print(' '.join('%5s' % classes[labels[j]] for j in range(len(labels))))




import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

class Net(nn.Module):
    """Convolutional classifier taking 3-channel input and producing 10 outputs."""

    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(3, 6, 5)
        self.pool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(6, 16, 5)
        self.fc1 = nn.Linear(16 * 5 * 5, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)

    def forward(self, x):
        """Run conv -> ReLU -> pool twice, flatten to 16*5*5, then the linear layers."""
        for conv in (self.conv1, self.conv2):
            x = self.pool(F.relu(conv(x)))
        x = x.view(-1, 16 * 5 * 5)
        for hidden_layer in (self.fc1, self.fc2):
            x = F.relu(hidden_layer(x))
        return self.fc3(x)

net = Net()
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9)

# Train for two passes over the dataset.
for epoch in range(2):
    running_loss = 0.0
    for i, (inputs, labels) in enumerate(trainloader):
        # Clear gradients left over from the previous step.
        optimizer.zero_grad()

        # Forward pass, loss, backward pass, parameter update.
        outputs = net(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # Report the mean loss once every 2000 mini-batches.
        running_loss += loss.item()
        if (i + 1) % 2000 == 0:
            print('[%d, %5d] loss: %.3f' % (epoch + 1, i + 1, running_loss / 2000))
            running_loss = 0.0
print('Finished Training')



outputs = net(images)
_, predicted = torch.max(outputs, 1)  #最相似类别类标
print('Predicted: ', ' '.join('%5s' % classes[predicted[j]] for j in range(4)))


#模型评估
correct = 0
total = 0
with torch.no_grad():
    for data in testloader:
        images, labels = data
        outputs = net(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()
print('Accuracy of the network on the 10000 test images: %d %%' % (100 * correct / total))


#每一类别预测准确率
# Per-class accuracy over the test set.
class_correct = [0.] * 10
class_total = [0.] * 10
with torch.no_grad():
    for data in testloader:
        images, labels = data
        outputs = net(images)
        _, predicted = torch.max(outputs, 1)
        c = predicted == labels
        # Walk the samples actually present in the batch instead of a fixed
        # range(4), so any batch size (including a shorter final batch or a
        # batch of one) is counted correctly.
        for label, hit in zip(labels, c):
            class_correct[label] += hit.item()
            class_total[label] += 1
for i in range(10):
    print('Accuracy of %5s : %2d %%' % (classes[i], 100 * class_correct[i] / class_total[i]))



#GPU
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
# Assume that we are on a CUDA machine, then this should print a CUDA device:
print(device)

  

from __future__ import print_function
import torch
#---------------------------------------------
#基础知识
#定义张量
#常数初始化
#torch.long, float, double, float64,
x = torch.tensor([5.5, 3])
torch.empty(size, dtype=torch.long)  #返回形状为size的空tensor
torch.zeros(size)  全部是0的tensor
torch.ones(size)   全部是1的tensor
torch.full(size, fill_value)  全fill_value的tensor
torch.zeros_like(x) 返回跟input的tensor一个size的全零tensor
torch.ones_like(x)  返回跟input的tensor一个size的全一tensor
torch.arange(start=0, end, step=1)  返回一个从start到end的序列,只指定end则类似range()

#随机初始化
torch.rand(size)   [0,1)内的均匀分布随机数
torch.rand_like(input)  返回跟input的tensor一样size的0-1随机数
torch.randn(size)  返回标准正态分布N(0,1)的随机数
torch.normal(mean, std, out=None)  正态分布。注意,mean和std都是tensor,默认0,1

#张量属性
x.size()
x.shape

#张量操作
#tensor切片、合并、变形、抽取操作
x[:,1]
x.view(-1, 8)
x.item()  #获得标量值
torch.cat(seq, dim=0, out=None)   #拼接, 0=行拼接
torch.cat((x,x,x),0)
torch.chunk(tensor, chunks, dim=0) #切块,数量由chunks指定。
torch.chunk(torch.arange(10),4)
torch.split(tensor, split_size_or_sections, dim=0)  #切块
torch.index_select(input, dim, index, out=None)  #按index选择
torch.masked_select(input, mask, out=None)      #按mask选择
torch.squeeze(input)  #去掉所有长度为1的维度(并不是压缩成1维)。注意,压缩后的tensor和原来的tensor共享地址
torch.reshape(input, shape)  #改变形状
tensor.view(shape)  #改变形状

#运算
x+y
torch.add(x,y)   #torch.add(input, value, out=None)
y.add_(x)  #Torch里面所有带"_"的操作,都是in-place的
x.copy_(y)
x.data.norm()
torch.mul(input, other, out=None) #乘法
torch.div(input, other, out=None) #除法
torch.pow(input, exponent, out=None) #指数
torch.sqrt(input, out=None)
torch.round(input, out=None) #四舍五入到整数
torch.argmax(input, dim=None, keepdim=False) #argmax函数
torch.sigmoid(input, out=None) #sigmoid函数
torch.tanh(input, out=None) #tanh函数
torch.abs(input, out=None) #取绝对值
torch.ceil(input, out=None) #向上取整
torch.clamp(input, min, max, out=None) #截断函数,把输入数据规范在min-max区间,超过范围的用min、max代替

#张量与Python常用数据类型转换
#tensor与numpy互转
x.numpy() #共址
torch.from_numpy(x)  #共址

#自动微分
#要想使x支持求导,必须让x为浮点类型
#求导只能是标量对标量,或标量对向量/矩阵求导
# Autograd walkthrough on a 2x2 tensor of ones.
x = torch.ones(2, 2, requires_grad=True)  # requires_grad defaults to False
x.requires_grad         # read the attribute
x.requires_grad_(True)  # in-place method that sets the attribute
y = x+2
y.grad_fn
# y is a 2x2 (non-scalar) tensor, so backward() cannot create the implicit
# gradient of 1 on its own; pass an explicit gradient with the same shape as y.
y.backward(torch.ones_like(y))
x.data
x.grad

with torch.no_grad():
    # Gradient tracking is switched off inside this context, so the result
    # does not require grad even though x does; this prints False.
    print((x ** 2).requires_grad)

#---------------------------------------------
#前馈神经网络
import torch
import torch.nn as nn
import torch.nn.functional as F
class Net(nn.Module):
    """Two convolution layers followed by three linear layers."""

    def __init__(self):
        super(Net, self).__init__()
        # 1 input image channel, 6 output channels, 5x5 square convolution
        # kernel
        self.conv1 = nn.Conv2d(1, 6, 5)
        self.conv2 = nn.Conv2d(6, 16, 5)
        # an affine operation: y = Wx + b
        self.fc1 = nn.Linear(16 * 5 * 5, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)

    def forward(self, x):
        """Apply conv/ReLU/max-pool twice, flatten, then the linear layers."""
        # Max pooling over a (2, 2) window
        x = F.max_pool2d(F.relu(self.conv1(x)), (2, 2))
        # If the size is a square you can only specify a single number
        x = F.max_pool2d(F.relu(self.conv2(x)), 2)
        x = x.view(-1, self.num_flat_features(x))
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x

    def num_flat_features(self, x):
        """Return the product of every dimension of ``x`` except the first."""
        size = x.size()[1:]  # all dimensions except the batch dimension
        num_features = 1
        for s in size:
            num_features *= s
        return num_features

net = Net()
print(net)
'''Net((conv1): Conv2d(1, 6, kernel_size=(5, 5), stride=(1, 1))(conv2): Conv2d(6, 16, kernel_size=(5, 5), stride=(1, 1))(fc1): Linear(in_features=400, out_features=120, bias=True)(fc2): Linear(in_features=120, out_features=84, bias=True)(fc3): Linear(in_features=84, out_features=10, bias=True))'''


#查看可训练参数
params = list(net.parameters())
print(len(params))#10
print(params[0].size()) # conv1's .weight

#前向传播
input = torch.randn(1, 1, 32, 32)
out = net(input)
print(out)

#把所有参数梯度缓存器置零,用随机的梯度来反向传播
##retain_graph=True,计算完梯度不销毁子图,但是得带计算时耗内存
net.zero_grad()
out.backward(torch.randn(1, 10))

#定义损失
output = net(input)
target = torch.randn(10) # a dummy target, for example
target = target.view(1, -1) # make it the same shape as output
criterion = nn.MSELoss()
loss = criterion(output, target)
print(loss)

#跟踪反向传播路径,可以使用它的 .grad_fn 属性
# Walk backwards through the graph starting from the loss.
print(loss.grad_fn) # MSELoss
print(loss.grad_fn.next_functions[0][0]) # Linear
# This call had been fused into the comment on the previous line and never ran.
print(loss.grad_fn.next_functions[0][0].next_functions[0][0]) # ReLU
'''input -> conv2d -> relu -> maxpool2d -> conv2d -> relu -> maxpool2d-> view -> linear -> relu -> linear -> relu -> linear-> MSELoss-> loss'''

#损失反向传播
net.zero_grad() # zeroes the gradient buffers of all parameters
# This print had been fused into the comment on the line above and never ran.
print('conv1.bias.grad before backward')
# Cleared gradient. NOTE(review): depending on the PyTorch version this shows
# zeros or None (newer zero_grad() defaults to setting grads to None) - confirm.
print(net.conv1.bias.grad)
loss.backward()
print('conv1.bias.grad after backward')
print(net.conv1.bias.grad)

#自定义参数反向传播
#weight = weight - learning_rate *gradient
learning_rate = 0.01
for f in net.parameters():
    f.data.sub_(f.grad.data * learning_rate)

#参数优化方法选择
#SGD, Nesterov-SGD, Adam, RMSProp
import torch.optim as optim
optimizer = optim.SGD(net.parameters(), lr=0.01)  # create your optimizer
optimizer.zero_grad()  # zero the gradient buffers
output = net(input)
loss = criterion(output, target)
loss.backward()
optimizer.step() # Does the update

#---------------------------------------------
#CIFAR10图像分类器训练
import torch
import torchvision
import torchvision.transforms as transforms

#数据集下载,并将数据归一化[-1,1]之间
transform = transforms.Compose(
    [transforms.ToTensor(),
     transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])
trainset = torchvision.datasets.CIFAR10(root='./data', train=True,                                        download=True, transform=transform)
testset = torchvision.datasets.CIFAR10(root='./data', train=False,                                       download=True, transform=transform)

#数据批次加载
trainloader = torch.utils.data.DataLoader(trainset, batch_size=4,                                          shuffle=True, num_workers=2)
testloader = torch.utils.data.DataLoader(testset, batch_size=4,                                         shuffle=False, num_workers=2)
classes = ('plane', 'car', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck')

import matplotlib.pyplot as plt
import numpy as np

# functions to show an image
def imshow(img):
    """Undo the normalization on ``img`` and display it with matplotlib."""
    img = img / 2 + 0.5 # unnormalize
    npimg = img.numpy()
    # Reorder axes as (1, 2, 0) before handing the array to matplotlib.
    plt.imshow(np.transpose(npimg, (1, 2, 0)))
    plt.show()


# get some random training images
dataiter = iter(trainloader)
# Use the built-in next(): DataLoader iterators in current PyTorch releases
# follow the Python 3 iterator protocol and have no .next() method.
images, labels = next(dataiter)
# show images
imshow(torchvision.utils.make_grid(images))  # stitch the batch into one image
# print labels (one per image actually in the batch, not a fixed 4)
print(' '.join('%5s' % classes[labels[j]] for j in range(len(labels))))



import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
class Net(nn.Module):
    """Convolutional classifier taking 3-channel input and producing 10 outputs."""

    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(3, 6, 5)
        self.pool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(6, 16, 5)
        self.fc1 = nn.Linear(16 * 5 * 5, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)

    def forward(self, x):
        """Return the ``fc3`` output for input batch ``x``."""
        # Convolutional stage: conv -> ReLU -> shared 2x2 max pool, twice.
        stage1 = self.pool(F.relu(self.conv1(x)))
        stage2 = self.pool(F.relu(self.conv2(stage1)))
        # Flatten to rows of 16*5*5 features for the linear layers.
        flat = stage2.view(-1, 16 * 5 * 5)
        hidden1 = F.relu(self.fc1(flat))
        hidden2 = F.relu(self.fc2(hidden1))
        return self.fc3(hidden2)

net = Net()
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9)
for epoch in range(2):
    # loop over the dataset multiple times
    running_loss = 0.0
    for i, data in enumerate(trainloader, 0):
        # get the inputs
        inputs, labels = data
        # zero the parameter gradients
        optimizer.zero_grad()
        # forward + backward + optimize
        outputs = net(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        # print statistics
        running_loss += loss.item()
        if i % 2000 == 1999: # print every 2000 mini-batches
            print('[%d, %5d] loss: %.3f' %(epoch + 1, i + 1, running_loss / 2000))
            running_loss = 0.0
# This print had been fused onto the reset line above, which was a syntax
# error; it belongs after both loops have finished.
print('Finished Training')


outputs = net(images)
_, predicted = torch.max(outputs, 1)  #最相似类别类标
print('Predicted: ', ' '.join('%5s' % classes[predicted[j]] for j in range(4)))

#模型评估
correct = 0
total = 0
with torch.no_grad():
    for data in testloader:
        images, labels = data
        outputs = net(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()
print('Accuracy of the network on the 10000 test images: %d %%' % (100 * correct / total))

#每一类别预测准确率
# Per-class accuracy over the test set. The two counters had been fused onto
# a single line, which was a syntax error.
class_correct = [0.] * 10
class_total = [0.] * 10
with torch.no_grad():
    for data in testloader:
        images, labels = data
        outputs = net(images)
        _, predicted = torch.max(outputs, 1)
        c = predicted == labels
        # Walk the samples actually present in the batch instead of a fixed
        # range(4), so any batch size (including a shorter final batch or a
        # batch of one) is counted correctly.
        for label, hit in zip(labels, c):
            class_correct[label] += hit.item()
            class_total[label] += 1
for i in range(10):
    print('Accuracy of %5s : %2d %%' % (classes[i], 100 * class_correct[i] / class_total[i]))


#GPU
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
# Assume that we are on a CUDA machine, then this should print a CUDA device:
print(device)

  

# ================
#.to(device) 可以指定CPU 或者GPU
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") # 单GPU或者CPU
model.to(device)
#如果是多GPU
if torch.cuda.device_count() > 1:
  model = nn.DataParallel(model,device_ids=[0,1,2])
model.to(device)


# ================
#.cuda() 只能指定GPU
#指定某个GPU
import os  # required for os.environ; not imported in the visible part of this file

# The variable is CUDA_VISIBLE_DEVICES (plural); the singular spelling is ignored.
# NOTE(review): it presumably has to be set before CUDA is first initialized
# in the process to take effect - confirm.
os.environ['CUDA_VISIBLE_DEVICES'] = '1'
model.cuda()
# multiple GPUs
os.environ['CUDA_VISIBLE_DEVICES'] = '0,1,2,3'  # the mapping is os.environ, not os.environment
device_ids = [0,1,2,3]
net  = torch.nn.DataParallel(net, device_ids =device_ids)  # class name is DataParallel
net  = torch.nn.DataParallel(net) # uses all device_ids by default
net = net.cuda()


# ================
# 推荐
# 开始脚本,创建一个张量
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
...
# 但是无论你获得一个新的Tensor或者Module
# 如果他们已经在目标设备上则不会执行复制操作
input = data.to(device)
model = MyModule(...).to(device)

  

















 

posted on 2021-04-07 17:34  iUpoint  阅读(51)  评论(0编辑  收藏  举报

导航