莫烦pytorch学习记录

感谢莫烦大神Pytorch B站视频：https://www.bilibili.com/video/av15997678?p=11

一个博主的笔记：https://blog.csdn.net/Will_Ye/article/details/104516423

一、PyTorch是什么？

它是一个基于Python的科学计算包，其主要是为了解决两类场景：

1、一种是可以替代Numpy进行科学计算，同时还可以使用张量在GPU上进行加速运算。
2、一个深度学习的研究平台，提供最大的灵活性和速度。

二、Numpy与Torch之间的转换

import torch
import numpy as np
from torch.autograd import Variable # torch 中 Variable 模块

# Torch bills itself as the NumPy of the neural-network world, because
# tensors produced by torch can be placed on a GPU for accelerated math.
np_data = np.arange(6).reshape(2, 3)
torch_data = torch.from_numpy(np_data)  # as a torch tensor
tensor2array = torch_data.numpy()  # back as a numpy array
# torch interoperates well with numpy, so numpy arrays and torch tensors
# can be converted back and forth freely.
print(
    '\nnumpy array:', np_data,  # [[0 1 2], [3 4 5]]
    '\ntorch tensor:', torch_data,  # tensor([[0, 1, 2], [3, 4, 5]])
    '\ntensor to array:', tensor2array,  # [[0 1 2], [3 4 5]]
)

numpy array: [[0 1 2]
              [3 4 5]] 
torch tensor: tensor([[0, 1, 2],
                      [3, 4, 5]], dtype=torch.int32) 
tensor to array: [[0 1 2]
                  [3 4 5]]

三、Torch中的数学运算与numpy的对比

API手册

常用计算：注意！！！！所有在pytorch里的计算，都要先转换为tensor的形式，不然就报错，切记！！！

# Element-wise math: the same operation computed once by numpy and once
# by torch, so the results can be compared side by side.
data = [-1, -2, 1, 2]
tensor = torch.FloatTensor(data)  # convert to a 32-bit floating-point tensor

# (label, numpy function, torch function); expected results:
#   abs  -> [1 2 1 2]
#   sin  -> [-0.8415 -0.9093  0.8415  0.9093]
#   mean -> 0.0
for label, np_func, torch_func in (
    ('abs', np.abs, torch.abs),
    ('sin', np.sin, torch.sin),
    ('mean', np.mean, torch.mean),
):
    print(
        '\n' + label,
        '\nnumpy: ', np_func(data),
        '\ntorch: ', torch_func(tensor)
    )

矩阵计算：注意，有些numpy的封装的函数跟pytorch的不一样，这一点一定要区分清楚，也是很容易出问题的一个地方。

# Matrix multiplication (a matrix product, not an element-wise product).
data = [[1,2], [3,4]]
tensor = torch.FloatTensor(data)  # convert to a 32-bit floating-point tensor
# Correct approach: np.matmul on the numpy side, torch.mm on the torch side.
print(
    '\nmatrix multiplication (matmul)',
    '\nnumpy: ', np.matmul(data, data),     # [[7, 10], [15, 22]]
    '\ntorch: ', torch.mm(tensor, tensor)   # [[7, 10], [15, 22]]
)

# !!!! The pitfall: ndarray.dot and torch.dot are NOT interchangeable !!!!
data = np.array(data)
print(
    '\nmatrix multiplication (dot)',
    # On a 2-D ndarray, .dot() is a matrix product, so this works in numpy.
    '\nnumpy: ', data.dot(data),        # [[7, 10], [15, 22]]
    # torch.dot only accepts two 1-D tensors, so the 2-D tensor has to be
    # flattened first. The result is the inner product of [1, 2, 3, 4] with
    # itself, not a matrix product.
    '\ntorch: ', torch.dot(tensor.flatten(), tensor.flatten())   # 30.0
)

四、Variable

tensor = torch.FloatTensor([[1,2],[3,4]])
# A Variable's contents keep changing - think of it as a basket of eggs
# whose egg count keeps changing.
# And the eggs inside the basket? Those are Torch's Tensors.
# Computing with a Variable returns a Variable of the same kind.
# Put the eggs into the basket:
variable = Variable(tensor, requires_grad=True)# requires_grad: whether this takes part in back-propagation, i.e. whether a gradient is computed for it

print('\n',tensor)
"""
 1  2
 3  4
[torch.FloatTensor of size 2x2]
"""

print(variable)
"""
Variable containing:
 1  2
 3  4
[torch.FloatTensor of size 2x2]
"""

variable的计算

模仿一个计算梯度的情况

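比较 tensor 的计算和 variable 的计算：在正向传播中，两者的结果看不出有什么不同。它们最大的区别在于反向传播：variable（requires_grad=True）的运算会被记录进计算图，调用 backward() 之后可以通过 variable.grad 取得它的梯度；而直接用 tensor 做的计算不会记录梯度。注意 backward() 只负责计算梯度，并不会修改 variable 本身的数值，数值的更新要靠后面的优化器（optimizer.step()）来完成。这是我觉得它们最大的不同。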

t_out = torch.mean(tensor*tensor)       # mean of x^2, computed on the plain tensor
v_out = torch.mean(variable*variable)   # mean of x^2, computed on the Variable
print('\n',t_out)
print('\n',v_out)    # 7.5

假设mean的均值做为结果的误差，对误差反向传播得到各项梯度。利用这个例子去看，在反向传播中它们之间的不同。

v_out = torch.mean(variable*variable)就是给各个variable搭建一个运算的步骤，搭建的网络也是其中一种运算的步骤。

v_out.backward()    # back-propagate from v_out through the recorded computation graph
# It is fine if the next two lines are unclear; the point is that a Variable
# is part of the computation graph and can carry the error backwards.
# v_out = 1/4 * sum(variable*variable) is how v_out is computed in the graph,
# so the gradient is d(v_out)/d(variable) = 1/4*2*variable = variable/2
print('\n',variable.grad)    # gradient stored on the original Variable
'''
 0.5000  1.0000
 1.5000  2.0000
'''

可以看到，在backward中已经计算好梯度了，利用*.grad将背景中计算好的variable的各项梯度print出来。

这样如果是个网络的运算步骤也可以在backward中将各个梯度计算好。

获取Variable里面的数据

直接print(variable)只会输出 Variable形式的数据, 在很多时候是用不了的(比如想要用 plt 画图), 需要转换成tensor形式.

## Getting the data out of a Variable
print(variable)     #  Variable form
"""
Variable containing:
 1  2
 3  4
[torch.FloatTensor of size 2x2]
"""
print(variable.data)    # tensor form
"""
 1  2
 3  4
[torch.FloatTensor of size 2x2]
"""
print(variable.data.numpy())    # numpy form
"""
[[ 1.  2.]
 [ 3.  4.]]
"""

五、常用几种激励函数及图像

常用几种激励函数：relu, sigmoid, tanh, softplus

# Make some fake data to look at the activation curves
x = torch.linspace(-5, 5, 200)  # x data (tensor): 200 evenly spaced points from -5 to 5
x = Variable(x)
# Torch offers many activation functions, but these are the ones used most:
# relu, sigmoid, tanh, softplus. Let's see what each of them looks like.
x_np = x.data.numpy()   # convert to a numpy array, used when plotting

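莫烦大神那时的版本用的是 torch.nn.functional 里的函数（F.relu、F.sigmoid、F.tanh），但后来版本改了，relu、sigmoid、tanh 直接用 torch.relu、torch.sigmoid、torch.tanh 就可以；softplus 则仍然通过 torch.nn.functional.softplus 调用。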

# 几种常用的 激励函数
y_relu = F.relu(x).data.numpy()
y_sigmoid = torch.sigmoid(x).data.numpy()
y_tanh = torch.tanh(x).data.numpy()
y_softplus = F.softplus(x).data.numpy()
# y_softmax = F.softmax(x)  softmax 比较特殊, 不能直接显示, 不过他是关于概率的, 用于分类

# Plot with the pyplot (plt) state-machine interface: one subplot per activation
plt.figure(1,figsize=(8,6))
plt.subplot(221)
plt.plot(x_np,y_relu, c='red', label='relu')
plt.ylim(-1,5)
plt.legend(loc='best')

plt.subplot(222)
plt.plot(x_np,y_sigmoid, c='red',label='sigmoid')
plt.ylim(-0.2,1.2)
plt.legend(loc='best')

plt.subplot(223)
plt.plot(x_np,y_tanh, c='red',label='tanh')
plt.ylim(-1.2,1.2)
plt.legend(loc='best')

plt.subplot(224)
plt.plot(x_np,y_softplus, c='red',label='softplus')
plt.ylim(-0.2,6)
plt.legend(loc='best')


# Plot the same curves with the Axes (ax) interface in a second figure
fig, ax = plt.subplots(2,2,figsize=(8,6))
ax[0,0].plot(x_np,y_relu,c='red', label='relu')
ax[0,0].set_title('relu',fontsize=18)
ax[0,0].set_ylim(-1,5)
ax[0,0].legend()

ax[0,1].plot(x_np,y_sigmoid)
ax[1,0].plot(x_np,y_tanh)
ax[1,1].plot(x_np,y_softplus)
plt.show()

六、线性拟合回归

import torch
import matplotlib.pyplot as plt
import torch.nn.functional as F
####### Make up some data #######
x = torch.unsqueeze(torch.linspace(-1,1,500),dim=1)# x data, shape=(500,1)
y = x.pow(2) + 0.2*torch.rand(x.size())
# Plot the made-up data to see how it looks
fig,ax = plt.subplots(2,1)
ax[0].scatter(x.data.numpy(),y.data.numpy())
ax[0].set_title("Pinched data",fontsize=18)

搭建网络

# A network is built with torch's module system: declare every layer in
# __init__(), then wire the layers together (with activation functions in
# between) inside forward(x).
class Net(torch.nn.Module):
    """Regression network with a single hidden layer."""

    def __init__(self, n_feature, n_hidden, n_output):
        super().__init__()
        # Layer definitions; hidden is created before predict.
        self.hidden = torch.nn.Linear(in_features=n_feature, out_features=n_hidden)
        self.predict = torch.nn.Linear(in_features=n_hidden, out_features=n_output)

    def forward(self, x):
        """Forward pass: ReLU on the hidden layer's output, then the linear output layer."""
        hidden_activation = F.relu(self.hidden(x))
        return self.predict(hidden_activation)


net = Net(n_feature=1, n_hidden=10, n_output=1)
print(net)
"""
Net (
  (hidden): Linear (1 -> 10)
  (predict): Linear (10 -> 1)
)
"""

开始训练

# optimizer: the training tool
optimizer = torch.optim.SGD(net.parameters(), lr=0.2) # pass in all of net's parameters and the learning rate
loss_func = torch.nn.MSELoss() # error between prediction and target (mean squared error)

plt.ion()
for t in range(10000):
    prediction = net(x) # feed the training data x to net and get its prediction
    loss = loss_func(prediction, y) # error between the two; prediction goes first, label second

    optimizer.zero_grad() # clear the gradients left over from the previous step
    loss.backward()       # back-propagate the error to compute the gradients
    optimizer.step()      # apply the update to net's parameters

可视化训练过程

    # This fragment continues the body of the `for t in range(10000)` training loop.
    if t % 5 == 0:
        # plot and show learning process
        ax[1].cla()
        ax[1].scatter(x.data.numpy(), y.data.numpy())
        ax[1].plot(x.data.numpy(), prediction.data.numpy(), 'r-', lw=5)
        ax[1].text(0.5, 0, 'Loss=%.4f' % loss.data.numpy(), fontdict={'size': 20, 'color': 'red'})
        plt.pause(0.01)
# If interactive mode was switched on with ion() in a script and is not
# switched off with ioff(), the figure only flashes by and does not stay open.
# To prevent that, call ioff() before plt.show().
plt.ioff()
plt.show()

七、区分类型 (分类)

捏个数据

import torch
import matplotlib.pyplot as plt

# Fake data
n_data = torch.ones(100, 2)         # base shape of the data
x0 = torch.normal(2*n_data, 1)      # class 0 x data (tensor), shape=(100, 2)
y0 = torch.zeros(100)               # class 0 y data (tensor), shape=(100, )
x1 = torch.normal(-2*n_data, 1)     # class 1 x data (tensor), shape=(100, 2)
y1 = torch.ones(100)                # class 1 y data (tensor), shape=(100, )

# Note: x and y must have exactly the forms below (torch.cat concatenates the data)
x = torch.cat((x0, x1), 0).type(torch.FloatTensor)  # FloatTensor = 32-bit floating
y = torch.cat((y0, y1), ).type(torch.LongTensor)    # LongTensor = 64-bit integer


# Plotting x against y directly raises an error because the point counts differ:
# x has shape (200, 2) while y has shape (200,), so the two columns of x are plotted against each other instead.
# plt.scatter(x.data.numpy(), y.data.numpy())
# plt.show()
# Plot: column 0 of x against column 1, coloured by the label y
plt.scatter(x.data.numpy()[:, 0], x.data.numpy()[:, 1], c=y.data.numpy(), s=100, lw=0, cmap='RdYlGn')
plt.show()

搭个网络

class Net(torch.nn.Module):
    """Classification network with one hidden layer; returns raw class scores."""

    def __init__(self, n_feature, n_hidden, n_outpot):
        super().__init__()
        # hidden is created before out.
        self.hidden = torch.nn.Linear(in_features=n_feature, out_features=n_hidden)
        self.out = torch.nn.Linear(in_features=n_hidden, out_features=n_outpot)

    def forward(self, x):
        """Forward pass: ReLU on the hidden layer's output, then the linear output layer."""
        hidden_activation = torch.relu(self.hidden(x))
        return self.out(hidden_activation)


net = Net(n_feature=2, n_hidden=10, n_outpot=2)

print(net)

训练网络

optimizer = torch.optim.SGD(net.parameters(), lr=0.02)  # pass in all of net's parameters and the learning rate
# When computing the loss, the target is NOT one-hot encoded: it is a 1-D tensor of shape (batch,),
# while the prediction is a 2-D tensor of shape (batch, n_classes)
loss_func = torch.nn.CrossEntropyLoss()

for t in range(100):
    out = net(x)     # feed the training data x to net, get the raw class scores

    loss = loss_func(out, y)     # error between the scores and the labels

    optimizer.zero_grad()   # clear the gradients left over from the previous step
    loss.backward()         # back-propagate the error to compute the gradients
    optimizer.step()        # apply the update to net's parameters

可视化

plt.ion()
for t in range(100):
    out = net(x)     # feed the training data x to net, get the raw class scores

    loss = loss_func(out, y)     # error between the scores and the labels

    optimizer.zero_grad()   # clear the gradients left over from the previous step
    loss.backward()         # back-propagate the error to compute the gradients
    optimizer.step()        # apply the update to net's parameters
    # Continuing from the training step above: redraw every second iteration.
    if t % 2 == 0:
        plt.cla()
        # The predicted class is the one with the highest softmax probability.
        # dim=1 (the class dimension) is passed explicitly instead of relying
        # on the deprecated implicit dim, and torch.softmax is used because
        # this section's imports do not include the alias F.
        # torch.max(..., 1) returns (values, indices) along dim 1; [1] selects
        # the indices, i.e. the predicted class of every sample.
        prediction = torch.max(torch.softmax(out, dim=1), 1)[1]
        # squeeze() drops size-1 dimensions so matplotlib receives a plain 1-D array.
        pred_y = prediction.data.numpy().squeeze()
        target_y = y.data.numpy()
        plt.scatter(x.data.numpy()[:, 0], x.data.numpy()[:, 1], c=pred_y, s=100, lw=0, cmap='RdYlGn')
        # Fraction of predictions that match the labels; the sample count is
        # taken from the data instead of a hard-coded 200.
        accuracy = float((pred_y == target_y).sum()) / len(target_y)
        plt.text(1.5, -4, 'Accuracy=%.2f' % accuracy, fontdict={'size': 20, 'color': 'red'})
        plt.pause(0.1)

plt.ioff()
plt.show()

八、快速搭建法

　　Torch 中提供了很多方便的途径, 同样是神经网络, 能快则快, 我们看看如何用更简单的方式搭建同样的回归神经网络.

上一节用的方法更加底层，其实有更加快速的方法，对比一下就很清楚了：

Method 1

我们用 class 继承了一个 torch 中的神经网络结构, 然后对其进行了修改

class Net(torch.nn.Module):
    """Method 1: subclass torch.nn.Module and customise the network by hand."""

    def __init__(self, n_feature, n_hidden, n_outpot):
        super().__init__()
        # hidden is created before out.
        self.hidden = torch.nn.Linear(in_features=n_feature, out_features=n_hidden)
        self.out = torch.nn.Linear(in_features=n_hidden, out_features=n_outpot)

    def forward(self, x):
        """Forward pass: ReLU on the hidden layer's output, then the linear output layer."""
        hidden_activation = torch.relu(self.hidden(x))
        return self.out(hidden_activation)


net1 = Net(n_feature=2, n_hidden=10, n_outpot=2)

Method 2

用nn库里一个函数就能快速搭建了，注意ReLU也算做一层加入到网络序列中

net2 = torch.nn.Sequential(
    torch.nn.Linear(2,10),
    torch.nn.ReLU(),
    torch.nn.Linear(10,2)
)
print(net1)
print(net2)

结果是类似的：就是net2中将ReLU也做为一个神经层

print(net1)
"""
Net(
  (hidden): Linear(in_features=2, out_features=10, bias=True)
  (out): Linear(in_features=10, out_features=2, bias=True)
)
"""
print(net2)
"""
Sequential(
  (0): Linear(in_features=2, out_features=10, bias=True)
  (1): ReLU()
  (2): Linear(in_features=10, out_features=2, bias=True)
)
"""

九、保存与提取网络

保存

#######捏造数据#######
torch.manual_seed(1)    # reproducible 使得每次随机初始化的随机数是一致的
x = torch.unsqueeze(torch.linspace(-1,1,500),dim=1)#x的数据，shape=(500,1)
y = x.pow(2) + 0.2*torch.rand(x.size())

def save():
    """Train a small regression network on the module-level x / y and save it twice.

    Writes 'net.pkl' (the whole network object) and 'net_params.pkl'
    (only the network's state_dict).
    """
    # Build the network
    net1 = torch.nn.Sequential(
        torch.nn.Linear(in_features=1, out_features=10),
        torch.nn.ReLU(),
        torch.nn.Linear(in_features=10, out_features=1),
    )
    loss_func = torch.nn.MSELoss()
    optimizer = torch.optim.SGD(net1.parameters(), lr=0.5)
    # Train
    for _ in range(100):
        loss = loss_func(net1(x), y)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    # Save the whole network
    torch.save(net1, 'net.pkl')
    # Save only the parameters (the notes describe this as faster and smaller)
    torch.save(net1.state_dict(), 'net_params.pkl')

提取网络

# This approach restores the entire network; it may be slow for a large network.
def restore_net():
    # restore entire net1 to net2
    # NOTE(review): torch.load unpickles the file, so only load files you
    # trust. Recent PyTorch releases may need weights_only=False to load a
    # whole pickled module like this one - confirm against the installed version.
    net2 = torch.load('net.pkl')
    prediction = net2(x)

只提取网络参数

# Only the parameters are loaded here and then copied into a newly built network.
def restore_params():
    """Rebuild the 1-10-1 architecture and load 'net_params.pkl' into it."""
    # Build net3 with the same layer layout as the saved network
    layers = [
        torch.nn.Linear(1, 10),
        torch.nn.ReLU(),
        torch.nn.Linear(10, 1),
    ]
    net3 = torch.nn.Sequential(*layers)

    # Copy the saved parameters into net3
    saved_state = torch.load('net_params.pkl')
    net3.load_state_dict(saved_state)
    prediction = net3(x)

完整代码并查看三个网络模型的结果

import torch
import matplotlib.pyplot as plt
import torch.nn.functional as F
from matplotlib import animation
#######捏造数据#######
torch.manual_seed(1)    # reproducible 使得每次随机初始化的随机数是一致的
x = torch.unsqueeze(torch.linspace(-1,1,100),dim=1)#x的数据，shape=(500,1)
y = x.pow(2) + 0.2*torch.rand(x.size())



def save():
    """Train net1 on the module-level x / y, save it two ways and plot its fit.

    Writes 'net.pkl' (the whole network object) and 'net_params.pkl' (only
    the state_dict), then draws the data and the fitted curve in the first
    of three panels of figure 1.
    """
    # Build the network
    net1 = torch.nn.Sequential(
        torch.nn.Linear(in_features=1, out_features=100),
        torch.nn.ReLU(),
        torch.nn.Linear(in_features=100, out_features=1),
    )
    loss_func = torch.nn.MSELoss()
    optimizer = torch.optim.SGD(net1.parameters(), lr=0.3)
    # Train; `prediction` keeps the output of the last iteration for plotting
    for _ in range(1000):
        prediction = net1(x)
        loss = loss_func(prediction, y)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    # Save the whole network
    torch.save(net1, 'net.pkl')
    # Save only the parameters (the notes describe this as faster and smaller)
    torch.save(net1.state_dict(), 'net_params.pkl')

    # plot result
    x_np, y_np = x.data.numpy(), y.data.numpy()
    plt.figure(1, figsize=(10, 3))
    plt.subplot(131)
    plt.title('Net1')
    plt.scatter(x_np, y_np)
    plt.plot(x_np, prediction.data.numpy(), 'r-', lw=5)


# This approach restores the entire network; it may be slow for a large network.
def restore_net():
    """Load the whole network from 'net.pkl' and plot its fit in the middle panel."""
    # restore entire net1 to net2
    # NOTE(review): torch.load unpickles the file, so only load files you
    # trust. Recent PyTorch releases may need weights_only=False to load a
    # whole pickled module like this one - confirm against the installed version.
    net2 = torch.load('net.pkl')
    prediction = net2(x)

    # plot result
    x_np, y_np = x.data.numpy(), y.data.numpy()
    plt.figure(1, figsize=(10, 3))
    plt.subplot(132)
    plt.title('Net2')
    plt.scatter(x_np, y_np)
    plt.plot(x_np, prediction.data.numpy(), 'r-', lw=5)

# Only the parameters are loaded here and then copied into a newly built network.
def restore_params():
    """Rebuild the 1-100-1 architecture, load 'net_params.pkl' into it and plot its fit."""
    # Build net3 with the same layer layout as the saved network
    net3 = torch.nn.Sequential(
        torch.nn.Linear(in_features=1, out_features=100),
        torch.nn.ReLU(),
        torch.nn.Linear(in_features=100, out_features=1),
    )

    # Copy the saved parameters into net3
    saved_state = torch.load('net_params.pkl')
    net3.load_state_dict(saved_state)
    prediction = net3(x)

    # plot result
    x_np, y_np = x.data.numpy(), y.data.numpy()
    plt.figure(1, figsize=(10, 3))
    plt.subplot(133)
    plt.title('Net3')
    plt.scatter(x_np, y_np)
    plt.plot(x_np, prediction.data.numpy(), 'r-', lw=5)



# 保存 net1 (1. 整个网络, 2. 只有参数)
save()

# 提取整个网络
restore_net()

# 提取网络参数, 复制到新网络
restore_params()

plt.show()

十、批训练

　　进行批量训练需要用到一个很好用的工具DataLoader 。

　　注意，莫烦大神用的版本跟现在新版还是有些出入的：使用 Data.TensorDataset 这个函数时，不要再指定 data_tensor 和 target_tensor 这两个关键字参数，否则会报错，因为新版的库改了，直接按位置传入 x 和 y 就行了。

数据分批

import torch
import torch.utils.data as tud
torch.manual_seed(1)

BATCH_SIZE = 20  # number of samples per training batch (20 per batch)
####### Make up some data #######
x = torch.unsqueeze(torch.linspace(-1,1,100),dim=1)# x data, shape=(100,1)
y = x.pow(2) + 0.2*torch.rand(x.size())

# First wrap the tensors in a Dataset that torch understands
# torch_dataset = DL.TensorDataset(data_tensor=x, target_tensor=y)
# The newer library changed this: pass x and y directly.
torch_dataset = tud.TensorDataset(x,y)

# Put the dataset into a DataLoader
loader = tud.DataLoader(
    dataset=torch_dataset, # the Dataset built above
    batch_size=BATCH_SIZE, # size of each batch
    shuffle=True,          # whether to shuffle the data (shuffling is usually better)
    num_workers=2          # number of loader workers used to read the data

)

分批训练

epoch 表示整套数据被完整训练的次数；step 则是一个 epoch 内当前批次（batch）的序号，每个 step 由 loader 取出一批（最多 BATCH_SIZE 个）数据。

def show_batch():
    """Iterate over the module-level loader for three epochs and print every batch."""
    n_epochs = 3  # go through the WHOLE data set this many times
    for epoch in range(n_epochs):
        # each step the loader hands out one batch to learn from
        for step, batch in enumerate(loader):
            batch_x, batch_y = batch
            # the actual training step would go here

            # print some of the data
            print('Epoch:', epoch, '|Step:', step, '|batch x:', batch_x.numpy(), '|batch y:', batch_y.numpy())


if __name__ == '__main__':
    show_batch()

"""
Epoch: 0 |Step: 0 |batch x: [ 5.  7. 10.  3.  4.] |batch y: [6. 4. 1. 8. 7.]
Epoch: 0 |Step: 1 |batch x: [2. 1. 8. 9. 6.] |batch y: [ 9. 10.  3.  2.  5.]
Epoch: 1 |Step: 0 |batch x: [ 4.  6.  7. 10.  8.] |batch y: [7. 5. 4. 1. 3.]
Epoch: 1 |Step: 1 |batch x: [5. 3. 2. 1. 9.] |batch y: [ 6.  8.  9. 10.  2.]
Epoch: 2 |Step: 0 |batch x: [ 4.  2.  5.  6. 10.] |batch y: [7. 9. 6. 5. 1.]
Epoch: 2 |Step: 1 |batch x: [3. 9. 1. 8. 7.] |batch y: [ 8.  2. 10.  3.  4.]
"""

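　　可以看出, 每步都导出了5个数据进行学习. 然后每个 epoch 的导出数据都是先打乱了以后再导出。（注意：这里贴出的输出对应的是只有 10 个样本、BATCH_SIZE = 5 的设置，与上面代码里 100 个样本、BATCH_SIZE = 20 的设置不同；按上面的代码运行，每个 epoch 会有 5 个 step，每步导出 20 个数据。）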

　　仍以 10 个样本为例，改变一下 BATCH_SIZE = 8, 这样我们就知道, step=0 会导出8个数据, 但是, step=1 时数据集中剩下的数据不够 8个, 这时怎么办呢: 这时, 在 step=1 就只给你返回这个 epoch 中剩下的数据就好了.

Epoch: 0 |Step: 0 |batch x: [ 5.  7. 10.  3.  4.  2.  1.  8.] |batch y: [ 6.  4.  1.  8.  7.  9. 10.  3.]
Epoch: 0 |Step: 1 |batch x: [9. 6.] |batch y: [2. 5.]
Epoch: 1 |Step: 0 |batch x: [ 4.  6.  7. 10.  8.  5.  3.  2.] |batch y: [7. 5. 4. 1. 3. 6. 8. 9.]
Epoch: 1 |Step: 1 |batch x: [1. 9.] |batch y: [10.  2.]
Epoch: 2 |Step: 0 |batch x: [ 4.  2.  5.  6. 10.  3.  9.  1.] |batch y: [ 7.  9.  6.  5.  1.  8.  2. 10.]
Epoch: 2 |Step: 1 |batch x: [8. 7.] |batch y: [3. 4.]

十一、Optimizer 优化器

各种不同的优化器在同一个网络上的对比

SGD 是最普通的优化器, 也可以说没有加速效果, 而 Momentum 是 SGD 的改良版, 它加入了动量原则. 后面的 RMSprop 又是 Momentum 的升级版. 而 Adam 又是 RMSprop 的升级版. 不过从这个结果中我们看到, Adam 的效果似乎比 RMSprop 要差一点. 所以说并不是越先进的优化器, 结果越佳.

1.捏一个数据

import torch
import torch.utils.data as tud
import matplotlib.pyplot as plt

torch.manual_seed(1)

LR = 0.01
BATCH_SIZE = 32
EPOCH = 12

######捏一个数据######
x = torch.unsqueeze(torch.linspace(-1,1,1000),dim=1)
y = x.pow(2) + 0.1*torch.normal(torch.zeros(*x.size()))

plt.scatter(x.numpy(),y.numpy())
plt.show()

torch_dataset = tud.TensorDataset(x,y)
loader = tud.DataLoader(
    dataset=torch_dataset,
    batch_size=BATCH_SIZE,
    shuffle=True,
    num_workers=2
)

2.建立同样的网络

# Build the same network Net for every optimizer
class Net(torch.nn.Module):
    """Regression network: Linear(1, 20) -> ReLU -> Linear(20, 1)."""

    def __init__(self):
        super(Net, self).__init__()
        self.hidden = torch.nn.Linear(1,20)
        self.predict = torch.nn.Linear(20,1)

    def forward(self,x):
        """Return the prediction for x after one ReLU-activated hidden layer."""
        # torch.relu is the functional ReLU. The original torch.ReLU does not
        # exist (the layer class lives at torch.nn.ReLU), so calling the
        # network raised AttributeError.
        x = torch.relu(self.hidden(x))
        x = self.predict(x)
        return  x

# 为每个优化器创建一个net
net_SGD = Net()
net_Momentum = Net()
net_RMSprop = Net()
net_Adam = Net()
nets = [net_SGD,net_Momentum,net_RMSprop,net_Adam]

3.不同的optimizer

　　接下来再创建不同的优化器, 用来训练不同的网络. 并创建一个 loss_func 用来计算误差. 我们用几种常见的优化器: SGD, Momentum, RMSprop, Adam.

# different optimizers
opt_SGD         = torch.optim.SGD(net_SGD.parameters(), lr=LR)
opt_Momentum    = torch.optim.SGD(net_Momentum.parameters(), lr=LR, momentum=0.8)
opt_RMSprop     = torch.optim.RMSprop(net_RMSprop.parameters(), lr=LR, alpha=0.9)
opt_Adam        = torch.optim.Adam(net_Adam.parameters(), lr=LR, betas=(0.9, 0.99))
optimizers = [opt_SGD, opt_Momentum, opt_RMSprop, opt_Adam]

loss_func = torch.nn.MSELoss()
losses_his = [[], [], [], []]   # 记录 training 时不同神经网络的 loss

4.训练出图

    # NOTE: this excerpt keeps the indentation it has in the complete listing
    # below, where it sits inside the `if __name__ == '__main__':` block.
    for epoch in range(EPOCH):
        print('Epoch',epoch)
        for step,(b_x,b_y) in enumerate(loader):
            # for every optimizer, train the network that belongs to it
            for net, opt, l_his in zip(nets, optimizers, losses_his):
                output = net(b_x)  # get the output of this network
                loss = loss_func(output, b_y)  # compute loss for every net
                opt.zero_grad()  # clear gradients for next train
                loss.backward()  # backpropagation, compute gradients
                opt.step()  # apply gradients
                l_his.append(loss.data.numpy())  # loss recorder

    # one loss curve per optimizer
    labels = ['SGD', 'MoMENTUM', 'RMSPROP', 'Adam']
    for i, l_his in enumerate(losses_his):
        plt.plot(l_his, label=labels[i])
    plt.legend(loc='best')
    plt.xlabel('Steps')
    plt.ylabel('Loss')
    plt.ylim((0, 0.2))
    plt.show()

完整代码

import torch
import torch.utils.data as tud
import matplotlib.pyplot as plt
import torch.nn.functional as F
torch.manual_seed(1)

LR = 0.01
BATCH_SIZE = 32
EPOCH = 12

######捏一个数据######
x = torch.unsqueeze(torch.linspace(-1,1,1000),dim=1)
y = x.pow(2) + 0.1*torch.normal(torch.zeros(*x.size()))

# plt.scatter(x.numpy(),y.numpy())
# plt.show()

torch_dataset = tud.TensorDataset(x,y)
loader = tud.DataLoader(
    dataset=torch_dataset,
    batch_size=BATCH_SIZE,
    shuffle=True,
    num_workers=2
)

# Build the same network Net for every optimizer
class Net(torch.nn.Module):
    """Regression network: Linear(1, 20) -> ReLU -> Linear(20, 1)."""

    def __init__(self):
        super().__init__()
        # hidden is created before predict.
        self.hidden = torch.nn.Linear(in_features=1, out_features=20)
        self.predict = torch.nn.Linear(in_features=20, out_features=1)

    def forward(self, x):
        """Return the prediction for x after one ReLU-activated hidden layer."""
        hidden_activation = F.relu(self.hidden(x))
        return self.predict(hidden_activation)

if __name__== '__main__':
    # create one network per optimizer
    net_SGD = Net()
    net_Momentum = Net()
    net_RMSprop = Net()
    net_Adam = Net()
    nets = [net_SGD,net_Momentum,net_RMSprop,net_Adam]

    # different optimizers
    opt_SGD         = torch.optim.SGD(net_SGD.parameters(), lr=LR)
    opt_Momentum    = torch.optim.SGD(net_Momentum.parameters(), lr=LR, momentum=0.8)
    opt_RMSprop     = torch.optim.RMSprop(net_RMSprop.parameters(), lr=LR, alpha=0.9)
    opt_Adam        = torch.optim.Adam(net_Adam.parameters(), lr=LR, betas=(0.9, 0.99))
    optimizers = [opt_SGD, opt_Momentum, opt_RMSprop, opt_Adam]

    loss_func = torch.nn.MSELoss()
    losses_his = [[], [], [], []]   # training loss history, one list per network

    for epoch in range(EPOCH):
        print('Epoch',epoch)
        for step,(b_x,b_y) in enumerate(loader):
            # for every optimizer, train the network that belongs to it
            for net, opt, l_his in zip(nets, optimizers, losses_his):
                output = net(b_x)  # get the output of this network
                loss = loss_func(output, b_y)  # compute loss for every net
                opt.zero_grad()  # clear gradients for next train
                loss.backward()  # backpropagation, compute gradients
                opt.step()  # apply gradients
                l_his.append(loss.data.numpy())  # loss recorder

    # one loss curve per optimizer
    labels = ['SGD', 'MoMENTUM', 'RMSPROP', 'Adam']
    for i, l_his in enumerate(losses_his):
        plt.plot(l_his, label=labels[i])
    plt.legend(loc='best')
    plt.xlabel('Steps')
    plt.ylabel('Loss')
    plt.ylim((0, 0.2))
    plt.show()

十二、CNN_classification

1、MNIST手写数据

import torch
import torch.nn as nn
import torch.utils.data as Data
import torchvision      # 数据库模块
import matplotlib.pyplot as plt

torch.manual_seed(1)    # reproducible

# Hyper Parameters
EPOCH = 1           # 训练整批数据多少次, 为了节约时间, 我们只训练一次
BATCH_SIZE = 50
LR = 0.001          # 学习率
DOWNLOAD_MNIST = True  # 如果你已经下载好了mnist数据就写上 False


# MNIST handwritten digits
train_data = torchvision.datasets.MNIST(
    root='./mnist/',    # where the data is saved to / loaded from
    train=True,  # this is training data
    transform=torchvision.transforms.ToTensor(),    # converts a PIL.Image or numpy.ndarray to a
                                                    # torch.FloatTensor (C x H x W) scaled to the range [0.0, 1.0]
    download=DOWNLOAD_MNIST,          # download when not present yet; skip when already downloaded
)

test_data = torchvision.datasets.MNIST(root='./mnist/', train=False)

# Mini-batch training: 50 samples, 1 channel, 28x28 -> (50, 1, 28, 28)
train_loader = Data.DataLoader(dataset=train_data, batch_size=BATCH_SIZE, shuffle=True)

# To save time, only the first 2000 test samples are used.
# .data / .targets replace the deprecated .test_data / .test_labels aliases,
# matching the complete listing later in these notes.
test_x = torch.unsqueeze(test_data.data, dim=1).type(torch.FloatTensor)[:2000]/255.   # shape from (2000, 28, 28) to (2000, 1, 28, 28), value in range(0,1)
test_y = test_data.targets[:2000]

2、CNN模型

和以前一样, 我们用一个 class 来建立 CNN 模型. 这个 CNN 整体流程是

卷积(Conv2d) -> 激励函数(ReLU) -> 池化, 向下采样 (MaxPooling)

-> 再来一遍 -> 展平多维的卷积成的特征图 -> 接入全连接层 (Linear) -> 输出

# CNN model
class CNN(nn.Module):
    """Two conv/ReLU/max-pool blocks followed by a linear classifier for 1x28x28 inputs."""

    def __init__(self):
        super(CNN, self).__init__()
        self.conv1 = nn.Sequential(  # input shape (1,28,28)
            nn.Conv2d(
                in_channels=1,    # number of input channels
                out_channels=16,  # n_filters, number of output channels
                kernel_size=5,    # filter size
                stride=1,         # filter movement/step
                padding=2,        # keeps width/height unchanged: padding=(kernel_size-1)/2 when stride=1
            ),  # output shape (16, 28, 28)
            nn.ReLU(),            # activation
            nn.MaxPool2d(kernel_size=2),  # down-sample over 2x2 windows, output shape (16, 14, 14)
        )
        self.conv2 = nn.Sequential(   # input shape (16,14,14)
            nn.Conv2d(16,32,5,1,2),   # output shape (32,14,14)
            nn.ReLU(),
            nn.MaxPool2d(2),          # output shape (32,7,7)
        )
        self.out = nn.Linear(32*7*7,10)  # fully connected layer, 10 output classes

    def forward(self,x):
        """Return the class scores, shape (batch_size, 10), for a batch of images."""
        # The original spelled this method `forword`, so nn.Module never found
        # a forward() implementation and calling the model failed.
        x = self.conv1(x)
        x = self.conv2(x)
        x = x.view(x.size(0), -1) # flatten the feature maps to (batch_size, 32 * 7 * 7)
        output = self.out(x)
        return output

    # Backward-compatible alias for the original, misspelled method name.
    forword = forward


cnn = CNN()
print(cnn)

CNN(
  (conv1): Sequential(
    (0): Conv2d(1, 16, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
    (1): ReLU()
    (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (conv2): Sequential(
    (0): Conv2d(16, 32, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
    (1): ReLU()
    (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (out): Linear(in_features=1568, out_features=10, bias=True)
)

3、训练，全部代码

下面我们开始训练：把每一批数据 b_x 放入 cnn 中计算 output，再和 b_y 计算误差（下面的代码没有再用 Variable 把 x、y 包起来）。下面的代码每 50 步会在测试数据上计算一次精确度 accuracy，并且在安装了 sklearn 的情况下用 T-SNE 可视化最后一层的特征。

import os
import torch
import torch.nn as nn
import torch.utils.data as Data
import torchvision      # 数据库模块
import matplotlib.pyplot as plt
from matplotlib import cm

torch.manual_seed(1)    # reproducible

# Hyper Parameters
EPOCH = 1           # 训练整批数据多少次, 为了节约时间, 我们只训练一次
BATCH_SIZE = 50
LR = 0.001          # 学习率
DOWNLOAD_MNIST = False  # 如果你已经下载好了mnist数据就写上 False

if not(os.path.exists('./mnist/')) or not os.listdir('./mnist/'):
    # not mnist dir or mnist is empyt dir
    DOWNLOAD_MNIST = True


# Mnist 手写数字
train_data = torchvision.datasets.MNIST(
    root='./mnist/',    # 保存或者提取位置
    train=True,  # this is training data
    transform=torchvision.transforms.ToTensor(),    # 转换 PIL.Image or numpy.ndarray 成
                                                    # torch.FloatTensor (C x H x W), 训练的时候 normalize 成 [0.0, 1.0] 区间
    download=DOWNLOAD_MNIST,          # 没下载就下载, 下载了就不用再下了
)

test_data = torchvision.datasets.MNIST(root='./mnist/', train=False)

# plot one example
print(train_data.data.size())                 # (60000, 28, 28)
print(train_data.targets.size())               # (60000)
plt.imshow(train_data.data[0].numpy(), cmap='gray')
plt.title('%i' % train_data.targets[0])
plt.show()


# 批训练 50samples, 1 channel, 28x28 (50, 1, 28, 28)
train_loader = Data.DataLoader(dataset=train_data, batch_size=BATCH_SIZE, shuffle=True)

# 为了节约时间, 我们测试时只测试前2000个
test_x = torch.unsqueeze(test_data.data, dim=1).type(torch.FloatTensor)[:2000]/255.   # shape from (2000, 28, 28) to (2000, 1, 28, 28), value in range(0,1)
test_y = test_data.targets[:2000]

# CNN model
class CNN(nn.Module):
    """Two conv/ReLU/max-pool blocks followed by a linear classifier for 1x28x28 inputs."""

    def __init__(self):
        super().__init__()
        # Block 1: (1, 28, 28) -> conv (16, 28, 28) -> ReLU -> pool (16, 14, 14).
        # With stride=1, padding=(kernel_size-1)/2 keeps width and height unchanged.
        self.conv1 = nn.Sequential(
            nn.Conv2d(1, 16, kernel_size=5, stride=1, padding=2),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2),
        )
        # Block 2: (16, 14, 14) -> conv (32, 14, 14) -> ReLU -> pool (32, 7, 7).
        self.conv2 = nn.Sequential(
            nn.Conv2d(in_channels=16, out_channels=32, kernel_size=5, stride=1, padding=2),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2),
        )
        # Fully connected layer producing 10 class scores.
        self.out = nn.Linear(32 * 7 * 7, 10)

    def forward(self, x):
        """Return (class scores, flattened features) for a batch of images."""
        features = self.conv2(self.conv1(x))
        # flatten the feature maps to (batch_size, 32 * 7 * 7)
        flat = features.view(features.size(0), -1)
        return self.out(flat), flat


def train_save():
    """Train the CNN on MNIST, report test accuracy while training, then save it.

    Relies on the module-level EPOCH, LR, train_loader, test_x and test_y.
    Every 50 steps the test accuracy is printed and, when scikit-learn can be
    imported, the flattened last-layer features are plotted with T-SNE.
    The trained network is saved under './mnist_net/' both as a whole object
    and as a state_dict, and the predictions for the first 10 test digits
    are printed at the end.
    """
    cnn = CNN()
    print(cnn)

    optimizer = torch.optim.Adam(cnn.parameters(), lr=LR)  # optimise all of the CNN's parameters
    loss_func = nn.CrossEntropyLoss()

    # scikit-learn is optional: it is only needed for the T-SNE visualisation.
    # ImportError is caught specifically so unrelated errors are not swallowed.
    try:
        from sklearn.manifold import TSNE
        HAS_SK = True
    except ImportError:
        HAS_SK = False
        print('Please install sklearn for layer visualization')

    def plot_with_labels(lowDWeights, labels):
        """Draw every 2-D embedded point as its digit label, coloured by digit."""
        plt.cla()
        X, Y = lowDWeights[:, 0], lowDWeights[:, 1]
        for x, y, s in zip(X, Y, labels):
            c = cm.rainbow(int(255 * s / 9))
            plt.text(x, y, s, backgroundcolor=c, fontsize=9)
        plt.xlim(X.min(), X.max())
        plt.ylim(Y.min(), Y.max())
        plt.title('Visualize last layer')
        plt.show()
        plt.pause(0.01)

    plt.ion()

    # training and testing
    for epoch in range(EPOCH):
        for step, (b_x, b_y) in enumerate(train_loader):   # gives batch data, normalize x when iterate train_loader

            output = cnn(b_x)[0]            # cnn output
            loss = loss_func(output, b_y)   # cross entropy loss
            optimizer.zero_grad()           # clear gradients for this training step
            loss.backward()                 # backpropagation, compute gradients
            optimizer.step()                # apply gradients

            if step % 50 == 0:
                # Evaluation only: no gradients are needed for the test pass.
                with torch.no_grad():
                    test_output, last_layer = cnn(test_x)
                pred_y = torch.max(test_output, 1)[1].data.numpy()
                accuracy = float((pred_y == test_y.data.numpy()).astype(int).sum()) / float(test_y.size(0))
                print('Epoch: ', epoch, '| train loss: %.4f' % loss.data.numpy(), '| test accuracy: %.2f' % accuracy)
                if HAS_SK:
                    # Visualization of trained flatten layer (T-SNE)
                    # NOTE(review): newer scikit-learn releases may expect
                    # max_iter instead of n_iter - confirm against the
                    # installed version.
                    tsne = TSNE(perplexity=30, n_components=2, init='pca', n_iter=5000)
                    plot_only = 500
                    low_dim_embs = tsne.fit_transform(last_layer.data.numpy()[:plot_only, :])
                    labels = test_y.numpy()[:plot_only]
                    plot_with_labels(low_dim_embs, labels)
    plt.ioff()

    # torch.save does not create missing directories, so make sure the target
    # directory exists before saving.
    os.makedirs('./mnist_net/', exist_ok=True)
    torch.save(cnn, './mnist_net/cnn_classification_net.pkl')  # save the whole network
    torch.save(cnn.state_dict(), './mnist_net/cnn_classification_net_params.pkl')  # save only the parameters

    # Sample training output from the notes:
    # ...
    # Epoch:  0 | train loss: 0.0306 | test accuracy: 0.97
    # Epoch:  0 | train loss: 0.0147 | test accuracy: 0.98
    # Epoch:  0 | train loss: 0.0427 | test accuracy: 0.98
    # Epoch:  0 | train loss: 0.0078 | test accuracy: 0.98

    # print 10 predictions from test data
    test_output, _ = cnn(test_x[:10])
    pred_y = torch.max(test_output, 1)[1].data.numpy()
    print(pred_y, 'prediction number')
    print(test_y[:10].numpy(), 'real number')
    # Sample output from the notes:
    # [7 2 1 0 4 1 4 9 5 9] prediction number
    # [7 2 1 0 4 1 4 9 5 9] real number


# This approach restores the entire network; it may be slow for a large network.
def restore_net():
    """Load the whole saved CNN and print its predictions for the first 20 test digits."""
    # restore entire net1 to net2
    # NOTE(review): torch.load unpickles the file, so only load files you
    # trust. Recent PyTorch releases may need weights_only=False to load a
    # whole pickled module like this one - confirm against the installed version.
    net2 = torch.load('./mnist_net/cnn_classification_net.pkl')
    n_show = 20
    test_output, _ = net2(test_x[:n_show])
    pred_y = torch.max(test_output, 1)[1].data.numpy()
    print(pred_y, 'prediction number')
    print(test_y[:n_show].numpy(), 'real number')

if __name__ == '__main__':
    train = True
    if train:
        train_save()
    else:
        restore_net()

十三、RNN_classification

1、MNIST手写数据

import os
import torch
import torch.nn as nn
import torch.utils.data as Data
import torchvision      # 数据库模块
import matplotlib.pyplot as plt
from matplotlib import cm
import torchvision.transforms as transforms
torch.manual_seed(1)    # reproducible

# Hyper Parameters
EPOCH = 1           # 训练整批数据多少次, 为了节约时间, 我们只训练一次
BATCH_SIZE = 64
TIME_STEP = 28
INPUT_SIZE = 28
LR = 0.01          # 学习率
DOWNLOAD_MNIST = False  # 如果你已经下载好了mnist数据就写上 False

if not(os.path.exists('./mnist/')) or not os.listdir('./mnist/'):
    # not mnist dir or mnist is empyt dir
    DOWNLOAD_MNIST = True


# MNIST handwritten digits
train_data = torchvision.datasets.MNIST(
    root='./mnist/',    # where the data is saved to / loaded from
    train=True,  # this is training data
    transform=torchvision.transforms.ToTensor(),    # converts a PIL.Image or numpy.ndarray to a
                                                    # torch.FloatTensor (C x H x W) scaled to the range [0.0, 1.0]
    download=DOWNLOAD_MNIST,          # download when not present yet; skip when already downloaded
)

test_data = torchvision.datasets.MNIST(root='./mnist/', train=False, transform=transforms.ToTensor())

# Mini-batch loader: each batch holds BATCH_SIZE (64 here) samples of 1 channel, 28x28
train_loader = Data.DataLoader(dataset=train_data, batch_size=BATCH_SIZE, shuffle=True)

# To save time, only the first 2000 test samples are used
# NOTE(review): this produces a 4-D tensor (2000, 1, 28, 28), while RNN.forward
# below documents a 3-D (batch, time_step, input_size) input - confirm the
# training code reshapes test_x, or use the commented-out 3-D variant.
test_x = torch.unsqueeze(test_data.data, dim=1).type(torch.FloatTensor)[:2000]/255.   # shape from (2000, 28, 28) to (2000, 1, 28, 28), value in range(0,1)
# test_x = test_data.test_data.type(torch.FloatTensor)[:2000]/255. 
test_y = test_data.targets[:2000]

2.RNN模型

和以前一样, 我们用一个 class 来建立 RNN 模型. 这个 RNN 整体流程是

(input0, state0) -> LSTM -> (output0, state1);
(input1, state1) -> LSTM -> (output1, state2);
…
(inputN, stateN)-> LSTM -> (outputN, stateN+1);
outputN -> Linear -> prediction. 通过LSTM分析每一时刻的值, 并且将这一时刻和前面时刻的理解合并在一起, 生成当前时刻对前面数据的理解或记忆. 传递这种理解给下一时刻分析.

class RNN(nn.Module):
    """LSTM classifier: reads a (batch, 28, 28) image row by row, emits 10 scores."""

    def __init__(self):
        super().__init__()
        # Single-layer LSTM: 28 features per time step, 64 hidden units.
        # batch_first=True -> tensors are laid out (batch, time_step, input_size).
        self.rnn = nn.LSTM(
            28,
            64,
            num_layers=1,
            batch_first=True,
        )
        # Output layer mapping the 64 hidden units to 10 class scores.
        self.out = nn.Linear(in_features=64, out_features=10)

    def forward(self, x):
        """Return class scores of shape (batch, 10) for x of shape (batch, time_step, 28)."""
        # Passing None lets the LSTM start from its default initial state.
        # The LSTM also returns the final (h_n, h_c) pair, which is not needed here.
        sequence_out, _final_states = self.rnn(x, None)
        # Classify from the output at the last time step only.
        last_step = sequence_out[:, -1, :]
        return self.out(last_step)

3、训练&完整代码

我们将图片数据看成一个时间上的连续数据, 每一行的像素点都是这个时刻的输入, 读完整张图片就是从上而下的读完了每行的像素点. 然后我们就可以拿出 RNN 在最后一步的分析值判断图片是哪一类了.

import os
import torch
import torch.nn as nn
import torch.utils.data as Data
import torchvision      # 数据库模块
import matplotlib.pyplot as plt
from matplotlib import cm
import torchvision.transforms as transforms
torch.manual_seed(1)    # reproducible

# Hyper Parameters
EPOCH = 1               # passes over the training set (one keeps the demo short)
BATCH_SIZE = 64
TIME_STEP = 28
INPUT_SIZE = 28
LR = 0.01               # learning rate
# Download only when ./mnist/ is missing or empty.
DOWNLOAD_MNIST = not os.path.exists('./mnist/') or not os.listdir('./mnist/')


# MNIST handwritten digits; ToTensor yields float tensors (C x H x W) in [0.0, 1.0].
train_data = torchvision.datasets.MNIST(
    root='./mnist/',
    train=True,
    transform=transforms.ToTensor(),
    download=DOWNLOAD_MNIST,
)

test_data = torchvision.datasets.MNIST(
    root='./mnist/',
    train=False,
    transform=transforms.ToTensor(),
)

# Shuffled mini-batches of BATCH_SIZE training samples.
train_loader = Data.DataLoader(train_data, batch_size=BATCH_SIZE, shuffle=True)

# Only the first 2000 test samples are used, to save time.
# test_x stays (2000, 28, 28) with values scaled into (0, 1); test_y is a NumPy array.
test_x = test_data.data[:2000].type(torch.FloatTensor) / 255.
test_y = test_data.targets[:2000].numpy()

# RNN model definition
class RNN(nn.Module):
    """LSTM classifier: reads a (batch, 28, 28) image row by row, emits 10 scores."""

    def __init__(self):
        super().__init__()
        # Single-layer LSTM: 28 features per time step, 64 hidden units.
        # batch_first=True -> tensors are laid out (batch, time_step, input_size).
        self.rnn = nn.LSTM(
            28,
            64,
            num_layers=1,
            batch_first=True,
        )
        # Output layer mapping the 64 hidden units to 10 class scores.
        self.out = nn.Linear(in_features=64, out_features=10)

    def forward(self, x):
        """Return class scores of shape (batch, 10) for x of shape (batch, time_step, 28)."""
        # Passing None lets the LSTM start from its default initial state.
        # The LSTM also returns the final (h_n, h_c) pair, which is not needed here.
        sequence_out, _final_states = self.rnn(x, None)
        # Classify from the output at the last time step only.
        last_step = sequence_out[:, -1, :]
        return self.out(last_step)

def train_save():
    """Train the LSTM classifier, report progress, and save it under ./mnist_net/.

    Uses the module-level RNN, train_loader, test_x, test_y, LR and EPOCH.
    Every 50 steps the current loss and the accuracy on test_x / test_y are
    printed. After training, both the whole network and its state_dict are
    saved, and the predictions for the first 10 test samples are printed.
    """
    import os  # local import: only needed to create the output directory

    rnn = RNN()
    print(rnn)

    optimizer = torch.optim.Adam(rnn.parameters(), lr=LR)  # optimize all RNN parameters
    loss_func = nn.CrossEntropyLoss()

    # training and testing
    for epoch in range(EPOCH):
        for step, (b_x, b_y) in enumerate(train_loader):
            b_x = b_x.view(-1, 28, 28)      # reshape x to (batch, time_step, input_size)

            output = rnn(b_x)               # rnn output
            loss = loss_func(output, b_y)   # cross entropy loss
            optimizer.zero_grad()           # clear gradients for this training step
            loss.backward()                 # backpropagation, compute gradients
            optimizer.step()                # apply gradients

            if step % 50 == 0:
                # Evaluation needs no gradients; skip building the autograd graph.
                with torch.no_grad():
                    test_output = rnn(test_x)   # (samples, time_step, input_size)
                pred_y = torch.max(test_output, 1)[1].numpy()

                accuracy = float((pred_y == test_y).astype(int).sum()) / float(test_y.size)
                print('Epoch: ', epoch, '| train loss: %.4f' % loss.item(), '| test accuracy: %.2f' % accuracy)

    # torch.save does not create missing parent directories.
    os.makedirs('./mnist_net', exist_ok=True)
    torch.save(rnn, './mnist_net/rnn_classification_net.pkl')  # save the whole network
    torch.save(rnn.state_dict(), './mnist_net/rnn_classification_net_params.pkl')  # parameters only (faster, smaller)

    # Sample progress output recorded by the author:
    #   Epoch:  0 | train loss: 0.0306 | test accuracy: 0.97
    #   Epoch:  0 | train loss: 0.0147 | test accuracy: 0.98
    #   Epoch:  0 | train loss: 0.0427 | test accuracy: 0.98
    #   Epoch:  0 | train loss: 0.0078 | test accuracy: 0.98

    # print 10 predictions from test data
    with torch.no_grad():
        test_output = rnn(test_x[:10].view(-1, 28, 28))
    pred_y = torch.max(test_output, 1)[1].numpy()
    print(pred_y, 'prediction number')
    print(test_y[:10], 'real number')
    # Sample output recorded by the author:
    #   [7 2 1 0 4 1 4 9 5 9] prediction number
    #   [7 2 1 0 4 1 4 9 5 9] real number


# This restores the entire pickled network, which may be slow for large networks.
def restore_net():
    """Load the saved network and print its predictions and accuracy on 100 test samples."""
    # NOTE(review): newer PyTorch releases may refuse to unpickle a whole module
    # unless torch.load is called with weights_only=False -- confirm for your version.
    restored = torch.load('./mnist_net/rnn_classification_net.pkl')

    sample_x = test_x[:100].view(-1, 28, 28)
    sample_y = test_y[:100]

    scores = restored(sample_x)
    pred_y = torch.max(scores, 1)[1].data.numpy()
    hits = (pred_y == sample_y).astype(int).sum()
    accuracy = float(hits) / 100

    print(pred_y, 'prediction number')
    print(sample_y, 'real number')
    print('accuracy: ',accuracy)


if __name__ == '__main__':
    # False: skip training and load the already-trained model; True: train first.
    GO_train = False
    entry_point = train_save if GO_train else restore_net
    entry_point()

十四、RNN_regression

用 RNN 来及时预测时间序列

1、训练数据

用 sin 的曲线预测出 cos 的曲线

import torch
from torch import nn
import numpy as np
import matplotlib.pyplot as plt

torch.manual_seed(1)

# Hyper-parameters
TIME_STEP = 10      # rnn time steps
INPUT_SIZE = 1      # rnn input size
LR = 0.02           # learning rate

# Show the data: one full period of sin (input) and cos (target).
# float32 so the arrays convert directly to torch FloatTensor.
steps = np.linspace(0, 2 * np.pi, num=100, dtype=np.float32)
x_np, y_np = np.sin(steps), np.cos(steps)
plt.plot(steps, y_np, 'r-', label='target(cos)')
plt.plot(steps, x_np, 'b-', label='input(sin)')
plt.legend(loc='best')
plt.show()

2、RNN网络

class RNN(nn.Module):
    """Plain RNN regressor that emits one value per time step."""

    def __init__(self):
        super().__init__()
        # A vanilla RNN is enough for this task.
        self.rnn = nn.RNN(INPUT_SIZE, 32, num_layers=1, batch_first=True)
        self.out = nn.Linear(in_features=32, out_features=1)

    def forward(self, x, h_state):
        """Run the RNN over x starting from h_state.

        x is (batch, time_step, input_size) and h_state is
        (n_layers, batch, hidden_size) or None. Returns the per-step
        predictions stacked along dim 1 together with the new hidden state,
        which the caller passes back in on the next call.
        """
        r_out, h_state = self.rnn(x, h_state)
        # Apply the output layer to every time step separately.
        per_step = [self.out(r_out[:, t, :]) for t in range(r_out.size(1))]
        return torch.stack(per_step, dim=1), h_state

    # The per-step computation above is written out step by step to show
    # PyTorch's dynamic graph. A cheaper equivalent reshapes the RNN output
    # and applies the output layer to all time steps at once:
    # def forward(self, x, h_state):
    #     r_out, h_state = self.rnn(x, h_state)
    #     r_out = r_out.reshape(-1, 32)
    #     outs = self.out(r_out)
    #     return outs.view(-1, TIME_STEP, 1), h_state

# Build the network and print its layer summary.
rnn = RNN()
print(rnn)
# Printout recorded by the author (a bare string literal with no runtime effect):
"""
RNN(
  (rnn): RNN(1, 32, batch_first=True)
  (out): Linear(in_features=32, out_features=1, bias=True)
)
"""

3、训练

可以看出, 我们使用 x 作为输入的 sin 值, 然后 y 作为想要拟合的输出, cos 值. 因为他们两条曲线是存在某种关系的, 所以我们就能用 sin 来预测 cos. rnn 会理解他们的关系, 并用里面的参数分析出来这个时刻 sin 曲线上的点如何对应上 cos 曲线上的点.

optimizer = torch.optim.Adam(rnn.parameters(), lr=LR)   # optimize all rnn parameters
loss_func = nn.MSELoss()

h_state = None   # None makes the RNN start from its default initial hidden state

for step in range(100):
    start, end = step * np.pi, (step+1)*np.pi   # time range covered by this step
    # use sin to predict cos
    steps = np.linspace(start, end, TIME_STEP, dtype=np.float32, endpoint=False)  # float32 for converting torch FloatTensor
    x_np = np.sin(steps)
    y_np = np.cos(steps)

    # np.newaxis inserts a length-1 axis at its position, turning the 1-D
    # series into (batch, time_step, input_size) = (1, TIME_STEP, 1).
    x = torch.from_numpy(x_np[np.newaxis, :, np.newaxis])
    y = torch.from_numpy(y_np[np.newaxis, :, np.newaxis])

    prediction, h_state = rnn(x, h_state)   # per-step predictions and the last hidden state
    # Important: cut the hidden state out of this iteration's graph before
    # feeding it to the next one, otherwise backward() would try to reach
    # back through graphs of earlier iterations and fail.
    # detach() is the autograd-tracked replacement for the legacy .data.
    h_state = h_state.detach()

    loss = loss_func(prediction, y)     # mean squared error
    optimizer.zero_grad()               # clear gradients for this training step
    loss.backward()                     # backpropagation, compute gradients
    optimizer.step()                    # apply gradients
    # plotting
    plt.plot(steps, y_np.flatten(), 'r-')
    plt.plot(steps, prediction.detach().numpy().flatten(), 'b-')
    plt.draw(); plt.pause(0.05)

plt.ioff()
plt.show()

4、完整代码

import torch
from torch import nn
import numpy as np
import matplotlib.pyplot as plt

torch.manual_seed(1)

# Hyper-parameters
TIME_STEP = 10      # rnn time steps
INPUT_SIZE = 1      # rnn input size
LR = 0.02           # learning rate

# Sample data: one full period of sin (input) and cos (target).
# float32 so the arrays convert directly to torch FloatTensor.
steps = np.linspace(0, 2 * np.pi, num=100, dtype=np.float32)
x_np, y_np = np.sin(steps), np.cos(steps)

# Uncomment to display the two curves:
# plt.plot(steps, y_np, 'r-', label = 'target(cos)')
# plt.plot(steps, x_np, 'b-', label = 'input(sin)')
# plt.legend(loc='best')
# plt.show()

class RNN(nn.Module):
    """Plain RNN regressor that emits one value per time step."""

    def __init__(self):
        super().__init__()
        # A vanilla RNN is enough for this task.
        self.rnn = nn.RNN(INPUT_SIZE, 32, num_layers=1, batch_first=True)
        self.out = nn.Linear(in_features=32, out_features=1)

    def forward(self, x, h_state):
        """Run the RNN over x starting from h_state.

        x is (batch, time_step, input_size) and h_state is
        (n_layers, batch, hidden_size) or None. Returns the per-step
        predictions stacked along dim 1 together with the new hidden state,
        which the caller passes back in on the next call.
        """
        r_out, h_state = self.rnn(x, h_state)
        # Apply the output layer to every time step separately.
        per_step = [self.out(r_out[:, t, :]) for t in range(r_out.size(1))]
        return torch.stack(per_step, dim=1), h_state

    # The per-step computation above is written out step by step to show
    # PyTorch's dynamic graph. A cheaper equivalent reshapes the RNN output
    # and applies the output layer to all time steps at once:
    # def forward(self, x, h_state):
    #     r_out, h_state = self.rnn(x, h_state)
    #     r_out = r_out.reshape(-1, 32)
    #     outs = self.out(r_out)
    #     return outs.view(-1, TIME_STEP, 1), h_state

# Build the network and print its layer summary.
rnn = RNN()
print(rnn)
# Printout recorded by the author:
#   RNN(
#     (rnn): RNN(1, 32, batch_first=True)
#     (out): Linear(in_features=32, out_features=1, bias=True)
#   )

optimizer = torch.optim.Adam(rnn.parameters(), lr=LR)   # optimize all rnn parameters
loss_func = nn.MSELoss()

h_state = None   # None makes the RNN start from its default initial hidden state

for step in range(100):
    start, end = step * np.pi, (step+1)*np.pi   # time range covered by this step
    # use sin to predict cos
    steps = np.linspace(start, end, TIME_STEP, dtype=np.float32, endpoint=False)  # float32 for converting torch FloatTensor
    x_np = np.sin(steps)
    y_np = np.cos(steps)

    # np.newaxis inserts a length-1 axis at its position, turning the 1-D
    # series into (batch, time_step, input_size) = (1, TIME_STEP, 1).
    x = torch.from_numpy(x_np[np.newaxis, :, np.newaxis])
    y = torch.from_numpy(y_np[np.newaxis, :, np.newaxis])

    prediction, h_state = rnn(x, h_state)   # per-step predictions and the last hidden state
    # Important: cut the hidden state out of this iteration's graph before
    # feeding it to the next one, otherwise backward() would try to reach
    # back through graphs of earlier iterations and fail.
    # detach() is the autograd-tracked replacement for the legacy .data.
    h_state = h_state.detach()

    loss = loss_func(prediction, y)     # mean squared error
    optimizer.zero_grad()               # clear gradients for this training step
    loss.backward()                     # backpropagation, compute gradients
    optimizer.step()                    # apply gradients
    # plotting
    plt.plot(steps, y_np.flatten(), 'r-')
    plt.plot(steps, prediction.detach().numpy().flatten(), 'b-')
    plt.draw(); plt.pause(0.05)

plt.ioff()
plt.show()

十五、GAN生成对抗网络

https://mofanpy.com/tutorials/machine-learning/torch//intro-GAN/

https://mofanpy.com/tutorials/machine-learning/torch/GAN/

　　我的一句话介绍 GAN 就是: Generator 是新手画家, Discriminator 是新手鉴赏家, 你是高级鉴赏家. 你将著名画家的作品和新手画家的作品都给新手鉴赏家评定, 并告诉新手鉴赏家哪些是新手画家画的, 哪些是著名画家画的, 新手鉴赏家就慢慢学习怎么区分新手画家和著名画家的画, 但是新手画家和新手鉴赏家是好朋友, 新手鉴赏家会告诉新手画家要怎么样画得更像著名画家, 新手画家就能将自己突然来的灵感 (random noise) 画得更像著名画家。

　　下面是本节内容的效果, 绿线的变化是新手画家慢慢学习如何踏上画家之路的过程. 而能被认定为著名的画作在 upper bound 和 lower bound 之间。

1、超参数设定

　　新手画家 (Generator) 在作画的时候需要有一些灵感 (random noise), 我们将这些灵感的个数定义为 N_IDEAS. 而一幅画需要有一些规格, 我们将这幅画的画笔数定义一下, ART_COMPONENTS 就是一条一元二次曲线(这幅画)上的点个数. 为了进行批训练, 我们将一整批画的点都规定一下(PAINT_POINTS).

import torch
import torch.nn as nn
import numpy as np
import matplotlib.pyplot as plt

torch.manual_seed(1)
np.random.seed(1)

# Hyper-parameters
BATCH_SIZE = 64
LR_G = 0.0001           # learning rate of the generator
LR_D = 0.0001           # learning rate of the discriminator
N_IDEAS = 5             # number of random "ideas" the generator draws a painting from
ART_COMPONENTS = 15     # number of points on one quadratic curve (one painting)
# x-coordinates of the points, one identical row per painting in the batch:
# shape (BATCH_SIZE, ART_COMPONENTS), evenly spaced over [-1, 1].
PAINT_POINTS = np.tile(np.linspace(-1, 1, ART_COMPONENTS), (BATCH_SIZE, 1))

2、来自著名艺术家的绘画（真实目标）

def artist_works():
    """Return one batch of "real" paintings from the famous artist.

    Each painting is the curve a * x**2 + (a - 1) evaluated at PAINT_POINTS,
    with one coefficient a per painting drawn by np.random.uniform(1, 2).
    The result is a float32 torch tensor.
    """
    coeff = np.random.uniform(1, 2, size=BATCH_SIZE).reshape(-1, 1)
    curves = coeff * np.power(PAINT_POINTS, 2) + (coeff - 1)
    return torch.from_numpy(curves).float()

3、GAN网络

这里会创建两个神经网络, 分别是 Generator (新手画家), Discriminator(新手鉴赏家). G 会拿着自己的一些灵感当做输入, 输出一元二次曲线上的点 (G 的画).

D 会接收一幅画作 (一元二次曲线), 输出这幅画作到底是不是著名画家的画(是著名画家的画的概率).

# Generator: turns N_IDEAS random numbers into the ART_COMPONENTS points of a painting.
G = nn.Sequential(
    nn.Linear(in_features=N_IDEAS, out_features=128),
    nn.ReLU(),
    nn.Linear(in_features=128, out_features=ART_COMPONENTS),
)

# Discriminator: takes a painting (from the artist or from G) and outputs
# the probability that it was made by the artist.
D = nn.Sequential(
    nn.Linear(in_features=ART_COMPONENTS, out_features=128),
    nn.ReLU(),
    nn.Linear(in_features=128, out_features=1),
    nn.Sigmoid(),
)

# One Adam optimizer per network (a viewer comment suggested RMSprop may work better).
opt_G = torch.optim.Adam(G.parameters(), lr=LR_G)
opt_D = torch.optim.Adam(D.parameters(), lr=LR_D)

4、训练

接着我们来同时训练 D 和 G. 训练之前, 我们来看看G作画的原理. G 首先会有些灵感, G_ideas 就会拿到这些随机灵感 (可以是正态分布的随机数), 然后 G 会根据这些灵感画画. 接着我们拿着著名画家的画和 G 的画, 让 D 来判定这两批画作是著名画家画的概率

# First part of the training loop: update G, then run D on both batches.
# The loop body continues in the code fragments that follow in this section.
for step in range(10000):
    artist_paintings = artist_works()  # real painting from artist
    G_ideas = torch.randn(BATCH_SIZE, N_IDEAS, requires_grad=True)  # random ideas
    G_paintings = G(G_ideas)  # fake painting from G (random ideas)
    prob_artist1 = D(G_paintings)  # D's probability for the generated batch; G wants it high
    # G minimises log(1 - D(G(z))), i.e. it tries to make D rate its paintings as real.
    G_loss = torch.mean(torch.log(1. - prob_artist1))
    opt_G.zero_grad()
    G_loss.backward()
    opt_G.step()

    prob_artist0 = D(artist_paintings)  # D try to increase this prob
    # detach() keeps the D update from backpropagating into G.
    prob_artist1 = D(G_paintings.detach())  # D try to reduce this prob

然后计算有多少来自画家的画猜对了, 有多少来自 G 的画猜对了, 我们想最大化这些猜对的次数. 这也就是论文中 log(D(x)) + log(1-D(G(z))) 的形式. 而因为 torch 中提升参数的形式是最小化误差, 那我们把最大化 score 转换成最小化 loss, 在两个 score 的和前面加一个负号就好. 而 G 的提升就是要减小 D 猜测 G 生成数据的正确率, 也就是减小 log(1 - prob_artist1) (即旧版代码中的 D_score1).

    # D's loss: the negated mean of log(prob_artist0) + log(1 - prob_artist1),
    # so minimising it maximises D's score on both batches.
    D_loss = - torch.mean(torch.log(prob_artist0) + torch.log(1. - prob_artist1))
    # G's loss expression: the mean of log(1 - prob_artist1).
    # NOTE(review): if prob_artist1 still comes from G_paintings.detach() at this
    # point, this value carries no gradient back to G -- confirm against the loop above.
    G_loss = torch.mean(torch.log(1. - prob_artist1))

最后我们再根据 loss 提升神经网络就好了.

    # Update D only. G has already been stepped on its own loss earlier in this
    # iteration; a second G_loss.backward() here would run after opt_D.step()
    # has modified D's weights in place (which autograd rejects), and could not
    # reach G anyway because D was fed G_paintings.detach().
    opt_D.zero_grad()
    D_loss.backward()       # the graph is not reused afterwards, so retain_graph is not needed
    opt_D.step()

5、完整代码

import torch
import torch.nn as nn
import numpy as np
import matplotlib.pyplot as plt

torch.manual_seed(1)
np.random.seed(1)

# Hyper-parameters
BATCH_SIZE = 64
LR_G = 0.0001           # learning rate of the generator
LR_D = 0.0001           # learning rate of the discriminator
N_IDEAS = 5             # number of random "ideas" the generator draws a painting from
ART_COMPONENTS = 15     # number of points on one quadratic curve (one painting)
# x-coordinates of the points, one identical row per painting in the batch:
# shape (BATCH_SIZE, ART_COMPONENTS), evenly spaced over [-1, 1].
PAINT_POINTS = np.tile(np.linspace(-1, 1, ART_COMPONENTS), (BATCH_SIZE, 1))

# Uncomment to show the range the artist's paintings fall into:
# plt.plot(PAINT_POINTS[0], 2 * np.power(PAINT_POINTS[0], 2) + 1, c='#74BCFF', lw=3, label='upper bound')
# plt.plot(PAINT_POINTS[0], 1 * np.power(PAINT_POINTS[0], 2) + 0, c='#FF9359', lw=3, label='lower bound')
# plt.legend(loc='upper right')
# plt.show()

def artist_works():
    """Return one batch of "real" paintings from the famous artist.

    Each painting is the curve a * x**2 + (a - 1) evaluated at PAINT_POINTS,
    with one coefficient a per painting drawn by np.random.uniform(1, 2).
    The result is a float32 torch tensor.
    """
    coeff = np.random.uniform(1, 2, size=BATCH_SIZE).reshape(-1, 1)
    curves = coeff * np.power(PAINT_POINTS, 2) + (coeff - 1)
    return torch.from_numpy(curves).float()


# Generator: turns N_IDEAS random numbers into the ART_COMPONENTS points of a painting.
G = nn.Sequential(
    nn.Linear(in_features=N_IDEAS, out_features=128),
    nn.ReLU(),
    nn.Linear(in_features=128, out_features=ART_COMPONENTS),
)

# Discriminator: takes a painting (from the artist or from G) and outputs
# the probability that it was made by the artist.
D = nn.Sequential(
    nn.Linear(in_features=ART_COMPONENTS, out_features=128),
    nn.ReLU(),
    nn.Linear(in_features=128, out_features=1),
    nn.Sigmoid(),
)

# One Adam optimizer per network (a viewer comment suggested RMSprop may work better).
opt_G = torch.optim.Adam(G.parameters(), lr=LR_G)
opt_D = torch.optim.Adam(D.parameters(), lr=LR_D)

plt.ion()   # interactive mode so the figure refreshes while training

for step in range(10000):
    artist_paintings = artist_works()           # real paintings from the artist
    # The noise is a plain input; no gradient with respect to it is needed.
    G_ideas = torch.randn(BATCH_SIZE, N_IDEAS)  # random ideas
    G_paintings = G(G_ideas)                    # fake paintings from G

    # --- update G: make D rate the generated paintings as real ---
    prob_artist1 = D(G_paintings)
    G_loss = torch.mean(torch.log(1. - prob_artist1))
    opt_G.zero_grad()
    G_loss.backward()
    opt_G.step()

    # --- update D: raise the probability on real, lower it on generated ---
    prob_artist0 = D(artist_paintings)          # D tries to increase this prob
    # detach() keeps this step from backpropagating into G.
    prob_artist1 = D(G_paintings.detach())      # D tries to reduce this prob
    D_loss = - torch.mean(torch.log(prob_artist0) + torch.log(1. - prob_artist1))
    opt_D.zero_grad()
    D_loss.backward()       # the graph is not reused, so retain_graph is not needed
    opt_D.step()

    if step % 50 == 0:  # plotting
        plt.cla()
        plt.plot(PAINT_POINTS[0], G_paintings.detach().numpy()[0], c='#4AD631', lw=3, label='Generated painting', )
        plt.plot(PAINT_POINTS[0], 2 * np.power(PAINT_POINTS[0], 2) + 1, c='#74BCFF', lw=3, label='upper bound')
        plt.plot(PAINT_POINTS[0], 1 * np.power(PAINT_POINTS[0], 2) + 0, c='#FF9359', lw=3, label='lower bound')
        plt.text(-.5, 2.3, 'D accuracy=%.2f (0.5 for D to converge)' % prob_artist0.detach().numpy().mean(),
                 fontdict={'size': 13})
        plt.text(-.5, 2, 'D score= %.2f (-1.38 for G to converge)' % -D_loss.item(), fontdict={'size': 13})
        plt.ylim((0, 3))
        plt.legend(loc='upper right', fontsize=10)
        plt.draw()
        plt.pause(0.01)

plt.ioff()
plt.show()

十六、GPU 加速运算

import os
import torch
import torch.nn as nn
import torch.utils.data as Data
import torchvision      # 数据库模块

torch.manual_seed(1)    # reproducible

os.environ["CUDA_VISIBLE_DEVICES"] = "0"  # expose only the GPU with index 0 to this process

# Hyper Parameters
EPOCH = 1               # passes over the training set (one keeps the demo short)
BATCH_SIZE = 50
LR = 0.001              # learning rate
# Download only when ./mnist/ is missing or empty.
DOWNLOAD_MNIST = not os.path.exists('./mnist/') or not os.listdir('./mnist/')


# MNIST handwritten digits; ToTensor yields float tensors (C x H x W) in [0.0, 1.0].
train_data = torchvision.datasets.MNIST(
    root='./mnist/',
    train=True,
    transform=torchvision.transforms.ToTensor(),
    download=DOWNLOAD_MNIST,
)

test_data = torchvision.datasets.MNIST(root='./mnist/', train=False)


# Shuffled mini-batches: BATCH_SIZE samples, 1 channel, 28x28 -> (50, 1, 28, 28)
train_loader = Data.DataLoader(train_data, batch_size=BATCH_SIZE, shuffle=True)

# Only the first 2000 test samples are used, to save time.

# ############# moved to the GPU with .cuda() ###############
# (2000, 28, 28) -> (2000, 1, 28, 28), values scaled into (0, 1)
test_x = test_data.data[:2000].unsqueeze(1).type(torch.FloatTensor).cuda() / 255.
test_y = test_data.targets[:2000].cuda()

# CNN model definition
class CNN(nn.Module):
    """Two conv/ReLU/max-pool stages followed by a linear layer with 10 outputs."""

    def __init__(self):
        super().__init__()
        # input (1, 28, 28) -> conv (16, 28, 28) -> pool (16, 14, 14)
        # padding = (kernel_size - 1) / 2 keeps height/width unchanged at stride 1.
        self.conv1 = nn.Sequential(
            nn.Conv2d(1, 16, kernel_size=5, stride=1, padding=2),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2),
        )
        # input (16, 14, 14) -> conv (32, 14, 14) -> pool (32, 7, 7)
        self.conv2 = nn.Sequential(
            nn.Conv2d(in_channels=16, out_channels=32, kernel_size=5, stride=1, padding=2),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2),
        )
        # fully connected layer producing the 10 class scores
        self.out = nn.Linear(32 * 7 * 7, 10)

    def forward(self, x):
        """Return (class scores, flattened conv features) for a (batch, 1, 28, 28) input."""
        features = self.conv2(self.conv1(x))
        # flatten the feature maps to (batch_size, 32 * 7 * 7)
        flat = features.view(features.size(0), -1)
        return self.out(flat), flat


def train_save():
    """Train the CNN on the GPU, report progress, and save it under ./mnist_net/.

    Uses the module-level CNN, train_loader, test_x, test_y, LR and EPOCH;
    test_x and test_y are expected to live on the GPU already. Every 50 steps
    the current loss and test accuracy are printed. After training, both the
    whole network and its state_dict are saved, and the predictions for the
    first 10 test samples are printed.
    """
    import os  # local import: only needed to create the output directory

    cnn = CNN()
    # ############# GPU change ###############
    cnn.cuda()  # move all model parameters and buffers to the GPU

    print(cnn)

    optimizer = torch.optim.Adam(cnn.parameters(), lr=LR)  # optimize all CNN parameters
    loss_func = nn.CrossEntropyLoss()

    # training and testing
    for epoch in range(EPOCH):
        for step, (b_x, b_y) in enumerate(train_loader):
            # ############# GPU change ###############
            b_x = b_x.cuda()   # Tensor on GPU
            b_y = b_y.cuda()   # Tensor on GPU

            output = cnn(b_x)[0]            # cnn output
            loss = loss_func(output, b_y)   # cross entropy loss
            optimizer.zero_grad()           # clear gradients for this training step
            loss.backward()                 # backpropagation, compute gradients
            optimizer.step()                # apply gradients

            if step % 50 == 0:
                # Evaluation needs no gradients; skip building the autograd graph.
                with torch.no_grad():
                    test_output, _ = cnn(test_x)

                # test_output is already on the GPU, so the comparison runs there too.
                pred_y = torch.max(test_output, 1)[1]
                accuracy = torch.sum(pred_y == test_y).item() / test_y.size(0)
                print('Epoch: ', epoch, '| train loss: %.4f' % loss.item(), '| test accuracy: %.2f' % accuracy)

    # torch.save does not create missing parent directories.
    os.makedirs('./mnist_net', exist_ok=True)
    torch.save(cnn, './mnist_net/cnn_classification_net.pkl')  # save the whole network
    torch.save(cnn.state_dict(), './mnist_net/cnn_classification_net_params.pkl')  # parameters only (faster, smaller)

    # Sample progress output recorded by the author:
    #   Epoch:  0 | train loss: 0.0306 | test accuracy: 0.97
    #   Epoch:  0 | train loss: 0.0147 | test accuracy: 0.98
    #   Epoch:  0 | train loss: 0.0427 | test accuracy: 0.98
    #   Epoch:  0 | train loss: 0.0078 | test accuracy: 0.98

    # print 10 predictions from test data
    with torch.no_grad():
        test_output, _ = cnn(test_x[:10])

    pred_y = torch.max(test_output, 1)[1]
    print(pred_y, 'prediction number')
    print(test_y[:10], 'real number')
    # Sample output recorded by the author:
    #   [7 2 1 0 4 1 4 9 5 9] prediction number
    #   [7 2 1 0 4 1 4 9 5 9] real number


# This restores the entire pickled network, which may be slow for large networks.
def restore_net():
    """Load the saved CNN onto the GPU and print its predictions for 20 test samples."""
    # NOTE(review): newer PyTorch releases may refuse to unpickle a whole module
    # unless torch.load is called with weights_only=False -- confirm for your version.
    restored = torch.load('./mnist_net/cnn_classification_net.pkl')
    restored.cuda()

    scores, _ = restored(test_x[:20])
    _, best_index = torch.max(scores, 1)
    pred_y = best_index.cuda().data

    print(pred_y, 'prediction number')
    print(test_y[:20], 'real number')

if __name__ == '__main__':
    # False: load the already-trained model; True: train and save first.
    GO_train = False
    entry_point = train_save if GO_train else restore_net
    entry_point()

posted @ 2020-10-20 21:44 薄书阅读(397) 评论(0) 编辑收藏举报

刷新页面返回顶部

薄书C8H16NaO2

莫烦pytorch学习记录

一、PyTorch是什么？

二、Numpy与Torch之间的转换

三、Torch中的数学运算与numpy的对比

API手册

四、Variable

variable的计算

获取Variable里面的数据

五、常用几种激励函数及图像

六、线性拟合回归

搭建网络

开始训练

可视化训练过程

七、区分类型 (分类)

捏个数据

搭个网络

训练网络

可视化

八、快速搭建法

九、保存与提取网络

保存

提取网络

只提取网络参数

完整代码并查看三个网络模型的结果

十、批训练

数据分批

分批训练

十一、Optimizer 优化器

1.捏一个数据

2.建立同样的网络

3.不同的optimizer

4.训练出图

完整代码

十二、CNN_classification

1、MINIST手写数据

2、CNN模型

3、训练，全部代码

1、MINIST手写数据

2.RNN模型

3、训练&完整代码

十四、RNN_regression

1、训练数据

3、训练

4、完整代码

十五、GAN生成对抗网络

1、超参数设定

2、来自著名艺术家的绘画（真实目标）

3、GAN网络

4、训练

5、完整代码

十六、GPU 加速运算

公告