Torch Quick Reference: MNIST Training and Prediction on CPU/GPU, Model Export, Model Reload and Prediction, TorchScript Export and Prediction, ONNX Export and Prediction

What this is for

This article is written so that graduate students and deep-learning "alchemist" algorithm engineers can get up to speed with torch quickly. It assumes the reader already has a basic grasp of deep-learning and dataset concepts.

Environment

python 3.7.4
torch 1.9.0+cu111
onnx 1.9.0
onnxruntime-gpu 1.9.0

Data preparation

The MNIST dataset CSV file is a 42000x785 matrix.
42000 is the number of images.
Of the 785 columns, the first holds the image label (0, 1, 2, ..., 9) and the remaining 784 hold the image data (each 28x28 image flattened into a 784-dimensional vector). The dataset looks like this:

1 0 0 0 0 0 0 0 0 0 ..
0 0 0 0 0 0 0 0 0 0
1 0 0 0 0 0 0 0 0 0
4 0 0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0 0 0
7 0 0 0 0 0 0 0 0 0
3 0 0 0 0 0 0 0 0 0
5 0 0 0 0 0 0 0 0 0
3 0 0 0 0 0 0 0 0 0
8 0 0 0 0 0 0 0 0 0
9 0 0 0 0 0 0 0 0 0
1 0 0 0 0 0 0 0 0 0
3 0 0 0 0 0 0 0 0 0
3 0 0 0 0 0 0 0 0 0
1 0 0 0 0 0 0 0 0 0
2 0 0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0 0 0

1. Import the required packages

import os
import time
import onnx
import torch
import numpy as np
import pandas as pd
import torch.nn as nn
import onnxruntime as ort
import torch.nn.functional as F
from sklearn.metrics import accuracy_score
from torch.utils.data import Dataset, DataLoader

2. Parameter setup

N_EPOCH = 1  # number of training epochs
N_BATCH = 128  # batch size
N_BATCH_NUM = 250  # number of training batches (128 * 250 = 32000 training samples)
S_DATA_PATH = r"mnist_train.csv"
S_TORCH_MODEL_FULL_PATH = r"cnn_model.pth"  # whole-model checkpoint
S_TORCH_MODEL_PARAMS_PATH = r"cnn_model_state.pth"  # state_dict-only checkpoint
S_TORCH_MODEL_SCRIPT_PATH = r"cnn_model.torch_script.pt"
S_ONNX_MODEL_PATH = r"cnn_model_batch%d.onnx" % N_BATCH
S_DEVICE, N_DEVICE_ID, S_DEVICE_FULL = "cuda", 0, "cuda:0"
# S_DEVICE, N_DEVICE_ID, S_DEVICE_FULL = "cpu", 0, "cpu"  # uncomment to run on CPU

3. Load the data

df = pd.read_csv(S_DATA_PATH, header=None)
print(df.shape)
np_mat = np.array(df)
print(np_mat.shape)

X = np_mat[:, 1:]  # pixel columns
Y = np_mat[:, 0]  # label column
X = X.astype(np.float32) / 255  # scale pixel values to [0, 1]
X_train = X[:N_BATCH * N_BATCH_NUM]
X_test = X[N_BATCH * N_BATCH_NUM:]
Y_train = Y[:N_BATCH * N_BATCH_NUM]
Y_test = Y[N_BATCH * N_BATCH_NUM:]

X_train = X_train.reshape(X_train.shape[0], 1, 28, 28)  # NCHW layout
X_test = X_test.reshape(X_test.shape[0], 1, 28, 28)

print(X_train.shape)
print(Y_train.shape)
print(X_test.shape)
print(Y_test.shape)


class MnistDataSet(Dataset):
    def __init__(self, X, Y):
        self.l_data, self.l_label = [], []
        for i in range(X.shape[0]):
            self.l_data.append(X[i, :, :, :])
            self.l_label.append(Y[i])

    def __getitem__(self, index):
        return self.l_data[index], self.l_label[index]

    def __len__(self):
        return len(self.l_data)


train_loader = DataLoader(MnistDataSet(X_train, Y_train), batch_size=N_BATCH, shuffle=True)
test_loader = DataLoader(MnistDataSet(X_test, Y_test), batch_size=N_BATCH, shuffle=False)

Output

(42000, 785)
(42000, 785)
(32000, 1, 28, 28)
(32000,)
(10000, 1, 28, 28)
(10000,)
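
As a quick sanity check on the loaders, here is a minimal sketch that pulls one batch from train_loader (the names x_b and y_b are illustrative):

x_b, y_b = next(iter(train_loader))
print(x_b.shape, x_b.dtype)  # expected: torch.Size([128, 1, 28, 28]) torch.float32
print(y_b.shape, y_b.dtype)  # expected: torch.Size([128]) torch.int64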

4. Build the model

class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.encoder = torch.nn.Sequential(nn.Conv2d(1, 16, 3, 1),
                                           nn.MaxPool2d(2), nn.Flatten(1),
                                           nn.Linear(2704, 128), nn.ReLU(),
                                           nn.Linear(128, 10))

    def forward(self, x):
        out = self.encoder(x)
        return out


net = Net().to(S_DEVICE)
print(net)
optimizer = torch.optim.Adam(net.parameters(), lr=1e-3)
loss_fun = nn.CrossEntropyLoss()

Output

Net(
  (encoder): Sequential(
    (0): Conv2d(1, 16, kernel_size=(3, 3), stride=(1, 1))
    (1): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (2): Flatten(start_dim=1, end_dim=-1)
    (3): Linear(in_features=2704, out_features=128, bias=True)
    (4): ReLU()
    (5): Linear(in_features=128, out_features=10, bias=True)
  )
)
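
The in_features=2704 of the first Linear layer follows from the shapes: the 3x3 convolution maps 28x28 to 26x26, the 2x2 max-pool halves that to 13x13, and 16 channels flatten to 16 * 13 * 13 = 2704. A quick sketch to verify, assuming a dummy input:

with torch.no_grad():
    t_dummy = torch.zeros(1, 1, 28, 28).to(S_DEVICE)
    print(net.encoder[:3](t_dummy).shape)  # Conv2d -> MaxPool2d -> Flatten; expected torch.Size([1, 2704])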

5. Train the model

print("model train")
for i in range(N_EPOCH):
    net.train()
    t_loss = 0.
    np_pred, np_y = None, None
    for j, (t_x_b, t_y_b) in enumerate(train_loader):
        t_y_b = t_y_b.long().to(S_DEVICE)
        t_x_b = t_x_b.float().to(S_DEVICE)

        t_logits_b = net(t_x_b)
        t_loss_b = loss_fun(t_logits_b, t_y_b)

        optimizer.zero_grad()
        t_loss_b.backward()
        optimizer.step()

        t_loss += t_loss_b
        np_pred_b = torch.argmax(t_logits_b, -1).detach().cpu().numpy()
        np_pred = np_pred_b if np_pred is None else np.concatenate(
            (np_pred, np_pred_b), 0)
        np_y = t_y_b.cpu().numpy() if np_y is None else np.concatenate(
            (np_y, t_y_b.cpu().numpy()), 0)

    f_acc = accuracy_score(np_y, np_pred)
    print("train ", t_loss, f_acc)
    print()

Output

model train
train  tensor(113.2757, device='cuda:0', grad_fn=<AddBackward0>) 0.8743125
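
A small caveat: t_loss += t_loss_b accumulates tensors that still carry autograd history, which is why the printed loss shows grad_fn=<AddBackward0> and why each batch's graph stays alive until the epoch ends. Accumulating a plain Python float avoids this; a minimal variant:

t_loss += t_loss_b.item()  # .item() detaches the value and returns a Python float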

6. Model prediction

with torch.no_grad():
    for i in range(N_EPOCH):
        net.eval()
        t_loss = 0.
        np_pred, np_y = None, None
        for j, (t_x_b, t_y_b) in enumerate(test_loader):
            t_y_b = t_y_b.long().to(S_DEVICE)
            t_x_b = t_x_b.float().to(S_DEVICE)

            t_logits_b = net(t_x_b)
            t_loss_b = loss_fun(t_logits_b, t_y_b)

            t_loss += t_loss_b

            np_pred_b = torch.argmax(t_logits_b, -1).detach().cpu().numpy()
            np_pred = np_pred_b if np_pred is None else np.concatenate(
                (np_pred, np_pred_b), 0)
            np_y = t_y_b.cpu().numpy() if np_y is None else np.concatenate(
                (np_y, t_y_b.cpu().numpy()), 0)

        f_acc = accuracy_score(np_y, np_pred)
        print("test ", t_loss, f_acc)
        print()

Output

test  tensor(17.3666, device='cuda:0') 0.9334

7. Save the model

torch.save(net, S_TORCH_MODEL_FULL_PATH)
torch.save(net.state_dict(), S_TORCH_MODEL_PARAMS_PATH)
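
The first call pickles the whole module; the second stores only the weights. Restoring from the state_dict file requires the Net class definition to be available; a minimal sketch (the name net_params is illustrative):

net_params = Net()
net_params.load_state_dict(torch.load(S_TORCH_MODEL_PARAMS_PATH, map_location="cpu"))
net_params = net_params.to(S_DEVICE)
net_params.eval()  # switch to inference mode before predicting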

8. Load the model and predict

print("load torch model and pred test data")
net_load = torch.load(S_TORCH_MODEL_FULL_PATH,
                      map_location=lambda storage, loc: storage)
net_load = net_load.to(S_DEVICE)
print("load model ok")
with torch.no_grad():
    for i in range(N_EPOCH):
        net_load.eval()
        t_loss = 0.
        np_pred, np_y = None, None
        for j, (t_x_b, t_y_b) in enumerate(test_loader):
            t_y_b = t_y_b.long().to(S_DEVICE)
            t_x_b = t_x_b.float().to(S_DEVICE)

            t_logits_b = net_load(t_x_b)
            t_loss_b = loss_fun(t_logits_b, t_y_b)

            t_loss += t_loss_b
            np_pred_b = torch.argmax(t_logits_b, -1).detach().cpu().numpy()
            np_pred = np_pred_b if np_pred is None else np.concatenate(
                (np_pred, np_pred_b), 0)
            np_y = t_y_b.cpu().numpy() if np_y is None else np.concatenate(
                (np_y, t_y_b.cpu().numpy()), 0)

        f_acc = accuracy_score(np_y, np_pred)
        print("load torch model ", t_loss, f_acc)
        print()

Output

load torch model and pred test data
load model ok
load torch model  tensor(17.3666, device='cuda:0') 0.9334

9. Export TorchScript

torch_script_trace = torch.jit.trace(net_load, t_x_b)
print(torch_script_trace)
torch_script_trace.save(S_TORCH_MODEL_SCRIPT_PATH)

Output

Net(
  original_name=Net
  (encoder): Sequential(
    original_name=Sequential
    (0): Conv2d(original_name=Conv2d)
    (1): MaxPool2d(original_name=MaxPool2d)
    (2): Flatten(original_name=Flatten)
    (3): Linear(original_name=Linear)
    (4): ReLU(original_name=ReLU)
    (5): Linear(original_name=Linear)
  )
)
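
torch.jit.trace records only the operations executed for this particular example input, which is fine for a plain Sequential like this Net but would miss data-dependent control flow. For models with branches, torch.jit.script is the usual alternative; a minimal sketch (the output path is illustrative):

torch_script_scripted = torch.jit.script(net_load)  # compiles from the module's Python source
torch_script_scripted.save(r"cnn_model.torch_jit_script.pt")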

10. Load the TorchScript model and predict

torch_script_load = torch.jit.load(S_TORCH_MODEL_SCRIPT_PATH)
torch_script_load = torch_script_load.to(S_DEVICE)
print(torch_script_load)
print(torch_script_load.code)
print("load scirpt model ok")
with torch.no_grad():
    for i in range(N_EPOCH):
        torch_script_load.eval()
        t_loss = 0.
        np_pred, np_y = None, None
        for j, (t_x_b, t_y_b) in enumerate(test_loader):
            t_y_b = t_y_b.long().to(S_DEVICE)
            t_x_b = t_x_b.float().to(S_DEVICE)

            t_logits_b = torch_script_load(t_x_b)
            t_loss_b = loss_fun(t_logits_b, t_y_b)

            t_loss += t_loss_b
            np_pred_b = torch.argmax(t_logits_b, -1).detach().cpu().numpy()
            np_pred = np_pred_b if np_pred is None else np.concatenate(
                (np_pred, np_pred_b), 0)
            np_y = t_y_b.cpu().numpy() if np_y is None else np.concatenate(
                (np_y, t_y_b.cpu().numpy()), 0)

        f_acc = accuracy_score(np_y, np_pred)
        print("load scirpt torch model ", t_loss, f_acc)
        print()

Output

RecursiveScriptModule(
  original_name=Net
  (encoder): RecursiveScriptModule(
    original_name=Sequential
    (0): RecursiveScriptModule(original_name=Conv2d)
    (1): RecursiveScriptModule(original_name=MaxPool2d)
    (2): RecursiveScriptModule(original_name=Flatten)
    (3): RecursiveScriptModule(original_name=Linear)
    (4): RecursiveScriptModule(original_name=ReLU)
    (5): RecursiveScriptModule(original_name=Linear)
  )
)
def forward(self,
    x: Tensor) -> Tensor:
  return (self.encoder).forward(x, )

load script model ok
load script torch model  tensor(17.3666, device='cuda:0') 0.9334
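
The time module imported at the top goes unused so far; it is handy for a rough latency comparison between the eager and the scripted model. A minimal sketch (numbers vary by hardware, and warm-up is ignored):

with torch.no_grad():
    t_x_bench = torch.randn(N_BATCH, 1, 28, 28).to(S_DEVICE)
    for s_name, model in (("eager", net_load), ("script", torch_script_load)):
        t_start = time.time()
        for _ in range(100):
            model(t_x_bench)
        if S_DEVICE == "cuda":
            torch.cuda.synchronize()  # GPU kernels run asynchronously; wait before reading the clock
        print(s_name, time.time() - t_start)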

11. Export to ONNX

dummy_in = torch.randn(N_BATCH, 1, 28, 28)
torch.onnx.export(
    net_load.cpu(),
    dummy_in,
    S_ONNX_MODEL_PATH,
    verbose=True,
    input_names=["data"],
    output_names=["output"],
    dynamic_axes={
        'data': {0: 'batch_size'},
        'output': {0: 'batch_size'}
    })

Output

graph(%data : Float(*, 1, 28, 28, strides=[784, 784, 28, 1], requires_grad=0, device=cpu),
      %encoder.0.weight : Float(16, 1, 3, 3, strides=[9, 9, 3, 1], requires_grad=1, device=cpu),
      %encoder.0.bias : Float(16, strides=[1], requires_grad=1, device=cpu),
      %encoder.3.weight : Float(128, 2704, strides=[2704, 1], requires_grad=1, device=cpu),
      %encoder.3.bias : Float(128, strides=[1], requires_grad=1, device=cpu),
      %encoder.5.weight : Float(10, 128, strides=[128, 1], requires_grad=1, device=cpu),
      %encoder.5.bias : Float(10, strides=[1], requires_grad=1, device=cpu)):
  %7 : Float(*, 16, 26, 26, strides=[10816, 676, 26, 1], requires_grad=1, device=cpu) = onnx::Conv[dilations=[1, 1], group=1, kernel_shape=[3, 3], pads=[0, 0, 0, 0], strides=[1, 1]](%data, %encoder.0.weight, %encoder.0.bias) # D:\Dev_Utils\Anaconda3\lib\site-packages\torch\nn\modules\conv.py:440:0
  %8 : Float(*, 16, 13, 13, strides=[2704, 169, 13, 1], requires_grad=1, device=cpu) = onnx::MaxPool[kernel_shape=[2, 2], pads=[0, 0, 0, 0], strides=[2, 2]](%7) # D:\Dev_Utils\Anaconda3\lib\site-packages\torch\nn\functional.py:718:0
  %9 : Float(*, 2704, strides=[2704, 1], requires_grad=1, device=cpu) = onnx::Flatten[axis=1](%8) # D:\Dev_Utils\Anaconda3\lib\site-packages\torch\nn\modules\flatten.py:40:0
  %10 : Float(*, 128, strides=[128, 1], requires_grad=1, device=cpu) = onnx::Gemm[alpha=1., beta=1., transB=1](%9, %encoder.3.weight, %encoder.3.bias) # D:\Dev_Utils\Anaconda3\lib\site-packages\torch\nn\functional.py:1847:0
  %11 : Float(*, 128, strides=[128, 1], requires_grad=1, device=cpu) = onnx::Relu(%10) # D:\Dev_Utils\Anaconda3\lib\site-packages\torch\nn\functional.py:1298:0
  %output : Float(*, 10, strides=[10, 1], requires_grad=1, device=cpu) = onnx::Gemm[alpha=1., beta=1., transB=1](%11, %encoder.5.weight, %encoder.5.bias) # D:\Dev_Utils\Anaconda3\lib\site-packages\torch\nn\functional.py:1847:0
  return (%output)
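
Note that thanks to dynamic_axes the batch dimension is exported as symbolic (the Float(*, 1, 28, 28) above), so the session in the next step can feed a 256-sample batch even though the file name records N_BATCH=128.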

12. Load the ONNX model and run inference

model = onnx.load(S_ONNX_MODEL_PATH)
print(onnx.checker.check_model(model))  # Check that the model is well formed
print(onnx.helper.printable_graph(model.graph))  # Print a human readable representation of the graph
ls_input_name, ls_output_name = [input.name for input in model.graph.input], [output.name for output in model.graph.output]
print("input name ", ls_input_name)
print("output name ", ls_output_name)
s_input_name = ls_input_name[0]

x_input = X_train[:N_BATCH*2, :, :, :].astype(np.float32)
ort_val = ort.OrtValue.ortvalue_from_numpy(x_input, S_DEVICE, N_DEVICE_ID)
print("val device ", ort_val.device_name())
print("val shape ", ort_val.shape())
print("val data type ", ort_val.data_type())
print("is_tensor ", ort_val.is_tensor())
print("array_equal ", np.array_equal(ort_val.numpy(), x_input))
providers = 'CUDAExecutionProvider' if S_DEVICE == "cuda" else 'CPUExecutionProvider'
print("providers ", providers)
ort_session = ort.InferenceSession(S_ONNX_MODEL_PATH,
                                   providers=[providers])  # run on GPU (or CPU)
ort_session.set_providers([providers])
outputs = ort_session.run(None, {s_input_name: ort_val})
print("sess env ", ort_session.get_providers())
print(type(outputs))
print(outputs[0])
'''
For example ['CUDAExecutionProvider', 'CPUExecutionProvider']
    means execute a node using CUDAExecutionProvider if capable, otherwise execute using CPUExecutionProvider.
'''

Output

None
graph torch-jit-export (
  %data[FLOAT, batch_sizex1x28x28]
) initializers (
  %encoder.0.weight[FLOAT, 16x1x3x3]
  %encoder.0.bias[FLOAT, 16]
  %encoder.3.weight[FLOAT, 128x2704]
  %encoder.3.bias[FLOAT, 128]
  %encoder.5.weight[FLOAT, 10x128]
  %encoder.5.bias[FLOAT, 10]
) {
  %7 = Conv[dilations = [1, 1], group = 1, kernel_shape = [3, 3], pads = [0, 0, 0, 0], strides = [1, 1]](%data, %encoder.0.weight, %encoder.0.bias)
  %8 = MaxPool[kernel_shape = [2, 2], pads = [0, 0, 0, 0], strides = [2, 2]](%7)
  %9 = Flatten[axis = 1](%8)
  %10 = Gemm[alpha = 1, beta = 1, transB = 1](%9, %encoder.3.weight, %encoder.3.bias)
  %11 = Relu(%10)
  %output = Gemm[alpha = 1, beta = 1, transB = 1](%11, %encoder.5.weight, %encoder.5.bias)
  return %output
}
input name  ['data']
output name  ['output']
val device  cuda
val shape  [256, 1, 28, 28]
val data type  tensor(float)
is_tensor  True
array_equal  True
providers  CUDAExecutionProvider
sess env  ['CUDAExecutionProvider', 'CPUExecutionProvider']
<class 'list'>
[[ -3.5930414    8.179376     1.1969751  ...  -2.913561     2.5138445
   -2.2389767 ]
 [ 11.716089   -11.836465     2.8341749  ...  -1.8803438    0.31916314
   -1.637662  ]
 [ -6.1383176    7.9563417    0.18428418 ...   0.2816238    0.55466944
   -1.2241261 ]
 ...
 [  0.02245945  -5.2462187   -2.9979806  ...   1.0633407   -0.07040683
   -0.49605215]
 [ -7.219374    -3.159672    -0.64644974 ...   5.7991867   -1.9511163
    1.4337606 ]
 [ -4.0595794    7.265975     0.7286219  ...  -0.5744688    0.522286
   -1.5456666 ]]
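
To confirm the exported graph matches the torch model numerically, here is a quick parity check against net_load (which the export step already moved to CPU); the tolerance is illustrative:

t_in = torch.from_numpy(x_input)
with torch.no_grad():
    np_torch_out = net_load(t_in).numpy()
print("allclose ", np.allclose(np_torch_out, outputs[0], atol=1e-4))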

The GitHub repo you won't even Star

ai_fast_handbook

posted @ 2022-03-22 08:07  Yumeka