Torch速查_CPU和GPU的mnist预测训练_模型导出_模型导入再预测_导出script并预测_导出onnx并预测
需要做点什么
为了方便广大研究生(戏称“烟酒生”)和算法工程师(戏称“人工智障炼丹师”)快速上手 torch,特写此文。本文默认读者已具备基本的深度学习概念和数据集概念。
系统环境
python 3.7.4
torch 1.9.0+cu111
onnx 1.9.0
onnxruntime-gpu 1.9.0
数据准备
MNIST数据集csv文件是一个42000x785的矩阵
42000表示有42000张图片
785中第一列是图片的类别(0,1,2,..,9),第二列到最后一列是图片数据向量 (28x28的图片张成784的向量), 数据集长这个样子:
1 0 0 0 0 0 0 0 0 0 ..
0 0 0 0 0 0 0 0 0 0
1 0 0 0 0 0 0 0 0 0
4 0 0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0 0 0
7 0 0 0 0 0 0 0 0 0
3 0 0 0 0 0 0 0 0 0
5 0 0 0 0 0 0 0 0 0
3 0 0 0 0 0 0 0 0 0
8 0 0 0 0 0 0 0 0 0
9 0 0 0 0 0 0 0 0 0
1 0 0 0 0 0 0 0 0 0
3 0 0 0 0 0 0 0 0 0
3 0 0 0 0 0 0 0 0 0
1 0 0 0 0 0 0 0 0 0
2 0 0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0 0 0
1. 导入需要的包
import os
import time
import onnx
import torch
import numpy as np
import pandas as pd
import torch.nn as nn
import onnxruntime as ort
import torch.nn.functional as F
from sklearn.metrics import accuracy_score
from torch.utils.data import Dataset, DataLoader
2. 参数准备
# --- Training / data-split sizes --------------------------------------
# Number of passes over the training split.
N_EPOCH = 1
# Mini-batch size; also embedded in the ONNX file name below.
N_BATCH = 128
# The first N_BATCH * N_BATCH_NUM rows of the CSV are used for training,
# the remaining rows for testing.
N_BATCH_NUM = 250

# --- File locations ----------------------------------------------------
S_DATA_PATH = r"mnist_train.csv"
S_TORCH_MODEL_FULL_PATH = r"cnn_model.pth"
S_TORCH_MODEL_PARAMS_PATH = r"cnn_model_state.pth"
S_TORCH_MODEL_SCRIPT_PATH = r"cnn_model.torch_script.pt"
S_ONNX_MODEL_PATH = f"cnn_model_batch{N_BATCH}.onnx"

# --- Device selection --------------------------------------------------
# To run everything on the CPU instead, use:
#   S_DEVICE, N_DEVICE_ID, S_DEVICE_FULL = "cpu", 0, "cpu"
S_DEVICE = "cuda"
N_DEVICE_ID = 0
S_DEVICE_FULL = "cuda:0"
3. 读取数据
# Load the raw CSV (read without a header row) and view it as a matrix.
df = pd.read_csv(S_DATA_PATH, header=None)
print(df.shape)
np_mat = np.array(df)
print(np_mat.shape)

# Column 0 holds the labels; the remaining columns hold the pixels,
# which are converted to float32 and divided by 255.
Y = np_mat[:, 0]
X = np_mat[:, 1:].astype(np.float32) / 255

# The first N_BATCH * N_BATCH_NUM rows are the training split; whatever
# is left over becomes the test split.
n_train = N_BATCH * N_BATCH_NUM
X_train, X_test = X[:n_train], X[n_train:]
Y_train, Y_test = Y[:n_train], Y[n_train:]

# Turn every flat pixel row into a single-channel 28x28 image.
X_train = X_train.reshape(len(X_train), 1, 28, 28)
X_test = X_test.reshape(len(X_test), 1, 28, 28)

for np_split in (X_train, Y_train, X_test, Y_test):
    print(np_split.shape)
class MnistDataSet(Dataset):
    """Map-style dataset pairing each sample in ``X`` with its label in ``Y``.

    Args:
        X: Sequence/array of samples, split along its first axis.
        Y: Sequence/array of labels, one per sample.

    Raises:
        ValueError: If ``X`` and ``Y`` do not contain the same number of
            entries.
    """

    def __init__(self, X, Y):
        # Fail early on mismatched inputs instead of raising IndexError
        # part-way through (Y too short) or silently dropping labels
        # (Y too long).
        if len(X) != len(Y):
            raise ValueError(
                "X and Y must have the same length, got %d and %d"
                % (len(X), len(Y)))
        # list(...) splits along the first axis, giving one entry per sample.
        self.l_data = list(X)
        self.l_label = list(Y)

    def __getitem__(self, index):
        """Return the ``(sample, label)`` pair stored at ``index``."""
        return self.l_data[index], self.l_label[index]

    def __len__(self):
        """Return the number of stored samples."""
        return len(self.l_data)
# Wrap both splits in DataLoaders with batch size N_BATCH: the training
# loader shuffles, the test loader keeps the original order.
train_set = MnistDataSet(X_train, Y_train)
test_set = MnistDataSet(X_test, Y_test)
train_loader = DataLoader(train_set, batch_size=N_BATCH, shuffle=True)
test_loader = DataLoader(test_set, batch_size=N_BATCH, shuffle=False)
运行输出
(42000, 785)
(42000, 785)
(32000, 1, 28, 28)
(32000,)
(10000, 1, 28, 28)
(10000,)
4. 模型构建
class Net(nn.Module):
    """Small CNN classifier mapping (N, 1, 28, 28) images to 10 logits.

    All layers live in one ``nn.Sequential`` called ``encoder``, so the
    parameter names are ``encoder.<index>.weight`` / ``encoder.<index>.bias``.
    """

    def __init__(self):
        super().__init__()
        self.encoder = nn.Sequential(
            nn.Conv2d(1, 16, 3, 1),  # (N, 1, 28, 28) -> (N, 16, 26, 26)
            nn.MaxPool2d(2),         # -> (N, 16, 13, 13)
            nn.Flatten(1),           # -> (N, 2704), i.e. 16 * 13 * 13
            nn.Linear(2704, 128),
            nn.ReLU(),
            nn.Linear(128, 10),      # raw class logits, no softmax applied
        )

    def forward(self, x):
        """Return the logits produced by ``encoder`` for the batch ``x``."""
        return self.encoder(x)
# Build the network on the configured device and print its layer summary
# (the structure listed in the run output that follows this snippet).
net = Net().to(S_DEVICE)
print(net)
# Adam over all network parameters with a learning rate of 1e-3.
optimizer = torch.optim.Adam(net.parameters(), lr=1e-3)
# Cross-entropy loss; the loops below feed it logits and integer labels.
loss_fun = nn.CrossEntropyLoss()
运行输出
Net(
(encoder): Sequential(
(0): Conv2d(1, 16, kernel_size=(3, 3), stride=(1, 1))
(1): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
(2): Flatten(start_dim=1, end_dim=-1)
(3): Linear(in_features=2704, out_features=128, bias=True)
(4): ReLU()
(5): Linear(in_features=128, out_features=10, bias=True)
)
)
5. 模型训练
print("model train")
for i in range(N_EPOCH):
    net.train()
    # Running sum of the per-batch losses of this epoch.
    t_loss = 0.
    # Per-batch predictions / labels, concatenated once after the epoch
    # instead of re-copying the growing arrays on every batch.
    l_pred_b, l_y_b = [], []
    for t_x_b, t_y_b in train_loader:
        t_y_b = t_y_b.long().to(S_DEVICE)
        t_x_b = t_x_b.float().to(S_DEVICE)

        t_logits_b = net(t_x_b)
        t_loss_b = loss_fun(t_logits_b, t_y_b)

        optimizer.zero_grad()
        t_loss_b.backward()
        optimizer.step()

        # Detach before accumulating: summing the attached loss would keep
        # every batch's autograd history reachable through t_loss for the
        # whole epoch. The printed total therefore carries no grad_fn.
        t_loss += t_loss_b.detach()
        l_pred_b.append(torch.argmax(t_logits_b, -1).detach().cpu().numpy())
        l_y_b.append(t_y_b.cpu().numpy())

    np_pred = np.concatenate(l_pred_b, 0)
    np_y = np.concatenate(l_y_b, 0)
    f_acc = accuracy_score(np_y, np_pred)
    print("train ", t_loss, f_acc)
    print()
运行输出
model train
train tensor(113.2757, device='cuda:0', grad_fn=<AddBackward0>) 0.8743125
6.模型预测
# Evaluate the trained network on the test split without tracking gradients.
with torch.no_grad():
    # NOTE(review): each pass evaluates the same model on the same data, so
    # N_EPOCH > 1 only repeats the printout; kept to preserve the output.
    for i in range(N_EPOCH):
        net.eval()
        # Running sum of the per-batch losses.
        t_loss = 0.
        # Per-batch predictions / labels, concatenated once after the loop
        # instead of re-copying the growing arrays on every batch.
        l_pred_b, l_y_b = [], []
        for t_x_b, t_y_b in test_loader:
            t_y_b = t_y_b.long().to(S_DEVICE)
            t_x_b = t_x_b.float().to(S_DEVICE)

            t_logits_b = net(t_x_b)
            t_loss_b = loss_fun(t_logits_b, t_y_b)

            t_loss += t_loss_b
            l_pred_b.append(
                torch.argmax(t_logits_b, -1).detach().cpu().numpy())
            l_y_b.append(t_y_b.cpu().numpy())

        np_pred = np.concatenate(l_pred_b, 0)
        np_y = np.concatenate(l_y_b, 0)
        f_acc = accuracy_score(np_y, np_pred)
        print("test ", t_loss, f_acc)
        print()
运行输出
test tensor(17.3666, device='cuda:0') 0.9334
7.模型保存
# Persist the trained network in two forms:
#   * only its parameters (state_dict), and
#   * the whole module object, which the loading step reads back
#     with torch.load.
torch.save(net.state_dict(), S_TORCH_MODEL_PARAMS_PATH)
torch.save(net, S_TORCH_MODEL_FULL_PATH)
8.模型加载和加载模型使用
print("load torch model and pred test data")
# map_location returns the storage unchanged, i.e. tensors are first
# materialised on the CPU regardless of where they were saved from; the
# model is then moved to the configured device.
net_load = torch.load(S_TORCH_MODEL_FULL_PATH,
                      map_location=lambda storage, loc: storage)
net_load = net_load.to(S_DEVICE)
print("load model ok")

# Re-run the test evaluation with the reloaded model.
with torch.no_grad():
    # NOTE(review): each pass evaluates the same model on the same data, so
    # N_EPOCH > 1 only repeats the printout; kept to preserve the output.
    for i in range(N_EPOCH):
        net_load.eval()
        # Running sum of the per-batch losses.
        t_loss = 0.
        # Per-batch predictions / labels, concatenated once after the loop
        # instead of re-copying the growing arrays on every batch.
        l_pred_b, l_y_b = [], []
        for t_x_b, t_y_b in test_loader:
            t_y_b = t_y_b.long().to(S_DEVICE)
            t_x_b = t_x_b.float().to(S_DEVICE)

            t_logits_b = net_load(t_x_b)
            t_loss_b = loss_fun(t_logits_b, t_y_b)

            t_loss += t_loss_b
            l_pred_b.append(
                torch.argmax(t_logits_b, -1).detach().cpu().numpy())
            l_y_b.append(t_y_b.cpu().numpy())

        np_pred = np.concatenate(l_pred_b, 0)
        np_y = np.concatenate(l_y_b, 0)
        f_acc = accuracy_score(np_y, np_pred)
        print("load torch model ", t_loss, f_acc)
        print()
运行输出
load torch model and pred test data
load model ok
load torch model tensor(17.3666, device='cuda:0') 0.9334
9.导出Torch Script
# Trace with a dedicated example input rather than a batch variable left
# over from an earlier loop, so this step also works when run on its own.
# The example is created on the same device as the model's parameters.
net_load.eval()
t_example_in = torch.randn(
    1, 1, 28, 28, device=next(net_load.parameters()).device)
torch_script_trace = torch.jit.trace(net_load, t_example_in)
print(torch_script_trace)
torch_script_trace.save(S_TORCH_MODEL_SCRIPT_PATH)
运行输出
Net(
original_name=Net
(encoder): Sequential(
original_name=Sequential
(0): Conv2d(original_name=Conv2d)
(1): MaxPool2d(original_name=MaxPool2d)
(2): Flatten(original_name=Flatten)
(3): Linear(original_name=Linear)
(4): ReLU(original_name=ReLU)
(5): Linear(original_name=Linear)
)
)
10. 加载Torch Script并预测
# Load the saved TorchScript module, move it to the configured device and
# print both its module tree and the generated forward() source.
torch_script_load = torch.jit.load(S_TORCH_MODEL_SCRIPT_PATH)
torch_script_load = torch_script_load.to(S_DEVICE)
print(torch_script_load)
print(torch_script_load.code)
print("load scirpt model ok")

# Re-run the test evaluation with the TorchScript module.
with torch.no_grad():
    # NOTE(review): each pass evaluates the same model on the same data, so
    # N_EPOCH > 1 only repeats the printout; kept to preserve the output.
    for i in range(N_EPOCH):
        torch_script_load.eval()
        # Running sum of the per-batch losses.
        t_loss = 0.
        # Per-batch predictions / labels, concatenated once after the loop
        # instead of re-copying the growing arrays on every batch.
        l_pred_b, l_y_b = [], []
        for t_x_b, t_y_b in test_loader:
            t_y_b = t_y_b.long().to(S_DEVICE)
            t_x_b = t_x_b.float().to(S_DEVICE)

            t_logits_b = torch_script_load(t_x_b)
            t_loss_b = loss_fun(t_logits_b, t_y_b)

            t_loss += t_loss_b
            l_pred_b.append(
                torch.argmax(t_logits_b, -1).detach().cpu().numpy())
            l_y_b.append(t_y_b.cpu().numpy())

        np_pred = np.concatenate(l_pred_b, 0)
        np_y = np.concatenate(l_y_b, 0)
        f_acc = accuracy_score(np_y, np_pred)
        print("load scirpt torch model ", t_loss, f_acc)
        print()
运行输出
RecursiveScriptModule(
original_name=Net
(encoder): RecursiveScriptModule(
original_name=Sequential
(0): RecursiveScriptModule(original_name=Conv2d)
(1): RecursiveScriptModule(original_name=MaxPool2d)
(2): RecursiveScriptModule(original_name=Flatten)
(3): RecursiveScriptModule(original_name=Linear)
(4): RecursiveScriptModule(original_name=ReLU)
(5): RecursiveScriptModule(original_name=Linear)
)
)
def forward(self,
x: Tensor) -> Tensor:
return (self.encoder).forward(x, )
load scirpt model ok
load scirpt torch model tensor(17.3666, device='cuda:0') 0.9334
11.导出ONNX
# Example input handed to the exporter (created on the CPU).
dummy_in = torch.randn(N_BATCH, 1, 28, 28)
# Declare axis 0 of both the input and the output as the symbolic
# "batch_size" dimension so the exported graph accepts any batch size.
d_dynamic_axes = {
    'data': {0: 'batch_size'},
    'output': {0: 'batch_size'},
}
torch.onnx.export(
    net_load.cpu(),  # NOTE: .cpu() moves net_load itself to the CPU
    dummy_in,
    S_ONNX_MODEL_PATH,
    verbose=True,  # prints the exported graph
    input_names=["data"],
    output_names=["output"],
    dynamic_axes=d_dynamic_axes,
)
运行输出
graph(%data : Float(*, 1, 28, 28, strides=[784, 784, 28, 1], requires_grad=0, device=cpu),
%encoder.0.weight : Float(16, 1, 3, 3, strides=[9, 9, 3, 1], requires_grad=1, device=cpu),
%encoder.0.bias : Float(16, strides=[1], requires_grad=1, device=cpu),
%encoder.3.weight : Float(128, 2704, strides=[2704, 1], requires_grad=1, device=cpu),
%encoder.3.bias : Float(128, strides=[1], requires_grad=1, device=cpu),
%encoder.5.weight : Float(10, 128, strides=[128, 1], requires_grad=1, device=cpu),
%encoder.5.bias : Float(10, strides=[1], requires_grad=1, device=cpu)):
%7 : Float(*, 16, 26, 26, strides=[10816, 676, 26, 1], requires_grad=1, device=cpu) = onnx::Conv[dilations=[1, 1], group=1, kernel_shape=[3, 3], pads=[0, 0, 0, 0], strides=[1, 1]](%data, %encoder.0.weight, %encoder.0.bias) # D:\Dev_Utils\Anaconda3\lib\site-packages\torch\nn\modules\conv.py:440:0
%8 : Float(*, 16, 13, 13, strides=[2704, 169, 13, 1], requires_grad=1, device=cpu) = onnx::MaxPool[kernel_shape=[2, 2], pads=[0, 0, 0, 0], strides=[2, 2]](%7) # D:\Dev_Utils\Anaconda3\lib\site-packages\torch\nn\functional.py:718:0
%9 : Float(*, 2704, strides=[2704, 1], requires_grad=1, device=cpu) = onnx::Flatten[axis=1](%8) # D:\Dev_Utils\Anaconda3\lib\site-packages\torch\nn\modules\flatten.py:40:0
%10 : Float(*, 128, strides=[128, 1], requires_grad=1, device=cpu) = onnx::Gemm[alpha=1., beta=1., transB=1](%9, %encoder.3.weight, %encoder.3.bias) # D:\Dev_Utils\Anaconda3\lib\site-packages\torch\nn\functional.py:1847:0
%11 : Float(*, 128, strides=[128, 1], requires_grad=1, device=cpu) = onnx::Relu(%10) # D:\Dev_Utils\Anaconda3\lib\site-packages\torch\nn\functional.py:1298:0
%output : Float(*, 10, strides=[10, 1], requires_grad=1, device=cpu) = onnx::Gemm[alpha=1., beta=1., transB=1](%11, %encoder.5.weight, %encoder.5.bias) # D:\Dev_Utils\Anaconda3\lib\site-packages\torch\nn\functional.py:1847:0
return (%output)
12. 加载ONNX并运行
# Load the exported ONNX file, check that it is well formed and print a
# human-readable representation of its graph.
model = onnx.load(S_ONNX_MODEL_PATH)
print(onnx.checker.check_model(model))
print(onnx.helper.printable_graph(model.graph))

# Names of the graph's declared inputs and outputs.
ls_input_name = [value_info.name for value_info in model.graph.input]
ls_output_name = [value_info.name for value_info in model.graph.output]
print("input name ", ls_input_name)
print("output name ", ls_output_name)
s_input_name = ls_input_name[0]

# Take 2 * N_BATCH training images and wrap them in an OrtValue placed on
# the configured device.
x_input = X_train[:N_BATCH * 2, :, :, :].astype(np.float32)
ort_val = ort.OrtValue.ortvalue_from_numpy(x_input, S_DEVICE, N_DEVICE_ID)
print("val device ", ort_val.device_name())
print("val shape ", ort_val.shape())
print("val data type ", ort_val.data_type())
print("is_tensor ", ort_val.is_tensor())
print("array_equal ", np.array_equal(ort_val.numpy(), x_input))

# Choose the execution provider that matches the configured device.
# A provider list is ordered by priority: for example
# ['CUDAExecutionProvider', 'CPUExecutionProvider'] means execute a node
# using CUDAExecutionProvider if capable, otherwise execute it using
# CPUExecutionProvider.
if S_DEVICE == "cuda":
    providers = 'CUDAExecutionProvider'
else:
    providers = 'CPUExecutionProvider'
print("providers ", providers)

ort_session = ort.InferenceSession(S_ONNX_MODEL_PATH, providers=[providers])
ort_session.set_providers([providers])
outputs = ort_session.run(None, {s_input_name: ort_val})
print("sess env ", ort_session.get_providers())
print(type(outputs))
print(outputs[0])
运行输出
None
graph torch-jit-export (
%data[FLOAT, batch_sizex1x28x28]
) initializers (
%encoder.0.weight[FLOAT, 16x1x3x3]
%encoder.0.bias[FLOAT, 16]
%encoder.3.weight[FLOAT, 128x2704]
%encoder.3.bias[FLOAT, 128]
%encoder.5.weight[FLOAT, 10x128]
%encoder.5.bias[FLOAT, 10]
) {
%7 = Conv[dilations = [1, 1], group = 1, kernel_shape = [3, 3], pads = [0, 0, 0, 0], strides = [1, 1]](%data, %encoder.0.weight, %encoder.0.bias)
%8 = MaxPool[kernel_shape = [2, 2], pads = [0, 0, 0, 0], strides = [2, 2]](%7)
%9 = Flatten[axis = 1](%8)
%10 = Gemm[alpha = 1, beta = 1, transB = 1](%9, %encoder.3.weight, %encoder.3.bias)
%11 = Relu(%10)
%output = Gemm[alpha = 1, beta = 1, transB = 1](%11, %encoder.5.weight, %encoder.5.bias)
return %output
}
input name ['data']
output name ['output']
val device cuda
val shape [256, 1, 28, 28]
val data type tensor(float)
is_tensor True
array_equal True
providers CUDAExecutionProvider
sess env ['CUDAExecutionProvider', 'CPUExecutionProvider']
<class 'list'>
[[ -3.5930414 8.179376 1.1969751 ... -2.913561 2.5138445
-2.2389767 ]
[ 11.716089 -11.836465 2.8341749 ... -1.8803438 0.31916314
-1.637662 ]
[ -6.1383176 7.9563417 0.18428418 ... 0.2816238 0.55466944
-1.2241261 ]
...
[ 0.02245945 -5.2462187 -2.9979806 ... 1.0633407 -0.07040683
-0.49605215]
[ -7.219374 -3.159672 -0.64644974 ... 5.7991867 -1.9511163
1.4337606 ]
[ -4.0595794 7.265975 0.7286219 ... -0.5744688 0.522286
-1.5456666 ]]