Chainer速查_CPU和GPU的mnist预测训练_模型导出_模型导入再预测_导出onnx并预测
需要做点什么
为方便广大~~烟酒生~~研究生、~~人工智障炼丹师~~算法工程师快速上手 Chainer,特写此文。本文默认读者已具备基本的深度学习概念和数据集概念。
系统环境
python 3.7.4
chainer 6.7.0
cupy-cuda114 10.2.0
onnx-chainer 1.6.0
onnx 1.9.0
onnxruntime-gpu 1.9.0
数据准备
MNIST数据集csv文件是一个42000x785的矩阵
42000表示有42000张图片
785 列中,第一列是图片的类别(0, 1, 2, …, 9),第二列到最后一列是图片数据向量(28x28 的图片展平成 784 维的向量)。数据集长这个样子:
1 0 0 0 0 0 0 0 0 0 ..
0 0 0 0 0 0 0 0 0 0 ..
1 0 0 0 0 0 0 0 0 0 ..
1. 导入需要的包
import os
import time
import onnx
import chainer
import onnx_chainer
import numpy as np
import pandas as pd
import onnxruntime as ort
import chainer.links as L
import chainer.functions as F
from sklearn.metrics import accuracy_score
from chainer.iterators import SerialIterator
from chainer.datasets import TupleDataset
2. 参数准备
# Training hyper-parameters.
N_EPOCH = 1        # number of passes over the training split
N_BATCH = 128      # mini-batch size (also embedded in the ONNX file name)
N_BATCH_NUM = 250  # the first N_BATCH * N_BATCH_NUM rows form the training split

# File locations.
S_DATA_PATH = r"mnist_train.csv"
S_CHINER_MODEL_PATH = r"cnn_mnist.npz"
S_ONNX_MODEL_PATH = r"cnn_model_batch%d.onnx" % N_BATCH

# Device selection: swap the two assignments below to run on the CPU.
S_DEVICE, N_DEVICE_ID, S_DEVICE_FULL = "cuda", 0, "cuda:0"
# S_DEVICE, N_DEVICE_ID, S_DEVICE_FULL = "cpu", -1, "cpu"

if S_DEVICE == "cuda":
    # Probe CUDA only when it is actually requested, so that the CPU
    # configuration also runs on machines without CUDA/CuPy.
    chainer.cuda.check_cuda_available()
    print("chainer use cuda")
    # Make the chosen GPU the current device for subsequent CuPy work.
    chainer.cuda.get_device_from_id(N_DEVICE_ID).use()
    # Example of moving a NumPy array to the GPU by hand:
    # x_cpu = np.ones((5, 4, 3), dtype=np.float32)
    # x_gpu = chainer.backends.cuda.to_gpu(x_cpu, device=0)
运行输出
chainer use cuda
3. 读取数据
# Load the header-less CSV: column 0 holds the label, the remaining
# columns hold the flattened image.
df = pd.read_csv(S_DATA_PATH, header=None)
print(df.shape)
np_mat = np.array(df)
print(np_mat.shape)

# Separate labels from pixels; pixels become float32 divided by 255
# (assumes 8-bit pixel values, giving a [0, 1] range).
Y = np_mat[:, 0]
X = np_mat[:, 1:].astype(np.float32) / 255

# The first N_BATCH * N_BATCH_NUM rows are the training split,
# everything after that is the test split.
n_train = N_BATCH * N_BATCH_NUM
X_train, X_test = X[:n_train], X[n_train:]
Y_train, Y_test = Y[:n_train], Y[n_train:]

# Restore the image layout expected by the network: (N, 1, 28, 28).
X_train = X_train.reshape(-1, 1, 28, 28)
X_test = X_test.reshape(-1, 1, 28, 28)

for arr in (X_train, Y_train, X_test, Y_test):
    print(arr.shape)

# Mini-batch iterators: training data is shuffled, test data keeps its order.
train_iter = SerialIterator(
    TupleDataset(X_train, Y_train), batch_size=N_BATCH, shuffle=True)
test_iter = SerialIterator(
    TupleDataset(X_test, Y_test), batch_size=N_BATCH, shuffle=False)
运行输出
(42000, 785)
(42000, 785)
(32000, 1, 28, 28)
(32000,)
(10000, 1, 28, 28)
(10000,)
4. 模型构建
class Net(chainer.Chain):
    """Small CNN: 3x3 conv (16 channels) -> 2x2 max-pool -> FC(128) -> FC(10)."""

    def __init__(self):
        super().__init__()
        with self.init_scope():
            self.cov2 = L.Convolution2D(in_channels=1, out_channels=16, ksize=(3, 3))
            # 2704 = 16 channels * 13 * 13 (a 28x28 input after conv and pooling).
            self.L1 = L.Linear(2704, 128)
            self.L2 = L.Linear(128, 10)

    def forward(self, x):
        """Return the raw class scores (logits), one row of 10 per sample."""
        pooled = F.max_pooling_2d(self.cov2(x), 2)
        flat = F.reshape(pooled, (-1, 2704))
        hidden = F.relu(self.L1(flat))
        return self.L2(hidden)
# Build the network and, when CUDA is selected, move its parameters to the GPU.
net = Net()
if S_DEVICE == "cuda":
    net.to_gpu(N_DEVICE_ID)

# AdaGrad with its default hyper-parameters, attached to the network.
optimizer = chainer.optimizers.AdaGrad()
optimizer.setup(net)

print(net)
运行输出
Net(
(L1): Linear(in_size=2704, out_size=128, nobias=False),
(L2): Linear(in_size=128, out_size=10, nobias=False),
(cov2): Convolution2D(in_channels=1, out_channels=16, ksize=(3, 3), stride=(1, 1), pad=(0, 0), nobias=False, dilate=(1, 1), groups=1),
)
5. 模型训练
print("model train")
for i in range(N_EPOCH):
    print('epoch', i, '/', N_EPOCH)
    count = 0
    epoch_done = False
    while not epoch_done:
        # Draw the next mini-batch and place it on the selected device.
        train_batch = train_iter.next()
        x_b, y_b = chainer.dataset.concat_examples(train_batch, N_DEVICE_ID)

        # Forward pass and loss.
        logit_b = net(x_b)
        loss_b = F.softmax_cross_entropy(logit_b, y_b)

        # Batch accuracy is computed on the host with scikit-learn.
        np_pred = np.argmax(logit_b.array, -1)
        if S_DEVICE == "cuda":
            # get(): cupy to numpy
            y_b = y_b.get()
            np_pred = np_pred.get()
        acc = accuracy_score(y_b, np_pred)

        # Backward pass and parameter update.
        net.cleargrads()
        loss_b.backward()
        optimizer.update()

        count += 1
        if count % 50 == 0:
            print("env {}".format(type(x_b)))
            print("Train epoch:{} batch:{} loss:{} acc:{}".format(i, count, loss_b, acc))

        # The iterator raises this flag once the batch just served
        # completed a pass over the training data.
        epoch_done = train_iter.is_new_epoch
    print()
运行输出
model train
epoch 0 / 1
env <class 'cupy._core.core.ndarray'>
Train epoch:0 batch:50 loss:variable(0.6438818) acc:0.8125
env <class 'cupy._core.core.ndarray'>
Train epoch:0 batch:100 loss:variable(0.33969015) acc:0.9375
env <class 'cupy._core.core.ndarray'>
Train epoch:0 batch:150 loss:variable(0.36363766) acc:0.9140625
env <class 'cupy._core.core.ndarray'>
Train epoch:0 batch:200 loss:variable(0.41578048) acc:0.859375
env <class 'cupy._core.core.ndarray'>
Train epoch:0 batch:250 loss:variable(0.3055666) acc:0.921875
6.模型预测
print("model pred")
# Start from the first test sample regardless of any earlier pass.
test_iter.reset()
count = 0
# Inference only: run in test mode and skip building the backprop graph.
with chainer.using_config('train', False), chainer.no_backprop_mode():
    while True:
        test_batch = test_iter.next()
        # Evaluate the batch just drawn from the test iterator.
        x_b, y_b = chainer.dataset.concat_examples(test_batch, N_DEVICE_ID)
        logit_b = net(x_b)
        loss_b = F.softmax_cross_entropy(logit_b, y_b)
        np_pred = np.argmax(logit_b.array, -1)
        if S_DEVICE == "cuda":
            y_b, np_pred = y_b.get(), np_pred.get()  # get(): cupy to numpy
        acc = accuracy_score(y_b, np_pred)
        count += 1
        if count % 10 == 0:
            print("env {}".format(type(x_b)))
            print("Test batch:{} loss:{} acc:{}".format(count, loss_b, acc))
        # Stop after one full pass over the test split.
        if test_iter.is_new_epoch:
            break
print()
运行输出
model pred
env <class 'cupy._core.core.ndarray'>
Test batch:10 loss:variable(0.29355824) acc:0.921875
env <class 'cupy._core.core.ndarray'>
Test batch:20 loss:variable(0.29355824) acc:0.921875
env <class 'cupy._core.core.ndarray'>
Test batch:30 loss:variable(0.29355824) acc:0.921875
env <class 'cupy._core.core.ndarray'>
Test batch:40 loss:variable(0.29355824) acc:0.921875
env <class 'cupy._core.core.ndarray'>
Test batch:50 loss:variable(0.29355824) acc:0.921875
env <class 'cupy._core.core.ndarray'>
Test batch:60 loss:variable(0.29355824) acc:0.921875
env <class 'cupy._core.core.ndarray'>
Test batch:70 loss:variable(0.29355824) acc:0.921875
7.模型保存
# Move the parameters back to host memory first (in place: `net` stays on
# the CPU afterwards), then serialize them to an .npz file.
net.to_cpu()
chainer.serializers.save_npz(S_CHINER_MODEL_PATH, net)
8.模型加载和加载模型使用
print("load torch model and pred test data")
# Rebuild the architecture, then fill it with the saved parameters.
net_load = Net()
chainer.serializers.load_npz(S_CHINER_MODEL_PATH, net_load)
if S_DEVICE == "cuda":
    net_load.to_gpu(N_DEVICE_ID)

print("loaded model pred")
# Start from the first test sample regardless of any earlier pass.
test_iter.reset()
count = 0
# Inference only: run in test mode and skip building the backprop graph.
with chainer.using_config('train', False), chainer.no_backprop_mode():
    while True:
        test_batch = test_iter.next()
        # Evaluate the batch just drawn from the test iterator.
        x_b, y_b = chainer.dataset.concat_examples(test_batch, N_DEVICE_ID)
        logit_b = net_load(x_b)
        loss_b = F.softmax_cross_entropy(logit_b, y_b)
        np_pred = np.argmax(logit_b.array, -1)
        if S_DEVICE == "cuda":
            y_b, np_pred = y_b.get(), np_pred.get()  # get(): cupy to numpy
        acc = accuracy_score(y_b, np_pred)
        count += 1
        if count % 10 == 0:
            print("env {}".format(type(x_b)))
            print("Test batch:{} loss:{} acc:{}".format(count, loss_b, acc))
        # Stop after one full pass over the test split.
        if test_iter.is_new_epoch:
            break
print()
运行输出
load torch model and pred test data
loaded model pred
env <class 'cupy._core.core.ndarray'>
Test batch:10 loss:variable(0.29355824) acc:0.921875
env <class 'cupy._core.core.ndarray'>
Test batch:20 loss:variable(0.29355824) acc:0.921875
env <class 'cupy._core.core.ndarray'>
Test batch:30 loss:variable(0.29355824) acc:0.921875
env <class 'cupy._core.core.ndarray'>
Test batch:40 loss:variable(0.29355824) acc:0.921875
env <class 'cupy._core.core.ndarray'>
Test batch:50 loss:variable(0.29355824) acc:0.921875
env <class 'cupy._core.core.ndarray'>
Test batch:60 loss:variable(0.29355824) acc:0.921875
env <class 'cupy._core.core.ndarray'>
Test batch:70 loss:variable(0.29355824) acc:0.921875
9.导出ONNX
# Example input for the export; its shape (N_BATCH, 1, 28, 28) becomes the
# input shape of the exported graph.
x = np.zeros((N_BATCH, 1, 28, 28), dtype=np.float32)
# Switch Chainer to inference mode (global setting) before exporting.
chainer.config.train = False
# Export `net` to S_ONNX_MODEL_PATH using `x` as the example input.
# `net` must live on the CPU here because `x` is a NumPy array.
onnx_chainer.export(net, x, filename=S_ONNX_MODEL_PATH)
运行输出
ir_version: 7
producer_name: "Chainer"
producer_version: "6.7.0"
graph {
node {
input: "Input_0"
input: "param_cov2_W"
input: "param_cov2_b"
output: "Convolution2DFunction_0"
name: "Convolution2DFunction_0"
op_type: "Conv"
attribute {
name: "dilations"
ints: 1
ints: 1
type: INTS
}
attribute {
name: "group"
i: 1
type: INT
}
attribute {
name: "kernel_shape"
ints: 3
ints: 3
type: INTS
}
attribute {
name: "pads"
ints: 0
ints: 0
ints: 0
ints: 0
type: INTS
}
attribute {
name: "strides"
ints: 1
ints: 1
type: INTS
}
}
node {
input: "Convolution2DFunction_0"
output: "MaxPooling2D_0"
name: "MaxPooling2D_0"
op_type: "MaxPool"
attribute {
name: "kernel_shape"
ints: 2
ints: 2
type: INTS
}
attribute {
name: "pads"
ints: 0
ints: 0
ints: 1
ints: 1
type: INTS
}
attribute {
name: "storage_order"
i: 0
type: INT
}
attribute {
name: "strides"
ints: 2
ints: 2
type: INTS
}
}
node {
output: "Reshape_0_const_shape"
name: "Reshape_0"
op_type: "Constant"
attribute {
name: "value"
t {
dims: 2
data_type: 7
int64_data: -1
int64_data: 2704
name: "Reshape_0_const_shape"
}
type: TENSOR
}
}
node {
input: "MaxPooling2D_0"
input: "Reshape_0_const_shape"
output: "Reshape_0"
name: "Reshape_0"
op_type: "Reshape"
}
node {
input: "Reshape_0"
input: "param_L1_W"
input: "param_L1_b"
output: "LinearFunction_0"
name: "LinearFunction_0"
op_type: "Gemm"
attribute {
name: "alpha"
f: 1.0
type: FLOAT
}
attribute {
name: "beta"
f: 1.0
type: FLOAT
}
attribute {
name: "transA"
i: 0
type: INT
}
attribute {
name: "transB"
i: 1
type: INT
}
}
node {
input: "LinearFunction_0"
output: "ReLU_0"
name: "ReLU_0"
op_type: "Relu"
}
node {
input: "ReLU_0"
input: "param_L2_W"
input: "param_L2_b"
output: "LinearFunction_1"
name: "LinearFunction_1"
op_type: "Gemm"
attribute {
name: "alpha"
f: 1.0
type: FLOAT
}
attribute {
name: "beta"
f: 1.0
type: FLOAT
}
attribute {
name: "transA"
i: 0
type: INT
}
attribute {
name: "transB"
i: 1
type: INT
}
}
name: "Graph"
initializer {
dims: 128
dims: 2704
data_type: 1
name: "param_L1_W"
raw_data: "\361\037..
10. 加载ONNX并运行
model = onnx.load(S_ONNX_MODEL_PATH)
# check_model raises if the model is malformed and returns None otherwise,
# so a successful check prints "None".
print(onnx.checker.check_model(model))  # Check that the model is well formed
print(onnx.helper.printable_graph(model.graph))  # Print a human readable representation of the graph

ls_input_name = [graph_input.name for graph_input in model.graph.input]
ls_output_name = [graph_output.name for graph_output in model.graph.output]
print("input name ", ls_input_name)
print("output name ", ls_output_name)

# graph.input also lists the weight tensors; the data input is the entry
# that has no matching initializer. Fall back to the full list if the
# filter leaves nothing.
set_initializer_name = {initializer.name for initializer in model.graph.initializer}
ls_data_input_name = [name for name in ls_input_name if name not in set_initializer_name]
s_input_name = (ls_data_input_name or ls_input_name)[-1]

x_input = X_train[:N_BATCH, :, :, :].astype(np.float32)
# The CPU configuration uses N_DEVICE_ID = -1 (Chainer's convention);
# pass ONNX Runtime its default device id 0 in that case.
n_ort_device_id = N_DEVICE_ID if S_DEVICE == "cuda" else 0
ort_val = ort.OrtValue.ortvalue_from_numpy(x_input, S_DEVICE, n_ort_device_id)
print("val device ", ort_val.device_name())
print("val shape ", ort_val.shape())
print("val data type ", ort_val.data_type())
print("is_tensor ", ort_val.is_tensor())
print("array_equal ", np.array_equal(ort_val.numpy(), x_input))

# Providers are tried in order. For example
# ['CUDAExecutionProvider', 'CPUExecutionProvider'] means: execute a node
# using CUDAExecutionProvider if capable, otherwise execute using
# CPUExecutionProvider.
providers = 'CUDAExecutionProvider' if S_DEVICE == "cuda" else 'CPUExecutionProvider'
print("providers ", providers)
# The provider list given here already configures the session, so no
# separate set_providers() call is needed.
ort_session = ort.InferenceSession(S_ONNX_MODEL_PATH, providers=[providers])
outputs = ort_session.run(None, {s_input_name: ort_val})
print("sess env ", ort_session.get_providers())
print(type(outputs))
print(outputs[0])
运行输出
None
graph Graph (
%Input_0[FLOAT, 128x1x28x28]
) optional inputs with matching initializers (
%param_L1_W[FLOAT, 128x2704]
%param_L1_b[FLOAT, 128]
%param_L2_W[FLOAT, 10x128]
%param_L2_b[FLOAT, 10]
%param_cov2_W[FLOAT, 16x1x3x3]
%param_cov2_b[FLOAT, 16]
) {
%Convolution2DFunction_0 = Conv[dilations = [1, 1], group = 1, kernel_shape = [3, 3], pads = [0, 0, 0, 0], strides = [1, 1]](%Input_0, %param_cov2_W, %param_cov2_b)
%MaxPooling2D_0 = MaxPool[kernel_shape = [2, 2], pads = [0, 0, 1, 1], storage_order = 0, strides = [2, 2]](%Convolution2DFunction_0)
%Reshape_0_const_shape = Constant[value = <Tensor>]()
%Reshape_0 = Reshape(%MaxPooling2D_0, %Reshape_0_const_shape)
%LinearFunction_0 = Gemm[alpha = 1, beta = 1, transA = 0, transB = 1](%Reshape_0, %param_L1_W, %param_L1_b)
%ReLU_0 = Relu(%LinearFunction_0)
%LinearFunction_1 = Gemm[alpha = 1, beta = 1, transA = 0, transB = 1](%ReLU_0, %param_L2_W, %param_L2_b)
return %LinearFunction_1
}
input name ['param_L1_W', 'param_L1_b', 'param_L2_W', 'param_L2_b', 'param_cov2_W', 'param_cov2_b', 'Input_0']
output name ['LinearFunction_1']
val device cuda
val shape [128, 1, 28, 28]
val data type tensor(float)
is_tensor True
array_equal True
providers CUDAExecutionProvider
sess env ['CUDAExecutionProvider', 'CPUExecutionProvider']
<class 'list'>
[[-2.0423372 4.5884066 0.36332878 ... -2.4472551 2.0819268
-2.7261026 ]
[ 8.305377 -6.7392306 2.3732197 ... -1.0024172 1.6413575
-3.7483473 ]
[-2.6331928 5.8743267 -0.6629743 ... -0.5096626 0.5064622
-1.2187834 ]
...
[ 0.39152578 -2.465724 1.177169 ... -3.6535776 0.03016166
-1.6232893 ]
[-2.476002 -0.02901424 -0.0378947 ... 5.504667 -0.8124644
2.1768222 ]
[-1.5162327 -2.5863512 2.1492455 ... -5.290888 1.1870652
-2.5682375 ]]