Chainer速查_CPU和GPU的mnist预测训练_模型导出_模型导入再预测_导出onnx并预测

需要做点什么

方便广大~~烟酒生~~研究生、~~人工智障炼丹师~~算法工程师快速使用 Chainer,特写此文。默认读者已具备基本的深度学习概念和数据集概念。

系统环境

python 3.7.4
chainer 6.7.0
cupy-cuda114 10.2.0
onnx-chainer 1.6.0
onnx 1.9.0
onnxruntime-gpu 1.9.0

数据准备

MNIST数据集csv文件是一个42000x785的矩阵
42000表示有42000张图片
785中第一列是图片的类别(0,1,2,..,9),第二列到最后一列是图片数据向量 (28x28的图片张成784的向量), 数据集长这个样子:

1 0 0 0 0 0 0 0 0 0 ..
0 0 0 0 0 0 0 0 0 0 ..
1 0 0 0 0 0 0 0 0 0 ..

1. 导入需要的包

import os
import time
import onnx
import chainer
import onnx_chainer
import numpy as np
import pandas as pd
import onnxruntime as ort
import chainer.links as L
import chainer.functions as F
from sklearn.metrics import accuracy_score
from chainer.iterators import SerialIterator
from chainer.datasets import TupleDataset

2. 参数准备

# Training hyper-parameters.
N_EPOCH = 1
N_BATCH = 128
N_BATCH_NUM = 250  # number of N_BATCH-sized batches used for training; the rest is the test split

# Input / output file locations.
S_DATA_PATH = r"mnist_train.csv"
S_CHINER_MODEL_PATH = r"cnn_mnist.npz"
S_ONNX_MODEL_PATH = r"cnn_model_batch%d.onnx" % N_BATCH

# Device selection: swap the two assignments below to run on the CPU.
# N_DEVICE_ID follows Chainer's convention (-1 means CPU).
S_DEVICE, N_DEVICE_ID, S_DEVICE_FULL = "cuda", 0, "cuda:0"
# S_DEVICE, N_DEVICE_ID, S_DEVICE_FULL = "cpu", -1, "cpu"

if S_DEVICE == "cuda":
    # Probe for CUDA only when it was requested: check_cuda_available()
    # raises when CUDA/CuPy is missing, which would break the CPU setup.
    chainer.backends.cuda.check_cuda_available()
    print("chainer use cuda")
    # Make the selected GPU the current device for subsequent CuPy work.
    chainer.backends.cuda.get_device_from_id(N_DEVICE_ID).use()

# Example: copying a NumPy array to the GPU by hand.
# x_cpu = np.ones((5, 4, 3), dtype=np.float32)
# x_gpu = chainer.backends.cuda.to_gpu(x_cpu, device=0)

运行输出

chainer use cuda

3. 读取数据

# Load the CSV: column 0 holds the label, the other columns hold the pixels.
df = pd.read_csv(S_DATA_PATH, header=None)
print(df.shape)
np_mat = df.values
print(np_mat.shape)

# Scale the pixel values by 1/255 as float32; keep the labels as read.
X = np_mat[:, 1:].astype(np.float32) / 255
Y = np_mat[:, 0]

# The first N_BATCH * N_BATCH_NUM rows are used for training, the rest for testing.
n_train = N_BATCH * N_BATCH_NUM
X_train, X_test = X[:n_train], X[n_train:]
Y_train, Y_test = Y[:n_train], Y[n_train:]

# Restore the image layout expected by the convolution: (N, channel, height, width).
X_train = X_train.reshape((len(X_train), 1, 28, 28))
X_test = X_test.reshape((len(X_test), 1, 28, 28))

for split in (X_train, Y_train, X_test, Y_test):
    print(split.shape)

train_iter = SerialIterator(
    TupleDataset(X_train, Y_train), batch_size=N_BATCH, shuffle=True)
test_iter = SerialIterator(
    TupleDataset(X_test, Y_test), batch_size=N_BATCH, shuffle=False)

运行输出

(42000, 785)
(42000, 785)
(32000, 1, 28, 28)
(32000,)
(10000, 1, 28, 28)
(10000,)

4. 模型构建

class Net(chainer.Chain):
    """Small CNN classifier: 3x3 convolution, 2x2 max-pooling, two linear layers.

    The flattened feature size is hard-coded to 2704 (16 * 13 * 13), which
    matches the 1x28x28 inputs used in this file.
    """

    def __init__(self):
        super().__init__()
        with self.init_scope():
            # Attribute names double as parameter names in saved snapshots,
            # so they must stay stable between saving and loading.
            self.cov2 = L.Convolution2D(in_channels=1, out_channels=16, ksize=(3, 3))
            self.L1 = L.Linear(2704, 128)
            self.L2 = L.Linear(128, 10)

    def forward(self, x):
        """Return the unnormalised class scores (logits) for the batch ``x``."""
        pooled = F.max_pooling_2d(self.cov2(x), 2)
        flat = F.reshape(pooled, (-1, 2704))
        hidden = F.relu(self.L1(flat))
        return self.L2(hidden)


# Build the network and, when a GPU was selected, move its parameters there.
net = Net()
if S_DEVICE == "cuda":
    net.to_gpu(device=N_DEVICE_ID)

# AdaGrad with its default hyper-parameters, attached to the network.
optimizer = chainer.optimizers.AdaGrad()
optimizer.setup(net)

print(net)

运行输出

Net(
  (L1): Linear(in_size=2704, out_size=128, nobias=False),
  (L2): Linear(in_size=128, out_size=10, nobias=False),
  (cov2): Convolution2D(in_channels=1, out_channels=16, ksize=(3, 3), stride=(1, 1), pad=(0, 0), nobias=False, dilate=(1, 1), groups=1),
)

5. 模型训练

print("model train")
for epoch in range(N_EPOCH):
    print('epoch', epoch, '/', N_EPOCH)
    count = 0
    epoch_done = False
    while not epoch_done:
        # Draw the next mini-batch and stack it into arrays on the target device.
        train_batch = train_iter.next()
        x_b, y_b = chainer.dataset.concat_examples(train_batch, N_DEVICE_ID)

        # Forward pass and loss.
        logit_b = net(x_b)
        loss_b = F.softmax_cross_entropy(logit_b, y_b)

        # Batch accuracy; accuracy_score is fed host (NumPy) arrays.
        np_pred = logit_b.array.argmax(axis=-1)
        if S_DEVICE == "cuda":
            # get(): cupy to numpy
            y_b = y_b.get()
            np_pred = np_pred.get()
        acc = accuracy_score(y_b, np_pred)

        # Backward pass and parameter update.
        net.cleargrads()
        loss_b.backward()
        optimizer.update()

        count += 1
        if count % 50 == 0:
            print("env {}".format(type(x_b)))
            print("Train epoch:{} batch:{} loss:{} acc:{}".format(epoch, count, loss_b, acc))

        # The iterator raises this flag on the batch that completes an epoch.
        epoch_done = train_iter.is_new_epoch
print()

运行输出

model train
epoch 0 / 1
env <class 'cupy._core.core.ndarray'>
Train epoch:0 batch:50 loss:variable(0.6438818) acc:0.8125
env <class 'cupy._core.core.ndarray'>
Train epoch:0 batch:100 loss:variable(0.33969015) acc:0.9375
env <class 'cupy._core.core.ndarray'>
Train epoch:0 batch:150 loss:variable(0.36363766) acc:0.9140625
env <class 'cupy._core.core.ndarray'>
Train epoch:0 batch:200 loss:variable(0.41578048) acc:0.859375
env <class 'cupy._core.core.ndarray'>
Train epoch:0 batch:250 loss:variable(0.3055666) acc:0.921875

6.模型预测

print("model pred")
# Start from the first test sample even when this section is run again.
test_iter.reset()
count = 0
# Inference only: turn train mode off and skip building the backward graph.
with chainer.using_config("train", False), chainer.no_backprop_mode():
    while True:
        test_batch = test_iter.next()
        # Evaluate the batch just drawn from the test iterator, not a
        # leftover training batch.
        x_b, y_b = chainer.dataset.concat_examples(test_batch, N_DEVICE_ID)

        logit_b = net(x_b)
        loss_b = F.softmax_cross_entropy(logit_b, y_b)

        np_pred = logit_b.array.argmax(axis=-1)
        if S_DEVICE == "cuda":
            y_b, np_pred = y_b.get(), np_pred.get()  # get(): cupy to numpy
        acc = accuracy_score(y_b, np_pred)

        count += 1
        if count % 10 == 0:
            print("env {}".format(type(x_b)))
            print("Test batch:{} loss:{} acc:{}".format(count, loss_b, acc))
        # Stop once the iterator has gone through the whole test set.
        if test_iter.is_new_epoch:
            break
print()

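运行输出(注意:下面各 batch 的 loss/acc 完全相同,说明这份输出每次评估的都是同一个 batch——训练循环留下的最后一个 train_batch,而不是新取出的 test_batch;逐个评估 test_batch 时,各 batch 的数值会不同)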

model pred
env <class 'cupy._core.core.ndarray'>
Test batch:10 loss:variable(0.29355824) acc:0.921875
env <class 'cupy._core.core.ndarray'>
Test batch:20 loss:variable(0.29355824) acc:0.921875
env <class 'cupy._core.core.ndarray'>
Test batch:30 loss:variable(0.29355824) acc:0.921875
env <class 'cupy._core.core.ndarray'>
Test batch:40 loss:variable(0.29355824) acc:0.921875
env <class 'cupy._core.core.ndarray'>
Test batch:50 loss:variable(0.29355824) acc:0.921875
env <class 'cupy._core.core.ndarray'>
Test batch:60 loss:variable(0.29355824) acc:0.921875
env <class 'cupy._core.core.ndarray'>
Test batch:70 loss:variable(0.29355824) acc:0.921875

7.模型保存

# Bring the parameters back to host memory, then write them to an .npz snapshot.
# Note that `net` stays on the CPU after this point.
net.to_cpu()
chainer.serializers.save_npz(file=S_CHINER_MODEL_PATH, obj=net)

8.模型加载和加载模型使用

print("load torch model and pred test data")
# Rebuild the architecture, then fill it with the saved parameters.
net_load = Net()
chainer.serializers.load_npz(S_CHINER_MODEL_PATH, net_load)
if S_DEVICE == "cuda":
    net_load.to_gpu(N_DEVICE_ID)
print("loaded model pred")
# Start from the first test sample regardless of how far the iterator has
# already been advanced.
test_iter.reset()
count = 0
# Inference only: turn train mode off and skip building the backward graph.
with chainer.using_config("train", False), chainer.no_backprop_mode():
    while True:
        test_batch = test_iter.next()
        # Evaluate the batch just drawn from the test iterator, not a
        # leftover training batch.
        x_b, y_b = chainer.dataset.concat_examples(test_batch, N_DEVICE_ID)

        logit_b = net_load(x_b)
        loss_b = F.softmax_cross_entropy(logit_b, y_b)

        np_pred = logit_b.array.argmax(axis=-1)
        if S_DEVICE == "cuda":
            y_b, np_pred = y_b.get(), np_pred.get()  # get(): cupy to numpy
        acc = accuracy_score(y_b, np_pred)

        count += 1
        if count % 10 == 0:
            print("env {}".format(type(x_b)))
            print("Test batch:{} loss:{} acc:{}".format(count, loss_b, acc))
        # Stop once the iterator has gone through the whole test set.
        if test_iter.is_new_epoch:
            break
print()

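运行输出(注意:下面各 batch 的 loss/acc 完全相同,说明这份输出每次评估的都是同一个 batch——训练循环留下的最后一个 train_batch,而不是新取出的 test_batch;逐个评估 test_batch 时,各 batch 的数值会不同)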

load torch model and pred test data
loaded model pred
env <class 'cupy._core.core.ndarray'>
Test batch:10 loss:variable(0.29355824) acc:0.921875
env <class 'cupy._core.core.ndarray'>
Test batch:20 loss:variable(0.29355824) acc:0.921875
env <class 'cupy._core.core.ndarray'>
Test batch:30 loss:variable(0.29355824) acc:0.921875
env <class 'cupy._core.core.ndarray'>
Test batch:40 loss:variable(0.29355824) acc:0.921875
env <class 'cupy._core.core.ndarray'>
Test batch:50 loss:variable(0.29355824) acc:0.921875
env <class 'cupy._core.core.ndarray'>
Test batch:60 loss:variable(0.29355824) acc:0.921875
env <class 'cupy._core.core.ndarray'>
Test batch:70 loss:variable(0.29355824) acc:0.921875

9.导出ONNX

# Dummy input used to trace the network; its shape (batch size included)
# becomes the input shape of the exported graph.
x = np.zeros(shape=(N_BATCH, 1, 28, 28), dtype=np.float32)
# Put Chainer into inference mode globally before exporting.
chainer.config.train = False
onnx_chainer.export(net, x, filename=S_ONNX_MODEL_PATH)

运行输出

ir_version: 7
producer_name: "Chainer"
producer_version: "6.7.0"
graph {
  node {
    input: "Input_0"
    input: "param_cov2_W"
    input: "param_cov2_b"
    output: "Convolution2DFunction_0"
    name: "Convolution2DFunction_0"
    op_type: "Conv"
    attribute {
      name: "dilations"
      ints: 1
      ints: 1
      type: INTS
    }
    attribute {
      name: "group"
      i: 1
      type: INT
    }
    attribute {
      name: "kernel_shape"
      ints: 3
      ints: 3
      type: INTS
    }
    attribute {
      name: "pads"
      ints: 0
      ints: 0
      ints: 0
      ints: 0
      type: INTS
    }
    attribute {
      name: "strides"
      ints: 1
      ints: 1
      type: INTS
    }
  }
  node {
    input: "Convolution2DFunction_0"
    output: "MaxPooling2D_0"
    name: "MaxPooling2D_0"
    op_type: "MaxPool"
    attribute {
      name: "kernel_shape"
      ints: 2
      ints: 2
      type: INTS
    }
    attribute {
      name: "pads"
      ints: 0
      ints: 0
      ints: 1
      ints: 1
      type: INTS
    }
    attribute {
      name: "storage_order"
      i: 0
      type: INT
    }
    attribute {
      name: "strides"
      ints: 2
      ints: 2
      type: INTS
    }
  }
  node {
    output: "Reshape_0_const_shape"
    name: "Reshape_0"
    op_type: "Constant"
    attribute {
      name: "value"
      t {
        dims: 2
        data_type: 7
        int64_data: -1
        int64_data: 2704
        name: "Reshape_0_const_shape"
      }
      type: TENSOR
    }
  }
  node {
    input: "MaxPooling2D_0"
    input: "Reshape_0_const_shape"
    output: "Reshape_0"
    name: "Reshape_0"
    op_type: "Reshape"
  }
  node {
    input: "Reshape_0"
    input: "param_L1_W"
    input: "param_L1_b"
    output: "LinearFunction_0"
    name: "LinearFunction_0"
    op_type: "Gemm"
    attribute {
      name: "alpha"
      f: 1.0
      type: FLOAT
    }
    attribute {
      name: "beta"
      f: 1.0
      type: FLOAT
    }
    attribute {
      name: "transA"
      i: 0
      type: INT
    }
    attribute {
      name: "transB"
      i: 1
      type: INT
    }
  }
  node {
    input: "LinearFunction_0"
    output: "ReLU_0"
    name: "ReLU_0"
    op_type: "Relu"
  }
  node {
    input: "ReLU_0"
    input: "param_L2_W"
    input: "param_L2_b"
    output: "LinearFunction_1"
    name: "LinearFunction_1"
    op_type: "Gemm"
    attribute {
      name: "alpha"
      f: 1.0
      type: FLOAT
    }
    attribute {
      name: "beta"
      f: 1.0
      type: FLOAT
    }
    attribute {
      name: "transA"
      i: 0
      type: INT
    }
    attribute {
      name: "transB"
      i: 1
      type: INT
    }
  }
  name: "Graph"
  initializer {
    dims: 128
    dims: 2704
    data_type: 1
    name: "param_L1_W"
    raw_data: "\361\037..

10. 加载ONNX并运行

model = onnx.load(S_ONNX_MODEL_PATH)
print(onnx.checker.check_model(model))  # Check that the model is well formed
print(onnx.helper.printable_graph(model.graph))  # Print a human readable representation of the graph
ls_input_name = [graph_input.name for graph_input in model.graph.input]
ls_output_name = [graph_output.name for graph_output in model.graph.output]
print("input name ", ls_input_name)
print("output name ", ls_output_name)
# graph.input lists the weights as well as the data input, so select the
# data input by excluding every name that has an initializer instead of
# relying on its position in the list.
initializer_names = {initializer.name for initializer in model.graph.initializer}
ls_data_input_name = [name for name in ls_input_name if name not in initializer_names]
s_input_name = ls_data_input_name[0]

x_input = X_train[:N_BATCH, :, :, :].astype(np.float32)
# N_DEVICE_ID follows Chainer's convention (-1 means CPU); ONNX Runtime
# device ids start at 0, so clamp it.
n_ort_device_id = max(N_DEVICE_ID, 0)
ort_val = ort.OrtValue.ortvalue_from_numpy(x_input, S_DEVICE, n_ort_device_id)
print("val device ", ort_val.device_name())
print("val shape ", ort_val.shape())
print("val data type ", ort_val.data_type())
print("is_tensor ", ort_val.is_tensor())
print("array_equal ", np.array_equal(ort_val.numpy(), x_input))
s_provider = 'CUDAExecutionProvider' if S_DEVICE == "cuda" else 'CPUExecutionProvider'
print("providers ", s_provider)
# Providers are tried in list order. For example
# ['CUDAExecutionProvider', 'CPUExecutionProvider'] means: execute a node
# using CUDAExecutionProvider if capable, otherwise execute it using
# CPUExecutionProvider.
ort_session = ort.InferenceSession(S_ONNX_MODEL_PATH, providers=[s_provider])  # runs on the GPU when S_DEVICE is "cuda"
# NOTE(review): the providers were already passed to the constructor, so this
# call looks redundant; confirm before removing.
ort_session.set_providers([s_provider])
outputs = ort_session.run(None, {s_input_name: ort_val})
print("sess env ", ort_session.get_providers())
print(type(outputs))
print(outputs[0])

运行输出

None
graph Graph (
  %Input_0[FLOAT, 128x1x28x28]
) optional inputs with matching initializers (
  %param_L1_W[FLOAT, 128x2704]
  %param_L1_b[FLOAT, 128]
  %param_L2_W[FLOAT, 10x128]
  %param_L2_b[FLOAT, 10]
  %param_cov2_W[FLOAT, 16x1x3x3]
  %param_cov2_b[FLOAT, 16]
) {
  %Convolution2DFunction_0 = Conv[dilations = [1, 1], group = 1, kernel_shape = [3, 3], pads = [0, 0, 0, 0], strides = [1, 1]](%Input_0, %param_cov2_W, %param_cov2_b)
  %MaxPooling2D_0 = MaxPool[kernel_shape = [2, 2], pads = [0, 0, 1, 1], storage_order = 0, strides = [2, 2]](%Convolution2DFunction_0)
  %Reshape_0_const_shape = Constant[value = <Tensor>]()
  %Reshape_0 = Reshape(%MaxPooling2D_0, %Reshape_0_const_shape)
  %LinearFunction_0 = Gemm[alpha = 1, beta = 1, transA = 0, transB = 1](%Reshape_0, %param_L1_W, %param_L1_b)
  %ReLU_0 = Relu(%LinearFunction_0)
  %LinearFunction_1 = Gemm[alpha = 1, beta = 1, transA = 0, transB = 1](%ReLU_0, %param_L2_W, %param_L2_b)
  return %LinearFunction_1
}
input name  ['param_L1_W', 'param_L1_b', 'param_L2_W', 'param_L2_b', 'param_cov2_W', 'param_cov2_b', 'Input_0']
output name  ['LinearFunction_1']
val device  cuda
val shape  [128, 1, 28, 28]
val data type  tensor(float)
is_tensor  True
array_equal  True
providers  CUDAExecutionProvider
sess env  ['CUDAExecutionProvider', 'CPUExecutionProvider']
<class 'list'>
[[-2.0423372   4.5884066   0.36332878 ... -2.4472551   2.0819268
  -2.7261026 ]
 [ 8.305377   -6.7392306   2.3732197  ... -1.0024172   1.6413575
  -3.7483473 ]
 [-2.6331928   5.8743267  -0.6629743  ... -0.5096626   0.5064622
  -1.2187834 ]
 ...
 [ 0.39152578 -2.465724    1.177169   ... -3.6535776   0.03016166
  -1.6232893 ]
 [-2.476002   -0.02901424 -0.0378947  ...  5.504667   -0.8124644
   2.1768222 ]
 [-1.5162327  -2.5863512   2.1492455  ... -5.290888    1.1870652
  -2.5682375 ]]

你甚至不愿意Star的Github

ai_fast_handbook

posted @ 2022-04-03 21:53  Yumeka  阅读(162)  评论(0编辑  收藏  举报