MXNet Learning Notes
1. kv = mx.kvstore.create()
KVStore mainly determines whether gradient aggregation and weight updates run on the CPU or the GPU (a minimal usage sketch follows this list):
kv = mx.kvstore.create("device"): gradients are aggregated and weights are updated on the GPU.
kv = mx.kvstore.create("local"): gradients are copied to the CPU and weights are updated there.
kv = mx.kvstore.create("dist_device_sync"): synchronous distributed training across multiple machines.
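A minimal sketch of wiring a KVStore into Gluon training; the network and hyperparameters here are placeholder assumptions, not from the original notes:

```python
import mxnet as mx
from mxnet import gluon

# Choose where gradient aggregation and updates happen:
# "device" = on GPU, "local" = on CPU, "dist_device_sync" = distributed sync.
kv = mx.kvstore.create("device")

ctx = mx.gpu() if mx.context.num_gpus() else mx.cpu()
net = gluon.nn.Dense(10)   # placeholder network
net.initialize(ctx=ctx)

# gluon.Trainer accepts a KVStore object (or its type string) via `kvstore`.
trainer = gluon.Trainer(net.collect_params(), "sgd",
                        {"learning_rate": 0.1}, kvstore=kv)
```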
2. Model loading
```python
sym, arg_params, aux_params = mx.model.load_checkpoint(prefix, epoch)
print(sym)
# print(arg_params)
# print(aux_params)

# Extract the output of an intermediate layer (the feature layer) as the new output
all_layers = sym.get_internals()
print(all_layers)
sym = all_layers['fc1_output']

# Rebuild the model
model = mx.mod.Module(symbol=sym, label_names=None)
model.bind(for_training=False, data_shapes=[('data', (1, 3, 112, 112))])
model.set_params(arg_params, aux_params)
```
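To actually pull features out of the rebuilt Module, a minimal continuation sketch (the Batch namedtuple is the usual Module forward idiom; the input values are random placeholders):

```python
import mxnet as mx
from collections import namedtuple

# Module.forward expects an object with a `data` attribute.
Batch = namedtuple("Batch", ["data"])

img = mx.nd.random.uniform(shape=(1, 3, 112, 112))  # matches the bind() shape
model.forward(Batch([img]))
embedding = model.get_outputs()[0].asnumpy()
print(embedding.shape)  # e.g. (1, 512) if fc1 is a 512-d layer
```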
.params is a binary file holding the parameters; .json is a text file describing the network structure. A trained model's parameters are saved in the .params file, and its network structure in the .json file.
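Writing the two files is a single call; a minimal sketch continuing the snippet above (the prefix "fc1_model" is a made-up example):

```python
# Writes fc1_model-symbol.json (structure) and fc1_model-0000.params (weights).
mx.model.save_checkpoint("fc1_model", 0, sym, arg_params, aux_params)
```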
ref:
How networks and parameters are saved and loaded in MXNet/Gluon
Loading an MXNet model and running forward inference; MXNet study notes 1: the full inference pipeline
3. MXNet model saving and loading
```python
from __future__ import print_function

import mxnet as mx
from mxnet import nd, autograd, gluon
from mxnet.gluon.data.vision import transforms

import numpy as np

# Use GPU if one exists, else use CPU
ctx = mx.gpu() if mx.context.num_gpus() else mx.cpu()

# MNIST images are 28x28. Total pixels in input layer is 28x28 = 784
num_inputs = 784
# Classify the images into one of the 10 digits
num_outputs = 10
# 2 images in a batch
batch_size = 2

# Load the training data
train_data = gluon.data.DataLoader(
    gluon.data.vision.MNIST(train=True).transform_first(transforms.ToTensor()),
    batch_size, shuffle=True)

# Build a simple convolutional network
def build_lenet(net):
    with net.name_scope():
        # First convolution
        net.add(gluon.nn.Conv2D(channels=10, kernel_size=3, activation='relu'))
        net.add(gluon.nn.MaxPool2D(pool_size=2, strides=2))
        # Second convolution
        net.add(gluon.nn.Conv2D(channels=10, kernel_size=3, activation='relu'))
        net.add(gluon.nn.MaxPool2D(pool_size=2, strides=2))
        # Flatten the output before the fully connected layers
        net.add(gluon.nn.Flatten())
        # First fully connected layer with 16 neurons
        net.add(gluon.nn.Dense(16, activation="relu"))
        # Second fully connected layer with as many neurons as the number of classes
        net.add(gluon.nn.Dense(num_outputs))
    return net

# Train a given model using MNIST data
def train_model(model):
    # Initialize the parameters with Xavier initializer
    model.collect_params().initialize(mx.init.Xavier(), ctx=ctx)
    # Use cross entropy loss
    softmax_cross_entropy = gluon.loss.SoftmaxCrossEntropyLoss()
    # Use Adam optimizer
    trainer = gluon.Trainer(model.collect_params(), 'adam', {'learning_rate': .001})

    # Train for one epoch
    for epoch in range(1):
        # Iterate through the images and labels in the training data
        for batch_num, (data, label) in enumerate(train_data):
            # Get the images and labels
            data = data.as_in_context(ctx)
            label = label.as_in_context(ctx)
            # Ask autograd to record the forward pass
            with autograd.record():
                # Run the forward pass
                output = model(data)
                # Compute the loss
                loss = softmax_cross_entropy(output, label)
            # Compute gradients
            loss.backward()
            # Update parameters
            trainer.step(data.shape[0])

            # Print loss once in a while
            if batch_num % 10 == 0:
                curr_loss = nd.mean(loss).asscalar()
                print("Epoch: %d; Batch %d; Loss %f" % (epoch, batch_num, curr_loss))
            if batch_num >= 100:
                print('*' * 10 + 'finished' + '*' * 10)
                break


"""
# way 1: save/load parameters only (the Python code is needed to rebuild the net)
net = build_lenet(gluon.nn.Sequential())
train_model(net)

# save model
file_name = "net.params"
net.save_parameters(file_name)

# load model
new_net = build_lenet(gluon.nn.Sequential())
new_net.load_parameters(file_name, ctx=ctx)

x = nd.random.uniform(shape=(1, 1, 28, 28), ctx=ctx)
print(new_net(x).shape)
"""


"""
# way 2-1: export a hybridized net, then load it with SymbolBlock.imports
net = build_lenet(gluon.nn.HybridSequential())
net.hybridize()
train_model(net)

net.export("new_net", epoch=111)

import warnings
with warnings.catch_warnings():
    warnings.simplefilter("ignore")
    deserialized_net = gluon.nn.SymbolBlock.imports(
        "new_net-symbol.json", ['data'], "new_net-0111.params", ctx=ctx)

x = nd.random.uniform(shape=(1, 1, 28, 28), ctx=ctx)
print(deserialized_net(x).shape)
"""


"""
# way 2-2: restore the exported model with mxnet.mod.Module, via load_checkpoint
syms_, arg_params, aux_params = mx.model.load_checkpoint("new_net", 111)

# Set label_names to None, otherwise bind() emits a warning
mod = mx.mod.Module(symbol=syms_, context=ctx,
                    data_names=["data"], label_names=None)
mod.bind(for_training=False, data_shapes=[("data", (32, 1, 28, 28))])
# Set the model parameters
mod.set_params(arg_params, aux_params, allow_missing=True)

X = nd.random.uniform(shape=(3, 1, 28, 28), ctx=ctx)
x_iter = mx.io.NDArrayIter(X, batch_size=1)

mod.predict(x_iter)

# Restore the model with mxnet.mod.Module.load
net = mx.mod.Module.load("new_net", 111)
net.bind(for_training=False, data_shapes=[("data", (32, 1, 28, 28))])
"""
```
Note on approach 2 (one way to save, two ways to load): a hybridized model is a Gluon model on which hybridize() has been called. After calling hybridize() and running one forward pass, export() can save the symbolic program and the model parameters to disk, yielding two files: finetune_net-symbol.json | finetune_net-0000.params.
Approach 2-1 above pairs that save with the first loading method, gluon.nn.SymbolBlock.imports; approach 2-2 shows the second, mxnet.mod.Module.
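A condensed sketch of that export flow (reusing build_lenet and ctx from the script above; the prefix matches the file names in the note):

```python
# One forward pass is required after hybridize() so the symbolic graph gets built.
net = build_lenet(gluon.nn.HybridSequential())
net.initialize(mx.init.Xavier(), ctx=ctx)
net.hybridize()
net(nd.random.uniform(shape=(1, 1, 28, 28), ctx=ctx))  # trigger graph construction

# Writes finetune_net-symbol.json and finetune_net-0000.params.
net.export("finetune_net", epoch=0)
```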
References: Zhihu, the official docs, and "MXNet symbol"
4. MXNet video reading
An optimization strategy that makes MXNet video I/O 18x faster; code: https://github.com/MTCloudVision/mxnet-videoio
5. MXNet data loading
Customizable data preprocessing and effortless data augmentation (part 1)
Customizable data preprocessing and effortless data augmentation (part 2)
Building an MXNet dataset
A brief introduction to the MXNet framework
How MXNet generates .lst and .rec files
MXNet learning (7): data loading methods
Common ways to load data in MXNet
Notes on advanced Gluon and optimization-algorithm basics, covering hybridize and wait_to_read
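As a quick illustration of the .lst/.rec pipeline those references cover, a minimal reading sketch (the path and shapes are hypothetical; the .rec file would come from MXNet's im2rec tooling):

```python
import mxnet as mx

# Read a RecordIO file produced by im2rec (path and shapes are placeholders).
train_iter = mx.io.ImageRecordIter(
    path_imgrec="data/train.rec",  # assumed path
    data_shape=(3, 112, 112),      # channels, height, width after decode/resize
    batch_size=32,
    shuffle=True,
)

for batch in train_iter:
    print(batch.data[0].shape)     # (32, 3, 112, 112)
    break
```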