MXNet deserialization: from symbol to Gluon
SymbolBlock: class mxnet.gluon.nn.SymbolBlock(outputs, inputs, params=None)
Inherits from HybridBlock.
1. Loading and customizing a pretrained model with Gluon
SymbolBlock builds a block from a symbol. It is typically useful when you only need part of a pretrained model and want to attach further processing to it. For example, the official documentation shows how to extract features from the fc1 and fc2 layers of a pretrained AlexNet:
>>> # To extract the features from the fc1 and fc2 layers of AlexNet:
>>> alexnet = gluon.model_zoo.vision.alexnet(pretrained=True, ctx=mx.cpu(), prefix='model_')  # Gluon model
>>> inputs = mx.sym.var('data')
>>> out = alexnet(inputs)  # ⚠️ this call turns the Gluon model into a symbol graph
>>> internals = out.get_internals()
>>> print(internals.list_outputs())  # prints every layer so you can choose which features to extract
['data', ..., 'model_dense0_relu_fwd_output', ..., 'model_dense1_relu_fwd_output', ...]
>>> outputs = [internals['model_dense0_relu_fwd_output'],  # fc1
...            internals['model_dense1_relu_fwd_output']]  # fc2
>>> # Create a SymbolBlock that shares parameters with alexnet
>>> feat_model = gluon.SymbolBlock(outputs, inputs, params=alexnet.collect_params())  # shares alexnet's weights; ⚠️ note how the parameters are passed in
>>> x = mx.nd.random.normal(shape=(16, 3, 224, 224))
>>> print(feat_model(x))  # forward pass directly
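As a quick sanity check (a minimal sketch reusing feat_model and x from the snippet above): SymbolBlock returns one NDArray per entry in outputs, so the fc1 and fc2 features come back as a list of two arrays, each of which should have AlexNet's 4096-dimensional fully connected width.

# Minimal usage sketch (assumes feat_model and x from the example above)
fc1_feat, fc2_feat = feat_model(x)
print(fc1_feat.shape)  # expected (16, 4096): fc1 features
print(fc2_feat.shape)  # expected (16, 4096): fc2 features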
2. Loading and customizing a pretrained model from a symbol checkpoint (symbol deserialization)
The example above starts from a Gluon pretrained model. But what if the pretrained weights are your own, e.g. you trained a resnet18 yourself and ended up with a symbol checkpoint consisting of a network definition (new_net-symbol.json) and a weight file (new_net-0111.params)?
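For context, such a checkpoint is typically written out by hybridizing a Gluon model, running one forward pass, and calling export. A minimal sketch, assuming the prefix "new_net" and epoch 111 to match the file names used below (the resnet18 here is only a stand-in for whatever model you actually trained):

import mxnet as mx
from mxnet import gluon, nd

net = gluon.model_zoo.vision.resnet18_v1(pretrained=True)  # stand-in for your own trained HybridBlock
net.hybridize()                                # switch to the symbolic graph
net(nd.random.normal(shape=(1, 3, 224, 224)))  # one forward pass so the graph gets built
net.export("new_net", epoch=111)               # writes new_net-symbol.json and new_net-0111.params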
In that case the symbol model and its weights can be loaded like this:
import warnings

with warnings.catch_warnings():
    warnings.simplefilter("ignore")
    deserialized_net = gluon.nn.SymbolBlock.imports("new_net-symbol.json", ['data'],
                                                    "new_net-0111.params", ctx=ctx)  # symbol loading method 1
This imports both the network and its parameters, after which you can evaluate the model or continue training it.
However, if you only need part of the model, e.g. the output of some intermediate layer, or you want to append extra layers, importing the whole block this way becomes awkward. The better approach is the second way of loading a symbol model, shown next:
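The imported block behaves like any other Gluon block; for example (a hedged sketch, assuming MNIST-sized 1x28x28 inputs, which is what the network later in this post was trained on):

x = mx.nd.random.normal(shape=(1, 1, 28, 28), ctx=ctx)
out = deserialized_net(x)  # plain forward pass through the deserialized network
print(out.shape)           # expected (1, 10), one score per MNIST class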
sym, arg_params, aux_params = mx.model.load_checkpoint("new_net", 111)  # symbol loading method 2; the remaining steps mirror section 1
layers = sym.get_internals()
outputs = layers['hybridsequential0_dense1_fwd_output']  # find the layer name via layers = sym.get_internals().list_outputs()
inputs = layers['data']
net = gluon.SymbolBlock(outputs, inputs)
net.load_parameters('new_net-0111.params', ignore_extra=True, allow_missing=True)  # ⚠️ the weights cannot be loaded the way section 1 did it, because there the parameters came from a Gluon net's collect_params()
Here the weights of net are loaded the Gluon way, via load_parameters.
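If you are not sure which internal output name to pick, it can help to print them all first; a small inspection sketch using the same checkpoint:

sym, arg_params, aux_params = mx.model.load_checkpoint("new_net", 111)
for name in sym.get_internals().list_outputs():
    print(name)  # e.g. ..., 'hybridsequential0_dense1_fwd_output', ...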
The example below appends hybrid Gluon layers after the pretrained model, combining symbol and Gluon in a single network.
The combined model can then be trained the Gluon way rather than through the symbol fit API, which is more flexible:
from __future__ import print_function

import mxnet as mx
from mxnet import nd, autograd, gluon, init
from mxnet.gluon.data.vision import transforms
from mxnet.gluon import nn

import numpy as np

# Use GPU if one exists, else use CPU
ctx = mx.gpu() if mx.context.num_gpus() else mx.cpu()

# MNIST images are 28x28. Total pixels in input layer is 28x28 = 784
num_inputs = 784
# Classify the images into one of the 10 digits
num_outputs = 10
# Number of images in a batch
batch_size = 1

# Load the training data
train_data = gluon.data.DataLoader(gluon.data.vision.MNIST(train=True).transform_first(transforms.ToTensor()),
                                   batch_size, shuffle=True)

# Build a simple convolutional network
def build_lenet(net):
    with net.name_scope():
        # First convolution
        net.add(gluon.nn.Conv2D(channels=10, kernel_size=3, activation='relu'))
        net.add(gluon.nn.MaxPool2D(pool_size=2, strides=2))
        # Second convolution
        net.add(gluon.nn.Conv2D(channels=10, kernel_size=3, activation='relu'))
        net.add(gluon.nn.MaxPool2D(pool_size=2, strides=2))
        # Flatten the output before the fully connected layers
        net.add(gluon.nn.Flatten())
        # First fully connected layer with 16 neurons
        net.add(gluon.nn.Dense(16, activation="relu"))
        # Second fully connected layer with as many neurons as the number of classes
        net.add(gluon.nn.Dense(num_outputs))

    return net

# Train a given model using MNIST data
def train_model(model):
    # Initialize the parameters with Xavier initializer
    model.collect_params().initialize(mx.init.Xavier(), ctx=ctx)
    # Use cross entropy loss
    softmax_cross_entropy = gluon.loss.SoftmaxCrossEntropyLoss()
    # Use Adam optimizer
    trainer = gluon.Trainer(model.collect_params(), 'adam', {'learning_rate': .001})

    # Train for one epoch
    for epoch in range(1):
        # Iterate through the images and labels in the training data
        for batch_num, (data, label) in enumerate(train_data):
            # get the images and labels
            data = data.as_in_context(ctx)
            label = label.as_in_context(ctx)
            # Ask autograd to record the forward pass
            with autograd.record():
                # Run the forward pass
                output = model(data)
                # Compute the loss
                loss = softmax_cross_entropy(output, label)
            # Compute gradients
            loss.backward()
            # Update parameters
            trainer.step(data.shape[0])

            # Print loss once in a while
            if batch_num % 10 == 0:
                curr_loss = nd.mean(loss).asscalar()
                print("Epoch: %d; Batch %d; Loss %f" % (epoch, batch_num, curr_loss))
            if batch_num >= 100:
                print('*' * 10 + 'finished' + '*' * 10)
                break


# Alternative: symbol loading method 1 (whole network at once)
# import warnings
# with warnings.catch_warnings():
#     warnings.simplefilter("ignore")
#     deserialized_net = gluon.nn.SymbolBlock.imports("new_net-symbol.json", ['data'], "new_net-0111.params", ctx=ctx)


sym, arg_params, aux_params = mx.model.load_checkpoint("new_net", 111)
layers = sym.get_internals()
outputs = layers['hybridsequential0_dense1_fwd_output']  # find the layer name via layers = sym.get_internals().list_outputs()
inputs = layers['data']
net = gluon.SymbolBlock(outputs, inputs)
net.load_parameters('new_net-0111.params', ignore_extra=True, allow_missing=True)  # note this Gluon-style way of loading the weights


class PretrainedNetwork(gluon.HybridBlock):
    def __init__(self, pretrained_layer, **kwargs):
        super(PretrainedNetwork, self).__init__(**kwargs)
        with self.name_scope():
            self.pretrained_layer = pretrained_layer
            self.fc = nn.HybridSequential()
            self.fc.add(
                nn.Flatten(),
                nn.Dense(256, activation='relu'),
                nn.Dropout(rate=0.5),
                nn.Dense(128)
            )
            self.single_fc = nn.Dense(10)

    def hybrid_forward(self, F, x):
        x = self.pretrained_layer(x)  # the pretrained part prepared above
        x = self.fc(x)                # followed by the extra layers we define ourselves
        y = self.single_fc(x)

        return y


model = PretrainedNetwork(pretrained_layer=net)  # the combined model
train_model(model)  # train the Gluon way instead of the Module (symbol) fit API, which is more flexible

# train_model(net)  # the plain pretrained net can of course also be trained this way
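If you later want to serialize the combined model back into the symbol format, the usual route is again hybridize, one forward pass, then export; a hedged sketch (the prefix "combined_net" is just an example name, and this assumes train_model(model) has already initialized the parameters):

model.hybridize()
model(mx.nd.random.normal(shape=(1, 1, 28, 28), ctx=ctx))  # forward pass to build the graph
model.export("combined_net", epoch=0)  # writes combined_net-symbol.json and combined_net-0000.params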
Print the structure of this mixed model:
print(model)

PretrainedNetwork(
  (pretrained_layer): SymbolBlock(
    <Symbol hybridsequential0_dense1_fwd> : 1 -> 1   # the symbol block
  )
  (fc): HybridSequential(
    (0): Flatten
    (1): Dense(None -> 256, Activation(relu))        # Gluon layers: symbol and Gluon combined
    (2): Dropout(p = 0.5, axes=())
    (3): Dense(None -> 128, linear)
  )
  (single_fc): Dense(None -> 10, linear)
)
Print the parameters of this mixed model:
print(model.collect_params())

pretrainednetwork0_ (
  Parameter hybridsequential0_conv0_weight (shape=None, dtype=<class 'numpy.float32'>)
  Parameter hybridsequential0_conv0_bias (shape=None, dtype=<class 'numpy.float32'>)
  Parameter hybridsequential0_conv1_weight (shape=None, dtype=<class 'numpy.float32'>)
  Parameter hybridsequential0_conv1_bias (shape=None, dtype=<class 'numpy.float32'>)
  Parameter hybridsequential0_dense0_weight (shape=None, dtype=<class 'numpy.float32'>)
  Parameter hybridsequential0_dense0_bias (shape=None, dtype=<class 'numpy.float32'>)
  Parameter hybridsequential0_dense1_weight (shape=None, dtype=<class 'numpy.float32'>)
  Parameter hybridsequential0_dense1_bias (shape=None, dtype=<class 'numpy.float32'>)   # first half: the SymbolBlock part
  Parameter pretrainednetwork0_dense0_weight (shape=(256, 0), dtype=float32)            # second half: the Gluon part
  Parameter pretrainednetwork0_dense0_bias (shape=(256,), dtype=float32)
  Parameter pretrainednetwork0_dense1_weight (shape=(128, 0), dtype=float32)
  Parameter pretrainednetwork0_dense1_bias (shape=(128,), dtype=float32)
  Parameter pretrainednetwork0_dense2_weight (shape=(10, 0), dtype=float32)
  Parameter pretrainednetwork0_dense2_bias (shape=(10,), dtype=float32)
)
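When finetuning a mixed model like this, a common follow-up is to freeze the pretrained SymbolBlock part so that only the newly added layers receive gradient updates; a minimal sketch using Gluon's grad_req mechanism:

# Freeze every parameter of the pretrained part; gradients are then only computed for the new layers
model.pretrained_layer.collect_params().setattr('grad_req', 'null')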
Ref: 使用gluon接口读取symbol预训练模型finetune (Loading a symbol pretrained model through the Gluon interface for finetuning)