# gluon实现VGG — implementing VGG with MXNet Gluon
from __future__ import print_function

import mxnet as mx
import numpy as np
from mxnet import autograd, gluon, nd

# Seed MXNet's random number generator with a fixed value.
mx.random.seed(1)

# Device context used for parameters and batches throughout the script.
ctx = mx.cpu()
# Number of samples per mini-batch for both data loaders.
batch_size = 64


def transform(data, label):
    """Convert one (image, label) sample into float32 arrays.

    The image is cast to float32, its axes are permuted with (2, 0, 1)
    and the result is divided by 255; the label is cast to float32.
    NOTE(review): presumably the input is a height-width-channel uint8
    image, making the output channel-first in [0, 1] -- confirm.

    Returns:
        A ``(image, label)`` tuple.
    """
    pixels = data.astype(np.float32)
    rearranged = nd.transpose(pixels, (2, 0, 1))
    return rearranged / 255, label.astype(np.float32)


# Training split: reshuffled by the loader.
train_data = mx.gluon.data.DataLoader(
    mx.gluon.data.vision.MNIST(train=True, transform=transform),
    batch_size,
    shuffle=True,
)

# Held-out split: iterated in a fixed order.
test_data = mx.gluon.data.DataLoader(
    mx.gluon.data.vision.MNIST(train=False, transform=transform),
    batch_size,
    shuffle=False,
)
from mxnet.gluon import nn


def vgg_block(num_convs, channels):
    """Build one VGG block.

    The block is ``num_convs`` 3x3 convolutions (padding 1, ReLU), each
    with ``channels`` output channels, followed by a single 2x2 max-pool
    with stride 2.
    """
    block = nn.Sequential()
    remaining = num_convs
    while remaining > 0:
        conv = nn.Conv2D(channels=channels, kernel_size=3,
                         padding=1, activation='relu')
        block.add(conv)
        remaining -= 1
    block.add(nn.MaxPool2D(pool_size=2, strides=2))
    return block


def vgg_stack(architecture):
    """Chain one ``vgg_block`` per ``(num_convs, channels)`` pair.

    Blocks are added in the order the pairs appear in ``architecture``.
    """
    stack = nn.Sequential()
    for spec in architecture:
        num_convs, channels = spec
        stack.add(vgg_block(num_convs, channels))
    return stack


# Size of the final Dense layer (one output per class).
num_outputs = 10

# (number of conv layers, output channels) for each VGG block.
architecture = ((1, 64), (1, 128), (2, 256), (2, 512))

net = nn.Sequential()
with net.name_scope():
    # Convolutional feature extractor.
    net.add(vgg_stack(architecture))
    net.add(nn.Flatten())
    # Two identical hidden stages: Dense(512, ReLU) followed by dropout.
    for _ in range(2):
        net.add(nn.Dense(512, activation="relu"))
        net.add(nn.Dropout(.5))
    # Output layer.
    net.add(nn.Dense(num_outputs))
# Initialise every parameter of the network with Xavier initialisation
# on the chosen context.
net.collect_params().initialize(
    init=mx.init.Xavier(magnitude=2.24),
    ctx=ctx,
)

# Plain SGD over all network parameters.
trainer = gluon.Trainer(
    params=net.collect_params(),
    optimizer='sgd',
    optimizer_params={'learning_rate': .05},
)

# Loss applied to the raw network outputs and the labels.
softmax_cross_entropy = gluon.loss.SoftmaxCrossEntropyLoss()
def accuracy(output, label):
    """Return the fraction of rows whose arg-max over axis 1 equals ``label``.

    The result is converted to a Python scalar with ``asscalar()``.
    """
    predicted = output.argmax(axis=1)
    hits = predicted == label
    return nd.mean(hits).asscalar()


def evaluate_accuracy(data_iterator, net):
    """Compute the accuracy of ``net`` over every batch of ``data_iterator``.

    Each batch is moved to ``ctx``, run through the network, and its
    arg-max predictions (axis 1) are fed into an ``mx.metric.Accuracy``
    accumulator.

    Returns:
        The value component of the metric's ``get()`` result.
    """
    metric = mx.metric.Accuracy()
    for batch_x, batch_y in data_iterator:
        inputs = batch_x.as_in_context(ctx)
        targets = batch_y.as_in_context(ctx)
        scores = net(inputs)
        guesses = nd.argmax(scores, axis=1)
        metric.update(preds=guesses, labels=targets)
    _, value = metric.get()
    return value
# Number of full passes over the training set.
epochs = 5
# NOTE: not referenced by the loop below; kept so the module-level name
# remains defined for any code that may read it.
smoothing_constant = .01

for epoch in range(epochs):
    # Running sums of the per-batch mean loss and per-batch accuracy.
    train_loss = 0.
    train_acc = 0.
    for data, label in train_data:
        # Move BOTH inputs to the target context (evaluate_accuracy does
        # the same); otherwise a non-CPU ctx fails with a device mismatch.
        data = data.as_in_context(ctx)
        label = label.as_in_context(ctx)
        with autograd.record():
            output = net(data)
            loss = softmax_cross_entropy(output, label)
        loss.backward()
        # Normalise the gradient by the actual number of samples in this
        # batch: the last batch can be smaller than batch_size.
        trainer.step(data.shape[0])
        train_loss += nd.mean(loss).asscalar()
        train_acc += accuracy(output, label)

    # Evaluate on the held-out set once per epoch and report averages
    # over the number of training batches.
    test_acc = evaluate_accuracy(test_data, net)
    num_batches = len(train_data)
    print("Epoch %d. Loss: %f, Train acc %f, Test acc %f" % (
        epoch, train_loss / num_batches, train_acc / num_batches, test_acc))