A simple implementation of softmax regression
Fashion-MNIST.py
from mxnet import nd
from mxnet.gluon import data as gdata
import sys
import time
from d2lzh import *

mnist_train = gdata.vision.FashionMNIST(train=True)
mnist_test = gdata.vision.FashionMNIST(train=False)
# print(len(mnist_train))
# print(len(mnist_test))
# feature, label = mnist_train[0]
# print(feature)
# print(label)
# X, y = mnist_train[0:9]
# show_fashion_mnist(X, get_fashion_mnist_labels(y))

num_inputs = 784   # each 28x28 image is flattened into a length-784 vector
num_outputs = 10   # one output per clothing category

W = nd.random.normal(scale=0.01, shape=(num_inputs, num_outputs))
b = nd.zeros(num_outputs)
W.attach_grad()
b.attach_grad()

batch_size = 256
transformer = gdata.vision.transforms.ToTensor()
lr = 0.03        # learning rate
num_epochs = 3   # number of training epochs

# DataLoader multiprocessing is not supported on Windows.
if sys.platform.startswith('win'):
    num_workers = 0
else:
    num_workers = 4
train_iter = gdata.DataLoader(mnist_train.transform_first(transformer),
                              batch_size, shuffle=True,
                              num_workers=num_workers)
test_iter = gdata.DataLoader(mnist_test.transform_first(transformer),
                             batch_size, shuffle=False,
                             num_workers=num_workers)

def net(X):
    # Flatten each image, apply the linear layer, then softmax.
    return softmax(nd.dot(X.reshape((-1, num_inputs)), W) + b)

# Cross-entropy loss: for each example, take the negative log of the
# predicted probability of the true class.
def cross_entropy(y_hat, y):
    return -nd.pick(y_hat, y).log()

# start = time.time()
# for X, y in train_iter:
#     continue
# print(time.time() - start)
# print(evaluate_accuracy(test_iter, net))

train_ch3(net, train_iter, test_iter, cross_entropy, num_epochs,
          batch_size, [W, b], lr)
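To see what `nd.pick` does inside `cross_entropy`, here is a small worked example (the values are illustrative): for each row of `y_hat`, `nd.pick` selects the entry indexed by the corresponding label, i.e. the probability the model assigned to the true class.

from mxnet import nd

# Two predictions over three classes; the true labels are class 0 and class 2.
y_hat = nd.array([[0.1, 0.3, 0.6], [0.3, 0.2, 0.5]])
y = nd.array([0, 2])

print(nd.pick(y_hat, y))         # [0.1 0.5]: probability of the true class
print(-nd.pick(y_hat, y).log())  # per-example cross-entropy loss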
d2lzh.py
from IPython import display
from matplotlib import pyplot as plt
from mxnet import autograd, nd
import random

# Render plots as vector graphics (SVG).
def use_svg_display():
    display.set_matplotlib_formats('svg')

# Set the default figure size.
def set_figsize(figsize=(3.5, 2.5)):
    use_svg_display()
    plt.rcParams['figure.figsize'] = figsize

# Read the data set in random mini-batches.
def data_iter(batch_size, features, labels):
    num_examples = len(features)
    indices = list(range(num_examples))
    random.shuffle(indices)  # examples are read in random order
    for i in range(0, num_examples, batch_size):
        # Take the elements from i to i + batch_size - 1
        # (the last batch may be smaller).
        j = nd.array(indices[i:min(i + batch_size, num_examples)])
        # Yield the j-th examples and their labels.
        yield features.take(j), labels.take(j)

# Vectorized expression for linear regression.
def linreg(X, w, b):
    return nd.dot(X, w) + b

# Squared loss.
def squared_loss(y_hat, y):
    return (y_hat - y.reshape(y_hat.shape)) ** 2 / 2

# Mini-batch stochastic gradient descent. It optimizes the loss function
# by iteratively updating the model parameters. The gradient computed by
# the automatic differentiation module is the sum over a batch of
# examples, so we divide by the batch size to get the average.
def sgd(params, lr, batch_size):
    for param in params:
        param[:] = param - lr * param.grad / batch_size

# Scatter plot of the second feature against the labels.
def paint(features, labels):
    set_figsize()
    plt.scatter(features[:, 1].asnumpy(), labels.asnumpy(), 1)
    plt.show()

# Fashion-MNIST contains 10 categories: t-shirt, trouser, pullover, dress,
# coat, sandal, shirt, sneaker, bag and ankle boot. This function converts
# numeric labels into the corresponding text labels.
def get_fashion_mnist_labels(labels):
    text_labels = ['t-shirt', 'trouser', 'pullover', 'dress', 'coat',
                   'sandal', 'shirt', 'sneaker', 'bag', 'ankle boot']
    return [text_labels[int(i)] for i in labels]

# Draw several images and their labels in a single row.
def show_fashion_mnist(images, labels):
    use_svg_display()
    # _ denotes a variable we ignore.
    _, figs = plt.subplots(1, len(images), figsize=(12, 12))
    for f, img, lbl in zip(figs, images, labels):
        f.imshow(img.reshape((28, 28)).asnumpy())
        f.set_title(lbl)
        f.axes.get_xaxis().set_visible(False)
        f.axes.get_yaxis().set_visible(False)
    plt.show()

def softmax(X):
    X_exp = X.exp()
    partition = X_exp.sum(axis=1, keepdims=True)
    return X_exp / partition

# Fraction of predictions that match the labels.
def accuracy(y_hat, y):
    return (y_hat.argmax(axis=1) == y.astype('float32')).mean().asscalar()

def evaluate_accuracy(data_iter, net):
    acc_sum, n = 0.0, 0
    for X, y in data_iter:
        y = y.astype('float32')
        acc_sum += (net(X).argmax(axis=1) == y).sum().asscalar()
        n += y.size
    return acc_sum / n

def train_ch3(net, train_iter, test_iter, loss, num_epochs, batch_size,
              params=None, lr=None, trainer=None):
    for epoch in range(num_epochs):
        train_l_sum, train_acc_sum, n = 0.0, 0.0, 0
        for X, y in train_iter:
            with autograd.record():
                y_hat = net(X)
                l = loss(y_hat, y).sum()
            l.backward()
            if trainer is None:
                sgd(params, lr, batch_size)
            else:
                trainer.step(batch_size)
            y = y.astype('float32')
            train_l_sum += l.asscalar()
            train_acc_sum += (y_hat.argmax(axis=1) == y).sum().asscalar()
            n += y.size
        test_acc = evaluate_accuracy(test_iter, net)
        print('epoch %d, loss %.4f, train acc %.3f, test acc %.3f'
              % (epoch + 1, train_l_sum / n, train_acc_sum / n, test_acc))
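As a quick sanity check, `softmax` should turn each row of an arbitrary matrix into a valid probability distribution: every entry positive and every row summing to 1. A minimal sketch:

from mxnet import nd
from d2lzh import softmax

X = nd.random.normal(shape=(2, 5))
X_prob = softmax(X)
print(X_prob)              # all entries lie in (0, 1)
print(X_prob.sum(axis=1))  # each row sums to 1

Note that this implementation exponentiates `X` directly, so very large logits can overflow `exp()`; a common remedy (not used here) is to subtract the row-wise maximum before exponentiating, which leaves the result unchanged because softmax is shift-invariant.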