Implementing Deep Learning by Hand (8): A Neural Network Built on Computational Graphs
Original post: https://www.cnblogs.com/greentomlee/p/12314064.html
github: Leezhen2014: https://github.com/Leezhen2014/python_deep_learning
5.5 The Softmax-with-Loss Layer
The softmax function normalizes its inputs before they are output; in handwritten-digit recognition, for example, the output layer is usually a softmax. Since this layer also computes the cross-entropy error internally, it is named Softmax-with-Loss.
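A one-line derivation explains the simple form of the backward pass in the code below (this is the standard softmax-plus-cross-entropy result, not specific to this implementation). For a single sample with one-hot label $t$ and logits $x$, softmax gives $y_k = e^{x_k} / \sum_j e^{x_j}$ and the loss is $L = -\sum_k t_k \log y_k$. Differentiating the composition collapses to

$$\frac{\partial L}{\partial x_i} = y_i - t_i,$$

and averaging over a batch of $N$ samples yields the $(y - t)/N$ computed in `backward()`.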
```python
import numpy as np
# This class lives in src/common/layers.py; softmax and cross_entropy_error
# are helper functions defined alongside it (sketched below).


class SoftmaxWithLoss:
    def __init__(self):
        self.loss = None
        self.y = None  # output of softmax
        self.t = None  # teacher labels

    def forward(self, x, t):
        self.t = t
        self.y = softmax(x)
        self.loss = cross_entropy_error(self.y, self.t)

        return self.loss

    def backward(self, dout=1):
        batch_size = self.t.shape[0]
        if self.t.size == self.y.size:  # one-hot labels
            dx = (self.y - self.t) / batch_size
        else:  # labels given as class indices
            dx = self.y.copy()
            dx[np.arange(batch_size), self.t] -= 1
            dx = dx / batch_size

        return dx
```
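The `softmax` and `cross_entropy_error` helpers are imported from the repository's common utilities and are not shown in this post. Here is a minimal sketch of batch-safe versions consistent with how the layer calls them (the max-subtraction and the `1e-7` guard are standard numerical-stability tricks, my assumptions rather than the repo's exact code):

```python
import numpy as np

def softmax(x):
    # Subtract the row-wise max for numerical stability before exponentiating.
    x = x - np.max(x, axis=-1, keepdims=True)
    e = np.exp(x)
    return e / np.sum(e, axis=-1, keepdims=True)

def cross_entropy_error(y, t):
    # Promote a single sample to a batch of one.
    if y.ndim == 1:
        y = y.reshape(1, -1)
        t = t.reshape(1, -1)
    batch_size = y.shape[0]
    if t.size == y.size:      # one-hot labels
        t = t.argmax(axis=1)  # convert to class indices
    # Mean negative log-likelihood; 1e-7 guards against log(0).
    return -np.sum(np.log(y[np.arange(batch_size), t] + 1e-7)) / batch_size
```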
5.6 A Neural Network Built on Computational Graphs
The previous posts introduced the basic operations built on computational graphs; now we assemble those operations into a network:
```python
# -*- coding: utf-8 -*-
# @File : two_layer_net_v2.py
# @Author: lizhen
# @Date : 2020/1/28
# @Desc : a two-layer network assembled from computational-graph layers
import numpy as np
from src.common.layers import *
from src.common.gradient import numerical_gradient
from collections import OrderedDict


class TwoLayerNet:

    def __init__(self, input_size, hidden_size, output_size, weight_init_std=0.01):
        # initialize weights and biases
        self.params = {}
        self.params['W1'] = weight_init_std * np.random.randn(input_size, hidden_size)
        self.params['b1'] = np.zeros(hidden_size)
        self.params['W2'] = weight_init_std * np.random.randn(hidden_size, output_size)
        self.params['b2'] = np.zeros(output_size)

        # network layers, in forward order
        self.layers = OrderedDict()
        self.layers['Affine1'] = Affine(self.params['W1'], self.params['b1'])
        self.layers['Relu1'] = Relu()
        self.layers['Affine2'] = Affine(self.params['W2'], self.params['b2'])

        self.lastLayer = SoftmaxWithLoss()

    def predict(self, x):
        for layer in self.layers.values():
            x = layer.forward(x)

        return x

    def loss(self, x, t):
        y = self.predict(x)
        return self.lastLayer.forward(y, t)

    def accuracy(self, x, t):
        y = self.predict(x)
        y = np.argmax(y, axis=1)
        if t.ndim != 1:
            t = np.argmax(t, axis=1)

        accuracy = np.sum(y == t) / float(x.shape[0])
        return accuracy

    def numerical_gradient(self, x, t):
        loss_W = lambda W: self.loss(x, t)

        grads = {}
        grads['W1'] = numerical_gradient(loss_W, self.params['W1'])
        grads['b1'] = numerical_gradient(loss_W, self.params['b1'])
        grads['W2'] = numerical_gradient(loss_W, self.params['W2'])
        grads['b2'] = numerical_gradient(loss_W, self.params['b2'])

        return grads

    def gradient(self, x, t):
        # forward pass (each layer caches what its backward pass needs)
        self.loss(x, t)

        # backward pass: dL/dL = 1 (SoftmaxWithLoss.backward ignores dout anyway)
        dout = 1
        dout = self.lastLayer.backward(dout)

        layers = list(self.layers.values())
        layers.reverse()
        for layer in layers:
            dout = layer.backward(dout)

        # collect the gradients stored by each Affine layer
        grads = {}
        grads['W1'], grads['b1'] = self.layers['Affine1'].dW, self.layers['Affine1'].db
        grads['W2'], grads['b2'] = self.layers['Affine2'].dW, self.layers['Affine2'].db

        return grads
```
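Since the class exposes both `numerical_gradient` and the backprop-based `gradient`, a quick gradient check can confirm that the computational-graph implementation is correct. A minimal sketch (the tiny batch of three samples and the printed-difference format are my illustrative choices, not from the original post):

```python
import numpy as np
from src.datasets.mnist import load_mnist
from src.test.two_layer_net_v2 import TwoLayerNet

(x_train, t_train), _ = load_mnist(normalize=True, one_hot_label=True)

network = TwoLayerNet(input_size=784, hidden_size=50, output_size=10)

# Compare backprop gradients against numerical gradients on a tiny batch.
x_batch, t_batch = x_train[:3], t_train[:3]
grad_numerical = network.numerical_gradient(x_batch, t_batch)
grad_backprop = network.gradient(x_batch, t_batch)

for key in grad_numerical:
    # The mean absolute difference should be tiny (on the order of 1e-10 to 1e-8)
    # if backpropagation is implemented correctly.
    diff = np.average(np.abs(grad_backprop[key] - grad_numerical[key]))
    print(key + ": " + str(diff))
```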
5.7 Testing with the MNIST Dataset
The test in this section checks that our simple layers are implemented correctly. As before, we train a handwritten-digit recognizer on the MNIST dataset; the difference from the second post is that this version is built on computational graphs.
```python
# coding: utf-8
import sys, os
sys.path.append(os.pardir)

import numpy as np
from src.datasets.mnist import load_mnist
from src.test.two_layer_net_v2 import TwoLayerNet


(x_train, t_train), (x_test, t_test) = load_mnist(normalize=True, one_hot_label=True)

network = TwoLayerNet(input_size=784, hidden_size=50, output_size=10)

iters_num = 10000
train_size = x_train.shape[0]
batch_size = 100
learning_rate = 0.1

train_loss_list = []
train_acc_list = []
test_acc_list = []

iter_per_epoch = max(train_size / batch_size, 1)

for i in range(iters_num):
    # sample a random mini-batch
    batch_mask = np.random.choice(train_size, batch_size)
    x_batch = x_train[batch_mask]
    t_batch = t_train[batch_mask]

    # gradients via backpropagation
    grad = network.gradient(x_batch, t_batch)

    # SGD update of the parameters
    for key in ('W1', 'b1', 'W2', 'b2'):
        network.params[key] -= learning_rate * grad[key]

    loss = network.loss(x_batch, t_batch)
    train_loss_list.append(loss)

    # report accuracy once per epoch
    if i % iter_per_epoch == 0:
        train_acc = network.accuracy(x_train, t_train)
        test_acc = network.accuracy(x_test, t_test)
        train_acc_list.append(train_acc)
        test_acc_list.append(test_acc)
        print(train_acc, test_acc)
```
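The script records `train_loss_list`, `train_acc_list`, and `test_acc_list` but never plots them. A minimal sketch of how they could be visualized as a continuation of the script above (the use of matplotlib and the figure layout are my additions, not part of the original post):

```python
import matplotlib.pyplot as plt
import numpy as np

plt.figure(figsize=(10, 4))

# Left panel: loss per iteration, as recorded in the training loop.
plt.subplot(1, 2, 1)
plt.plot(train_loss_list)
plt.xlabel("iteration")
plt.ylabel("loss")

# Right panel: train/test accuracy, recorded once per epoch.
plt.subplot(1, 2, 2)
epochs = np.arange(len(train_acc_list))
plt.plot(epochs, train_acc_list, label="train acc")
plt.plot(epochs, test_acc_list, linestyle="--", label="test acc")
plt.xlabel("epoch")
plt.ylabel("accuracy")
plt.legend()

plt.tight_layout()
plt.show()
```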
Output during training:
train loss:0.933130708198716
train loss:0.8530083545338488
train loss:0.8060872314284586
=============== Final Test Accuracy ===============
test acc:0.9939
Saved Network Parameters!
My heart is not a stone, it cannot be rolled about; my heart is not a mat, it cannot be rolled up. (Classic of Poetry)