Neural Networks and Deep Learning (Qiu Xipeng), Programming Exercise 4: FNN, a simple neural network in numpy (Jupyter export)
GitHub - nndl/nndl-exercise-ans: Solutions for nndl/exercise
In the reference answer, the definition of x needs to be moved earlier; otherwise Python reports that x is undefined.
This is probably due to differences in the Python/TensorFlow versions.
The original answer presumably passed its tests at the time; two years on, a few small issues need minor adjustments.
Prepare the data
import os
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers, optimizers, datasets
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2' # or any {'0', '1', '2'}
def mnist_dataset():
(x, y), (x_test, y_test) = datasets.mnist.load_data()
#normalize
x = x/255.0
x_test = x_test/255.0
return (x, y), (x_test, y_test)
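As a quick sanity check (a minimal sketch; the variable names are only for illustration), the loader returns 60,000/10,000 images of shape 28×28 scaled into [0, 1]:
(x_train, y_train), (x_te, y_te) = mnist_dataset()
print(x_train.shape, y_train.shape)  # (60000, 28, 28) (60000,)
print(x_te.shape, y_te.shape)        # (10000, 28, 28) (10000,)
print(x_train.min(), x_train.max())  # 0.0 1.0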
Demo: numpy-based auto differentiation
import numpy as np
class Matmul:
def __init__(self):
self.mem = {}
def forward(self, x, W):
h = np.matmul(x, W)
self.mem={'x': x, 'W':W}
return h
def backward(self, grad_y):
'''
x: shape(N, d)
w: shape(d, d')
grad_y: shape(N, d')
'''
x = self.mem['x']
W = self.mem['W']
####################
        '''Gradients of the matrix product with respect to x and W'''
        grad_x = np.matmul(grad_y, W.T) # shape(N, d)
grad_W = np.matmul(x.T, grad_y)
####################
return grad_x, grad_W
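# Sanity check (sketch, not from the original answer): compare Matmul.backward
# against a central finite-difference approximation of d/dW sum(x @ W).
# Sizes and the tolerance below are arbitrary choices.
x_chk = np.random.normal(size=[3, 4])
W_chk = np.random.normal(size=[4, 2])
mm_chk = Matmul()
out_chk = mm_chk.forward(x_chk, W_chk)
_, grad_W_chk = mm_chk.backward(np.ones_like(out_chk))  # analytic gradient of sum(x @ W)
eps_chk = 1e-6
num_grad_W = np.zeros_like(W_chk)
for i in range(W_chk.shape[0]):
    for j in range(W_chk.shape[1]):
        W_plus, W_minus = W_chk.copy(), W_chk.copy()
        W_plus[i, j] += eps_chk
        W_minus[i, j] -= eps_chk
        num_grad_W[i, j] = (np.matmul(x_chk, W_plus).sum() - np.matmul(x_chk, W_minus).sum()) / (2 * eps_chk)
print(np.allclose(grad_W_chk, num_grad_W, atol=1e-5))  # expect True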
class Relu:
def __init__(self):
self.mem = {}
def forward(self, x):
self.mem['x']=x
return np.where(x > 0, x, np.zeros_like(x))
def backward(self, grad_y):
'''
grad_y: same shape as x
'''
####################
        '''Gradient of the ReLU activation'''
x = self.mem['x']
grad_x = (x > 0).astype(np.float32) * grad_y
####################
return grad_x
class Softmax:
'''
    softmax over the last dimension
'''
def __init__(self):
self.epsilon = 1e-12
self.mem = {}
def forward(self, x):
'''
x: shape(N, c)
'''
x_exp = np.exp(x)
partition = np.sum(x_exp, axis=1, keepdims=True)
out = x_exp/(partition+self.epsilon)
self.mem['out'] = out
self.mem['x_exp'] = x_exp
return out
def backward(self, grad_y):
'''
grad_y: same shape as x
'''
s = self.mem['out']
sisj = np.matmul(np.expand_dims(s,axis=2), np.expand_dims(s, axis=1)) # (N, c, c)
g_y_exp = np.expand_dims(grad_y, axis=1)
tmp = np.matmul(g_y_exp, sisj) #(N, 1, c)
tmp = np.squeeze(tmp, axis=1)
tmp = -tmp+grad_y*s
return tmp
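# Note (sketch, not part of the original answer): np.exp(x) can overflow for large
# logits, which is one reason the epsilon above is needed. Subtracting the row-wise
# max first is numerically stable and gives the same probabilities, since
# softmax(x) == softmax(x - c) for any per-row constant c. The shift could be
# dropped into Softmax.forward without touching backward, which only reads mem['out'].
def stable_softmax(x):
    x_shift = x - np.max(x, axis=1, keepdims=True)
    x_exp = np.exp(x_shift)
    return x_exp / np.sum(x_exp, axis=1, keepdims=True)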
class Log:
'''
    element-wise natural log
'''
def __init__(self):
self.epsilon = 1e-12
self.mem = {}
def forward(self, x):
'''
x: shape(N, c)
'''
out = np.log(x+self.epsilon)
self.mem['x'] = x
return out
def backward(self, grad_y):
'''
grad_y: same shape as x
'''
x = self.mem['x']
        return 1. / (x + self.epsilon) * grad_y
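With one-hot labels, the Softmax → Log chain seeded with -label (as the model below does) is exactly the cross-entropy loss, and its gradient with respect to the logits simplifies to softmax(x) - label. A small check of that identity (a sketch; the sizes and names are illustrative):
# Cross-entropy shortcut check (sketch): for one-hot labels,
# d/dx [ -sum(label * log softmax(x)) ] == softmax(x) - label.
logits = np.random.normal(size=[4, 6])
onehot = np.zeros_like(logits)
onehot[np.arange(4), [1, 0, 3, 5]] = 1.
sm_chk, log_chk = Softmax(), Log()
prob_chk = sm_chk.forward(logits)
log_chk.forward(prob_chk)
grad_logits = sm_chk.backward(log_chk.backward(-onehot))  # same chaining as the model's backward below
print(np.allclose(grad_logits, prob_chk - onehot, atol=1e-6))  # expect True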
Gradient check
# import tensorflow as tf
# x = np.random.normal(size=[5, 6])
# W = np.random.normal(size=[6, 4])
# aa = Matmul()
# out = aa.forward(x, W) # shape(5, 4)
# grad = aa.backward(np.ones_like(out))
# print (grad)
# with tf.GradientTape() as tape:
# x, W = tf.constant(x), tf.constant(W)
# tape.watch(x)
# y = tf.matmul(x, W)
# loss = tf.reduce_sum(y)
# grads = tape.gradient(loss, x)
# print (grads)
# import tensorflow as tf
# x = np.random.normal(size=[5, 6])
# aa = Relu()
# out = aa.forward(x) # shape(5, 6)
# grad = aa.backward(np.ones_like(out))
# print (grad)
# with tf.GradientTape() as tape:
# x= tf.constant(x)
# tape.watch(x)
# y = tf.nn.relu(x)
# loss = tf.reduce_sum(y)
# grads = tape.gradient(loss, x)
# print (grads)
# import tensorflow as tf
# x = np.random.normal(size=[5, 6], scale=5.0, loc=1)
# label = np.zeros_like(x)
# label[0, 1]=1.
# label[1, 0]=1
# label[1, 1]=1
# label[2, 3]=1
# label[3, 5]=1
# label[4, 0]=1
# print(label)
# aa = Softmax()
# out = aa.forward(x) # shape(5, 6)
# grad = aa.backward(label)
# print (grad)
# with tf.GradientTape() as tape:
# x= tf.constant(x)
# tape.watch(x)
# y = tf.nn.softmax(x)
# loss = tf.reduce_sum(y*label)
# grads = tape.gradient(loss, x)
# print (grads)
# import tensorflow as tf
# x = np.random.normal(size=[5, 6])
# aa = Log()
# out = aa.forward(x) # shape(5, 6)
# grad = aa.backward(label)
# print (grad)
# with tf.GradientTape() as tape:
# x= tf.constant(x)
# tape.watch(x)
# y = tf.math.log(x)
# loss = tf.reduce_sum(y*label)
# grads = tape.gradient(loss, x)
# print (grads)
Final Gradient Check
import tensorflow as tf
x = np.random.normal(size=[5, 6])
W1 = np.random.normal(size=[6, 5])
W2 = np.random.normal(size=[5, 6])
label = np.zeros_like(x)
label[0, 1]=1.
label[1, 0]=1
label[2, 3]=1
label[3, 5]=1
label[4, 0]=1
mul_h1 = Matmul()
mul_h2 = Matmul()
relu = Relu()
softmax = Softmax()
log = Log()
h1 = mul_h1.forward(x, W1) # shape(5, 5)
h1_relu = relu.forward(h1)
h2 = mul_h2.forward(h1_relu, W2)
h2_soft = softmax.forward(h2)
h2_log = log.forward(h2_soft)
h2_log_grad = log.backward(label)
h2_soft_grad = softmax.backward(h2_log_grad)
h2_grad, W2_grad = mul_h2.backward(h2_soft_grad)
h1_relu_grad = relu.backward(h2_grad)
h1_grad, W1_grad = mul_h1.backward(h1_relu_grad)
print(h2_log_grad)
print('--'*20)
# print(W2_grad)
with tf.GradientTape() as tape:
x, W1, W2, label = tf.constant(x), tf.constant(W1), tf.constant(W2), tf.constant(label)
tape.watch(W1)
tape.watch(W2)
h1 = tf.matmul(x, W1)
h1_relu = tf.nn.relu(h1)
h2 = tf.matmul(h1_relu, W2)
prob = tf.nn.softmax(h2)
log_prob = tf.math.log(prob)
loss = tf.reduce_sum(label * log_prob)
grads = tape.gradient(loss, [prob])
print (grads[0].numpy())
[[0.00000000e+00 5.93789528e+00 0.00000000e+00 0.00000000e+00
0.00000000e+00 0.00000000e+00]
[6.65507591e+01 0.00000000e+00 0.00000000e+00 0.00000000e+00
0.00000000e+00 0.00000000e+00]
[0.00000000e+00 0.00000000e+00 0.00000000e+00 1.65233390e+04
0.00000000e+00 0.00000000e+00]
[0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00
0.00000000e+00 7.02646772e+02]
[2.88022415e+01 0.00000000e+00 0.00000000e+00 0.00000000e+00
0.00000000e+00 0.00000000e+00]]
----------------------------------------
[[0.00000000e+00 5.93789528e+00 0.00000000e+00 0.00000000e+00
0.00000000e+00 0.00000000e+00]
[6.65507591e+01 0.00000000e+00 0.00000000e+00 0.00000000e+00
0.00000000e+00 0.00000000e+00]
[0.00000000e+00 0.00000000e+00 0.00000000e+00 1.65233393e+04
0.00000000e+00 0.00000000e+00]
[0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00
0.00000000e+00 7.02646772e+02]
[2.88022415e+01 0.00000000e+00 0.00000000e+00 0.00000000e+00
0.00000000e+00 0.00000000e+00]]
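Rather than comparing the two printouts by eye, the agreement can be checked directly (a sketch; it reuses h2_log_grad and grads from the cell above, with a relative tolerance because the epsilon terms introduce tiny differences):
print(np.allclose(h2_log_grad, grads[0].numpy(), rtol=1e-5))  # expect True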
Build the model
class myModel:
def __init__(self):
self.W1 = np.random.normal(size=[28*28+1, 100])
self.W2 = np.random.normal(size=[100, 10])
self.mul_h1 = Matmul()
self.mul_h2 = Matmul()
self.relu = Relu()
self.softmax = Softmax()
self.log = Log()
def forward(self, x):
x = x.reshape(-1, 28*28)
bias = np.ones(shape=[x.shape[0], 1])
x = np.concatenate([x, bias], axis=1)
        self.h1 = self.mul_h1.forward(x, self.W1) # shape(N, 100)
self.h1_relu = self.relu.forward(self.h1)
self.h2 = self.mul_h2.forward(self.h1_relu, self.W2)
self.h2_soft = self.softmax.forward(self.h2)
self.h2_log = self.log.forward(self.h2_soft)
def backward(self, label):
        self.h2_log_grad = self.log.backward(-label) # loss = -sum(label*log prob), so the backward pass is seeded with -label
self.h2_soft_grad = self.softmax.backward(self.h2_log_grad)
self.h2_grad, self.W2_grad = self.mul_h2.backward(self.h2_soft_grad)
self.h1_relu_grad = self.relu.backward(self.h2_grad)
self.h1_grad, self.W1_grad = self.mul_h1.backward(self.h1_relu_grad)
model = myModel()
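Before training, a forward pass on a dummy batch (a sketch; batch size 5 and the uniform fake pixels are arbitrary) confirms that the bias column and weight shapes line up:
dummy = np.random.uniform(size=[5, 28, 28])
model.forward(dummy)  # does not modify the weights
print(model.h1.shape)      # (5, 100): [5, 785] @ [785, 100]
print(model.h2_log.shape)  # (5, 10)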
Compute the loss
def compute_loss(log_prob, labels):
return np.mean(np.sum(-log_prob*labels, axis=1))
def compute_accuracy(log_prob, labels):
predictions = np.argmax(log_prob, axis=1)
truth = np.argmax(labels, axis=1)
return np.mean(predictions==truth)
def train_one_step(model, x, y):
model.forward(x)
model.backward(y)
model.W1 -= 1e-5* model.W1_grad
model.W2 -= 1e-5* model.W2_grad
loss = compute_loss(model.h2_log, y)
accuracy = compute_accuracy(model.h2_log, y)
return loss, accuracy
def test(model, x, y):
model.forward(x)
loss = compute_loss(model.h2_log, y)
accuracy = compute_accuracy(model.h2_log, y)
return loss, accuracy
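A tiny worked example with made-up probabilities (a sketch) shows what compute_loss and compute_accuracy measure:
# Two samples, three classes (sketch).
toy_log_prob = np.log(np.array([[0.7, 0.2, 0.1],
                                [0.1, 0.3, 0.6]]))
toy_labels = np.array([[1., 0., 0.],
                       [0., 1., 0.]])
print(compute_loss(toy_log_prob, toy_labels))      # mean of -log(0.7) and -log(0.3), about 0.78
print(compute_accuracy(toy_log_prob, toy_labels))  # 0.5: first sample correct, second predicted as class 2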
Train the model
train_data, test_data = mnist_dataset()
train_label = np.zeros(shape=[train_data[0].shape[0], 10])
test_label = np.zeros(shape=[test_data[0].shape[0], 10])
train_label[np.arange(train_data[0].shape[0]), np.array(train_data[1])] = 1.
test_label[np.arange(test_data[0].shape[0]), np.array(test_data[1])] = 1.
for epoch in range(50):
loss, accuracy = train_one_step(model, train_data[0], train_label)
print('epoch', epoch, ': loss', loss, '; accuracy', accuracy)
loss, accuracy = test(model, test_data[0], test_label)
print('test loss', loss, '; accuracy', accuracy)
epoch 0 : loss 24.120447550872193 ; accuracy 0.10506666666666667
epoch 1 : loss 23.556807793137484 ; accuracy 0.12285
epoch 2 : loss 23.048235414054865 ; accuracy 0.14381666666666668
epoch 3 : loss 22.681964782819378 ; accuracy 0.15748333333333334
epoch 4 : loss 22.31934050264306 ; accuracy 0.16823333333333335
epoch 5 : loss 21.816533326873117 ; accuracy 0.17876666666666666
epoch 6 : loss 21.290418972328762 ; accuracy 0.1923
epoch 7 : loss 20.864675481620193 ; accuracy 0.21083333333333334
epoch 8 : loss 20.40118101318322 ; accuracy 0.22231666666666666
epoch 9 : loss 19.688677507169867 ; accuracy 0.24261666666666667
epoch 10 : loss 18.250652141064315 ; accuracy 0.28105
epoch 11 : loss 17.413139190206394 ; accuracy 0.3168166666666667
epoch 12 : loss 15.905288691015508 ; accuracy 0.3589
epoch 13 : loss 14.97563324371492 ; accuracy 0.4026
epoch 14 : loss 13.90615214978969 ; accuracy 0.4339166666666667
epoch 15 : loss 12.928732933159907 ; accuracy 0.4564666666666667
epoch 16 : loss 12.316841190099938 ; accuracy 0.4794
epoch 17 : loss 11.738809927940636 ; accuracy 0.5045
epoch 18 : loss 10.766245553596033 ; accuracy 0.54675
epoch 19 : loss 10.241306013662607 ; accuracy 0.5598666666666666
epoch 20 : loss 10.17684778741172 ; accuracy 0.57345
epoch 21 : loss 9.69722437744703 ; accuracy 0.5863
epoch 22 : loss 9.583569462976824 ; accuracy 0.5949833333333333
epoch 23 : loss 9.412803692259162 ; accuracy 0.5995
epoch 24 : loss 9.32294046081438 ; accuracy 0.6035833333333334
epoch 25 : loss 9.27861330368951 ; accuracy 0.6066666666666667
epoch 26 : loss 8.635697269031605 ; accuracy 0.6255666666666667
epoch 27 : loss 8.270694241139738 ; accuracy 0.6350166666666667
epoch 28 : loss 7.981981911444959 ; accuracy 0.6392666666666666
epoch 29 : loss 7.7815894381357475 ; accuracy 0.6464666666666666
epoch 30 : loss 7.319034580141721 ; accuracy 0.6605166666666666
epoch 31 : loss 6.990317954569284 ; accuracy 0.673
epoch 32 : loss 6.676015560368183 ; accuracy 0.6792166666666667
epoch 33 : loss 6.338457483699933 ; accuracy 0.6994166666666667
epoch 34 : loss 6.18231834020112 ; accuracy 0.6971666666666667
epoch 35 : loss 5.8601951694351895 ; accuracy 0.721
epoch 36 : loss 5.5723800515554 ; accuracy 0.72185
epoch 37 : loss 5.345872925131976 ; accuracy 0.7413166666666666
epoch 38 : loss 5.1037465470389805 ; accuracy 0.74295
epoch 39 : loss 4.998503318083353 ; accuracy 0.7547666666666667
epoch 40 : loss 4.7739352219431614 ; accuracy 0.7584333333333333
epoch 41 : loss 4.684302398248045 ; accuracy 0.7667
epoch 42 : loss 4.528539225806665 ; accuracy 0.7697833333333334
epoch 43 : loss 4.430669532636246 ; accuracy 0.7773
epoch 44 : loss 4.299548004077121 ; accuracy 0.7803333333333333
epoch 45 : loss 4.1827985220379515 ; accuracy 0.7884333333333333
epoch 46 : loss 4.066019079244932 ; accuracy 0.7913333333333333
epoch 47 : loss 3.9536817935945083 ; accuracy 0.7982333333333334
epoch 48 : loss 3.867862412250489 ; accuracy 0.7999333333333334
epoch 49 : loss 3.779043032271536 ; accuracy 0.80545
test loss 3.4802052462670967 ; accuracy 0.819
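The loop above is full-batch gradient descent: every epoch runs one forward/backward pass over all 60,000 training images. A mini-batch variant is a common alternative (a sketch; batch_size and the epoch count are assumptions, and the fixed 1e-5 step size inside train_one_step may need retuning for smaller batches):
# Mini-batch training sketch; reuses train_one_step and the data prepared above.
batch_size = 128
num_train = train_data[0].shape[0]
for epoch in range(5):
    perm = np.random.permutation(num_train)
    for b in range(num_train // batch_size):
        idx = perm[b * batch_size:(b + 1) * batch_size]
        loss, accuracy = train_one_step(model, train_data[0][idx], train_label[idx])
    print('epoch', epoch, ': loss', loss, '; accuracy', accuracy)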