神经网络相关（4）-反向传播的实现

反向传播的实现--回归

%matplotlib inline

import numpy as np
import matplotlib.pyplot as plt

# Training set: sin(x) sampled every 0.1 over [0, 2*pi).
input_data = np.arange(0, 2 * np.pi, 0.1)
correct_data = np.sin(input_data)
# Rescale the inputs from [0, 2*pi) into [-1, 1) to help training converge.
input_data = (input_data - np.pi) / np.pi
n_data = correct_data.shape[0]

# Network shape: neurons in the input, middle and output layers.
n_in, n_mid, n_out = 1, 3, 1

wb_width = 0.01  # scale applied to the randomly drawn initial weights/biases
eta = 0.1        # learning rate
epoch = 2001     # number of passes over the training data
interval = 200   # progress is reported every `interval` epochs

class MiddleLayer:
    """Fully connected hidden layer with a sigmoid activation.

    ``forward`` stores its input and output on the instance so that
    ``backward`` can compute the gradients, which ``update`` then applies.
    """

    def __init__(self, n_upper, n):
        """Create the parameters.

        Args:
            n_upper: Number of neurons in the preceding layer.
            n: Number of neurons in this layer.
        """
        # Normally distributed initial values scaled by the module-level
        # ``wb_width``; the weight matrix is drawn before the bias vector.
        self.w = wb_width * np.random.randn(n_upper, n)
        self.b = wb_width * np.random.randn(n)

    def forward(self, x):
        """Store ``x`` and the activation ``self.y = sigmoid(x . w + b)``."""
        self.x = x
        pre_activation = np.dot(x, self.w) + self.b
        self.y = 1 / (1 + np.exp(-pre_activation))

    def backward(self, grad_y):
        """Compute ``grad_w``, ``grad_b`` and ``grad_x`` from ``grad_y``.

        Args:
            grad_y: Gradient of the loss with respect to this layer's output.
        """
        # Derivative of the sigmoid expressed through its output: (1 - y) * y.
        local_grad = grad_y * (1 - self.y) * self.y
        self.grad_w = np.dot(self.x.T, local_grad)
        self.grad_b = local_grad.sum(axis=0)
        # Gradient handed back to the preceding layer.
        self.grad_x = np.dot(local_grad, self.w.T)

    def update(self, eta):
        """Take one gradient-descent step of size ``eta``, in place."""
        self.w -= eta * self.grad_w
        self.b -= eta * self.grad_b

class OutputLayer:
    """Fully connected output layer with an identity activation (regression)."""

    def __init__(self, n_upper, n):
        """Create the parameters.

        Args:
            n_upper: Number of neurons in the preceding layer.
            n: Number of neurons in this layer.
        """
        # Weight matrix of shape (n_upper, n) drawn from a normal
        # distribution; the module-level ``wb_width`` sets its spread.
        self.w = wb_width * np.random.randn(n_upper, n)
        # Bias vector of length n, drawn the same way.
        self.b = wb_width * np.random.randn(n)

    def forward(self, x):
        """Store ``x`` and the linear output ``self.y = x . w + b``."""
        self.x = x
        # Identity activation: the affine result is the output itself.
        self.y = np.dot(x, self.w) + self.b

    def backward(self, t):
        """Compute ``grad_w``, ``grad_b`` and ``grad_x`` for the target ``t``."""
        # Output minus target is used directly as this layer's error term.
        error = self.y - t
        self.grad_w = np.dot(self.x.T, error)
        self.grad_b = error.sum(axis=0)
        # Gradient handed back to the preceding layer.
        self.grad_x = np.dot(error, self.w.T)

    def update(self, eta):
        """Take one gradient-descent step of size ``eta``, in place."""
        # Subtracting moves the parameters against the gradient.
        self.w -= eta * self.grad_w
        self.b -= eta * self.grad_b
        
# Build the two layers of the network (middle layer first, then output).

middle_layer = MiddleLayer(n_in, n_mid)
output_layer = OutputLayer(n_mid, n_out)

for i in range(epoch):
    # Visit the samples in a fresh random order each epoch; the weights are
    # updated after every single sample (stochastic gradient descent).
    index_random = np.arange(n_data)
    np.random.shuffle(index_random)

    # Per-epoch accumulators, used only for reporting.
    total_error = 0
    plot_x = []
    plot_y = []
    is_report_epoch = i % interval == 0

    for idx in index_random:
        # Length-1 slices keep x and t as arrays rather than scalars.
        x = input_data[idx:idx + 1]
        t = correct_data[idx:idx + 1]

        # Forward pass, starting from the input.
        middle_layer.forward(x.reshape(1, 1))
        output_layer.forward(middle_layer.y)

        # Backward pass, starting from the correct answer.
        output_layer.backward(t.reshape(1, 1))
        middle_layer.backward(output_layer.grad_x)

        # Both gradients are computed before either layer is updated.
        middle_layer.update(eta)
        output_layer.update(eta)

        if is_report_epoch:
            y = output_layer.y.reshape(-1)  # flatten back to a vector
            # Sum-of-squares error for this sample.
            total_error += 0.5 * np.sum(np.square(y - t))
            plot_x.append(x)
            plot_y.append(y)

    if is_report_epoch:
        # Dashed line: target curve; '+' markers: this epoch's predictions.
        plt.plot(input_data, correct_data, linestyle="dashed")
        plt.scatter(plot_x, plot_y, marker="+")
        plt.show()

        print("Epoch:"+str(i)+"/"+str(epoch),"Error:"+str(total_error/n_data))

中间层的神经元数量对结果的影响：

太少：网络的表达能力不足，无法很好地拟合目标函数（欠拟合）；

太多：计算量增加，并且可能会过拟合。

反向传播的实现--分类

%matplotlib inline

import numpy as np
import matplotlib.pyplot as plt

# 21 evenly spaced coordinates per axis covering [-1, 1].
# np.linspace is used instead of a float-step np.arange: arange with step 0.1
# needs an overshoot stop value (1.1) and accumulates rounding error, so its
# grid points are not exactly 0.0 or 1.0. linspace fixes the point count and
# includes both endpoints exactly.
X = np.linspace(-1.0, 1.0, 21)
Y = np.linspace(-1.0, 1.0, 21)

# Enumerate every (x, y) grid point with x varying slowest, i.e. the same
# order as `for x in X: for y in Y`.
grid_x = np.repeat(X, len(Y))
grid_y = np.tile(Y, len(X))
input_data = np.column_stack((grid_x, grid_y))

# One-hot labels: [0, 1] for points strictly below the curve y = sin(pi * x),
# [1, 0] for points on or above it.
is_below = grid_y < np.sin(np.pi * grid_x)
correct_data = np.where(is_below[:, np.newaxis], [0, 1], [1, 0])

n_data = len(correct_data)

# Network shape: neurons in the input, middle and output layers.
n_in, n_mid, n_out = 2, 6, 2

wb_width = 0.01  # scale applied to the randomly drawn initial weights/biases
eta = 0.1        # learning rate
epoch = 101      # number of passes over the training data
interval = 10    # progress is reported every `interval` epochs

class MiddleLayer:
    """Fully connected hidden layer with a sigmoid activation.

    ``forward`` stores its input and output on the instance so that
    ``backward`` can compute the gradients, which ``update`` then applies.
    """

    def __init__(self, n_upper, n):
        """Create the parameters.

        Args:
            n_upper: Number of neurons in the preceding layer.
            n: Number of neurons in this layer.
        """
        # Normally distributed initial values scaled by the module-level
        # ``wb_width``; the weight matrix is drawn before the bias vector.
        self.w = wb_width * np.random.randn(n_upper, n)
        self.b = wb_width * np.random.randn(n)

    def forward(self, x):
        """Store ``x`` and the activation ``self.y = sigmoid(x . w + b)``."""
        self.x = x
        pre_activation = np.dot(x, self.w) + self.b
        self.y = 1 / (1 + np.exp(-pre_activation))

    def backward(self, grad_y):
        """Compute ``grad_w``, ``grad_b`` and ``grad_x`` from ``grad_y``.

        Args:
            grad_y: Gradient of the loss with respect to this layer's output.
        """
        # Derivative of the sigmoid expressed through its output: (1 - y) * y.
        local_grad = grad_y * (1 - self.y) * self.y
        self.grad_w = np.dot(self.x.T, local_grad)
        self.grad_b = local_grad.sum(axis=0)
        # Gradient handed back to the preceding layer.
        self.grad_x = np.dot(local_grad, self.w.T)

    def update(self, eta):
        """Take one gradient-descent step of size ``eta``, in place."""
        self.w -= eta * self.grad_w
        self.b -= eta * self.grad_b

class OutputLayer:
    """Fully connected output layer with a softmax activation (classification)."""

    def __init__(self, n_upper, n):
        """Create the parameters.

        Args:
            n_upper: Number of neurons in the preceding layer.
            n: Number of neurons in this layer.
        """
        # Weight matrix of shape (n_upper, n) drawn from a normal
        # distribution; the module-level ``wb_width`` sets its spread.
        self.w = wb_width * np.random.randn(n_upper, n)
        # Bias vector of length n, drawn the same way.
        self.b = wb_width * np.random.randn(n)

    def forward(self, x):
        """Store ``x`` and the row-wise softmax ``self.y`` of ``x . w + b``.

        Args:
            x: 2-D array with one sample per row.
        """
        self.x = x
        u = np.dot(x, self.w) + self.b
        # Subtract each row's maximum before exponentiating. Softmax is
        # unchanged by a per-row shift, and the shift keeps np.exp from
        # overflowing to inf, which would make the result inf/inf = nan.
        exp_u = np.exp(u - np.max(u, axis=1, keepdims=True))
        self.y = exp_u / np.sum(exp_u, axis=1, keepdims=True)

    def backward(self, t):
        """Compute ``grad_w``, ``grad_b`` and ``grad_x`` for the target ``t``.

        Args:
            t: One-hot targets with the same shape as ``self.y``.
        """
        # For softmax combined with cross-entropy loss, the gradient with
        # respect to the pre-activation reduces to output minus target.
        delta = self.y - t
        self.grad_w = np.dot(self.x.T, delta)
        self.grad_b = np.sum(delta, axis=0)
        # Gradient handed back to the preceding layer.
        self.grad_x = np.dot(delta, self.w.T)

    def update(self, eta):
        """Take one gradient-descent step of size ``eta``, in place."""
        # Subtracting moves the parameters against the gradient.
        self.w -= eta * self.grad_w
        self.b -= eta * self.grad_b
        
# Build the two layers of the network (middle layer first, then output).

middle_layer = MiddleLayer(n_in, n_mid)
output_layer = OutputLayer(n_mid, n_out)

sin_data = np.sin(np.pi * X)  # decision boundary, drawn to check the results
for i in range(epoch):
    # Visit the samples in a fresh random order each epoch; the weights are
    # updated after every single sample (stochastic gradient descent).
    index_random = np.arange(n_data)
    np.random.shuffle(index_random)

    # Per-epoch accumulators, used only for reporting: the coordinates of the
    # points assigned to each of the two classes.
    total_error = 0
    x_1 = []
    y_1 = []
    x_2 = []
    y_2 = []
    is_report_epoch = i % interval == 0

    for idx in index_random:
        x = input_data[idx]
        t = correct_data[idx]

        # Forward pass, starting from the input.
        middle_layer.forward(x.reshape(1, 2))
        output_layer.forward(middle_layer.y)

        # Backward pass, starting from the correct answer.
        output_layer.backward(t.reshape(1, 2))
        middle_layer.backward(output_layer.grad_x)

        # Both gradients are computed before either layer is updated.
        middle_layer.update(eta)
        output_layer.update(eta)

        if not is_report_epoch:
            continue

        y = output_layer.y.reshape(-1)  # flatten back to a vector
        # Cross-entropy error; the 1e-7 keeps log() away from zero.
        total_error -= np.sum(t * np.log(y + 1e-7))
        # File the point under whichever output is larger.
        if y[0] > y[1]:
            x_1.append(x[0])
            y_1.append(x[1])
        else:
            x_2.append(x[0])
            y_2.append(x[1])

    if is_report_epoch:
        # Dashed line: boundary curve; '+' and 'x' markers: predicted classes.
        plt.plot(X, sin_data, linestyle="dashed")
        plt.scatter(x_1, y_1, marker="+")
        plt.scatter(x_2, y_2, marker="x")
        plt.show()

        print("Epoch:"+str(i)+"/"+str(epoch),"Error:"+str(total_error/n_data))