机器学习-自适应学习率和损失函数的反向传播神经网络BPNN

问题背景:多分类问题,输入参数为8组特征值,采用Excel表格导入,输出参数为5种不同的分类结果。数据量为1000组左右,训练组与测试组保持为7:3。

 

BP神经网络模型:隐藏层计算为10层,达到最大准确率,且时间较短。

 

学习率更新公式:

损失函数更新公式:

有几个超参数需要提前设置好,根据自己的问题背景环境。

import numpy as np
import datetime
import xlrd
import matplotlib.pyplot as plt
from sklearn.metrics import roc_curve

def noramlization(data):
    minVals = data.min(0)
    maxVals = data.max(0)
    ranges = maxVals - minVals
    normData = (data - minVals)/ranges
    return normData

# 0.读入数据
def load_excel(path):
    result_array=[]
    # 读取初始数据
    data = xlrd.open_workbook(path)
    table = data.sheet_by_index(0)
    # table.nrows表示总行数
    for i in range(table.nrows):
        # 读取每行数据,保存在line里面,line是list
        line = table.row_values(i)
        # 将line加入到result_array中,result_array是二维list
        result_array.append(line)
    # 将result_array从二维list变成数组
    result_array = np.array(result_array)
    return result_array


# 1.初始化参数
def initialize_parameters(n_x, n_h, n_y):
    np.random.seed(2)

    # 权重和偏置矩阵
    w1 = np.random.rand(n_h, n_x) * 0.1
    #b1 = np.random.rand(n_h, 1)
    b1 = np.zeros(shape=(n_h, 1))
    w2 = np.random.rand(n_y, n_h) * 0.1
    #b2 = np.random.rand(n_y, 1)
    b2 = np.zeros(shape=(n_y, 1))

    # 通过字典存储参数
    parameters = {'w1': w1, 'b1': b1, 'w2': w2, 'b2': b2}
    return parameters

#  更新权重和阈值
def update_temp(parameters):
    w1 = parameters['w1']
    b1 = parameters['b1']
    w2 = parameters['w2']
    b2 = parameters['b2']

    temp = {'w1': w1, 'b1': b1, 'w2': w2, 'b2': b2}

    return temp

def update_wb(parameters,temp):
    w1 = parameters['w1']
    b1 = parameters['b1']
    w2 = parameters['w2']
    b2 = parameters['b2']
    temp_w1 = temp['w1']
    temp_b1 = temp['b1']
    temp_w2 = temp['w2']
    temp_b2 = temp['b2']
    m1 = w1- temp_w1
    n1 = b1- temp_b1
    m2 = w2 - temp_w2
    n2 = b2 - temp_b2
    update_para = {'m1': m1, 'n1': n1, 'm2': m2, 'n2': n2}

    return update_para

# 2.前向传播
def forward_propagation(X, parameters):
    w1 = parameters['w1']
    b1 = parameters['b1']
    w2 = parameters['w2']
    b2 = parameters['b2']

    # 通过前向传播来计算a2
    z1 = np.dot(w1, X) + b1     # 这个地方需注意矩阵加法:虽然(w1*X)和b1的维度不同,但可以相加
    a1 = np.tanh(z1)           # 使用tanh作为第一层的激活函数
    z2 = np.dot(w2, a1) + b2

    # exp = np.exp(z2)
    # sum_exp = np.sum(np.exp(z2), axis=0, keepdims=True)
    # a2 = exp / sum_exp

    a2 = 1 / (1 + np.exp(-z2))  # 使用sigmoid作为第二层的激活函数
    # 通过字典存储参数
    cache = {'z1': z1, 'a1': a1, 'z2': z2, 'a2': a2}

    return a2, cache


# 3.计算代价函数
def compute_cost(a2, Y, parameters):
    m = Y.shape[1]      # Y的列数即为总的样本数
    # 采用交叉熵(cross-entropy)作为代价函数
    logprobs = np.multiply(np.log(a2), Y) + np.multiply((1 - Y), np.log(1 - a2))
    #logprobs = np.multiply(np.log(a2), Y)
    cost = - np.sum(logprobs) / m
    return cost


# 4.反向传播(计算代价函数的导数)
def backward_propagation(parameters, cache, X, Y):
    m = Y.shape[1]

    w2 = parameters['w2']

    a1 = cache['a1']
    a2 = cache['a2']

    # 反向传播,计算dw1、db1、dw2、db2
    dz2 = a2 - Y
    dw2 = (1 / m) * np.dot(dz2, a1.T)
    #print(dw2.shape)
    db2 = (1 / m) * np.sum(dz2, axis=1, keepdims=True)
    dz1 = np.multiply(np.dot(w2.T, dz2), 1 - np.power(a1, 2))
    dw1 = (1 / m) * np.dot(dz1, X.T)
    db1 = (1 / m) * np.sum(dz1, axis=1, keepdims=True)
    grads = {'dw1': dw1, 'db1': db1, 'dw2': dw2, 'db2': db2}

    return grads

def update_learn(learning_rate,i,num_iterations):
    learning=learning_rate*(1-i/num_iterations)
    return learning


# 5.更新参数
def update_parameters(parameters, grads,update_para,learning_rate,k):
    w1 = parameters['w1']
    b1 = parameters['b1']
    w2 = parameters['w2']
    b2 = parameters['b2']

    dw1 = grads['dw1']
    db1 = grads['db1']
    dw2 = grads['dw2']
    db2 = grads['db2']

    m1 = update_para['m1']
    n1 = update_para['n1']
    m2 = update_para['m2']
    n2 = update_para['n2']

    # 更新参数
    w1 = w1 - dw1 * learning_rate + m1 * k*0.9
    b1 = b1 - db1 * learning_rate + n1 * k*0.9
    w2 = w2 - dw2 * learning_rate + m2 * k*0.9
    b2 = b2 - db2 * learning_rate + n2 * k*0.9

    parameters = {'w1': w1, 'b1': b1, 'w2': w2, 'b2': b2}

    return parameters


# 建立神经网络
def nn_model(X, Y, n_h, n_input, n_output, num_iterations=10000, print_cost=False):
    np.random.seed(3)

    n_x = n_input           # 输入层节点数
    n_y = n_output          # 输出层节点数

    # 1.初始化参数
    learning_rate=0.1
    parameters = initialize_parameters(n_x, n_h, n_y)
    temp = update_temp(parameters)
    cost_list=[]
    grads_list=[]
    j = 1
    # 梯度下降循环
    for i in range(0, num_iterations):
        learning=update_learn(learning_rate,i,num_iterations)
        # 2.前向传播
        a2, cache = forward_propagation(X, parameters)
        # 3.计算代价函数
        cost = compute_cost(a2, Y, parameters)
        cost_list.append(cost)
        # 4.反向传播
        grads = backward_propagation(parameters, cache, X, Y)
        # 5.更新参数
        update_para = update_wb(parameters, temp)
        k=1
        if j==i:
            temp=update_temp(parameters)
            j=j+1
            k=cost/ttt
        ttt = cost
        parameters = update_parameters(parameters, grads, update_para,learning_rate=learning,k=k)
        grads_list.append(k)
        # 每1000次迭代,输出一次代价函数
        if print_cost and i % 1000 == 0:
            print('迭代第%i次,代价函数为:%f' % (i, cost))
    plt.plot(cost_list)
    #plt.plot(grads_list)
    return parameters,cost_list


# 对模型进行测试
def predict(parameters, x_test, y_test):
    w1 = parameters['w1']
    b1 = parameters['b1']
    w2 = parameters['w2']
    b2 = parameters['b2']

    z1 = np.dot(w1, x_test) + b1
    a1 = np.tanh(z1)

    z2 = np.dot(w2, a1) + b2
    # exp = np.exp(z2)
    # sum_exp = np.sum(np.exp(z2), axis=0, keepdims=True)
    # a2 = exp / sum_exp
    a2 = 1 / (1 + np.exp(-z2))

    # 结果的维度
    n_rows = y_test.shape[0]
    n_cols = y_test.shape[1]

    # 预测值结果存储
    output = np.empty(shape=(n_rows, n_cols), dtype=int)

    # 取出每条测试数据的预测结果
    for i in range(n_cols):
        # 将每条测试数据的预测结果(概率)存为一个行向量
        temp = np.zeros(shape=n_rows)
        for j in range(n_rows):
            temp[j] = a2[j][i]

        # 将每条结果(概率)从小到大排序,并获得相应下标
        sorted_dist = np.argsort(temp)
        length = len(sorted_dist)

        # 将概率最大的置为1,其它置为0
        for k in range(length):
            if k == sorted_dist[length - 1]:
                output[k][i] = 1
            else:
                output[k][i] = 0

    print('预测结果:')
    print(output)
    print('真实结果:')
    print(y_test)

    count = 0
    for k in range(0, n_cols):
        if output[0][k] == y_test[0][k] and output[1][k] == y_test[1][k] and output[2][k] == y_test[2][k]\
                and output[3][k] == y_test[3][k] and output[4][k] == y_test[4][k]:
            count = count + 1

    acc = count / int(y_test.shape[1]) * 100
    print('准确率:%.2f%%' % acc)
    return output,acc


def cross_validation():
    pass


if __name__ == "__main__":
    # 读取数据
    data_normal = load_excel('E:/exp/1.xlsx')
    data_sunken = load_excel('E:/exp/2.xlsx')
    data_undercut = load_excel('E:/exp/3.xlsx')
    data_pore = load_excel('E:/exp/4.xlsx')
    data_burnthrough = load_excel('E:/exp/5.xlsx')
    #test_weld = load_excel('E:/exp/test.xlsx')
    # 生成数据集
    y11 = np.array([1,0,0,0,0])
    y1 = np.tile(y11,(data_normal.shape[0],1))
    #y11 = np.zeros((data_normal.shape[0], 1))
    c1 = np.concatenate((data_normal, y1), axis=1) 

    y22 = np.array([0,1,0,0,0])
    y2 = np.tile(y22,(data_sunken.shape[0],1))
    #y2 = np.ones((data_sunken.shape[0], 1)) * 1
    c2 = np.concatenate((data_sunken, y2), axis=1)  

    y33 = np.array([0,0,1,0,0])
    y3 = np.tile(y33,(data_undercut.shape[0],1))
    #y3 = np.ones((data_undercut.shape[0], 1)) * 2
    c3 = np.concatenate((data_undercut, y3), axis=1) 
    y44 = np.array([0,0,0,1,0])
    y4 = np.tile(y44,(data_pore.shape[0],1))
    #y4 = np.ones((data_pore.shape[0], 1)) * 3
    c4 = np.concatenate((data_pore, y4), axis=1)  

    y55 = np.array([0,0,0,0,1])
    y5 = np.tile(y55,(data_burnthrough.shape[0],1))
    #y5 = np.ones((data_burnthrough.shape[0], 1)) * 4
    c5 = np.concatenate((data_burnthrough, y5), axis=1)  
#######load samples##
    # np.random.shuffle(c1)
    # np.random.shuffle(c2)
    # np.random.shuffle(c3)
    # np.random.shuffle(c4)
    # np.random.shuffle(c5)
    # yuan = np.concatenate((c1[:75,:], c2[:75,:], c3[:75,:], c4[:75,:], c5[:75,:]), axis=0)
#########
    #all_data=load_excel('E:/temp.xlsx')
    all_data = np.concatenate((c1, c2, c3, c4, c5), axis=0)
    np.random.shuffle(all_data)
    # train_data_x1 = all_data[:1089, :8].T
    # train_data_y1 = all_data[:1089, 8:13].T
    # train_data_x2 = all_data[1218:, :8].T
    # train_data_y2 = all_data[1218:, 8:13].T
    # train_data_x=np.concatenate((train_data_x1,train_data_x2),axis=1)
    # train_data_y = np.concatenate((train_data_y1, train_data_y2), axis=1)
    train_data_x = all_data[:812, :8].T
    train_data_y = all_data[:812, 8:13].T  # 训练集
    # #test_data_x=test_weld[:,:8].T
    # #test_data_y=test_weld[:,8:13].T
    test_data_x = all_data[812:, :8].T
    test_data_y = all_data[812:, 8:13].T  # 测试集
    # test_data_x = all_data[1089:1218, :8].T
    # test_data_y = all_data[1089:1218, 8:13].T
    train_data_y = train_data_y.astype('uint8')
    train_data_x=noramlization(train_data_x)
    test_data_x=noramlization(test_data_x)

    # 开始训练
    start_time = datetime.datetime.now()
    # 输入8个节点,隐层10个节点,输出5个节点,迭代10000次
    parameters,result= nn_model(train_data_x, train_data_y, n_h=13, n_input=8, n_output=5, num_iterations=10000, print_cost=True)
    end_time = datetime.datetime.now()
    print("用时:" + str((end_time - start_time).seconds) + 's' + str(round((end_time - start_time).microseconds / 1000)) + 'ms')

    # 对模型进行测试
    test_data_y = test_data_y.astype('uint8')
    #o,t=predict(parameters, train_data_x, train_data_y)
    p,acc=predict(parameters, test_data_x,test_data_y)
    #np.savetxt('E:/exp/weld detect/result_test.txt',yuan)
    #np.savetxt('E:/exp/weld detect/result_predict.txt', p.T)




 

损失函数:

准确率:

 

posted @ 2021-04-22 21:00  code_witness  阅读(244)  评论(0编辑  收藏  举报