质谱数据,二分类,bp神经网络

复制代码
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split

# Load the mass-spectrometry dataset: each row is one sample; columns
# 0-509 are the 510 feature values and column 511 holds the class label
# as a string ('Neurons' or 'Astrocytes').
data = pd.read_pickle('ICC_rms.pkl')
df = pd.DataFrame(data)
X = df.iloc[:, 0:510].values  # feature matrix, e.g. shape (1544, 510)
y = df.iloc[:, 511].values    # string labels, e.g. shape (1544,)

# Encode labels numerically: 'Neurons' -> 0, 'Astrocytes' -> 1.
# Size the array from the data instead of hard-coding 1544 so the script
# works for any dataset length; -1.0 flags any unrecognised label string.
Y = np.full(len(y), -1.0)
Y[y == 'Neurons'] = 0
Y[y == 'Astrocytes'] = 1
# y = ['Neurons' 'Neurons' ... 'Astrocytes'],  Y = [0. 0. ... 1. 1.]

# x为输入层神经元个数,y为隐层神经元个数,z为输出层神经元个数
# 创建的是参数初始化函数,参数有各层间的权重weight和阈值即偏置value就是b
# 本例中 x = y = 510(特征个数),z = 1
def parameter_initialization(x, y, z):
    """Randomly initialise all network parameters.

    x, y and z are the neuron counts of the input, hidden and output
    layers.  Returns (weight1, weight2, value1, value2): the
    input->hidden weights, hidden->output weights, hidden-layer
    thresholds and output-layer thresholds.  Each array is filled with
    random integers drawn from [-5, 5) and cast to float64.
    """
    def _rand(shape):
        # Integer draws rendered as floats, matching the original scheme.
        return np.random.randint(-5, 5, shape).astype(np.float64)

    # Draw order matters for reproducibility under a fixed seed:
    # thresholds first, then the two weight matrices.
    value1 = _rand((1, y))   # hidden-layer thresholds
    value2 = _rand((1, z))   # output-layer thresholds
    weight1 = _rand((x, y))  # input -> hidden weights
    weight2 = _rand((y, z))  # hidden -> output weights
    return weight1, weight2, value1, value2


# 创建激活函数sigmoid
def sigmoid(value):
    """Logistic activation: maps any real input into (0, 1) element-wise."""
    return 1.0 / (1.0 + np.exp(-value))


'''
weight1:输入层与隐层的连接权重
weight2:隐层与输出层的连接权重
value1:隐层阈值
value2:输出层阈值
权重和阈值的个数和神经网络的隐层层数有关,若隐层为n,则权重和阈值的个数为n+1
'''


# 创建训练样本的函数,返回训练完成后的参数weight和value,这里的函数是经过一次迭代后的参数,即所有的样本经过一次训练后的参数
# 具体参数的值可以通过设置迭代次数和允许误差来进行确定
def trainning(dataset, labelset, weight1, weight2, value1, value2):
    """One full online back-propagation pass over the training set.

    dataset holds the samples, labelset their 0/1 targets.  The weight
    matrices and threshold ("value") arrays are updated in place one
    sample at a time and also returned.  Activations are computed as
    sigmoid(w.x - threshold), so thresholds enter with a minus sign and
    their gradient step has the opposite sign to the weights'.
    """
    lr = 0.01  # learning rate (step size of each gradient update)

    def _logistic(v):
        # Local copy of the logistic activation so the block is self-contained.
        return 1.0 / (1.0 + np.exp(-v))

    for sample, label in zip(dataset, labelset):
        x_in = np.mat(sample).astype(np.float64)    # (1, n_features)
        target = np.mat(label).astype(np.float64)   # (1, 1)

        # ---- forward pass -------------------------------------------------
        hidden_net = np.dot(x_in, weight1).astype(np.float64)          # (1, h)
        hidden_out = _logistic(hidden_net - value1).astype(np.float64)  # (1, h)
        out_net = np.dot(hidden_out, weight2).astype(np.float64)       # (1, 1)
        out = _logistic(out_net - value2).astype(np.float64)           # (1, 1)

        # ---- backward pass (classic delta rule) ---------------------------
        d_sig_out = np.multiply(out, 1 - out)       # sigmoid' at the output
        g = np.multiply(d_sig_out, target - out)    # output-layer delta (1, 1)
        back = np.dot(g, np.transpose(weight2))     # error pushed to hidden (1, h)
        d_sig_hid = np.multiply(hidden_out, 1 - hidden_out)
        e = np.multiply(back, d_sig_hid)            # hidden-layer delta (1, h)

        # ---- parameter updates -------------------------------------------
        # Thresholds move opposite to the weights because they are
        # subtracted in the forward pass.
        value1 += -lr * e
        value2 += -lr * g
        weight1 += lr * np.dot(np.transpose(x_in), e)
        weight2 += lr * np.dot(np.transpose(hidden_out), g)

    return weight1, weight2, value1, value2


# 创建测试样本数据的函数
def testing(dataset1, labelset1, weight1, weight2, value1, value2):
    # 记录预测正确的个数
    rightcount = 0
    for i in range(len(dataset1)):
        # 计算每一个样例的标签通过上面创建的神经网络模型后的预测值
        inputset = np.mat(dataset1[i]).astype(np.float64)
        outputset = np.mat(labelset1[i]).astype(np.float64)
        output2 = sigmoid(np.dot(inputset, weight1) - value1)
        output3 = sigmoid(np.dot(output2, weight2) - value2)

        # 确定其预测标签
        if output3 > 0.5:
            flag = 1
        else:
            flag = 0
        if labelset1[i] == flag:
            rightcount += 1
        # 输出预测结果
        print("预测为%d   实际为%d" % (flag, labelset1[i]))
    # 返回正确率
    return rightcount / len(dataset1)


def main():
    """Train the BP network on an 80/20 split and report test accuracy.

    Uses the module-level X (features) and Y (0/1 labels).  The hidden
    layer is sized to the feature count and the output layer has a
    single neuron for binary classification.
    """
    X_train, X_test, Y_train, Y_test = train_test_split(
        X, Y, test_size=0.20, random_state=0)
    weight1, weight2, value1, value2 = parameter_initialization(
        len(X_train[0]), len(X_train[0]), 1)
    epochs = 100
    for i in range(epochs):
        # One full pass over all training samples per epoch.
        weight1, weight2, value1, value2 = trainning(
            X_train, Y_train, weight1, weight2, value1, value2)
        # BUG FIX: the original print("epoch:%d/100", i+1) passed two
        # arguments to print instead of %-formatting, so it printed the
        # literal format string followed by the number.
        print("epoch:%d/%d" % (i + 1, epochs))
    # Evaluate on the held-out 20% and report accuracy.
    rate = testing(X_test, Y_test, weight1, weight2, value1, value2)
    print("正确率为%f" % rate)


if __name__ == '__main__':
    main()
复制代码

参考博客:https://blog.csdn.net/qq_45769063/article/details/106747656?spm=1001.2014.3001.5506

迭代了100次之后的正确率为0.440129

迭代了200次之后的正确率为0.559871

迭代了500次之后的正确率为0.449838

迭代了1000次之后的正确率为0.440129

迭代了1500次之后的正确率为0.644013

迭代了2000次之后的正确率为0.624595

posted @   奋发图强的小赵  阅读(42)  评论(0编辑  收藏  举报
相关博文:
阅读排行:
· TypeScript + Deepseek 打造卜卦网站:技术与玄学的结合
· Manus的开源复刻OpenManus初探
· AI 智能体引爆开源社区「GitHub 热点速览」
· 三行代码完成国际化适配,妙~啊~
· .NET Core 中如何实现缓存的预热?
点击右上角即可分享
微信分享提示