质谱数据,二分类,bp神经网络
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split

# Mass-spectrometry data, binary classification with a one-hidden-layer
# back-propagation (BP) neural network.

# NOTE(review): unpickling can execute arbitrary code embedded in the file;
# only load 'ICC_rms.pkl' from a trusted source.
data = pd.read_pickle('ICC_rms.pkl')
df = pd.DataFrame(data)

# Feature matrix: columns 0..509 of every sample (the author's notes report a
# (1544, 510) matrix, i.e. 1544 samples with 510 features each).
# NOTE(review): column 510 is used neither as a feature nor as the label --
# confirm that skipping it is intended.
X = df.iloc[:, 0:510].values
# Raw string labels, taken from column 511.
y = df.iloc[:, 511].values

# Numeric encoding of the class labels; any other label is encoded as -1.0.
LABEL_CODES = {'Neurons': 0.0, 'Astrocytes': 1.0}
# Sized from the data itself rather than a hard-coded sample count.
Y = np.array([LABEL_CODES.get(label, -1.0) for label in y], dtype=np.float64)


def parameter_initialization(x, y, z):
    """Create random initial weights and thresholds for a 3-layer network.

    Args:
        x: number of input-layer neurons.
        y: number of hidden-layer neurons.
        z: number of output-layer neurons.

    Returns:
        Tuple ``(weight1, weight2, value1, value2)`` of float64 arrays:
        input->hidden weights ``(x, y)``, hidden->output weights ``(y, z)``,
        hidden thresholds ``(1, y)`` and output thresholds ``(1, z)``.
        Every entry is a random integer drawn from [-5, 5) stored as a float.
    """
    # NOTE(review): integer weights of this magnitude combined with unscaled
    # inputs drive the sigmoids deep into saturation, which probably explains
    # the near-chance accuracies reported for this script -- consider small
    # random floats and feature scaling.
    value1 = np.random.randint(-5, 5, (1, y)).astype(np.float64)
    value2 = np.random.randint(-5, 5, (1, z)).astype(np.float64)
    weight1 = np.random.randint(-5, 5, (x, y)).astype(np.float64)
    weight2 = np.random.randint(-5, 5, (y, z)).astype(np.float64)
    return weight1, weight2, value1, value2


def sigmoid(z):
    """Return the logistic function 1 / (1 + exp(-z)), element-wise.

    The exponential is only ever evaluated on ``-|z|`` so it cannot overflow,
    whatever the magnitude of ``z``.
    """
    z = np.asarray(z, dtype=np.float64)
    decay = np.exp(-np.abs(z))  # always in (0, 1]
    return np.where(z >= 0, 1.0 / (1.0 + decay), decay / (1.0 + decay))


def trainning(dataset, labelset, weight1, weight2, value1, value2):
    """Run one epoch of per-sample gradient descent over ``dataset``.

    The network computes ``sigmoid(sigmoid(s @ weight1 - value1) @ weight2
    - value2)`` for each sample ``s``; the parameters are updated after every
    sample with a fixed learning rate of 0.01.

    Args:
        dataset: sequence of samples, each a 1-D sequence of features.
        labelset: sequence of scalar target values, aligned with ``dataset``.
        weight1: input->hidden weights, shape (n_features, n_hidden).
        weight2: hidden->output weights, shape (n_hidden, n_out).
        value1: hidden-layer thresholds, shape (1, n_hidden).
        value2: output-layer thresholds, shape (1, n_out).

    Returns:
        ``(weight1, weight2, value1, value2)``. The arrays passed in are
        updated in place and are the same objects that are returned.
    """
    x = 0.01  # learning rate (step size)
    for sample, label in zip(dataset, labelset):
        # Row vectors as plain 2-D ndarrays; this avoids the np.mat alias,
        # which recent NumPy releases no longer provide.
        inputset = np.atleast_2d(np.asarray(sample, dtype=np.float64))
        outputset = np.atleast_2d(np.asarray(label, dtype=np.float64))

        # Forward pass.
        output2 = sigmoid(np.dot(inputset, weight1) - value1)  # hidden output
        output3 = sigmoid(np.dot(output2, weight2) - value2)   # network output

        # Backward pass: g is the output-layer gradient term, e the
        # hidden-layer one (sigmoid derivative is out * (1 - out)).
        g = output3 * (1 - output3) * (outputset - output3)
        e = np.dot(g, weight2.T) * output2 * (1 - output2)

        # Compute every update from the pre-update parameters, then apply.
        value1_change = -x * e
        value2_change = -x * g
        weight1_change = x * np.dot(inputset.T, e)
        weight2_change = x * np.dot(output2.T, g)

        value1 += value1_change
        value2 += value2_change
        weight1 += weight1_change
        weight2 += weight2_change
    return weight1, weight2, value1, value2


def testing(dataset1, labelset1, weight1, weight2, value1, value2):
    """Classify every sample, print each prediction and return the accuracy.

    A network output above 0.5 is predicted as class 1, otherwise class 0.

    Args:
        dataset1: sequence of samples, each a 1-D sequence of features.
        labelset1: sequence of true labels, aligned with ``dataset1``.
        weight1, weight2, value1, value2: trained network parameters.

    Returns:
        Fraction of samples whose predicted class equals the true label.
    """
    rightcount = 0
    for sample, label in zip(dataset1, labelset1):
        inputset = np.atleast_2d(np.asarray(sample, dtype=np.float64))
        output2 = sigmoid(np.dot(inputset, weight1) - value1)
        output3 = sigmoid(np.dot(output2, weight2) - value2)

        # Single output neuron: reduce the (1, 1) result to a Python scalar.
        flag = 1 if output3.item() > 0.5 else 0
        if label == flag:
            rightcount += 1
        print("预测为%d 实际为%d" % (flag, label))
    return rightcount / len(dataset1)


def main():
    """Split the data 80/20, train for 100 epochs and print test accuracy."""
    X_train, X_test, Y_train, Y_test = train_test_split(
        X, Y, test_size=0.20, random_state=0)
    n_features = len(X_train[0])
    # The hidden layer is given as many neurons as there are input features.
    weight1, weight2, value1, value2 = parameter_initialization(
        n_features, n_features, 1)
    for i in range(100):
        # One full pass over the training set per epoch.
        weight1, weight2, value1, value2 = trainning(
            X_train, Y_train, weight1, weight2, value1, value2)
        # Use %-formatting so the epoch number is actually substituted.
        print("epoch:%d/100" % (i + 1))
    rate = testing(X_test, Y_test, weight1, weight2, value1, value2)
    print("正确率为%f" % (rate))


if __name__ == '__main__':
    main()
参考博客:https://blog.csdn.net/qq_45769063/article/details/106747656?spm=1001.2014.3001.5506
迭代了100次之后的正确率为0.440129
迭代了200次之后的正确率为0.559871
迭代了500次之后的正确率为0.449838
迭代了1000次之后的正确率为0.440129
迭代了1500次之后的正确率为0.644013
迭代了2000次之后的正确率为0.624595
【推荐】国内首个AI IDE,深度理解中文开发场景,立即下载体验Trae
【推荐】编程新体验,更懂你的AI,立即体验豆包MarsCode编程助手
【推荐】抖音旗下AI助手豆包,你的智能百科全书,全免费不限次数
【推荐】轻量又高性能的 SSH 工具 IShell:AI 加持,快人一步
· TypeScript + Deepseek 打造卜卦网站:技术与玄学的结合
· Manus的开源复刻OpenManus初探
· AI 智能体引爆开源社区「GitHub 热点速览」
· 三行代码完成国际化适配,妙~啊~
· .NET Core 中如何实现缓存的预热?