PCA历程详细Python代码(原创)
# PCA (principal component analysis) walkthrough.
# The article this follows is linked at the end of the post; the code itself
# was typed by hand.

import numpy as np


def _paired_length(dx, dy, ddof):
    """Return the common length of dx and dy after validating the inputs.

    Raises:
        ValueError: if the two sequences differ in length, or if there are
            not more samples than ``ddof`` (this includes empty input).
    """
    n = len(dx)
    if len(dy) != n:
        raise ValueError("dx and dy must have the same length")
    if n - ddof <= 0:
        raise ValueError("need more samples than ddof to compute a covariance")
    return n


def cov_out1(dx, dy, *, ddof=0):
    """Return the covariance of dx and dy computed as E[xy] - E[x]E[y].

    Args:
        dx: sequence of numbers (list or 1-D array).
        dy: sequence of numbers with the same length as dx.
        ddof: delta degrees of freedom. 0 (default) gives the population
            covariance (normalised by N); 1 gives the sample covariance
            (normalised by N - 1).

    Returns:
        The covariance as a float.

    Raises:
        ValueError: if the lengths differ or N - ddof is not positive.
    """
    n = _paired_length(dx, dy, ddof)

    # Step 1: the individual means of x and y.
    mean_x = sum(dx) / n
    mean_y = sum(dy) / n

    # Step 2: the mean of the element-wise product x*y.
    mean_xy = sum(x * y for x, y in zip(dx, dy)) / n

    cov = mean_xy - mean_x * mean_y
    if ddof:
        # Rescale from the 1/N normalisation to 1/(N - ddof). Skipped for
        # ddof == 0 so the default result is not perturbed by rounding.
        cov = cov * n / (n - ddof)
    return cov


def cov_out2(dx, dy, *, ddof=0):
    """Return the covariance of dx and dy computed as E[(x-E[x])(y-E[y])].

    Args:
        dx: sequence of numbers (list or 1-D array).
        dy: sequence of numbers with the same length as dx.
        ddof: delta degrees of freedom. 0 (default) gives the population
            covariance (normalised by N); 1 gives the sample covariance
            (normalised by N - 1).

    Returns:
        The covariance as a float.

    Raises:
        ValueError: if the lengths differ or N - ddof is not positive.
    """
    n = _paired_length(dx, dy, ddof)

    # Step 1: the individual means of x and y.
    mean_x = sum(dx) / n
    mean_y = sum(dy) / n

    # Step 2: accumulate the products of the deviations from the means.
    dev_sum = sum((x - mean_x) * (y - mean_y) for x, y in zip(dx, dy))
    return dev_sum / (n - ddof)


dx = [2.5, 0.5, 2.2, 1.9, 3.1, 2.3, 2.0, 1.0, 1.5, 1.1]
dy = [2.4, 0.7, 2.9, 2.2, 3.0, 2.7, 1.6, 1.1, 1.6, 0.9]

# Step 1: compute the means of dx and dy.
print('第一步:求dx,dy的平均值')
mean_x = np.mean(dx)
mean_y = np.mean(dy)
print(mean_x, mean_y)

# Step 2: build DataAdjust by centering each variable on its mean.
print('第二步:求解DataAjust')
dx = np.asarray(dx) - mean_x
dy = np.asarray(dy) - mean_y
print(dx)
print(dy)
DataAdjust = np.vstack((dx, dy))  # shape (2, n): row 0 is x, row 1 is y
print(DataAdjust.T)

# Entries of the 2x2 covariance matrix (population covariance, ddof=0).
covx = cov_out2(dx, dx)
covy = cov_out2(dy, dy)
covxy = cov_out2(dx, dy)
covyx = cov_out2(dy, dx)
print(covx)
print(covy)
print(covxy)
print(covyx)

# Step 3: eigenvalues and eigenvectors of the covariance matrix.
print('第三步:求解特征值和特征向量')
cov = np.array([[covx, covxy], [covyx, covy]])
print(cov)

# a holds the eigenvalues; column b[:, i] is the eigenvector for a[i].
a, b = np.linalg.eig(cov)
print('特征值')
print(a)
print("特征矩阵")
print(b)

# Step 4: rank the eigenvalues and keep the k largest (k = 1 here).
print('第四步:将特征值由大到小排序,选取其中最大的k个(这里是1个)')
a_index = np.argmax(a)  # index of the first largest eigenvalue
a_max = a[a_index]
print(a_max)
print(a_index)
b_max = b[:, a_index]
print(b_max)

# Step 5: project the centered samples onto the selected eigenvector.
print('第五步:将样本点投影到选取的特征向量上')
finalData = np.dot(DataAdjust.T, b_max)
print(finalData)
引文:http://www.cnblogs.com/jerrylead/archive/2011/04/18/2020209.html
探究未知是最大乐趣