PCA练习
学习了理论知识后,做一个简单的练习实践一下。
# PCA exercise: load a labelled CSV (feature columns followed by one label
# column), project the features onto their top principal components, and
# plot the points before and after the reduction.
import csv
import sys

import numpy as np

try:
    import matplotlib
    import matplotlib.pyplot as plt
except ImportError:
    # Plotting is optional: the loaders and pca() stay usable without
    # matplotlib; show()/show1() raise a clear error instead.
    matplotlib = None
    plt = None

# Colour palette for class labels; label k is drawn with clr[k - 1].
clr = ['g', 'b', 'c', 'k']


def loadDelimDataSet(fileName, delim='\t'):
    """Load a purely numeric, delimiter-separated text file as a matrix.

    This is the tab-delimited loader that was originally also named
    ``loadDataSet`` and was therefore shadowed by the CSV loader below.
    It has its own name here so both loaders are reachable.

    Args:
        fileName: Path of the text file to read.
        delim: Field separator used on every line.

    Returns:
        A ``numpy.matrix`` with one row per non-blank line.

    Raises:
        ValueError: If a field cannot be parsed as a float.
    """
    with open(fileName) as fr:
        # Blank lines are skipped: float('') would otherwise raise.
        rows = [
            [float(token) for token in line.strip().split(delim)]
            for line in fr
            if line.strip()
        ]
    return np.asmatrix(rows)


def str2double(lst):
    """Convert a (nested) sequence of numeric strings to a 2-D float array.

    Args:
        lst: Sequence of rows, each a sequence of strings or numbers.
            A flat sequence is treated as a single row.

    Returns:
        A 2-D ``numpy.ndarray`` of floats.

    Raises:
        ValueError: If an entry is not numeric or the rows are ragged.
    """
    return np.atleast_2d(np.asarray(lst, dtype=float))


def loadDataSet(fileName):
    """Read a CSV file whose last column is the class label.

    Args:
        fileName: Path of the CSV file.

    Returns:
        A ``(data, label)`` tuple: ``data`` is an ``(m, n-1)`` float array
        holding every column but the last, and ``label`` is an ``(m, 1)``
        float array holding the last column.

    Raises:
        ValueError: If the file contains no rows or a field is not numeric.
    """
    # newline='' is what the csv module asks for when given a file object.
    with open(fileName, newline='') as file:
        # csv.reader yields [] for blank lines; drop them so the rows
        # form a rectangular table.
        rows = [row for row in csv.reader(file) if row]
    if not rows:
        raise ValueError("no data rows found in %r" % (fileName,))
    rt = str2double(rows)
    return rt[:, :-1], rt[:, -1:]


def pca(dataMat, topNfeat=9999999):
    """Project data onto its leading principal components.

    Args:
        dataMat: 2-D array-like with one sample per row and one feature
            per column.
        topNfeat: Number of principal components to keep. Values larger
            than the number of features keep all of them.

    Returns:
        A ``(lowDDataMat, reconMat)`` tuple of ``numpy.matrix`` objects:
        the mean-centred data expressed in the kept components, and the
        data reconstructed from that projection in the original space.
        The sign of each projected column is arbitrary.

    Raises:
        ValueError: If ``dataMat`` is not 2-D, has fewer than two samples,
            or ``topNfeat`` is negative.
    """
    samples = np.asarray(dataMat, dtype=float)
    if samples.ndim != 2:
        raise ValueError("dataMat must be 2-D (samples x features)")
    if samples.shape[0] < 2:
        raise ValueError("pca needs at least two samples")
    if topNfeat < 0:
        raise ValueError("topNfeat must be non-negative")

    meanVals = samples.mean(axis=0)
    meanRemoved = samples - meanVals
    # np.cov returns a 0-d array for a single feature; force it to 2-D.
    covMat = np.atleast_2d(np.cov(meanRemoved, rowvar=False))
    # The covariance matrix is symmetric, so eigh applies and guarantees
    # real eigenvalues and eigenvectors.
    eigVals, eigVects = np.linalg.eigh(covMat)
    # Indices of the eigenvalues from largest to smallest, truncated.
    eigValInd = np.argsort(eigVals)[::-1][:topNfeat]
    redEigVects = eigVects[:, eigValInd]

    lowDDataMat = meanRemoved @ redEigVects
    reconMat = lowDDataMat @ redEigVects.T + meanVals
    # Results are wrapped as matrices because the original returned
    # numpy.matrix objects and callers may rely on that type.
    return np.asmatrix(lowDDataMat), np.asmatrix(reconMat)


def _scatter_by_label(xs, ys, label):
    """Draw one scatter plot of (xs, ys) coloured by class label and show it."""
    if plt is None:
        raise ImportError("matplotlib is required for plotting")
    labels = np.asarray(label, dtype=float)
    if labels.ndim > 1:
        labels = labels[:, 0]
    # Wrap around the palette so labels beyond len(clr) do not raise.
    colors = [clr[(int(value) - 1) % len(clr)] for value in labels]
    plt.scatter(xs, ys, c=colors)
    plt.show()


def show(data, label):
    """Plot the first two columns of ``data``, coloured by ``label``.

    Args:
        data: 2-D array-like with at least two columns.
        label: ``(m, 1)`` or length-``m`` array-like of class labels.
    """
    points = np.asarray(data, dtype=float)
    _scatter_by_label(points[:, 0], points[:, 1], label)


def show1(data, label):
    """Plot the first column of ``data`` along the x axis (y = 0).

    Args:
        data: 2-D array-like with at least one column.
        label: ``(m, 1)`` or length-``m`` array-like of class labels.
    """
    points = np.asarray(data, dtype=float)
    _scatter_by_label(points[:, 0], np.zeros(points.shape[0]), label)


path = "C:\\Users\\lg\\Desktop\\pca.csv"

if __name__ == "__main__":
    # An optional first command-line argument overrides the default path.
    csv_path = sys.argv[1] if len(sys.argv) > 1 else path
    data, label = loadDataSet(csv_path)
    print(label)
    show(data, label)
    data1, data2 = pca(data, 1)
    print(data1)
    show1(data1, label)
下图是经过PCA降维之后的结果,将二维数据降至一维。
下面是数据集:
2.07,0.779,1
2.37,0.916,1
2.54,0.905,1
2.54,0.906,1
2.55,0.939,1
2.79,0.967,1
2.91,0.964,1
3.04,0.914,1
3.11,0.939,1
3.16,0.961,1
3.33,0.898,1
3.38,0.912,1
3.41,0.942,1
3.42,0.966,2
3.53,1.05,2
3.64,1.01,2
3.67,0.960,2
3.93,0.969,2
4.05,1.08,2
4.25,1.15,2
4.34,1.03,2
4.38,1.01,2
4.42,0.967,2
4.61,1.09,2
4.69,1.06,2
4.98,1.12,2
5.04,1.03,2
5.07,1.09,2
5.42,1.07,2
5.44,1.16,2
5.46,1.08,3
5.57,1.11,3
5.60,1.10,3
5.69,1.16,3
5.72,1.14,3
5.85,1.08,3
6.20,1.13,3
6.35,1.12,3
6.48,1.20,3
6.74,1.21,3
6.86,1.13,4
7.02,1.12,4
7.08,1.21,4
7.15,1.25,4
7.47,1.25,4
7.60,1.18,4
7.74,1.19,4
7.77,1.30,4
7.83,1.26,4
7.93,1.26,4