PCA练习

学习了理论知识后,做一个简单的练习实践一下。

 1 from numpy import *
 2 import matplotlib
 3 import matplotlib.pyplot as plt
 4 import csv
 5 
 6 clr = ['g', 'b', 'c', 'k']
 7 
 8 def loadDataSet(fileName, delim='\t'):
 9     fr = open(fileName)
10     stringArr = [line.strip().split(delim) for line in fr.readlines()]
11     dataArr = [map(float, int) for line in stringArr]
12     return mat(dataArr)
13 
def str2double(lst):
    """Convert a table of numeric strings into a 2-D float array.

    Args:
        lst: A (nested) sequence of values that can be parsed as floats,
            e.g. the rows produced by ``csv.reader``.

    Returns:
        A new ``ndarray`` of floats. The input is coerced to two dimensions
        first, so a flat list becomes a single row.

    Raises:
        ValueError: If an element cannot be parsed as a float or the rows
            have different lengths.
    """
    # asmatrix forces a 2-D shape (and survives NumPy 2.0, which removed
    # mat); array(..., dtype=float) then converts every element in one pass
    # and returns a plain ndarray copy.
    return array(asmatrix(lst), dtype=float)
22 
def loadDataSet(fileName):
    """Load a CSV file whose last column is the class label.

    Every field is converted to ``float``. Blank rows are ignored.

    Args:
        fileName: Path of the CSV file to read.

    Returns:
        A ``(data, label)`` tuple of float arrays: ``data`` has shape
        ``(m, n - 1)`` and holds all columns but the last, ``label`` has
        shape ``(m, 1)`` and holds the last column.

    Raises:
        ValueError: If the file has no data rows, rows differ in length, or
            a field cannot be parsed as a float.
    """
    # newline='' lets the csv module handle line endings itself, as the csv
    # documentation requires.
    with open(fileName, newline='') as file:
        # csv.reader yields [] for blank lines; drop them so the rows stay
        # rectangular.
        rows = [row for row in csv.reader(file) if row]
    if not rows:
        raise ValueError("no data rows found in %r" % (fileName,))
    rt = array(rows, dtype=float)
    # Slice instead of copying element by element; -1: keeps label 2-D.
    data = rt[:, :-1]
    label = rt[:, -1:]
    return data, label
38 
def pca(dataMat, topNfeat=9999999):
    """Project data onto its leading principal components.

    Args:
        dataMat: 2-D data with one sample per row and one feature per
            column (ndarray, matrix, or nested list).
        topNfeat: Number of principal components to keep. The default is
            larger than any realistic feature count, so all are kept.

    Returns:
        A ``(lowDDataMat, reconMat)`` tuple of numpy matrices:
        ``lowDDataMat`` is the mean-centred data expressed in the kept
        components (one column per component, largest variance first) and
        ``reconMat`` is the data reconstructed in the original space from
        those components. The sign of each component is arbitrary.
    """
    # Work on a matrix so that `*` below is always a matrix product,
    # whatever array type the caller passed in.
    dataMat = asmatrix(dataMat)
    meanVals = mean(dataMat, axis=0)
    meanRemoved = dataMat - meanVals
    # rowvar=0: columns are the variables, rows are the observations.
    covMat = asmatrix(cov(meanRemoved, rowvar=0))
    # A covariance matrix is symmetric, so use the symmetric solver: it
    # returns real eigenvalues/eigenvectors, unlike the general eig.
    eigVals, eigVects = linalg.eigh(covMat)
    # Indices of the topNfeat largest eigenvalues, largest first.
    eigValInd = argsort(eigVals)[::-1][:topNfeat]
    redEigVects = eigVects[:, eigValInd]
    lowDDataMat = meanRemoved * redEigVects
    # Map back to the original space and restore the mean.
    reconMat = (lowDDataMat * redEigVects.T) + meanVals
    return lowDDataMat, reconMat
50 
def show(data, label):
    """Scatter-plot the first two columns of ``data``, colored by label.

    Args:
        data: 2-D array or matrix; column 0 is drawn on the x axis and
            column 1 on the y axis.
        label: 2-D array or matrix whose first column holds one class id
            per row of ``data``. Each id selects ``clr[id - 1]``.

    The figure is displayed with ``plt.show()``; nothing is returned.
    """
    # asarray turns a numpy matrix into a plain ndarray so column slices
    # are 1-D.
    points = asarray(data)
    colors = [clr[int(k) - 1] for k in asarray(label)[:, 0]]
    # One scatter call for all points instead of one call per point.
    plt.scatter(points[:, 0], points[:, 1], c=colors)
    plt.show()
56 
def show1(data, label):
    """Plot the first column of ``data`` along the line y = 0, colored by label.

    Args:
        data: 2-D array or matrix; only column 0 is drawn.
        label: 2-D array or matrix whose first column holds one class id
            per row of ``data``. Each id selects ``clr[id - 1]``.

    The figure is displayed with ``plt.show()``; nothing is returned.
    """
    # asarray turns a numpy matrix into a plain ndarray so the column
    # slice is 1-D.
    xs = asarray(data)[:, 0]
    colors = [clr[int(k) - 1] for k in asarray(label)[:, 0]]
    # One scatter call for all points instead of one call per point.
    plt.scatter(xs, zeros(len(xs)), c=colors)
    plt.show()
62 
# Demo run: load the labelled samples, plot them, then reduce them to one
# dimension with PCA and plot the projection.
path = "C:\\Users\\lg\\Desktop\\pca.csv"
data, label = loadDataSet(path)
print(label)
show(data, label)  # original 2-D samples

# Keep only the first principal component; data2 is the reconstruction.
data1, data2 = pca(data, topNfeat=1)
print(data1)
show1(data1, label)  # 1-D projection
View Code

下图是经过PCA降维之后的结果:数据由二维降至一维。

下面是数据集:

 1 2.07,0.779,1
 2 2.37,0.916,1
 3 2.54,0.905,1
 4 2.54,0.906,1
 5 2.55,0.939,1
 6 2.79,0.967,1
 7 2.91,0.964,1
 8 3.04,0.914,1
 9 3.11,0.939,1
10 3.16,0.961,1
11 3.33,0.898,1
12 3.38,0.912,1
13 3.41,0.942,1
14 3.42,0.966,2
15 3.53,1.05,2
16 3.64,1.01,2
17 3.67,0.960,2
18 3.93,0.969,2
19 4.05,1.08,2
20 4.25,1.15,2
21 4.34,1.03,2
22 4.38,1.01,2
23 4.42,0.967,2
24 4.61,1.09,2
25 4.69,1.06,2
26 4.98,1.12,2
27 5.04,1.03,2
28 5.07,1.09,2
29 5.42,1.07,2
30 5.44,1.16,2
31 5.46,1.08,3
32 5.57,1.11,3
33 5.60,1.10,3
34 5.69,1.16,3
35 5.72,1.14,3
36 5.85,1.08,3
37 6.20,1.13,3
38 6.35,1.12,3
39 6.48,1.20,3
40 6.74,1.21,3
41 6.86,1.13,4
42 7.02,1.12,4
43 7.08,1.21,4
44 7.15,1.25,4
45 7.47,1.25,4
46 7.60,1.18,4
47 7.74,1.19,4
48 7.77,1.30,4
49 7.83,1.26,4
50 7.93,1.26,4
View Code

 

posted on 2016-03-19 19:06  JustForCS  阅读(418)  评论(0)  编辑  收藏  举报

导航