鸢尾花主成分分析 jupyter实现

import matplotlib.pyplot as plt
from sklearn.decomposition import PCA
from sklearn.datasets import load_iris

# Load the iris dataset; load_iris() returns a dict-like container.
data = load_iris()
# X is the feature matrix, y holds the class label (0, 1 or 2) of each sample.
X, y = data.data, data.target

# n_components is the number of principal components to keep, i.e. the
# dimensionality of the data after reduction.
pca = PCA(n_components=2)

# Fit the PCA model on X and project X onto the two retained components,
# so every row of reduced_X is a 2-D point.
reduced_X = pca.fit_transform(X)
# Build one pair of coordinate lists per class so that each class can be
# plotted with its own colour and marker.
red_x, red_y = [], []
blue_x, blue_y = [], []
green_x, green_y = [], []

# Walk the projected points and their labels in lockstep; the labels in y
# (data.target) take the values 0, 1 and 2.
for point, label in zip(reduced_X, y):
    if label == 0:
        red_x.append(point[0])
        red_y.append(point[1])
    elif label == 1:
        blue_x.append(point[0])
        blue_y.append(point[1])
    else:
        # Any remaining label (2 for iris) goes to the green group.
        green_x.append(point[0])
        green_y.append(point[1])

# Draw each class as its own scatter series: c is the point colour and
# marker is the shape used for each point.
for xs, ys, color, marker in (
    (red_x, red_y, 'r', 'x'),
    (blue_x, blue_y, 'b', 'D'),
    (green_x, green_y, 'g', '+'),
):
    plt.scatter(xs, ys, c=color, marker=marker)

plt.show()

posted @ 2019-09-23 00:16  丹心静居  阅读(835)  评论(0)  编辑  收藏  举报