PCA降维练习
1.读取数据
import pandas as pd
import openpyxl
import numpy as np

# Read the workbook with no header row, so columns get integer labels 0..N.
data = pd.read_excel(
    "C:\\Users\\86152\\Desktop\\我国大陆经济发展状况数据.xlsx",
    header=None,
    engine='openpyxl',
)
# Skip the first two rows (presumably title/header rows -- confirm against
# the workbook) and keep only the eight columns labelled 1 through 8.
data = data.iloc[2:]
data = data[list(range(1, 9))]
data
2.中心化(去均值):每个数据减去其所在列的平均值
# Number of rows (samples) and columns (features) in the selected data.
sample, feature = data.shape

# Centre every column on zero by subtracting that column's own mean.
# The axis is spelled out on purpose: np.mean(DataFrame) forwards axis=None
# to DataFrame.mean, which newer pandas versions reduce over BOTH axes,
# yielding one global scalar instead of a per-column mean.
data = data - data.mean(axis=0)
data
3.计算协方差矩阵:直接调用方法
# Convert the centred data to a NumPy matrix so that `*` further down means
# matrix multiplication. np.asmatrix is used because the np.mat alias was
# removed in NumPy 2.0.
data1 = np.asmatrix(data)
# Use float64 rather than float16: half precision keeps only ~3 significant
# digits and overflows to inf above 65504, which corrupts the covariance.
data1 = data1.astype(np.float64)

# Covariance matrix of the features. np.cov treats each ROW as one variable,
# hence the transpose of the (samples x features) matrix.
covX = np.cov(data1.T)
covX
4. 求特征值和特征向量:调用方法
# A covariance matrix is symmetric, so use eigh: it guarantees real
# eigenvalues (returned in ascending order) and orthonormal eigenvectors,
# whereas the general-purpose eig may return complex values caused purely by
# round-off. Column i of eig_vec is the eigenvector for eig_val[i].
eig_val, eig_vec = np.linalg.eigh(covX)

# Pair each eigenvalue magnitude with its eigenvector (a column of eig_vec).
eig_pairs = [
    (np.abs(value), eig_vec[:, i]) for i, value in enumerate(eig_val)
]
eig_val
5.特征值排序
# Indices that order the eigenvalues from largest to smallest: argsort is
# ascending, so sort the negated values. (Plain np.argsort(eig_val) would
# give smallest-to-largest.)
index = np.negative(eig_val).argsort()
6.降维
# Number of principal components to keep.
k = 3

# Take the eigenvector columns belonging to the k largest eigenvalues and
# store them as the rows of a (k x features) matrix.
top_k = index[:k]
selectVec = np.matrix(eig_vec[:, top_k].T)

# Project the centred data onto the selected components.
# Shapes as noted by the original author: (30, 8) * (8, 3) = (30, 3).
finalData = data1 @ selectVec.T
finalData.shape
finalData