# Worked example on a house-price dataset: load the training CSV and preview it.
# NOTE(review): the original note calls this the "Boston" housing data, but the
# path below points at the Kaggle "House Prices - Advanced Regression
# Techniques" competition files -- confirm which dataset name is intended.
import pandas as pd

train = pd.read_csv('/kaggle/input/house-prices-advanced-regression-techniques/train.csv')
# head must be *called*: the bare attribute `train.head` only evaluates to the
# bound method, whose repr dumps the whole frame instead of the first rows.
train.head()
# Recorded output of evaluating `train.head` without parentheses (the repr of
# the bound method, which embeds the repr of the whole frame):
#
# <bound method NDFrame.head of
#         Id  MSSubClass MSZoning  LotFrontage  LotArea Street Alley LotShape  \
# 0        1          60       RL         65.0     8450   Pave   NaN      Reg
# 1        2          20       RL         80.0     9600   Pave   NaN      Reg
# 2        3          60       RL         68.0    11250   Pave   NaN      IR1
# 3        4          70       RL         60.0     9550   Pave   NaN      IR1
# 4        5          60       RL         84.0    14260   Pave   NaN      IR1
# ...    ...         ...      ...          ...      ...    ...   ...      ...
# 1455  1456          60       RL         62.0     7917   Pave   NaN      Reg
# 1456  1457          20       RL         85.0    13175   Pave   NaN      Reg
# 1457  1458          70       RL         66.0     9042   Pave   NaN      Reg
# 1458  1459          20       RL         68.0     9717   Pave   NaN      Reg
# 1459  1460          20       RL         75.0     9937   Pave   NaN      Reg
#
#      LandContour Utilities  ...  PoolArea PoolQC  Fence MiscFeature MiscVal  \
# 0            Lvl    AllPub  ...         0    NaN    NaN         NaN       0
# 1            Lvl    AllPub  ...         0    NaN    NaN         NaN       0
# 2            Lvl    AllPub  ...         0    NaN    NaN         NaN       0
# 3            Lvl    AllPub  ...         0    NaN    NaN         NaN       0
# 4            Lvl    AllPub  ...         0    NaN    NaN         NaN       0
# ...          ...       ...  ...       ...    ...    ...         ...     ...
# 1455         Lvl    AllPub  ...         0    NaN    NaN         NaN       0
# 1456         Lvl    AllPub  ...         0    NaN  MnPrv         NaN       0
# 1457         Lvl    AllPub  ...         0    NaN  GdPrv        Shed    2500
# 1458         Lvl    AllPub  ...         0    NaN    NaN         NaN       0
# 1459         Lvl    AllPub  ...         0    NaN    NaN         NaN       0
#
#      MoSold  YrSold SaleType SaleCondition  SalePrice
# 0         2    2008       WD        Normal     208500
# 1         5    2007       WD        Normal     181500
# 2         9    2008       WD        Normal     223500
# 3         2    2006       WD       Abnorml     140000
# 4        12    2008       WD        Normal     250000
# ...     ...     ...      ...           ...        ...
# 1455      8    2007       WD        Normal     175000
# 1456      2    2010       WD        Normal     210000
# 1457      5    2010       WD        Normal     266500
# 1458      4    2010       WD        Normal     142125
# 1459      6    2008       WD        Normal     147500
#
# [1458 rows x 81 columns]>
# 相关性矩阵获取 (Computing the correlation matrix)
import numpy as np
# Number of columns to keep, ranked by correlation with SalePrice. SalePrice
# correlates perfectly with itself, so it is one of the k selected columns.
k = 10

# Pairwise correlation matrix of the numeric columns. numeric_only=True is
# required on pandas >= 2.0, where corr() raises on string-valued columns
# instead of silently skipping them.
# NOTE(review): `train_drop` is not defined in this section -- presumably a
# cleaned copy of `train` created elsewhere; confirm.
corrmat = train_drop.corr(numeric_only=True)

# Labels of the k columns most strongly (positively) correlated with SalePrice.
cols = corrmat.nlargest(k, 'SalePrice')['SalePrice'].index

# Correlation matrix of the sub-dataset made of those k columns. The transpose
# is needed because np.corrcoef treats each *row* as one variable by default.
# NOTE(review): np.corrcoef propagates NaN, so any selected column with missing
# values would turn its row/column of `cm` into NaN -- confirm inputs are complete.
cm = np.corrcoef(train_drop[cols].values.T)
print(cm)