20171029机器学习之特征值选择

在处理包含大量特征的数据时，我们往往需要找出对结果 Y 影响（权重）最大的几个特征，以便进行降维。

于是我们可以用以下这段代码：

GitHub：https://github.com/chenjunhaolefa/AI/blob/master/MachineLearning/FeatureSelection.py

# coding=utf-8
import numpy as np
import matplotlib.pyplot as plt
import matplotlib as mpl
from sklearn.ensemble import RandomForestClassifier
from sklearn.datasets import make_classification
from sklearn.ensemble import ExtraTreesClassifier

# Work around garbled Chinese characters (and the minus sign) in plots
# by switching the sans-serif font and disabling the unicode minus glyph.
mpl.rcParams.update({
    'font.sans-serif': [u'simHei'],
    'axes.unicode_minus': False,
})

# Build a small smoke-test data set: a 3x3 random feature matrix X and
# the labels Y = [1, 2, 3] (one distinct class per sample).
X = np.random.random((3, 3))
Y = np.arange(1, 4)
# %-formatting prints "X Y" identically on Python 2 and Python 3; the
# original `print X,Y` statement is a SyntaxError on Python 3.
print("%s %s" % (X, Y))

def FeatureSelection(X,Y):
    """Rank the features of X by importance for predicting Y and plot them.

    Fits an ExtraTreesClassifier (10 trees, random_state=0) on (X, Y),
    prints the feature indices sorted by decreasing importance followed by
    one line per feature, then shows a bar chart of the importances with
    the standard deviation across the individual trees as error bars.

    Args:
        X: Feature matrix exposing ``shape``; ``X.shape[1]`` is used as the
            number of features.
        Y: Target labels, passed to ``forest.fit`` together with X.

    Returns:
        None. Results are printed and plotted.

    Synthetic data with 3 informative features can be generated for
    experiments with, for example::

        X, Y = make_classification(n_samples=10,
                                   n_features=10,
                                   n_informative=3,
                                   n_redundant=0,
                                   n_repeated=0,
                                   n_classes=2,
                                   random_state=0,
                                   shuffle=False)
    """
    # Build a forest of extremely randomized trees; a random forest
    # (RandomForestClassifier(n_estimators=10)) could be used instead.
    forest = ExtraTreesClassifier(n_estimators=10, random_state=0)

    # Fit the forest and compute how much each feature of X contributes to Y.
    forest.fit(X, Y)
    importances = forest.feature_importances_
    # Spread of each feature's importance across the individual trees.
    std = np.std([tree.feature_importances_ for tree in forest.estimators_],
                 axis=0)
    # Feature indices ordered from most to least important.
    indices = np.argsort(importances)[::-1]
    # Parenthesized form prints the same text on Python 2 and Python 3.
    print(indices)

    # Print the feature ranking
    print(u"特征排名 :")
    for rank, idx in enumerate(indices, 1):
        print("%d. feature %d (%f)" % (rank, idx, importances[idx]))

    # Plot the feature importances of the forest
    n_features = X.shape[1]
    positions = range(n_features)
    plt.figure()
    plt.title(u"特征选择")
    plt.bar(positions, importances[indices],
            color="r", yerr=std[indices], align="center")
    plt.xticks(positions, indices)
    plt.xlim([-1, n_features])
    plt.show()

# Run the feature ranking on the sample data defined above.
FeatureSelection(X=X, Y=Y)

posted @ 2017-10-29 23:16 Ootori 阅读(1577) 评论(0) 编辑收藏举报

刷新页面返回顶部

Ootori

20171029机器学习之特征值选择

公告