python RandomForest跑feature重要性

其实呢,就是模型训练完之后,直接读取 `feature_importances_` 这个属性的事情……

#coding=utf-8
from sklearn.tree import DecisionTreeClassifier
from matplotlib.pyplot import *
from sklearn.cross_validation import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.externals.joblib import Parallel, delayed
from sklearn.tree import export_graphviz

# Load the training set. As parsed below, every CSV row is expected to hold an
# integer class label in column 0 followed by float feature values.
# The `with` block guarantees the file handle is closed, even if parsing fails.
# Single-argument print(...) behaves identically under Python 2 and Python 3.
with open('full_train.csv', 'r') as final:
    print("open good!")
    # Skip blank lines (e.g. a trailing newline at end of file); otherwise an
    # empty row would reach int('') below and raise ValueError.
    data = [line.strip().split(',') for line in final if line.strip()]

# Columns 1..n are the features, column 0 is the target label.
feature = [[float(x) for x in row[1:]] for row in data]
target = [int(row[0]) for row in data]
print("del good!")
# Split into training and test sets: 10% of the samples are held out, and the
# fixed random_state keeps the split the same from run to run.
feature_train, feature_test, target_train, target_test = train_test_split(
    feature,
    target,
    test_size=0.1,
    random_state=42
)

# Random forest classifier made of 8 decision trees.
clf = RandomForestClassifier(n_estimators=8)
print("train good")

# Train the model; the value returned by fit() is kept in `s` and echoed.
s = clf.fit(X=feature_train, y=target_train)
print(s)
print("fuck high")

# Evaluate the model's accuracy on the held-out test set.
r = clf.score(X=feature_test, y=target_test)
print(r)

# Report on the fitted forest. Single-argument print(...) is used throughout
# because it behaves identically under Python 2 and Python 3.

# predict() expects a 2-D input of shape (n_samples, n_features). Passing one
# bare sample (a 1-D list) was deprecated in scikit-learn 0.17 and raises
# ValueError from 0.19 on, so the first test sample is wrapped in a list to
# form a single-row batch.
first_sample = [feature_test[0]]
print('判定结果:%s' % clf.predict(first_sample))
# Class probabilities for the same sample, if needed:
# print(clf.predict_proba(first_sample))

# The individual fitted decision trees that make up the forest.
print('所有的树:%s' % clf.estimators_)

# Class labels seen during fit, and how many there are.
print(clf.classes_)
print(clf.n_classes_)

# Per-feature importance scores, in the same column order as the training
# features.
print('各feature的重要性:%s' % clf.feature_importances_)

# Number of outputs the model was fitted on.
print(clf.n_outputs_)
posted @ 2017-05-07 11:03  qscqesze  阅读(1087)  评论(0)  编辑  收藏  举报