python RandomForest跑feature重要性
其实呢,就是直接调用 sklearn 现成接口的事情:用 RandomForestClassifier 训练完之后,读取它的 feature_importances_ 属性即可。
#coding=utf-8
from sklearn.tree import DecisionTreeClassifier
from matplotlib.pyplot import *
from sklearn.cross_validation import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.externals.joblib import Parallel, delayed
from sklearn.tree import export_graphviz
# Load the training CSV. Each row is parsed so that column 0 becomes the
# integer class label and the remaining columns become float features.
# The `with` block guarantees the file handle is closed once it has been read.
with open('full_train.csv', 'r') as final:
    print("open good!")
    # Skip blank lines (e.g. a trailing newline at end of file); otherwise
    # int('') below would raise ValueError.
    data = [line.strip().split(',') for line in final if line.strip()]
feature = [[float(x) for x in row[1:]] for row in data]
target = [int(row[0]) for row in data]
print("del good!")
# Hold out 10% of the samples as a test set; the fixed random_state keeps
# the split reproducible between runs.
# NOTE: newer scikit-learn releases provide train_test_split from
# sklearn.model_selection; the sklearn.cross_validation module imported at
# the top of this file was removed in scikit-learn 0.20.
feature_train, feature_test, target_train, target_test = train_test_split(
    feature, target, test_size=0.1, random_state=42)
# Random forest classifier built from 8 trees.
clf = RandomForestClassifier(n_estimators=8)
print("train good")
# Train the model. fit() returns the estimator, which is printed below.
s = clf.fit(feature_train, target_train)
print(s)
print("fuck high")
# Evaluate the model: score() reports the mean accuracy on the test set.
r = clf.score(feature_test, target_test)
print(r)
# predict() expects a 2-D array of shape (n_samples, n_features). Passing a
# bare 1-D sample was deprecated in scikit-learn 0.17 and raises ValueError
# from 0.19 on, so the single sample is wrapped in a list.
print('判定结果:%s' % clf.predict([feature_test[0]]))
# Optional: per-class probabilities for the same sample.
# print(clf.predict_proba([feature_test[0]]))
print('所有的树:%s' % clf.estimators_)
print(clf.classes_)
print(clf.n_classes_)
# Per-feature importances learned by the fitted forest.
print('各feature的重要性:%s' % clf.feature_importances_)
print(clf.n_outputs_)