机器学习—集成学习(GBDT)

一、原理部分:

图片形式~

 

二、sklearn实现:

可以看看这个:https://blog.csdn.net/han_xiaoyang/article/details/52663170

1、分类:

from sklearn.ensemble import GradientBoostingClassifier
from sklearn.datasets import load_digits
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score
import numpy as np

digits = load_digits()
x_data = digits.data
y_data = digits.target

x_train,x_test,y_train,y_test = train_test_split(x_data,y_data,random_state = 1)
#第一轮,确定n=90
gbdt = GradientBoostingClassifier()
model_gbdt1 = GridSearchCV(gbdt,param_grid=({'n_estimators':np.arange(50,200,10)}),cv=5)
model_gbdt1.fit(x_train,y_train)
print(model_gbdt1.best_params_)
y_hat1 = model_gbdt1.predict(x_test)
print(accuracy_score(y_hat1,y_test))

#第二轮
gbdt = GradientBoostingClassifier(n_estimators=90)
model_gbdt2 = GridSearchCV(gbdt,param_grid=({'learning_rate':[0.01,0.03]}),cv=5)
model_gbdt2.fit(x_train,y_train)
print(model_gbdt2.best_params_)
y_hat2 = model_gbdt2.predict(x_test)
print(accuracy_score(y_hat2,y_test))

#第三轮,确定层数:2
gbdt = GradientBoostingClassifier(n_estimators=90,learning_rate=0.3,subsample=0.8)
model_gbdt3 = GridSearchCV(gbdt,param_grid=({'max_depth':[2,4]}),cv=5)
model_gbdt3.fit(x_train,y_train)
print(model_gbdt3.best_params_)
y_hat3 = model_gbdt3.predict(x_test)
print(accuracy_score(y_hat3,y_test))

#第四轮,确定降采样
gbdt = GradientBoostingClassifier(n_estimators=90,learning_rate=0.3,max_depth=2)
model_gbdt4 = GridSearchCV(gbdt,param_grid=({'subsample':[0.8,0.9]}),cv=5)
model_gbdt4.fit(x_train,y_train)
print(model_gbdt4.best_params_)
y_hat4 = model_gbdt4.predict(x_test)
print(accuracy_score(y_hat4,y_test))

 

 最后结果,结果也是很好,跟svm差不多吧。可能是我调参不够好,不过暂时不纠结这个

{'subsample': 0.8}
0.986666666667

 

posted @ 2018-04-14 13:41  慢慢来会比较快  阅读(173)  评论(0编辑  收藏  举报