机器学习—集成学习(GBDT)
一、原理部分:
(原理部分在原文中以图片形式给出。)
二、sklearn实现:
可以看看这个:https://blog.csdn.net/han_xiaoyang/article/details/52663170
1、分类:
# Stage-wise grid search for a GBDT classifier on the scikit-learn digits
# dataset. Each round tunes one hyper-parameter with 5-fold cross-validation,
# prints the winning value and the held-out test accuracy, and carries the
# winner forward into the next round.
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.datasets import load_digits
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score
import numpy as np

# One seed shared by the train/test split and the boosting model so that
# repeated runs print the same numbers (the model is otherwise randomized,
# notably when subsample < 1).
RANDOM_STATE = 1

digits = load_digits()
x_data = digits.data
y_data = digits.target
x_train, x_test, y_train, y_test = train_test_split(
    x_data, y_data, random_state=RANDOM_STATE
)


def run_round(fixed_params, param_grid):
    """Grid-search ``param_grid`` on the training split and report the result.

    Uses the module-level ``x_train``/``y_train`` for the cross-validated
    search and ``x_test``/``y_test`` for the reported accuracy.

    Args:
        fixed_params: Keyword arguments held constant for this round
            (typically the winners of earlier rounds).
        param_grid: Mapping of parameter name to candidate values, as
            accepted by ``GridSearchCV``.

    Returns:
        The fitted ``GridSearchCV`` object; ``best_params_`` holds the
        winning combination.
    """
    gbdt = GradientBoostingClassifier(random_state=RANDOM_STATE, **fixed_params)
    search = GridSearchCV(gbdt, param_grid=param_grid, cv=5)
    search.fit(x_train, y_train)
    print(search.best_params_)
    # accuracy_score is documented as (y_true, y_pred).
    print(accuracy_score(y_test, search.predict(x_test)))
    return search


# Winners accumulated so far; every round starts from these values instead of
# hard-coded numbers, so the script stays consistent if the data or grids change.
best_params = {}

# Round 1: number of boosting stages (the original write-up settled on 90).
model_gbdt1 = run_round(best_params, {'n_estimators': np.arange(50, 200, 10)})
best_params.update(model_gbdt1.best_params_)

# Round 2: learning rate. The original grid was [0.01, 0.03], yet the later
# rounds hard-coded 0.3, a value that was never a candidate. The grid is
# widened so that both the searched values and 0.3 are actually compared.
model_gbdt2 = run_round(best_params, {'learning_rate': [0.01, 0.03, 0.1, 0.3]})
best_params.update(model_gbdt2.best_params_)

# Round 3: tree depth (the original write-up settled on 2). subsample has not
# been tuned yet, so 0.8 is used as a provisional value for this round only.
model_gbdt3 = run_round({**best_params, 'subsample': 0.8}, {'max_depth': [2, 4]})
best_params.update(model_gbdt3.best_params_)

# Round 4: row subsampling ratio.
model_gbdt4 = run_round(best_params, {'subsample': [0.8, 0.9]})
best_params.update(model_gbdt4.best_params_)
最后结果如下(第四轮的输出),效果也很好,和 SVM 差不多。可能是我调参还不够好,不过暂时不纠结这个。
{'subsample': 0.8}
0.986666666667