from sklearn.ensemble import RandomForestClassifier
# Fit a random forest on the training split, then score the test split.
# NOTE(review): no random_state is passed, so the fitted forest (and every
# number derived from it) may differ from run to run.
model = RandomForestClassifier(
    n_estimators=22,
    max_depth=7,
    min_samples_split=33,
    min_samples_leaf=18,
).fit(x_train, y_train)

# Class-probability matrix for the test rows, wrapped in a DataFrame.
p = model.predict_proba(x_test)
m = pd.DataFrame(p)
# Drop the first column; the `1:` slice keeps the result two-dimensional.
# Presumably the target is binary, leaving only the positive-class
# probability - confirm against the data.
n = m.iloc[:, 1:]
from sklearn.metrics import roc_curve
# KS statistic taken from the ROC curve: the largest absolute gap between
# the false-positive rate and the true-positive rate among the returned
# threshold points.
label = y_test
score = n
fpr, tpr, thresholds = roc_curve(label, score)
ks_value = abs(fpr - tpr).max()
# Plot: draw the curves
# Draw both rate curves and their absolute gap. No x-values are supplied,
# so each series is plotted against its own point index.
ks_gap = abs(fpr - tpr)
plt.plot(fpr, label='bad')
plt.plot(tpr, label='good')
plt.plot(ks_gap, label='diff')

# Mark the KS point.
# argmax returns the first index of the largest gap directly, instead of
# searching for an exact floating-point match with ks_value (which raises
# IndexError whenever no element compares equal, e.g. for NaN).
x = np.argmax(ks_gap)
# Vertical red segment from the axis up to the KS value at that index.
plt.plot((x, x), (0, ks_value), label='ks - {:.2f}'.format(ks_value), color='r', marker='o', markerfacecolor='r', markersize=5)
plt.scatter((x, x), (0, ks_value), color='r')
plt.legend()
plt.show()
# 3.1 Hyperparameter tuning
from sklearn.model_selection import GridSearchCV
# Grid-search min_samples_leaf over 10..19 with 10-fold cross-validation,
# holding the other forest settings fixed.
model = RandomForestClassifier(
    n_estimators=22,
    max_depth=7,
    min_samples_split=33,
)
grid = {'min_samples_leaf': list(range(10, 20))}
gs = GridSearchCV(estimator=model, param_grid=grid, cv=10)
gs.fit(x_train, y_train)

# Bare expressions: these values are only shown when run interactively
# (e.g. in a notebook cell); in a plain script they are evaluated and dropped.
gs.best_score_
gs.best_params_