import pandas as pd
from lightgbm import LGBMClassifier
from sklearn.metrics import accuracy_score
# Join the two frames side by side (column-wise concatenation).
df3 = pd.concat((df1, df2), axis="columns")

# Fit a LightGBM classifier on the training split.
model = LGBMClassifier(num_leaves=29, learning_rate=0.05)
model.fit(x_train, y_train)

# Per-class probabilities for the test split, wrapped in a DataFrame so the
# columns can be sliced by position.
p = model.predict_proba(x_test)
m = pd.DataFrame(data=p)

# Keep every column except the first (column 0); `n` stays a DataFrame.
n = m.iloc[:, 1:]
from sklearn.metrics import roc_curve
# Inputs to the ROC computation: the test labels and the score column(s)
# sliced out of the predicted probabilities.
label = y_test
score = n
fpr, tpr, thresholds = roc_curve(label, score)

# KS statistic: the largest absolute gap between the FPR and TPR curves.
# The gap is computed once and reused for the statistic, the plot and the
# marker position.
ks_gap = abs(fpr - tpr)
ks_value = ks_gap.max()

# Plot the curves. No x values are passed, so each curve is drawn against
# its position in the roc_curve output.
# NOTE(review): `plt` (matplotlib.pyplot) is not imported in the visible part
# of this file -- confirm it is imported elsewhere.
plt.plot(fpr, label='bad')
plt.plot(tpr, label='good')
plt.plot(ks_gap, label='diff')

# Mark the KS point: the first position where the gap reaches its maximum.
x = ks_gap.argmax()
plt.plot(
    (x, x),
    (0, ks_value),
    label=f'ks - {ks_value:.2f}',
    color='r',
    marker='o',
    markerfacecolor='r',
    markersize=5,
)
plt.scatter((x, x), (0, ks_value), color='r')
plt.legend()
plt.show()
# 5.1 Hyperparameter tuning
# GridSearchCV is not imported anywhere in the visible part of the file, so
# bring it into scope here before it is used.
from sklearn.model_selection import GridSearchCV

# Search over num_leaves with 10-fold cross-validation, holding the learning
# rate fixed at 0.05.
model = LGBMClassifier(learning_rate=0.05)
grid = {'num_leaves': [28, 29, 30, 32, 35, 40]}
gs = GridSearchCV(model, grid, cv=10)
gs.fit(x_train, y_train)

# Bare expressions are silently discarded when run as a script; print the
# results so they are actually reported.
print(gs.best_score_)
print(gs.best_params_)