# -*- coding: utf-8 -*-
"""Train a LightGBM binary classifier and print hold-out metrics.

The script reads ``DC_ALL_20170217.csv``, uses every column except
``user_id`` and ``overdue`` as features (missing values filled with -999),
takes ``overdue`` as the target, fits an ``LGBMClassifier`` on 80% of the
first ``N_TRAIN_ROWS`` rows and reports metrics on the remaining 20%.
"""
import numpy as np
import pandas as pd

DATA_PATH = 'DC_ALL_20170217.csv'
# Number of leading rows of the CSV used for training/validation.  The rows
# after these are not used by this script.
N_TRAIN_ROWS = 55596


def ks_statistic(Y, Y_hat):
    """Return the two-sample Kolmogorov-Smirnov statistic of a scorer.

    The statistic is the largest absolute gap between the empirical
    cumulative distribution of the scores of the positive class and that of
    the negative class; it lies in ``[0, 1]``.

    Args:
        Y: Binary ground-truth labels (0/1 or bool), one per sample.
        Y_hat: Scores for the same samples, e.g. predicted probabilities
            of the positive class.

    Returns:
        The KS statistic as a ``float``.

    Raises:
        ValueError: If the inputs differ in length, or if ``Y`` does not
            contain at least one positive and one negative sample (the
            statistic is undefined in that case).
    """
    labels = np.asarray(Y).ravel().astype(bool)
    scores = np.asarray(Y_hat, dtype=float).ravel()
    if labels.size != scores.size:
        raise ValueError(
            'Y and Y_hat must have the same length, got %d and %d'
            % (labels.size, scores.size))

    n_pos = int(labels.sum())
    n_neg = labels.size - n_pos
    if n_pos == 0 or n_neg == 0:
        raise ValueError(
            'ks_statistic needs at least one positive and one negative label')

    # Stable sort keeps the result independent of how ties are laid out.
    order = np.argsort(scores, kind='mergesort')
    scores = scores[order]
    labels = labels[order]

    # Evaluate both CDFs only at the last sample of each run of equal
    # scores, so tied scores are counted together on both sides.
    run_ends = np.r_[np.flatnonzero(np.diff(scores)), scores.size - 1]
    # Each class is normalised by its own sample count.
    cdf_pos = np.cumsum(labels)[run_ends] / float(n_pos)
    cdf_neg = np.cumsum(~labels)[run_ends] / float(n_neg)
    return float(np.max(np.abs(cdf_pos - cdf_neg)))


def main():
    """Load the data, fit the classifier and print the evaluation metrics."""
    # Imported here so that ``ks_statistic`` can be imported and reused
    # without scikit-learn or LightGBM being installed.
    import lightgbm
    from sklearn.metrics import (
        accuracy_score,
        f1_score,
        log_loss,
        precision_score,
        recall_score,
        roc_auc_score,
    )
    from sklearn.model_selection import train_test_split

    df = pd.read_csv(DATA_PATH, header=0)
    X = df[df.columns.drop(['user_id', 'overdue'])].fillna(-999)
    y = df['overdue']

    train = X.head(N_TRAIN_ROWS)
    # Coerce the target to numeric; values that cannot be parsed become NaN.
    train_label = pd.to_numeric(y.head(N_TRAIN_ROWS), errors='coerce')

    X_train, X_test, y_train, y_test = train_test_split(
        train.values, train_label, test_size=0.2, random_state=42)

    max_depth = 5
    subsample = 0.8
    learning_rate = 0.01
    n_estimators = 400
    random_state = 3
    nthread = 4
    is_unbalance = True
    objective = 'binary'

    LGBM = lightgbm.LGBMClassifier(
        max_depth=max_depth,
        learning_rate=learning_rate,
        n_estimators=n_estimators,
        objective=objective,
        is_unbalance=is_unbalance,
        nthread=nthread,
        subsample=subsample,
        random_state=random_state,
    )
    LGBM.fit(X_train, y_train)

    y_test_v = LGBM.predict(X_test)              # hard class predictions
    y_test_p = LGBM.predict_proba(X_test)[:, 1]  # P(class == 1)

    print('auc: ', roc_auc_score(y_test, y_test_p))
    print('log_loss: ', log_loss(y_test, y_test_p))
    print('precision: ', precision_score(y_test, y_test_v))
    print('recall: ', recall_score(y_test, y_test_v))
    print('accuracy: ', accuracy_score(y_test, y_test_v))
    print('f1_score: ', f1_score(y_test, y_test_v))
    # KS compares score distributions, so it is fed the probabilities
    # rather than the thresholded predictions.
    print('ks_statistic: ', ks_statistic(y_test.values, y_test_p))


if __name__ == '__main__':
    main()
【推荐】国内首个AI IDE,深度理解中文开发场景,立即下载体验Trae
【推荐】编程新体验,更懂你的AI,立即体验豆包MarsCode编程助手
【推荐】抖音旗下AI助手豆包,你的智能百科全书,全免费不限次数
【推荐】轻量又高性能的 SSH 工具 IShell:AI 加持,快人一步
· 开发者必知的日志记录最佳实践
· SQL Server 2025 AI相关能力初探
· Linux系列:如何用 C#调用 C方法造成内存泄露
· AI与.NET技术实操系列(二):开始使用ML.NET
· 记一次.NET内存居高不下排查解决与启示
· 阿里最新开源QwQ-32B,效果媲美deepseek-r1满血版,部署成本又又又降低了!
· 开源Multi-agent AI智能体框架aevatar.ai,欢迎大家贡献代码
· Manus重磅发布:全球首款通用AI代理技术深度解析与实战指南
· 被坑几百块钱后,我竟然真的恢复了删除的微信聊天记录!
· 没有Manus邀请码?试试免邀请码的MGX或者开源的OpenManus吧