import csv
import json

import lightgbm as lgb
import numpy as np
import pandas as pd
from sklearn.datasets import load_iris
from sklearn.datasets import make_classification
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import train_test_split


def read_data(test_data='input/train.csv', n=0, label=1):
    """Load feature rows (and optionally labels) from a CSV file.

    The first row of the file is treated as a header and skipped.

    Args:
        test_data: Path of the CSV file to read.
        n: Index of the first column that belongs to the features.
        label: 1 when the file is labelled (supervised) data, in which case
            the last column of every row is parsed as an integer label and
            excluded from the features. Any other value means "no labels".

    Returns:
        A tuple ``(x_list, y_list)``. ``x_list`` holds one list of raw string
        cells per data row; ``y_list`` holds the integer labels and stays
        empty when ``label != 1``.
    """
    # Raise the per-field size limit (500 MiB); the original author marked
    # this call as required for their input files.
    csv.field_size_limit(500 * 1024 * 1024)

    x_list = []
    y_list = []
    # newline="" is what the csv module documents for files handed to
    # csv.reader; the context manager guarantees the handle is closed.
    with open(test_data, encoding="utf8", errors="ignore", newline="") as csv_file:
        csv_reader = csv.reader(csv_file)
        next(csv_reader, None)  # skip the header row
        for one_line in csv_reader:
            if label == 1:
                # Labelled data: the last column is always the label.
                y_list.append(int(one_line[-1]))
                x_list.append(one_line[n:-1])
            else:
                x_list.append(one_line[n:])
    return x_list, y_list


def split_data(data_list, y_list, ratio=0.30):
    """Split samples into train/test parts and write them under ``input/``.

    Original author's note: 70% training / 30% test (914285 / 391837 rows).

    Files written:
        * ``input/sub_train.csv`` - columns qid, question_text, target
        * ``input/sub_test_y``    - JSON dump of the test labels
        * ``input/sub_test_x.csv`` - columns qid, question_text

    Args:
        data_list: Sequence of rows; row[0] is written as ``qid`` and row[1]
            as ``question_text``.
        y_list: Labels aligned with ``data_list``.
        ratio: Fraction of the samples assigned to the test set.

    Returns:
        ``(X_train, X_test, y_train, y_test)`` as produced by
        ``train_test_split`` with ``random_state=50``.
    """
    X_train, X_test, y_train, y_test = train_test_split(
        data_list, y_list, test_size=ratio, random_state=50)

    # Training set. newline="" prevents the csv writer from emitting an
    # extra blank line after every row on Windows.
    with open('input/sub_train.csv', 'w', encoding="utf8", newline="",
              errors="ignore") as csvfile:
        fieldnames = ['qid', 'question_text', 'target']
        write = csv.DictWriter(csvfile, fieldnames=fieldnames)
        write.writeheader()
        for row, target in zip(X_train, y_train):
            write.writerow({'qid': row[0], 'question_text': row[1], 'target': target})

    # Test set: label file.
    with open('input/sub_test_y', 'w') as fp:
        json.dump(y_test, fp)

    # Test set: feature CSV (no target column).
    with open('input/sub_test_x.csv', 'w', encoding="utf8", newline="",
              errors="ignore") as csvfile:
        fieldnames = ['qid', 'question_text']
        write = csv.DictWriter(csvfile, fieldnames=fieldnames)
        write.writeheader()
        for row in X_test:
            write.writerow({'qid': row[0], 'question_text': row[1]})

    return X_train, X_test, y_train, y_test


# Load the Iris CSV; n=1 skips the leading Id column, the last column is the label.
F_feature, F_label = read_data(test_data='D:\\20210706E\\2020-python\\light_GBM\\Iris.csv', n=1, label=1)
print(np.array(F_feature))
print(np.array(F_label))
# csv yields strings; convert the features to float explicitly because
# recent scikit-learn input validation rejects string arrays as numeric input.
X_train, X_test, y_train, y_test = train_test_split(
    np.array(F_feature, dtype=float), np.array(F_label), test_size=0.2)
print(70 * '*\n')

# Load the bundled Iris dataset (kept from the original script; the split
# above, built from the CSV file, is what the model is trained on).
print('Load data...')
iris = load_iris()
data = iris.data
target = iris.target

print('Start training...')
# Create and train the model.
gbm = lgb.LGBMRegressor(objective='regression', num_leaves=31, learning_rate=0.05, n_estimators=20)
# Early stopping is requested through the callback: the
# early_stopping_rounds= keyword of fit() was removed in LightGBM 4.
gbm.fit(X_train, y_train,
        eval_set=[(X_test, y_test)],
        eval_metric='l1',
        callbacks=[lgb.early_stopping(stopping_rounds=5)])

print('Start predicting...')
# Predict on the test set using the best iteration found by early stopping.
y_pred = gbm.predict(X_test, num_iteration=gbm.best_iteration_)
# Model evaluation.
print('The rmse of prediction is:', mean_squared_error(y_test, y_pred) ** 0.5)

# feature importances
print('Feature importances:', list(gbm.feature_importances_))

# Grid search for hyper-parameter tuning.
estimator = lgb.LGBMRegressor(num_leaves=31)

param_grid = {
    'learning_rate': [0.01, 0.1, 1],
    'n_estimators': [20, 40],
}

gbm = GridSearchCV(estimator, param_grid)
gbm.fit(X_train, y_train)

print('Best parameters found by grid search are:', gbm.best_params_)
from sklearn.metrics import roc_auc_score, accuracy_score
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 | Id ,SepalLengthCm,SepalWidthCm,PetalLengthCm,PetalWidthCm,Species 1 , 5.1 , 3.5 , 1.4 , 0.2 , 0 2 , 4.9 , 3.0 , 1.4 , 0.2 , 0 3 , 4.7 , 3.2 , 1.3 , 0.2 , 0 4 , 4.6 , 3.1 , 1.5 , 0.2 , 0 5 , 5.0 , 3.6 , 1.4 , 0.2 , 0 6 , 5.4 , 3.9 , 1.7 , 0.4 , 0 7 , 4.6 , 3.4 , 1.4 , 0.3 , 0 8 , 5.0 , 3.4 , 1.5 , 0.2 , 0 9 , 4.4 , 2.9 , 1.4 , 0.2 , 0 10 , 4.9 , 3.1 , 1.5 , 0.1 , 0 11 , 5.4 , 3.7 , 1.5 , 0.2 , 0 12 , 4.8 , 3.4 , 1.6 , 0.2 , 0 13 , 4.8 , 3.0 , 1.4 , 0.1 , 0 14 , 4.3 , 3.0 , 1.1 , 0.1 , 0 15 , 5.8 , 4.0 , 1.2 , 0.2 , 0 16 , 5.7 , 4.4 , 1.5 , 0.4 , 0 17 , 5.4 , 3.9 , 1.3 , 0.4 , 0 18 , 5.1 , 3.5 , 1.4 , 0.3 , 0 19 , 5.7 , 3.8 , 1.7 , 0.3 , 0 20 , 5.1 , 3.8 , 1.5 , 0.3 , 0 21 , 5.4 , 3.4 , 1.7 , 0.2 , 0 22 , 5.1 , 3.7 , 1.5 , 0.4 , 0 23 , 4.6 , 3.6 , 1.0 , 0.2 , 0 24 , 5.1 , 3.3 , 1.7 , 0.5 , 0 25 , 4.8 , 3.4 , 1.9 , 0.2 , 0 26 , 5.0 , 3.0 , 1.6 , 0.2 , 0 27 , 5.0 , 3.4 , 1.6 , 0.4 , 0 28 , 5.2 , 3.5 , 1.5 , 0.2 , 0 29 , 5.2 , 3.4 , 1.4 , 0.2 , 0 30 , 4.7 , 3.2 , 1.6 , 0.2 , 0 31 , 4.8 , 3.1 , 1.6 , 0.2 , 0 32 , 5.4 , 3.4 , 1.5 , 0.4 , 0 33 , 5.2 , 4.1 , 1.5 , 0.1 , 0 34 , 5.5 , 4.2 , 1.4 , 0.2 , 0 35 , 4.9 , 3.1 , 1.5 , 0.1 , 0 36 , 5.0 , 3.2 , 1.2 , 0.2 , 0 37 , 5.5 , 3.5 , 1.3 , 0.2 , 0 38 , 4.9 , 3.1 , 1.5 , 0.1 , 0 39 , 4.4 , 3.0 , 1.3 , 0.2 , 0 40 , 5.1 , 3.4 , 1.5 , 0.2 , 0 41 , 5.0 , 3.5 , 1.3 , 0.3 , 0 42 , 4.5 , 2.3 , 1.3 , 0.3 , 0 43 , 4.4 , 3.2 , 1.3 , 0.2 , 0 44 , 5.0 , 3.5 , 1.6 , 0.6 , 0 45 , 5.1 , 3.8 , 1.9 , 0.4 , 0 46 , 4.8 , 3.0 , 1.4 , 0.3 , 0 47 , 5.1 , 3.8 , 
1.6 , 0.2 , 0 48 , 4.6 , 3.2 , 1.4 , 0.2 , 0 49 , 5.3 , 3.7 , 1.5 , 0.2 , 0 50 , 5.0 , 3.3 , 1.4 , 0.2 , 0 51 , 7.0 , 3.2 , 4.7 , 1.4 , 1 52 , 6.4 , 3.2 , 4.5 , 1.5 , 1 53 , 6.9 , 3.1 , 4.9 , 1.5 , 1 54 , 5.5 , 2.3 , 4.0 , 1.3 , 1 55 , 6.5 , 2.8 , 4.6 , 1.5 , 1 56 , 5.7 , 2.8 , 4.5 , 1.3 , 1 57 , 6.3 , 3.3 , 4.7 , 1.6 , 1 58 , 4.9 , 2.4 , 3.3 , 1.0 , 1 59 , 6.6 , 2.9 , 4.6 , 1.3 , 1 60 , 5.2 , 2.7 , 3.9 , 1.4 , 1 61 , 5.0 , 2.0 , 3.5 , 1.0 , 1 62 , 5.9 , 3.0 , 4.2 , 1.5 , 1 63 , 6.0 , 2.2 , 4.0 , 1.0 , 1 64 , 6.1 , 2.9 , 4.7 , 1.4 , 1 65 , 5.6 , 2.9 , 3.6 , 1.3 , 1 66 , 6.7 , 3.1 , 4.4 , 1.4 , 1 67 , 5.6 , 3.0 , 4.5 , 1.5 , 1 68 , 5.8 , 2.7 , 4.1 , 1.0 , 1 69 , 6.2 , 2.2 , 4.5 , 1.5 , 1 70 , 5.6 , 2.5 , 3.9 , 1.1 , 1 71 , 5.9 , 3.2 , 4.8 , 1.8 , 1 72 , 6.1 , 2.8 , 4.0 , 1.3 , 1 73 , 6.3 , 2.5 , 4.9 , 1.5 , 1 74 , 6.1 , 2.8 , 4.7 , 1.2 , 1 75 , 6.4 , 2.9 , 4.3 , 1.3 , 1 76 , 6.6 , 3.0 , 4.4 , 1.4 , 1 77 , 6.8 , 2.8 , 4.8 , 1.4 , 1 78 , 6.7 , 3.0 , 5.0 , 1.7 , 1 79 , 6.0 , 2.9 , 4.5 , 1.5 , 1 80 , 5.7 , 2.6 , 3.5 , 1.0 , 1 81 , 5.5 , 2.4 , 3.8 , 1.1 , 1 82 , 5.5 , 2.4 , 3.7 , 1.0 , 1 83 , 5.8 , 2.7 , 3.9 , 1.2 , 1 84 , 6.0 , 2.7 , 5.1 , 1.6 , 1 85 , 5.4 , 3.0 , 4.5 , 1.5 , 1 86 , 6.0 , 3.4 , 4.5 , 1.6 , 1 87 , 6.7 , 3.1 , 4.7 , 1.5 , 1 88 , 6.3 , 2.3 , 4.4 , 1.3 , 1 89 , 5.6 , 3.0 , 4.1 , 1.3 , 1 90 , 5.5 , 2.5 , 4.0 , 1.3 , 1 91 , 5.5 , 2.6 , 4.4 , 1.2 , 1 92 , 6.1 , 3.0 , 4.6 , 1.4 , 1 93 , 5.8 , 2.6 , 4.0 , 1.2 , 1 94 , 5.0 , 2.3 , 3.3 , 1.0 , 1 95 , 5.6 , 2.7 , 4.2 , 1.3 , 1 96 , 5.7 , 3.0 , 4.2 , 1.2 , 1 97 , 5.7 , 2.9 , 4.2 , 1.3 , 1 98 , 6.2 , 2.9 , 4.3 , 1.3 , 1 99 , 5.1 , 2.5 , 3.0 , 1.1 , 1 100 , 5.7 , 2.8 , 4.1 , 1.3 , 1 101 , 6.3 , 3.3 , 6.0 , 2.5 , 2 102 , 5.8 , 2.7 , 5.1 , 1.9 , 2 103 , 7.1 , 3.0 , 5.9 , 2.1 , 2 104 , 6.3 , 2.9 , 5.6 , 1.8 , 2 105 , 6.5 , 3.0 , 5.8 , 2.2 , 2 106 , 7.6 , 3.0 , 6.6 , 2.1 , 2 107 , 4.9 , 2.5 , 4.5 , 1.7 , 2 108 , 7.3 , 2.9 , 6.3 , 1.8 , 2 109 , 6.7 , 2.5 , 5.8 , 1.8 , 2 110 , 7.2 , 3.6 , 6.1 , 2.5 , 2 111 , 6.5 , 3.2 , 5.1 
, 2.0 , 2 112 , 6.4 , 2.7 , 5.3 , 1.9 , 2 113 , 6.8 , 3.0 , 5.5 , 2.1 , 2 114 , 5.7 , 2.5 , 5.0 , 2.0 , 2 115 , 5.8 , 2.8 , 5.1 , 2.4 , 2 116 , 6.4 , 3.2 , 5.3 , 2.3 , 2 117 , 6.5 , 3.0 , 5.5 , 1.8 , 2 118 , 7.7 , 3.8 , 6.7 , 2.2 , 2 119 , 7.7 , 2.6 , 6.9 , 2.3 , 2 120 , 6.0 , 2.2 , 5.0 , 1.5 , 2 121 , 6.9 , 3.2 , 5.7 , 2.3 , 2 122 , 5.6 , 2.8 , 4.9 , 2.0 , 2 123 , 7.7 , 2.8 , 6.7 , 2.0 , 2 124 , 6.3 , 2.7 , 4.9 , 1.8 , 2 125 , 6.7 , 3.3 , 5.7 , 2.1 , 2 126 , 7.2 , 3.2 , 6.0 , 1.8 , 2 127 , 6.2 , 2.8 , 4.8 , 1.8 , 2 128 , 6.1 , 3.0 , 4.9 , 1.8 , 2 129 , 6.4 , 2.8 , 5.6 , 2.1 , 2 130 , 7.2 , 3.0 , 5.8 , 1.6 , 2 131 , 7.4 , 2.8 , 6.1 , 1.9 , 2 132 , 7.9 , 3.8 , 6.4 , 2.0 , 2 133 , 6.4 , 2.8 , 5.6 , 2.2 , 2 134 , 6.3 , 2.8 , 5.1 , 1.5 , 2 135 , 6.1 , 2.6 , 5.6 , 1.4 , 2 136 , 7.7 , 3.0 , 6.1 , 2.3 , 2 137 , 6.3 , 3.4 , 5.6 , 2.4 , 2 138 , 6.4 , 3.1 , 5.5 , 1.8 , 2 139 , 6.0 , 3.0 , 4.8 , 1.8 , 2 140 , 6.9 , 3.1 , 5.4 , 2.1 , 2 141 , 6.7 , 3.1 , 5.6 , 2.4 , 2 142 , 6.9 , 3.1 , 5.1 , 2.3 , 2 143 , 5.8 , 2.7 , 5.1 , 1.9 , 2 144 , 6.8 , 3.2 , 5.9 , 2.3 , 2 145 , 6.7 , 3.3 , 5.7 , 2.5 , 2 146 , 6.7 , 3.0 , 5.2 , 2.3 , 2 147 , 6.3 , 2.5 , 5.0 , 1.9 , 2 148 , 6.5 , 3.0 , 5.2 , 2.0 , 2 149 , 6.2 , 3.4 , 5.4 , 2.3 , 2 150 , 5.9 , 3.0 , 5.1 , 1.8 , 2 |
# joblib is imported directly: sklearn.externals.joblib was removed from
# scikit-learn (0.23+), which caused the ImportError quoted below.
import joblib
from lightgbm import LGBMClassifier
from lightgbm import early_stopping
from sklearn.datasets import load_iris
from sklearn.metrics import accuracy_score
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import train_test_split

# Load data.
iris = load_iris()
data = iris.data
target = iris.target

# Split into training and test data.
X_train, X_test, y_train, y_test = train_test_split(data, target, test_size=0.2)

# Train the model. Early stopping is requested through the callback because
# the early_stopping_rounds= keyword of fit() was removed in LightGBM 4.
gbm = LGBMClassifier(num_leaves=31, learning_rate=0.05, n_estimators=20)
gbm.fit(X_train, y_train,
        eval_set=[(X_test, y_test)],
        callbacks=[early_stopping(stopping_rounds=5)])

# Persist the model.
joblib.dump(gbm, 'loan_model.pkl')
# Load the model back. NOTE: joblib files are pickles; only load files
# that come from a trusted source.
gbm = joblib.load('loan_model.pkl')

# Predict using the best iteration found by early stopping.
y_pred = gbm.predict(X_test, num_iteration=gbm.best_iteration_)

# Model evaluation.
print('The accuracy of prediction is:', accuracy_score(y_test, y_pred))

# Feature importances.
print('Feature importances:', list(gbm.feature_importances_))

# Grid search for hyper-parameter tuning.
estimator = LGBMClassifier(num_leaves=31)
param_grid = {
    'learning_rate': [0.01, 0.1, 1],
    'n_estimators': [20, 40],
}
gbm = GridSearchCV(estimator, param_grid)
gbm.fit(X_train, y_train)
print('Best parameters found by grid search are:', gbm.best_params_)
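ImportError: cannot import name 'joblib' from 'sklearn.externals' (scikit-learn 0.23 and later no longer bundle joblib; install the standalone joblib package and use `import joblib` instead.)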
【推荐】国内首个AI IDE,深度理解中文开发场景,立即下载体验Trae
【推荐】轻量又高性能的 SSH 工具 IShell:AI 加持,快人一步
· 无需6万激活码!GitHub神秘组织3小时极速复刻Manus,手把手教你使用OpenManus搭建本
· C#/.NET/.NET Core优秀项目和框架2025年2月简报
· 葡萄城 AI 搜索升级:DeepSeek 加持,客户体验更智能
· 什么是nginx的强缓存和协商缓存
· 一文读懂知识蒸馏