LightGBM 学习笔记

from sklearn.datasets import load_iris
from matplotlib import pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
import lightgbm as lgb
from lightgbm import LGBMClassifier
import numpy as np
import pandas as pd
from sklearn import metrics
import warnings
# Suppress all Python warnings for the rest of the script to keep the output clean.
warnings.filterwarnings("ignore")

# Load the iris dataset and separate the feature matrix from the class labels.
iris = load_iris()
X = iris.data
y = iris.target

# Hold out 20% of the samples for testing; the fixed random_state makes the
# split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=2022,
)


# Build and train a multi-class LightGBM classifier on the iris training split.
#
# Two XGBoost spellings from the original call are corrected here:
#   * objective='multi:softmax' is not a LightGBM objective name; the LightGBM
#     equivalent is 'multiclass' (alias 'softmax').
#   * `missing` is an XGBoost-only argument and has been dropped.
gbm = lgb.LGBMClassifier(
    max_depth=10,
    learning_rate=0.01,
    n_estimators=2000,  # number of boosting iterations
    objective='multiclass',  # softmax loss for multi-class classification
    num_class=3,
    nthread=-1,  # number of LightGBM threads
    min_child_weight=1,
    max_delta_step=0,
    # NOTE(review): the LightGBM docs state that row subsampling only takes
    # effect when subsample_freq (bagging_freq) is > 0 -- confirm whether
    # bagging is meant to be active here.
    subsample=0.85,
    colsample_bytree=0.7,
    reg_alpha=0,  # L1 regularization coefficient
    reg_lambda=1,  # L2 regularization coefficient
    scale_pos_weight=1,
    seed=0,
)
gbm.fit(X_train, y_train)

# Predict class labels for the held-out test samples.
y_pred = gbm.predict(X_test)

# Compute the accuracy and print it as a percentage with two decimals.
accuracy = accuracy_score(y_test, y_pred)
print("accuarcy: %.2f%%" % (accuracy * 100.0))

 

这里引入了鸢尾花(iris)数据集:iris.data 中的四列为特征,iris.target 为分类标签(共三个类别)。这里训练的模型就是用 LightGBM 对鸢尾花进行分类。

其中比较好用的函数是 accuracy_score(y_test, y_pred):它返回两个数组中对应位置元素相同的比例(即准确率),再配合随后的 print 乘以 100,就可以直接输出百分数,非常方便。

train_test_split(通过 from sklearn.model_selection import train_test_split 导入)可以直接将数据集划分为训练集和测试集两部分;这里 test_size=0.2 表示取 20% 的样本作为测试集。

具体的模型里面的参数我们现在看看:

# Same classifier configuration as used for training, shown again to walk
# through the parameters.
#
# The original call borrowed two XGBoost spellings, corrected here:
# objective='multi:softmax' becomes LightGBM's 'multiclass' (alias 'softmax'),
# and the XGBoost-only `missing` argument is removed.
gbm = lgb.LGBMClassifier(
    max_depth=10,
    learning_rate=0.01,
    n_estimators=2000,  # number of boosting iterations
    objective='multiclass',  # sets the loss function: multi-class softmax
    num_class=3,
    nthread=-1,  # number of LightGBM threads
    min_child_weight=1,
    max_delta_step=0,
    # NOTE(review): per the LightGBM docs, subsample is only applied when
    # subsample_freq (bagging_freq) is > 0 -- confirm the intended setting.
    subsample=0.85,
    colsample_bytree=0.7,
    reg_alpha=0,  # L1 regularization coefficient
    reg_lambda=1,  # L2 regularization coefficient
    scale_pos_weight=1,
    seed=0,
)
gbm.fit(X_train, y_train)

# Save the trained model with pickle, load it back, and check that the
# restored model predicts on the test split.
import pickle

# Serialize the fitted classifier to disk.
with open('model.pkl', 'wb') as fout:
    pickle.dump(gbm, fout)

# Load the model back with pickle to predict.
# NOTE: pickle.load can execute arbitrary code while deserializing; only load
# files that you created yourself or otherwise trust.
with open('model.pkl', 'rb') as fin:
    pkl_bst = pickle.load(fin)

# can predict with any iteration when loaded in pickle way
y_pred = pkl_bst.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print("accuarcy: %.2f%%" % (accuracy*100.0))

 

posted @ 2022-10-21 11:03  二师兄不讲英文  阅读(101)  评论(0)  编辑  收藏  举报