Hyperparameter Tuning: Grid Search, Random Search, and Bayesian Optimization

In Python, we can use sklearn.model_selection.GridSearchCV to perform a grid search. It systematically trains and cross-validates the model on every combination in the Cartesian product of the candidate parameter values, then reports the combination that scores best. In other words, its purpose is to find the optimal model parameters within a user-specified set of candidate values.

Below is a grid search example using an XGBoost classifier:

 

# Import the required libraries
from sklearn.model_selection import GridSearchCV
import xgboost as xgb
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split

# Load the dataset
iris = load_iris()
X = iris.data
y = iris.target

# Split into training and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Create an XGBoost classifier instance
model = xgb.XGBClassifier()

# Parameter grid to search (3*3*3*3 = 81 combinations in total)
param_lst = {"max_depth": [3, 5, 7],
             "min_child_weight": [1, 3, 6],
             "n_estimators": [100, 200, 300],
             "learning_rate": [0.01, 0.05, 0.1]
            }

# Create the grid search object
grid_search = GridSearchCV(model, param_grid=param_lst, cv=3, verbose=10, n_jobs=-1)

# Run the search on the iris training data
grid_search.fit(X_train, y_train)

# Print the best estimator found
print(grid_search.best_estimator_)

 

Output:

XGBClassifier(base_score=None, booster=None, callbacks=None,
              colsample_bylevel=None, colsample_bynode=None,
              colsample_bytree=None, device=None, early_stopping_rounds=None,
              enable_categorical=False, eval_metric=None, feature_types=None,
              gamma=None, grow_policy=None, importance_type=None,
              interaction_constraints=None, learning_rate=0.01, max_bin=None,
              max_cat_threshold=None, max_cat_to_onehot=None,
              max_delta_step=None, max_depth=3, max_leaves=None,
              min_child_weight=1, missing=nan, monotone_constraints=None,
              multi_strategy=None, n_estimators=200, n_jobs=None,
              num_parallel_tree=None, objective='multi:softprob', ...)
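
Besides best_estimator_, the fitted search object exposes the winning parameter combination and its mean cross-validated score, and since refit=True by default, the refitted model can be scored directly on held-out data. A minimal sketch, reusing grid_search and the test split from above:

# Best parameter combination and its mean cross-validated accuracy
print(grid_search.best_params_)
print(grid_search.best_score_)

# Accuracy of the refitted best model on the held-out test set
print(grid_search.score(X_test, y_test))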

 

If we use random search instead, the code is nearly identical. Unlike grid search, RandomizedSearchCV does not enumerate every combination: it samples a fixed number of parameter settings from the space (n_iter, default 10):

# Import the required libraries
from sklearn.model_selection import RandomizedSearchCV
import xgboost as xgb
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split

# Load the dataset
iris = load_iris()
X = iris.data
y = iris.target

# Split into training and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Create an XGBoost classifier instance
model = xgb.XGBClassifier()

# Parameter space to sample from
param_lst = {"max_depth": [3, 5, 7],
             "min_child_weight": [1, 3, 6],
             "n_estimators": [100, 200, 300],
             "learning_rate": [0.01, 0.05, 0.1]
            }

# Create the randomized search object
random_search = RandomizedSearchCV(model, param_distributions=param_lst, cv=3, verbose=10, n_jobs=-1)

# Run the search on the iris training data
random_search.fit(X_train, y_train)

# Print the best estimator found
print(random_search.best_estimator_)

Output:

XGBClassifier(base_score=None, booster=None, callbacks=None,
              colsample_bylevel=None, colsample_bynode=None,
              colsample_bytree=None, device=None, early_stopping_rounds=None,
              enable_categorical=False, eval_metric=None, feature_types=None,
              gamma=None, grow_policy=None, importance_type=None,
              interaction_constraints=None, learning_rate=0.01, max_bin=None,
              max_cat_threshold=None, max_cat_to_onehot=None,
              max_delta_step=None, max_depth=7, max_leaves=None,
              min_child_weight=1, missing=nan, monotone_constraints=None,
              multi_strategy=None, n_estimators=300, n_jobs=None,
              num_parallel_tree=None, objective='multi:softprob', ...)

The result differs from the grid search above: max_depth=7 and n_estimators=300 instead of max_depth=3 and n_estimators=200. This is expected: with the default n_iter=10, random search evaluates only 10 of the 81 possible combinations, so it can settle on a different configuration and is not guaranteed to find the best one in the grid.
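
To make the run reproducible and to control how many combinations are tried, set random_state and n_iter explicitly. A minimal sketch (n_iter=30 is an arbitrary budget chosen for illustration, not a recommendation):

# Sample 30 of the 81 combinations, with a fixed seed for reproducibility
random_search = RandomizedSearchCV(model, param_distributions=param_lst,
                                   n_iter=30, random_state=42, cv=3, n_jobs=-1)
random_search.fit(X_train, y_train)
print(random_search.best_params_)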

 

The code using Bayesian optimization relies on BayesSearchCV from scikit-optimize (skopt). Rather than sampling blindly, Bayesian optimization fits a surrogate model to the results observed so far and uses it to choose the next promising parameter setting, so it typically needs fewer evaluations than grid or random search:

33
# Import the required libraries
from skopt import BayesSearchCV
from skopt.space import Categorical, Integer, Real
import xgboost as xgb
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split

# Load the dataset
iris = load_iris()
X = iris.data
y = iris.target

# Split into training and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Create an XGBoost classifier instance
model = xgb.XGBClassifier()

# Search space: categorical, integer, and real-valued dimensions
param_lst = {"max_depth": Categorical([3, 5, 7]),
             "min_child_weight": Categorical([1, 3, 6]),
             "n_estimators": Integer(100, 300),
             "learning_rate": Real(0.01, 0.1)
            }

# Create the Bayesian optimization search object
bayes_search = BayesSearchCV(model, search_spaces=param_lst, cv=3, n_jobs=-1)

# Run the search on the iris training data
bayes_search.fit(X_train, y_train)

# Print the best estimator found
print(bayes_search.best_estimator_)

The space above uses discrete (Categorical) dimensions for some parameters. To express the whole space as continuous ranges instead, pass plain (low, high) tuples and let BayesSearchCV infer the dimension types (integer bounds become integer dimensions, float bounds become real-valued ones):

# Import the required libraries
from skopt import BayesSearchCV
import xgboost as xgb
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split

# Load the dataset
iris = load_iris()
X = iris.data
y = iris.target

# Split into training and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Create an XGBoost classifier instance
model = xgb.XGBClassifier()

# Search space as plain (low, high) ranges; integer bounds give integer
# dimensions, float bounds give real-valued dimensions
param_lst = {"max_depth": (3, 7),
             "min_child_weight": (1, 6),
             "n_estimators": (100, 300),
             "learning_rate": (0.01, 0.1)
            }

# Create the Bayesian optimization search object
bayes_search = BayesSearchCV(model, search_spaces=param_lst, cv=3, n_jobs=-1)

# Run the search on the iris training data
bayes_search.fit(X_train, y_train)

# Print the best estimator found
print(bayes_search.best_estimator_)
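
BayesSearchCV also takes an evaluation budget via n_iter (50 model configurations by default) and, once fitted, offers the same interface as the sklearn searches. A minimal sketch, reusing model, param_lst, and the test split from above (n_iter=32 and the seed are arbitrary choices for illustration):

# Limit the optimization to 32 evaluations, with a fixed seed
bayes_search = BayesSearchCV(model, search_spaces=param_lst,
                             n_iter=32, cv=3, n_jobs=-1, random_state=42)
bayes_search.fit(X_train, y_train)

# Same interface as GridSearchCV / RandomizedSearchCV
print(bayes_search.best_params_)
print(bayes_search.score(X_test, y_test))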

 
