【机器学习】:XGBoost使用Optuna进行超参数调优
代码如下:
def objective(trial, data=data, target=target):
    """Optuna objective: fit an XGBoost regressor and return its hold-out RMSE.

    Args:
        trial: Optuna trial used to sample the hyperparameters.
        data: Feature matrix; defaults to the module-level ``data``.
        target: Regression target; defaults to the module-level ``target``.

    Returns:
        Root mean squared error on the 15% hold-out split (lower is better).
    """
    # Fixed seed so that every trial is scored on the same split.
    train_x, test_x, train_y, test_y = train_test_split(
        data, target, test_size=0.15, random_state=42
    )

    param = {
        # Use the GPU histogram algorithm to speed up training.
        'tree_method': 'gpu_hist',
        # suggest_float(..., log=True) replaces the deprecated suggest_loguniform.
        'lambda': trial.suggest_float('lambda', 1e-3, 10.0, log=True),
        'alpha': trial.suggest_float('alpha', 1e-3, 10.0, log=True),
        'colsample_bytree': trial.suggest_categorical(
            'colsample_bytree', [0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0]
        ),
        'subsample': trial.suggest_categorical(
            'subsample', [0.4, 0.5, 0.6, 0.7, 0.8, 1.0]
        ),
        'learning_rate': trial.suggest_categorical(
            'learning_rate',
            [0.008, 0.009, 0.01, 0.012, 0.014, 0.016, 0.018, 0.02],
        ),
        # Upper bound only; early stopping decides the effective tree count.
        'n_estimators': 4000,
        'max_depth': trial.suggest_categorical(
            'max_depth', [5, 7, 9, 11, 13, 15, 17, 20]
        ),
        'random_state': trial.suggest_categorical('random_state', [24, 48, 2020]),
        'min_child_weight': trial.suggest_int('min_child_weight', 1, 300),
        # Passed to the constructor: the fit() keyword is deprecated since
        # XGBoost 1.6 and no longer accepted by 2.x.
        'early_stopping_rounds': 100,
    }

    model = xgb.XGBRegressor(**param)
    # The hold-out split doubles as the early-stopping evaluation set.
    model.fit(train_x, train_y, eval_set=[(test_x, test_y)], verbose=False)

    preds = model.predict(test_x)
    # sqrt(MSE) instead of the deprecated ``squared=False`` keyword; for a
    # single target column the two are the same quantity.
    rmse = mean_squared_error(test_y, preds) ** 0.5
    return rmse
然后使用optuna进行study:
direction使用minimize,是因为objective返回的是RMSE(均方根误差)这一损失值,越小越好,因此需要将其最小化。
n_trials表示总共执行的试验(trial)次数。
# The objective returns an RMSE, so the study searches for the minimum.
study = optuna.create_study(direction='minimize')

# Run 50 trials; each trial trains one model with freshly sampled parameters.
study.optimize(func=objective, n_trials=50)

# Report how many trials ran and the parameters of the best one.
print('Number of finished trials:', len(study.trials))
print('Best trial:', study.best_trial.params)
下面是CatBoost对应的objective函数:
def objective(trial, data=data, target=target):
    """Optuna objective: fit a CatBoost regressor and return its hold-out RMSE.

    Args:
        trial: Optuna trial used to sample the hyperparameters.
        data: Feature matrix; defaults to the module-level ``data``.
        target: Regression target; defaults to the module-level ``target``.

    Returns:
        Root mean squared error on the 15% hold-out split (lower is better).
    """
    # Fixed seed so that every trial is scored on the same split.
    train_x, test_x, train_y, test_y = train_test_split(
        data, target, test_size=0.15, random_state=42
    )

    param = {
        'loss_function': 'RMSE',
        'task_type': 'GPU',
        # suggest_float replaces the deprecated suggest_loguniform /
        # suggest_uniform; ``log=True`` keeps the log-scaled sampling.
        'l2_leaf_reg': trial.suggest_float('l2_leaf_reg', 1e-3, 10.0, log=True),
        'max_bin': trial.suggest_int('max_bin', 200, 400),
        # Optional extra search dimension, disabled in the original:
        # 'rsm': trial.suggest_float('rsm', 0.3, 1.0),
        # The Optuna name 'bagging_fraction' is kept as-is so the keys of
        # study.best_trial.params do not change.
        # NOTE(review): on GPU, CatBoost's default bootstrap type may reject
        # `subsample` -- confirm, and set `bootstrap_type` explicitly if so.
        'subsample': trial.suggest_float('bagging_fraction', 0.4, 1.0),
        'learning_rate': trial.suggest_float('learning_rate', 0.006, 0.018),
        # Upper bound only; early stopping decides the effective tree count.
        'n_estimators': 25000,
        'max_depth': trial.suggest_categorical('max_depth', [7, 10, 14, 16]),
        'random_state': trial.suggest_categorical('random_state', [24, 48, 2020]),
        'min_data_in_leaf': trial.suggest_int('min_data_in_leaf', 1, 300),
    }

    model = CatBoostRegressor(**param)
    # The hold-out split doubles as the early-stopping evaluation set.
    model.fit(
        train_x,
        train_y,
        eval_set=[(test_x, test_y)],
        early_stopping_rounds=200,
        verbose=False,
    )

    preds = model.predict(test_x)
    # sqrt(MSE) instead of the deprecated ``squared=False`` keyword; for a
    # single target column the two are the same quantity.
    rmse = mean_squared_error(test_y, preds) ** 0.5
    return rmse