plot_importance多分类、排序不匹配、图片数值不显示
多分类(此处示例实际为多输出回归:每个目标列对应一个估计器,逐个绘制特征重要性)
# Train one XGBoost regressor per target column, then plot and export the
# gain-based feature importance of every per-target estimator.
# NOTE(review): train_x, train_y, test_x, x_columns_list, y_columns_list, imp
# and importance_path are not defined in this snippet and must already exist;
# imp is presumably a pandas DataFrame -- confirm against the full script.
params = {
    'booster': 'gbtree',
    'objective': 'reg:squarederror',
    "learning_rate": 0.01,
    'n_estimators': 500,
    "missing": -1,
}
multioutputregressor_xgb = MultiOutputRegressor(xgb.XGBRegressor(**params)).fit(train_x, train_y)
predict_values = multioutputregressor_xgb.predict(test_x)

# enumerate() supplies the target index, so the loop no longer depends on a
# counter `i` that has to be initialised and incremented by hand.
for i, items in enumerate(multioutputregressor_xgb.estimators_):
    # Fetch the booster once and reuse it for naming and scoring.
    booster = items.get_booster()
    booster.feature_names = x_columns_list
    empty_dict = {k: round(v, 4) for k, v in booster.get_score(importance_type='gain').items()}
    # Skip plotting when no feature received a gain score.
    if empty_dict:
        plot_importance(empty_dict, max_num_features=10, importance_type='gain', show_values=True, title="Feature importance")
    imp["name"] = booster.feature_names
    imp[y_columns_list[i]] = items.feature_importances_
    # The former bare `imp.round(6)` was dropped: its result was discarded, so
    # it had no effect. Six-decimal output is produced by float_format below.
    plt.show()

imp.to_csv(importance_path, sep="\t", float_format="%.6f")
排序不匹配
model.feature_importances_ 的重要性默认按 gain 计算,而 xgb.plot_importance 默认按 weight 计算,因此两者给出的特征排序不一致。绘图时显式指定 importance_type='gain' 即可保持一致:
# Rank by gain so the chart uses the same importance type that is passed
# explicitly here instead of plot_importance's own default.
xgb.plot_importance(
    model,
    importance_type='gain',
    max_num_features=10,
)
图片数值不显示
打开 xgboost 安装目录下的 plotting.py,
找到并修改 plot_importance 函数:
def plot_importance(booster, ax=None, height=0.2,
xlim=None, ylim=None, title='Feature importance',
xlabel='F score', ylabel='Features', fmap='',
importance_type='weight', max_num_features=None,
grid=True, show_values=True, **kwargs):
在参数列表中增加 max_digits=3,
然后修改 show_values 对应的代码。
原来是这样:
# Code as quoted before the patch (indentation was lost in this paste):
# each bar is labelled with its raw value x, placed at a fixed offset of 1
# to the right of the bar end.
if show_values is True:
for x, y in zip(values, ylocs):
ax.text(x + 1, y, x, va='center')
更改成:
# Build the label shown next to each bar. Start from the raw values so that
# values_displayed is always bound, even when max_digits is None (the earlier
# version raised NameError in that case once show_values was true).
values_displayed = values
if max_digits is not None:
    # Decide per value instead of inspecting only the first one: a value is
    # reformatted only when it carries more precision than max_digits, so
    # integer counts stay untouched and long floats are shortened.
    values_displayed = tuple(
        '{:.{}f}'.format(x, max_digits) if round(x, max_digits) != x else x
        for x in values
    )
if show_values is True:
    # Offset every label by 1% of the longest bar; computed once because it
    # does not change between iterations. Guarded so empty input is a no-op.
    dx = np.max(values) / 100 if len(values) else 0
    for x, x2, y in zip(values, values_displayed, ylocs):
        ax.text(x + dx, y, x2, va='center')