# 2024.11.27 (Wednesday)

import pandas as pd
from sklearn.datasets import load_iris
from sklearn.model_selection import cross_val_score, cross_validate, StratifiedKFold
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import precision_score, recall_score, f1_score, accuracy_score
from sklearn.model_selection import train_test_split
# (1) Optional: read the iris dataset from a local file with pandas.
# This assumes the dataset has already been saved as CSV; adjust the path
# to match the local setup before enabling the line below.
# iris_data = pd.read_csv("iris.csv")

# (2) Load the iris dataset directly from scikit-learn.
iris = load_iris()
X, y = iris.data, iris.target  # features, labels

# (3) Train and evaluate with five-fold cross-validation.
# Random forest classifier, seeded with random_state=42.
model = RandomForestClassifier(random_state=42)

# StratifiedKFold keeps the class distribution similar in every fold.
kf = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)

# cross_validate is used so that several metrics are returned from one run.
metric_names = ['accuracy', 'precision_macro', 'recall_macro', 'f1_macro']
scores = cross_validate(model, X, y, scoring=metric_names, cv=kf)

# Average each metric over the folds before reporting.
mean_accuracy = scores['test_accuracy'].mean()
mean_precision = scores['test_precision_macro'].mean()
mean_recall = scores['test_recall_macro'].mean()
mean_f1 = scores['test_f1_macro'].mean()

# Print the summary.
print("五折交叉验证结果:")
print(f"准确度 (Accuracy): {mean_accuracy:.4f}")
print(f"精度 (Precision): {mean_precision:.4f}")
print(f"召回率 (Recall): {mean_recall:.4f}")
print(f"F1 值 (F1 Score): {mean_f1:.4f}")

 

# posted @ 2024-12-02 16:36  记得关月亮  阅读(2)  评论(0)  编辑  收藏  举报