# 2024.11.27 (周三 / Wed)
# Evaluate a random forest classifier on the iris dataset with stratified
# 5-fold cross-validation and print the mean of each metric across folds.

import pandas as pd
from sklearn.datasets import load_iris
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import (
    accuracy_score,
    f1_score,
    precision_score,
    recall_score,
)
from sklearn.model_selection import (
    StratifiedKFold,
    cross_val_score,
    cross_validate,
    train_test_split,
)

# (Display label, scikit-learn scorer name) pairs. Both the `scoring`
# argument and the `test_<scorer>` result keys are derived from this table so
# the requested metrics and the printed metrics cannot drift apart.
METRICS = (
    ("准确度 (Accuracy)", "accuracy"),
    ("精度 (Precision)", "precision_macro"),
    ("召回率 (Recall)", "recall_macro"),
    ("F1 值 (F1 Score)", "f1_macro"),
)

# (1) Read the iris dataset from a local file with pandas.
# Assumes the dataset has been saved as CSV; adjust the path as needed.
# iris_data = pd.read_csv("iris.csv")

# (2) Load the iris dataset directly from scikit-learn.
iris = load_iris()
X = iris.data  # features
y = iris.target  # labels

# (3) Train and evaluate with 5-fold cross-validation.
# StratifiedKFold keeps the class distribution similar in every fold.
kf = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)

# Random forest classifier; random_state is fixed at 42, same as the splitter.
model = RandomForestClassifier(random_state=42)

# cross_validate is used because it accepts several scoring metrics in a
# single run.
scores = cross_validate(
    model,
    X,
    y,
    cv=kf,
    scoring=[scorer for _, scorer in METRICS],
)

# Report the mean of each metric over the five folds.
print("五折交叉验证结果:")
for label, scorer in METRICS:
    print(f"{label}: {scores[f'test_{scorer}'].mean():.4f}")