# 2024.12.9 (Monday)

import numpy as np
import pandas as pd
from sklearn import datasets
from sklearn.model_selection import train_test_split, cross_val_score, StratifiedKFold
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.model_selection import cross_validate

# Step 1: load the iris dataset (feature matrix and class labels).
iris = datasets.load_iris()
X, y = iris.data, iris.target

# Step 2: hold-out split -- about 1/3 of the samples for testing and 2/3 for
# training. Stratifying on y keeps the class proportions equal in both parts.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.33,
    random_state=42,
    stratify=y,
)

# Step 3: build the random forest classifier and fit it on the training part.
rf = RandomForestClassifier(n_estimators=100, random_state=42)
rf.fit(X_train, y_train)

# Step 4: estimate performance with stratified 5-fold cross-validation,
# run on the training part only so the test part stays unseen.
cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
scoring = ['accuracy', 'precision_macro', 'recall_macro', 'f1_macro']
cv_results = cross_validate(rf, X_train, y_train, cv=cv, scoring=scoring)

# Average every per-fold array once, then report the four requested metrics.
cv_means = {name: np.mean(values) for name, values in cv_results.items()}
print("五折交叉验证结果:")
print(f"准确度: {cv_means['test_accuracy']:.4f}")
print(f"精度: {cv_means['test_precision_macro']:.4f}")
print(f"召回率: {cv_means['test_recall_macro']:.4f}")
print(f"F1 值: {cv_means['test_f1_macro']:.4f}")

# Step 5: evaluate the fitted model on the held-out test part.
y_pred = rf.predict(X_test)

# Accuracy has no averaging mode; precision, recall and F1 are macro-averaged
# over the classes.
accuracy = accuracy_score(y_test, y_pred)
precision, recall, f1 = (
    metric(y_test, y_pred, average='macro')
    for metric in (precision_score, recall_score, f1_score)
)

# Report the test-set metrics.
print("\n测试集评估结果:")
print(f"准确度: {accuracy:.4f}")
print(f"精度: {precision:.4f}")
print(f"召回率: {recall:.4f}")
print(f"F1 值: {f1:.4f}")

 

# posted @ 2024-12-02 16:47  记得关月亮  阅读(2)  评论(0)  编辑  收藏  举报