11.27

import numpy as np
from sklearn import datasets
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, accuracy_score
from sklearn.model_selection import train_test_split, cross_val_score, StratifiedKFold
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler

# Load the iris data set: X is the feature matrix, y the class labels.
iris = datasets.load_iris()
X, y = iris.data, iris.target

# Split into training and test sets BEFORE fitting any preprocessing.
# Fitting the scaler on the full data set would let the held-out rows
# influence the training features (data leakage) and bias the evaluation.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=1/3, random_state=42, stratify=y)

# Preprocessing step (standardisation) and classifier.
scaler = StandardScaler()
rf = RandomForestClassifier(
    n_estimators=100, criterion='gini', max_depth=None,
    min_samples_split=2, min_samples_leaf=1,
    bootstrap=True, oob_score=False, n_jobs=-1,
    random_state=42)

# Chain both steps so the scaler is always fitted together with the forest,
# on exactly the rows passed to fit(). Inside cross-validation this means the
# scaler is refitted on each fold's training part only.
model = make_pipeline(scaler, rf)

# Estimate model performance with stratified 5-fold cross-validation
# on the training set.
cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
cross_val_scores = cross_val_score(
    model, X_train, y_train, cv=cv, scoring='accuracy')

print(f'Cross-validation accuracy scores: {cross_val_scores}')
print(f'Mean cross-validation accuracy score: {np.mean(cross_val_scores)}')

# Train the final model on the whole training set.
model.fit(X_train, y_train)

# Predict the held-out test set; the pipeline applies the scaling that was
# learned from the training data before handing the rows to the forest.
y_pred = model.predict(X_test)

# Per-class precision / recall / F1 on the test set.
report = classification_report(y_test, y_pred, target_names=iris.target_names)
print(report)

# Overall test-set accuracy.
accuracy = accuracy_score(y_test, y_pred)
print(f'Test set accuracy: {accuracy}')
posted @   liuxuechao  阅读(8)  评论(0)  编辑  收藏  举报
相关博文:
阅读排行:
· 无需6万激活码!GitHub神秘组织3小时极速复刻Manus,手把手教你使用OpenManus搭建本
· Manus爆火,是硬核还是营销?
· 终于写完轮子一部分:tcp代理 了,记录一下
· 别再用vector<bool>了!Google高级工程师:这可能是STL最大的设计失误
· 单元测试从入门到精通
点击右上角即可分享
微信分享提示