# 2024.11.29 (Friday)

# 导入必要的库
import numpy as np
from sklearn.datasets import load_iris
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.model_selection import train_test_split, cross_val_score, StratifiedKFold
from sklearn.model_selection import cross_val_predict
from sklearn.model_selection import cross_validate

# 1. Load the Iris dataset and unpack the feature matrix and the labels.
iris = load_iris()
X, y = iris.data, iris.target

# 2. Hold-out split: about one third of the samples go to the test set and
#    the rest to the training set. Passing stratify=y keeps the class
#    proportions the same in both parts.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.33,
    random_state=42,
    stratify=y,
)

# 3. Create the logistic regression model.
#    max_iter is set to 200 to avoid convergence problems.
model = LogisticRegression(max_iter=200)

# 4. Evaluate the model with stratified 5-fold cross-validation on the
#    training set. Metrics: accuracy, precision, recall and F1 (the last
#    three are weighted averages over the classes).
cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)

# Score every metric in a single cross_validate pass so the model is fitted
# once per fold instead of once per fold *per metric*. Because `cv` uses a
# fixed random_state, the folds (and therefore the scores) are the same as
# with four separate cross_val_score calls.
cv_results = cross_validate(
    model,
    X_train,
    y_train,
    cv=cv,
    scoring=('accuracy', 'precision_weighted', 'recall_weighted', 'f1_weighted'),
)

# Per-fold score arrays; cross_validate stores them under 'test_<scorer>'.
accuracy = cv_results['test_accuracy']
precision = cv_results['test_precision_weighted']
recall = cv_results['test_recall_weighted']
f1 = cv_results['test_f1_weighted']

# Report mean ± standard deviation across the folds.
print('交叉验证结果:')
print(f'准确度: {accuracy.mean():.4f} ± {accuracy.std():.4f}')
print(f'精度: {precision.mean():.4f} ± {precision.std():.4f}')
print(f'召回率: {recall.mean():.4f} ± {recall.std():.4f}')
print(f'F1 值: {f1.mean():.4f} ± {f1.std():.4f}')

# 5-6. Fit the model on the training set, then predict the held-out test set.
#      (fit() returns the estimator itself, so the calls can be chained.)
y_pred = model.fit(X_train, y_train).predict(X_test)

# 7. Test-set metrics; precision, recall and F1 are weighted class averages.
f1_test = f1_score(y_test, y_pred, average='weighted')
recall_test = recall_score(y_test, y_pred, average='weighted')
precision_test = precision_score(y_test, y_pred, average='weighted')
accuracy_test = accuracy_score(y_test, y_pred)

# Print the test-set report, one line per metric.
print(
    f'\n测试集评估:',
    f'准确度: {accuracy_test:.4f}',
    f'精度: {precision_test:.4f}',
    f'召回率: {recall_test:.4f}',
    f'F1 值: {f1_test:.4f}',
    sep='\n',
)

 

# posted @ 2024-12-02 16:37 by 记得关月亮