# 2024.11.29 (Fri)
"""Evaluate a logistic-regression classifier on the Iris dataset.

Workflow:
1. Load Iris and make a stratified hold-out split (test_size=0.33).
2. Run stratified 5-fold cross-validation on the training part and report
   accuracy plus weighted precision / recall / F1 (mean ± std over folds).
3. Fit on the whole training part and report the same metrics on the
   held-out test part.
"""

# Import the required libraries.
# NOTE: numpy, cross_val_score and cross_val_predict are not used in this
# section; the imports are retained in case other code relies on them.
import numpy as np  # noqa: F401
from sklearn.datasets import load_iris
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score
from sklearn.model_selection import (
    StratifiedKFold,
    cross_val_predict,  # noqa: F401
    cross_val_score,  # noqa: F401
    cross_validate,
    train_test_split,
)

# 1. Load the Iris dataset.
iris = load_iris()
X = iris.data  # feature matrix
y = iris.target  # labels

# 2. Hold-out split: roughly 1/3 for testing, 2/3 for training.
#    stratify=y asks for the split to follow the class labels, so both parts
#    are sampled from the same class distribution.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.33, random_state=42, stratify=y
)

# 3. Create the logistic-regression model.
#    max_iter is raised to 200 to avoid convergence problems.
model = LogisticRegression(max_iter=200)

# 4. Evaluate the model with stratified 5-fold cross-validation on the
#    training part. Metrics: accuracy, precision, recall, F1 (weighted).
cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)

# A single cross_validate call fits the model once per fold and scores all
# four metrics on that fit, instead of repeating the whole cross-validation
# once per metric. The splitter has a fixed random_state, so the folds are
# the same ones each separate per-metric run would have used.
cv_results = cross_validate(
    model,
    X_train,
    y_train,
    cv=cv,
    scoring=("accuracy", "precision_weighted", "recall_weighted", "f1_weighted"),
)
accuracy = cv_results["test_accuracy"]
precision = cv_results["test_precision_weighted"]
recall = cv_results["test_recall_weighted"]
f1 = cv_results["test_f1_weighted"]

# Print the cross-validation results (mean ± standard deviation over folds).
print('交叉验证结果:')
print(f'准确度: {accuracy.mean():.4f} ± {accuracy.std():.4f}')
print(f'精度: {precision.mean():.4f} ± {precision.std():.4f}')
print(f'召回率: {recall.mean():.4f} ± {recall.std():.4f}')
print(f'F1 值: {f1.mean():.4f} ± {f1.std():.4f}')

# 5. Train the model on the full training part.
model.fit(X_train, y_train)

# 6. Predict on the held-out test part.
y_pred = model.predict(X_test)

# 7. Compute the test-set metrics (weighted averages across classes).
accuracy_test = accuracy_score(y_test, y_pred)
precision_test = precision_score(y_test, y_pred, average='weighted')
recall_test = recall_score(y_test, y_pred, average='weighted')
f1_test = f1_score(y_test, y_pred, average='weighted')

# Print the test-set results.
print('\n测试集评估:')
print(f'准确度: {accuracy_test:.4f}')
print(f'精度: {precision_test:.4f}')
print(f'召回率: {recall_test:.4f}')
print(f'F1 值: {f1_test:.4f}')