2024.12.5 (Thursday)
```python
# Import the required libraries
import numpy as np
from sklearn import datasets
from sklearn.model_selection import train_test_split, cross_val_score, StratifiedKFold
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, classification_report

# 1. Load the iris dataset from scikit-learn
iris = datasets.load_iris()
X = iris.data    # feature matrix
y = iris.target  # class labels

# 2. Hold-out split: reserve 1/3 of the samples as the test set (stratified by class)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.33, random_state=42, stratify=y
)

# 3. Train a Gaussian naive Bayes classifier on the training set
nb_classifier = GaussianNB()
nb_classifier.fit(X_train, y_train)

# 4. Evaluate the model with stratified 5-fold cross-validation
#    (accuracy, precision, recall, and F1 score)
cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)

# Cross-validated accuracy
accuracy = cross_val_score(nb_classifier, X, y, cv=cv, scoring='accuracy')
# Cross-validated precision (macro-averaged over the three classes)
precision = cross_val_score(nb_classifier, X, y, cv=cv, scoring='precision_macro')
# Cross-validated recall (macro-averaged)
recall = cross_val_score(nb_classifier, X, y, cv=cv, scoring='recall_macro')
# Cross-validated F1 score (macro-averaged)
f1 = cross_val_score(nb_classifier, X, y, cv=cv, scoring='f1_macro')

# Report cross-validation results as mean ± standard deviation
print(f"Accuracy (Cross-validation): {accuracy.mean():.4f} ± {accuracy.std():.4f}")
print(f"Precision (Cross-validation): {precision.mean():.4f} ± {precision.std():.4f}")
print(f"Recall (Cross-validation): {recall.mean():.4f} ± {recall.std():.4f}")
print(f"F1 Score (Cross-validation): {f1.mean():.4f} ± {f1.std():.4f}")

# 5. Evaluate the trained model on the held-out test set
y_pred = nb_classifier.predict(X_test)

# Compute the evaluation metrics on the test set
accuracy_test = accuracy_score(y_test, y_pred)
precision_test = precision_score(y_test, y_pred, average='macro')
recall_test = recall_score(y_test, y_pred, average='macro')
f1_test = f1_score(y_test, y_pred, average='macro')

# Report test-set results
print("\nTest Set Performance:")
print(f"Accuracy: {accuracy_test:.4f}")
print(f"Precision: {precision_test:.4f}")
print(f"Recall: {recall_test:.4f}")
print(f"F1 Score: {f1_test:.4f}")

# Detailed per-class classification report
print("\nClassification Report:")
print(classification_report(y_test, y_pred))
```
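
A side note on step 4: calling `cross_val_score` once per metric re-runs the whole cross-validation four times. scikit-learn's `cross_validate` accepts a list of scorers and returns all of them from a single pass over the folds. Below is a minimal sketch of that variant, equivalent to the loop of `cross_val_score` calls above; the variable names (`results`, `scoring`) are my own.

```python
from sklearn import datasets
from sklearn.model_selection import cross_validate, StratifiedKFold
from sklearn.naive_bayes import GaussianNB

iris = datasets.load_iris()
X, y = iris.data, iris.target

cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
scoring = ['accuracy', 'precision_macro', 'recall_macro', 'f1_macro']

# One cross-validation run that computes every requested metric on each fold
results = cross_validate(GaussianNB(), X, y, cv=cv, scoring=scoring)

for metric in scoring:
    scores = results[f'test_{metric}']  # per-fold scores for this metric
    print(f"{metric}: {scores.mean():.4f} ± {scores.std():.4f}")
```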
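
For step 5, a confusion matrix shows which of the three iris classes get confused with each other, which the macro-averaged scores alone do not reveal. This short sketch assumes the `nb_classifier`, `X_test`, and `y_test` variables from the script above are still in scope.

```python
from sklearn.metrics import confusion_matrix

# Rows are true classes, columns are predicted classes,
# in the order given by iris.target_names (setosa, versicolor, virginica)
y_pred = nb_classifier.predict(X_test)
cm = confusion_matrix(y_test, y_pred)

print("Confusion matrix:")
print(cm)
```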