机器学习期末大作业

100

101

102

103

104

105

106

import warnings

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import confusion_matrix, classification_report
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
warnings.filterwarnings('ignore')
 
# Matplotlib setup for Chinese text: select the SimHei font and turn off the
# Unicode minus sign for axis tick labels.
plt.rcParams.update({
    'font.sans-serif': ['SimHei'],
    'axes.unicode_minus': False,
})
 
# 1. Data import and first look
# Assignment task 1: load the dataset, read the first 6 rows with .head(), and
# report the frame's metadata with .info(). (10 points)
file_path = 'BP_R_Data.xlsx'
data = pd.read_excel(file_path)
# Show the first 6 rows
print("===== 任务1：数据导入和基本查看 =====")
print("数据集前6行：")
print(data.head(6))
print("\n数据集信息：")
# DataFrame.info() writes its summary to stdout itself and returns None, so it
# is called directly; wrapping it in print() would append a stray "None" line.
data.info()
 
# 2. Data visualisation
# sns.pairplot creates its own figure, so no plt.figure() call precedes it: a
# manually created figure would stay empty, never be saved and never be closed.
sns.pairplot(data)
# The pair-plot figure is the current figure here, so the pyplot-level
# save/close calls below act on it.
plt.savefig('数据可视化.png')
plt.close()
print("\n===== 任务2：数据可视化 =====")
print("数据可视化结果已保存为'数据可视化.png'")
 
# 3. Preprocessing and model fitting
# Features are every column except the last one; the last column is the target.
X = data.iloc[:, :-1]
y = data.iloc[:, -1]

print("\n===== 任务3：数据预处理和模型拟合 =====")
print("原始数据形状:", X.shape)
print("数据预处理步骤：")

# Split BEFORE scaling so that the held-out rows never contribute to the
# scaler's mean/variance (fitting the scaler on all rows leaks test data
# into preprocessing).
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Standardise: statistics come from the training rows only and are then
# re-used, unchanged, to transform the test rows.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_raw)
X_test = scaler.transform(X_test_raw)
print("1. 特征标准化完成")

print("2. 数据集划分：")
print("   - 训练集大小:", X_train.shape)
print("   - 测试集大小:", X_test.shape)

# Random-forest classifier trained on the scaled training split
rf_model = RandomForestClassifier(n_estimators=100, random_state=42)
rf_model.fit(X_train, y_train)
print("3. 模型训练：")
print("   - 选用模型：随机森林分类器")
print("   - 模型参数：")
print("     * n_estimators:", rf_model.n_estimators)
print("     * random_state:", rf_model.random_state)
print("模型训练完成！")

# 4. Cross-validation
# The scaler is placed inside a pipeline and the RAW features are passed in,
# so each fold fits its own scaler on that fold's training part only.
# cross_val_score fits clones of the estimator it is given, so the already
# trained rf_model used for prediction later is left untouched.
cv_pipeline = make_pipeline(StandardScaler(), rf_model)
cv_scores = cross_val_score(cv_pipeline, X, y, cv=5)
print("\n===== 任务4：交叉验证结果 =====")
print("5折交叉验证得分：", cv_scores)
print("平均交叉验证得分：", cv_scores.mean())
 
# 5. Prediction and evaluation
y_pred = rf_model.predict(X_test)

# Confusion-matrix heat map
# class_labels is the sorted union of the labels present in the ground truth
# and in the predictions. It is passed to confusion_matrix to pin the
# row/column order, and to the heat map so the axis ticks show the real class
# labels instead of positional indices 0..k-1.
class_labels = np.union1d(y_test, y_pred)
cm = confusion_matrix(y_test, y_pred, labels=class_labels)
plt.figure(figsize=(8, 6))
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues',
            xticklabels=class_labels, yticklabels=class_labels)
plt.title('混淆矩阵')
plt.ylabel('真实值')
plt.xlabel('预测值')
plt.savefig('混淆矩阵.png')
plt.close()

print("\n===== 任务5：预测结果分析 =====")
print("混淆矩阵已保存为'混淆矩阵.png'")
 
# Classification report as a dict; per-class entries are looked up below by
# the string form of each label.
report = classification_report(y_test, y_pred, output_dict=True)

divider = "=" * 50
print("\n简明结果：")
print(divider)
print(f"精度(Accuracy):\t\t{report['accuracy']:.4f}")
print("-" * 50)
print("各类别详细指标：")
for label in sorted(set(y_test)):
    metrics = report.get(str(label))
    if metrics is None:
        # No entry for this label in the report: nothing to print for it.
        continue
    print(f"\n类别 {label}:")
    print(f"查准率(Precision):\t{metrics['precision']:.4f}")
    print(f"查全率(Recall):\t\t{metrics['recall']:.4f}")
    print(f"F1值(F1-score):\t\t{metrics['f1-score']:.4f}")
print(divider)

# Full plain-text report, exactly as scikit-learn formats it
print("\n完整分类报告：")
print(classification_report(y_test, y_pred))

posted @ 2024-12-31 12:06 财神给你送元宝阅读(8) 评论(0) 编辑收藏举报

刷新页面返回顶部

登录后才能查看或发表评论，立即登录或者逛逛博客园首页

相关博文：

· 机器学习实验六

· 机器学习实验一

· 2024.12.10（周二）

· 机器学习任务1

· 机器学习任务5

公告

昵称：财神给你送元宝
园龄： 1年10个月
粉丝： 11
关注： 4

+加关注

2025年3月

日

一

二

三

四

五

六

youxiandechilun

机器学习期末大作业

公告

搜索

常用链接

合集

随笔档案

阅读排行榜

评论排行榜

推荐排行榜

最新评论

youxiandechilun

机器学习 期末大作业

公告

搜索

常用链接

合集

随笔档案

阅读排行榜

评论排行榜

推荐排行榜

最新评论

机器学习期末大作业