import numpy as np
from sklearn import datasets
from sklearn.cluster import KMeans
from sklearn.model_selection import train_test_split, cross_val_score, StratifiedKFold
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report, accuracy_score
# Evaluate K-Means as a classifier on the Iris data set.
#
# K-Means is unsupervised: it returns arbitrary cluster ids, not class labels.
# To score it against the true classes, each cluster is assigned the majority
# class of the *training* samples that fall into it. The mapping (and the
# feature scaler) must never be derived from the samples being scored,
# otherwise the reported accuracy is optimistically biased.

RANDOM_STATE = 42  # shared seed so every split and K-Means run is reproducible
N_CLUSTERS = 3     # one cluster per Iris species


def fit_cluster_label_map(cluster_ids, labels):
    """Map each cluster id to the most frequent class label among its members.

    Args:
        cluster_ids: 1-D sequence of integer cluster assignments.
        labels: 1-D sequence of non-negative integer class labels, aligned
            element-wise with ``cluster_ids``.

    Returns:
        A dict ``{cluster_id: majority_label}`` with plain ``int`` keys and
        values. Ties resolve to the smallest label (``argmax`` behaviour).
    """
    cluster_ids = np.asarray(cluster_ids)
    labels = np.asarray(labels)
    return {
        int(cluster): int(np.bincount(labels[cluster_ids == cluster]).argmax())
        for cluster in np.unique(cluster_ids)
    }


def map_clusters_to_labels(cluster_ids, mapping, fallback):
    """Translate cluster ids into class labels.

    Args:
        cluster_ids: 1-D sequence of integer cluster assignments.
        mapping: dict produced by ``fit_cluster_label_map``.
        fallback: label used for any cluster id missing from ``mapping``.

    Returns:
        A 1-D integer NumPy array of class labels, one per input cluster id.
    """
    return np.array(
        [mapping.get(int(cluster), fallback) for cluster in cluster_ids],
        dtype=int,
    )


def predict_labels(X_fit, y_fit, X_eval):
    """Fit scaler and K-Means on ``X_fit`` and predict class labels for ``X_eval``.

    Everything that is learned (scaling statistics, centroids and the
    cluster -> class mapping) comes from ``X_fit`` / ``y_fit`` only;
    ``X_eval`` is merely transformed and assigned to the nearest centroid.

    Args:
        X_fit: feature matrix used for fitting.
        y_fit: integer class labels aligned with ``X_fit``.
        X_eval: feature matrix to predict labels for.

    Returns:
        A 1-D integer NumPy array of predicted class labels for ``X_eval``.
    """
    scaler = StandardScaler().fit(X_fit)

    # K-Means parameters (all set explicitly rather than relying on library
    # defaults, which may differ between scikit-learn releases):
    #   n_clusters   - number of clusters to form
    #   init         - centroid initialisation method
    #   n_init       - number of runs with different centroid seeds
    #   max_iter     - maximum number of iterations for a single run
    #   random_state - seed of the random generator, for reproducibility
    model = KMeans(n_clusters=N_CLUSTERS, init='k-means++', n_init=10,
                   max_iter=300, random_state=RANDOM_STATE)
    model.fit(scaler.transform(X_fit))

    # Learn the cluster -> class mapping from the training assignments.
    mapping = fit_cluster_label_map(model.labels_, y_fit)
    # Safety net for a cluster that received no training sample.
    fallback = int(np.bincount(y_fit).argmax())

    eval_clusters = model.predict(scaler.transform(X_eval))
    return map_clusters_to_labels(eval_clusters, mapping, fallback)


def main():
    """Run 5-fold cross-validation and a held-out test evaluation on Iris."""
    # Load the data.
    iris = datasets.load_iris()
    X, y = iris.data, iris.target

    # Split the raw features into training and test sets; standardisation
    # happens inside predict_labels so it is fitted on training data only.
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=1/3, random_state=RANDOM_STATE, stratify=y)

    # Estimate performance with stratified 5-fold cross-validation.
    cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=RANDOM_STATE)
    scores = []
    for train_index, val_index in cv.split(X_train, y_train):
        y_val_pred = predict_labels(
            X_train[train_index], y_train[train_index], X_train[val_index])
        scores.append(accuracy_score(y_train[val_index], y_val_pred))
    print(f'Cross-validation accuracy scores: {scores}')
    print(f'Mean cross-validation accuracy score: {np.mean(scores)}')

    # Final model: fit on the whole training set, score on the test set.
    y_pred_mapped = predict_labels(X_train, y_train, X_test)

    # Report per-class metrics on the test set.
    report = classification_report(y_test, y_pred_mapped, target_names=iris.target_names)
    print(report)


if __name__ == "__main__":
    main()
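# NOTE: the lines below are unrelated web-page boilerplate (ads and article
# links) captured together with the snippet; commented out so the file parses.
# 【推荐】国内首个AI IDE,深度理解中文开发场景,立即下载体验Trae
# 【推荐】编程新体验,更懂你的AI,立即体验豆包MarsCode编程助手
# 【推荐】抖音旗下AI助手豆包,你的智能百科全书,全免费不限次数
# 【推荐】轻量又高性能的 SSH 工具 IShell:AI 加持,快人一步
# · 无需6万激活码!GitHub神秘组织3小时极速复刻Manus,手把手教你使用OpenManus搭建本
# · Manus爆火,是硬核还是营销?
# · 终于写完轮子一部分:tcp代理 了,记录一下
# · 别再用vector<bool>了!Google高级工程师:这可能是STL最大的设计失误
# · 单元测试从入门到精通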