调用肘部法（Elbow Method）自动确定聚类数 K，并将 K 送入 K-Means，再根据 K 值从中心点集合中选取相应的中心点。
1. 聚类结果：调用肘部法返回 K 值送入 K-Means 后得到的聚类结果如下。
| import random |
| import numpy as np |
| import pandas as pd |
| import matplotlib.pyplot as plt |
| from sklearn.cluster import KMeans |
| from sklearn.metrics import silhouette_score |
| |
| |
| |
def Distance(dataSet, centroids, k) -> np.ndarray:
    """Return the (n_samples, k) matrix of Euclidean distances.

    Each row i holds the distances from dataSet[i] to every centroid.

    Args:
        dataSet: iterable of n coordinate vectors (e.g. list of [x, y]).
        centroids: array-like of k coordinate vectors.
        k: number of centroids (kept for interface compatibility; the
           actual count is taken from `centroids` itself).

    Returns:
        np.ndarray of shape (n, k).
    """
    data = np.asarray(dataSet, dtype=float)
    cents = np.asarray(centroids, dtype=float)
    # Broadcast to (n, k, dim) and reduce over the coordinate axis —
    # replaces the original per-sample np.tile loop with one vectorized pass.
    diff = data[:, np.newaxis, :] - cents[np.newaxis, :, :]
    return np.sqrt(np.sum(diff ** 2, axis=2))
| |
| |
| |
def Update_cen(dataSet, centroids, k):
    """Run one k-means update step: assign points, recompute centroids.

    Args:
        dataSet: iterable of coordinate vectors.
        centroids: current k centroid positions (array-like, shape (k, dim)).
        k: number of clusters.

    Returns:
        (changed, newCentroids): `changed` is the element-wise displacement
        newCentroids - centroids (all zeros at convergence); `newCentroids`
        is an (k, dim) ndarray of updated centroid positions.
    """
    # Nearest-centroid assignment for every sample.
    distance = Distance(dataSet, centroids, k)
    minIndex = np.argmin(distance, axis=1)

    # Mean position of each non-empty cluster, indexed by cluster label.
    grouped = pd.DataFrame(dataSet).groupby(minIndex).mean()

    # A centroid may attract no points, in which case groupby yields fewer
    # than k rows; keep the old position for empty clusters so the result
    # stays shape (k, dim) and the subtraction below cannot raise.
    newCentroids = np.asarray(centroids, dtype=float).copy()
    newCentroids[grouped.index.to_numpy()] = grouped.values

    changed = newCentroids - centroids
    return changed, newCentroids
| |
| |
| |
def kmeans(dataSet, k):
    """Cluster `dataSet` into k groups with Lloyd's algorithm.

    Initial centroids are k distinct samples drawn at random, then
    Update_cen() is iterated until no centroid moves.

    Args:
        dataSet: list of coordinate vectors (must be a list — it is
                 sampled with random.sample and indexed by position).
        k: number of clusters.

    Returns:
        (centroids, cluster): sorted list of final centroid coordinates,
        and a list of k lists holding the samples assigned to each centroid.
    """
    centroids = random.sample(dataSet, k)
    print("质心:", centroids)

    # Iterate until convergence: a zero `changed` array means no movement.
    changed, newCentroids = Update_cen(dataSet, centroids, k)
    while np.any(changed):
        changed, newCentroids = Update_cen(dataSet, newCentroids, k)
    centroids = sorted(newCentroids.tolist())

    # Final assignment pass: bucket every sample under its nearest centroid.
    cluster = [[] for _ in range(k)]
    labels = np.argmin(Distance(dataSet, centroids, k), axis=1)
    for sample_idx, label in enumerate(labels):
        cluster[label].append(dataSet[sample_idx])

    return centroids, cluster
| |
| |
def train_cluster(train_vecs, model_name=None, start_k=2, end_k=15):
    """Pick a cluster count for `train_vecs` via the elbow method.

    Fits sklearn KMeans for every k in [start_k, end_k), records the
    inertia (SSE), plots the SSE curve, then chooses the k where the
    second difference of the SSE curve is largest (the "elbow").

    Args:
        train_vecs: samples to cluster, shape (n_samples, n_features).
        model_name: unused; kept for interface compatibility.
        start_k: first k to try (inclusive).
        end_k: last k to try (exclusive).

    Returns:
        int: n_clusters of the model at the detected elbow.
    """
    print('training cluster')
    SSE = []
    models = []

    for n_clusters in range(start_k, end_k):
        model = KMeans(n_clusters=n_clusters)
        model.fit(train_vecs)
        SSE.append(model.inertia_)
        print(SSE)
        models.append(model)
        print(models)

    # Visual sanity check of the elbow curve (blocks until closed).
    plt.figure(dpi=150)
    plt.plot(SSE, marker='o')
    plt.show()

    # First and second half-differences of the SSE sequence.
    # NOTE(review): the `- 1` bound drops the last first-difference from
    # consideration; preserved as-is to keep the original selection.
    SSE_d1 = [(SSE[i - 1] - SSE[i]) / 2 for i in range(1, len(SSE))]
    SSE_d2 = [(SSE_d1[i - 1] - SSE_d1[i]) / 2 for i in range(1, len(SSE_d1) - 1)]

    # Largest second difference marks the elbow; +1 maps the d2 index
    # back onto the models list.
    best_model = models[SSE_d2.index(max(SSE_d2)) + 1]

    print(best_model.n_clusters)
    return best_model.n_clusters
| |
| |
| |
def createDataSet():
    """Return a fixed toy dataset of 2-D points (coordinates roughly in [1, 50]) used as demo input by the __main__ block."""
    return [[9,37],[42,37],[11,50],[27,25],[27,6],[7,22],[44,9],[33,28],[43,37],[12,50],[14,48],[34,21],[1,26],[13,2],[24,11],[24,28],[48,16],[42,31],[34,20],[46,25],[31,47],[29,3],[39,50],[45,50],[48,46],[35,48],[48,35],[40,18],[31,50],[28,36],[11,48],[3,27],[30,18],[32,21],[24,37],[48,48],[48,47],[11,2],[15,48],[6,12],[45,27],[47,49],[49,43],[46,46],[13,19],[18,31],[44,47],[21,8],[36,48],[27,27],[9,9],[39,50],[50,11],[27,32],[50,14],[4,11],[40,14],[26,42],[23,48],[29,35],[6,7],[11,4],[43,3],[41,8],[24,15],[3,48],[9,6],[20,45],[27,40],[49,18],[8,39],[40,25],[28,28],[7,36],[7,38],[26,47],[49,12],[49,39],[27,26],[39,17],[21,19],[12,17],[17,50],[40,18],[31,21],[35,26],[42,11],[45,34],[6,32],[25,28],[14,21],[37,22],[50,22],[3,16],[7,3],[29,47],[25,6],[11,5],[45,15],[26,33],[37,39],[14,31],[50,48],[30,27],[31,4],[33,32],[34,17],[42,37],[22,5],[42,23],[19,38],[49,31],[46,43],[33,44],[2,12],[12,26],[27,18],[18,37],[13,26],[2,35],[7,12],[19,45],[15,45],[40,18],[43,29],[39,22],[11,44],[24,1],[13,7],[48,30],[8,42],[17,42],[4,42],[47,46],[41,41],[15,17],[9,13],[21,15],[27,22],[15,34],[21,8],[15,39],[2,27],[7,41],[26,4],[15,50],[21,7],[46,29],[31,14],[42,22],[49,25],[6,42],[4,35],[24,34],[43,5],[32,39],[3,11],[34,8],[20,10],[4,29],[36,32],[17,16],[17,26],[39,16],[16,28],[40,22],[7,7],[6,35],[36,9],[33,7],[28,17],[42,47],[9,50],[50,21],[22,12],[42,43],[35,4],[36,10],[27,6],[13,12],[41,43],[50,44],[24,34],[7,11],[49,16],[3,37],[10,48],[15,12],[36,48],[3,17],[31,16],[47,14],[6,25],[31,28],[1,46],[47,7],[41,27],[35,22],[20,46],[28,49]]
| |
| |
| |
if __name__ == '__main__':
    # Demo driver: pick k with the elbow method, run the hand-rolled
    # k-means, print each cluster, then plot centroids and clusters.
    dataset = createDataSet()
    k = train_cluster(dataset, model_name=None, start_k=2, end_k=15)
    centroids, cluster = kmeans(dataset, k)
    print("共", k, "聚类")
    for i in range(k):
        print(i + 1, '聚类质心为: %s' % centroids[i])
        print(i + 1, '聚类集群为:%s' % cluster[i])
    color_all = ['c', 'g', 'r', 'm', 'y', 'k', 'b']

    plt.scatter(list(np.array(centroids).T[0]), list(np.array(centroids).T[1]), marker='x', color='red', label="质心")
    for i in range(k):
        # Cycle through the palette so k > len(color_all) no longer
        # raises IndexError (the original indexed color_all[i] directly).
        plt.scatter(list(np.array(cluster[i]).T[0]), list(np.array(cluster[i]).T[1]), marker='o', color=color_all[i % len(color_all)], label="数据集")

    plt.show()
| |
| |
# NOTE(review): this second createDataSet redefines (shadows) the one above;
# because it appears after the __main__ block it only takes effect when the
# file is imported as a module, not in the script run above. Likely a
# copy-paste of an alternate dataset — confirm which definition is intended.
def createDataSet():
    """Return a fixed toy dataset of 2-D points ([age-like, large-value] pairs) alternative to the first createDataSet above."""
    return [[18,29014],[18,77868],[19,58759],[19,37786],[20,8899],[20,46768],[20,774613],[21,488014],[21,769689],[22,359562],[22,352376],[24,111367],[24,152688],[24,109327],[24,106804],[24,137929],[25,71109],[25,113614],[25,119435],[26,154356],[26,44811],[27,166804],[28,98015],[29,21363],[29,125762],[29,93072],[29,153561],[30,39274],[30,168760],[30,24461],[30,191557],[30,61752],[32,124488],[32,170708],[33,130019],[33,99870],[33,331454],[34,190433],[34,177582],[34,300554],[34,138273],[34,314246],[35,276270],[35,37760],[35,296483],[35,377311],[35,6868],[35,9689],[36,992675],[37,830087],[37,937103],[37,40717],[38,953928],[39,13622507],[39,12048197],[40,1920635],[40,18916664],[40,3880638],[40,25428],[41,162939],[41,71320],[41,360108],[42,322575],[42,97904],[42,215271],[43,34127],[43,203836],[43,6359],[43,9408],[43,970418],[44,428948],[44,349269],[45,232780],[46,293502],[46,67138],[46,834070],[47,27633],[47,624311],[48,63117],[48,549391],[49,99226],[49,358856],[49,436160],[50,3984],[51,1799],[51,405163],[51,846203],[52,479420],[53,92465],[53,136059],[53,56145],[53,158809],[54,947448],[54,625963],[55,666463],[55,995614],[55,17004],[56,566007],[56,919124],[56,484365],[57,938506],[57,164692],[57,936188],[57,617512],[58,455966],[58,59662],[58,297124],[59,605331],[59,497737],[59,214608],[59,156849],[60,920477],[60,5894008],[60,2455622],[61,480931],[62,95628],[63,50546],[63,39582],[63,428934],[64,390934],[65,920156],[65,107724],[65,3509273],[65,7842456],[66,8909362],[66,5311804],[67,46879],[68,24905],[68,83869],[68,880292],[71,894481],[71,524879],[72,368116],[72,3865443],[72,405367],[72,8691331],[72,3060200],[73,9806966],[73,1874448],[73,5716398],[73,9629520],[73,30687],[73,59736],[74,55489],[74,318877],[74,859963],[75,633227],[75,725002],[75,3695087],[75,5982553],[75,3043470],[75,2390573],[75,45888],[76,40348],[76,88729],[77,229941],[77,70829],[77,836217],[77,717727],[78,4418],[78,75522],[79,89147],[79,49533],[80,21818],[80,90642],[80,772160],[80,781887],[81,262857],[81,768323],[81,39781],[81,6117],[81,85550],[81,90980],[82,81310],[82,57097],[82,684607],[82,169619],[82,830320],[83,912973],[83,34105],[84,55130],[84,10447],[84,10405],[85,21196],[85,34589],[85,50499],[86,72650],[86,397602],[86,649289],[86,490891],[87,115778],[87,89786],[88,15375],[88,80349],[89,94850],[89,20309],[89,87940],[89,410890],[90,573323],[90,417892]]
【推荐】国内首个AI IDE,深度理解中文开发场景,立即下载体验Trae
【推荐】编程新体验,更懂你的AI,立即体验豆包MarsCode编程助手
【推荐】抖音旗下AI助手豆包,你的智能百科全书,全免费不限次数
【推荐】轻量又高性能的 SSH 工具 IShell:AI 加持,快人一步
· 25岁的心里话
· 闲置电脑爆改个人服务器(超详细) #公网映射 #Vmware虚拟网络编辑器
· 零经验选手,Compose 一天开发一款小游戏!
· 因为Apifox不支持离线,我果断选择了Apipost!
· 通过 API 将Deepseek响应流式内容输出到前端