#!/usr/bin/python
#-*-coding:utf-8-*-
import numpy as np
from sklearn.cluster import KMeans
from scipy.spatial.distance import cdist
import matplotlib.pyplot as plt
# Elbow-method demo: fit KMeans for several values of k on a toy data set and
# plot the average distance from each point to its nearest cluster centre.

# Two square blobs of 10 random points each.  Every blob is generated as a
# (2, 10) array, so the horizontally stacked (2, 20) result is transposed to
# get one row per point: X has shape (20, 2).
cluster1 = np.random.uniform(0.5, 1.5, (2, 10))
cluster2 = np.random.uniform(3.5, 4.5, (2, 10))
X = np.hstack((cluster1, cluster2)).T

# Candidate cluster counts 1..9 (range excludes its end value).
K = range(1, 10)
meandistortions = []
for k in K:
    kmeans = KMeans(n_clusters=k)
    kmeans.fit(X)
    # cdist returns the Euclidean distance from every point to every cluster
    # centre (20 points x k centres).  Taking the minimum along axis=1 keeps,
    # for each point, the distance to its closest centre; without axis=1 the
    # result would collapse to a single global minimum.
    nearest = np.min(cdist(X, kmeans.cluster_centers_, 'euclidean'), axis=1)
    # Average those per-point distances to get the mean distortion for this k.
    meandistortions.append(nearest.mean())

# Plot mean distortion against k; the bend ("elbow") in this curve is what the
# Elbow Method named in the title uses to pick k.
plt.plot(K, meandistortions, 'rx-')
plt.xlabel('k')
plt.ylabel('Average distortion')
plt.title('Selecting k with the Elbow Method')
plt.show()
# Demonstrates that range(1, 10) is the half-open interval [1, 10):
# the loop prints 1 through 9, never 10.
S = range(1, 10)
for s in S:
    print(s)
# Printed once, after the loop has finished.
print('over')
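# Output of the range demo above:
# 1
# 2
# 3
# 4
# 5
# 6
# 7
# 8
# 9
# over
# Note: when range(1, 10) is used in a for loop it includes the first value
# (1) but does not include the end value (10).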