#!/usr/bin/python
#-*-coding:utf-8-*-
import numpy as np
from sklearn.cluster import KMeans
from scipy.spatial.distance import cdist
import matplotlib.pyplot as plt
# Elbow-method demo: fit k-means for several values of k on a toy data set
# and plot the average distortion against k.

# Two groups of 10 random 2-D points each, drawn uniformly from
# [0.5, 1.5) and [3.5, 4.5). Each array is (2, 10): one column per point.
cluster1 = np.random.uniform(0.5, 1.5, (2, 10))
cluster2 = np.random.uniform(3.5, 4.5, (2, 10))
# Join the columns into (2, 20), then transpose to (20, 2): one point per row.
X = np.hstack((cluster1, cluster2)).T

# Candidate cluster counts 1..9 (range excludes the upper bound 10).
K = range(1, 10)
meandistortions = []
for k in K:
    kmeans = KMeans(n_clusters=k)
    kmeans.fit(X)
    # Distance from every point to every fitted cluster center:
    # one row per point, one column per center (k columns, not a fixed number).
    distances = cdist(X, kmeans.cluster_centers_, 'euclidean')
    # For each point keep the distance to its nearest center (axis=1 takes the
    # minimum across centers; without it we would get one global minimum),
    # then average those minima over all points.
    meandistortions.append(np.min(distances, axis=1).mean())

# Red crosses joined by a line; the "elbow" of this curve suggests a good k.
plt.plot(K, meandistortions, 'rx-')
plt.xlabel('k')
plt.ylabel('Average distortion')
plt.title('Selecting k with the Elbow Method')
plt.show()

# range(1, 10) is the half-open interval [1, 10): it yields 1 through 9.
S = range(1, 10)
# Print every value on its own line, then a closing marker.
print('\n'.join(map(str, S)))
print('over')
# Output:
# 1
# 2
# 3
# 4
# 5
# 6
# 7
# 8
# 9
# over
# Note: when range(1, 10) is used in a for loop, the first number (1) is included but the last number (10) is not.

# posted on 2016-03-11 11:10  qqhfeng16  views(459)  comments(0)  edit  favorite  report