python K-means工具包初解

近期做数据挖掘实验,需要写一个K-means算法。写完之后发现并不是很难,写的过程中想到python肯定有现成的包。虽然师兄说不让用,但自己的实现已经写完了,而现成的包还不是很熟,于是稍微查了下资料,学习了一下。另外,自己写的代码太烂了,不敢拿出来,等以后改进了再贴出来吧。

1.注意:初始的点需要先转为numpy.array数组格式。

2.若是直接算中心点的话,直接调用kmeans2函数即可,后面的绘图,仅仅为了可视化。

#!/usr/bin/python
# Cluster ten hand-picked 2-D points with scipy's kmeans2, print the
# centroids it returns, and save a scatter plot of the points (coloured by
# assigned cluster) plus the centroids to 'pic.png' in the working directory.

import numpy
import matplotlib
import os
# Select the Agg backend before pylab is imported so the figure is rendered
# straight to a file instead of an on-screen window.
matplotlib.use('Agg')
from scipy.cluster.vq import kmeans2
import pylab
pylab.close()

# Sample observations: one [x, y] pair per row.
xy1 = [[2, 10], [2, 5], [8, 4], [5, 8], [7, 5],
       [6, 4], [1, 2], [4, 9], [7, 3], [1, 3]]
# kmeans2 expects an array of observations; convert to float up front so the
# centroid arithmetic is not performed on an integer array.
xy2 = numpy.array(xy1, dtype=float)

cluster_num = 3
# xy2 already holds one observation per row, so it is passed as-is (the old
# numpy.array(zip(...)) rebuild was redundant and does not yield an N x 2
# array on Python 3, where zip returns an iterator).
# res: centroid coordinates, one row per cluster.
# idx: for every observation, the index of the centroid it was assigned to.
res, idx = kmeans2(xy2, cluster_num)

# A single formatted string prints identically on Python 2 and Python 3.
print("local centre points:\n%s" % (res,))

# One RGB colour per cluster label; add entries here if cluster_num is
# raised above 3, otherwise the lookup below raises IndexError.
palette = ([0.4, 1, 0.4], [1, 0.4, 0.4], [0.1, 0.8, 1])
colors = [palette[i] for i in idx]

# plot colored points (the colours were previously computed but never
# handed to scatter, so every point came out in the default colour)
pylab.scatter(xy2[:, 0], xy2[:, 1], c=colors)

# mark centroids as (X): a large circle with a cross drawn on top of it
pylab.scatter(res[:, 0], res[:, 1], marker='o', s=500, linewidths=2, c='none')
pylab.scatter(res[:, 0], res[:, 1], marker='x', s=500, linewidths=2)

# Uncomment to see which directory pic.png ends up in:
# print(os.getcwd())
pylab.savefig('pic.png')
效果图:

#---------------------------------------------------------------------------

参考:http://blog.csdn.net/brandohero/article/details/39967663

#!/usr/bin/python

# Adapted from http://hackmap.blogspot.com/2007/09/k-means-clustering-in-scipy.html
#
# Draw three groups of normally distributed 2-D points, cluster them with
# scipy's kmeans2, and save a scatter plot of the points (coloured by
# assigned cluster) plus the centroids to '/tmp/kmeans.png'.

import numpy
import matplotlib
# Select the Agg backend before pylab is imported so the figure is rendered
# straight to a file instead of an on-screen window.
matplotlib.use('Agg')
from scipy.cluster.vq import kmeans2
import pylab
pylab.close()

# generate 3 sets of normally distributed points around different means
# (1, 2, 3) with different standard deviations (0.2, 0.5, 0.3); each set is
# an (n, 2) array of [x, y] rows
pt1 = numpy.random.normal(1, 0.2, (100, 2))
pt2 = numpy.random.normal(2, 0.5, (300, 2))
pt3 = numpy.random.normal(3, 0.3, (100, 2))

# slightly move sets 2 and 3 along x (for a prettier output)
pt2[:, 0] += 1
pt3[:, 0] -= 0.5

# stack the three sets into a single (500, 2) array of observations
xy = numpy.concatenate((pt1, pt2, pt3))

# kmeans for 3 clusters. xy already holds one observation per row, so it is
# passed as-is (the old numpy.array(zip(...)) rebuild was redundant and does
# not yield an N x 2 array on Python 3, where zip returns an iterator).
# res: centroid coordinates, one row per cluster.
# idx: for every observation, the index of the centroid it was assigned to.
cluster_num = 3
res, idx = kmeans2(xy, cluster_num)

# One RGB colour per cluster label; add entries here if cluster_num is
# raised above 3, otherwise the lookup below raises IndexError.
palette = ([0.4, 1, 0.4], [1, 0.4, 0.4], [0.1, 0.8, 1])
colors = [palette[i] for i in idx]

# plot colored points
pylab.scatter(xy[:, 0], xy[:, 1], c=colors)

# mark centroids as (X): a large circle with a cross drawn on top of it
pylab.scatter(res[:, 0], res[:, 1], marker='o', s=500, linewidths=2, c='none')
pylab.scatter(res[:, 0], res[:, 1], marker='x', s=500, linewidths=2)

pylab.savefig('/tmp/kmeans.png')

#------------------------------------

转载请注明出处:http://write.blog.csdn.net/postedit/41158167

posted @ 2015-06-05 15:22  phlsheji  阅读(2084)  评论(0)  编辑  收藏  举报