机器学习 k-近邻算法
程序清单一:
from numpy import *
import operator


def creatDataSet():
    """Return the toy training set used by the k-NN demo.

    Returns:
        A ``(group, labels)`` tuple: ``group`` is a 4x2 array of sample
        points and ``labels`` is the list of class labels, one per row.
    """
    group = array([[1.0, 1.1], [1.0, 1.0], [0, 0], [0, 0.1]])
    labels = ["A", "A", "B", "B"]
    return group, labels


def classify0(inX, dataSet, labels, k):
    """Classify ``inX`` by majority vote among its ``k`` nearest neighbours.

    Distances are Euclidean, computed between ``inX`` and every row of
    ``dataSet``.

    Args:
        inX: The point to classify (one value per column of ``dataSet``).
        dataSet: 2-D array with one training sample per row.
        labels: Class labels, indexed in the same order as ``dataSet`` rows.
        k: Number of nearest neighbours that take part in the vote.

    Returns:
        The label with the most votes among the ``k`` nearest samples.

    The ``print`` calls are intentional: they expose the intermediate
    values so the algorithm can be followed step by step.
    """
    dataSetSize = dataSet.shape[0]

    # Repeat inX once per training row so the subtraction is element-wise.
    diffMat = tile(inX, (dataSetSize, 1)) - dataSet
    sqDiffMat = diffMat ** 2
    sqDistances = sqDiffMat.sum(axis=1)
    distances = sqDistances ** 0.5

    # Row indices ordered from the nearest sample to the farthest.
    sortedDistIndicis = distances.argsort()
    print(sortedDistIndicis)

    # Count the votes for each label among the k nearest samples.
    classCount = {}
    for i in range(k):
        voteIlabel = labels[sortedDistIndicis[i]]
        print(voteIlabel)
        classCount[voteIlabel] = classCount.get(voteIlabel, 0) + 1
        print(classCount)
    print(classCount)

    # dict.items() replaces the Python-2-only dict.iteritems(), which no
    # longer exists in Python 3.
    sortedClassCount = sorted(
        classCount.items(), key=operator.itemgetter(1), reverse=True
    )
    print(sortedClassCount)
    print(sortedClassCount[0][0])
    return sortedClassCount[0][0]


group, labels = creatDataSet()
classify0([0, 0], group, labels, 3)
源代码中print是为了便于理解代码。
numpy 库中 :tile函数用法 :http://www.cnblogs.com/zibu1234/p/4210521.html
sorted:http://blog.163.com/zhuandi_h/blog/static/1802702882012111284632184/
python iteritems(),itemgetter(),sorted():http://blog.csdn.net/u013713637/article/details/39521187