K近邻算法(Python 源码解析)
from numpy import *
import operator
from os import listdir


def classify0(inX, dataSet, labels, k):
    """Classify ``inX`` by majority vote among its ``k`` nearest neighbours.

    Distances are Euclidean, measured between ``inX`` and every row of
    ``dataSet``.

    Args:
        inX: Feature vector to classify; it must broadcast against the
            rows of ``dataSet``.
        dataSet: 2-D array-like holding one training sample per row.
        labels: Sequence in which ``labels[i]`` is the class of row ``i``
            of ``dataSet``.
        k: Number of nearest neighbours that take part in the vote.

    Returns:
        The label that received the most votes.  When several labels tie,
        the one whose first vote came from the closest neighbour wins
        (relies on insertion-ordered dicts, Python 3.7+, and the stable
        sort below).

    Raises:
        IndexError: If ``k`` is smaller than 1 or larger than the number
            of rows in ``dataSet``.
    """
    # Broadcasting subtracts inX from every row, so no explicit tiling of
    # inX is needed; the sign of the difference vanishes when squared.
    diffMat = asarray(dataSet) - asarray(inX)
    sqDiffMat = diffMat ** 2
    # Sum the squared differences across the features of each row.
    sqDistances = sqDiffMat.sum(axis=1)
    distances = sqDistances ** 0.5
    # Row indices ordered from the closest sample to the farthest.
    sortedDistIndicies = distances.argsort()

    # Tally the labels of the k closest samples.
    classCount = {}
    for i in range(k):
        voteIlabel = labels[sortedDistIndicies[i]]
        classCount[voteIlabel] = classCount.get(voteIlabel, 0) + 1

    # items() instead of the Python 2-only iteritems(); order the labels
    # by vote count, highest first.
    sortedClassCount = sorted(
        classCount.items(),
        key=operator.itemgetter(1),
        reverse=True,
    )
    return sortedClassCount[0][0]