从零开始人工智能AI(一)-k-nn-矩阵
参考资料:
https://baike.baidu.com/item/%E7%9F%A9%E9%98%B5/18069?fr=aladdin
http://blog.csdn.net/c406495762/article/details/75172850
基础概念:
#shape[0]取行数,shape[1]取列数
import numpy as np
dataSetSize = dataSet.shape[0]
diffMat = np.tile(inX, (dataSetSize, 1)) - dataSet
tile的第二个参数为元组:元组的第一个元素表示纵向(行方向)复制的次数,即结果的行数;第二个元素表示横向(列方向)复制的次数,即每一行中原数组重复的份数
例如:
>>> a=[0,1,2] >>> b=tile(a,9) >>> b array([0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2]) >>> >>> b=tile(a,(9,2)) >>> b array([[0, 1, 2, 0, 1, 2], [0, 1, 2, 0, 1, 2], [0, 1, 2, 0, 1, 2], [0, 1, 2, 0, 1, 2], [0, 1, 2, 0, 1, 2], [0, 1, 2, 0, 1, 2], [0, 1, 2, 0, 1, 2], [0, 1, 2, 0, 1, 2], [0, 1, 2, 0, 1, 2]]) >>> b=tile(a,(9,1)) >>> b array([[0, 1, 2], [0, 1, 2], [0, 1, 2], [0, 1, 2], [0, 1, 2], [0, 1, 2], [0, 1, 2], [0, 1, 2], [0, 1, 2]]) >>> b=tile(a,(9,12)) >>> b array([[0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2], [0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2], [0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2], [0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2], [0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2], [0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2], [0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2], [0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2], [0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2]])
矩阵减法
>>> a array([[101, 20], [101, 20], [101, 20], [101, 20]]) >>> b array([[ 1, 101], [ 5, 89], [108, 5], [115, 8]]) >>> a-b array([[100, -81], [ 96, -69], [ -7, 15], [-14, 12]])
#二维特征相减后平方
sqDiffMat = diffMat**2
>>> c array([[100, -81], [ 96, -69], [ -7, 15], [-14, 12]]) >>> c**2 array([[10000, 6561], [ 9216, 4761], [ 49, 225], [ 196, 144]])
#sum()所有元素相加,sum(0)列相加,sum(1)行相加
sqDistances = sqDiffMat.sum(axis=1)
>>> d=c**2 >>> d.sum(axis=1) array([16561, 13977, 274, 340]) >>> d.sum(axis=0) array([19461, 11691])
#开方,计算出距离
distances = sqDistances**0.5
>>> e=d.sum(axis=1)
>>> f=e**0.5
>>> f
array([ 128.68954892, 118.22436297, 16.55294536, 18.43908891])
sortedDistIndices = distances.argsort()
#argsort()将distances中的元素按从小到大排序,返回对应的索引(index);之后再定义一个记录类别次数的字典
>>> f array([ 128.68954892, 118.22436297, 16.55294536, 18.43908891]) >>> g=f.argsort() >>> g array([2, 3, 1, 0])
取出前k个元素的类别
>>> for i in range(3): ... print(i) ... labels[sortedDistIndices[i]] ... 0 '动作片' 1 '动作片' 2 '爱情片'
>>> for i in range(3): ... voteIlabel = labels[sortedDistIndices[i]] ... classCount.get(voteIlabel,0) ... 0 0 0 >>> for i in range(3): ... voteIlabel = labels[sortedDistIndices[i]] ... classCount.get(voteIlabel,0) + 1 ... 1 1 1
#key=operator.itemgetter(1)根据字典的值进行排序
#key=operator.itemgetter(0)根据字典的键进行排序
#reverse=True表示降序排序
sortedClassCount = sorted(classCount.items(),key=operator.itemgetter(1),reverse=True)
print('#######',sortedClassCount)
classCount~~~~~~ {} classCount~~~~~~ {'动作片': 2, '爱情片': 1} ####### [('动作片', 2), ('爱情片', 1)] 动作片
第一段代码总算跑通了,虽然到现在为止还不理解什么是人工智能,哈哈。