从零开始人工智能AI(一)-k-nn-矩阵

参考资料:

https://baike.baidu.com/item/%E7%9F%A9%E9%98%B5/18069?fr=aladdin

http://blog.csdn.net/c406495762/article/details/75172850

基础概念:

#shape[0]取得行数,shape[1]取得列数

import numpy as np

dataSetSize = dataSet.shape[0]

 

 

diffMat = np.tile(inX, (dataSetSize, 1)) - dataSet

tile的第二个参数可以是一个整数或一个元组:若为整数,表示把原序列横向重复的份数;若为元组,元组的第一个元素表示纵向复制的次数(即结果中行的个数),第二个元素表示横向复制的次数(即每一行中原序列重复的份数)

例如(需先执行 from numpy import tile):

>>> a=[0,1,2] 
>>> b=tile(a,9)
>>> b
array([0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1,
       2, 0, 1, 2])
>>> 
>>> b=tile(a,(9,2))
>>> b
array([[0, 1, 2, 0, 1, 2],
       [0, 1, 2, 0, 1, 2],
       [0, 1, 2, 0, 1, 2],
       [0, 1, 2, 0, 1, 2],
       [0, 1, 2, 0, 1, 2],
       [0, 1, 2, 0, 1, 2],
       [0, 1, 2, 0, 1, 2],
       [0, 1, 2, 0, 1, 2],
       [0, 1, 2, 0, 1, 2]])
>>> b=tile(a,(9,1))
>>> b
array([[0, 1, 2],
       [0, 1, 2],
       [0, 1, 2],
       [0, 1, 2],
       [0, 1, 2],
       [0, 1, 2],
       [0, 1, 2],
       [0, 1, 2],
       [0, 1, 2]])
>>> b=tile(a,(9,12))
>>> b
array([[0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0,
        1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2],
       [0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0,
        1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2],
       [0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0,
        1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2],
       [0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0,
        1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2],
       [0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0,
        1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2],
       [0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0,
        1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2],
       [0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0,
        1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2],
       [0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0,
        1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2],
       [0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0,
        1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2]])

 

矩阵减法

>>> a
array([[101,  20],
       [101,  20],
       [101,  20],
       [101,  20]])
>>> b
array([[  1, 101],
       [  5,  89],
       [108,   5],
       [115,   8]])
>>> a-b
array([[100, -81],
       [ 96, -69],
       [ -7,  15],
       [-14,  12]])

 

#二维特征相减后平方
sqDiffMat = diffMat**2

>>> c
array([[100, -81],
       [ 96, -69],
       [ -7,  15],
       [-14,  12]])
>>> c**2
array([[10000,  6561],
       [ 9216,  4761],
       [   49,   225],
       [  196,   144]])

#sum()所有元素相加,sum(0)列相加,sum(1)行相加
sqDistances = sqDiffMat.sum(axis=1)

>>> d=c**2
>>> d.sum(axis=1)
array([16561, 13977,   274,   340])
 
>>> d.sum(axis=0)
array([19461, 11691])

#开方,计算出距离
distances = sqDistances**0.5

>>> e=d.sum(axis=1)
>>> f=e**0.5
>>> f
array([ 128.68954892,  118.22436297,   16.55294536,   18.43908891])

sortedDistIndices = distances.argsort()
#argsort()按距离从小到大排序,返回排序后各元素在原数组中的索引(index);之后再定义一个记录类别次数的字典classCount

 
>>> f
array([ 128.68954892,  118.22436297,   16.55294536,   18.43908891])
>>> g=f.argsort()
>>> g
array([2, 3, 1, 0])

取出前k个元素的类别

>>> for i  in range(3):
...     print(i) 
...     labels[sortedDistIndices[i]]
... 
0
'动作片'
1
'动作片'
2
'爱情片'

 

>>> for i  in range(3):
...     voteIlabel = labels[sortedDistIndices[i]]
...     classCount.get(voteIlabel,0) 
... 
0
0
0
>>> for i  in range(3):
...     voteIlabel = labels[sortedDistIndices[i]]
...     classCount.get(voteIlabel,0) + 1
... 
1
1
1
    #key=operator.itemgetter(1)根据字典的值进行排序
    #key=operator.itemgetter(0)根据字典的键进行排序
    #reverse=True表示按降序排序
    sortedClassCount = sorted(classCount.items(),key=operator.itemgetter(1),reverse=True)

    print('#######',sortedClassCount)
classCount~~~~~~ {}
classCount~~~~~~ {'动作片': 2, '爱情片': 1}
####### [('动作片', 2), ('爱情片', 1)]
动作片

 

第一段代码总算跑通了,虽然到现在为止还不理解什么是人工智能,哈哈。

posted on 2017-09-07 13:46  wangbokun  阅读(495)  评论(0)  编辑  收藏  举报

导航