机器学习实战 k-近邻算法 python3

k-近邻算法

参考一位大牛的博客：https://blog.csdn.net/c406495762/article/details/75172850
他最后的手写数字识别部分没有按照原书用 Python 来实现，可以参考我的代码。

k-近邻算法概述

简单地说，k-近邻算法就是采用测量不同特征值之间距离的方法进行分类。
它的工作原理是：存在一个样本数据集合（也称作训练样本集），并且样本集中每一个数据都带有标签，即我们知道样本集中每一个数据与其所属分类的对应关系。输入没有标签的新数据后，将新数据的每一个特征与样本集中数据对应的特征进行比较，然后算法提取样本集中特征最相似数据（最近邻）的分类标签。一般来说，我们只选择样本数据集中前 k 个最相似的数据，这就是 k-近邻算法中 k 的出处，通常 k 是不大于 20 的整数。最后，选择 k 个最相似数据中出现次数最多的分类标签，作为新数据的分类。

一.电影分类

1.准备数据集

import numpy as np 

"""
function:
    创建数据集
parameters:
    无
returns:
    group - 数据集
    labels - 分类标签
"""

def createDataSet():
    """Build the four-sample toy data set used by the kNN demo.

    Returns:
        tuple: ``(group, labels)`` where ``group`` is a 4x2 NumPy array with
        one two-feature sample per row and ``labels`` is a list holding the
        class name of the corresponding row.
    """
    # Keep every sample next to its label so the pairing is obvious.
    samples = [
        ([1, 101], 'love'),
        ([5, 89], 'love'),
        ([108, 5], 'action'),
        ([115, 8], 'action'),
    ]
    group = np.array([features for features, _ in samples])
    labels = [name for _, name in samples]
    return group, labels

if __name__ == "__main__":
    # Demo: build the toy data set and print the features, then the labels.
    group, labels = createDataSet()
    print(group, labels, sep='\n')

2.kNN算法

import numpy as np 
import operator

"""
function:
    knn算法,分类器
parameters:
    inX - 测试集　
    dataSet - 训练集
    labels - 分类标签
    k - kNN算法参数,选取距离最小的k个点
returns:
    sortedClassCount[0][0] - 分类结果
"""

def classify0(inX, dataSet, labels, k):
    """Classify one sample by majority vote among its k nearest neighbours.

    Distances are Euclidean. When several labels receive the same number of
    votes, the label met first while walking the neighbours from nearest to
    farthest wins.

    Args:
        inX: Feature vector to classify (array-like, one value per column of
            ``dataSet``).
        dataSet: 2-D array-like of training samples, one row per sample.
        labels: Sequence of class labels; ``labels[i]`` belongs to row ``i``
            of ``dataSet``.
        k: Number of nearest neighbours that take part in the vote.

    Returns:
        The label that received the most votes.

    Raises:
        ValueError: If ``k`` is not between 1 and the number of training
            samples (this also covers an empty ``dataSet``).
    """
    dataSet = np.asarray(dataSet)
    dataSetSize = dataSet.shape[0]
    if not 1 <= k <= dataSetSize:
        raise ValueError(
            "k must be between 1 and the number of training samples "
            "(%d), got %r" % (dataSetSize, k)
        )
    # Broadcasting subtracts inX from every row without building a tiled copy.
    diffMat = dataSet - np.asarray(inX)
    distances = np.sqrt((diffMat ** 2).sum(axis=1))
    nearestIndices = distances.argsort()[:k]
    # Count how often each label occurs among the k closest samples.
    classCount = {}
    for index in nearestIndices:
        voteIlabel = labels[index]
        classCount[voteIlabel] = classCount.get(voteIlabel, 0) + 1
    # max() keeps the first maximal key, so ties go to the label seen first.
    return max(classCount, key=classCount.get)

3.预测电影

import numpy as np 
import operator 

"""
function:
    创建数据集
parameters:
    无
returns:
    group - 数据集
    labels - 分类标签
"""

def createDataSet():
    """Build the four-sample toy data set used by the kNN demo.

    Returns:
        tuple: ``(group, labels)`` where ``group`` is a 4x2 NumPy array with
        one two-feature sample per row and ``labels`` is a list holding the
        class name of the corresponding row.
    """
    # Keep every sample next to its label so the pairing is obvious.
    samples = [
        ([1, 101], 'love'),
        ([5, 89], 'love'),
        ([108, 5], 'action'),
        ([115, 8], 'action'),
    ]
    group = np.array([features for features, _ in samples])
    labels = [name for _, name in samples]
    return group, labels

"""
function:
    knn算法,分类器
parameters:
    inX - 测试集　
    dataSet - 训练集
    labels - 分类标签
    k - kNN算法参数,选取距离最小的k个点
returns:
    sortedClassCount[0][0] - 分类结果
"""

def classify0(inX, dataSet, labels, k):
    """Classify one sample by majority vote among its k nearest neighbours.

    Distances are Euclidean. When several labels receive the same number of
    votes, the label met first while walking the neighbours from nearest to
    farthest wins.

    Args:
        inX: Feature vector to classify (array-like, one value per column of
            ``dataSet``).
        dataSet: 2-D array-like of training samples, one row per sample.
        labels: Sequence of class labels; ``labels[i]`` belongs to row ``i``
            of ``dataSet``.
        k: Number of nearest neighbours that take part in the vote.

    Returns:
        The label that received the most votes.

    Raises:
        ValueError: If ``k`` is not between 1 and the number of training
            samples (this also covers an empty ``dataSet``).
    """
    dataSet = np.asarray(dataSet)
    dataSetSize = dataSet.shape[0]
    if not 1 <= k <= dataSetSize:
        raise ValueError(
            "k must be between 1 and the number of training samples "
            "(%d), got %r" % (dataSetSize, k)
        )
    # Broadcasting subtracts inX from every row without building a tiled copy.
    diffMat = dataSet - np.asarray(inX)
    distances = np.sqrt((diffMat ** 2).sum(axis=1))
    nearestIndices = distances.argsort()[:k]
    # Count how often each label occurs among the k closest samples.
    classCount = {}
    for index in nearestIndices:
        voteIlabel = labels[index]
        classCount[voteIlabel] = classCount.get(voteIlabel, 0) + 1
    # max() keeps the first maximal key, so ties go to the label seen first.
    return max(classCount, key=classCount.get)

if __name__ == "__main__":
    # Demo: classify one unseen sample using its three nearest neighbours.
    group, labels = createDataSet()
    test = [101, 20]
    test_class = classify0(test, group, labels, k=3)
    print(test_class)

二.约会网站

1.数据解析

import numpy as np 

"""
function:
    打开并解析文件,对数据进行分类:1不喜欢,2魅力一般,3极具魅力

parameters:
    filename - 文件名

returns:
    returnMat -　特征矩阵
    classLabelVector - 分类Label向量
"""

def file2matrix(filename):
    """Parse a tab-separated dating data file into features and labels.

    Every non-blank line must contain at least four tab-separated fields:
    the first three are numeric features and the last one is the class name.
    The class names ``didntLike``, ``smallDoses`` and ``largeDoses`` are
    encoded as 1, 2 and 3. Blank lines are ignored.

    Args:
        filename: Path of the text file to read.

    Returns:
        tuple: ``(returnMat, classLabelVector)`` where ``returnMat`` is a
        float NumPy array of shape ``(n, 3)`` and ``classLabelVector`` is a
        list of ``n`` integer class codes, aligned row by row.

    Raises:
        ValueError: If a line has too few fields, a feature is not numeric,
            or the class name is unknown.
    """
    labelCodes = {'didntLike': 1, 'smallDoses': 2, 'largeDoses': 3}
    featureRows = []
    classLabelVector = []
    # The context manager closes the file even when parsing fails.
    with open(filename) as fr:
        for lineNumber, line in enumerate(fr, start=1):
            line = line.strip()
            if not line:
                # Blank lines (e.g. a trailing newline) carry no sample.
                continue
            listFromLine = line.split('\t')
            if len(listFromLine) < 4:
                raise ValueError(
                    "%s:%d: expected 3 features and a label, got %d field(s)"
                    % (filename, lineNumber, len(listFromLine))
                )
            label = listFromLine[-1]
            if label not in labelCodes:
                # Skipping the label silently would misalign rows and labels.
                raise ValueError(
                    "%s:%d: unknown class label %r"
                    % (filename, lineNumber, label)
                )
            featureRows.append([float(value) for value in listFromLine[0:3]])
            classLabelVector.append(labelCodes[label])
    # reshape keeps the (0, 3) shape for a file without any samples.
    returnMat = np.array(featureRows, dtype=float).reshape(-1, 3)
    return returnMat, classLabelVector

if __name__ == '__main__':
    # Demo: parse the dating data file, then print the matrix and the labels.
    filename = "datingTestSet.txt"
    datingDataMat, datingLabels = file2matrix(filename)
    print(datingDataMat, datingLabels, sep='\n')

2.数据可视化

from matplotlib.font_manager import FontProperties
import matplotlib.lines as mlines 
import matplotlib.pyplot as plt 
import numpy as np 

"""
function:
    打开并解析文件,对数据进行分类:1不喜欢,2魅力一般,3极具魅力

parameters:
    filename - 文件名

returns:
    returnMat -　特征矩阵
    classLabelVector - 分类Label向量
"""

def file2matrix(filename):
    """Parse a tab-separated dating data file into features and labels.

    Every non-blank line must contain at least four tab-separated fields:
    the first three are numeric features and the last one is the class name.
    The class names ``didntLike``, ``smallDoses`` and ``largeDoses`` are
    encoded as 1, 2 and 3. Blank lines are ignored.

    Args:
        filename: Path of the text file to read.

    Returns:
        tuple: ``(returnMat, classLabelVector)`` where ``returnMat`` is a
        float NumPy array of shape ``(n, 3)`` and ``classLabelVector`` is a
        list of ``n`` integer class codes, aligned row by row.

    Raises:
        ValueError: If a line has too few fields, a feature is not numeric,
            or the class name is unknown.
    """
    labelCodes = {'didntLike': 1, 'smallDoses': 2, 'largeDoses': 3}
    featureRows = []
    classLabelVector = []
    # The context manager closes the file even when parsing fails.
    with open(filename) as fr:
        for lineNumber, line in enumerate(fr, start=1):
            line = line.strip()
            if not line:
                # Blank lines (e.g. a trailing newline) carry no sample.
                continue
            listFromLine = line.split('\t')
            if len(listFromLine) < 4:
                raise ValueError(
                    "%s:%d: expected 3 features and a label, got %d field(s)"
                    % (filename, lineNumber, len(listFromLine))
                )
            label = listFromLine[-1]
            if label not in labelCodes:
                # Skipping the label silently would misalign rows and labels.
                raise ValueError(
                    "%s:%d: unknown class label %r"
                    % (filename, lineNumber, label)
                )
            featureRows.append([float(value) for value in listFromLine[0:3]])
            classLabelVector.append(labelCodes[label])
    # reshape keeps the (0, 3) shape for a file without any samples.
    returnMat = np.array(featureRows, dtype=float).reshape(-1, 3)
    return returnMat, classLabelVector

"""
function:
    可视化数据
parameters:
    datingDataMat - 特征矩阵
    datingLabels - 分类标签
returns:
    无
"""

def showdatas(datingDataMat, datingLabels):
    """Draw pairwise scatter plots of the three dating features.

    A 2x2 figure is created and three of its panels are used: column 0 vs
    column 1, column 0 vs column 2 and column 1 vs column 2 of
    ``datingDataMat``. Points are coloured by class label, every used panel
    gets the same legend, and the figure is displayed with ``plt.show()``.

    Args:
        datingDataMat: Feature matrix; columns 0, 1 and 2 are plotted under
            the axis names 'plane', 'game' and 'ice_cream'.
        datingLabels: Iterable of class labels, one per row. Label 1 is drawn
            in black, label 2 in orange and any other label in red.

    Returns:
        None.
    """
    _, axs = plt.subplots(nrows=2, ncols=2, sharex=False, sharey=False, figsize=(13,8))

    # One colour per sample, chosen from its class label.
    LabelColors = [
        'black' if label == 1 else 'orange' if label == 2 else 'red'
        for label in datingLabels
    ]

    featureNames = ('plane', 'game', 'ice_cream')
    # (panel, x column, y column) for each pairwise comparison.
    panels = (
        (axs[0][0], 0, 1),
        (axs[0][1], 0, 2),
        (axs[1][0], 1, 2),
    )
    # Proxy artists so the legend shows one entry per class.
    legendHandles = [
        mlines.Line2D([], [], color='black', marker='.', markersize=6, label='didntLike'),
        mlines.Line2D([], [], color='orange', marker='.', markersize=6, label='smallDoses'),
        mlines.Line2D([], [], color='red', marker='.', markersize=6, label='largeDoses'),
    ]

    for ax, xCol, yCol in panels:
        xName, yName = featureNames[xCol], featureNames[yCol]
        ax.scatter(x=datingDataMat[:,xCol], y=datingDataMat[:,yCol], color=LabelColors, s=15, alpha=.5)
        ax.set_title('%s vs %s' % (xName, yName))
        ax.set_xlabel(xName)
        ax.set_ylabel(yName)
        ax.legend(handles=legendHandles)

    plt.show()

if __name__ == "__main__":
    # Demo: load the dating data and plot its three feature pairs.
    filename = 'datingTestSet.txt'
    datingDataMat, datingLabels = file2matrix(filename)
    showdatas(datingDataMat=datingDataMat, datingLabels=datingLabels)

3.数据归一化

from matplotlib.font_manager import FontProperties
import matplotlib.lines as mlines 
import matplotlib.pyplot as plt 
import numpy as np 

"""
function:
    打开并解析文件,对数据进行分类:1不喜欢,2魅力一般,3极具魅力

parameters:
    filename - 文件名

returns:
    returnMat -　特征矩阵
    classLabelVector - 分类Label向量
"""

def file2matrix(filename):
    """Parse a tab-separated dating data file into features and labels.

    Every non-blank line must contain at least four tab-separated fields:
    the first three are numeric features and the last one is the class name.
    The class names ``didntLike``, ``smallDoses`` and ``largeDoses`` are
    encoded as 1, 2 and 3. Blank lines are ignored.

    Args:
        filename: Path of the text file to read.

    Returns:
        tuple: ``(returnMat, classLabelVector)`` where ``returnMat`` is a
        float NumPy array of shape ``(n, 3)`` and ``classLabelVector`` is a
        list of ``n`` integer class codes, aligned row by row.

    Raises:
        ValueError: If a line has too few fields, a feature is not numeric,
            or the class name is unknown.
    """
    labelCodes = {'didntLike': 1, 'smallDoses': 2, 'largeDoses': 3}
    featureRows = []
    classLabelVector = []
    # The context manager closes the file even when parsing fails.
    with open(filename) as fr:
        for lineNumber, line in enumerate(fr, start=1):
            line = line.strip()
            if not line:
                # Blank lines (e.g. a trailing newline) carry no sample.
                continue
            listFromLine = line.split('\t')
            if len(listFromLine) < 4:
                raise ValueError(
                    "%s:%d: expected 3 features and a label, got %d field(s)"
                    % (filename, lineNumber, len(listFromLine))
                )
            label = listFromLine[-1]
            if label not in labelCodes:
                # Skipping the label silently would misalign rows and labels.
                raise ValueError(
                    "%s:%d: unknown class label %r"
                    % (filename, lineNumber, label)
                )
            featureRows.append([float(value) for value in listFromLine[0:3]])
            classLabelVector.append(labelCodes[label])
    # reshape keeps the (0, 3) shape for a file without any samples.
    returnMat = np.array(featureRows, dtype=float).reshape(-1, 3)
    return returnMat, classLabelVector

"""
function:
    可视化数据
parameters:
    datingDataMat - 特征矩阵
    datingLabels - 分类标签
returns:
    无
"""

def showdatas(datingDataMat, datingLabels):
    """Draw pairwise scatter plots of the three dating features.

    A 2x2 figure is created and three of its panels are used: column 0 vs
    column 1, column 0 vs column 2 and column 1 vs column 2 of
    ``datingDataMat``. Points are coloured by class label, every used panel
    gets the same legend, and the figure is displayed with ``plt.show()``.

    Args:
        datingDataMat: Feature matrix; columns 0, 1 and 2 are plotted under
            the axis names 'plane', 'game' and 'ice_cream'.
        datingLabels: Iterable of class labels, one per row. Label 1 is drawn
            in black, label 2 in orange and any other label in red.

    Returns:
        None.
    """
    _, axs = plt.subplots(nrows=2, ncols=2, sharex=False, sharey=False, figsize=(13,8))

    # One colour per sample, chosen from its class label.
    LabelColors = [
        'black' if label == 1 else 'orange' if label == 2 else 'red'
        for label in datingLabels
    ]

    featureNames = ('plane', 'game', 'ice_cream')
    # (panel, x column, y column) for each pairwise comparison.
    panels = (
        (axs[0][0], 0, 1),
        (axs[0][1], 0, 2),
        (axs[1][0], 1, 2),
    )
    # Proxy artists so the legend shows one entry per class.
    legendHandles = [
        mlines.Line2D([], [], color='black', marker='.', markersize=6, label='didntLike'),
        mlines.Line2D([], [], color='orange', marker='.', markersize=6, label='smallDoses'),
        mlines.Line2D([], [], color='red', marker='.', markersize=6, label='largeDoses'),
    ]

    for ax, xCol, yCol in panels:
        xName, yName = featureNames[xCol], featureNames[yCol]
        ax.scatter(x=datingDataMat[:,xCol], y=datingDataMat[:,yCol], color=LabelColors, s=15, alpha=.5)
        ax.set_title('%s vs %s' % (xName, yName))
        ax.set_xlabel(xName)
        ax.set_ylabel(yName)
        ax.legend(handles=legendHandles)

    plt.show()

"""
function:
    对数据进行归一化
parameters:
    dataSet - 特征矩阵
returns:
    normDataSet - 归一化后的矩阵
    ranges - 数据范围
    minVals - 数据最小值
"""

def autoNorm(dataSet):
    """Rescale every feature column to the range [0, 1] (min-max scaling).

    Args:
        dataSet: 2-D NumPy array with one sample per row and one feature per
            column.

    Returns:
        tuple: ``(normDataSet, ranges, minVals)`` where ``normDataSet`` is
        ``(dataSet - minVals) / ranges`` computed column-wise, ``ranges`` is
        the per-column ``max - min`` and ``minVals`` is the per-column
        minimum. A column whose values are all equal (range 0) is mapped to
        0 instead of NaN; its entry in ``ranges`` is still reported as 0.
    """
    minVals = dataSet.min(0)
    maxVals = dataSet.max(0)
    ranges = maxVals - minVals
    # A constant column has range 0; divide by 1 there so it normalises to 0
    # rather than 0/0 = NaN.
    safeRanges = np.where(ranges == 0, 1, ranges)
    # Broadcasting applies the per-column vectors to every row.
    normDataSet = (dataSet - minVals) / safeRanges
    return normDataSet, ranges, minVals

if __name__ == "__main__":
    # Demo: normalise the dating data and print the result together with the
    # per-column range and minimum.
    filename = 'datingTestSet.txt'
    datingDataMat, datingLabels = file2matrix(filename)
    normDataSet, ranges, minVals = autoNorm(datingDataMat)
    print(normDataSet, ranges, minVals, sep='\n')

4.测试分类器

from matplotlib.font_manager import FontProperties
import matplotlib.lines as mlines 
import matplotlib.pyplot as plt 
import numpy as np 
import operator

"""
function:
    knn算法,分类器
parameters:
    inX - 测试集　
    dataSet - 训练集
    labels - 分类标签
    k - kNN算法参数,选取距离最小的k个点
returns:
    sortedClassCount[0][0] - 分类结果
"""

def classify0(inX, dataSet, labels, k):
    """Classify one sample by majority vote among its k nearest neighbours.

    Distances are Euclidean. When several labels receive the same number of
    votes, the label met first while walking the neighbours from nearest to
    farthest wins.

    Args:
        inX: Feature vector to classify (array-like, one value per column of
            ``dataSet``).
        dataSet: 2-D array-like of training samples, one row per sample.
        labels: Sequence of class labels; ``labels[i]`` belongs to row ``i``
            of ``dataSet``.
        k: Number of nearest neighbours that take part in the vote.

    Returns:
        The label that received the most votes.

    Raises:
        ValueError: If ``k`` is not between 1 and the number of training
            samples (this also covers an empty ``dataSet``).
    """
    dataSet = np.asarray(dataSet)
    dataSetSize = dataSet.shape[0]
    if not 1 <= k <= dataSetSize:
        raise ValueError(
            "k must be between 1 and the number of training samples "
            "(%d), got %r" % (dataSetSize, k)
        )
    # Broadcasting subtracts inX from every row without building a tiled copy.
    diffMat = dataSet - np.asarray(inX)
    distances = np.sqrt((diffMat ** 2).sum(axis=1))
    nearestIndices = distances.argsort()[:k]
    # Count how often each label occurs among the k closest samples.
    classCount = {}
    for index in nearestIndices:
        voteIlabel = labels[index]
        classCount[voteIlabel] = classCount.get(voteIlabel, 0) + 1
    # max() keeps the first maximal key, so ties go to the label seen first.
    return max(classCount, key=classCount.get)
"""
function:
    打开并解析文件,对数据进行分类:1不喜欢,2魅力一般,3极具魅力

parameters:
    filename - 文件名

returns:
    returnMat -　特征矩阵
    classLabelVector - 分类Label向量
"""

def file2matrix(filename):
    """Parse a tab-separated dating data file into features and labels.

    Every non-blank line must contain at least four tab-separated fields:
    the first three are numeric features and the last one is the class name.
    The class names ``didntLike``, ``smallDoses`` and ``largeDoses`` are
    encoded as 1, 2 and 3. Blank lines are ignored.

    Args:
        filename: Path of the text file to read.

    Returns:
        tuple: ``(returnMat, classLabelVector)`` where ``returnMat`` is a
        float NumPy array of shape ``(n, 3)`` and ``classLabelVector`` is a
        list of ``n`` integer class codes, aligned row by row.

    Raises:
        ValueError: If a line has too few fields, a feature is not numeric,
            or the class name is unknown.
    """
    labelCodes = {'didntLike': 1, 'smallDoses': 2, 'largeDoses': 3}
    featureRows = []
    classLabelVector = []
    # The context manager closes the file even when parsing fails.
    with open(filename) as fr:
        for lineNumber, line in enumerate(fr, start=1):
            line = line.strip()
            if not line:
                # Blank lines (e.g. a trailing newline) carry no sample.
                continue
            listFromLine = line.split('\t')
            if len(listFromLine) < 4:
                raise ValueError(
                    "%s:%d: expected 3 features and a label, got %d field(s)"
                    % (filename, lineNumber, len(listFromLine))
                )
            label = listFromLine[-1]
            if label not in labelCodes:
                # Skipping the label silently would misalign rows and labels.
                raise ValueError(
                    "%s:%d: unknown class label %r"
                    % (filename, lineNumber, label)
                )
            featureRows.append([float(value) for value in listFromLine[0:3]])
            classLabelVector.append(labelCodes[label])
    # reshape keeps the (0, 3) shape for a file without any samples.
    returnMat = np.array(featureRows, dtype=float).reshape(-1, 3)
    return returnMat, classLabelVector

"""
function:
    可视化数据
parameters:
    datingDataMat - 特征矩阵
    datingLabels - 分类标签
returns:
    无
"""

def showdatas(datingDataMat, datingLabels):
    """Draw pairwise scatter plots of the three dating features.

    A 2x2 figure is created and three of its panels are used: column 0 vs
    column 1, column 0 vs column 2 and column 1 vs column 2 of
    ``datingDataMat``. Points are coloured by class label, every used panel
    gets the same legend, and the figure is displayed with ``plt.show()``.

    Args:
        datingDataMat: Feature matrix; columns 0, 1 and 2 are plotted under
            the axis names 'plane', 'game' and 'ice_cream'.
        datingLabels: Iterable of class labels, one per row. Label 1 is drawn
            in black, label 2 in orange and any other label in red.

    Returns:
        None.
    """
    _, axs = plt.subplots(nrows=2, ncols=2, sharex=False, sharey=False, figsize=(13,8))

    # One colour per sample, chosen from its class label.
    LabelColors = [
        'black' if label == 1 else 'orange' if label == 2 else 'red'
        for label in datingLabels
    ]

    featureNames = ('plane', 'game', 'ice_cream')
    # (panel, x column, y column) for each pairwise comparison.
    panels = (
        (axs[0][0], 0, 1),
        (axs[0][1], 0, 2),
        (axs[1][0], 1, 2),
    )
    # Proxy artists so the legend shows one entry per class.
    legendHandles = [
        mlines.Line2D([], [], color='black', marker='.', markersize=6, label='didntLike'),
        mlines.Line2D([], [], color='orange', marker='.', markersize=6, label='smallDoses'),
        mlines.Line2D([], [], color='red', marker='.', markersize=6, label='largeDoses'),
    ]

    for ax, xCol, yCol in panels:
        xName, yName = featureNames[xCol], featureNames[yCol]
        ax.scatter(x=datingDataMat[:,xCol], y=datingDataMat[:,yCol], color=LabelColors, s=15, alpha=.5)
        ax.set_title('%s vs %s' % (xName, yName))
        ax.set_xlabel(xName)
        ax.set_ylabel(yName)
        ax.legend(handles=legendHandles)

    plt.show()

"""
function:
    对数据进行归一化
parameters:
    dataSet - 特征矩阵
returns:
    normDataSet - 归一化后的矩阵
    ranges - 数据范围
    minVals - 数据最小值
"""

def autoNorm(dataSet):
    """Rescale every feature column to the range [0, 1] (min-max scaling).

    Args:
        dataSet: 2-D NumPy array with one sample per row and one feature per
            column.

    Returns:
        tuple: ``(normDataSet, ranges, minVals)`` where ``normDataSet`` is
        ``(dataSet - minVals) / ranges`` computed column-wise, ``ranges`` is
        the per-column ``max - min`` and ``minVals`` is the per-column
        minimum. A column whose values are all equal (range 0) is mapped to
        0 instead of NaN; its entry in ``ranges`` is still reported as 0.
    """
    minVals = dataSet.min(0)
    maxVals = dataSet.max(0)
    ranges = maxVals - minVals
    # A constant column has range 0; divide by 1 there so it normalises to 0
    # rather than 0/0 = NaN.
    safeRanges = np.where(ranges == 0, 1, ranges)
    # Broadcasting applies the per-column vectors to every row.
    normDataSet = (dataSet - minVals) / safeRanges
    return normDataSet, ranges, minVals

"""
function:
    分类器测试
parameters:
    无
returns:
    无
"""

def datingClassTest(filename="datingTestSet.txt", hoRatio=0.1, k=6):
    """Run a hold-out evaluation of the kNN classifier on the dating data.

    The first ``hoRatio`` fraction of the rows is classified against the
    remaining rows. Each prediction is printed next to the true label,
    followed by the overall error rate in percent.

    Args:
        filename: Data file passed to ``file2matrix``.
        hoRatio: Fraction of the rows (taken from the top) used as test set.
        k: Number of neighbours passed to ``classify0``.

    Returns:
        None.

    Raises:
        ValueError: If the hold-out set would be empty, which previously
            surfaced as a ZeroDivisionError in the error-rate computation.
    """
    datingDataMat, datingLabels = file2matrix(filename)
    # Normalise so that no single feature dominates the distance.
    normMat, ranges, minVals = autoNorm(datingDataMat)
    m = normMat.shape[0]
    numTestVecs = int(m * hoRatio)
    if numTestVecs == 0:
        raise ValueError(
            "hold-out set is empty: %d row(s) with hoRatio=%r" % (m, hoRatio)
        )
    # The training part is the same for every test row, so slice it once.
    trainMat = normMat[numTestVecs:m, :]
    trainLabels = datingLabels[numTestVecs:m]
    errorCount = 0.0

    for i in range(numTestVecs):
        classifierResult = classify0(normMat[i, :], trainMat, trainLabels, k)
        print("分类结果:%d\t 实际类别:%d" % (classifierResult, datingLabels[i]))
        if classifierResult != datingLabels[i]:
            errorCount += 1
    print("错误率:%f%%" % (errorCount / float(numTestVecs)*100))

if __name__ == "__main__":
    # Script entry point: run the hold-out evaluation and print its report.
    datingClassTest()

5.约会人物分类

from matplotlib.font_manager import FontProperties
import matplotlib.lines as mlines 
import matplotlib.pyplot as plt 
import numpy as np 
import operator

"""
function:
    knn算法,分类器
parameters:
    inX - 测试集　
    dataSet - 训练集
    labels - 分类标签
    k - kNN算法参数,选取距离最小的k个点
returns:
    sortedClassCount[0][0] - 分类结果
"""

def classify0(inX, dataSet, labels, k):
    """Classify one sample by majority vote among its k nearest neighbours.

    Distances are Euclidean. When several labels receive the same number of
    votes, the label met first while walking the neighbours from nearest to
    farthest wins.

    Args:
        inX: Feature vector to classify (array-like, one value per column of
            ``dataSet``).
        dataSet: 2-D array-like of training samples, one row per sample.
        labels: Sequence of class labels; ``labels[i]`` belongs to row ``i``
            of ``dataSet``.
        k: Number of nearest neighbours that take part in the vote.

    Returns:
        The label that received the most votes.

    Raises:
        ValueError: If ``k`` is not between 1 and the number of training
            samples (this also covers an empty ``dataSet``).
    """
    dataSet = np.asarray(dataSet)
    dataSetSize = dataSet.shape[0]
    if not 1 <= k <= dataSetSize:
        raise ValueError(
            "k must be between 1 and the number of training samples "
            "(%d), got %r" % (dataSetSize, k)
        )
    # Broadcasting subtracts inX from every row without building a tiled copy.
    diffMat = dataSet - np.asarray(inX)
    distances = np.sqrt((diffMat ** 2).sum(axis=1))
    nearestIndices = distances.argsort()[:k]
    # Count how often each label occurs among the k closest samples.
    classCount = {}
    for index in nearestIndices:
        voteIlabel = labels[index]
        classCount[voteIlabel] = classCount.get(voteIlabel, 0) + 1
    # max() keeps the first maximal key, so ties go to the label seen first.
    return max(classCount, key=classCount.get)
"""
function:
    打开并解析文件,对数据进行分类:1不喜欢,2魅力一般,3极具魅力

parameters:
    filename - 文件名

returns:
    returnMat -　特征矩阵
    classLabelVector - 分类Label向量
"""

def file2matrix(filename):
    """Parse a tab-separated dating data file into features and labels.

    Every non-blank line must contain at least four tab-separated fields:
    the first three are numeric features and the last one is the class name.
    The class names ``didntLike``, ``smallDoses`` and ``largeDoses`` are
    encoded as 1, 2 and 3. Blank lines are ignored.

    Args:
        filename: Path of the text file to read.

    Returns:
        tuple: ``(returnMat, classLabelVector)`` where ``returnMat`` is a
        float NumPy array of shape ``(n, 3)`` and ``classLabelVector`` is a
        list of ``n`` integer class codes, aligned row by row.

    Raises:
        ValueError: If a line has too few fields, a feature is not numeric,
            or the class name is unknown.
    """
    labelCodes = {'didntLike': 1, 'smallDoses': 2, 'largeDoses': 3}
    featureRows = []
    classLabelVector = []
    # The context manager closes the file even when parsing fails.
    with open(filename) as fr:
        for lineNumber, line in enumerate(fr, start=1):
            line = line.strip()
            if not line:
                # Blank lines (e.g. a trailing newline) carry no sample.
                continue
            listFromLine = line.split('\t')
            if len(listFromLine) < 4:
                raise ValueError(
                    "%s:%d: expected 3 features and a label, got %d field(s)"
                    % (filename, lineNumber, len(listFromLine))
                )
            label = listFromLine[-1]
            if label not in labelCodes:
                # Skipping the label silently would misalign rows and labels.
                raise ValueError(
                    "%s:%d: unknown class label %r"
                    % (filename, lineNumber, label)
                )
            featureRows.append([float(value) for value in listFromLine[0:3]])
            classLabelVector.append(labelCodes[label])
    # reshape keeps the (0, 3) shape for a file without any samples.
    returnMat = np.array(featureRows, dtype=float).reshape(-1, 3)
    return returnMat, classLabelVector

"""
function:
    可视化数据
parameters:
    datingDataMat - 特征矩阵
    datingLabels - 分类标签
returns:
    无
"""

def showdatas(datingDataMat, datingLabels):
    """Draw pairwise scatter plots of the three dating features.

    A 2x2 figure is created and three of its panels are used: column 0 vs
    column 1, column 0 vs column 2 and column 1 vs column 2 of
    ``datingDataMat``. Points are coloured by class label, every used panel
    gets the same legend, and the figure is displayed with ``plt.show()``.

    Args:
        datingDataMat: Feature matrix; columns 0, 1 and 2 are plotted under
            the axis names 'plane', 'game' and 'ice_cream'.
        datingLabels: Iterable of class labels, one per row. Label 1 is drawn
            in black, label 2 in orange and any other label in red.

    Returns:
        None.
    """
    _, axs = plt.subplots(nrows=2, ncols=2, sharex=False, sharey=False, figsize=(13,8))

    # One colour per sample, chosen from its class label.
    LabelColors = [
        'black' if label == 1 else 'orange' if label == 2 else 'red'
        for label in datingLabels
    ]

    featureNames = ('plane', 'game', 'ice_cream')
    # (panel, x column, y column) for each pairwise comparison.
    panels = (
        (axs[0][0], 0, 1),
        (axs[0][1], 0, 2),
        (axs[1][0], 1, 2),
    )
    # Proxy artists so the legend shows one entry per class.
    legendHandles = [
        mlines.Line2D([], [], color='black', marker='.', markersize=6, label='didntLike'),
        mlines.Line2D([], [], color='orange', marker='.', markersize=6, label='smallDoses'),
        mlines.Line2D([], [], color='red', marker='.', markersize=6, label='largeDoses'),
    ]

    for ax, xCol, yCol in panels:
        xName, yName = featureNames[xCol], featureNames[yCol]
        ax.scatter(x=datingDataMat[:,xCol], y=datingDataMat[:,yCol], color=LabelColors, s=15, alpha=.5)
        ax.set_title('%s vs %s' % (xName, yName))
        ax.set_xlabel(xName)
        ax.set_ylabel(yName)
        ax.legend(handles=legendHandles)

    plt.show()

"""
function:
    对数据进行归一化
parameters:
    dataSet - 特征矩阵
returns:
    normDataSet - 归一化后的矩阵
    ranges - 数据范围
    minVals - 数据最小值
"""

def autoNorm(dataSet):
    """Rescale every feature column to the range [0, 1] (min-max scaling).

    Args:
        dataSet: 2-D NumPy array with one sample per row and one feature per
            column.

    Returns:
        tuple: ``(normDataSet, ranges, minVals)`` where ``normDataSet`` is
        ``(dataSet - minVals) / ranges`` computed column-wise, ``ranges`` is
        the per-column ``max - min`` and ``minVals`` is the per-column
        minimum. A column whose values are all equal (range 0) is mapped to
        0 instead of NaN; its entry in ``ranges`` is still reported as 0.
    """
    minVals = dataSet.min(0)
    maxVals = dataSet.max(0)
    ranges = maxVals - minVals
    # A constant column has range 0; divide by 1 there so it normalises to 0
    # rather than 0/0 = NaN.
    safeRanges = np.where(ranges == 0, 1, ranges)
    # Broadcasting applies the per-column vectors to every row.
    normDataSet = (dataSet - minVals) / safeRanges
    return normDataSet, ranges, minVals

"""
function:
    分类器测试
parameters:
    无
returns:
    无
"""

def datingClassTest(filename="datingTestSet.txt", hoRatio=0.1, k=6):
    """Run a hold-out evaluation of the kNN classifier on the dating data.

    The first ``hoRatio`` fraction of the rows is classified against the
    remaining rows. Each prediction is printed next to the true label,
    followed by the overall error rate in percent.

    Args:
        filename: Data file passed to ``file2matrix``.
        hoRatio: Fraction of the rows (taken from the top) used as test set.
        k: Number of neighbours passed to ``classify0``.

    Returns:
        None.

    Raises:
        ValueError: If the hold-out set would be empty, which previously
            surfaced as a ZeroDivisionError in the error-rate computation.
    """
    datingDataMat, datingLabels = file2matrix(filename)
    # Normalise so that no single feature dominates the distance.
    normMat, ranges, minVals = autoNorm(datingDataMat)
    m = normMat.shape[0]
    numTestVecs = int(m * hoRatio)
    if numTestVecs == 0:
        raise ValueError(
            "hold-out set is empty: %d row(s) with hoRatio=%r" % (m, hoRatio)
        )
    # The training part is the same for every test row, so slice it once.
    trainMat = normMat[numTestVecs:m, :]
    trainLabels = datingLabels[numTestVecs:m]
    errorCount = 0.0

    for i in range(numTestVecs):
        classifierResult = classify0(normMat[i, :], trainMat, trainLabels, k)
        print("分类结果:%d\t 实际类别:%d" % (classifierResult, datingLabels[i]))
        if classifierResult != datingLabels[i]:
            errorCount += 1
    print("错误率:%f%%" % (errorCount / float(numTestVecs)*100))

"""
function:
    输入特征来进行标签分类
parameters:
    无
returns:
    无
"""

def classifyPerson():
    """Interactively classify one person from three typed-in feature values.

    Prompts for the 'game', 'plane' and 'ice_cream' values, normalises them
    with the minimum and range of the training data and prints the predicted
    attitude. The training data is read from ``datingTestSet.txt``.

    Returns:
        None.

    Raises:
        ValueError: If one of the typed values cannot be parsed as a float.
    """
    resultList = ['讨厌', '有些喜欢', '非常喜欢']
    precentTats = float(input("game:"))
    ffMiles = float(input("plane:"))
    iceCream = float(input("ice_cream:"))
    filename = "datingTestSet.txt"
    # Load and normalise the training set.
    datingDataMat, datingLabels = file2matrix(filename)
    normMat, ranges, minVals = autoNorm(datingDataMat)
    # The sample must follow the column order of the training matrix, which
    # showdatas labels as (plane, game, ice_cream). The earlier order
    # (game, plane, ice_cream) swapped the first two features.
    inArr = np.array([ffMiles, precentTats, iceCream])
    # Scale the sample like the training data; a constant training column has
    # range 0, so divide by 1 there to avoid inf/NaN.
    safeRanges = np.where(ranges == 0, 1, ranges)
    norminArr = (inArr - minVals) / safeRanges
    classifierResult = classify0(norminArr, normMat, datingLabels, 4)
    # Class codes start at 1, list indices at 0.
    print("你可能%s这个人" % (resultList[classifierResult-1]))

if __name__ == '__main__':
    # Script entry point: ask for one person's features and print the verdict.
    classifyPerson()

三.手写数字识别

1.数据处理

import numpy as np

"""
function:
   处理训练集数据
parameters:
    filename - 文件名
returns:
    returnVect - 训练集向量
"""

def img2vector(filename):
    """Flatten a 32x32 text image of digit characters into a 1x1024 vector.

    The first 32 characters of each of the first 32 lines are read and
    stored row after row, so the character at line ``i``, position ``j``
    ends up at index ``32 * i + j``.

    Args:
        filename: Path of the text file holding the image.

    Returns:
        numpy.ndarray: Float array of shape ``(1, 1024)``.

    Raises:
        IndexError: If a line has fewer than 32 characters (this includes a
            file with fewer than 32 lines).
        ValueError: If one of the characters read is not a digit.
    """
    returnVect = np.zeros((1, 1024))
    # The context manager closes the file even when parsing fails.
    with open(filename) as fr:
        for i in range(32):
            lineStr = fr.readline()
            for j in range(32):
                returnVect[0, 32*i+j] = int(lineStr[j])
    return returnVect

if __name__ == '__main__':
    # Demo: convert one sample digit file and print the resulting vector.
    testVector = img2vector(filename="testDigits/0_13.txt")
    print(testVector)

2.测试分类器

import numpy as np

"""
function:
   处理训练集数据
parameters:
    filename - 文件名
returns:
    returnVect - 训练集向量
"""

def img2vector(filename):
    """Flatten a 32x32 text image of digit characters into a 1x1024 vector.

    The first 32 characters of each of the first 32 lines are read and
    stored row after row, so the character at line ``i``, position ``j``
    ends up at index ``32 * i + j``.

    Args:
        filename: Path of the text file holding the image.

    Returns:
        numpy.ndarray: Float array of shape ``(1, 1024)``.

    Raises:
        IndexError: If a line has fewer than 32 characters (this includes a
            file with fewer than 32 lines).
        ValueError: If one of the characters read is not a digit.
    """
    returnVect = np.zeros((1, 1024))
    # The context manager closes the file even when parsing fails.
    with open(filename) as fr:
        for i in range(32):
            lineStr = fr.readline()
            for j in range(32):
                returnVect[0, 32*i+j] = int(lineStr[j])
    return returnVect

if __name__ == '__main__':
    # Demo: convert one sample digit file and print the resulting vector.
    testVector = img2vector(filename="testDigits/0_13.txt")
    print(testVector)

posted @ 2018-11-25 18:26 樱花色的梦 阅读(168) 评论(0) 编辑 收藏 举报

会员力量，点亮园子希望

刷新页面返回顶部

机器学习实战 k-近邻算法 python3

k-近邻算法

k-近邻算法概述

一.电影分类

1.准备数据集

2.kNN算法

3.预测电影

二.约会网站

1.数据解析

2.数据可视化

3.数据归一化

4.测试分类器

5.约会人物分类

三.手写数字识别

1.数据处理

2.测试分类器

公告