# kNN_datingTest

import numpy as np
import operator

def classify1(inX, dataset, labels, k):
    """Return the majority label among the ``k`` rows of ``dataset`` nearest to ``inX``.

    Distance is Euclidean, computed over the columns of ``dataset``.

    Args:
        inX: Query vector with one value per column of ``dataset``.
        dataset: 2-D array whose rows are the training samples.
        labels: Sequence holding the label of each row of ``dataset``.
        k: Number of nearest rows that vote.

    Returns:
        The label with the most votes among the ``k`` nearest rows.

    Raises:
        IndexError: If ``k`` is smaller than 1 or larger than the number of
            rows in ``dataset``.
    """
    rowCount = dataset.shape[0]
    # Repeat the query once per training row so the subtraction is row-wise.
    offsets = np.tile(inX, (rowCount, 1)) - dataset
    distances = (offsets ** 2).sum(axis=1) ** 0.5
    nearestFirst = distances.argsort()

    # Tally one vote per neighbour, walking outwards from the closest row.
    votes = {}
    for rank in range(k):
        label = labels[nearestFirst[rank]]
        if label in votes:
            votes[label] += 1
        else:
            votes[label] = 1

    # sorted() is stable, so labels with equal vote counts keep the order in
    # which the dict yields them.
    ranking = sorted(votes.items(), key=operator.itemgetter(1), reverse=True)
    return ranking[0][0]

def file2matrix(filename):
    """Load a tab-separated data file into a feature matrix and a label vector.

    On every non-blank line the first three tab-separated fields are read as
    the features and the last field is read as the label; all of them must
    parse as numbers. Blank lines are ignored.

    Args:
        filename: Path of the text file to read.

    Returns:
        A tuple ``(returnMat, classLabelVector)``: a float array of shape
        ``(n, 3)`` with the features and a float array of length ``n`` with
        the labels, where ``n`` is the number of non-blank lines.

    Raises:
        ValueError: If a line has fewer than three fields or a field is not
            numeric.
    """
    # The context manager closes the file even if parsing fails further down.
    with open(filename) as fr:
        # Skip blank lines (typically a trailing newline at the end of the
        # file) so they are not mistaken for malformed records.
        records = [line.strip().split('\t') for line in fr if line.strip()]

    numberOfLines = len(records)
    returnMat = np.zeros((numberOfLines, 3))
    classLabelVector = np.zeros(numberOfLines)
    for index, fields in enumerate(records):
        returnMat[index, :] = [float(value) for value in fields[0:3]]
        classLabelVector[index] = float(fields[-1])
    return returnMat, classLabelVector

def autoNorm(dataset):
    """Rescale every column of ``dataset`` to the range [0, 1].

    Each value is mapped to ``(value - column_min) / (column_max - column_min)``.
    A column whose values are all equal has a range of zero; it is mapped to
    0.0 instead of producing NaN from a 0/0 division.

    Args:
        dataset: 2-D NumPy array with one sample per row.

    Returns:
        A tuple ``(normDataSet, ranges, minVals)``: the rescaled array, the
        per-column ``max - min`` and the per-column minimum. ``ranges`` is
        returned unmodified, so it still contains 0 for constant columns.
    """
    minVals = dataset.min(0)
    maxVals = dataset.max(0)
    ranges = maxVals - minVals
    # Divide constant columns by 1 rather than 0; their numerator is already
    # 0, so they come out as 0.0.
    safeRanges = np.where(ranges == 0, 1, ranges)
    # Broadcasting applies the per-column vectors to every row.
    normDataSet = (dataset - minVals) / safeRanges
    return normDataSet, ranges, minVals

def datingClassTest():
    """Print the hold-out error rate of the kNN classifier on 'dating2.txt'.

    The first 10% of the rows are used as test vectors; each one is
    classified with ``classify1`` (k=3) against the remaining rows of the
    normalized data. Every prediction is printed next to the true label,
    followed by the overall error rate.

    Raises:
        ZeroDivisionError: If the file has fewer than 10 rows, because no
            test vectors are selected.
    """
    hoRatio = 0.10
    datingDataMat, datingLabels = file2matrix('dating2.txt')
    # autoNorm returns (normalized data, ranges, minimums); only the
    # normalized data is needed here.
    normMat, _ranges, _minVals = autoNorm(datingDataMat)
    m = normMat.shape[0]
    numTestVecs = int(m*hoRatio)
    errorCount = 0.0
    for i in range(numTestVecs):
        classifierResult = classify1(normMat[i, :], normMat[numTestVecs:m, :],
                                     datingLabels[numTestVecs:m], 3)
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print("the classifier came back with: %d, the real answer is: %d" % (classifierResult, datingLabels[i]))
        if classifierResult != datingLabels[i]:
            errorCount += 1.0
    print("the total error rate is: %f" % (errorCount/float(numTestVecs)))

def classifyPerson():
    """Ask for three feature values on the console and print the predicted class.

    The training data is loaded from 'dating2.txt' and normalized with
    ``autoNorm``; the entered values are normalized with the same minimums
    and ranges and classified with ``classify1`` (k=3). The numeric label
    ``n`` returned by the classifier selects entry ``n - 1`` of the result
    descriptions.

    Raises:
        ValueError: If an entered value cannot be parsed as a float.
    """
    resultList = ['not at all', 'in small doses', 'in large doses']
    # raw_input exists only on Python 2; Python 3 renamed it to input.
    try:
        readLine = raw_input
    except NameError:
        readLine = input
    percentTats = float(readLine("percentage of time spent playing video games?"))
    ffMiles = float(readLine("frequent flier miles earned per year?"))
    iceCream = float(readLine("liters of ice cream consumed per year?"))
    datingDataMat, datingLabels = file2matrix('dating2.txt')
    normMat, ranges, minVals = autoNorm(datingDataMat)
    # NOTE(review): this feature order must match the column order of
    # dating2.txt - confirm against the data file.
    inArr = np.array([percentTats, ffMiles, iceCream])
    classifierResult = classify1((inArr - minVals)/ranges, normMat, datingLabels, 3)
    # The double space is intentional: it keeps the output identical to the
    # previous comma-separated print statement, which added a separator space.
    print("You will probably like this person:  %s" % resultList[int(classifierResult - 1)])

# Posted on 2021-08-28 16:38 by Yan12345678 | views (39) | comments (0) | edit | bookmark | report
#
# Navigation