# Simple KNN implementation (for continuous-valued data) / 简单实现KNN(处理连续型数据)

import numpy as np
import matplotlib.pyplot as plt
import time
import math
import collections



# Ten two-feature training samples, one [feature_0, feature_1] pair per row.
raw_data_x = [
    [3.39, 2.33],
    [3.11, 1.78],
    [1.34, 3.36],
    [3.58, 4.67],
    [2.28, 2.86],
    [7.442, 4.69],
    [5.74, 3.53],
    [9.17, 2.51],
    [7.79, 3.42],
    [7.93, 0.79],
]
# Class label for each row above: the first five rows are class 0,
# the last five rows are class 1.
raw_data_y = [0] * 5 + [1] * 5

# NumPy views of the training set, used for masking and arithmetic below.
x_train = np.asarray(raw_data_x)
y_train = np.asarray(raw_data_y)

# The single query point whose class is to be predicted.
x_test = np.asarray([8.0, 3.36])

# Draw the training samples, one colour per class: class 0 in red,
# class 1 in green.
for label, colour in ((0, 'r'), (1, 'g')):
    members = x_train[y_train == label]  # rows belonging to this class
    plt.scatter(members[:, 0], members[:, 1], color=colour)

# Draw the query point in blue so it stands out from both classes.
plt.scatter(x_test[0], x_test[1], color='b')

plt.show()

# k-nearest-neighbour vote for the query point x_test.

# Euclidean distance from x_test to each training sample, in the same
# order as the rows of x_train.
distance = [math.sqrt(sum((sample - x_test) ** 2)) for sample in x_train]

# Number of neighbours that take part in the vote.
k = 6

# Row indices of x_train ordered from the smallest distance to the largest.
index = np.argsort(distance)

# Labels of the k closest training samples.
nearest = index[:k]
result = list(y_train[nearest])

# Tally the labels and report the most frequent one.
vote = collections.Counter(result)
top_label, top_count = vote.most_common(1)[0]
print("the class is {}".format(top_label))

 

# posted @ 2019-10-30 10:02  看星星的派大星  阅读(652)  评论(0)  编辑  收藏  举报