简单实现KNN(处理连续型数据)

复制代码
import numpy as np
import matplotlib.pyplot as plt
import time
import math
import collections



# Toy training set: ten two-dimensional samples, one [x, y] pair per row.
raw_data_x = [
    [3.39, 2.33],
    [3.11, 1.78],
    [1.34, 3.36],
    [3.58, 4.67],
    [2.28, 2.86],
    [7.442, 4.69],
    [5.74, 3.53],
    [9.17, 2.51],
    [7.79, 3.42],
    [7.93, 0.79],
]
# Class label for each row above: the first five rows are class 0,
# the last five are class 1.
raw_data_y = [0] * 5 + [1] * 5

# Array forms of the training data, plus the single query point to classify.
x_train, y_train = np.array(raw_data_x), np.array(raw_data_y)
x_test = np.array([8.0, 3.36])

# Draw each training class in its own colour (class 0 red, class 1 green),
# then the query point in blue, and display the figure.
for label, colour in ((0, 'r'), (1, 'g')):
    members = x_train[y_train == label]
    plt.scatter(members[:, 0], members[:, 1], color=colour)
plt.scatter(x_test[0], x_test[1], color='b')

plt.show()

# Classify x_test by k-nearest neighbours using the Euclidean distance.

# Distance from the query point to every training sample in one vectorised
# call: broadcasting subtracts x_test from each row of x_train, and
# axis=1 takes the 2-norm of each resulting row.
distance = np.linalg.norm(x_train - x_test, axis=1)

# Training indices ordered nearest-first. A stable sort keeps samples that
# are exactly equidistant in their original training order, so the neighbour
# list is reproducible.
index = np.argsort(distance, kind="stable")
k = 6
# Labels of the k nearest samples (the slice simply yields fewer labels if
# k exceeds the number of training samples).
result = y_train[index[:k]].tolist()

# Majority vote. k is even, so a tie between two classes is possible;
# Counter.most_common orders equal counts by first occurrence, and since
# `result` is nearest-first, the tied class owning the closest neighbour wins.
vote = collections.Counter(result)
print("the class is {}".format(vote.most_common(1)[0][0]))
复制代码

 

posted @   看星星的派大星  阅读(658)  评论(0)  编辑  收藏  举报
编辑推荐:
· 基于Microsoft.Extensions.AI核心库实现RAG应用
· Linux系列:如何用heaptrack跟踪.NET程序的非托管内存泄露
· 开发者必知的日志记录最佳实践
· SQL Server 2025 AI相关能力初探
· Linux系列:如何用 C#调用 C方法造成内存泄露
阅读排行:
· 震惊!C++程序真的从main开始吗?99%的程序员都答错了
· 【硬核科普】Trae如何「偷看」你的代码?零基础破解AI编程运行原理
· 单元测试从入门到精通
· 上周热点回顾(3.3-3.9)
· winform 绘制太阳,地球,月球 运作规律
点击右上角即可分享
微信分享提示