机器学习学习中:KNN 手动实现

KNN 学习中:按 KNN 思想手动实现的测试版

  1 # 假设test_data1 为类型1  test_data2 为类型2 test_data3 为类型3
  2 test_data1 = [(1, 2), (2, 5), (3, 3), (5, 9), (6, 8), (8, 3), (4, 3)]
  3 test_data2 = [(15, 6), (2, 6), (8, 6), (3, 1), (4, 5), (2, 1), (3, 6)]
  4 test_data3 = [(25, 4), (3, 2), (8, 4), (2, 53), (6, 18), (13, 3), (25, 8)]
  5 
  6 
  7 def knn(k:int, new_data: tuple, *test_data:list):
  8     """
  9     实现knn分类算法
 10     :param k: knn中 k的值
 11     :param new_data: 新纪录
 12     :return: 返回所属类型
 13     """
 14     # 用于生成字典的 key, 其中
 15     # i = 1
 16     j = 1
 17     # 用于存储所有的字典
 18     all_distance_dict = {}
 19     for data in test_data:
 20         distance = computed_range(new_data = new_data, test_data = data)
 21         if j == 1  :
 22             temp = [("j" + str(a)) for a in range(len(distance))]
 23         elif j == 2:
 24             temp = [("k" + str(a)) for a in range(len(distance))]
 25         elif j == 3:
 26             temp = [("l" + str(a)) for a in range(len(distance))]
 27 
 28         distance_dict = dict(zip(temp, distance))
 29         all_distance_dict.update(distance_dict)
 30         # i += 1
 31         j += 1
 32 
 33 
 34     # 排序后的结果
 35     sort_all_distance_list = sorted(all_distance_dict.items(), key=lambda x: x[1], reverse=False)
 36     sort_all_distance_dict = dict(sort_all_distance_list)
 37 
 38     print(sort_all_distance_dict)
 39 
 40     # 排序后取前k个
 41     end_distance_list =  []
 42     # 引入一个计数器
 43     i = 0
 44     for key in sort_all_distance_dict.keys():
 45         if i != k:
 46             end_distance_list.append(key)
 47         else :
 48             break
 49         i += 1
 50 
 51     # 创建计数器  res1 res2 res3 分别表示每个种类的个数
 52     res1, res2 ,res3 = 0, 0, 0
 53     for key in end_distance_list:
 54         if key[0] == "j":
 55             res1 += 1
 56         elif key[0] =="k" :
 57             res2 +=1
 58         elif key[0] == "l":
 59             res3 += 1
 60 
 61     if res1>res2:
 62         if res1>res3:
 63             return "类型一"
 64         elif res3 > res2 :
 65             return "类型三"
 66     else :
 67         return "类型二"
 68 
 69 
 70 
 71 
 72 # 计算距离
 73 def computed_range(new_data: tuple, test_data:list, formula_mode = 1)->list :
 74     '''
 75     该函数用于计算欧氏距离
 76     :param new_data:新需要计算的数据
 77     :param test_data:为样本数据
 78     :param formula_mode:用于选择相似度计算方式:其中 1:欧氏距离 2:曼哈顿距离 3:余弦相似度
 79     :return:list 代表每个数据与新记录之间的距离
 80     '''
 81     result = []  # 定义一个列表:用于存出结果
 82     if formula_mode == 1 :
 83         formula = Euclidean_distance
 84     elif formula_mode == 2 :
 85         formula = Manhattan_distance
 86     elif formula_mode == 3:
 87         formula = cosine_measure
 88     else:
 89         return  # 输入非 1 2 3 则直接返回None
 90 
 91     for data in test_data :
 92         # 用于计算距离欧式距离
 93         length = formula(data, new_data)
 94         result.append(length)
 95 
 96     return result
 97 
 98 # 用于计算欧氏距离
 99 def Euclidean_distance(data1,data2) :
100     return (((data1[0] - data2[0]) ** 2) + ((data1[1] - data2[1]) ** 2)) ** (0.5)
101 
# Manhattan distance
def Manhattan_distance(data1, data2):
    """
    Return the Manhattan (city-block) distance between two points.

    This is the sum of the absolute coordinate differences.

    :param data1: first point, a sequence of numeric coordinates
    :param data2: second point, with the same number of coordinates
    :return: the sum of absolute differences
    :raises ValueError: if the points have different numbers of coordinates
    """
    if len(data1) != len(data2):
        raise ValueError("points must have the same number of dimensions")
    return sum(abs(a - b) for a, b in zip(data1, data2))
105 
# Cosine similarity
def cosine_measure(data1, data2):
    """
    Return the cosine similarity of two points treated as vectors.

    The result lies in [-1, 1]. Unlike a distance, a LARGER value means the
    vectors point in more similar directions.

    :param data1: first vector, a sequence of numeric components
    :param data2: second vector, with the same number of components
    :return: dot(data1, data2) / (|data1| * |data2|)
    :raises ValueError: if the vectors differ in length, or either one has
        zero magnitude (the similarity is undefined in that case)
    """
    if len(data1) != len(data2):
        raise ValueError("points must have the same number of dimensions")
    dot = sum(a * b for a, b in zip(data1, data2))
    norm1 = sum(a * a for a in data1) ** 0.5
    norm2 = sum(b * b for b in data2) ** 0.5
    if norm1 == 0 or norm2 == 0:
        raise ValueError("cosine similarity is undefined for a zero vector")
    return dot / (norm1 * norm2)
109 
# Demo run: classify the point (5, 55) with k = 3 against the three sample sets.
string = knn(
    3,
    (5, 55),
    test_data1,
    test_data2,
    test_data3,
)
print(string)

返回结果

{'l3': 3.605551275463989, 'l4': 37.013511046643494, 'j3': 46.0, 'j4': 47.01063709417264, 'k6': 49.040799340956916, 'k1': 49.09175083453431, 'k2': 49.09175083453431, 'k0': 50.00999900019995, 'k4': 50.00999900019995, 'j1': 50.08991914547278, 'l6': 51.07837115648854, 'l2': 51.088159097779204, 'j6': 52.009614495783374, 'j2': 52.03844732503075, 'j5': 52.08646657242167, 'l5': 52.61178575186362, 'l1': 53.03772242470448, 'j0': 53.150729063673246, 'k3': 54.037024344425184, 'k5': 54.08326913195984, 'l0': 54.78138369920935}
类型三

 

posted @ 2021-04-29 23:12  小怪兽他爹爹  阅读(78)  评论(0)  编辑  收藏  举报