相似度与距离计算python代码实现

 1 #定义几种距离计算函数
 2 #更高效的方式为把得分向量化之后使用scipy中定义的distance方法
 3 
 4 from math import sqrt
 5 def euclidean_dis(rating1, rating2):    #欧式距离计算
 6     """计算2个打分序列间的欧式距离. 输入的rating1和rating2都是打分dict
 7        格式为{'小时代4': 1.0, '疯狂动物城': 5.0}"""
 8     distance = 0
 9     commonRatings = False 
10     for key in rating1:
11         if key in rating2:
12             distance += (rating1[key] - rating2[key])^2
13             commonRatings = True
14     #两个打分序列之间有公共打分电影
15     if commonRatings:
16         return distance
17     #无公共打分电影
18     else:
19         return -1
20 
21 
def manhattan_dis(rating1, rating2):    # Manhattan distance
    """Return the Manhattan distance between two rating dicts.

    Both arguments map an item name to a numeric score, e.g.
    {'Tiny Times 4': 1.0, 'Zootopia': 5.0}. Only items present in both
    dicts are compared.

    Returns:
        The sum of absolute score differences over the shared items,
        or -1 when the two dicts share no items.
    """
    shared_items = [item for item in rating1 if item in rating2]
    # No item rated in both dicts: report the -1 sentinel.
    if not shared_items:
        return -1
    return sum(abs(rating1[item] - rating2[item]) for item in shared_items)
37 
def cos_dis(rating1, rating2):   # cosine distance (1 - cosine similarity)
    """Return the cosine distance between two rating dicts.

    The dot product is taken over items present in both dicts, while each
    vector norm is taken over all of that dict's scores (so an item missing
    from one dict acts like a score of 0 there).

    Args:
        rating1: Mapping of item name to numeric score,
            e.g. {'Tiny Times 4': 1.0, 'Zootopia': 5.0}.
        rating2: Mapping in the same format as rating1.

    Returns:
        1 - cosine similarity. Returns -1 when the dicts share no items,
        and 1.0 when either dict's scores are all zero (the similarity is
        undefined there and is treated as 0).
    """
    shared_items = [item for item in rating1 if item in rating2]
    # -1 is the sentinel for "no item was rated in both dicts".
    if not shared_items:
        return -1

    # `**` is exponentiation; `^` is bitwise XOR and raises TypeError on floats.
    norm_sq_1 = sum(score ** 2 for score in rating1.values())
    norm_sq_2 = sum(score ** 2 for score in rating2.values())
    dot_product = sum(rating1[item] * rating2[item] for item in shared_items)

    norm_product = sqrt(norm_sq_1 * norm_sq_2)
    # An all-zero vector has no direction; avoid dividing by zero.
    if norm_product == 0:
        return 1.0
    return 1 - dot_product / norm_product
61 
def pearson_dis(rating1, rating2):  # Pearson correlation
    """Return the Pearson correlation between two rating dicts.

    Only items present in both dicts are used. The single-pass
    (sum-of-products) form of the formula is applied.

    Args:
        rating1: Mapping of item name to numeric score,
            e.g. {'Tiny Times 4': 1.0, 'Zootopia': 5.0}.
        rating2: Mapping in the same format as rating1.

    Returns:
        The correlation coefficient, or 0 when it is undefined: the dicts
        share no items, or the shared scores of either dict have zero
        variance.
    """
    sum_xy = 0
    sum_x = 0
    sum_y = 0
    sum_x2 = 0
    sum_y2 = 0
    n = 0
    for key in rating1:
        if key in rating2:
            n += 1
            x = rating1[key]
            y = rating2[key]
            sum_xy += x * y
            sum_x += x
            sum_y += y
            sum_x2 += x ** 2
            sum_y2 += y ** 2

    # No shared items: every term below would divide by zero.
    if n == 0:
        return 0

    # n times the variance of each series. Float rounding can push a true
    # zero slightly negative, which would make sqrt() raise ValueError.
    spread_x = sum_x2 - sum_x ** 2 / n
    spread_y = sum_y2 - sum_y ** 2 / n
    if spread_x <= 0 or spread_y <= 0:
        return 0

    denominator = sqrt(spread_x) * sqrt(spread_y)
    if denominator == 0:
        return 0
    return (sum_xy - (sum_x * sum_y) / n) / denominator

 

posted on 2018-03-23 22:24  NothingLZ  阅读(3585)  评论(0)  编辑  收藏  举报

导航