相似度与距离计算python代码实现
# Distance / similarity functions over rating dictionaries.
# A more efficient approach is to vectorise the scores and use the distance
# functions defined in scipy.

from math import sqrt


def euclidean_dis(rating1, rating2):
    """Return the Euclidean distance between two rating dicts.

    Both arguments map an item name to a numeric score, e.g.
    ``{'Tiny Times 4': 1.0, 'Zootopia': 5.0}``. Only items present in
    both dicts contribute to the distance.

    Returns:
        The square root of the summed squared differences over the shared
        items, or ``-1`` when the two dicts have no item in common.
    """
    shared = [key for key in rating1 if key in rating2]
    if not shared:
        # No commonly rated item: signal with the -1 sentinel.
        return -1
    # `**` is exponentiation; `^` is bitwise XOR and fails on floats.
    squared_sum = sum((rating1[key] - rating2[key]) ** 2 for key in shared)
    return sqrt(squared_sum)


def manhattan_dis(rating1, rating2):
    """Return the Manhattan (L1) distance between two rating dicts.

    Both arguments map an item name to a numeric score. Only items present
    in both dicts contribute to the distance.

    Returns:
        The sum of absolute score differences over the shared items, or
        ``-1`` when the two dicts have no item in common.
    """
    shared = [key for key in rating1 if key in rating2]
    if not shared:
        # No commonly rated item: signal with the -1 sentinel.
        return -1
    return sum(abs(rating1[key] - rating2[key]) for key in shared)


def cos_dis(rating1, rating2):
    """Return the cosine distance (1 - cosine similarity) of two rating dicts.

    Both arguments map an item name to a numeric score. The dot product is
    taken over the items present in both dicts, while each vector norm is
    taken over *all* scores of the respective dict (i.e. an item missing
    from one dict behaves like a score of 0 there).

    Returns:
        ``1 - dot / (|rating1| * |rating2|)``, or ``-1`` when the two dicts
        have no item in common. When either vector has zero norm the
        similarity is undefined; it is treated as 0, so ``1.0`` is returned.
    """
    shared = [key for key in rating1 if key in rating2]
    if not shared:
        # No commonly rated item: signal with the -1 sentinel.
        return -1

    # `**` is exponentiation; `^` is bitwise XOR and fails on floats.
    norm_sq_1 = sum(score ** 2 for score in rating1.values())
    norm_sq_2 = sum(score ** 2 for score in rating2.values())
    dot_product = sum(rating1[key] * rating2[key] for key in shared)

    norm_product = sqrt(norm_sq_1 * norm_sq_2)
    if norm_product == 0:
        # An all-zero vector has no direction; avoid dividing by zero.
        return 1.0
    return 1 - dot_product / norm_product


def pearson_dis(rating1, rating2):
    """Return the Pearson correlation coefficient of two rating dicts.

    Both arguments map an item name to a numeric score. Only items present
    in both dicts are used.

    Returns:
        The correlation in ``[-1, 1]`` computed with the single-pass
        formula, or ``0`` when there is no common item or when either
        series has zero variance over the common items.
    """
    sum_xy = 0
    sum_x = 0
    sum_y = 0
    sum_x2 = 0
    sum_y2 = 0
    n = 0
    for key in rating1:
        if key in rating2:
            n += 1
            x = rating1[key]
            y = rating2[key]
            sum_xy += x * y
            sum_x += x
            sum_y += y
            sum_x2 += x ** 2
            sum_y2 += y ** 2

    if n == 0:
        # No commonly rated item: the formula below would divide by zero.
        return 0

    # Clamp at zero: floating-point rounding can push a mathematically
    # non-negative variance term slightly below zero, which sqrt rejects.
    spread_x = max(sum_x2 - sum_x ** 2 / n, 0.0)
    spread_y = max(sum_y2 - sum_y ** 2 / n, 0.0)
    denominator = sqrt(spread_x) * sqrt(spread_y)
    if denominator == 0:
        return 0
    return (sum_xy - (sum_x * sum_y) / n) / denominator
posted on 2018-03-23 22:24 NothingLZ 阅读(3585) 评论(0) 编辑 收藏 举报