更多算法(包含多种距离算法的讲解)可参考:《机器学习中的数学——距离定义(五):标准化的欧几里得距离(Standardized Euclidean Distance)》,von Neumann 的 CSDN 博客。
#相似度算法
class Similarity:
    """Collection of simple distance / similarity measures.

    Every method prints its result (label followed by the value) and then
    returns it.  The vector-based methods walk the indices of ``vector1``
    and read the element at the same index of ``vector2``.
    """

    def Euclidean_distance(self, vector1, vector2) -> float:
        """Return a similarity score derived from the Euclidean distance.

        The Euclidean distance ``d`` is the square root of the summed
        squared differences; the returned value is ``1 / (d + 1)``, so
        identical vectors score 1.0 and the score shrinks towards 0 as
        the vectors move apart.

        Raises:
            IndexError: if ``vector2`` is shorter than ``vector1``.
        """
        squared = sum((x - vector2[i]) ** 2 for i, x in enumerate(vector1))
        score = 1 / ((squared ** 0.5) + 1)
        print('Euclidean_distance', score)
        return score

    def Cosine_distance(self, vector1, vector2) -> float:
        """Return the cosine of the angle between the two vectors.

        The value lies in [-1, 1]: positive means the vectors point in a
        similar direction, negative means opposite directions, and 0 means
        they are perpendicular.  If either vector has zero length the
        cosine is undefined and 0.0 is returned.

        Raises:
            IndexError: if ``vector2`` is shorter than ``vector1``.
        """
        dot, norm1_sq, norm2_sq = 0, 0, 0
        for i, x in enumerate(vector1):
            y = vector2[i]
            dot += x * y
            norm1_sq += x ** 2
            norm2_sq += y ** 2
        # sqrt(a) * sqrt(b) == sqrt(a * b); one root keeps exact cases exact.
        denominator = (norm1_sq * norm2_sq) ** 0.5
        score = dot / denominator if denominator else 0.0
        print('Cosine_distance', score)
        return score

    def Manhattan_distance(self, vector1, vector2) -> float:
        """Return a similarity score derived from the Manhattan distance.

        The Manhattan distance ``d`` is the sum of absolute differences;
        the returned value is ``1 / (d + 1)``.

        Raises:
            IndexError: if ``vector2`` is shorter than ``vector1``.
        """
        total = sum(abs(x - vector2[i]) for i, x in enumerate(vector1))
        score = 1 / (total + 1)
        print('Manhattan_distance', score)
        return score

    def Jaccard_distance(self, vector1, vector2) -> float:
        """Return the Jaccard distance between the two collections.

        Both inputs are treated as sets (duplicates are ignored); the
        result is ``(|union| - |intersection|) / |union|``.  Two empty
        collections are considered identical and yield 0.0.
        """
        first, second = set(vector1), set(vector2)
        # Use real set operations so repeated elements are counted once.
        intersection = first & second
        print(intersection)
        union = first | second
        print(union)
        if union:
            distance = (len(union) - len(intersection)) / len(union)
        else:
            distance = 0.0
        print('Jaccard_distance', distance)
        return distance

    def Hamming_distance(self, vector1, vector2) -> float:
        """Return the fraction of positions at which the inputs differ.

        Positions are compared pairwise with ``zip`` (so trailing elements
        of the longer input are not compared) and the mismatch count is
        divided by ``len(vector1)``.  An empty ``vector1`` yields 0.0.
        """
        mismatches = sum(1 for x, y in zip(vector1, vector2) if x != y)
        length = len(vector1)
        distance = mismatches / length if length else 0.0
        print('Hamming_distance', distance)
        return distance

    def EditDistance(self, x, y) -> int:
        """Return the Levenshtein edit distance between sequences x and y.

        Insertions, deletions and substitutions each cost 1.
        """
        # Only the previous DP row is needed, so keep two rows of ints
        # instead of a full (len(x)+1) x (len(y)+1) matrix.
        previous = list(range(len(y) + 1))
        for i, item_x in enumerate(x, 1):
            current = [i]
            for j, item_y in enumerate(y, 1):
                substitution = 0 if item_x == item_y else 1
                current.append(min(previous[j - 1] + substitution,
                                   previous[j] + 1,
                                   current[j - 1] + 1))
            previous = current
        distance = previous[-1]
        print(distance)
        return distance
#
# s = Similarity()
# a = ['a','c','s','a']
# b = ['a','d','b','d']
# s.Jaccard_distance(a,b)
# s.Hamming_distance(a,b)
# s.EditDistance(a,b)
【推荐】国内首个AI IDE,深度理解中文开发场景,立即下载体验Trae
【推荐】编程新体验,更懂你的AI,立即体验豆包MarsCode编程助手
【推荐】抖音旗下AI助手豆包,你的智能百科全书,全免费不限次数
【推荐】轻量又高性能的 SSH 工具 IShell:AI 加持,快人一步
· DeepSeek 开源周回顾「GitHub 热点速览」
· 物流快递公司核心技术能力-地址解析分单基础技术分享
· .NET 10首个预览版发布:重大改进与新特性概览!
· AI与.NET技术实操系列(二):开始使用ML.NET
· 单线程的Redis速度为什么快?