SQLServer自定义函数实现比对文本相似度(用来模糊查询或查重)
USE [DB] GO /****** Object: UserDefinedFunction [dbo].[FN_StrCompare] Script Date: 2023/10/17 15:10:17 ******/ SET ANSI_NULLS ON GO SET QUOTED_IDENTIFIER ON GO CREATE FUNCTION [dbo].[FN_StrCompare] ( @Stra NVARCHAR(MAX), --字符串a @Strb NVARCHAR(MAX), --字符串b @PosWeight FLOAT, --比对时位置权重0-1.0,默认0.5 @ContainsAsTrue BIT, --包含是否视为相同,默认0 @IgnoreNul BIT --忽略空字符,默认 1 ) RETURNS FLOAT AS BEGIN DECLARE @num FLOAT, @len INT, @StrL NVARCHAR(MAX), @StrS NVARCHAR(MAX), @position FLOAT SET @Stra = ISNULL(@Stra, 0) SET @Strb = ISNULL(@Strb, 0) SET @PosWeight = ISNULL(@PosWeight, 0.5) SET @ContainsAsTrue = ISNULL(@ContainsAsTrue, 0) SET @IgnoreNul = ISNULL(@IgnoreNul, 1) IF (@IgnoreNul=1) BEGIN SET @Stra = REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(@Stra,' ',''), CHAR(10), ''), CHAR(13), ''), CHAR(9), ''), ' ', '') SET @Strb = REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(@Strb,' ',''), CHAR(10), ''), CHAR(13), ''), CHAR(9), ''), ' ', '') END IF (LEN(@Stra) > LEN(@Strb)) BEGIN SET @StrL = @Stra SET @StrS = @Strb END ELSE BEGIN SET @StrL = @Strb SET @StrS = @Stra END IF (@ContainsAsTrue = 1 AND CHARINDEX(@StrS, @StrL) > 0) BEGIN RETURN 1 END SET @len = 1 SET @num = 0 SET @position = 0 WHILE (LEN(@StrL) <> 0 AND LEN(@StrS) <> 0) BEGIN WHILE (@len <= LEN(@StrS)) BEGIN DECLARE @a NVARCHAR(4) SET @a = '' SET @a = SUBSTRING(@StrS, @len, 1) IF (CHARINDEX(@a, @StrL) > 0) BEGIN SET @num = @num + 1 DECLARE @maxIndex INT SET @maxIndex = @len IF (CHARINDEX(@a, @StrL) > @maxIndex) BEGIN SET @maxIndex = CHARINDEX(@a, @StrL) END SET @position = @position + (1 -ABS(@len -CHARINDEX(@a, @StrL, @len)) * 1.0 / @maxIndex) END SET @len = @len + 1 END SET @num = (@num * (1 -@PosWeight) + @position * @PosWeight) / LEN(@StrL) BREAK END RETURN @num END
----测试代码 SELECT dbo.FN_StrCompare('张三', '张三A', NULL, 0, NULL); --0.666666666666667 SELECT dbo.FN_StrCompare('1234', '4321', 0.01, 1, NULL); --0.992291666666667 SELECT dbo.FN_StrCompare('36cfe88476f046198c1a99f9f7f3a1e5', '00015e9443434a178df1a6dc9d787fa3', 0.99, 0, NULL); --0.604230480461777