SQLServer自定义函数实现比对文本相似度(用来模糊查询或查重)

USE [DB]
GO
/****** Object:  UserDefinedFunction [dbo].[FN_StrCompare]    Script Date: 2023/10/17 15:10:17 ******/
SET ANSI_NULLS ON
GO
SET QUOTED_IDENTIFIER ON
GO
 
CREATE FUNCTION [dbo].[FN_StrCompare]
(
    @Stra               NVARCHAR(MAX),    --字符串a
    @Strb               NVARCHAR(MAX),    --字符串b
    @PosWeight          FLOAT,        --比对时位置权重0-1.0,默认0.5
    @ContainsAsTrue     BIT,        --包含是否视为相同,默认0
    @IgnoreNul          BIT            --忽略空字符,默认 1
)
RETURNS FLOAT
AS
 
BEGIN
    DECLARE @num          FLOAT,
            @len          INT,
            @StrL         NVARCHAR(MAX),
            @StrS         NVARCHAR(MAX),
            @position     FLOAT
    
    SET @Stra = ISNULL(@Stra, 0)
    SET @Strb = ISNULL(@Strb, 0)
    SET @PosWeight = ISNULL(@PosWeight, 0.5)
    SET @ContainsAsTrue = ISNULL(@ContainsAsTrue, 0)
    SET @IgnoreNul = ISNULL(@IgnoreNul, 1)
    
    
    IF  (@IgnoreNul=1)
    BEGIN
         SET @Stra =  REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(@Stra,' ',''), CHAR(10), ''), CHAR(13), ''), CHAR(9), ''), ' ', '')
         SET @Strb = REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(@Strb,' ',''), CHAR(10), ''), CHAR(13), ''), CHAR(9), ''), ' ', '')
    END
    
    IF (LEN(@Stra) > LEN(@Strb))
    BEGIN
        SET @StrL = @Stra        
        SET @StrS = @Strb
    END
    ELSE
    BEGIN
        SET @StrL = @Strb
        SET @StrS = @Stra
    END 
    
    IF (@ContainsAsTrue = 1 AND CHARINDEX(@StrS, @StrL) > 0)
    BEGIN
        RETURN 1
    END
    
    SET @len = 1
    SET @num = 0
    SET @position = 0
    WHILE (LEN(@StrL) <> 0 AND LEN(@StrS) <> 0)
    BEGIN
        WHILE (@len <= LEN(@StrS))
        BEGIN
            DECLARE @a NVARCHAR(4)
            SET @a = ''
            SET @a = SUBSTRING(@StrS, @len, 1)
            IF (CHARINDEX(@a, @StrL) > 0)
            BEGIN
                SET @num = @num + 1
                DECLARE @maxIndex INT
                SET @maxIndex = @len 
                IF (CHARINDEX(@a, @StrL) > @maxIndex)
                BEGIN
                    SET @maxIndex = CHARINDEX(@a, @StrL)
                END
                
                SET @position = @position + (1 -ABS(@len -CHARINDEX(@a, @StrL, @len)) * 1.0 / @maxIndex)
            END
            
            SET @len = @len + 1
        END
        SET @num = (@num * (1 -@PosWeight) + @position * @PosWeight) / LEN(@StrL)
        BREAK
    END
    
    RETURN @num
END
  
----测试代码
SELECT dbo.FN_StrCompare('张三', '张三A', NULL, 0, NULL); 
--0.666666666666667
 
SELECT dbo.FN_StrCompare('1234', '4321', 0.01, 1, NULL); 
--0.992291666666667
 
SELECT dbo.FN_StrCompare('36cfe88476f046198c1a99f9f7f3a1e5', '00015e9443434a178df1a6dc9d787fa3', 0.99, 0, NULL);
--0.604230480461777

 

posted @ 2023-10-17 15:13  孔小爽  阅读(243)  评论(0编辑  收藏  举报
作者:孔小爽 引用请标明出处:https://www.cnblogs.com/kongxiaoshuang/