字符串相似度算法(SQL Server 版和 C# 版)
-- Scores how similar two strings are: the length of the longest substring
-- (two characters or more) that occurs in both inputs, expressed as an
-- integer percentage of the longer input's length.
--   @word1, @word2 : the strings to compare (up to 50 characters each).
--   returns        : the score; the int result is implicitly converted to the
--                    declared nvarchar return type. 0 when nothing matches.
create function dbo.get_semblance_By_2words
(
@word1 varchar(50),
@word2 varchar(50)
)
returns nvarchar(4000)
as
begin
    declare @re int
    declare @maxLenth int
    declare @i int, @l int
    declare @tb1 table(child varchar(50))
    declare @tb2 table(child varchar(50))

    -- The score is measured against the longer of the two inputs.
    set @maxLenth = len(@word1)
    if len(@word1) < len(@word2)
    begin
        set @maxLenth = len(@word2)
    end

    -- Collect every substring of @word1 that is at least two characters long.
    set @l = 2
    while @l <= len(@word1)
    begin
        set @i = 1
        -- The last start position that still leaves room for @l characters is
        -- len - @l + 1. Stopping there covers the tail of the word and avoids
        -- the truncated duplicates SUBSTRING returns for over-long requests.
        while @i <= len(@word1) - @l + 1
        begin
            insert @tb1 (child) values (substring(@word1, @i, @l))
            set @i = @i + 1
        end
        set @l = @l + 1
    end

    -- Same enumeration for @word2.
    set @l = 2
    while @l <= len(@word2)
    begin
        set @i = 1
        while @i <= len(@word2) - @l + 1
        begin
            insert @tb2 (child) values (substring(@word2, @i, @l))
            set @i = @i + 1
        end
        set @l = @l + 1
    end

    -- The longest substring present in both tables decides the score.
    -- With no common substring the join is empty and isnull() yields 0, so the
    -- division is never evaluated for empty inputs.
    select @re = isnull(max(len(a.child) * 100 / @maxLenth), 0)
    from @tb1 a
    inner join @tb2 b on a.child = b.child

    return @re
end
-- Test:
--select dbo.get_semblance_By_2words('我是谁','我是谁啊')
-- Returns 75,
-- i.e. a similarity of 75 percent.
c#------------------------------------------------------
using System;
using System.Collections.Generic;
using System.Text;
namespace ConsoleApplication6
{
/// <summary>
/// String similarity based on the longest substring two words have in common.
/// </summary>
class semblance
{
    /// <summary>
    /// Demo entry point: prints the similarity of two sample strings and waits
    /// for a line of input before exiting.
    /// </summary>
    static void Main(string[] args)
    {
        string re = get_semblance_By_2words("我是谁", "我是谁啊");
        Console.WriteLine(re);
        Console.ReadLine();
    }

    /// <summary>
    /// Computes the similarity of two strings as the length of their longest
    /// common substring, expressed as a whole-number percentage of the longer
    /// string's length. Lengths are counted in <see cref="char"/> units and
    /// substrings are compared ordinally (case-sensitive).
    /// </summary>
    /// <param name="word1">First string to compare.</param>
    /// <param name="word2">Second string to compare.</param>
    /// <returns>
    /// The percentage, rounded down, followed by "%" (for example "75%").
    /// "0%" when the strings share no character or when both are empty.
    /// </returns>
    /// <exception cref="ArgumentNullException">
    /// <paramref name="word1"/> or <paramref name="word2"/> is null.
    /// </exception>
    public static string get_semblance_By_2words(string word1, string word2)
    {
        if (word1 == null)
        {
            throw new ArgumentNullException("word1");
        }
        if (word2 == null)
        {
            throw new ArgumentNullException("word2");
        }

        int maxLength = Math.Max(word1.Length, word2.Length);
        int commonLength = LongestCommonSubstringLength(word1, word2);

        // maxLength is 0 only when both inputs are empty; skip the division then.
        int re = maxLength == 0 ? 0 : commonLength * 100 / maxLength;
        return re.ToString() + "%";
    }

    /// <summary>
    /// Returns the length of the longest substring of <paramref name="first"/>
    /// that also occurs in <paramref name="second"/>, or 0 if there is none.
    /// </summary>
    private static int LongestCommonSubstringLength(string first, string second)
    {
        // A common substring cannot be longer than the shorter input. Trying the
        // longest candidates first lets the search stop at the first hit.
        int longestPossible = Math.Min(first.Length, second.Length);
        for (int length = longestPossible; length >= 1; length--)
        {
            // start + length <= first.Length keeps the final character reachable,
            // including as a one-character substring.
            for (int start = 0; start + length <= first.Length; start++)
            {
                string candidate = first.Substring(start, length);
                if (second.IndexOf(candidate, StringComparison.Ordinal) >= 0)
                {
                    return length;
                }
            }
        }
        return 0;
    }
}
}