C#/.NET计算文本相似度

 

看到一段计算文本相似度(匹配程度)的程序,记录一下。

/// <summary>
/// Computes the cosine similarity between two texts, treating each text as a
/// vector of per-character (UTF-16 code unit) occurrence counts.
/// </summary>
/// <param name="txt1">First text to compare. Must not be null.</param>
/// <param name="txt2">Second text to compare. Must not be null.</param>
/// <returns>
/// A value in the range [0, 1]: 1 when both texts have proportional character
/// frequencies (e.g. are identical), 0 when they share no characters.
/// Two empty texts are considered identical (1); an empty text compared with a
/// non-empty one yields 0.
/// </returns>
/// <exception cref="ArgumentNullException">
/// <paramref name="txt1"/> or <paramref name="txt2"/> is null.
/// </exception>
public static double Sim(string txt1, string txt2)
{
    if (txt1 == null)
    {
        throw new ArgumentNullException(nameof(txt1));
    }

    if (txt2 == null)
    {
        throw new ArgumentNullException(nameof(txt2));
    }

    // An empty text has a zero-length frequency vector, for which the cosine
    // is mathematically undefined (0 / 0 would produce NaN). Define the result
    // explicitly instead of leaking NaN to callers.
    if (txt1.Length == 0 || txt2.Length == 0)
    {
        return txt1.Length == txt2.Length ? 1.0 : 0.0;
    }

    // Count occurrences of every character with a single pass per text.
    Dictionary<char, int> counts1 = CountChars(txt1);
    Dictionary<char, int> counts2 = CountChars(txt2);

    // Dot product: only characters present in both texts contribute.
    long dot = 0;
    // Squared Euclidean norms of the two frequency vectors.
    long squaredNorm1 = 0;
    long squaredNorm2 = 0;

    foreach (KeyValuePair<char, int> entry in counts1)
    {
        squaredNorm1 += (long)entry.Value * entry.Value;

        int other;
        if (counts2.TryGetValue(entry.Key, out other))
        {
            dot += (long)entry.Value * other;
        }
    }

    foreach (int count in counts2.Values)
    {
        squaredNorm2 += (long)count * count;
    }

    // Take a single square root of the product (converted to double first so
    // the product cannot overflow a long). Multiplying two separately rounded
    // square roots can push the result for identical texts slightly above 1.
    double denominator = Math.Sqrt((double)squaredNorm1 * squaredNorm2);
    double cos = dot / denominator;

    // Guard against any remaining floating-point rounding above the valid range.
    return Math.Min(1.0, cos);
}

// Builds a map from each character in the text to its number of occurrences.
private static Dictionary<char, int> CountChars(string text)
{
    Dictionary<char, int> counts = new Dictionary<char, int>();
    foreach (char c in text)
    {
        int current;
        counts.TryGetValue(c, out current);
        counts[c] = current + 1;
    }

    return counts;
}

posted @ 2020-04-20 13:19  刘中俊  阅读(1097)  评论(0)  编辑  收藏  举报