高效比对,返回最短编辑距离算法匹配度最高的数据

        #region 高效比对返回匹配度最高的数据
        /// <summary>
        /// For every entry in <paramref name="sourceList"/>, finds the entry in
        /// <paramref name="targetList"/> whose key has the smallest Levenshtein edit
        /// distance to the source key (i.e. the closest match).
        /// </summary>
        /// <param name="sourceList">Entries whose <c>key</c> values are to be matched.</param>
        /// <param name="targetList">Candidate entries; their <c>key</c> is compared and their <c>value</c> is carried into the result.</param>
        /// <returns>
        /// One <see cref="MapToData"/> per distinct source key, in source order, holding the
        /// source key, the <c>value</c> of the closest target and the edit distance in <c>match</c>.
        /// When several targets tie, the one that appears first in <paramref name="targetList"/> wins.
        /// Returns an empty list when either input list is empty.
        /// </returns>
        /// <exception cref="ArgumentNullException">
        /// <paramref name="sourceList"/> or <paramref name="targetList"/> is null.
        /// </exception>
        public static List<MapToData> GetAutoMapData(List<MapToData> sourceList, List<MapToData> targetList)
        {
            if (sourceList == null)
            {
                throw new ArgumentNullException(nameof(sourceList));
            }
            if (targetList == null)
            {
                throw new ArgumentNullException(nameof(targetList));
            }

            // Without candidates there is nothing to match against.
            if (sourceList.Count == 0 || targetList.Count == 0)
            {
                return new List<MapToData>();
            }

            // Each parallel iteration writes only to its own slot, so no locking is needed.
            // (List<T>.Add is not thread-safe and must not be called from the parallel body.)
            MapToData[] bestPerSource = new MapToData[sourceList.Count];
            Parallel.For(0, sourceList.Count, i =>
            {
                var sourceValue = sourceList[i].key;
                MapToData best = null;
                foreach (var item in targetList)
                {
                    int distance = LevenshteinDistance(sourceValue, item.key);

                    // A smaller edit distance means a closer match; strict '<' keeps the
                    // first target on ties so the result is deterministic.
                    if (best == null || distance < best.match)
                    {
                        best = new MapToData { key = sourceValue, value = item.value, match = distance };
                    }
                }
                bestPerSource[i] = best;
            });

            // Collapse duplicate source keys to a single entry (first occurrence wins);
            // identical keys always yield the same best match, so nothing is lost.
            var seenKeys = new HashSet<string>();
            var resultList = new List<MapToData>(bestPerSource.Length);
            foreach (var candidate in bestPerSource)
            {
                if (candidate != null && seenKeys.Add(candidate.key))
                {
                    resultList.Add(candidate);
                }
            }
            return resultList;
        }
        #endregion

        #region LD最短编辑距离算法

        /// <summary>
        /// Computes the Levenshtein (minimum edit) distance between two strings: the
        /// smallest number of single-character insertions, deletions and substitutions
        /// needed to turn <paramref name="source"/> into <paramref name="target"/>.
        /// Characters are compared by exact <c>char</c> value.
        /// </summary>
        /// <param name="source">The source string.</param>
        /// <param name="target">The target string.</param>
        /// <returns>
        /// The edit distance; 0 when the strings are identical, and the length of the
        /// other string when one of them is empty.
        /// </returns>
        public static int LevenshteinDistance(string source, string target)
        {
            int sourceLength = source.Length;
            int targetLength = target.Length;

            // Turning an empty string into the other one takes one insertion per character.
            if (sourceLength == 0)
            {
                return targetLength;
            }
            if (targetLength == 0)
            {
                return sourceLength;
            }

            // distances[r, c] = edit distance between the first c chars of source
            // and the first r chars of target.
            int[,] distances = new int[targetLength + 1, sourceLength + 1];

            for (int c = 0; c <= sourceLength; c++)
            {
                distances[0, c] = c;
            }
            for (int r = 1; r <= targetLength; r++)
            {
                distances[r, 0] = r;
            }

            for (int r = 1; r <= targetLength; r++)
            {
                char targetChar = target[r - 1];
                for (int c = 1; c <= sourceLength; c++)
                {
                    int substitutionCost = source[c - 1] == targetChar ? 0 : 1;

                    int viaDiagonal = distances[r - 1, c - 1] + substitutionCost;
                    int viaLeft = distances[r, c - 1] + 1;
                    int viaAbove = distances[r - 1, c] + 1;

                    distances[r, c] = Math.Min(Math.Min(viaDiagonal, viaLeft), viaAbove);
                }
            }

            return distances[targetLength, sourceLength];
        }
        #endregion

    /// <summary>
    /// Simple data holder pairing a key string with an arbitrary payload and a match score.
    /// </summary>
    public class MapToData
    {
        /// <summary>
        /// The string to be matched. Defaults to the empty string.
        /// </summary>
        public string key = string.Empty;

        /// <summary>
        /// The payload associated with the match result. Defaults to a fresh <see cref="object"/>.
        /// </summary>
        public object value = new object();

        /// <summary>
        /// The match score. GetAutoMapData stores the Levenshtein edit distance here,
        /// so a lower number means a closer match. Defaults to 0.
        /// </summary>
        public int match;
    }
posted @ 2016-09-23 14:27  深南大道  阅读(205)  评论(0编辑  收藏  举报