# -*- coding: utf-8 -*-
"""
Created on 2015/7/7  10:08
使用动态规划算法实现编辑距离的计算
@author: Wang Xu
"""
import numpy as np


class LevenshteinDistance:
    """Edit-distance (Levenshtein) calculator using dynamic programming."""

    def leDistance(self, input_x, input_y):
        """Return the Levenshtein distance between two indexable sequences.

        The distance is the minimum number of single-element insertions,
        deletions and substitutions needed to turn ``input_x`` into
        ``input_y``.

        Args:
            input_x: First sequence (e.g. a ``str``); must support ``len()``
                and integer indexing.
            input_y: Second sequence, same requirements as ``input_x``.

        Returns:
            The edit distance, read from the NumPy integer table (so the
            value is a NumPy integer scalar).
        """
        # One extra row/column represents the empty prefix of each input;
        # the final answer ends up in the bottom-right cell.
        rows = len(input_x) + 1
        cols = len(input_y) + 1

        table = np.zeros(shape=(rows, cols), dtype=int)
        # Turning a prefix into the empty sequence (or back) costs its length.
        table[:, 0] = np.arange(rows)
        table[0, :] = np.arange(cols)

        for r in range(1, rows):
            for c in range(1, cols):
                diagonal = table[r - 1, c - 1]
                if input_x[r - 1] == input_y[c - 1]:
                    # Matching elements: no additional edit is required.
                    table[r, c] = diagonal
                    continue
                # Cheapest of deletion, insertion and substitution, plus one.
                table[r, c] = 1 + min(table[r - 1, c], table[r, c - 1], diagonal)

        return table[rows - 1, cols - 1]


if __name__ == '__main__':
    # Demo inputs; the trailing comment on each pair is the distance printed.
    samples = (
        ('瓦罐蹄膀饭', '瓦罐焖蹄饭'),  # 2
        ('', 'a'),  # 1
        ('b', ''),  # 1
        ('', ''),  # 0
        ('杭椒小炒肉面', '外婆小肉面'),  # 3
        ('外婆小肉面', '杭椒小炒肉面'),  # 3
    )
    calculator = LevenshteinDistance()
    for left, right in samples:
        print(calculator.leDistance(left, right))

# Source: http://codepub.cn/2015/07/07/Python-implementation-string-similarity-edit-distance/
#
# Posted on 2016-08-08 15:52 by 小白闯天下 (views: 294, comments: 0)