【数据结构】KMP 算法

受阮一峰博客中关于 KMP 字符串匹配算法的文章启发,用 Python 实现了 KMP 算法,思路比较清楚明白。
原文地址:http://www.ruanyifeng.com/blog/2013/05/Knuth%E2%80%93Morris%E2%80%93Pratt_algorithm.html

def next_arr(s: str) -> list:
    """
    Compute the KMP "partial match table" of ``s``.

    Entry ``i`` of the result is the length of the longest string that is
    both a proper prefix and a proper suffix of ``s[:i + 1]``.  ("Proper"
    means the prefix excludes the last character and the suffix excludes
    the first one, so a single character always scores 0.)

    References:
    https://www.cnblogs.com/dahu-daqing/p/9302668.html
    http://www.ruanyifeng.com/blog/2013/05/Knuth%E2%80%93Morris%E2%80%93Pratt_algorithm.html

    :param s: the pattern string
    :return: list of ``len(s)`` ints; an empty list for an empty string
    """
    table = [0] * len(s)
    matched = 0  # length of the border of s[:i], carried between iterations

    for i in range(1, len(s)):
        # The border of s[:i + 1] must extend some border of s[:i]; on a
        # mismatch fall back to the next shorter border instead of
        # re-comparing every prefix/suffix pair from scratch.
        while matched > 0 and s[i] != s[matched]:
            matched = table[matched - 1]
        if s[i] == s[matched]:
            matched += 1
        table[i] = matched

    return table


def bf(s: str, p: str) -> int:
    """
    Brute-force substring search.

    Slides a window of ``len(p)`` characters over ``s`` one position at a
    time and compares each window with ``p``.

    :param s: target string to search in
    :param p: pattern string to search for
    :return: index of the first occurrence of ``p`` in ``s``, or -1 if
        ``p`` does not occur (including when ``p`` is longer than ``s``)
    """
    width = len(p)
    # When p is longer than s the range is empty and we fall through to -1.
    for k in range(len(s) - width + 1):
        if s[k:k + width] == p:
            return k
    return -1


def bf_example():
    """Run the brute-force search on the sample strings and print the result."""
    target, pattern = "BBC ABCDAB ABCDABCDABDE", "ABCDABD"
    found_at = bf(target, pattern)
    print(found_at)  # prints 15, the index where the match starts


def kmp(s: str, p: str) -> int:
    """
    Knuth-Morris-Pratt substring search.

    Aligns ``p`` against ``s`` at position ``start``.  On a mismatch after
    ``matched`` characters, the window is shifted by
    ``matched - table[matched - 1]`` where ``table`` is the partial match
    table returned by ``next_arr(p)``; the characters that still overlap
    after the shift are known to match and are not compared again.

    :param s: target string to search in
    :param p: pattern string to search for
    :return: index of the first occurrence of ``p`` in ``s``, or -1 if
        ``p`` does not occur.  An empty ``p`` matches at index 0, the
        same result ``bf`` gives.
    """
    if len(s) < len(p):
        return -1

    table = next_arr(p)
    last_start = len(s) - len(p)

    start = 0    # index in s where the pattern is currently aligned
    matched = 0  # number of leading pattern characters known to match

    while start <= last_start:
        # Extend the match; start <= last_start and matched < len(p)
        # together keep start + matched inside s.
        while matched < len(p) and s[start + matched] == p[matched]:
            matched += 1

        # Check for a full match first so that an empty pattern
        # (matched == len(p) == 0) is reported as found at index 0.
        if matched == len(p):
            return start

        if matched == 0:
            start += 1
        else:
            # Partial match value of the last matched pattern character.
            keep = table[matched - 1]
            # shift = matched characters - corresponding partial match value
            start += matched - keep
            # The first `keep` characters already line up after the shift.
            matched = keep

    return -1


if __name__ == "__main__":
    # Demo: search the sample text for the sample pattern and print the index.
    print(kmp("BBC ABCDAB ABCDABCDABDE", "ABCDABD"))

posted @ 2019-07-19 15:43  加州风尘  阅读(188)  评论(0)  编辑  收藏  举报