字符串匹配算法KMP
KMP算法的原理,这里我不写,建议参考:
http://www.ruanyifeng.com/blog/2013/05/Knuth%E2%80%93Morris%E2%80%93Pratt_algorithm.html
http://blog.csdn.net/v_JULY_v/article/details/6545192
http://blog.csdn.net/joylnwang/article/details/6778316
这几篇博客写得非常好,以本人的能力只能写出更差的解释……所以就不再多此一举了。下面仅给出两段代码,分别用 C++ 和 Python 编写(均已测试通过),
希望能对大家有所帮助:
C++代码:
#include <iostream> #include <cstring> using namespace std; bool piFun(char *pattern, int *pi) //pattern表示模式字符串,pi表示失配函数 { if (pattern == NULL || pi == NULL) return false ; int len = strlen(pattern) ; pi[ 0 ] = -1 ; int k = -1 ; for (int i = 1; i < len ; ++ i) { while ( k >= 0 && pattern[k + 1] != pattern[i]) k = pi[k] ; //寻找一个满足条件的前缀,使得该前缀是pattern[0 ~ i -1]的一个后缀 if ( pattern[k + 1] == pattern[i]) ++ k; //能够匹配的前缀和后缀的长度增加1 pi[i] = k ; //失配后开始查找的位置 } return true ; } int matchStr(char *text, char*pattern) { if (text == NULL || pattern == NULL) return -1 ; int lenT = strlen(text) ; int lenP = strlen(pattern) ; int *pi = new int[lenP] ; if (!piFun(pattern, pi)) return - 1; int k = -1 ; for (int i = 0; i < lenT; ++ i) { while(k >= 0 && pattern[k + 1] != text[i]) k = pi[k] ; //下一个字符不能匹配 if (pattern[k + 1] == text[i]) ++ k ; //匹配上下一个字符 if (k == lenP - 1) //已经找到可以匹配的pattern { delete[] pi ; return (i - lenP + 1) ; //得到开始出现的第一个位置 } } delete[] pi ; return - 1 ; } int main(int argc, char **argv) { char p[] = "ababbacdefgacd" ; cout << matchStr(p, "aba") << endl; cout << matchStr(p, "ba") << endl; cout << matchStr(p, "efg") << endl; cout << matchStr(p, "addf") << endl; return 0 ; }
#!/usr/bin/python
# Filename: KMP.py
"""Knuth-Morris-Pratt substring search."""


def piFun(pattern, pi):
    """Fill ``pi`` in place with the KMP failure table for ``pattern``.

    After a successful call, ``pi[i]`` is the index of the last character of
    the longest proper prefix of ``pattern[:i + 1]`` that is also a suffix of
    it, or -1 when no such prefix exists.

    Args:
        pattern: The pattern string.
        pi: A list that is overwritten with the table (its old contents are
            discarded).

    Returns:
        True when the table was built; False when ``pattern`` is empty, in
        which case ``pi`` is left unchanged.
    """
    if len(pattern) == 0:
        return False

    # Replace the caller's list contents in place; entry 0 is always -1.
    pi[:] = [-1] * len(pattern)
    k = -1  # index of the last character of the currently matched prefix
    for i in range(1, len(pattern)):
        # Fall back to shorter borders until one can be extended by pattern[i].
        while k >= 0 and pattern[k + 1] != pattern[i]:
            k = pi[k]
        if pattern[k + 1] == pattern[i]:
            k += 1
        pi[i] = k
    return True


def matchStr(text, pattern):
    """Return the index of the first occurrence of ``pattern`` in ``text``.

    Returns -1 when there is no match or when either argument is empty.
    """
    if len(text) == 0 or len(pattern) == 0:
        return -1

    pi = []
    if not piFun(pattern, pi):
        return -1

    last = len(pattern) - 1
    k = -1  # index of the last pattern character matched so far
    for i in range(len(text)):
        # The next pattern character does not match: fall back via the table.
        while k >= 0 and pattern[k + 1] != text[i]:
            k = pi[k]
        if pattern[k + 1] == text[i]:
            k += 1
        if k == last:
            # Whole pattern matched; report where the match starts.
            return i - last
    return -1


if __name__ == "__main__":
    print(matchStr('abcefabdefcnfghiorpnpp', 'ce'))
    print(matchStr('abcefabdefcnfghiorpnpp', 'ab'))
    print(matchStr('abcefabdefcnfghiorpnpp', 'ef'))
    print(matchStr('abcefabdefcnfghiorpnpp', 'npp'))
    print(matchStr('abcefabdefcnfghiorpnpp', 'ok'))