C语言实现KMP模式匹配算法
next:
/*! * Description: * author scictor <scictor@gmail.com> * date 2018/7/4 */ #include <stdio.h> #include <stdlib.h> #include <string.h> // https://tekmarathon.com/2013/05/14/algorithm-to-find-substring-in-a-string-kmp-algorithm/ /*What is Partial Match Table? It is an array of size (pattern_length+1) where, for each position of i in pattern p, b[i] is defined such that it takes the ‘length of the longest proper suffix of P[1…i-1]’ that matches with the ‘prefix of P’. What is longest prefix/suffix match??? Proper prefix is a prefix which is not same as the substring. Recall proper set which is a subset of a set but is not same as the set. Why a prefix should match suffix of the pattern? its because when we shift the pattern its the prefix of P which comes towards the suffix. And also the key idea is that if we have successfully matched prefix P[1…i-1] of the pattern with the substring T[j-(i-1)…j-1] of the input string and P(i)!=T(j), then we dont need to reprocess any of the suffix T[j-(i-1)…j-1] since we know this portion of the text string is the prefix of the pattern that we have just matched. */ //"ababacb" /** * Pre processes the pattern array based on proper prefixes and proper * suffixes at every position of the array * * @param ptrn * word that is to be searched in the search string * @return partial match table which indicates */ void kmp_next(const char *pattern, int patternLen, int *next) { int i = 0, j = -1; next[i] = j; // default next[0] = -1 while (i < patternLen) { while (j >= 0 && pattern[i] != pattern[j]) { // if there is mismatch consider the next widest border // The borders to be examined are obtained in decreasing order from // the values b[i], b[b[i]] etc. j = next[j]; } i++; j++; next[i] = j; } for(int index = 0; index < patternLen; ++index) printf("%d ", next[index]); return; } /** * Based on the pre processed array, search for the pattern in the text * * @param text * text over which search happens * @param ptrn * pattern that is to be searched */ //int matchIndex[128] = {0}; int kmp_search(const char *text, int textLen, const char *pattern, int patternLen) { int i = 0, j = 0; // initialize new array and preprocess the pattern int next[patternLen + 1]; memset(next, 0x00, sizeof(next)); // int idx = 0; // memset(matchIndex, 0x00, sizeof(matchIndex)); kmp_next(pattern, patternLen, next); while (i < textLen) { while (j >= 0 && text[i] != pattern[j]) { j = next[j]; } i++; j++; // a match is found // if (j == patternLen) { // printf("found substring at index:" + (i - patternLen)); // j = next[j]; // } if (j == patternLen) { printf("found substring at index:%d", (i - patternLen)); //j = next[j]; //matchIndex[idx++] = i - patternLen; return (i - patternLen); } } // for(int k = 0; k < idx; ++k) // { // printf("%d ", matchIndex[k]); // } return -1; } /* Index 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 Text(T) a b c a b d a b c a b d a b c a b d a b d a b c Pattern(P) a b c a b d a b c PMT (next[i]) -1 0 0 0 1 2 0 1 2 3 */
// 4ms int kmp(const char *text, int textLen, const char *pattern, int patternLen) { int *T; int i, j; if (pattern[0] == '\0') return 0; // Construct the lookup table T = (int*) malloc( (patternLen + 1) * sizeof(int) ); T[0] = -1; for (i=0; pattern[i] != '\0'; i++) { T[i+1] = T[i] + 1; while (T[i+1] > 0 && pattern[i] != pattern[T[i+1]-1]) T[i+1] = T[T[i+1]-1] + 1; } for(int k = 0; k < patternLen; ++k) printf("%d ", T[k]); // Perform the search for (i=j=0; text[i] != '\0'; ) { if (j < 0 || text[i] == pattern[j]) { ++i, ++j; if (pattern[j] == '\0') { return (i-j); } } else j = T[j]; } free(T); return -1; } /*const char *kmp_search(const char *text, const char *pattern) { int *T; int i, j; const char *result = NULL; if (pattern[0] == '\0') return text; // Construct the lookup table T = (int*) malloc((strlen(pattern)+1) * sizeof(int) ); T[0] = -1; for (i=0; pattern[i] != '\0'; i++) { T[i+1] = T[i] + 1; while (T[i+1] > 0 && pattern[i] != pattern[T[i+1]-1]) T[i+1] = T[T[i+1]-1] + 1; } // Perform the search for (i=j=0; text[i] != '\0'; ) { if (j < 0 || text[i] == pattern[j]) { ++i, ++j; if (pattern[j] == '\0') { result = text+i-j; break; } } else j = T[j]; } free(T); return result; } */
nextval:
void preKmp(char *x, int m, int kmpNext[]) { int i, j; i = 0; j = kmpNext[0] = -1; while (i < m) { while (j > -1 && x[i] != x[j]) j = kmpNext[j]; i++; j++; if (x[i] == x[j]) kmpNext[i] = kmpNext[j]; else kmpNext[i] = j; } } // text:y, len: n, match parttern:x, len: m int KMP(char *y, int n, char *x, int m) { int i, j, kmpNext[m]; /* Preprocessing */ preKmp(x, m, kmpNext); /* Searching */ i = j = 0; while (j < n) { while (i > -1 && x[i] != y[j]) i = kmpNext[i]; i++; j++; // if (i >= m) { // printf("j-i=%d\n",j - i); // i = kmpNext[i]; // } if (i == m) { //printf("j-i=%d\n",j - i); return j - i; //i = kmpNext[i]; } } return -1; } int strStr(char* haystack, char* needle) { int needleLen = strlen(needle); if(needleLen == 0) return 0; int hayLen = strlen(haystack); if(hayLen == 0) return -1; return KMP(haystack, hayLen, needle, needleLen); }
【推荐】国内首个AI IDE,深度理解中文开发场景,立即下载体验Trae
【推荐】编程新体验,更懂你的AI,立即体验豆包MarsCode编程助手
【推荐】抖音旗下AI助手豆包,你的智能百科全书,全免费不限次数
【推荐】轻量又高性能的 SSH 工具 IShell:AI 加持,快人一步
· SQL Server 2025 AI相关能力初探
· Linux系列:如何用 C#调用 C方法造成内存泄露
· AI与.NET技术实操系列(二):开始使用ML.NET
· 记一次.NET内存居高不下排查解决与启示
· 探究高空视频全景AR技术的实现原理
· 阿里最新开源QwQ-32B,效果媲美deepseek-r1满血版,部署成本又又又降低了!
· 单线程的Redis速度为什么快?
· SQL Server 2025 AI相关能力初探
· AI编程工具终极对决:字节Trae VS Cursor,谁才是开发者新宠?
· 展开说说关于C#中ORM框架的用法!