字符串匹配算法KMP
1. 求数组next[j]的算法如下,它只与模式串有关,与目标串无关
/*
 * Build the KMP failure table for the pattern p[0..len-1].
 *
 * The table depends only on the pattern, never on the text being searched.
 *
 *   p    - pattern characters; only p[0..len-1] are read
 *   next - output table with room for at least len ints; on return
 *          next[0] == -1 and, for 1 <= j < len, next[j] == k means that the
 *          k characters just before p[j] equal the first k characters of p
 *   len  - number of characters in the pattern; nothing is written when
 *          len <= 0
 *
 * Exactly next[0..len-1] is written. The loop stops at j == len - 1 because
 * each successful step stores into next[j + 1]; running until j == len would
 * store one element past the end of a table sized for len entries.
 */
void cal_next(char *p, int *next, int len)
{
    int i = -1, j = 0; // i == -1 means "restart from the first element of p[]"

    if (len <= 0) {
        return; // empty pattern: there is no next[0] to fill in
    }

    next[0] = -1;
    while (j < len - 1) {
        if (i == -1 || p[i] == p[j]) {
            // p[j] extends the current border, so i + 1 characters are
            // already matched in front of position j + 1
            i++;
            j++;
            next[j] = i;
        } else {
            // fall back to the next shorter border and retry p[j]
            i = next[i];
        }
    }
}
//e.g1: p[]     a  b  c  a  b  c  d  e
//      next[j] -1 0  0  0  1  2  3  0
//e.g2: p[]     a  b  a  a  b  c  a  c
//      next[j] -1 0  0  1  1  2  0  1
2. KMP算法
1 int KMP(char* s, char *p, int len) 2 { //array starts with 0; 3 //p is par_string, s is des_string, len is the length of p[]; 4 int i = 0, j = 0;//i point to s[], j point to p[] 5 while(s[i] != '\0' && j < len) 6 {//not meet the end of both arrays 7 if(j == -1 || p[j] == s[i]) 8 {//matched 9 i++; 10 j++; 11 } 12 else 13 {//mismatched 14 j = next[j]; 15 } 16 } 17 if(j == len) 18 {//all matched! 19 printf("\ni = %d, j = %d, match point = %d\n", i, j, i - j); 20 return i - j; 21 } 22 else 23 {//fail to match! 24 printf("no match!\n"); 25 return -1; 26 } 27 }
3. Test program (C)
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#define N 100

/* Failure table shared by cal_next() (which fills it) and KMP() (which reads it). */
int next[N];

/*
 * Build the KMP failure table for the pattern p[0..len-1], then print the
 * pattern and the table.
 *
 *   p    - NUL-terminated pattern (printed with %s); p[0..len-1] are read
 *   next - output table with room for at least len ints; this parameter
 *          shadows the file-scope next[] inside the function
 *   len  - number of pattern characters
 *
 * On return next[0] == -1 and, for 1 <= j < len, next[j] == k means that the
 * k characters just before p[j] equal the first k characters of p.
 * Exactly next[0..len-1] is written: each successful step stores into
 * next[j + 1], so the loop stops at j == len - 1.
 */
void cal_next(char *p, int *next, int len)
{
    int i = -1, j = 0; // i == -1 means "restart from the first element of p[]"

    if (len > 0) {
        next[0] = -1;
        while (j < len - 1) {
            if (i == -1 || p[i] == p[j]) {
                // p[j] extends the current border
                i++;
                j++;
                next[j] = i;
            } else {
                // fall back to the next shorter border and retry p[j]
                i = next[i];
            }
        }
    }

    // print next[]; the leading space puts the pattern's first character
    // under the second character of the "-1" printed for next[0]
    printf(" %s\n", p);
    for (i = 0; i < len; i++) {
        printf("%d", next[i]);
    }
}

/*
 * Search the NUL-terminated text s for the first occurrence of the pattern
 * p[0..len-1], using the file-scope failure table next[] that cal_next()
 * filled in for this pattern.
 *
 * Prints the outcome and returns the 0-based index in s where the match
 * starts, or -1 when the pattern does not occur.
 */
int KMP(char* s, char *p, int len)
{
    int i = 0, j = 0; // i indexes s[], j indexes p[]

    while (s[i] != '\0' && j < len) {
        if (j == -1 || p[j] == s[i]) {
            // characters agree (or the pattern restarts): advance both
            i++;
            j++;
        } else {
            // mismatch: keep i, fall back inside the pattern
            j = next[j];
        }
    }

    if (j == len) {
        // the whole pattern was matched, ending just before s[i]
        printf("\ni = %d, j = %d, match point = %d\n", i, j, i - j);
        return i - j;
    } else {
        // text exhausted before the pattern was fully matched
        printf("\n miss match!\n");
        return -1;
    }
}

/* Demo: build the table for one pattern and search one text for it. */
int main()
{
    char *p = "abcabcde";//par_string
    char *s = "abcdabceabcabcdefabc";//des_string
    size_t plen = strlen(p);

    // the shared table holds N entries, one per pattern character
    if (plen > N) {
        fprintf(stderr, "pattern longer than %d characters\n", N);
        return EXIT_FAILURE;
    }

    cal_next(p, next, (int)plen);
    KMP(s, p, (int)plen);
    return 0;
}