字符串匹配算法KMP

1. 求 next[j] 数组的算法如下。该数组只与模式串有关，与目标串无关。

 1 void cal_next(char *p, int *next, int len)
 2 {//important!!  array rangs from 0 to len-1
 3     int i = -1, j = 0;//i == -1 means back to the first elem of p[];
 4     next[0] = -1;
 5     //except first element, next[j]=k means that before j, there are k elem matched, 
 6     while(j < len)
 7     {
 8         if(i == -1 || p[i] == p[j])
 9         {//if p[j] machted, we know that before j+1, there are i elem matched
10             i++;
11             j++;
12             next[j] = i;
13         }
14         else
15         {//go back;
16             i = next[i];
17         }
18     }
19 }
20 //e.g1:   p[]   a b c a b c d e
21 //    next[j]  -1 0 0 0 1 2 3 0
22 //e.g2:   p[]   a b a a b c a c
23 //    next[j]  -1 0 0 1 1 2 0 1

 

2. KMP算法

 1 int KMP(char* s, char *p, int len)
 2 {   //array starts with 0;
 3     //p is par_string, s is des_string, len is the length of p[];
 4     int i = 0, j = 0;//i point to s[], j point to p[]
 5     while(s[i] != '\0' && j < len)
 6     {//not meet the end of both arrays
 7         if(j == -1 || p[j] == s[i])
 8         {//matched
 9             i++;
10             j++;
11         }
12         else
13         {//mismatched
14             j = next[j];
15         }
16     }
17     if(j == len)
18     {//all matched!
19         printf("\ni = %d, j = %d, match point = %d\n", i, j, i - j);
20         return i - j;
21     }
22     else
23     {//fail to match!
24         printf("no match!\n");
25         return -1;
26     }
27 }

 

3. Test program (C) 

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
 3 #define N 100
 4 int next[N];
 5 void cal_next(char *p, int *next, int len)
 6 {//array rangs from 1 to len-1
 7     int i = -1, j = 0;//i == 0 is invalid
 8     next[0] = -1;
 9     //except first element,
10     //next[j]=k means that befor j, there are k-1 elem matched
11     while(j < len)
12     {
13         if(i == -1 || p[i] == p[j])
14         {
15             i++;
16             j++;
17             next[j] = i;
18         }
19         else
20         {
21             i = next[i];
22         }
23     }
24     //print next[]
25     printf(" %s\n", p);
26     for(i = 0; i < strlen(p); i++)
27     {
28         printf("%d", next[i]);
29     }
30 }
31 
32 int KMP(char* s, char *p, int len)
33 {   //array starts with 0;
34     //p is par_string, s is des_string, len is the length of p[];
35     int i = 0, j = 0;//i point to s[], j point to p[]
36     while(s[i] != '\0' && j < len)
37     {//not meet the end of both arrays
38         if(j == -1 || p[j] == s[i])
39         {//matched
40             i++;
41             j++;
42         }
43         else
44         {//mismatched
45             j = next[j];
46         }
47     }
48     if(j == len)
49     {//all matched!
50         printf("\ni = %d, j = %d, match point = %d\n", i, j, i - j);
51         return i - j;
52     }
53     else
54     {//fail to match!
55         printf("\n miss match!\n");
56         return -1;
57     }
58 }
59 
60 int main()
61 {
62     char *p = "abcabcde";//par_string
63     char *s = "abcdabceabcabcdefabc";//des_string
64     cal_next(p, next, strlen(p));
65     KMP(s, p, strlen(p));
66     return 0;
67 }
posted @ 2013-03-12 17:29  mender  阅读(166)  评论(1编辑  收藏  举报