KMP与sunday的比较

有关sunday算法的讲解：http://blog.csdn.net/caianye/article/details/6096610

以下转载请注明出处 by CrazyAC

1. 求模式串在文本串中第一次出现的位置

情况1：

char src[]="jfkdsahdiojdaigfgthlipjgffg";
char des[]="gffg";

KMP：（匹配了49次）

#include <iostream>
using namespace std;

// Text to search in, and the pattern to search for.
char src[]="jfkdsahdiojdaigfgthlipjgffg";
char des[]="gffg";
// KMP failure table for des. It is called nxt rather than next because, with
// <iostream> and `using namespace std;`, an unqualified global `next` is
// ambiguous with std::next on C++11 and later and the program fails to compile.
// sizeof(des) == strlen(des) + 1, which is exactly the m + 1 slots that
// getNext() writes, so the table follows the pattern if des is edited.
int nxt[sizeof(des)];
// n = length of src, m = length of des; both are assigned by getNext().
int n, m;


// Builds the KMP failure table for des and records both string lengths.
//
// After the call, nxt[0] == -1 and, for 1 <= i <= m, nxt[i] is the length of
// the longest proper prefix of des[0..i) that is also a suffix of it.
// Must be called before solve(), which reads n, m and nxt.
void getNext() {
    n = static_cast<int>( strlen( src ) );
    m = static_cast<int>( strlen( des ) );

    int i = 0;      // position in des whose table entry is being extended
    int j = -1;     // length of the current border, -1 meaning "restart"
    nxt[0] = -1;
    while( i < m ) {
        if( j == -1 || des[i] == des[j] ) {
            ++i;
            ++j;
            nxt[i] = j;
        } else {
            j = nxt[j];     // fall back to the next shorter border
        }
    }
}


// Searches src for the first occurrence of des with KMP.
//
// Prints the 1-based start position of the match (i-j+1), or -1 when des does
// not occur, followed by the number of loop iterations that were needed.
void solve() {
    int i = 0;      // index into src
    int j = 0;      // index into des
    int cnt = 0;    // loop iterations, the cost figure quoted in the article
    while( i < n && j < m ) {
        ++cnt;
        if( j == -1 || src[i] == des[j] ) {
            ++i;
            ++j;
        } else {
            j = nxt[j];     // keep i, realign the pattern
        }
    }
    if( j == m ) printf( "%d\n", i - j + 1 );
    else printf( "-1\n" );
    printf( "cnt = %d\n", cnt );
}


// Entry point of the KMP demo: build the failure table, then search and print.
int main() {
//    freopen( "c:/aaa.txt", "r", stdin );  // disabled: would redirect stdin from a local file
    getNext();  // also assigns the global lengths n and m that solve() loops over
    solve();    // prints the match position (or -1) and the iteration count
    return 0;
}

sunday：（匹配了10次）

#include <iostream>
using namespace std;


// Text to search in, and the pattern to search for.
char src[]="jfkdsahdiojdaigfgthlipjgffg";
char des[]="gffg";
// shift[c] is how far the window moves when c is the text character just past
// the window. It covers every unsigned char value so that no input character
// can index outside the table. It is called shift rather than next because a
// global `next` is ambiguous with std::next under `using namespace std;` on
// C++11 and later.
// cnt accumulates the number of character comparisons made by sunday().
int shift[256], cnt;


// Finds the first occurrence of des in src with the Sunday algorithm.
//
// Returns the 0-based offset of the match, or -1 when des does not occur.
// Every character comparison increments the global cnt; the caller resets it.
int sunday() {
	const int len_s = static_cast<int>( strlen( src ) );
	const int len_d = static_cast<int>( strlen( des ) );

	// Characters absent from the pattern let the window jump past them.
	for( int c = 0; c < 256; ++c ) shift[c] = len_d + 1;
	// Later (more rightward) occurrences overwrite earlier ones on purpose.
	for( int i = 0; i < len_d; ++i )
		shift[static_cast<unsigned char>( des[i] )] = len_d - i;

	int pos = 0;
	while( pos + len_d <= len_s ) {
		int i = 0;
		for( ; i < len_d; ++i ) {
			++cnt;
			if( src[pos+i] != des[i] ) break;
		}
		if( i == len_d ) return pos;

		// The window already ends at the end of the text, so there is no
		// following character to look up and no further window to try.
		if( pos + len_d >= len_s ) break;
		pos += shift[static_cast<unsigned char>( src[pos+len_d] )];
	}
	return -1;
}


// Runs the Sunday search once, then reports the match offset and the number
// of comparisons it cost.
int main() {
	cnt = 0;                        // start counting comparisons from zero
	const int where = sunday();     // offset of the first match, or -1
	printf( "%d\n", where );
	printf( "cnt = %d\n", cnt );
	return 0;
}

可见sunday的优越性。但是如果测试数据为

char src[]="aaaaaaaaaaaaaaaaaaaaba";
char des[]="aaaaaaaba";

KMP匹配了35次，sunday匹配了105次！

2.求模式串在文本串中出现的次数。hdoj 1686

sunday超时

#include <iostream>
#include <cstring>
#include <cstdio>
using namespace std;


// src holds the text and des the pattern; main() fills both.
char src[1000005], des[10005];
// shift[c] is how far the window moves when c is the text character just past
// the window. The entries are int because a shift can be as large as
// strlen(des) + 1, which does not fit in a char for patterns longer than 126
// characters. The table covers every unsigned char value so that no input
// character can index outside it. It is called shift rather than next because
// a global `next` is ambiguous with std::next under `using namespace std;`.
int shift[256];

// Counts the occurrences of des in src, overlapping ones included, with the
// Sunday algorithm, and returns that count.
int sunday() {
	const int len_s = static_cast<int>( strlen( src ) );
	const int len_d = static_cast<int>( strlen( des ) );

	// Characters absent from the pattern let the window jump past them.
	for( int c = 0; c < 256; ++c ) shift[c] = len_d + 1;
	// Later (more rightward) occurrences overwrite earlier ones on purpose.
	for( int i = 0; i < len_d; ++i )
		shift[static_cast<unsigned char>( des[i] )] = len_d - i;

	int pos = 0;
	int sum = 0;
	while( pos + len_d <= len_s ) {
		int i = 0;
		while( i < len_d && src[pos+i] == des[i] ) ++i;
		if( i == len_d ) ++sum;

		// The window already ends at the end of the text, so there is no
		// following character to look up, whether or not it matched.
		if( pos + len_d >= len_s ) break;
		pos += shift[static_cast<unsigned char>( src[pos+len_d] )];
	}
	return sum;
}


// Reads T test cases, each a pattern followed by a text, and prints how many
// times the pattern occurs in the text.
int main() {
//	freopen( "c:/aaa.txt", "r", stdin);  // disabled: would redirect stdin from a local file
	int T;
	// Without this check a failed read would leave T uninitialised.
	if( scanf( "%d", &T ) != 1 ) return 0;
	while( T-- ) {
		// The field widths are one less than the sizes of des[10005] and
		// src[1000005], so over-long input cannot overrun the buffers.
		if( scanf( "%10004s %1000004s", des, src ) != 2 ) break;
		printf( "%d\n", sunday() );
	}
	return 0;
}

KMP: 93MS

#include <iostream>
#include <cstdio>
#include <cstring>
using namespace std;


// src holds the text and des the pattern; main() fills both.
char src[1000010], des[10010];
// KMP failure table for des. It is called nxt rather than next because, with
// `using namespace std;`, an unqualified global `next` is ambiguous with
// std::next on C++11 and later and the program fails to compile.
int nxt[10010];
// Lengths of des and src; main() sets them before calling getNext() and solve().
int len_d, len_s;

// Builds the KMP failure table for the first len_d characters of des.
//
// After the call, nxt[0] == -1 and, for 1 <= i <= len_d, nxt[i] is the length
// of the longest proper prefix of des[0..i) that is also a suffix of it.
void getNext() {
    int i = 0;      // position in des whose table entry is being extended
    int j = -1;     // length of the current border, -1 meaning "restart"
    nxt[0] = -1;
    while( i < len_d ) {
        if( j == -1 || des[i] == des[j] ) {
            ++i;
            ++j;
            nxt[i] = j;
        } else {
            j = nxt[j];     // fall back to the next shorter border
        }
    }
}


// Counts the occurrences of des in src, overlapping ones included, with KMP
// and prints the count on its own line. getNext() must have been called for
// the current pattern first.
void solve() {
    int sum = 0;    // occurrences found so far
    int i = 0;      // index into src
    int j = 0;      // index into des
    while( i < len_s ) {
        if( j == -1 || src[i] == des[j] ) {
            ++i;
            ++j;
        } else {
            j = nxt[j];
        }
        if( j == len_d ) {
            ++sum;
            j = nxt[j];     // continue from the longest border so overlaps are counted
        }
    }
    printf( "%d\n", sum );
}


// Reads T test cases, each a pattern followed by a text, and prints how many
// times the pattern occurs in the text.
int main() {
 //   freopen( "c:/aaa.txt", "r", stdin);  // disabled: would redirect stdin from a local file
    int T;
    // Without this check a failed read would leave T uninitialised.
    if( scanf( "%d", &T ) != 1 ) return 0;
    while( T-- ) {
        // The field widths are one less than the sizes of des[10010] and
        // src[1000010], so over-long input cannot overrun the buffers.
        if( scanf( "%10009s %1000009s", des, src ) != 2 ) break;
        len_d = strlen( des );
        len_s = strlen( src );
        getNext();
        solve();
    }
    return 0;
}

综上所述，在信息学竞赛中sunday算法的优势得不到体现，那是因为ACM比赛对时间的重视，所以测试数据会卡你时间，而这些卡你时间的测试数据又往往把sunday给卡住了

就像上面这组数据

char src[]="aaaaaaaaaaaaaaaaaaaaba";
char des[]="aaaaaaaba";

根据sunday的原理，当一直匹配到b时，

aaaaaaaaaaaaaaaaaaaaba

aaaaaaaba

发生不匹配。根据sunday算法，移动步长 = 文本串中紧跟当前窗口之后的那个字符在模式串中最右一次出现的位置到模式串末尾的距离 + 1；这里窗口之后的字符是'a'，而模式串最右端的字符正是'a'，它到末尾的距离为0，所以步长为1，

又要从第二个'a'开始匹配

aaaaaaaaaaaaaaaaaaaaba

aaaaaaaba

如此和暴力就相差无几了。

对KMP来说，当到b不匹配时，i = j = 7，然后，j = next[j] = 6，所以，下次只是对src[i]与des[j]进行比较

aaaaaaaaaaaaaaaaaaaaba

aaaaaaaba

前面那一串已经匹配过的a（原文中标为红色）就不必再比较了，而sunday每次移动后都要把它们重新比较一遍。

posted on 2011-02-20 10:28 CrazyAC 阅读(2338) 评论(0) 收藏举报

刷新页面返回顶部

KMP与sunday的比较

综上所述，在信息学竞赛中sunday算法的优势得不到体现，那是因为ACM比赛对时间的重视，所以测试数据会卡你时间，而这些卡你时间的测试数据又往往把sunday给卡住了

CrazyAC

公告

导航