fft 与字符串匹配

规定文本串为 \(S\),模式串为 \(T\),且 \(|S| \ge |T|\)。

1.正常版

定义匹配函数 \(p(S,T)=(S-T)^2\)

那么对于 \(\displaystyle h(r)=\sum_{i=0}^{|T|-1} p(S_{r-(|T|-1-i)},T_i)\),若 \(h(r)=0\),则 \(T\) 在 \(S\) 中以 \(r\) 为结尾的位置完全匹配。

尝试展开 \(h\):

\[\begin{aligned} h(r) &= \sum_{i=0}^{|T|-1} p(S_{r-(|T|-1-i)},T_i) \\ &= \sum_{i=0}^{|T|-1} p(S_{r-|T|+1+i},T_i) \\ &= \sum_{i=0}^{|T|-1} (S_{r-|T|+1+i})^2 - 2S_{r-|T|+1+i}T_i + T_i^2 \\ &= \sum_{i=0}^{|T|-1} (S_{r-|T|+1+i})^2 + T_i^2 - \sum_{i=0}^{|T|-1} 2S_{r-|T|+1+i}T_i \end{aligned}\]

第一个和式可以很快处理出来:\(S_j^2\) 的区间和用前缀和 \(\mathcal O(1)\) 求出,\(\sum T_i^2\) 是与 \(r\) 无关的常数。

第二个和式中两个下标里 \(i\) 的符号均为正(下标之和不是定值),所以考虑反转 \(T\) 串,写成卷积形式。

为了不考虑下标问题,\(T\) 串外的部分的值看作 \(0\) , 这样就不会对答案产生影响。

\[\begin{aligned} h(r) &= \sum_{i=0}^{|T|-1} (S_{r-|T|+1+i})^2 + T_i^2 - \sum_{i=0}^{|T|-1} 2S_{r-|T|+1+i}T_{|T|-1-i} \\ &= \sum_{i=0}^{|T|-1} (S_{r-|T|+1+i})^2 + T_i^2 - \sum_{i=0}^{r} 2S_{r-i}T_{i} \end{aligned}\]

复杂度 \(\mathcal O(n \log n)\)

2.有通配符 P4173 残缺的字符串

为了消除通配符的影响,定义通配符的值为 \(0\)

定义匹配函数 \(p(S,T)=(S-T)^2ST\)(只要任一字符为通配符,其值为 \(0\),\(p\) 也就为 \(0\)), \(h\) 同上,同理得:

\[\begin{aligned} h(r) &= \sum_{i=0}^{|T|-1} p(S_{r-(|T|-1-i)},T_i) \\ &= \sum_{i=0}^{|T|-1} p(S_{r-|T|+1+i},T_i) \\ &= \sum_{i=0}^{|T|-1} (S_{r-|T|+1+i})^3T_i - 2(S_{r-|T|+1+i})^2T_i^2 + T_i^3S_{r-|T|+1+i} \\ \end{aligned}\]

同 1 的套路反转 \(T\)

\[\begin{aligned} h(r) &= \sum_{i=0}^{|T|-1} (S_{r-|T|+1+i})^3T_{|T|-1-i} - 2(S_{r-|T|+1+i})^2(T_{|T|-1-i})^2 + (T_{|T|-1-i})^3S_{r-|T|+1+i} \\ &= \sum_{i=0}^{r} (S_{r-i})^3T_{i} - 2(S_{r-i})^2T_{i}^2 + (T_{i})^3S_{r-i} \end{aligned}\]

三次多项式乘法即可完成。

#include <cmath>
#include <cstdio>
#include <vector>
#include <cstring>
#include <iostream>
#include <algorithm>
using namespace std;
// pi expands to a call to acos( -1 ) at every place it is used.
#define pi acos( -1 )
// Absolute tolerance used when testing a floating-point value against zero.
#define eps 1e-8
//#define double long double

// Capacity bound: sizes the bit-reversal table and the input character buffers.
const int MAXN = 1.2e6;

// Minimal complex number: x is the real part, y the imaginary part.
// Supports scaling by a real number and complex add / subtract / multiply.
struct Complex {
    double x , y;

    // Zero by default.
    Complex() : x( 0 ) , y( 0 ) {}
    Complex( double X , double Y ) : x( X ) , y( Y ) {}

    // Scale both parts by the real factor a.
    Complex operator * ( const double &a ) const {
        Complex scaled( x * a , y * a );
        return scaled;
    }

    // Divide both parts by the real divisor a.
    Complex operator / ( const double &a ) const {
        Complex scaled( x / a , y / a );
        return scaled;
    }

    // Component-wise sum.
    Complex operator + ( const Complex &a ) const {
        Complex sum( x + a.x , y + a.y );
        return sum;
    }

    // Component-wise difference.
    Complex operator - ( const Complex &a ) const {
        Complex diff( x - a.x , y - a.y );
        return diff;
    }

    // Complex product: (x + yi)(a.x + a.y i).
    Complex operator * ( const Complex &a ) const {
        const double re = x * a.x - y * a.y;
        const double im = x * a.y + y * a.x;
        return Complex( re , im );
    }
};

// Poly: a sequence of Complex values stored in a std::vector.
#define Poly vector< Complex >
// len( x ): x.size() cast to int. The argument is not parenthesised in the
// expansion, so pass a plain identifier rather than a compound expression.
#define len( x ) ( (int)x.size() )

// lim: current transform size; rev: bit-reversal permutation for that size.
// Both are filled in by the Poly * Poly operator before fft is called.
int lim , rev[ MAXN + 5 ];

// In-place iterative radix-2 transform of the first lim entries of f.
//   op ==  1 : forward transform.
//   op == -1 : inverse transform (conjugate roots, then every entry divided by lim).
// f must hold at least lim entries and rev must match lim.
void fft( Poly &f , int op ) {
	// Reorder into bit-reversed positions; each pair is swapped only once.
	for( int i = 0 ; i < lim ; i ++ ) {
		const int j = rev[ i ];
		if( i < j ) swap( f[ i ] , f[ j ] );
	}

	// Butterfly passes over blocks of size 2, 4, ..., lim.
	for( int block = 2 ; block <= lim ; block <<= 1 ) {
		const int half = block / 2;
		const double theta = 2 * pi / block;
		const Complex step( cos( theta ) , op * sin( theta ) );

		for( int base = 0 ; base < lim ; base += block ) {
			Complex cur( 1 , 0 ); // running power of the primitive root
			for( int k = 0 ; k < half ; k ++ ) {
				Complex &lo = f[ base + k ];
				Complex &hi = f[ base + k + half ];
				const Complex t = cur * hi;
				hi = lo - t;
				lo = lo + t;
				cur = cur * step;
			}
		}
	}

	if( op != -1 ) return;
	for( int i = 0 ; i < lim ; i ++ ) f[ i ] = f[ i ] / lim;
}
// Pointwise product of the forward transforms of f and g.
// Sets the global lim to the smallest power of two >= len(f) + len(g) - 1,
// rebuilds rev for that size, zero-pads both operands to lim and transforms them.
// The inverse transform is NOT applied here: the result has lim entries in the
// transformed domain, and the caller runs fft( . , -1 ) to obtain the convolution.
Poly operator * ( Poly f , Poly g ) {
	const int need = len( f ) + len( g ) - 1;
	lim = 1;
	while( lim < need ) lim <<= 1;

	// rev[ i ] = rev[ i / 2 ] shifted right, with the top bit set when i is odd.
	for( int i = 0 ; i < lim ; i ++ ) {
		rev[ i ] = rev[ i >> 1 ] >> 1;
		if( i & 1 ) rev[ i ] |= lim >> 1;
	}

	f.resize( lim );
	g.resize( lim );
	fft( f , 1 );
	fft( g , 1 );

	for( int k = 0 ; k < lim ; k ++ ) {
		const Complex prod = f[ k ] * g[ k ];
		f[ k ] = prod;
	}
	return f;
}
// Element-wise sum; the shorter operand is zero-padded to the longer length.
Poly operator + ( Poly f , Poly g ) {
	const int width = max( len( f ) , len( g ) );
	f.resize( width );
	g.resize( width );

	int k = 0;
	while( k < width ) {
		f[ k ] = f[ k ] + g[ k ];
		++ k;
	}
	return f;
}
// Element-wise difference; the shorter operand is zero-padded to the longer length.
// Trailing entries that are numerically zero are then removed, so the result may be
// shorter than either operand (possibly empty).
Poly operator - ( Poly f , Poly g ) {
	int n = max( len( f ) , len( g ) );
	f.resize( n ); g.resize( n );
	for( int i = 0 ; i < n ; i ++ ) f[ i ] = f[ i ] - g[ i ];
	// An entry counts as zero only when BOTH parts are below eps. Testing the real
	// part alone would discard entries that still carry a significant imaginary part.
	while( !f.empty() && fabs( f.back().x ) < eps && fabs( f.back().y ) < eps ) f.pop_back();
	return f;
}
// Scales every entry of f by the real factor p.
Poly operator * ( double p , Poly f  ) {
	const int count = len( f );
	for( int k = 0 ; k != count ; ++ k ) {
		Complex &entry = f[ k ];
		entry = entry * p;
	}
	return f;
}
// Divides every entry of f by the real number p.
// Note the operand order: the expression `p / f` yields f scaled by 1 / p.
Poly operator / ( double p , Poly f ) {
	const int count = len( f );
	for( int k = 0 ; k != count ; ++ k ) {
		Complex &entry = f[ k ];
		entry = entry / p;
	}
	return f;
}

// n: number of characters of S that are used; m: number of characters of T that
// are used. main reads m first, then n.
int n , m;
// Input strings. main reads T before S and stores T reversed.
char S[ MAXN + 5 ] , T[ MAXN + 5 ];

// f, g: operands built from S and T for each product term; h: running sum of the products.
Poly f , g , h;

int main() {
//	freopen("1.in","r",stdin);
//	freopen("1.ans","w",stdout); 
	
	scanf("%d %d",&m,&n);
	f.resize( n ); g.resize( m );
	scanf("%s %s", T , S ); reverse( T , T + m );
	
	for( int i = 0 ; i < n ; i ++ ) f[ i ].x = S[ i ] == '*' ? 0 : pow( S[ i ] - 'a' + 1 , 3 );
	for( int i = 0 ; i < m ; i ++ ) g[ i ].x = T[ i ] == '*' ? 0 : pow( T[ i ] - 'a' + 1 , 1 );
	h = h + ( f * g );
	
	for( int i = 0 ; i < n ; i ++ ) f[ i ].x = S[ i ] == '*' ? 0 : pow( S[ i ] - 'a' + 1 , 2 );
	for( int i = 0 ; i < m ; i ++ ) g[ i ].x = T[ i ] == '*' ? 0 : pow( T[ i ] - 'a' + 1 , 2 );
	h = h - ( 2 * ( f * g ) );
	
	for( int i = 0 ; i < n ; i ++ ) f[ i ].x = S[ i ] == '*' ? 0 : pow( S[ i ] - 'a' + 1 , 1 );
	for( int i = 0 ; i < m ; i ++ ) g[ i ].x = T[ i ] == '*' ? 0 : pow( T[ i ] - 'a' + 1 , 3 );
	h = h + ( f * g );
	
	fft( h , -1 );
	
	int Ans = 0;
	for( int i = m - 1 ; i < n ; i ++ ) if( fabs( h[ i ].x ) < eps ) Ans ++;
	printf("%d\n", Ans );
	for( int i = m - 1 ; i < n ; i ++ ) if( fabs( h[ i ].x ) < eps ) printf("%d ", i - m + 2 );
	return 0;
}
posted @ 2021-03-10 16:48  chihik  阅读(164)  评论(0编辑  收藏  举报