CF528D Fuzzy Search FFT
中文题面
题目中有个k的限制比较麻烦,我们稍微转换一下,令\(g[i][j]\)表示第\(i\)位,第\(j\)个字母,能否在k的范围内匹配上。
那么这个数组可以由类似滑动窗口的方式来解决:
新定义一个数组\(T[i][j]\)表示S串中第\(i\)个字母是不是\(j\),如果是则为1,反之为0.(注意与原题面中的T串相区分)
那么\(g[i][j]\)为1当且仅当向左向右分别扩展最多k位的空间内,\(s\)数组中有值为1,即
\[g[i][j] = max(\sum_{l = i - k}^{l <= i + k}T[l][j])
\]
那么k的限制就被消除了,因此下文中的k仅仅作为循环变量出现,与原题意中的k没有任何关系
下文写的是思路过程,在一些下标问题上并不一定严谨,具体的边界,下标之类的还要看具体实现(代码)
令\(F[i]\)表示以第i位为开头进行匹配,能够匹配上的最大长度。
那么有:
\[F[i] = \sum_{j = i}^{j \le i + |T| - 1}\sum_{k = 1}^{4} g[j][k] \cdot T[j - i][k]
\]
\[F[i] = \sum_{k = 1}^{4} \sum_{j = i}^{j \le i + |T| - 1}g[j][k] \cdot T[j - i][k]
\]
因此如果我们先枚举k,对于每个k单独计算后面部分并求和,那么如果\(F[i] == |T|\)则++ans;
因为先枚举了k,因此\(g[j][k]\)和\(T[j][k]\)后面的参数k就暂时略去(方便书写)
观察到\(g[j][k] \cdot T[j - i][k]\)类似与卷积形式,因此我们将\(T\)数组翻转得到原式的新的表达式:
\[f[i] = \sum_{j = i}^{j \le i + |T| - 1} g[j] \cdot T[|T| - j + i]
\]
因为
\[j + |T| - j + i = |T| + i
\]
所以
\[f[i] = (g * T)(|T| + i)
\]
直接上FFT / NTT即可
#include<bits/stdc++.h>
using namespace std;
#define R register int
#define AC 410000
#define ac 850000
#define LL long long
#define p 998244353
const int G = 3, Gi = 332748118;
int n, m, k, lim = 1, len, head, tail, inv, ans;
int g[ac], a[ac], f[AC], rev[ac], s[ac], q[ac];
char S[AC], T[AC], L[10] = {'A', 'G', 'C', 'T'};
inline void upmax(int &a, int b){if(b > a) a = b;}
inline int qpow(int x, int have)
{
int rnt = 1;
while(have)
{
if(have & 1) rnt = 1LL * rnt * x % p;
x = 1LL * x * x % p, have >>= 1;
}
return rnt;
}
void pre()
{
scanf("%d%d%d%s%s", &n, &m, &k, S, T);
-- n, -- m;//让下标从0开始?
while(lim <= n + m) lim <<= 1, len ++;
for(R i = 0; i < lim; i ++)
rev[i] = (rev[i >> 1] >> 1) | ((i & 1) << (len - 1));
inv = qpow(lim, p - 2);//lim的inv
}
void build(int x)//滑动窗口预处理相应的数组
{
for(R i = 0; i <= n; i ++) s[i + 1] = (S[i] == L[x]);//s数组从1开始处理比较方便
memset(g, 0, sizeof(g)), memset(a, 0, sizeof(a));
head = 1, tail = 0;
int tmp = n + 1;
for(R i = 1; i <= n + 1; i ++)
{
while(head <= tail && s[q[tail]] < s[i]) -- tail;//判断队内有无元素是要加<=的!!!
q[++ tail] = i;
while(head <= tail && q[head] < i - k) ++ head;
upmax(g[i - 1], s[q[head]]);
}
head = 1, tail = 0;
for(R i = tmp; i; i --)
{
while(head <= tail && s[q[tail]] < s[i]) -- tail;
q[++ tail] = i;
while(head <= tail && q[head] > i + k) ++ head;
upmax(g[i - 1], s[q[head]]);
}
for(R i = 0; i <= m; i ++) a[i] = (T[i] == L[x]);
reverse(a, a + m + 1);//翻转
/*printf("%c\n", x + 'A');
for(R i = 0; i <= n; i ++) printf("%d ", g[i]);
printf("\n");
for(R i = 0; i <= m; i ++) printf("%d ", a[i]);
printf("\n\n");*/
}
void NTT(int *A, int opt)
{
for(R i = 0; i < lim; i ++)
if(i < rev[i]) swap(A[i], A[rev[i]]);
for(R i = 1; i < lim; i <<= 1)
{
int W = qpow((opt == 1) ? G : Gi, (p - 1) / (i << 1));
for(R r = i << 1, j = 0; j < lim; j += r)
for(R w = 1, l = 0; l < i; l ++, w = 1LL * w * W % p)
{
int x = A[j + l], y = 1LL * w * A[j + i + l] % p;
A[j + l] = (x + y) % p, A[j + i + l] = (x - y + p) % p;
}
}
}
void work()
{
for(R i = 0; i <= 3; i ++)
{
build(i);
/*printf("%d\n", lim);
for(R j = 0; j <= n; j ++) printf("%d ", g[j]);
printf("\n");
for(R j = 0; j <= m; j ++) printf("%d ", a[j]);
printf("\n");*/
NTT(a, 1), NTT(g, 1);
for(R j = 0; j < lim; j ++) a[j] = 1LL * a[j] * g[j] % p;
NTT(a, -1);
for(R j = 0; j < lim; j ++) a[j] = 1LL * a[j] * inv % p;
for(R j = 0; j < lim; j ++) f[j] += a[m + j];
/* for(R j = 0; j <= n - m; j ++) printf("%d ", a[m + 1 + j]);
printf("\n\n");*/
}
/*printf("!!!");
for(R i = 1; i <= n - k + 1; i ++) printf("%d ", f[i]);
printf("\n");*/
for(R i = 0; i <= n - m; i ++) ans += (f[i] == m + 1);//只有和为|T|才能计入答案
printf("%d\n", ans);
}
int main()
{
// freopen("in.in", "r", stdin);
pre();
work();
// fclose(stdin);
return 0;
}