CF528D Fuzzy Search
题意
有两个基因串\(S\)和\(T\),它们只包含\(A\)、\(G\)、\(C\)、\(T\)四种字符。
现在你要找出\(T\)在\(S\)中出现了几次。
有一个门限值\(k≥0\)
当\(T[i]\)与\(S[j]\)对齐时,只要\(S[j-k]\)到\(S[j+k]\)(越界的下标忽略)中有一个字符与\(T[i]\)相同,就视为这一位匹配;\(T\)的每一位都匹配才算出现一次
\((1≤|T|≤|S|≤200000, 0≤k≤200000)\)
Sol
套路
这类字符串的匹配都可以通过把一个串翻转然后\(FFT\)得到
每个字符分开考虑
如果有这个字符视为\(1\),否则为\(0\)
那么卷积(用\(FFT\)计算)后的系数\(a[i]\)就表示:当\(T\)的最后一位与\(S[i]\)对齐时,这个字符有多少个位置匹配上了
卷积后就相当于\(reverse\)回来的一一匹配
因此只要四种字符在同一位置\(i\)上的匹配数加起来等于\(|T|\),就说明\(T\)在这个位置匹配上了一次
考虑有个阈值\(k\)
那么构建\(S\)的多项式时,只要\(s[i-k]\)到\(s[i+k]\)中出现了这个字符,系数\(a[i]\)就取\(1\),否则取\(0\)
# include <bits/stdc++.h>
# define IL inline
# define RG register
# define Fill(a, b) memset(a, b, sizeof(a))
using namespace std;
typedef long long ll;
// Reads the next decimal integer from stdin into `x`.
//
// Every character before the first digit is skipped; the value is negated
// when the character immediately preceding the first digit is '-'. The
// character that terminates the digit run is consumed as well.
//
// If end-of-file is reached before any digit is seen, `x` is left at 0 and
// the function returns instead of looping forever.
template <class Int>
inline void Input(Int &x){
    int sign = 1;
    // getchar() returns int: keeping it as int is what makes EOF
    // distinguishable from every real character.
    int c = getchar();
    x = 0;
    // Skip the non-digit prefix, remembering only whether the most recent
    // skipped character was a minus sign.
    for(; c != EOF && (c < '0' || c > '9'); c = getchar()) sign = (c == '-') ? -1 : 1;
    // Accumulate the digit run (EOF is negative, so it also ends this loop).
    for(; c >= '0' && c <= '9'; c = getchar()) x = x * 10 + (c - '0');
    x *= sign;
}
// Capacity of the per-character buffers (s, t, ans); the FFT-sized arrays
// are declared with four times this many entries.
const int maxn = 200005;
// Large sentinel used in main as the "no occurrence seen yet" position.
const int oo = 1000000000;
const double pi = std::acos(-1.0);

// n, m, k: the three integers read at the start of main, in that order.
int n, m, k;
// r:   index permutation table filled by Prepare and applied by FFT.
int r[maxn << 2];
// len: transform length chosen by Prepare.
int len;
// ans: per-position match counts accumulated over the four characters.
int ans[maxn];
// cnt: the final answer printed by main.
int cnt;
// s, t: the two input strings; main reverses the first m characters of t.
char s[maxn], t[maxn];
// tp:  the alphabet, one convolution pass per entry.
char tp[4] = {'A', 'T', 'G', 'C'};
// Minimal complex number (real + image * i) with just the arithmetic the
// FFT needs.
//
// The operators are const and take their operand by const reference, so
// they can be applied to const values and temporaries; existing call sites
// that pass non-const lvalues are unaffected.
struct Complex{
    double real, image;

    // Zero value.
    Complex() : real(0), image(0) {}
    // Value a + b*i.
    Complex(double a, double b) : real(a), image(b) {}

    Complex operator +(const Complex &b) const {
        return Complex(real + b.real, image + b.image);
    }
    Complex operator -(const Complex &b) const {
        return Complex(real - b.real, image - b.image);
    }
    // (x + yi)(u + vi) = (xu - yv) + (xv + yu)i
    Complex operator *(const Complex &b) const {
        return Complex(real * b.real - image * b.image, real * b.image + image * b.real);
    }
}
// a, b: the two polynomials being multiplied (filled in main);
// w:    table of roots of unity filled by Prepare and read by FFT.
a[maxn << 2], b[maxn << 2], w[maxn << 2];
IL void FFT(RG Complex *p, RG int opt){
for(RG int i = 0; i < len; ++i) if(r[i] < i) swap(p[i], p[r[i]]);
for(RG int i = 1; i < len; i <<= 1)
for(RG int j = 0, l = i << 1; j < len; j += l){
for(RG int k = 0; k < i; ++k){
RG Complex wn = Complex(w[len / i * k].real, w[len / i * k].image * opt);
RG Complex x = p[k + j], y = wn * p[k + j + i];
p[k + j] = x + y, p[k + j + i] = x - y;
}
}
}
IL void Prepare(){
RG int l = 0, tmp = m + n - 1;
for(len = 1; len < tmp; len <<= 1) ++l;
for(RG int i = 0; i < len; ++i) r[i] = (r[i >> 1] >> 1) | ((i & 1) << (l - 1));
for(RG int i = 1; i <= len; i <<= 1)
for(RG int k = 0; k < i; ++k)
w[len / i * k] = Complex(cos(pi / i * k), sin(pi / i * k));
}
int main(RG int argc, RG char* argv[]){
Input(n), Input(m), Input(k);
scanf(" %s %s", s, t);
reverse(t, t + m), Prepare();
for(RG int j = 0; j < 4; ++j){
for(RG int i = 0; i < len; ++i) a[i] = b[i] = Complex(0, 0);
for(RG int i = 0, lst = -oo; i < n; ++i){
if(s[i] == tp[j]) lst = i;
if(i - lst <= k) a[i].real = 1;
}
for(RG int i = n - 1, lst = oo; ~i; --i){
if(s[i] == tp[j]) lst = i;
if(lst - i <= k) a[i].real = 1;
}
for(RG int i = 0; i < m; ++i) b[i].real = t[i] == tp[j];
FFT(a, 1), FFT(b, 1);
for(RG int i = 0; i < len; ++i) a[i] = a[i] * b[i];
FFT(a, -1);
for(RG int i = 0; i < n; ++i) ans[i] += int(a[i].real / len + 0.5);
}
for(RG int i = 0; i < n; ++i) if(ans[i] == m) ++cnt;
printf("%d\n", cnt);
return 0;
}