CF528D Fuzzy Search

题意

有两个基因串\(S\)和\(T\),它们只包含\(AGCT\)四种字符。
现在你要找出\(T\)在\(S\)中出现了几次。
有一个门限值\(k \ge 0\):
只要\(T[i]\)与\(S[j-k]\)到\(S[j+k]\)中的某个字符相同,就视为\(T[i]\)在位置\(j\)上匹配。
\((1 \le |T| \le |S| \le 200000,\ 0 \le k \le 200000)\)

Sol

套路
这类字符串的匹配都可以通过把一个串翻转然后\(FFT\)得到
每个字符分开考虑
如果有这个字符视为\(1\),否则为\(0\)
那么\(FFT\)后的系数\(a[i]\)就表示这个字符在\(1\)~\(i\)有多少对匹配
卷积后就相当于\(reverse\)回来的一一匹配

那么只要每个字符在这个位置的之前的一一匹配对数加起来为\(|T|\)那么就匹配上了一次

考虑有个阈值\(k\)
那么构建\(S\)的多项式时,系数\(a[i]\)就是判断一下\(s[i-k]\)到\(s[i+k]\)之间有没有这个字符就行了

# include <bits/stdc++.h>
# define IL inline
# define RG register
# define Fill(a, b) memset(a, b, sizeof(a))
using namespace std;
typedef long long ll;

/**
 * Reads one decimal integer from stdin into x.
 *
 * Every non-digit character in front of the number is skipped. If the
 * character immediately preceding the first digit is '-', the value is
 * negated. The single character following the last digit is consumed.
 * If the end of input is reached before any digit is seen, x is set to 0.
 *
 * @param x receives the parsed value (any integral type).
 */
template <class Int>
inline void Input(Int &x){
	int sign = 1;
	// getchar() returns int; keeping the result as int lets EOF be told apart
	// from real characters, so empty or truncated input ends the scan instead
	// of looping forever.
	int c = getchar();
	x = 0;
	while(c != EOF && (c < '0' || c > '9')){
		sign = (c == '-') ? -1 : 1;
		c = getchar();
	}
	while(c >= '0' && c <= '9'){
		x = x * 10 + (c - '0');
		c = getchar();
	}
	x *= sign;
}

// Capacity of the text buffers: the 200000 upper bound on the string lengths
// plus a little slack (room for the terminating NUL).
const int maxn(2e5 + 5);
// "Infinity" sentinel; main() uses -oo / +oo as the "no occurrence seen yet" position.
const int oo(1e9);
const double pi(acos(-1));

// n, m, k : the three integers read first by main() (lengths of s and t, then the threshold).
// r       : index permutation applied at the start of FFT(); filled by Prepare().
// len     : transform length (a power of two), chosen by Prepare().
// ans     : per-position sum of the rounded convolution results over the four letters.
// cnt     : number of positions whose ans equals m; this is what main() prints.
int n, m, k, r[maxn << 2], len, ans[maxn], cnt;
// s, t : the two input strings; tp : the four letters processed one at a time in main().
char s[maxn], t[maxn], tp[4] = {'A', 'T', 'G', 'C'};

/**
 * Minimal complex number (real + image * i) used by the FFT routines.
 *
 * The arithmetic operators are const-qualified and take their operand by
 * const reference, so they also work on const objects and temporaries.
 *
 * The three global arrays declared with the struct are:
 *   a, b - working buffers that main() fills, transforms and multiplies;
 *   w    - table of unit-circle points filled by Prepare() and read by FFT().
 */
struct Complex{
	double real, image;

	// Default-constructs to 0 + 0i.
	inline Complex() : real(0), image(0) {}

	inline Complex(double re, double im) : real(re), image(im) {}

	inline Complex operator +(const Complex &rhs) const {
		return Complex(real + rhs.real, image + rhs.image);
	}

	inline Complex operator -(const Complex &rhs) const {
		return Complex(real - rhs.real, image - rhs.image);
	}

	// (x + yi)(u + vi) = (xu - yv) + (xv + yu)i
	inline Complex operator *(const Complex &rhs) const {
		return Complex(real * rhs.real - image * rhs.image,
		               real * rhs.image + image * rhs.real);
	}
} a[maxn << 2], b[maxn << 2], w[maxn << 2];

IL void FFT(RG Complex *p, RG int opt){
	for(RG int i = 0; i < len; ++i) if(r[i] < i) swap(p[i], p[r[i]]);
	for(RG int i = 1; i < len; i <<= 1)
		for(RG int j = 0, l = i << 1; j < len; j += l){
			for(RG int k = 0; k < i; ++k){
				RG Complex wn = Complex(w[len / i * k].real, w[len / i * k].image * opt);
				RG Complex x = p[k + j], y = wn * p[k + j + i];
				p[k + j] = x + y, p[k + j + i] = x - y;
			}
		}
}

IL void Prepare(){
	RG int l = 0, tmp = m + n - 1;
	for(len = 1; len < tmp; len <<= 1) ++l;
	for(RG int i = 0; i < len; ++i) r[i] = (r[i >> 1] >> 1) | ((i & 1) << (l - 1));
	for(RG int i = 1; i <= len; i <<= 1)
		for(RG int k = 0; k < i; ++k)
			w[len / i * k] = Complex(cos(pi / i * k), sin(pi / i * k));
}

int main(RG int argc, RG char* argv[]){
	Input(n), Input(m), Input(k);
	scanf(" %s %s", s, t);
	reverse(t, t + m), Prepare();
	for(RG int j = 0; j < 4; ++j){
		for(RG int i = 0; i < len; ++i) a[i] = b[i] = Complex(0, 0);
		for(RG int i = 0, lst = -oo; i < n; ++i){
			if(s[i] == tp[j]) lst = i;
			if(i - lst <= k) a[i].real = 1;
		}
		for(RG int i = n - 1, lst = oo; ~i; --i){
			if(s[i] == tp[j]) lst = i;
			if(lst - i <= k) a[i].real = 1;
		}
		for(RG int i = 0; i < m; ++i) b[i].real = t[i] == tp[j];
		FFT(a, 1), FFT(b, 1);
		for(RG int i = 0; i < len; ++i) a[i] = a[i] * b[i];
		FFT(a, -1);
		for(RG int i = 0; i < n; ++i) ans[i] += int(a[i].real / len + 0.5);
	}
	for(RG int i = 0; i < n; ++i) if(ans[i] == m) ++cnt;
	printf("%d\n", cnt);
	return 0;
}

posted @ 2018-04-13 14:28  Cyhlnj  阅读(109)  评论(0)  编辑  收藏  举报