题解 基因切割

传送门

发现等价于每个机器先将全部匹配的子串都删掉
那么用 kmp 可以做到 \(O(nm)\)

正解考虑分治:
设定阈值 \(B\),串长较大时跑 kmp
较小时维护所有 \(len\leqslant B\) 的子串的 hash 值
复杂度平衡后是 \(O(n^{\frac{5}{3}})\)

但是其实可以直接 bitset 做
删除一个子串的话可以左右移实现
精细实现可以卡过

  • 貌似 bitset 中 &= 一类操作比 & 更快
点击查看代码
#include <bits/stdc++.h>
using namespace std;
// INF and ll are not referenced by any code visible here.
#define INF 0x3f3f3f3f
// Capacity of the per-index arrays below.
#define N 100010
#define ll long long
#define ull unsigned long long
//#define int long long

// n: current length of the working string (del() shrinks it); m: number of
// patterns read in main(); lim: original string length, which also bounds the
// Fenwick tree walked by add()/kth().
int n, m, lim;
// Not referenced by any code visible here.
ull h[N];
// vis[i] is set by del() once original index i has been removed.
bool vis[N];
// sqr, base and mp are not referenced by any code visible here.
const int sqr=20;
const ull base=13131;
unordered_map<ull, set<int>> mp;
// s: input string, re-encoded in main() to codes 1..4; t: buffer for the
// current pattern; tr: letter -> code table; back: code -> letter table.
char s[N], t[N], tr[256], back[5];
// nxt/pre: doubly linked list over original indices (-1 marks an end);
// bit: Fenwick tree counting the surviving original indices;
// sta and top are not referenced by any code visible here.
int nxt[N], pre[N], sta[N], bit[N], top;
// pos[c]: bit i is set when the current string holds code c at position i;
// mask: start positions where the current pattern matches;
// one: bit i is set for every i below the current length;
// t1/t2: scratch bitsets used by del(); all_one: every bit set (see main()).
bitset<100000> pos[5], mask, one, t1, t2, all_one;
// Fenwick-tree point update: adds dat to the counter of 0-based position i.
inline void add(int i, int dat) {
	int idx = i + 1;  // the tree itself is 1-based
	while (idx <= lim) {
		bit[idx] += dat;
		idx += idx & -idx;  // jump to the next node covering this position
	}
}
// Fenwick-tree select: returns the 0-based original index of the element that
// currently has rank k (0-based) among the positions still counted in bit[].
inline int kth(int k) {
	int remaining = k + 1;  // switch to a 1-based rank
	int idx = 0;
	// Binary lifting from the highest power of two downwards.
	for (int step = 1 << 16; step > 0; step >>= 1) {
		const int cand = idx | step;
		// Skip nodes outside the tree, or nodes that already hold the rank we seek.
		if (cand > lim || bit[cand] >= remaining) continue;
		idx = cand;
		remaining -= bit[cand];
	}
	return idx;
}

// Removes len characters starting at position now of the CURRENT string.
// Updates the per-letter bitsets, the current length n, the Fenwick tree,
// the vis[] flags and the nxt/pre linked list over original indices.
void del(int now, int len) {
	const int first = now;
	const int past = now + len;  // first position after the removed range

	// t1 keeps exactly the bits below `first`.
	t1 = all_one;
	t1 >>= 100000 - first;
	for (int c = 1; c <= 4; ++c) {
		// Slide everything after the removed range down so it starts at `first` ...
		t2 = pos[c];
		t2 >>= past;
		t2 <<= first;
		// ... and splice it onto the untouched prefix.
		pos[c] &= t1;
		pos[c] |= t2;
	}

	// The working string is now len characters shorter.
	for (int k = 0; k < len; ++k) one[--n] = 0;

	// Translate the current position into an original index, then unlink
	// len consecutive surviving nodes from the list.
	int cur = kth(now);
	for (int left = len; left != 0; --left) {
		add(cur, -1);
		vis[cur] = 1;
		const int before = pre[cur];
		const int after = nxt[cur];
		if (before != -1) nxt[before] = after;
		if (after != -1) pre[after] = before;
		cur = after;
	}
}

signed main()
{
	freopen("dna.in", "r", stdin);
	freopen("dna.out", "w", stdout);

	scanf("%s%d", s, &m);
	lim=n=strlen(s);
	back[tr['A']=1]='A', back[tr['T']=2]='T', back[tr['C']=3]='C', back[tr['G']=4]='G';
	for (int i=0; i<n; ++i) pos[s[i]=tr[s[i]]][i]=1, one[i]=1;
	for (int i=0; i+1<n; ++i) nxt[i]=i+1; nxt[n-1]=-1;
	for (int i=1; i<n; ++i) pre[i]=i-1; pre[0]=-1;
	for (int i=0; i<n; ++i) add(i, 1);
	all_one.set();
	// mask=one;
	// cout<<mask<<endl;
	// mask=(mask<<100000-3+1)>>100000-3+1;
	// cout<<mask<<endl;
	for (int i=1,len; i<=m; ++i) {
		// cout<<"i: "<<i<<endl;
		scanf("%s", t);
		len=strlen(t);
		for (int j=0; j<len; ++j) t[j]=tr[t[j]];
		mask=pos[t[0]];
		for (int j=1; j<len&&mask.any(); ++j) mask&=(pos[t[j]]>>j);
		if (!mask.any()) continue;
		// cout<<mask<<endl;
		for (int pos=mask._Find_first(),lst=-1,dlt=0; pos<100000; pos=mask._Find_next(pos+len-1)) {
			// cout<<"pos: "<<pos<<endl;
			if (lst==-1) del(pos, len), lst=pos, dlt+=len;
			else del(pos-dlt, len), lst=pos, dlt+=len;
		}
	}
	// for (int i=1; i<=4; ++i) cout<<pos[i]<<endl;
	int pos;
	for (pos=0; vis[pos]; ++pos);
	for (; ~pos; pos=nxt[pos]) printf("%c", back[s[pos]]);
	printf("\n");

	return 0;
}
posted @ 2022-05-07 21:53  Administrator-09  阅读(2)  评论(0)  编辑  收藏  举报