题解 基因切割
发现等价于每个机器先将全部匹配的子串都删掉
那么用 kmp 可以做到 \(O(nm)\)
正解考虑分治:
设定阈值 \(B\),串长较大时跑 kmp
较小时维护所有 \(len\leqslant B\) 的子串的 hash 值
复杂度平衡后是 \(O(n^{\frac{5}{3}})\)
但是其实可以直接 bitset 做
删除一个子串的话可以左右移实现
精细实现可以卡过
- 貌似 bitset 中 `&=` 一类操作比 `&` 更快
点击查看代码
#include <bits/stdc++.h>
using namespace std;
#define INF 0x3f3f3f3f
#define N 100010
#define ll long long
#define ull unsigned long long
//#define int long long
// File-scope state shared by add(), kth(), del() and main().
// n:   current length of the string; decremented by del() for every removed character.
// m:   number of patterns, read in main().
// lim: length of the string as originally read; upper bound of the Fenwick-tree indices.
int n, m, lim;
// Not referenced by any code visible in this section.
ull h[N];
// vis[i] is set to 1 by del() once original position i has been removed.
bool vis[N];
// sqr, base and mp are not referenced by any code visible in this section
// (presumably leftovers from a hashing-based attempt -- TODO confirm before removing).
const int sqr=20;
const ull base=13131;
unordered_map<ull, set<int>> mp;
// s:    the input string; main() overwrites each character with its letter code.
// t:    the pattern currently being processed, converted to letter codes the same way.
// tr:   maps 'A','T','C','G' to the codes 1..4 (every other entry stays 0).
// back: maps a code 1..4 back to its letter.
char s[N], t[N], tr[256], back[5];
// nxt/pre: next/previous links between original positions (-1 marks the end),
//          spliced by del() when a position is removed.
// bit:     Fenwick-tree storage (1-based) updated by add() and searched by kth().
// sta/top: not referenced by any code visible in this section.
int nxt[N], pre[N], sta[N], bit[N], top;
// pos[c]:  bit i is set when the character at index i of the current string has code c;
//          del() compacts pos[1..4] after each removal (pos[0] is left untouched).
// mask:    start positions where the current pattern matches.
// one:     bits [0, n) set; written in main() and del(), not read in the visible code.
// t1, t2:  scratch bitsets used by del().
// all_one: every bit set (filled in main()).
bitset<100000> pos[5], mask, one, t1, t2, all_one;
// Fenwick-tree point update: adds `dat` to the entry for 0-based position `i`.
// The tree itself is 1-based, so the index is shifted by one before walking
// upwards through every node (up to `lim`) that covers it.
inline void add(int i, int dat) {
    int idx = i + 1;
    while (idx <= lim) {
        bit[idx] += dat;
        idx += idx & -idx;  // jump to the next node covering this index
    }
}
// Fenwick-tree descent: returns the largest 0-based position whose prefix
// (positions strictly before it) sums to at most `k`. When the tree stores
// 0/1 presence flags as maintained by add(), that is the position of the
// element with 0-based rank `k` among those still present.
// The starting step of 2^16 covers trees with fewer than 2^17 entries.
inline int kth(int k) {
    int reached = 0;        // 1-based node index accumulated so far
    int remaining = k + 1;  // 1-based rank still to be consumed
    for (int step = 1 << 16; step > 0; step >>= 1) {
        const int candidate = reached | step;
        if (candidate <= lim && remaining > bit[candidate]) {
            reached = candidate;
            remaining -= bit[candidate];
        }
    }
    return reached;
}
// Removes `len` consecutive characters starting at index `now` of the
// current (already compacted) string.
//
// Three pieces of state are kept in sync:
//   * pos[1..4]: the bits after the removed range slide down by `len`, which
//     is equivalent to (low `now` bits) | ((bits >> (now + len)) << now);
//   * n / one:   the length shrinks by `len` and the top `len` bits of `one`
//     are cleared;
//   * the Fenwick tree, vis[] and the nxt/pre links over original positions:
//     the removed positions are unlinked and marked.
void del(int now, int len) {
    const int first = now;            // first index being removed
    const int after_last = now + len; // first index behind the removed range

    // Mask selecting the untouched prefix, i.e. bits [0, first).
    t1 = all_one;
    t1 >>= 100000 - first;

    for (int code = 1; code <= 4; ++code) {
        // Everything behind the removed range, moved down to start at `first`.
        t2 = pos[code];
        t2 >>= after_last;
        t2 <<= first;
        pos[code] &= t1;
        pos[code] |= t2;
    }

    // Shrink the string: drop the top `len` bits of `one`.
    for (int k = len; k > 0; --k) {
        --n;
        one[n] = 0;
    }

    // Translate the current index into an original position, then unlink
    // `len` consecutive live positions starting there.
    int node = kth(now);
    while (len) {
        add(node, -1);
        vis[node] = 1;
        const int before = pre[node];
        const int behind = nxt[node];
        if (before != -1) nxt[before] = behind;
        if (behind != -1) pre[behind] = before;
        --len;
        node = behind;
    }
}
// Entry point.
//
// Reads the text and the pattern count from dna.in, then handles the patterns
// one at a time: all start positions of the pattern in the current text are
// found by AND-ing shifted per-letter bitsets, the matches are taken greedily
// from the left without overlap, and each one is removed through del().
// The characters that survive every pattern are written to dna.out, followed
// by a newline (an empty line if nothing survives).
signed main()
{
    freopen("dna.in", "r", stdin);
    freopen("dna.out", "w", stdout);

    // The width keeps the text inside s[] and inside the 100000-bit bitsets.
    scanf("%100000s%d", s, &m);
    lim = n = static_cast<int>(strlen(s));

    // Letter <-> code tables: A=1, T=2, C=3, G=4.
    const char letters[] = "ATCG";
    for (int c = 0; c < 4; ++c) {
        tr[static_cast<unsigned char>(letters[c])] = static_cast<char>(c + 1);
        back[c + 1] = letters[c];
    }

    // Encode the text and build the per-letter bitsets, the position links
    // and the Fenwick tree of live positions.
    for (int i = 0; i < n; ++i) {
        // Index tr[] through unsigned char so a negative char cannot underflow.
        s[i] = tr[static_cast<unsigned char>(s[i])];
        pos[static_cast<int>(s[i])][i] = 1;
        one[i] = 1;
        nxt[i] = i + 1;
        pre[i] = i - 1;
        add(i, 1);
    }
    if (n > 0) nxt[n - 1] = -1;  // guarded: an empty text has no last node
    all_one.set();

    for (int i = 1; i <= m; ++i) {
        // Stop if the pattern list ends early; the width keeps the read inside t[].
        if (scanf("%100009s", t) != 1) break;
        const int len = static_cast<int>(strlen(t));
        for (int j = 0; j < len; ++j) t[j] = tr[static_cast<unsigned char>(t[j])];

        // mask[p] stays set only if the letter at p+j equals t[j] for every j.
        mask = pos[static_cast<int>(t[0])];
        for (int j = 1; j < len && mask.any(); ++j) mask &= (pos[static_cast<int>(t[j])] >> j);
        if (!mask.any()) continue;

        // mask was computed before any deletion for this pattern, so a match
        // found at `hit` now sits `removed` characters further left.
        // Searching from hit+len-1 skips occurrences overlapping the one just taken.
        int removed = 0;
        for (int hit = static_cast<int>(mask._Find_first()); hit < 100000;
             hit = static_cast<int>(mask._Find_next(hit + len - 1))) {
            del(hit - removed, len);
            removed += len;
        }
    }

    // Find the first surviving original position, then follow the links.
    // The scan is bounded by lim so that a fully deleted (or empty) text
    // prints nothing instead of walking past the end of the string.
    int cur = 0;
    while (cur < lim && vis[cur]) ++cur;
    if (cur < lim) {
        for (; ~cur; cur = nxt[cur]) printf("%c", back[static_cast<int>(s[cur])]);
    }
    printf("\n");
    return 0;
}