bzoj4892
后缀数组
一开始犯傻了,以为后缀自动机的做法是指数级的,于是就写了个后缀数组。
具体方法是暴力:枚举起点,然后用 LCP 向后跳着匹配;每遇到一次失配就消耗一次机会,我们一共有 3 次机会。由于每个起点最多只会失配常数次,而每次 LCP 查询(ST 表)是 O(1) 的,所以每个起点的匹配复杂度是 O(1),总复杂度是 O(nlogn+n)。然后 T 掉了,交了一发别人的代码——BZOJ 怎么那么慢,洛谷跑得飞快。调了很长时间才发现 SA 板子写错了,明明是粘过来的……
后缀自动机的做法就是在自动机上匹配:如果当前字符不匹配,可以随便走一条转移边;每次匹配完整个串之后统计答案就行了。
#include <algorithm>
#include <cstdio>
#include <cstring>

// Counts, for each test case, the start positions in text `s` at which pattern
// `t` matches with at most 3 mismatched characters.
//
// Approach: build a suffix array over  s + separator + t,  derive the adjacent
// LCP values, put them in a sparse table, and then for every start position
// jump over matching stretches with O(1) LCP queries, spending one unit of the
// mismatch budget each time the jump stops on a differing character.

// Capacity of every per-position array (all arrays are 1-based).
const int N = 2e5 + 5;

int n, m, len, pos, ans;
char s[N], t[N];
// p   : concatenated integer string (text, separator 4, pattern)
// a,b : first / second sort keys of the current doubling round
// rnk : rank of each suffix (named `rnk` because a global `rank` is ambiguous
//       with the std::rank type trait once namespace std is visible)
// sa  : suffix array, sa[r] = start of the r-th smallest suffix
// mn  : sparse table of range minima over adjacent-suffix LCP values
// mp  : character -> small integer code
// Log : floor(log2(i)) lookup used by query()
int p[N], a[N], b[N], rnk[N], sa[N], mn[N][19], mp[256], Log[N];

/// Stable counting sort of the indices in[1..n] by key[in[i]], written to
/// out[1..n].
///
/// @param key  sort keys, every key[in[i]] must lie in [0, m]
/// @param in   indices to sort (1-based)
/// @param out  receives the sorted indices (1-based); must not alias `in`
/// @param n    number of indices
/// @param m    largest possible key value (m < N)
void radix(int *key, int *in, int *out, int n, int m) {
    // Static buffer: avoids an ~800 KB stack frame, and only the buckets that
    // can actually be touched (0..m) are cleared on each call.
    static int count[N];
    std::memset(count, 0, sizeof(int) * static_cast<size_t>(m + 1));
    for (int i = 1; i <= n; ++i) ++count[key[in[i]]];
    for (int i = 1; i <= m; ++i) count[i] += count[i - 1];
    // Walking backwards keeps equal keys in their input order (stability).
    for (int i = n; i; --i) out[count[key[in[i]]]--] = in[i];
}

/// Builds the suffix array `sa` and the rank array `rnk` of str[1..n] by
/// prefix doubling with two counting-sort passes per round.
///
/// @param str  integer string, 1-based, every value in [0, 26]
/// @param n    length of the string (n >= 1)
void Sa(int *str, int n) {
    for (int i = 1; i <= n; ++i) rnk[i] = i;
    radix(str, rnk, sa, n, 26);
    rnk[sa[1]] = 1;
    for (int i = 2; i <= n; ++i)
        rnk[sa[i]] = rnk[sa[i - 1]] + (str[sa[i]] != str[sa[i - 1]]);

    // Once every suffix has a distinct rank the order is final, so further
    // rounds would only reproduce the same arrays.
    for (int step = 1; step <= n && rnk[sa[n]] < n; step <<= 1) {
        for (int i = 1; i <= n; ++i) {
            a[i] = rnk[i];
            b[i] = i + step <= n ? rnk[i + step] : 0;
            sa[i] = i;
        }
        // Sort by the second key first (rnk doubles as scratch output), then
        // stably by the first key.
        radix(b, sa, rnk, n, n);
        radix(a, rnk, sa, n, n);
        rnk[sa[1]] = 1;
        for (int i = 2; i <= n; ++i)
            rnk[sa[i]] = rnk[sa[i - 1]] +
                         (a[sa[i]] != a[sa[i - 1]] || b[sa[i]] != b[sa[i - 1]]);
    }
}

/// Computes the LCP of every pair of adjacent suffixes in suffix-array order
/// and builds the sparse table `mn` over them.
///
/// After the call mn[r][0] is the LCP of the suffixes ranked r and r + 1
/// (1 <= r < n) and mn[r][j] is the minimum of mn[r .. r + 2^j - 1][0].
///
/// @param str  the string the suffix array was built from (1-based)
/// @param n    its length
void Lcp(int *str, int n) {
    for (int i = 1; i <= n; ++i) rnk[sa[i]] = i;

    int h = 0;
    for (int i = 1; i <= n; ++i) {
        if (rnk[i] <= 1) {
            // The smallest suffix has no predecessor to compare against.
            h = 0;
            continue;
        }
        const int j = sa[rnk[i] - 1];
        // The LCP can shrink by at most one when moving from suffix i - 1 to
        // suffix i, so resume the comparison from h - 1.
        if (h > 0) --h;
        while (i + h <= n && j + h <= n && str[i + h] == str[j + h]) ++h;
        mn[rnk[i] - 1][0] = h;
    }
    // Slot n has no "next" suffix; give it a defined value so that no data
    // from an earlier, longer test case leaks into the table.
    mn[n][0] = 0;

    for (int j = 1; j <= 18; ++j)
        for (int i = 1; i + (1 << j) - 1 <= n; ++i)
            mn[i][j] = std::min(mn[i][j - 1], mn[i + (1 << (j - 1))][j - 1]);
}

/// Returns the length of the longest common prefix of the suffixes starting at
/// positions l and r of the string last processed by Sa() and Lcp().
///
/// Precondition: l != r, both are valid 1-based positions.
int query(int l, int r) {
    l = rnk[l];
    r = rnk[r];
    if (l > r) std::swap(l, r);
    --r;  // adjacent-LCP slots l .. r-1 cover the ranks l .. r
    const int x = Log[r - l + 1];
    return std::min(mn[l][x], mn[r - (1 << x) + 1][x]);
}

int main() {
    int T;
    if (scanf("%d", &T) != 1) return 0;

    mp['A'] = 0;
    mp['G'] = 1;
    mp['C'] = 2;
    mp['T'] = 3;
    for (int i = 2; i < N; ++i) Log[i] = Log[i >> 1] + 1;

    while (T--) {
        ans = 0;
        if (scanf("%s%s", s + 1, t + 1) != 2) break;
        len = 0;
        n = strlen(s + 1);
        m = strlen(t + 1);

        // p = text, then separator 4 (differs from every letter code), then
        // pattern. Cast through unsigned char so a byte >= 0x80 cannot become
        // a negative index into mp.
        for (int i = 1; i <= n; ++i) p[++len] = mp[static_cast<unsigned char>(s[i])];
        p[++len] = 4;
        pos = len + 1;  // first position of the pattern inside p
        for (int i = 1; i <= m; ++i) p[++len] = mp[static_cast<unsigned char>(t[i])];

        Sa(p, len);
        Lcp(p, len);

        for (int i = 1; i <= n - m + 1; ++i) {
            int remaining = m;  // pattern characters not yet accounted for
            int budget = 3;     // mismatches still allowed
            int p1 = i;         // cursor in the text part
            int p2 = pos;       // cursor in the pattern part

            while (remaining > 0) {
                // Skip the whole matching stretch in one O(1) query.
                const int x = query(p1, p2);
                remaining -= x;
                p1 += x;
                p2 += x;
                if (remaining <= 0) break;

                // Step over mismatching characters one at a time. The budget
                // is allowed to reach -1, which marks a 4th mismatch.
                while (budget >= 0 && p1 <= n && p2 <= len && p[p1] != p[p2]) {
                    ++p1;
                    ++p2;
                    --remaining;
                    --budget;
                }
                if (budget < 0 || p2 > len || p1 > n) break;
            }
            if (budget >= 0 && remaining <= 0) ++ans;
        }
        printf("%d\n", ans);
    }
    return 0;
}