bzoj4892

后缀数组

先开始nc了,觉得自动机做法是指数级的,就写了个后缀数组

具体做法是暴力:枚举文本串中的每个起点,然后用 lcp 向后跳着匹配;每遇到一次失配就消耗一次机会并跳过该字符,我们一共有 3 次机会。这样每个起点只需要 O(1) 次 lcp 查询,所以总复杂度是 O(nlogn+n)。然后 T 掉了,交了发别人的代码,bzoj 怎么那么慢,洛谷跑得飞快。调了很长时间才发现是 sa 板子写错了,明明是粘过来的。。。

后缀自动机的做法就是在自动机上匹配:如果当前字符不匹配,可以(消耗一次失配机会后)随便走一条转移边,每次匹配完整个串后统计答案就行了。

#include<bits/stdc++.h>
using namespace std;
// Capacity of every fixed-size buffer below; the code indexes all of them from 1.
const int N = 2e5 + 5;
// n, m : lengths of s and t (set with strlen in main).
// len  : length of the combined sequence p.
// pos  : index in p where the encoded copy of t starts.
// ans  : answer for the current test case.
// k    : not used by the code visible here (Sa declares its own local k).
int n, m, k, len, pos, ans;
// Raw input strings; main reads them into s + 1 / t + 1, so index 0 is unused.
char s[N], t[N];
// p    : encoded s, then the separator value 4, then encoded t (built in main).
// a, b : first / second sort keys of each suffix during one doubling round in Sa.
// rank : rank[i] = position of suffix i in sa.
//        NOTE(review): with `using namespace std;` this name can clash with
//        std::rank (<type_traits>) when compiled as C++11 or later.
// sa   : sa[r] = start index of the suffix with rank r.
// mn   : sparse table; mn[r][0] is the LCP of the suffixes ranked r and r + 1,
//        mn[r][j] the minimum over 2^j consecutive entries starting at r.
// mp   : character code -> small integer (filled for 'A', 'G', 'C', 'T' in main).
// Log  : Log[x] = floor(log2(x)), filled in main.
// lcp, tmp : not used by the code visible here (main declares a local tmp).
int p[N], a[N], b[N], rank[N], lcp[N], sa[N], mn[N][19], mp[256], Log[N], tmp[N];
// Stable counting sort of an index list.
//
// Reads the indices a[1..n], orders them by the key s[a[i]] and writes the
// result to b[1..n]. Indices with equal keys keep their relative order from a.
//
//   s : key array, addressed by the values stored in a
//   a : input index list (1-based, n entries)
//   b : output index list (1-based, n entries); must not alias a
//   n : number of indices to sort
//   m : largest key value; every key must lie in [0, m]
//
// Only m + 1 buckets are allocated and cleared, so the cost is O(n + m)
// instead of clearing a full N-sized stack array on every call.
void radix(int *s, int *a, int *b, int n, int m)
{
    if(n <= 0 || m < 0) return;
    std::vector<int> bucket(m + 1);                 // value-initialised to 0
    // Histogram of the keys.
    for(int i = 1; i <= n; ++i) ++bucket[s[a[i]]];
    // Prefix sums: bucket[v] becomes the last output slot for key v.
    for(int v = 1; v <= m; ++v) bucket[v] += bucket[v - 1];
    // Walk the input backwards so that equal keys stay in input order.
    for(int i = n; i >= 1; --i) b[bucket[s[a[i]]]--] = a[i];
}
void Sa(int *s, int n)
{
    for(int i = 1; i <= n; ++i) rank[i] = i;
    radix(s, rank, sa, n, 26);
    rank[sa[1]] = 1;
    for(int i = 2; i <= n; ++i) rank[sa[i]] = rank[sa[i - 1]] + (s[sa[i]] != s[sa[i - 1]]);
    for(int k = 1; k <= n; k <<= 1)
    {
        for(int i = 1; i <= n; ++i)
        {
            a[i] = rank[i];
            b[i] = i + k <= n ? rank[i + k] : 0;
            sa[i] = i; 
        }
        radix(b, sa, rank, n, n);
        radix(a, rank, sa, n, n);
        rank[sa[1]] = 1;
        for(int i = 2; i <= n; ++i) rank[sa[i]] = rank[sa[i - 1]] + (a[sa[i]] != a[sa[i - 1]] || b[sa[i]] != b[sa[i - 1]]);      
    }
}
void Lcp(int *s, int n)
{
    int h = 0;
    for(int i = 1; i <= n; ++i) rank[sa[i]] = i;
    for(int i = 1; i <= n; ++i)
    {
        int j = sa[rank[i] - 1];
        if(rank[i] <= 1) continue;
        if(h > 0) --h;
        for(; i + h <= n && j + h <= n; ++h) if(s[i + h] != s[j + h]) break;
        mn[rank[i] - 1][0] = h;
    }
    for(int j = 1; j <= 18; ++j)
        for(int i = 1; i + (1 << j) - 1 <= n; ++i)
            mn[i][j] = min(mn[i][j - 1], mn[i + (1 << (j - 1))][j - 1]);
}
int query(int l, int r)
{
    l = rank[l];
    r = rank[r];
    if(l > r) swap(l, r);
    --r;
    int x = Log[r - l + 1];
    return min(mn[l][x], mn[r - (1 << x) + 1][x]);
}
// Reads T test cases, each consisting of two strings s and t, and prints for
// each case how many length-|t| windows of s differ from t in at most 3 positions.
//
// The strings are assumed to fit the fixed buffers (|s| + |t| + 1 < N) and to
// consist of the letters A, G, C, T; other characters are all encoded as 0.
int main()
{
    int T = 0;
    // Without a valid test count there is nothing to do (and T must not be read uninitialised).
    if(scanf("%d", &T) != 1) return 0;
    mp['A'] = 0;
    mp['G'] = 1;
    mp['C'] = 2;
    mp['T'] = 3;
    for(int i = 2; i < N; ++i) Log[i] = Log[i >> 1] + 1;   // Log[x] = floor(log2(x))
    while(T--)
    {
        ans = 0;
        // Stop on truncated input instead of re-processing the previous buffers.
        if(scanf("%s%s", s + 1, t + 1) != 2) break;
        len = 0;
        n = strlen(s + 1);
        m = strlen(t + 1);
        // Build p = encode(s) + separator + encode(t). The separator value 4 occurs
        // nowhere else, so a common prefix can never run across it.
        // The cast keeps the table index non-negative where char is signed.
        for(int i = 1; i <= n; ++i) p[++len] = mp[static_cast<unsigned char>(s[i])];
        p[++len] = 4;
        pos = len + 1;
        for(int i = 1; i <= m; ++i) p[++len] = mp[static_cast<unsigned char>(t[i])];
        Sa(p, len);
        Lcp(p, len);
        for(int i = 1; i <= n - m + 1; ++i)
        {
            // tmp: symbols of t still to be covered; cnt: mismatches still allowed;
            // p1 / p2: current positions in the s part and the t part of p.
            int tmp = m, cnt = 3, p1 = i, p2 = pos;
            while(tmp > 0)
            {
                // Jump over the longest run of equal symbols in one query.
                int x = query(p1, p2);
                tmp -= x;
                p1 += x;
                p2 += x;
                if(tmp <= 0) break;
                // Step over mismatching symbols one at a time, spending one allowance
                // each. Bounds are tested before p is read.
                while(cnt >= 0 && p1 <= n && p2 <= len && p[p1] != p[p2])
                {
                    ++p1;
                    ++p2;
                    --tmp;
                    --cnt;
                }
                if(cnt < 0 || p2 > len || p1 > n) break;
            }
            if(cnt >= 0 && tmp <= 0) ++ans;
        }
        printf("%d\n", ans);
    }
    return 0;
}
View Code

 

posted @ 2017-12-10 12:08  19992147  阅读(181)  评论(0)  编辑  收藏  举报