HDU 3518 Boring counting (后缀自动机)

统计至少出现两次的不重叠子串的个数

后缀数组可做,所以我用后缀自动机

考虑到任意子串都包含在sam的某个状态中,所以对于某个状态st,只要| endpos(st) | >= 2
则该状态中长度 <= max( endpos( st ) ) - min( endpos( st ) ) 的子串都是满足条件的
所以只需要求出两个数组L[ i ],R[ i ],分别表示endpos( i )中的最小值和最大值
因为parent tree中叶子结点的| endpos |一定是1,即L == R
又因为建立sam过程中,每新增加一个状态,从这个状态到根节点的路径上所有状态的R都可以更新为当前字符的位置
也就是说所有叶子结点的L和R都是已知的
又因为儿子的endpos是父亲的子集,所以可以拓扑排序求出所有结点的L和R
至于父亲到儿子转移过程中丢失的状态,可以在每次插入字符的时候直接记录新增加的状态的L和R(因为丢失的状态一定包含原串的某个前缀)

#include <bits/stdc++.h>
#define Cpy(a, b) memcpy(a, b, sizeof(a))
using namespace std;
typedef long long ll;
// Capacity of every per-state array; the automaton of a string of length n
// has fewer than 2n states, and the input buffer below allows n <= 1009.
const int Maxn = 2010;
// Size in bytes of the input buffer (string plus terminating NUL).
const int maxn = 1010;
// l[st]: smallest end position recorded for state st (0 = not recorded yet).
int l[Maxn];
// r[st]: largest end position recorded for state st.
int r[Maxn];
// in[st]: number of states whose suffix link points at st (used by tp()).
int in[Maxn];
// Suffix automaton over the 26 lowercase letters, stored in fixed-size arrays.
// State 1 is the root; index 0 means "no state" / "no transition".
// While building, the global arrays l[] and r[] are updated with the end
// positions seen for each state.
struct Suffix_Automata
{
    // maxlen[st]: length of the longest substring represented by st.
    // trans[st][c]: target of the transition on letter c (0 if absent).
    // link[st]: suffix link of st (0 only for the root).
    // Size: index of the most recently allocated state; Last: state of the whole prefix.
    // The automaton of a string of length n has at most 2n-1 states.
    int maxlen[Maxn], trans[Maxn][26], link[Maxn], Size, Last;

    // Resets the automaton to a lone root state.
    void init()
    {
        Last = 1;
        Size = 1;
        memset(trans[1], 0, sizeof(trans[1]));
        link[1] = 0;
    }

    // Appends letter `id` (0..25) as the character at 1-based position `pos`.
    inline void Extend(int pos, int id)
    {
        const int fresh = ++Size;
        maxlen[fresh] = maxlen[Last] + 1;
        memset(trans[fresh], 0, sizeof(trans[fresh]));
        // The new state stands for the prefix ending at pos, so pos is recorded
        // for it directly; this keeps positions that no child state carries.
        l[fresh] = r[fresh] = pos;

        // Give every suffix state that lacks an `id` transition one into fresh.
        int walker = Last;
        while (walker != 0 && trans[walker][id] == 0)
        {
            trans[walker][id] = fresh;
            walker = link[walker];
        }

        if (walker == 0)
        {
            // Fell off the root: the letter has not occurred before.
            link[fresh] = 1;
        }
        else
        {
            const int target = trans[walker][id];
            if (maxlen[target] == maxlen[walker] + 1)
            {
                link[fresh] = target;
            }
            else
            {
                // Split target: the copy keeps only the strings of length
                // <= maxlen[walker] + 1 and inherits target's transitions/link.
                const int copy = ++Size;
                maxlen[copy] = maxlen[walker] + 1;
                memcpy(trans[copy], trans[target], sizeof(trans[copy]));
                link[copy] = link[target];
                while (walker != 0 && trans[walker][id] == target)
                {
                    trans[walker][id] = copy;
                    walker = link[walker];
                }
                link[target] = copy;
                link[fresh] = copy;
            }
        }
        Last = fresh;

        // Every state on the suffix-link path from fresh to the root now also
        // ends at pos: pos becomes its largest end position and, if none was
        // recorded before, its smallest one as well.
        for (int state = fresh; state > 0; state = link[state])
        {
            r[state] = pos;
            l[state] = (l[state] != 0) ? std::min(pos, l[state]) : pos;
        }
    }
} T;
// Input buffer that main() fills with one whitespace-delimited token per test case.
char s[maxn];
// Pushes l/r information up the suffix-link tree in topological order:
// starting from states with in[] == 0, each state folds its smallest/largest
// end position into its link target, which is queued once all of its
// children have been processed. Expects in[] to hold the child counts.
void tp()
{
    std::queue<int> ready;
    for (int st = 1; st <= T.Size; ++st)
    {
        if (!in[st])
            ready.push(st);
    }

    while (!ready.empty())
    {
        const int child = ready.front();
        ready.pop();

        const int parent = T.link[child];
        if (parent != 0) // the root has no parent to update
        {
            if (l[child] < l[parent])
                l[parent] = l[child];
            if (r[child] > r[parent])
                r[parent] = r[child];
            if (--in[parent] == 0)
                ready.push(parent);
        }
    }
}
// Reads strings until a token starting with '#' (or end of input) and, for
// each one, prints the number of distinct substrings that occur at least
// twice without overlapping.
int main()
{
    // "== 1" stops on EOF (scanf returns EOF, which is non-zero, so a plain
    // truth test would loop forever on stale data). The field width 1009
    // keeps the token inside s[1010], leaving room for the terminating NUL.
    while (scanf("%1009s", s) == 1 && s[0] != '#')
    {
        T.init();
        memset(l, 0, sizeof(l));
        memset(r, 0, sizeof(r));
        memset(in, 0, sizeof(in));

        const int len = static_cast<int>(strlen(s));
        for (int i = 0; i < len; i++)
            T.Extend(i + 1, s[i] - 'a'); // positions are 1-based

        // Child counts in the suffix-link tree, consumed by tp().
        for (int i = 1; i <= T.Size; i++)
            in[T.link[i]]++;
        tp();

        ll ans = 0;
        for (int i = 2; i <= T.Size; i++) // state 1 is the root (empty string)
        {
            // Two occurrences ending at l[i] and r[i] do not overlap iff the
            // substring is no longer than the distance between them.
            const int span = r[i] - l[i];
            const int minlen = T.maxlen[T.link[i]] + 1;
            const int longest = std::min(T.maxlen[i], span);
            if (longest >= minlen)
                ans += longest - minlen + 1;
        }
        printf("%lld\n", ans);
    }
    return 0;
}

更新一波,把拓扑排序删了,在新增状态时更新\(l\),\(r\)即可

#include <bits/stdc++.h>
#define Cpy(a, b) memcpy(a, b, sizeof(a))
using namespace std;
typedef long long ll;
// Capacity of every per-state array; the automaton of a string of length n
// has fewer than 2n states, and the input buffer below allows n <= 1009.
const int Maxn = 2010;
// Size in bytes of the input buffer (string plus terminating NUL).
const int maxn = 1010;
// l[st]: smallest end position of state st (0 = not recorded yet).
int l[Maxn];
// r[st]: largest end position of state st.
int r[Maxn];
// Suffix automaton over the 26 lowercase letters, stored in fixed-size arrays.
// State 1 is the root; index 0 means "no state" / "no transition".
// The global arrays l[] and r[] are maintained online: after each Extend()
// they hold the smallest / largest end position recorded for every state.
struct Suffix_Automata
{
    // maxlen[st]: length of the longest substring represented by st.
    // trans[st][c]: target of the transition on letter c (0 if absent).
    // link[st]: suffix link of st (0 only for the root).
    // Size: index of the most recently allocated state; Last: state of the whole prefix.
    int maxlen[Maxn], trans[Maxn][26], link[Maxn], Size, Last;

    // Resets the automaton to a lone root state.
    void init()
    {
        Last = 1;
        Size = 1;
        memset(trans[1], 0, sizeof(trans[1]));
        link[1] = 0;
    }

    // Appends letter `id` (0..25) as the character at 1-based position `pos`.
    inline void Extend(int pos, int id)
    {
        const int added = ++Size;
        maxlen[added] = maxlen[Last] + 1;
        memset(trans[added], 0, sizeof(trans[added]));
        l[added] = r[added] = pos;

        // Give every suffix state that lacks an `id` transition one into added.
        int up = Last;
        for (; up != 0 && trans[up][id] == 0; up = link[up])
            trans[up][id] = added;

        if (up == 0)
        {
            // Fell off the root: the letter has not occurred before.
            link[added] = 1;
        }
        else
        {
            const int old = trans[up][id];
            if (maxlen[old] == maxlen[up] + 1)
            {
                link[added] = old;
            }
            else
            {
                // Split old: the duplicate keeps only the strings of length
                // <= maxlen[up] + 1 and starts out with old's transitions,
                // suffix link and end-position bounds.
                const int dup = ++Size;
                maxlen[dup] = maxlen[up] + 1;
                memcpy(trans[dup], trans[old], sizeof(trans[dup]));
                link[dup] = link[old];
                l[dup] = l[old];
                r[dup] = r[old];
                for (; up != 0 && trans[up][id] == old; up = link[up])
                    trans[up][id] = dup;
                link[old] = dup;
                link[added] = dup;
            }
        }
        Last = added;

        // Every state on the suffix-link path from added to the root now also
        // ends at pos: pos becomes its largest end position and, if none was
        // recorded before, its smallest one as well.
        for (int state = added; state > 0; state = link[state])
        {
            r[state] = pos;
            l[state] = (l[state] != 0) ? std::min(pos, l[state]) : pos;
        }
    }
} T;
// Input buffer that main() fills with one whitespace-delimited token per test case.
char s[maxn];
// Reads strings until a token starting with '#' (or end of input) and, for
// each one, prints the number of distinct substrings that occur at least
// twice without overlapping.
int main()
{
    // "== 1" stops on EOF (scanf returns EOF, which is non-zero, so a plain
    // truth test would loop forever on stale data). The field width 1009
    // keeps the token inside s[1010], leaving room for the terminating NUL.
    while (scanf("%1009s", s) == 1 && s[0] != '#')
    {
        T.init();
        memset(l, 0, sizeof(l));
        memset(r, 0, sizeof(r));

        const int len = static_cast<int>(strlen(s));
        for (int i = 0; i < len; i++)
            T.Extend(i + 1, s[i] - 'a'); // positions are 1-based

        ll ans = 0;
        for (int i = 2; i <= T.Size; i++) // state 1 is the root (empty string)
        {
            // Two occurrences ending at l[i] and r[i] do not overlap iff the
            // substring is no longer than the distance between them.
            const int span = r[i] - l[i];
            const int minlen = T.maxlen[T.link[i]] + 1;
            const int longest = std::min(T.maxlen[i], span);
            if (longest >= minlen)
                ans += longest - minlen + 1;
        }
        printf("%lld\n", ans);
    }
    return 0;
}
posted @ 2019-09-25 09:23  Zeronera  阅读(161)  评论(0编辑  收藏  举报