后缀数组好题选讲(2)

涉及到的处理问题套路

用单调队列来处理长度为特定值的$LCP$出现次数

luogu P2852 Milk Pattern G

题意简述

给定一个长度为$n$的串,求出现次数$\ge k$的子串的最大长度

$n\le 2\cdot 10^4$

Sol

先将后缀排序,求出$height$数组

由于$height[i]$代表的是$LCP(sa[i],sa[i-1])$,即排名相邻的两个后缀的最长公共前缀,而这两个后缀的起始位置一定不同(毕竟是后缀数组),那么这个长度为$height[i]$的公共前缀至少出现了$2$次

同理,区间$[i,i+k-1]$中$height$的最小值$s$代表某一个长度为$s$的子串至少出现了$k+1$次

所以一个长度为$k-1$的区间中$height$的最小值就是该区间中至少出现$k$次子串的最大长度

然后用单调队列解决即可。

#include <bits/stdc++.h>
using namespace std;
// Shared state of the P2852 solution. Every array is used 1-indexed.
//   n        length of the input sequence
//   k        occurrence threshold read by main
//   m, num   bucket count / number of distinct ranks while sorting suffixes
//   a[]      input sequence
//   x[]      current rank of the suffix starting at each position
//   y[]      scratch: positions ordered by second key, then the new ranks
//   sa[]     suffix array: sa[r] is the start of the r-th smallest suffix
//   c[]      counting-sort buckets
//   rnk[], height[]  inverse suffix array and adjacent-LCP array
int n, k, m, num, a[50005], x[50005], y[50005], sa[50005], c[50005];
int rnk[50005], height[50005];

/**
 * Builds the suffix array of a[1..n] into sa[1..n] using prefix doubling
 * with a stable counting sort.
 *
 * The values of a[] are first compressed to ranks 1..m (m = number of
 * distinct values). Using the raw values as bucket indices would only work
 * for values in 1..200: larger values overflow c[], and 0 collides with the
 * "past the end of the string" sentinel. a[] itself is left untouched.
 *
 * Side effects: overwrites x[], y[], c[], sa[], m and num.
 */
void get_SA()
{
    // Coordinate compression; y[] is free at this point and serves as scratch.
    std::copy(a + 1, a + n + 1, y + 1);
    std::sort(y + 1, y + n + 1);
    int *const uniq_end = std::unique(y + 1, y + n + 1);
    m = static_cast<int>(uniq_end - (y + 1));

    // Initial order: counting sort by the compressed first element.
    std::fill(c, c + m + 1, 0);
    for (int i = 1; i <= n; i++)
    {
        // Index of a[i] inside the sorted distinct values == its rank (1-based).
        x[i] = static_cast<int>(std::lower_bound(y + 1, uniq_end, a[i]) - y);
        ++c[x[i]];
    }
    for (int i = 2; i <= m; i++)
        c[i] += c[i - 1];
    for (int i = n; i >= 1; i--)
        sa[c[x[i]]--] = i;

    for (int len = 1; len <= n; len <<= 1)
    {
        // y[] := positions ordered by their second key (rank of position+len).
        // Suffixes shorter than len have an empty second half and come first.
        num = 0;
        for (int i = n - len + 1; i <= n; i++)
            y[++num] = i;
        for (int i = 1; i <= n; i++)
            if (sa[i] > len)
                y[++num] = sa[i] - len;

        // Stable counting sort by the first key, scanning y[] backwards so
        // the second-key order is preserved inside each bucket.
        std::fill(c, c + m + 1, 0);
        for (int i = 1; i <= n; i++)
            ++c[x[i]];
        for (int i = 2; i <= m; i++)
            c[i] += c[i - 1];
        for (int i = n; i >= 1; i--)
            sa[c[x[y[i]]]--] = y[i];

        // Re-rank: y[] is no longer needed as an ordering, so it receives the
        // new ranks while x[] still holds the old ones for comparison.
        num = 1;
        y[sa[1]] = 1;
        for (int i = 2; i <= n; i++)
        {
            const int cur = sa[i];
            const int prev = sa[i - 1];
            // A second half that starts past n is empty: rank 0, and never
            // read from the array so no index beyond n is touched.
            const int cur_second = (cur + len <= n) ? x[cur + len] : 0;
            const int prev_second = (prev + len <= n) ? x[prev + len] : 0;
            if (x[cur] != x[prev] || cur_second != prev_second)
                ++num;
            y[cur] = num;
        }
        std::copy(y + 1, y + n + 1, x + 1);

        if (num == n) // all suffixes already distinguished
            break;
        m = num;
    }
}
/**
 * Fills rnk[] (inverse of sa[]) and height[], where height[r] is the length
 * of the longest common prefix of the suffixes ranked r-1 and r.
 * height[1] is not written.
 *
 * Positions are visited in text order so the previous match length, minus
 * one, can be reused as the starting point for the next comparison.
 */
void get_height()
{
    for (int r = 1; r <= n; ++r)
        rnk[sa[r]] = r;

    int lcp = 0;
    for (int start = 1; start <= n; ++start)
    {
        const int r = rnk[start];
        if (r != 1)
        {
            if (lcp > 0)
                --lcp;
            const int prev = sa[r - 1]; // suffix ranked immediately before
            while (prev + lcp <= n && start + lcp <= n && a[prev + lcp] == a[start + lcp])
                ++lcp;
            height[r] = lcp;
        }
    }
}
// Sliding-window-minimum state used by main: q[] holds candidate height
// values, pos[] the index each candidate came from, hd/tl delimit the live
// part of the queue, and maxn is the best answer found so far.
int q[100005], pos[100005], hd = 1, tl = 0, maxn = 0;

/**
 * Reads n, k and the sequence a[1..n], then prints the length of the longest
 * contiguous pattern that occurs at least k times.
 *
 * k suffixes that are adjacent in sorted order are linked by k-1 consecutive
 * height[] entries, so the answer is the largest window minimum over all
 * windows of k-1 entries. Windows that are still incomplete always contain
 * height[1] == 0 and therefore cannot raise the answer.
 */
int main()
{
    if (std::scanf("%d%d", &n, &k) != 2)
        return 0;
    k--; // from here on k is the window length over height[]
    for (int i = 1; i <= n; i++)
        if (std::scanf("%d", &a[i]) != 1)
            return 0;

    if (k <= 0)
    {
        // A threshold of one occurrence is met by the whole sequence; an
        // empty window would otherwise report max(height) instead.
        std::printf("%d\n", n);
        return 0;
    }

    get_SA();
    get_height();

    for (int i = 1; i <= n; i++)
    {
        // Drop candidates that slid out of the window [i-k+1, i].
        while (hd <= tl && i - pos[hd] + 1 > k)
            ++hd;
        // Keep the queue increasing so q[hd] is always the window minimum.
        while (hd <= tl && height[i] <= q[tl])
            --tl;
        q[++tl] = height[i];
        pos[tl] = i;
        maxn = std::max(maxn, q[hd]);
    }
    std::printf("%d\n", maxn);
    return 0;
}
P2852

luogu P5341 甲苯先生和大中锋的字符串

题意简述

给定一个长度为$n$的字符串,在所有出现次数恰好为$k$的子串中,按长度统计个数,求个数最多的那个长度(若有多个则取最长的;若不存在这样的子串则输出$-1$)。

$n\le 10^5$

Sol

现在要求出现次数恰好为$k$:在单调队列选定的长度为$k-1$的区间$[i,i+k-2]$中,设其$height$最小值为$mn$,还必须保证$\max(height[i-1],height[i+k-1]) < mn$。此时长度在$(\max(height[i-1],height[i+k-1]),\ mn]$内的公共前缀恰好出现$k$次,用差分数组给这段长度区间加一即可

 

#include <bits/stdc++.h>
using namespace std;
// Shared state of the P5341 solution. Every array is used 1-indexed.
//   n        length of the current string
//   k        required number of occurrences (read by main)
//   m, num   bucket count / number of distinct ranks while sorting suffixes
//   x[]      current rank of the suffix starting at each position
//   y[]      scratch: positions ordered by second key, then the new ranks
//   c[]      counting-sort buckets
//   sa[]     suffix array, rnk[] its inverse
//   height[] height[r] = LCP of the suffixes ranked r-1 and r
//   ch[]     the input string, stored from ch[1]
int n, m, k, num, x[100005], y[100005], c[100005], sa[100005], rnk[100005], height[100005];
char ch[500005];

/**
 * Builds the suffix array of ch[1..n] (prefix doubling with a stable
 * counting sort) and then fills rnk[] and height[].
 *
 * The function clears the buckets it uses itself, so it can be called
 * repeatedly without the caller zeroing c[] first. Characters are read as
 * unsigned char so a byte >= 128 can never become a negative bucket index.
 *
 * Side effects: overwrites x[], y[], c[], sa[], rnk[], height[1..n], m, num.
 */
void get_SA()
{
    // Initial order: counting sort by the first character.
    m = 255;
    std::fill(c, c + m + 1, 0);
    for (int i = 1; i <= n; i++)
        ++c[x[i] = static_cast<unsigned char>(ch[i])];
    for (int i = 1; i <= m; i++)
        c[i] += c[i - 1];
    for (int i = n; i >= 1; i--)
        sa[c[x[i]]--] = i;

    for (int len = 1; len <= n; len <<= 1)
    {
        // y[] := positions ordered by their second key (rank of position+len).
        // Suffixes shorter than len have an empty second half and come first.
        num = 0;
        for (int i = n - len + 1; i <= n; i++)
            y[++num] = i;
        for (int i = 1; i <= n; i++)
            if (sa[i] > len)
                y[++num] = sa[i] - len;

        // Stable counting sort by the first key, scanning y[] backwards so
        // the second-key order is preserved inside each bucket.
        std::fill(c, c + m + 1, 0);
        for (int i = 1; i <= n; i++)
            ++c[x[i]];
        for (int i = 2; i <= m; i++)
            c[i] += c[i - 1];
        for (int i = n; i >= 1; i--)
            sa[c[x[y[i]]]--] = y[i];

        // Re-rank into y[] while x[] still holds the old ranks.
        num = 1;
        y[sa[1]] = 1;
        for (int i = 2; i <= n; i++)
        {
            const int cur = sa[i];
            const int prev = sa[i - 1];
            // position+len can reach almost 2n, which is past the end of the
            // rank arrays when n is close to their size; an out-of-range
            // second half is empty and gets rank 0 without touching memory.
            const int cur_second = (cur + len <= n) ? x[cur + len] : 0;
            const int prev_second = (prev + len <= n) ? x[prev + len] : 0;
            if (x[cur] != x[prev] || cur_second != prev_second)
                ++num;
            y[cur] = num;
        }
        std::copy(y + 1, y + n + 1, x + 1);

        if (num == n) // all suffixes already distinguished
            break;
        m = num;
    }

    for (int r = 1; r <= n; r++)
        rnk[sa[r]] = r;

    // Visit positions in text order so the previous match length minus one
    // can be reused as the starting point of the next comparison.
    height[1] = 0; // the smallest suffix has no predecessor
    int lcp = 0;
    for (int i = 1; i <= n; i++)
    {
        if (rnk[i] == 1)
            continue;
        if (lcp)
            lcp--;
        const int j = sa[rnk[i] - 1];
        while (j + lcp <= n && i + lcp <= n && ch[j + lcp] == ch[i + lcp])
            lcp++;
        height[rnk[i]] = lcp;
    }
}
// State used by main: q[head..tail] is a monotone queue of height[] indices
// (increasing height, front = window minimum) and cnt[] is a difference
// array indexed by substring length.
int q[500005], head = 1, tail = 0, cnt[500005];

/**
 * For each test case reads a string and k. Among the substrings occurring
 * exactly k times it counts how many there are of each length and prints the
 * length with the highest count (the largest one on ties), or -1 if no
 * substring occurs exactly k times.
 *
 * For k suffixes at consecutive ranks i-k+1..i, t is their common prefix
 * length and g the longest prefix shared with a neighbour just outside the
 * group; every length in (g, t] then occurs exactly k times.
 */
int main()
{
    int T;
    if (std::scanf("%d", &T) != 1)
        return 0;
    while (T-- > 0)
    {
        std::memset(x, 0, sizeof(x));
        std::memset(y, 0, sizeof(y));
        std::memset(rnk, 0, sizeof(rnk));
        std::memset(c, 0, sizeof(c));
        std::memset(sa, 0, sizeof(sa));
        std::memset(height, 0, sizeof(height)); // also provides height[n + 1] == 0
        std::memset(cnt, 0, sizeof(cnt));
        std::memset(q, 0, sizeof(q));
        head = 1, tail = 0;

        // The field width keeps an over-long token from overrunning ch[].
        if (std::scanf("%500004s%d", ch + 1, &k) != 2)
            break;
        n = static_cast<int>(std::strlen(ch + 1));
        get_SA();

        // k < 1 would index below the arrays and k > n can never be met, so
        // both fall through with an empty cnt[] and print -1.
        if (k >= 1 && k <= n)
        {
            // Pre-load the first window of height[] indices.
            for (int i = 2; i <= k; i++)
            {
                while (head <= tail && height[q[tail]] >= height[i])
                    --tail;
                q[++tail] = i;
            }
            for (int i = k; i <= n; i++)
            {
                // Exactly one index leaves the window [i-k+2, i] per step.
                if (head <= tail && i - q[head] + 1 >= k)
                    ++head;
                while (head <= tail && height[q[tail]] >= height[i])
                    --tail;
                q[++tail] = i;
                // A single suffix (k == 1) "shares" its whole length.
                int t = (k == 1) ? n - sa[i] + 1 : height[q[head]];
                int g = std::max(height[i + 1], height[i - k + 1]);
                if (g <= t)
                    ++cnt[g + 1], --cnt[t + 1]; // +1 on lengths g+1..t
            }
        }

        int maxn = 1, pos = -1;
        for (int i = 1; i <= n; i++)
        {
            cnt[i] += cnt[i - 1];
            if (cnt[i] >= maxn) // >= keeps the largest length on ties
                maxn = cnt[i], pos = i;
        }
        std::printf("%d\n", pos);
    }
    return 0;
}
P5341

 

posted @ 2020-06-05 07:54  verjun  阅读(179)  评论(0编辑  收藏  举报