Loading

后缀数组练习

pku-1743 Musical Theme

解题思路

本质是一个寻找最长不重叠相同子串长度的题目
下面是求最长不重叠相同子串长度的思路:
二分枚举+height数组分组。这道题的思想很巧妙,后面要仔细推敲。先二分答案,把题目变成判定性问题:判断是否存在两个长度为k的子串是相同的,且不重叠。解决这个问题的关键还是利用height数组。把排序后的后缀分成若干组,其中每组的后缀之间的height值都不小于k。例如,字符串为“aabaaaab”,当k=2时,后缀分成了4组,如图所示。
此处输入图片的描述
容易看出,有希望成为最长公共前缀不小于k的两个后缀一定在同一组。然后对于每组后缀,只须判断每个后缀的sa值的最大值和最小值之差是否不小于k。如果有一组满足,则说明存在,否则不存在。整个做法的时间复杂度为O(nlogn)。
下面给出两份代码:先是一份倍增法构造后缀数组的写法,然后是一份DC3的写法。
Musical Theme

代码

#include <algorithm>
#include <cstdio>
#include <cmath>
#include <cstring>
#include <iostream>
#include <cstdlib>
#include <set>
#include <vector>
#include <cctype>
#include <iomanip>
#include <sstream>
#include <climits>
#include <queue>
#include <stack>
using namespace std;
// Optional stdin/stdout redirection for local runs (disabled):
/*    freopen("k.in", "r", stdin);
    freopen("k.out", "w", stdout); */
// Optional timing scaffold (disabled):
//clock_t c1 = clock();
//std::cerr << "Time:" << clock() - c1 <<"ms" << std::endl;
//#pragma comment(linker, "/STACK:1024000000,1024000000")
#define de(a) cout << #a << " = " << a << endl
#define rep(i, a, n) for (int i = a; i <= n; i++)
#define per(i, a, n) for (int i = n; i >= a; i--)
typedef long long ll;
typedef unsigned long long ull;
typedef pair<int, int> PII;
typedef pair<double, double> PDD;
// Was `vector<int, int>`, which names `int` as the allocator and cannot be
// instantiated. NOTE(review): a vector of int pairs is the presumed intent.
typedef vector<PII> VII;
#define inf 0x3f3f3f3f
const ll INF = 0x3f3f3f3f3f3f3f3f;
const ll MAXN = 1e6 + 7;
const ll MAXM = 1e6 + 7;
const ll MOD = 1e9 + 7;
const double eps = 1e-6;
const double pi = acos(-1.0);
int sa[MAXN];     // sa[i]: start position of the suffix whose rank is i
int rk[MAXN];     // rk[p]: rank of the suffix starting at p (inverse of sa)
int tp[MAXN];     // radix-sort second key: tp[i] is the start of the suffix ranked i by second key
int tax[MAXN];    // counting-sort buckets: occurrences of each key value
int Height[MAXN]; // Height[i]: longest common prefix of the suffixes ranked i and i-1
// i.e. lcp(sa[i], sa[i-1])
int n, m;         // n: sequence length; m: current number of distinct keys
int s[MAXN];      // 1-based input sequence
/* void Debug()
{
    printf("*****************\n");
    printf("下标");
    for (int i = 1; i <= n; i++)
        printf("%d ", i);
    printf("\n");
    printf("sa  ");
    for (int i = 1; i <= n; i++)
        printf("%d ", sa[i]);
    printf("\n");
    printf("rak ");
    for (int i = 1; i <= n; i++)
        printf("%d ", rk[i]);
    printf("\n");
    printf("tp  ");
    for (int i = 1; i <= n; i++)
        printf("%d ", tp[i]);
    printf("\n");
} */
// Stable counting sort of the suffix starts listed in tp[1..n], keyed by
// rk[start]; the sorted order is written to sa[1..n]. Keys must lie in [0, m].
void Qsort()
{
    // Clear one bucket per possible key.
    int bucket = 0;
    while (bucket <= m)
        tax[bucket++] = 0;

    // Histogram of the keys.
    for (int pos = 1; pos <= n; ++pos)
        ++tax[rk[pos]];

    // Prefix sums: tax[key] becomes the last slot reserved for that key.
    for (bucket = 1; bucket <= m; ++bucket)
        tax[bucket] = tax[bucket] + tax[bucket - 1];

    // Walk tp backwards so entries with equal keys keep their tp order.
    int idx = n;
    while (idx >= 1)
    {
        const int start = tp[idx];
        const int slot = tax[rk[start]];
        tax[rk[start]] = slot - 1;
        sa[slot] = start;
        --idx;
    }
}
// Builds the suffix array of s[1..n] by prefix doubling.
// On return sa[1..n] holds suffix starts in sorted order and rk[1..n] the ranks.
// Assumes every s[i] is in [1, 200]: 0 is used as the key of the empty
// remainder, and 200 is the initial bucket count.
void SuffixSort()
{
    m = 200;
    for (int i = 1; i <= n; i++)
        rk[i] = s[i], tp[i] = i;
    Qsort();
    // w: prefix length already ranked; each round extends it to 2w.
    // p: number of distinct ranks produced by the round; all suffixes are
    // distinct, so the loop stops once p reaches n.
    for (int w = 1, p = 0; p < n; m = p, w <<= 1)
    {
        p = 0; // reused as a write cursor into tp
        // Suffixes shorter than w+1 have an empty second half, so they come
        // first in second-key order.
        for (int i = 1; i <= w; i++)
            tp[++p] = n - w + i;
        // The rest follow in the order of the suffix starting w positions later.
        for (int i = 1; i <= n; i++)
            if (sa[i] > w)
                tp[++p] = sa[i] - w;
        Qsort(); // stable sort by first key (previous rk) on top of the second-key order

        // Keep the previous ranks in tp while the new ranks are written to rk.
        // Only [0, n] is live; swapping whole arrays costs O(MAXN) per round.
        std::swap_ranges(rk, rk + n + 1, tp);
        rk[sa[1]] = p = 1;
        for (int i = 2; i <= n; i++)
        {
            const int prev = sa[i - 1];
            const int cur = sa[i];
            // Past the end the second key is the empty remainder (0). Reading
            // the array there would pick up ranks left by an earlier, longer
            // test case and could merge two different suffixes.
            const int prevSecond = (prev + w <= n) ? tp[prev + w] : 0;
            const int curSecond = (cur + w <= n) ? tp[cur + w] : 0;
            rk[cur] = (tp[prev] == tp[cur] && prevSecond == curSecond) ? p : ++p;
        }
    }
}
void GetHeight()
{
    int j, k = 0;
    for (int i = 1; i <= n; i++)
    {
        if (k)
            k--;
        j = sa[rk[i] - 1];
        while (s[i + k] == s[j + k])
            k++;
        Height[rk[i]] = k;
    }
}
// Decision step of the binary search: returns true when some run of
// consecutive ranks with Height >= len - 1 contains two suffix starts that
// are at least len apart.
bool check(int len)
{
    int lo = sa[1];
    int hi = lo;
    int idx = 2;
    while (idx <= n)
    {
        const int start = sa[idx];
        if (Height[idx] < len - 1)
        {
            // Common prefix too short: a new group starts here.
            lo = start;
            hi = start;
        }
        else
        {
            if (start > hi)
                hi = start;
            if (start < lo)
                lo = start;
        }
        if (hi - lo >= len)
            return true;
        ++idx;
    }
    return false;
}
int ans = 0;
// Reads test cases until a note count of 0. For each case prints the length
// of the longest theme (at least 5 notes, repeated without overlap, possibly
// transposed), or 0 when there is none.
int main()
{
    int notes;
    while (scanf("%d", &notes) == 1 && notes)
    {
        ans = 0;
        for (int i = 1; i <= notes; i++)
            scanf("%d", &s[i]);
        // Two disjoint themes of 5 notes need at least 10 notes.
        if (notes < 10)
        {
            printf("0\n");
            continue;
        }
        // Transposition-invariant form: keep only the notes - 1 true
        // differences, shifted by 100 to stay positive. The first note has no
        // predecessor, so it contributes no difference of its own.
        n = notes - 1;
        for (int i = 1; i <= n; i++)
            s[i] = s[i + 1] - s[i] + 100;
        s[n + 1] = 0; // sentinel that differs from every shifted difference
        SuffixSort();
        GetHeight();
        // Largest theme length (in notes) accepted by check().
        int l = 1, r = (notes >> 1) + 1;
        while (l < r)
        {
            int mid = (l + r) >> 1;
            if (check(mid))
            {
                l = mid + 1;
                ans = mid;
            }
            else
                r = mid;
        }
        if (ans < 5)
            printf("0\n");
        else
            printf("%d\n", ans);
    }
    return 0;
}
//--------------------DC3
#include <cstdio>
#include <algorithm>
#include <queue>
#include <iostream>
#include <cmath>
#include <cstring>
using namespace std;
// F and G are used inside dc3 and read its local variable `tb`.
// F maps a text position x with x % 3 != 0 to an index in the reduced
// string: x / 3 when x % 3 == 1, otherwise x / 3 + tb.
#define F(x) ((x) / 3 + ((x) % 3 == 1 ? 0 : tb))
// G is the reverse mapping, from a reduced-string index back to a text position.
#define G(x) ((x) < tb ? (x)*3 + 1 : ((x)-tb) * 3 + 2)
// Array capacity. NOTE(review): the original remark "n*10" suggests ten times
// the largest input length; dc3 writes past index n, so extra room is needed.
const int MAXN = 200000 + 100;
int sa[MAXN];     // suffix array written by dc3
int rk[MAXN];     // rank of each suffix start, filled by calheight
int height[MAXN]; // LCP of suffixes with adjacent ranks, filled by calheight
int n;            // current sequence length
int s[MAXN];      // raw input values
int r[MAXN];      // sequence handed to dc3
int wa[MAXN], wb[MAXN], wv[MAXN]; // scratch arrays for dc3 and sort
int wws[MAXN];                    // counting-sort buckets
// Stable counting sort: orders the n indices in a[] by key r[a[i]] and writes
// the result to b[]. Keys must lie in [0, m). Uses wv and wws as scratch.
void sort(int *r, int *a, int *b, int n, int m)
{
    int idx = 0;
    // Cache the key of every element.
    while (idx < n)
    {
        wv[idx] = r[a[idx]];
        ++idx;
    }
    // Clear the buckets, then count the keys.
    for (int key = 0; key < m; ++key)
        wws[key] = 0;
    for (idx = 0; idx < n; ++idx)
        ++wws[wv[idx]];
    // Prefix sums: wws[key] becomes one past the last slot for that key.
    for (int key = 1; key < m; ++key)
        wws[key] = wws[key] + wws[key - 1];
    // Fill from the back so equal keys keep their input order.
    idx = n - 1;
    while (idx >= 0)
    {
        const int slot = wws[wv[idx]] - 1;
        wws[wv[idx]] = slot;
        b[slot] = a[idx];
        --idx;
    }
}
// Returns 1 when the three values starting at r[a] equal the three values
// starting at r[b], otherwise 0.
int c0(int *r, int a, int b)
{
    for (int off = 0; off < 3; ++off)
        if (r[a + off] != r[b + off])
            return 0;
    return 1;
}
// Merge comparison used by dc3: returns 1 when the suffix at a orders before
// the suffix at b, where k is b % 3. The first value decides when it differs;
// otherwise k == 1 falls back to the ranks in wv of the next positions, and
// k == 2 compares one more value before doing the same.
int c12(int k, int *r, int a, int b)
{
    if (r[a] != r[b])
        return r[a] < r[b];
    if (k == 2)
        return c12(1, r, a + 1, b + 1);
    return wv[a + 1] < wv[b + 1];
}

// Builds the suffix array of r[0..n-1] into sa[0..n-1] with the DC3 scheme.
//   r  : input values, each in [0, m); r[n] and r[n + 1] are overwritten with 0
//   sa : output; positions from sa + n onward are used as recursion workspace
//   n  : number of values
//   m  : exclusive upper bound of the values (bucket count for sort)
// Both arrays must have room beyond n, because the reduced problem lives at
// r + n and sa + n. Uses the globals wa, wb, wv and wws as scratch.
void dc3(int *r, int *sa, int n, int m)
{
    // rn/san: reduced string and its suffix array, stored right after the input.
    // tb: offset used by F and G; tbc: count of positions with i % 3 != 0;
    // ta: count of positions with i % 3 == 0.
    int i, j, *rn = r + n, *san = sa + n, ta = 0, tb = (n + 1) / 3, tbc = 0, p;
    // Pad so that triples starting near the end can be read.
    r[n] = r[n + 1] = 0;
    // Collect every position that is not a multiple of 3.
    for (i = 0; i < n; i++)
        if (i % 3 != 0)
            wa[tbc++] = i;
    // Radix sort those positions by their triple, last value first.
    sort(r + 2, wa, wb, tbc, m);
    sort(r + 1, wb, wa, tbc, m);
    sort(r, wa, wb, tbc, m);
    // Name the triples: equal triples share a name; p ends as the number of names.
    for (p = 1, rn[F(wb[0])] = 0, i = 1; i < tbc; i++)
        rn[F(wb[i])] = c0(r, wb[i - 1], wb[i]) ? p - 1 : p++;
    // Names not unique: recurse on the reduced string. Otherwise the names
    // already are the ranks, so invert them directly.
    if (p < tbc)
        dc3(rn, san, tbc, p);
    else
        for (i = 0; i < tbc; i++)
            san[rn[i]] = i;
    // List the multiple-of-3 positions in the order of the suffix that follows
    // them (reduced index < tb corresponds to a position with remainder 1).
    for (i = 0; i < tbc; i++)
        if (san[i] < tb)
            wb[ta++] = san[i] * 3;
    // The last position has no following suffix when n % 3 == 1; add it here.
    if (n % 3 == 1)
        wb[ta++] = n - 1;
    // Stable sort by the first value on top of that order.
    sort(r, wb, wa, ta, m);
    // Map reduced indices back to text positions and record their rank in wv.
    for (i = 0; i < tbc; i++)
        wv[wb[i] = G(san[i])] = i;
    // Merge the two sorted lists into sa.
    for (i = 0, j = 0, p = 0; i < ta && j < tbc; p++)
        sa[p] = c12(wb[j] % 3, r, wa[i], wb[j]) ? wa[i++] : wb[j++];
    for (; i < ta; p++)
        sa[p] = wa[i++];
    for (; j < tbc; p++)
        sa[p] = wb[j++];
    return;
}
void calheight(int *r, int *sa, int n)
{
    int i, j, k = 0;
    for (i = 1; i <= n; ++i)
        rk[sa[i]] = i;
    for (i = 0; i < n; height[rk[i++]] = k)
        for (k ? k-- : 0, j = sa[rk[i] - 1]; r[i + k] == r[j + k]; ++k)
            ;
    return;
}
// Decision step of the binary search: scans ranks 2..n, grouping consecutive
// ranks whose height is at least len - 1, and returns true as soon as a group
// holds two suffix starts at least len apart.
bool check(int len)
{
    int smallest = sa[1];
    int largest = sa[1];
    for (int idx = 2; idx <= n; ++idx)
    {
        const int start = sa[idx];
        const bool sameGroup = height[idx] >= len - 1;
        if (!sameGroup)
        {
            // Start a fresh group at this suffix.
            smallest = largest = start;
        }
        else
        {
            largest = start > largest ? start : largest;
            smallest = start < smallest ? start : smallest;
        }
        if (largest - smallest >= len)
            return true;
    }
    return false;
}
int ans;
// Reads test cases until a note count of 0. For each case prints the length
// of the longest theme (at least 5 notes, repeated without overlap, possibly
// transposed), or 0 when there is none.
int main()
{
    int notes;
    while (scanf("%d", &notes) == 1 && notes)
    {
        ans = 0;
        for (int i = 0; i < notes; i++)
            scanf("%d", &s[i]);
        // Two disjoint themes of 5 notes need at least 10 notes.
        if (notes < 10)
        {
            printf("0\n");
            continue;
        }
        // Build the notes - 1 differences between neighbours, shifted by 100
        // to stay positive. The first note has no predecessor, so nothing is
        // read before s[0].
        n = notes - 1;
        int Max = -1;
        for (int i = 0; i < n; i++)
        {
            r[i] = s[i + 1] - s[i] + 100;
            if (r[i] > Max)
                Max = r[i];
        }
        r[n] = 0; // terminator smaller than every shifted difference
        dc3(r, sa, n + 1, Max + 1);
        calheight(r, sa, n);
        // Largest theme length (in notes) accepted by check().
        int lo = 1, hi = (notes >> 1) + 1;
        while (lo < hi)
        {
            int mid = (lo + hi) >> 1;
            if (check(mid))
            {
                lo = mid + 1;
                ans = mid;
            }
            else
                hi = mid;
        }
        if (ans < 5)
            printf("0\n");
        else
            printf("%d\n", ans);
    }
    return 0;
}

HDU-4622 Reincarnation

Reincarnation

题意

区间内不同子串个数

#include <bits/stdc++.h>
using namespace std;
// Optional stdin/stdout redirection for local runs (disabled):
/*    freopen("k.in", "r", stdin);
    freopen("k.out", "w", stdout); */
// Optional timing scaffold (disabled):
//clock_t c1 = clock();
//std::cerr << "Time:" << clock() - c1 <<"ms" << std::endl;
//#pragma comment(linker, "/STACK:1024000000,1024000000")
#define de(a) cout << #a << " = " << a << endl
#define rep(i, a, n) for (int i = a; i <= n; i++)
#define per(i, a, n) for (int i = n; i >= a; i--)
typedef long long ll;
typedef unsigned long long ull;
typedef pair<int, int> PII;
typedef pair<double, double> PDD;
// Was `vector<int, int>`, which names `int` as the allocator and cannot be
// instantiated. NOTE(review): a vector of int pairs is the presumed intent.
typedef vector<PII> VII;
#define inf 0x3f3f3f3f
const ll INF = 0x3f3f3f3f3f3f3f3f;
const ll MAXN = 1e6 + 7;
const ll MAXM = 1e6 + 7;
const ll MOD = 1e9 + 7;
const double eps = 1e-6;
const double pi = acos(-1.0);
int sa[MAXN];     // sa[i]: start position of the suffix whose rank is i
int rk[MAXN];     // rk[p]: rank of the suffix starting at p (inverse of sa)
int tp[MAXN];     // radix-sort second key: tp[i] is the start of the suffix ranked i by second key
int tax[MAXN];    // counting-sort buckets: occurrences of each key value
int Height[MAXN]; // Height[i]: longest common prefix of the suffixes ranked i and i-1
// i.e. lcp(sa[i], sa[i-1])
int n, m;         // n: string length; m: current number of distinct keys
char s[MAXN];     // input string, stored from index 1
/* void Debug()
{
    printf("*****************\n");
    printf("下标");
    for (int i = 1; i <= n; i++)
        printf("%d ", i);
    printf("\n");
    printf("sa  ");
    for (int i = 1; i <= n; i++)
        printf("%d ", sa[i]);
    printf("\n");
    printf("rak ");
    for (int i = 1; i <= n; i++)
        printf("%d ", rk[i]);
    printf("\n");
    printf("tp  ");
    for (int i = 1; i <= n; i++)
        printf("%d ", tp[i]);
    printf("\n");
} */
// Stable counting sort of the suffix starts in tp[1..n] by key rk[start];
// the result goes to sa[1..n]. Keys must lie in [0, m].
void Qsort()
{
    for (int key = 0; key <= m; ++key)
        tax[key] = 0;

    // Count how often each key occurs.
    int pos = 1;
    while (pos <= n)
    {
        tax[rk[pos]] += 1;
        ++pos;
    }

    // Turn the counts into end positions.
    for (int key = 1; key <= m; ++key)
        tax[key] = tax[key - 1] + tax[key];

    // Place from the back so equal keys keep the order they had in tp.
    for (int idx = n; idx >= 1; --idx)
    {
        const int start = tp[idx];
        int &endSlot = tax[rk[start]];
        sa[endSlot] = start;
        --endSlot;
    }
}
// Builds the suffix array of s[1..n] by prefix doubling.
// On return sa[1..n] holds suffix starts in sorted order and rk[1..n] the ranks.
// Characters are mapped to keys with s[i] - '0' + 1, so every character must
// lie in ['0', '0' + 74] to fit the 75 initial buckets; key 0 stands for the
// empty remainder.
void SuffixSort()
{
    m = 75;
    for (int i = 1; i <= n; i++)
        rk[i] = s[i] - '0' + 1, tp[i] = i;
    Qsort();
    // w: prefix length already ranked; each round extends it to 2w.
    // p: number of distinct ranks produced by the round; all suffixes are
    // distinct, so the loop stops once p reaches n.
    for (int w = 1, p = 0; p < n; m = p, w <<= 1)
    {
        p = 0; // reused as a write cursor into tp
        // Suffixes with an empty second half come first in second-key order.
        for (int i = 1; i <= w; i++)
            tp[++p] = n - w + i;
        // The rest follow in the order of the suffix starting w positions later.
        for (int i = 1; i <= n; i++)
            if (sa[i] > w)
                tp[++p] = sa[i] - w;
        Qsort(); // stable sort by first key (previous rk) on top of the second-key order

        // Keep the previous ranks in tp while the new ranks are written to rk.
        // Only [0, n] is live; swapping whole arrays costs O(MAXN) per round.
        std::swap_ranges(rk, rk + n + 1, tp);
        rk[sa[1]] = p = 1;
        for (int i = 2; i <= n; i++)
        {
            const int prev = sa[i - 1];
            const int cur = sa[i];
            // Past the end the second key is the empty remainder (0). Reading
            // the array there would pick up ranks left by an earlier, longer
            // test case and could merge two different suffixes.
            const int prevSecond = (prev + w <= n) ? tp[prev + w] : 0;
            const int curSecond = (cur + w <= n) ? tp[cur + w] : 0;
            rk[cur] = (tp[prev] == tp[cur] && prevSecond == curSecond) ? p : ++p;
        }
    }
}
void GetHeight()
{
    int j, k = 0;
    for (int i = 1; i <= n; i++)
    {
        if (k)
            k--;
        j = sa[rk[i] - 1];
        while (s[i + k] == s[j + k])
            k++;
        Height[rk[i]] = k;
    }
}
// Sparse table: st[i][k] is the minimum of 2^k consecutive level-0 entries
// starting at index i.
int st[MAXN][21];
// Returns the minimum of the level-0 entries over the index range [l, r].
// Callers are expected to pass l <= r.
int Query(int l, int r)
{
    const int len = r - l + 1;
    // Largest k with 2^k <= len, computed in integers so the result does not
    // depend on floating-point rounding and stays defined for len <= 0.
    int k = 0;
    while ((1 << (k + 1)) <= len)
        ++k;
    return min(st[l][k], st[r - (1 << k) + 1][k]);
}
// For each test case reads a string, then answers q queries (l, r) with the
// number of distinct substrings of s[l..r].
int main()
{
    int t;
    if (scanf("%d", &t) != 1)
        return 0;
    while (t--)
    {
        scanf(" %s", s + 1);
        n = strlen(s + 1);
        SuffixSort();
        GetHeight();
        // Sparse table over Height, so the LCP of two ranked suffixes is a
        // range minimum.
        for (int i = 0; i <= n; i++)
            st[i][0] = Height[i];
        // Only build levels that fit in the string; this also keeps the level
        // index inside the table's second dimension.
        for (int i = 1; (1 << i) <= n; i++)
            for (int j = 1; j + (1 << i) - 1 <= n; j++)
                st[j][i] = min(st[j][i - 1], st[j + (1 << (i - 1))][i - 1]);
        int q;
        scanf("%d", &q);
        while (q--)
        {
            int l, r;
            scanf("%d%d", &l, &r);
            // Start from all (start, end) pairs inside [l, r], then subtract
            // the duplicates.
            int ans = (r - l + 1) * (r - l + 2) / 2;
            int cnt = 0;  // suffixes of the window seen so far
            int pre = -1; // rank of the suffix the next one is compared with
            for (int i = 1; i <= n; i++)
            {
                if (cnt == r - l + 1)
                    break;
                if (sa[i] < l || sa[i] > r)
                    continue;
                cnt++;
                if (pre == -1)
                {
                    pre = i;
                    continue;
                }
                // pre always comes from an earlier iteration, so pre < i.
                int lcp = Query(pre + 1, i);
                int la = r - sa[pre] + 1; // length of the earlier suffix cut at r
                int lb = r - sa[i] + 1;   // length of the current suffix cut at r
                // Keep the earlier suffix as reference only when the current
                // one, cut at r, is a proper prefix of it.
                if (!(la > lb && lcp >= lb))
                    pre = i;
                ans -= min(lcp, min(la, lb));
            }
            printf("%d\n", ans);
        }
    }
    return 0;
}

牛客 CSL的密码

题目链接

题意:

长度不小于k的本质不同子串数量

#include <bits/stdc++.h>
using namespace std;
// Optional stdin/stdout redirection for local runs (disabled):
/*    freopen("k.in", "r", stdin);
    freopen("k.out", "w", stdout); */
// Optional timing scaffold (disabled):
//clock_t c1 = clock();
//std::cerr << "Time:" << clock() - c1 <<"ms" << std::endl;
//#pragma comment(linker, "/STACK:1024000000,1024000000")
#define de(a) cout << #a << " = " << a << endl
#define rep(i, a, n) for (int i = a; i <= n; i++)
#define per(i, a, n) for (int i = n; i >= a; i--)
typedef long long ll;
typedef unsigned long long ull;
typedef pair<int, int> PII;
typedef pair<double, double> PDD;
// Was `vector<int, int>`, which names `int` as the allocator and cannot be
// instantiated. NOTE(review): a vector of int pairs is the presumed intent.
typedef vector<PII> VII;
#define inf 0x3f3f3f3f
const ll INF = 0x3f3f3f3f3f3f3f3f;
const ll MAXN = 1e6 + 7;
const ll MAXM = 1e6 + 7;
const ll MOD = 1e9 + 7;
const double eps = 1e-6;
const double pi = acos(-1.0);
int sa[MAXN];     // sa[i]: start position of the suffix whose rank is i
int rk[MAXN];     // rk[p]: rank of the suffix starting at p (inverse of sa)
int tp[MAXN];     // radix-sort second key: tp[i] is the start of the suffix ranked i by second key
int tax[MAXN];    // counting-sort buckets: occurrences of each key value
int Height[MAXN]; // Height[i]: longest common prefix of the suffixes ranked i and i-1
// i.e. lcp(sa[i], sa[i-1])
int n, m; // n: string length; m: alphabet size / current number of distinct keys
char s[MAXN], t[MAXN];
/* void Debug()
{
    printf("*****************\n");
    printf("下标");
    for (int i = 1; i <= n; i++)
        printf("%d ", i);
    printf("\n");
    printf("sa  ");
    for (int i = 1; i <= n; i++)
        printf("%d ", sa[i]);
    printf("\n");
    printf("rak ");
    for (int i = 1; i <= n; i++)
        printf("%d ", rk[i]);
    printf("\n");
    printf("tp  ");
    for (int i = 1; i <= n; i++)
        printf("%d ", tp[i]);
    printf("\n");
} */
// Stable counting sort: reads the suffix starts in tp[1..n], orders them by
// rk[start], and stores the result in sa[1..n]. Keys must lie in [0, m].
void Qsort()
{
    int key;
    for (key = 0; key <= m; ++key)
        tax[key] = 0;
    // Tally each key.
    for (int pos = n; pos >= 1; --pos)
        ++tax[rk[pos]];
    // Running totals give the last slot of every key.
    key = 1;
    while (key <= m)
    {
        tax[key] += tax[key - 1];
        ++key;
    }
    // Fill back to front so ties keep their order from tp.
    for (int idx = n; idx > 0; --idx)
    {
        const int start = tp[idx];
        const int slot = tax[rk[start]]--;
        sa[slot] = start;
    }
}
// Builds the suffix array of s[1..n] by prefix doubling.
// On return sa[1..n] holds suffix starts in sorted order and rk[1..n] the ranks.
// Characters are mapped to keys with s[i] - '0' + 1, so every character must
// lie in ['0', '0' + 74] to fit the 75 initial buckets; key 0 stands for the
// empty remainder.
void SuffixSort()
{
    m = 75;
    for (int i = 1; i <= n; i++)
        rk[i] = s[i] - '0' + 1, tp[i] = i;
    Qsort();
    // w: prefix length already ranked; each round extends it to 2w.
    // p: number of distinct ranks produced by the round; all suffixes are
    // distinct, so the loop stops once p reaches n.
    for (int w = 1, p = 0; p < n; m = p, w <<= 1)
    {
        p = 0; // reused as a write cursor into tp
        // Suffixes with an empty second half come first in second-key order.
        for (int i = 1; i <= w; i++)
            tp[++p] = n - w + i;
        // The rest follow in the order of the suffix starting w positions later.
        for (int i = 1; i <= n; i++)
            if (sa[i] > w)
                tp[++p] = sa[i] - w;
        Qsort(); // stable sort by first key (previous rk) on top of the second-key order

        // Keep the previous ranks in tp while the new ranks are written to rk.
        // Only [0, n] is live; swapping whole arrays costs O(MAXN) per round.
        std::swap_ranges(rk, rk + n + 1, tp);
        rk[sa[1]] = p = 1;
        for (int i = 2; i <= n; i++)
        {
            const int prev = sa[i - 1];
            const int cur = sa[i];
            // Past the end the second key is the empty remainder (0). Reading
            // the array there would pick up ranks left by an earlier, longer
            // test case and could merge two different suffixes.
            const int prevSecond = (prev + w <= n) ? tp[prev + w] : 0;
            const int curSecond = (cur + w <= n) ? tp[cur + w] : 0;
            rk[cur] = (tp[prev] == tp[cur] && prevSecond == curSecond) ? p : ++p;
        }
    }
}
void GetHeight()
{
    int j, k = 0;
    for (int i = 1; i <= n; i++)
    {
        if (k)
            k--;
        j = sa[rk[i] - 1];
        while (s[i + k] == s[j + k])
            k++;
        Height[rk[i]] = k;
    }
}
/* 本质不同的子串的数量
枚举每一个后缀,第i个后缀对答案的贡献为n-sa[i]+1-Height[i]*/
/* 长度不小于k的不同本质子串数量 */
// Reads cases of the form "n k" followed by a string until input ends, and
// prints for each the number of distinct substrings of length at least k.
int main()
{
    int k;
    // Require both integers; a partial read must not start a case.
    while (scanf("%d%d", &n, &k) == 2)
    {
        if (scanf(" %s", s + 1) != 1)
            break;
        n = strlen(s + 1); // trust the actual string length over the header
        SuffixSort();
        GetHeight();
        ll ans = 0;
        for (int i = 1; i <= n; i++)
        {
            // The suffix ranked i has len prefixes. Those no longer than
            // Height[i] already occur in the previous suffix, and those
            // shorter than k are too short to count.
            const int len = n - sa[i] + 1;
            const int skipped = min(max(k - 1, Height[i]), len);
            ans += len - skipped;
        }
        printf("%lld\n", ans);
    }
    return 0;
}
posted @ 2019-09-24 16:20  GrayKido  阅读(182)  评论(0编辑  收藏  举报