POJ - 3693 Maximum repetition substring (后缀数组)

The repetition number of a string is defined as the maximum number R such that the string can be partitioned into R same consecutive substrings. For example, the repetition number of "ababab" is 3 and "ababa" is 1.

Given a string containing lowercase letters, you are to find a substring of it with maximum repetition number.

Input

The input consists of multiple test cases. Each test case contains exactly one line, which
gives a non-empty string consisting of lowercase letters. The length of the string will not be greater than 100,000.

The last test case is followed by a line containing a '#'.

Output

For each test case, print a line containing the test case number (beginning with 1) followed by the substring of maximum repetition number. If there are multiple substrings of maximum repetition number, print the lexicographically smallest one.

Sample Input

ccabababc
daabbccaa
#

Sample Output

Case 1: ababab
Case 2: aa

题意:

重复次数最多的连续重复子串所在的子串

思路:

https://www.cnblogs.com/ZGQblogs/p/11176264.html

通过这篇博客找出重复次数最多的连续重复子串出现的次数.

然后按后缀数组的顺序,在各个后缀的前缀中寻找符合条件的子串.因为后缀数组已经按字典序排好序,所以找到第一个符合条件的子串后立即退出.

具体详见代码注释:

#include<iostream>
#include<algorithm>
#include<vector>
#include<stack>
#include<queue>
#include<map>
#include<set>
#include<cstdio>
#include<cstring>
#include<cmath>
#include<ctime>

// Debug helpers: print an expression (or one array element) together with its
// source text to stderr. Note that both expansions already end with ';'.
#define fuck(x) cerr<<#x<<" = "<<x<<endl;
#define debug(a, x) cerr<<#a<<"["<<x<<"] = "<<a[x]<<endl;
// Shorthands that expand to 2*t and 2*t+1 for a variable `t` in scope.
#define ls (t<<1)
#define rs ((t<<1)|1)
using namespace std;
typedef long long ll;
typedef unsigned long long ull;
// Capacity of the global per-position arrays; the problem statement caps the
// input length at 100,000, so this leaves a small safety margin.
const int maxn = 100086;
const int maxm = 100086;
// Large int sentinel. Its negation (-inf) is used as the rank of a position
// that lies past the end of the string, so that shorter suffixes sort first.
const int inf = 0x3f3f3f3f;
const ll Inf = 999999999999999999;
const int mod = 1000000007;
const double eps = 1e-6;
const double pi = acos(-1);

// Input string of the current test case (read by main).
char s[maxn];
// len  : length of s.
// Rank : Rank[i] is the rank of the suffix starting at i; index len denotes
//        the empty suffix. During construct_sa() it holds the rank by the
//        first tlen characters only.
// sa   : suffix array; sa[k] is the start index of the k-th smallest suffix
//        (k = 0..len, the empty suffix included).
// tlen : prefix length already ranked in the current doubling round; read by
//        compare_sa().
// tmp  : scratch buffer for the ranks being computed in a round.
int len, Rank[maxn], sa[maxn], tlen, tmp[maxn];
// Comparator for the doubling rounds of construct_sa(): orders the suffixes
// starting at i and j by their first 2*tlen characters, using the pair
// (Rank[x], Rank[x + tlen]) as the sort key.
// Returns true when suffix i sorts strictly before suffix j.
bool compare_sa(int i, int j) {
    // Primary key: rank of the first tlen characters.
    if (Rank[i] < Rank[j]) { return true; }
    if (Rank[i] > Rank[j]) { return false; }

    // Secondary key: rank of the next tlen characters. A suffix that ends
    // before x + tlen has no second half; it receives -inf so that, with equal
    // first halves, the shorter suffix is the lexicographically smaller one.
    int second_i = -inf;
    int second_j = -inf;
    if (i + tlen <= len) { second_i = Rank[i + tlen]; }
    if (j + tlen <= len) { second_j = Rank[j + tlen]; }
    return second_i < second_j;
}
// Builds the suffix array `sa` and the final `Rank` array for s[0..len-1] by
// repeatedly sorting with compare_sa() while doubling the compared prefix
// length tlen. Index len (the empty suffix) takes part in the sort.
void construct_sa() {
    // Round 0: rank every suffix by its first character (its character code);
    // the empty suffix gets -inf so it is the smallest.
    for (int pos = 0; pos <= len; ++pos) {
        sa[pos] = pos;
        if (pos < len) {
            Rank[pos] = s[pos];
        } else {
            Rank[pos] = -inf;
        }
    }

    tlen = 1;
    while (tlen <= len) {
        // Sort by the first 2*tlen characters using the ranks of length tlen.
        sort(sa, sa + len + 1, compare_sa);

        // Re-rank into tmp: the smallest suffix gets 0, and walking sa in
        // sorted order the rank grows by one whenever a suffix is strictly
        // greater than its predecessor; equal keys share a rank.
        tmp[sa[0]] = 0;
        for (int k = 1; k <= len; ++k) {
            const int before = sa[k - 1];
            const int current = sa[k];
            tmp[current] = tmp[before];
            if (compare_sa(before, current)) {
                ++tmp[current];
            }
        }

        // Publish the new ranks only after all comparisons of this round,
        // because compare_sa() reads Rank.
        for (int pos = 0; pos <= len; ++pos) {
            Rank[pos] = tmp[pos];
        }

        tlen *= 2;
    }
}
// height[r] = length of the longest common prefix of the suffixes ranked
// r - 1 and r in the suffix array (height[0] is fixed to 0).
int height[maxn];

// Fills `height` from s, sa and Rank. Must run after construct_sa(), which
// leaves Rank[i] equal to the position of suffix i inside sa.
void construct_lcp() {
    height[0] = 0;
    int matched = 0;  // common-prefix length carried over from the previous start
    for (int start = 0; start < len; ++start) {
        const int r = Rank[start];
        // Suffix that precedes `start` in sorted order. r >= 1 here because
        // rank 0 belongs to the empty suffix (index len).
        const int neighbour = sa[r - 1];

        // Moving the start one character to the right can shrink the common
        // prefix by at most one, so resume from matched - 1 instead of 0.
        if (matched > 0) { --matched; }
        while (neighbour + matched < len && start + matched < len
               && s[neighbour + matched] == s[start + matched]) {
            ++matched;
        }
        height[r] = matched;
    }
}
// Sparse table over height[1..len]: st[p][k] is meant to hold the minimum of
// height[p .. p + 2^k - 1].
int st[maxn][20];

// Rebuilds `st` from `height` for the current string. Row 0 is skipped because
// queries issued through lcp() always start at index 1 or later.
void rmq_init() {
    // Level 0: windows of width 1.
    for (int pos = 1; pos <= len; ++pos) {
        st[pos][0] = height[pos];
    }

    // Level k joins two adjacent level-(k-1) windows of width `half`.
    // NOTE: the bound only requires the second half to START inside [1, len],
    // so cells whose full window would run past len are written too; ask_min()
    // is only expected to read cells whose window fits.
    for (int level = 1, width = 2; width <= len; ++level, width <<= 1) {
        const int half = width / 2;
        for (int start = 1; start + half <= len; ++start) {
            st[start][level] = min(st[start][level - 1], st[start + half][level - 1]);
        }
    }
}
// Returns the minimum of height[i..j] (both ends inclusive, expects i <= j)
// from the sparse table `st` built by rmq_init().
//
// The level k = floor(log2(j - i + 1)) is derived with integer shifts. The
// previous int(log(x) / log(2.0)) form depended on floating-point rounding at
// exact powers of two and was undefined for an empty range (log of 0).
int ask_min(int i, int j) {
    const int span = j - i + 1;
    int k = 0;
    // Largest k with 2^k <= span; stays 0 when span <= 1.
    while ((2 << k) <= span) {
        ++k;
    }
    // Two windows of width 2^k, anchored at each end, cover the whole range.
    return min(st[i][k], st[j - (1 << k) + 1][k]);
}
int lcp(int a, int b) {//此处参数是,原字符串下标
    a = Rank[a], b = Rank[b];
    if (a > b)
        swap(a, b);
    return ask_min(a + 1, b);
}
// Period lengths whose best run reaches the maximum repetition count found so
// far, in increasing order; only filled for counts of at least 2.
vector<int>ansl;

// Reads strings until a line containing "#" and, for each one, prints
// "Case N: <substring>", where <substring> is the lexicographically smallest
// substring with the maximum repetition number.
int main() {
    int cases = 0;
    // The field width keeps the read inside s; it must stay below maxn (100086).
    while (scanf("%100085s", s) == 1) {
        len = strlen(s);
        if (len == 1 && s[0] == '#') { break; }
        cases++;

        construct_sa();
        construct_lcp();
        rmq_init();

        // Pass 1: find the maximum repetition count `best` and every period
        // length that attains it. For a period i only the anchors
        // 0, i, 2i, ... are probed: a run of two or more repetitions of period
        // i always covers two consecutive anchors.
        ansl.clear();
        int best = 1;  // any single character repeats once
        for (int i = 1; i < len; i++) {
            bool recorded = false;  // period i is stored at most once
            for (int j = 0; j + i < len; j += i) {
                const int common = lcp(j, j + i);
                int reps = common / i + 1;
                // The run may start before the anchor: shift left by the amount
                // needed to complete one more period and check that it matches.
                const int k = j - (i - common % i);
                if (k >= 0 && lcp(k, k + i) >= i) { reps++; }

                if (reps < 2 || reps < best) { continue; }
                if (reps > best) {
                    best = reps;
                    ansl.clear();
                    recorded = false;
                }
                if (!recorded) {
                    ansl.push_back(i);
                    recorded = true;
                }
            }
        }

        // Pass 2: choose the lexicographically smallest candidate.
        // Default (best == 1): every substring has repetition number 1, so the
        // answer is the smallest single character, i.e. the first character of
        // the smallest non-empty suffix sa[1] (sa[0] is the empty suffix).
        // The old scan skipped that suffix when it was the last character of
        // s, e.g. it printed "b" for the input "ba".
        int start = sa[1];
        int length = 1;
        if (best >= 2) {
            bool found = false;
            // Suffixes are visited in sorted order, so the first suffix having
            // a qualifying prefix yields the smallest answer.
            for (int r = 1; r <= len && !found; r++) {
                const int p = sa[r];
                // Periods are stored in increasing order, so the first hit is
                // the shortest qualifying prefix of this suffix, which is also
                // the smallest one among its qualifying prefixes.
                for (size_t t = 0; t < ansl.size(); t++) {
                    const int l = ansl[t];
                    // s[p..] matching s[p+l..] for (best-1)*l characters means
                    // s[p..] starts with `best` copies of a block of length l.
                    if (p + l < len && lcp(p, p + l) >= (best - 1) * l) {
                        start = p;
                        length = best * l;
                        found = true;
                        break;
                    }
                }
            }
        }
        // Print exactly `length` characters without modifying s.
        printf("Case %d: %.*s\n", cases, length, s + start);
    }

    return 0;
}
View Code

 

 

Maximum repetition substring

 POJ - 3693 

posted @ 2019-07-12 16:47  断腿三郎  阅读(163)  评论(0编辑  收藏  举报