Maximum repetition substring POJ - 3693 (后缀数组)

The repetition number of a string is defined as the maximum number R such that the string can be partitioned into R same consecutive substrings. For example, the repetition number of "ababab" is 3 and "ababa" is 1.

Given a string containing lowercase letters, you are to find a substring of it with maximum repetition number.

Input

The input consists of multiple test cases. Each test case contains exactly one line, which
gives a non-empty string consisting of lowercase letters. The length of the string will not be greater than 100,000.

The last test case is followed by a line containing a '#'.

Output

For each test case, print a line containing the test case number (beginning with 1) followed by the substring of maximum repetition number. If there are multiple substrings of maximum repetition number, print the lexicographically smallest one.

Sample Input

ccabababc
daabbccaa
#

Sample Output

Case 1: ababab
Case 2: aa

题意:给出字符串,求出其中重复次数最多的,然后字典序最小的子串。

思路:枚举循环节长度 mid。如果一个子串以 mid 为循环节并且至少重复两次,那么它一定同时覆盖某个位置 pos 和 pos+mid(pos 是 mid 的倍数),所以只需要对这样的 pos 求后缀 pos 与后缀 pos+mid 的最长公共前缀 lcp。
如果 lcp 是 mid 的整数倍,那么答案就是 lcp/mid+1;如果不是,那么前面可能还有一部分可以使匹配次数增加,于是把 pos 前推 mid - lcp%mid 个位置,再比较一次,更新答案。
对于字典序,我们可以把符合要求的所有循环节长度 len 记录下来,然后从 1 到 n 枚举 sa 数组的位置 i,看看 RMQ(rk[sa[i]], rk[sa[i]+len]) 是否不小于 (ans-1)*len,第一个符合的就是答案,找到就停下。


  1 #include<iostream>
  2 #include<cstdio>
  3 #include<cmath>
  4 #include<cstring>
  5 using namespace std;
  6 
  7 const int maxn = 1e5+5;
  8 char s[maxn];
  9 int sa[maxn],t[maxn],t2[maxn],c[maxn],n;
 10 
 11 void build_sa(int n,int m)
 12 {
 13     int i,*x=t,*y=t2;
 14     for(i=0; i<m; i++)c[i]=0;
 15     for(i=0; i<n; i++)c[x[i]=s[i]]++;
 16     for(i=1; i<m; i++)c[i] += c[i-1];
 17     for(i=n-1; i>=0; i--)sa[--c[x[i]]] = i;
 18     for(int k=1; k<=n; k<<=1)
 19     {
 20         int p=0;
 21         for(i=n-k; i<n; i++)y[p++]=i;
 22         for(i=0; i<n; i++)if(sa[i] >= k)y[p++] = sa[i]-k;
 23         for(i=0; i<m; i++)c[i] = 0;
 24         for(i=0; i<n; i++)c[x[y[i]]]++;
 25         for(i=1; i<m; i++)c[i] += c[i-1];
 26         for(i=n-1; i>=0; i--)sa[--c[x[y[i]]]] = y[i];
 27         swap(x,y);
 28         p=1;
 29         x[sa[0]]=0;
 30         for(i=1; i<n; i++)
 31             x[sa[i]] = y[sa[i-1]] == y[sa[i]] && y[sa[i-1]+k] == y[sa[i]+k]?p-1:p++;
 32         if(p>=n)break;
 33         m = p;
 34     }
 35 }
 36 
 37 int rk[maxn],ht[maxn];
 38 void getHeight(int n)
 39 {
 40     int i,j,k=0;
 41     for(i=1; i<=n; i++)rk[sa[i]] = i;
 42     for(i=0; i<n; i++)
 43     {
 44         if(k)k--;
 45         int j=sa[rk[i]-1];
 46         while(s[i+k] == s[j+k])k++;
 47         ht[rk[i]] = k;
 48     }
 49 }
 50 
 51 int pre[maxn],f[maxn][20];
 52 void ST_pre()
 53 {
 54     int t = log2(n)+1;
 55     for(int i=1; i<=n; i++)f[i][0] = ht[i];
 56     for(int i=1; i<t; i++)
 57     {
 58         for(int j=1; j<=n-(1<<i)+1; j++)
 59         {
 60             f[j][i] = min(f[j][i-1],f[j+(1<<(i-1))][i-1]);
 61         }
 62     }
 63 }
 64 int RMQ(int x,int y)
 65 {
 66     if(x > y)swap(x,y);
 67     x++;
 68     int k = pre[y-x+1];
 69     return min(f[x][k],f[y-(1<<k)+1][k]);
 70 }
 71 
 72 int cmp(int pos1,int pos2)
 73 {
 74     for(int i=0; pos1+i<n && pos2+i<n; i++)
 75     {
 76         if(s[pos1+i] < s[pos2+i])return -1;
 77         else if(s[pos1+i] > s[pos2+i])return 1;
 78     }
 79     return 0;
 80 }
 81 int anslen[maxn],pos,poslen,ans,top;
 82 void check()
 83 {
 84     ans = top = poslen = 0;
 85     for(int len = 1; len<n; len++)
 86     {
 87         for(int i=0; i+len < n; i+=len)
 88         {
 89             int lcp = RMQ(rk[i],rk[i+len]);
 90             int tmpans = lcp/len+1;
 91             int tmpres = len - lcp % len;
 92             tmpres = i - tmpres;
 93             if(tmpres >= 0 && lcp % len)
 94             {
 95                 if(RMQ(rk[tmpres],rk[tmpres+len]) > lcp)tmpans++;
 96             }
 97             if(tmpans > ans)
 98             {
 99                 ans = tmpans;
100                 top = 0;
101                 anslen[++top] = len;
102             }
103             else if(tmpans == ans)
104             {
105                 //printf("%d    %d ===\n",tmpans,ans);
106                 anslen[++top] = len;
107             }
108         }
109     }
110     for(int i=1;i<=n;i++)
111     {
112         for(int j=1;j<=top;j++)
113         {
114             if(RMQ(rk[sa[i]],rk[sa[i]+anslen[j]]) >= (ans-1)*anslen[j])
115             {
116                 pos = sa[i];
117                 poslen = ans*anslen[j];
118                 break;
119             }
120         }
121         if(poslen)break;
122     }
123     for(int i=pos; i<pos+poslen; i++)
124     {
125         printf("%c",s[i]);
126     }
127     puts("");
128 }
129 
130 int main()
131 {
132     for(int i=1; i<=maxn; i++)pre[i] = log2(i);
133     int cas = 0;
134     while(~scanf("%s",s)  && s[0] != '#')
135     {
136         n = strlen(s);
137         s[n] = 0;
138         build_sa(n+1,180);
139         getHeight(n);
140         ST_pre();
141         printf("Case %d: ",++cas);
142         check();
143     }
144 }
View Code

 

posted @ 2019-09-07 22:06  进击的黑仔  阅读(237)  评论(0编辑  收藏  举报