poj3693 Maximum repetition substring

http://poj.org/problem?id=3693

Description

The repetition number of a string is defined as the maximum number R such that the string can be partitioned into R same consecutive substrings. For example, the repetition number of "ababab" is 3 and "ababa" is 1.

Given a string containing lowercase letters, you are to find a substring of it with maximum repetition number.

Input

The input consists of multiple test cases. Each test case contains exactly one line, which
gives a non-empty string consisting of lowercase letters. The length of the string will not be greater than 100,000.

The last test case is followed by a line containing a '#'.

Output

For each test case, print a line containing the test case number (beginning with 1) followed by the substring of maximum repetition number. If there are multiple substrings of maximum repetition number, print the lexicographically smallest one.

Sample Input

ccabababc
daabbccaa
#

Sample Output

Case 1: ababab
Case 2: aa

 

题目大意:

给定一个字符串,求重复次数最多的连续重复子串。如果存在多个,则输出字典序最小的一个。

 

解题报告:

  后缀数组。枚举循环节的长度k,求出循环节长度为k时最大的重复次数f[k]。具体做法是:对整个字符串分组,每k个一组,在每一组找一个元素i,则只用求出以i和i+k开头的两个后缀往前往后各最多能匹配多远。用height数组求出i与i+k的最长公共前缀t,那么i最多往后跳t个位置,此时循环次数为(t/k+1);若t mod k≠0,则往前最多跳k-(t mod k)个位置(加起来已经大于或等于k,再往前匹配就没有必要了)。于是再求以i-(k-t%k)与i+k-(k-t%k)开头的两个后缀的最长公共前缀,设为x,则最大循环次数为(x/k+1)。

  求出f数组后,把所有满足重复次数最多的子串的长度存在另一个数组b里。为了保证字典序最小,在sa数组里从头开始for答案的起始位置i,for数组b里所有长度j,再用后缀数组判断以i开头,循环节长度为j的子串是否满足条件,找到就直接输出就可以了。

  复杂度 O(n/1+n/2+n/3+n/4+...+n/n)=O(n logn)。

代码:

View Code
  1 #include<cstdio>
  2 #include<cstdlib>
  3 #include<cstring>
  4 #include<cmath>
  5 using namespace std;
  6 #define maxn 100010
  7 #define min(a,b) (a)<(b)?(a):(b)
  8 #define max(a,b) (a)>(b)?(a):(b)
  9 
 10 int wa[maxn],wb[maxn],wv[maxn],ws[maxn],h[maxn],rank[maxn],sa[maxn],r[maxn],n,m,ans,st[maxn][20],f[maxn],b[maxn];
 11 char c[maxn];
 12 void swap(int &x,int &y)
 13 {
 14     int t=x;x=y,y=t;
 15 }
 16 bool cmp(int *r,int i,int j,int l)
 17 {
 18     return r[i]==r[j] && r[i+l]==r[j+l];
 19 }
 20 void calc(int n)
 21 {
 22     int i,j,k=0;
 23     for (i=1;i<=n;i++) rank[sa[i]]=i;
 24     for (i=1;i<=n;h[rank[i++]]=k)
 25         for (k?k--:0,j=sa[rank[i]-1];r[i+k]==r[j+k];k++);
 26 }
 27 void da(int n)
 28 {
 29     int i,j,p,*x=wa,*y=wb,*t;
 30     for (i=0;i<=m;i++) ws[i]=0;
 31     for (i=1;i<=n;i++) ws[x[i]=r[i]]++;
 32     for (i=1;i<=m;i++) ws[i]+=ws[i-1];
 33     for (i=n;i;i--) sa[ws[x[i]]--]=i;
 34     for (j=1,p=1;p<=n;m=p,j<<=1)
 35     {
 36         for (p=1,i=n-j+1;i<=n;i++) y[p++]=i;
 37         for (i=1;i<=n;i++) if (sa[i]>j) y[p++]=sa[i]-j;
 38         for (i=0;i<=m;i++) ws[i]=0;
 39         for (i=1;i<=n;i++) ws[wv[i]=x[y[i]]]++;
 40         for (i=1;i<=m;i++) ws[i]+=ws[i-1];
 41         for (i=n;i;i--) sa[ws[wv[i]]--]=y[i];
 42         for (t=x,x=y,y=t,i=p=2,x[sa[1]]=1;i<=n;i++)
 43             x[sa[i]]=cmp(y,sa[i-1],sa[i],j)?p-1:p++;
 44     }
 45     for (i=1;i<n;i++) sa[i]=sa[i+1];
 46     calc(n-1);
 47 }
 48 void init_rmq()
 49 {
 50     memset(st,0x3f,sizeof(st));
 51     for (int i=n;i;i--)
 52     {
 53         st[i][0]=h[i];
 54         if (i<n) st[i][1]=min(h[i],h[i+1]);
 55         for (int j=2;j<=20;j++)
 56             if (i+(1<<(j-1))<=n)
 57                 st[i][j]=min(st[i][j-1],st[i+(1<<(j-1))][j-1]);
 58     }
 59 }
 60 int get_min(int x,int y)
 61 {
 62     int i,p;
 63     if (x>y) swap(x,y);
 64     x++;
 65     if (x==y) return st[x][0];
 66     else if (x==y-1) return st[x][1];
 67     i=(int)(log((y-x+1)*1.0)/log(2.0));
 68     p=y-(1<<i)+1;
 69     return min(st[x][i],st[p][i]);
 70 }
 71 void check(int k)
 72 {
 73     for (int x=0;x<(n-1)/k;x++)
 74     {
 75         int i=x*k+1,j=i+k,t=get_min(rank[i],rank[j]);
 76         f[k]=max(f[k],t/k+1);
 77         if (t%k)
 78         {
 79             t=get_min(rank[i-(k-t%k)],rank[j-(k-t%k)]);
 80             f[k]=max(f[k],t/k+1);
 81         }
 82     }
 83 }
 84 void output()
 85 {
 86     for (int i=1;i<=n;i++)
 87         for (int j=1;j<=m;j++)
 88         {
 89             int x=get_min(i,rank[sa[i]+b[j]]);
 90             if (x>=(ans-1)*b[j])
 91             {
 92                 for (int k=sa[i];k<sa[i]+ans*b[j];k++) printf("%c",c[k]);
 93                 return;
 94             }
 95         }
 96 }
 97 
 98 int main()
 99 {
100     int _=0;
101     scanf("%s",c+1);
102     while (c[1]!='#')
103     {
104         printf("Case %d: ",++_);
105         m=0,ans=0;
106         memset(f,0,sizeof(f));
107         for (n=1;c[n];r[n]=(int)c[n],++n);
108         for (int i=1;i<=n;i++) if (r[i]>m) m=r[i];
109         r[n]=0;
110         da(n);
111         n--,m=0;
112         init_rmq();
113         for (int i=1;i<=n/2;i++) check(i);
114         for (int i=1;i<=n/2;i++) if (f[i]>ans) ans=f[i];
115         for (int i=1;i<=n/2;i++) if (f[i]==ans) b[++m]=i;
116         output();
117         puts("");
118         scanf("%s",c+1);
119     }
120     return 0;
121 }

 

posted @ 2013-03-18 18:54  wangziyun  阅读(287)  评论(1)  编辑  收藏  举报
神奇的东西