Maximum repetition substring 后缀数组

Maximum repetition substring
Time Limit: 1000MS   Memory Limit: 65536K
Total Submissions: 7578   Accepted: 2281

Description

The repetition number of a string is defined as the maximum number R such that the string can be partitioned into R same consecutive substrings. For example, the repetition number of "ababab" is 3 and "ababa" is 1.

Given a string containing lowercase letters, you are to find a substring of it with maximum repetition number.

Input

The input consists of multiple test cases. Each test case contains exactly one line, which gives a non-empty string consisting of lowercase letters. The length of the string will not be greater than 100,000.

The last test case is followed by a line containing a '#'.

Output

For each test case, print a line containing the test case number (beginning with 1) followed by the substring of maximum repetition number. If there are multiple substrings of maximum repetition number, print the lexicographically smallest one.

Sample Input

ccabababc
daabbccaa
#

Sample Output

Case 1: ababab
Case 2: aa

Source

 
题意:给出一个字符串,求其中重复次数最多的连续重复子串(即由同一个子串连续重复 R 次构成、且 R 最大的子串);如果有多个,输出字典序最小的。
 
 
 
 
  1 #include <iostream>
  2 #include <cstdio>
  3 #include <cstring>
  4 #include <cmath>
  5 #include <algorithm>
  6 #include <string>
  7 #include <vector>
  8 #include <stack>
  9 #include <queue>
 10 #include <set>
 11 #include <map>
 12 #include <list>
 13 #include <iomanip>
 14 #include <cstdlib>
 15 using namespace std;
 // ---- Constants ------------------------------------------------------------
 // INF and EXP are not referenced by the code visible in this file.
 const int INF=0x5fffffff;
 const double EXP=1e-8;
 // Capacity of every work array below.
 const int MS=200005;

 // ---- Work arrays ----------------------------------------------------------
 // Sparse table filled by rmq_init() from height[] and queried by rmq().
 int dp[MS][30];

 // Scratch buffers used by get_sa(): two key arrays, the counting-sort
 // buckets, and the first-key view of the second-key ordering.
 int t1[MS];
 int t2[MS];
 int c[MS];
 int v[MS];

 // Suffix-array data: rank[] is written by get_height() as the inverse of
 // sa[], sa[] is produced by get_sa(), height[] is produced by get_height().
 // NOTE: because the file has `using namespace std;`, an unqualified `rank`
 // can clash with std::rank from <type_traits>; spell it `::rank` at use sites.
 int rank[MS];
 int sa[MS];
 int height[MS];

 // str holds the raw input line read by main(); str1 is not referenced by the
 // code visible in this file.
 char str[MS];
 char str1[MS];

 // Integer-coded copy of str built by main() and handed to get_sa()/get_height().
 int s[MS];
 // Tells whether positions a and b carry the same (first key, second key) pair
 // in key array r, where the second key sits k entries further on.
 // Returns 1 when both keys match, 0 otherwise. The second key is only read
 // when the first keys are equal.
 int cmp(int *r,int a,int b,int k)
 {
     if(r[a]!=r[b])
         return 0;
     return (r[a+k]==r[b+k])?1:0;
 }
 29 
 30 void get_sa(int *r,int *sa,int n,int m)
 31 {
 32     int i,j,p,*x=t1,*y=t2;
 33     for(i=0;i<m;i++)
 34         c[i]=0;
 35     for(i=0;i<n;i++)
 36         c[x[i]=r[i]]++;
 37     for(i=1;i<m;i++)
 38         c[i]+=c[i-1];
 39     for(i=n-1;i>=0;i--)
 40         sa[--c[x[i]]]=i;
 41     p=1;j=1;
 42     for(;p<n;j*=2,m=p)
 43     {
 44         for(p=0,i=n-j;i<n;i++)
 45             y[p++]=i;
 46         for(i=0;i<n;i++)
 47             if(sa[i]>=j)
 48                 y[p++]=sa[i]-j;
 49         for(i=0;i<n;i++)
 50             v[i]=x[y[i]];
 51         for(i=0;i<m;i++)
 52             c[i]=0;
 53         for(i=0;i<n;i++)
 54             c[v[i]]++;
 55         for(i=1;i<m;i++)
 56             c[i]+=c[i-1];
 57         for(i=n-1;i>=0;i--)
 58             sa[--c[v[i]]]=y[i];
 59         swap(x,y);
 60         x[sa[0]]=0;
 61         for(p=1,i=1;i<n;i++)
 62             x[sa[i]]=cmp(y,sa[i-1],sa[i],j)?p-1:p++;
 63     }
 64 }
 65 
 66 void get_height(int *r,int n)
 67 {
 68     int i,j,k=0;
 69     for(i=1;i<=n;i++)
 70         rank[sa[i]]=i;
 71     //height[i]>=height[i-1]-1;
 72     for(i=0;i<n;i++)
 73     {
 74         if(k)
 75             k--;
 76         else
 77             k=0;
 78         j=sa[rank[i]-1];
 79         while(r[i+k]==r[j+k])
 80             k++;
 81         height[rank[i]]=k;
 82     }
 83 }
 84 
 85 void rmq_init(int n)
 86 {
 87     for(int i=1;i<=n;i++)  dp[i][0]=height[i];
 88     for(int j=1;(1<<j)<=n;j++)
 89     for(int i=1;i+(1<<j)-1<=n;i++)
 90     dp[i][j]=min(dp[i][j-1],dp[i+(1<<(j-1))][j-1]);
 91 }
 92 
 93 int rmq(int ll,int rr)
 94 {
 95     int k=0;
 96     ll=rank[ll];
 97     rr=rank[rr];
 98     if(ll>rr)
 99     {
100         int tmp=ll;
101         ll=rr;
102         rr=tmp;
103     }
104     ll++;
105     while((1<<(k+1))<=rr-ll+1) k++;
106     return min(dp[ll][k],dp[rr-(1<<k)+1][k]);
107 }
108 
109 int main()
110 {
111     int text=0;
112     while(scanf("%s",str)>0)
113     {
114         if(str[0]=='#')
115         break;
116         int len=strlen(str);
117         for(int i=0;i<len;i++)
118         s[i]=str[i]-'a'+1;
119         s[len]=0;
120         get_sa(s,sa,len+1,30);
121         get_height(s,len);
122         rmq_init(len);
123         int ans=0,pos=0,lenn;
124         for(int i=1;i<=len/2;i++)
125         {
126             for(int j=0;j<len-i;j+=i)
127             {
128                 if(str[j]!=str[j+i])
129                 continue;
130                 int k=rmq(j,j+i);
131                 int tol=k/i+1;
132                 //printf("%d\n",tol);
133                 int r=i-k%i;
134                 int p=j;
135                 int cnt=0;
136                 for(int m=j-1;m>j-i&&str[m]==str[m+i]&&m>=0;m--)
137                 {
138                     cnt++;
139                     if(cnt==r)
140                     {
141                         tol++;
142                         p=m;
143                     }
144                     else if(rank[p]>rank[m])
145                     {
146                         p=m;
147                     }
148                 }
149                 if(ans<tol)
150                 {
151                     ans=tol;
152                     pos=p;
153                     lenn=tol*i;
154                 }
155                 else if(ans==tol&&rank[pos]>rank[p])
156                 {
157                     pos=p;
158                     lenn=tol*i;
159                 }
160             }
161         }
162         printf("Case %d: ",++text);
163     //  printf("%d %d %d\n",ans,pos,lenn);
164         if(ans<2)                         //这里,如果字符总长度小于2,那么就在原串中找出一个最小的字符就好
165         {
166             char ch='z';
167             for(int i=0;i<len;i++)
168             if(str[i]<ch)
169             ch=str[i];
170             printf("%c\n",ch);
171             continue;
172         }
173         for(int i=pos;i<pos+lenn;i++)
174         printf("%c",str[i]);
175         printf("\n");
176     }
177     return 0;
178 }

 

posted @ 2015-03-03 01:46  daydaycode  阅读(312)  评论(0编辑  收藏  举报