POJ3693 Maximum repetition substring 后缀数组
POJ - 3693 Maximum repetition substring
题意
输入一个串,求重复次数最多的连续重复字串,如果有次数相同的,则输出字典序最小的
Sample input
ccabababc
daabbccaa
#
Sample Output
Case 1: ababab
Case 2: aa
1 #include <iostream> 2 #include <cstdio> 3 #include <cstring> 4 #include <algorithm> 5 using namespace std; 6 const int maxn = 1e5+5; 7 char s[maxn]; 8 int sa[maxn], t[maxn], t2[maxn], c[maxn]; 9 int n; 10 //构造字符串s的后缀数组, 每个字符值必须为0 ~ m-1 11 void build_sa(int m) { 12 int *x = t, *y = t2; 13 //基数排序 14 for(int i = 0; i < m; i++) c[i] = 0; 15 for(int i = 0; i < n; i++) c[x[i] = s[i]]++; 16 for(int i = 1; i < m; i++) c[i] += c[i-1]; 17 for(int i = n-1; i >= 0; i--) sa[--c[x[i]]] = i; 18 for(int k = 1; k <= n; k <<= 1) { 19 int p = 0; 20 //直接利用sa数组排序第二关键字 21 for(int i = n-k; i < n; i++) y[p++] = i; 22 for(int i = 0; i < n; i++) if(sa[i] >= k) y[p++] = sa[i] - k; 23 //基数排序第一关键字 24 for(int i = 0; i < m; i++) c[i] = 0; 25 for(int i = 0; i < n; i++) c[x[y[i]]]++; 26 for(int i = 1; i < m; i++) c[i] += c[i-1]; 27 for(int i = n-1; i>= 0; i--) sa[--c[x[y[i]]]] = y[i]; 28 //根据sa和y数组计算新的x数组 29 swap(x, y); 30 p = 1; 31 x[sa[0]] = 0; 32 for(int i = 1; i < n; i++) 33 x[sa[i]] = (y[sa[i-1]] == y[sa[i]] && y[sa[i-1]+k] == y[sa[i]+k] ? p-1 : p++); 34 if(p >= n) break; 35 m = p; 36 } 37 } 38 39 int rank_[maxn]; //rank[i]代表后缀i在sa数组中的下标 40 int height[maxn]; //height[i] 定义为sa[i-1] 和 sa[i] 的最长公共前缀 41 //后缀j和k的LCP长度等于RMQ(height, rank[j]+1, rank[k]) 42 void get_height() { 43 int i, j, k = 0; 44 for(int i = 0; i < n; i++) rank_[sa[i]] = i; 45 for(int i = 0; i < n; i++) { 46 if(!rank_[i]) continue; 47 int j = sa[rank_[i]-1]; 48 if(k) k--; 49 50 while(s[i+k] == s[j+k]) k++; 51 height[rank_[i]] = k; 52 } 53 } 54 int d[maxn][50]; 55 void rmq_init() { 56 for(int i = 0; i < n; i++) d[i][0] = height[i]; 57 for(int j = 1; (1<<j) <= n; j++) 58 for(int i = 0; i + (1<<j) - 1 < n; i++) 59 d[i][j] = min(d[i][j-1], d[i+(1<<(j-1))][j-1]); 60 } 61 int rmq(int l, int r) { 62 if(l == r) return n-l; 63 if(rank_[l] > rank_[r]) swap(l, r); 64 int L = rank_[l]+1; 65 int R = rank_[r]; 66 int k = 0; 67 while((1<<(k+1)) <= R-L+1) k++; 68 return min(d[L][k], d[R-(1<<k)+1][k]); 69 } 70 71 int a[maxn]; 72 int main() { 73 int kase = 0; 74 while(~scanf("%s", s) && s[0] != '#') { 75 int L = strlen(s); 76 n = L + 1; 77 build_sa(128); 78 get_height(); 79 rmq_init(); 80 int mx = 0; 81 int cnt = 0; 82 // 寻找重复次数最多的连续子串单个子串的长度,可能有多种重复次数相同的子串 83 for(int l = 1; l <= L; l++) { 84 for(int j = 0; j + l < L; j += l) { 85 int k = rmq(j, j + l); // lcp 86 int res = k / l + 1; 87 int pos = j - (l - (k % l)); 88 if(pos >= 0 && k % l && rmq(pos, pos + l)) res++; 89 if(res > mx) { 90 mx = res; 91 cnt = 0; 92 a[cnt++] = l; 93 } else if(res == mx) { 94 a[cnt++] = l; 95 } 96 } 97 } 98 // 找字典序最小 99 int len = 0, st; 100 for(int i = 1; i < n && !len; i++) { 101 for(int j = 0; j < cnt; j++) { 102 if(rmq(sa[i], sa[i] + a[j]) >= (mx - 1) * a[j]) { 103 len = a[j]; 104 st = sa[i]; 105 break; 106 } 107 } 108 } 109 printf("Case %d: ", ++kase); 110 for(int i = st; i < st + len * mx; i++) { 111 printf("%c", s[i]); 112 } 113 printf("\n"); 114 } 115 return 0; 116 }