POJ-3693/HDU-2459 Maximum repetition substring 最多重复次数的子串(需要输出具体子串,按字典序)
http://acm.hdu.edu.cn/showproblem.php?pid=2459
之前hihocoder那题可以算出最多重复次数,但是没有输出子串。一开始以为只要基于那个解法,每次更新答案的时候按SA更新子串起始位置和长度就好了,但实际发现那个解法在计算重复次数的时候并不会遍历每一个合法串(比如daeaea,枚举长度为2时,只会计算eaea,漏了aeae),所以不能在计算过程中直接得出答案。
那难道又要退化成枚举每个起始位置了么?其实不然,我们可以记录能达到最大重复次数的所有循环节长度(这个数组不会太大),然后按字典序从小到大扫描SA,对每个后缀依次枚举这些长度,一旦发现满足条件的就直接输出并结束。另外特判一下只出现一次的重复串(即ascii最小的单字符)。
#include <cstdio>
#include <string>
#include <iostream>
#include <set>
#include <algorithm>
#include <vector>
#include <map>
#include <cstring>
#include <queue>
#include <cmath>

typedef int LL;
using namespace std;

const LL N = 100055;  // capacity of every per-position array (text + sentinel + slack)

string s;   // current test case: filled by main(), read directly by SF::solve()
int lx[N];  // distinct period lengths that reach the best repetition count, ascending

// Records that a probe found `reps` repetitions with period `len`.
// `best` is the largest repetition count seen so far and `cnt` the number of
// period lengths stored in lx for it. A strictly better count discards the old
// list. Periods are probed in increasing order, so comparing against the last
// stored entry is enough to keep lx free of duplicates; this also bounds cnt
// by the number of distinct periods, so lx cannot overflow.
static void recordPeriod(int reps, int len, int &best, int &cnt)
{
    if (reps < best) return;
    if (reps > best)
    {
        best = reps;
        cnt = 0;
    }
    if (cnt == 0 || lx[cnt - 1] != len) lx[cnt++] = len;
}

// Suffix array (prefix doubling with counting sort), Height/LCP array and a
// sparse table for range-minimum queries over Height.
class SF
{
public:
    int x[N], y[N], c[N];  // scratch: current ranks, second-key order, counting buckets
    // Height[r]: LCP of the suffixes ranked r-1 and r (written for r >= 1).
    // str: the text as integers followed by the sentinel.
    // SA[r]: start index of the suffix with rank r.  Rank: inverse of SA.
    int Height[N], str[N], SA[N], Rank[N];
    int slen;  // text length, sentinel excluded
    int m;     // exclusive upper bound on key values for the counting sort

    // True when positions a and b carry equal keys in r both at offset 0 and
    // at offset l.
    bool cmp(int* r, int a, int b, int l)
    {
        return r[a] == r[b] && r[a + l] == r[b + l];
    }

    // Builds SA, Rank and Height for str[0..n], where str[n] is the sentinel.
    // n is the text length without the sentinel.
    void Suffix(int n)
    {
        ++n;  // sort the sentinel suffix as well
        int i, j, p;

        // Initial counting sort by first character.
        for (i = 0; i < m; ++i) c[i] = 0;
        for (i = 0; i < n; ++i) c[x[i] = str[i]]++;
        for (i = 1; i < m; ++i) c[i] += c[i - 1];
        for (i = n - 1; i >= 0; --i) SA[--c[x[i]]] = i;

        for (j = 1; j <= n; j <<= 1)
        {
            // y lists positions ordered by their second key (rank at +j);
            // positions with no second key come first.
            p = 0;
            for (i = n - j; i < n; ++i) y[p++] = i;
            for (i = 0; i < n; ++i) if (SA[i] >= j) y[p++] = SA[i] - j;

            // Stable counting sort of y by first key gives the new SA.
            for (i = 0; i < m; ++i) c[i] = 0;
            for (i = 0; i < n; ++i) c[x[y[i]]]++;
            for (i = 1; i < m; ++i) c[i] += c[i - 1];
            for (i = n - 1; i >= 0; --i) SA[--c[x[y[i]]]] = y[i];

            // y now holds the old ranks; x receives the ranks for length 2j.
            swap(x, y);
            p = 1;
            x[SA[0]] = 0;
            for (i = 1; i < n; ++i)
            {
                x[SA[i]] = cmp(y, SA[i - 1], SA[i], j) ? p - 1 : p++;
            }
            if (p >= n) break;  // all ranks distinct: sorting is complete
            m = p;
        }

        int k = 0;
        n--;
        for (i = 0; i <= n; ++i) Rank[SA[i]] = i;

        // Height in text order; k is carried over between consecutive i.
        for (i = 0; i < n; ++i)
        {
            if (k) --k;
            j = SA[Rank[i] - 1];
            while (str[i + k] == str[j + k]) ++k;
            Height[Rank[i]] = k;
        }
    }

    static const int bitlen = 25;

    // floor(log2(p)) for p >= 1, computed with integer shifts; returns 0 for p <= 1.
    LL lg2(LL p)
    {
        LL k = 0;
        while (p > 1)
        {
            p >>= 1;
            ++k;
        }
        return k;
    }

    LL dp[bitlen][N];  // dp[k][i] = min of Height over [i, i + 2^k)
    LL bit[bitlen];    // bit[k] = 2^k

    // Builds the sparse table over Height[0..slen].
    void initRMQ()
    {
        bit[0] = 1;
        for (int i = 1; i < bitlen; i++) bit[i] = 2 * bit[i - 1];
        for (int i = 0; i <= slen; i++) dp[0][i] = Height[i];
        dp[0][0] = dp[0][1] = 0;  // Height[0] is never written by Suffix()
        for (LL i = 1; bit[i] < slen + 1; i++)
            for (LL j = 0; j + bit[i] <= slen + 1; j++)
                dp[i][j] = min(dp[i - 1][j], dp[i - 1][j + bit[i - 1]]);
    }

    // LCP of the suffixes with ranks l and r (either order). For l == r the
    // result is the length of that suffix.
    LL query(LL l, LL r)
    {
        if (r == l) return slen - SA[l];
        if (l > r) swap(l, r);
        l++;  // the LCP is the minimum of Height over (l, r]
        LL mig = lg2(r - l + 1);
        return min(dp[mig][l], dp[mig][r - bit[mig] + 1]);
    }

    // Loads the text and builds every index. Characters are mapped with
    // c - 'a' + 2 so that the value 1 can serve as the end sentinel.
    void init(string &s)
    {
        m = 1050;
        slen = s.size();
        for (int i = 0; i < slen; i++) str[i] = s[i] - 'a' + 2;
        str[slen] = 1;  // sentinel: stops the comparisons at the end of the text
        Suffix(slen);
        initRMQ();
    }

    // Prints the answer for the text in the global `s`, which must be the
    // string last passed to init(): the substring with the largest number of
    // consecutive repetitions, picked by scanning suffixes in SA order. If no
    // substring repeats at least twice, the smallest character is printed.
    void solve()
    {
        if (slen == 0)
        {
            cout << '\n';
            return;
        }

        int best = 2;  // only repetition counts >= 2 are recorded
        int cnt = 0;   // number of period lengths stored in lx

        // Phase 1: for each period length, probe the positions that are
        // multiples of it, plus one shifted start per position.
        for (int len = 1; len <= slen; len++)
        {
            for (int i = 0; i + len < slen; i += len)
            {
                int lcp = query(Rank[i], Rank[i + len]);
                recordPeriod(lcp / len + 1, len, best, cnt);

                // Shift the start left so the partial block becomes whole.
                int sta = i - len + lcp % len;
                if (sta >= 0)
                {
                    // A zero remainder gives i - len; probe one position later instead.
                    if (sta == i - len) sta++;
                    lcp = query(Rank[sta], Rank[sta + len]);
                    recordPeriod(lcp / len + 1, len, best, cnt);
                }
            }
        }

        // Phase 2: walk suffixes in rank order (rank 0 is the sentinel) and try
        // the recorded periods in ascending order; print the first hit.
        for (int r = 1; r <= slen; r++)
        {
            int sta = SA[r];
            for (int j = 0; j < cnt; j++)
            {
                int len = lx[j];
                if (sta + len >= slen) break;  // later periods are longer still
                int lcp = query(r, Rank[sta + len]);
                if (lcp / len + 1 == best)
                {
                    cout << s.substr(sta, best * len) << '\n';
                    return;
                }
            }
        }

        // No substring repeats twice or more.
        cout << *min_element(s.begin(), s.end()) << '\n';
    }
} sf;

// Reads one string per test case until a line containing "#", printing
// "Case k: " followed by the answer for each.
int main()
{
    ios::sync_with_stdio(false);
    int cas = 1;
    while (cin >> s)
    {
        if (s == "#") break;
        cout << "Case " << cas++ << ": ";
        sf.init(s);
        sf.solve();
    }
    return 0;
}