后缀数组
一些概念和理解:
Suffix(i): 表示从i位置到字符串末尾这个子串,(后缀数组);
sa[i]:表示排名为i的子串从哪个位置开始,即排名为i的子串为Suffix(sa[i]);
rank[i]:表示在字符串上,第i位置在后缀数组中的排名是多少;
height[i]:height[i] = Suffix(sa[i-1]) 和Suffix(sa[i])的最大公共前缀;
假设有j、k,且rank[j] < rank[k],则有:Suffix(j)和Suffix(k)的最大公共前缀为height[rank[j] + 1], height[rank[j] + 2], ... , height[rank[k]]中的最小值;
关于最长公共前缀:
求两个后缀的最长公共前缀即为求height数组中某区间上的最小值,显然这个可以用rmq问题解决。O(nlogn)的时间预处理,O(1)的时间询问。
ps:后缀数组还有好多应用,详见 罗穗骞《后缀数组——处理字符串的有力工具》
模板:
倍增算法:
#define maxn 1000001 int wa[maxn],wb[maxn],wv[maxn],ws[maxn]; int cmp(int *r,int a,int b,int l) {return r[a]==r[b]&&r[a+l]==r[b+l];} void da(int *r,int *sa,int n,int m) { int i,j,p,*x=wa,*y=wb,*t; for(i=0;i<m;i++) ws[i]=0; for(i=0;i<n;i++) ws[x[i]=r[i]]++; for(i=1;i<m;i++) ws[i]+=ws[i-1]; for(i=n-1;i>=0;i--) sa[--ws[x[i]]]=i; for(j=1,p=1;p<n;j*=2,m=p) { for(p=0,i=n-j;i<n;i++) y[p++]=i; for(i=0;i<n;i++) if(sa[i]>=j) y[p++]=sa[i]-j; for(i=0;i<n;i++) wv[i]=x[y[i]]; for(i=0;i<m;i++) ws[i]=0; for(i=0;i<n;i++) ws[wv[i]]++; for(i=1;i<m;i++) ws[i]+=ws[i-1]; for(i=n-1;i>=0;i--) sa[--ws[wv[i]]]=y[i]; for(t=x,x=y,y=t,p=1,x[sa[0]]=0,i=1;i<n;i++) x[sa[i]]=cmp(y,sa[i-1],sa[i],j)?p-1:p++; } return; } int rank[maxn],height[maxn]; void calheight(int *r,int *sa,int n) { int i,j,k=0; for(i=1;i<=n;i++) rank[sa[i]]=i; for(i=0;i<n;height[rank[i++]]=k) for(k?k--:0,j=sa[rank[i]-1];r[i+k]==r[j+k];k++); return; } int RMQ[maxn]; int mm[maxn]; int best[20][maxn]; void initRMQ(int n) { int i,j,a,b; for(i = 1; i <= n; ++i) RMQ[i] = height[i]; for(mm[0]=-1,i=1;i<=n;i++) mm[i]=((i&(i-1))==0)?mm[i-1]+1:mm[i-1]; for(i=1;i<=n;i++) best[0][i]=i; for(i=1;i<=mm[n];i++) for(j=1;j<=n+1-(1<<i);j++) { a=best[i-1][j]; b=best[i-1][j+(1<<(i-1))]; if(RMQ[a]<RMQ[b]) best[i][j]=a; else best[i][j]=b; } return; } int askRMQ(int a,int b) { int t; t=mm[b-a+1];b-=(1<<t)-1; a=best[t][a];b=best[t][b]; return RMQ[a]<RMQ[b]?a:b; } int lcp(int a,int b) { int t; a=rank[a];b=rank[b]; if(a>b) {t=a;a=b;b=t;} return(height[askRMQ(a+1,b)]); }
dc3:
#define maxn 1000003 //注意扩大 #define F(x) ((x)/3+((x)%3==1?0:tb)) #define G(x) ((x)<tb?(x)*3+1:((x)-tb)*3+2) int wa[maxn],wb[maxn],wv[maxn],ws[maxn]; int c0(int *r,int a,int b) {return r[a]==r[b]&&r[a+1]==r[b+1]&&r[a+2]==r[b+2];} int c12(int k,int *r,int a,int b) {if(k==2) return r[a]<r[b]||r[a]==r[b]&&c12(1,r,a+1,b+1); else return r[a]<r[b]||r[a]==r[b]&&wv[a+1]<wv[b+1];} void sort(int *r,int *a,int *b,int n,int m) { int i; for(i=0;i<n;i++) wv[i]=r[a[i]]; for(i=0;i<m;i++) ws[i]=0; for(i=0;i<n;i++) ws[wv[i]]++; for(i=1;i<m;i++) ws[i]+=ws[i-1]; for(i=n-1;i>=0;i--) b[--ws[wv[i]]]=a[i]; return; } void dc3(int *r,int *sa,int n,int m) { int i,j,*rn=r+n,*san=sa+n,ta=0,tb=(n+1)/3,tbc=0,p; r[n]=r[n+1]=0; for(i=0;i<n;i++) if(i%3!=0) wa[tbc++]=i; sort(r+2,wa,wb,tbc,m); sort(r+1,wb,wa,tbc,m); sort(r,wa,wb,tbc,m); for(p=1,rn[F(wb[0])]=0,i=1;i<tbc;i++) rn[F(wb[i])]=c0(r,wb[i-1],wb[i])?p-1:p++; if(p<tbc) dc3(rn,san,tbc,p); else for(i=0;i<tbc;i++) san[rn[i]]=i; for(i=0;i<tbc;i++) if(san[i]<tb) wb[ta++]=san[i]*3; if(n%3==1) wb[ta++]=n-1; sort(r,wb,wa,ta,m); for(i=0;i<tbc;i++) wv[wb[i]=G(san[i])]=i; for(i=0,j=0,p=0;i<ta && j<tbc;p++) sa[p]=c12(wb[j]%3,r,wa[i],wb[j])?wa[i++]:wb[j++]; for(;i<ta;p++) sa[p]=wa[i++]; for(;j<tbc;p++) sa[p]=wb[j++]; return; } int rank[maxn],height[maxn]; void calheight(int *r,int *sa,int n) { int i,j,k=0; for(i=1;i<=n;i++) rank[sa[i]]=i; for(i=0;i<n;height[rank[i++]]=k) for(k?k--:0,j=sa[rank[i]-1];r[i+k]==r[j+k];k++); return; } int RMQ[maxn]; int mm[maxn]; int best[20][maxn]; void initRMQ(int n) { int i,j,a,b; for(i = 1; i <= n; ++i) RMQ[i] = height[i]; for(mm[0]=-1,i=1;i<=n;i++) mm[i]=((i&(i-1))==0)?mm[i-1]+1:mm[i-1]; for(i=1;i<=n;i++) best[0][i]=i; for(i=1;i<=mm[n];i++) for(j=1;j<=n+1-(1<<i);j++) { a=best[i-1][j]; b=best[i-1][j+(1<<(i-1))]; if(RMQ[a]<RMQ[b]) best[i][j]=a; else best[i][j]=b; } return; } int askRMQ(int a,int b) { int t; t=mm[b-a+1];b-=(1<<t)-1; a=best[t][a];b=best[t][b]; return RMQ[a]<RMQ[b]?a:b; } int lcp(int a,int b) { int t; a=rank[a];b=rank[b]; if(a>b) {t=a;a=b;b=t;} return(height[askRMQ(a+1,b)]); }
POJ 1743
给点一个字符串,求最长重复子串,这两个子串不能重叠。
二分答案,判断是否存在长度为k的子串是相同的,且不重叠。在判定过程中用到一个很犀利的方法:给height数组分组,使得每组中height值都不小于k。这样判断是否存在就是判断某组中有没有两个sa[i], sa[j]的差值 >= k(也就是长度 >= k);
//#pragma comment(linker,"/STACK:327680000,327680000") #include <iostream> #include <cstdio> #include <cmath> #include <vector> #include <cstring> #include <algorithm> #include <string> #include <set> #include <functional> #include <numeric> #include <sstream> //#include <stack> #include <map> #include <queue> #define CL(arr, val) memset(arr, val, sizeof(arr)) #define REP(i, n) for((i) = 0; (i) < (n); ++(i)) #define FOR(i, l, h) for((i) = (l); (i) <= (h); ++(i)) #define FORD(i, h, l) for((i) = (h); (i) >= (l); --(i)) #define L(x) (x) << 1 #define R(x) (x) << 1 | 1 #define MID(l, r) (l + r) >> 1 #define Min(x, y) (x) < (y) ? (x) : (y) #define Max(x, y) (x) < (y) ? (y) : (x) #define E(x) (1 << (x)) #define iabs(x) (x) < 0 ? -(x) : (x) #define OUT(x) printf("%I64d\n", x) #define Read() freopen("data.in", "r", stdin) #define Write() freopen("data.out", "w", stdout); typedef long long LL; const double eps = 1e-8; const double PI = acos(-1.0); const int inf = ~0u>>2; using namespace std; const int maxn = 200010; int wa[maxn], wb[maxn], wv[maxn], WS[maxn]; int r[maxn], sa[maxn], n, k; int rank[maxn], height[maxn]; int cmp(int *r, int a, int b, int l) { return r[a] == r[b]&&r[a+l] == r[b+l]; } void da(int* r, int* sa, int n, int m) { int i, j, p, *x = wa, *y = wb, *t; for(i = 0; i < m; ++i) WS[i] = 0; for(i = 0; i < n; ++i) WS[x[i]=r[i]]++; for(i = 1; i < m; ++i) WS[i] += WS[i-1]; for(i = n - 1; i >= 0; --i) sa[--WS[x[i]]] = i; for(j = 1, p = 1; p < n; j *= 2, m = p) { for(p = 0, i = n - j; i < n; ++i) y[p++] = i; for(i = 0; i < n; ++i) if(sa[i] >= j) y[p++] = sa[i] - j; for(i = 0; i < n; ++i) wv[i] = x[y[i]]; for(i = 0; i < m; ++i) WS[i] = 0; for(i = 0; i < n; ++i) WS[wv[i]]++; for(i = 1; i < m; ++i) WS[i] += WS[i-1]; for(i = n - 1; i >= 0; --i) sa[--WS[wv[i]]] = y[i]; for(t = x, x = y, y = t, p = 1, x[sa[0]] = 0, i = 1; i < n; ++i) x[sa[i]] = cmp(y, sa[i-1], sa[i], j)?p-1:p++; } return ; } void calheight(int* r, int* sa, int n) { int i, j, k = 0; for(i = 1; i <= n; ++i) rank[sa[i]] = i; for(i = 0; i < n; height[rank[i++]] = k) for(k?k--:0, j = sa[rank[i]-1]; r[i+k] == r[j+k]; ++k); return ; } bool cal(int k) { int i, mx, mi; for(i = 2; i <= n; ++i) { if(height[i] < k) { mx = -1; mi = inf; } else { mx = max(mx, max(sa[i], sa[i-1])); mi = min(mi, min(sa[i], sa[i-1])); if(mx - mi > k) return true; } } return false; } int main() { //Read(); int i, x, p; while(scanf("%d", &n), n) { scanf("%d", &r[0]); p = r[0]; for(i = 1; i < n; ++i) { scanf("%d", &x); r[i] = x - p + 100; p = x; } da(r, sa, n + 1, 200); calheight(r, sa, n); int l = 0, r = n>>1, mid; while(r - l > 1) { mid = MID(l, r); if(cal(mid)) l = mid; else r = mid; } if(l < 4) puts("0"); else printf("%d\n", l + 1); } return 0; } //39 34 30 26 22 //82 78 74 70 66
POJ 3261
给一个字符串,求至少出现k次的最长重复子串,这k个子串可以重叠。
还是二分答案,给height分组,不过这里只需要判断一组中是否有>=k个元素。
//#pragma comment(linker,"/STACK:327680000,327680000") #include <iostream> #include <cstdio> #include <cmath> #include <vector> #include <cstring> #include <algorithm> #include <string> #include <set> #include <functional> #include <numeric> #include <sstream> //#include <stack> #include <map> #include <queue> #define CL(arr, val) memset(arr, val, sizeof(arr)) #define REP(i, n) for((i) = 0; (i) < (n); ++(i)) #define FOR(i, l, h) for((i) = (l); (i) <= (h); ++(i)) #define FORD(i, h, l) for((i) = (h); (i) >= (l); --(i)) #define L(x) (x) << 1 #define R(x) (x) << 1 | 1 #define MID(l, r) (l + r) >> 1 #define Min(x, y) (x) < (y) ? (x) : (y) #define Max(x, y) (x) < (y) ? (y) : (x) #define E(x) (1 << (x)) #define iabs(x) (x) < 0 ? -(x) : (x) #define OUT(x) printf("%I64d\n", x) #define Read() freopen("data.in", "r", stdin) #define Write() freopen("data.out", "w", stdout); typedef long long LL; const double eps = 1e-8; const double PI = acos(-1.0); const int inf = ~0u>>2; using namespace std; const int maxn = 200010; int wa[maxn], wb[maxn], wv[maxn], WS[maxn]; int r[maxn], sa[maxn], n, K; int rank[maxn], height[maxn]; int cmp(int *r, int a, int b, int l) { return r[a] == r[b]&&r[a+l] == r[b+l]; } void da(int* r, int* sa, int n, int m) { int i, j, p, *x = wa, *y = wb, *t; for(i = 0; i < m; ++i) WS[i] = 0; for(i = 0; i < n; ++i) WS[x[i]=r[i]]++; for(i = 1; i < m; ++i) WS[i] += WS[i-1]; for(i = n - 1; i >= 0; --i) sa[--WS[x[i]]] = i; for(j = 1, p = 1; p < n; j *= 2, m = p) { for(p = 0, i = n - j; i < n; ++i) y[p++] = i; for(i = 0; i < n; ++i) if(sa[i] >= j) y[p++] = sa[i] - j; for(i = 0; i < n; ++i) wv[i] = x[y[i]]; for(i = 0; i < m; ++i) WS[i] = 0; for(i = 0; i < n; ++i) WS[wv[i]]++; for(i = 1; i < m; ++i) WS[i] += WS[i-1]; for(i = n - 1; i >= 0; --i) sa[--WS[wv[i]]] = y[i]; for(t = x, x = y, y = t, p = 1, x[sa[0]] = 0, i = 1; i < n; ++i) x[sa[i]] = cmp(y, sa[i-1], sa[i], j)?p-1:p++; } return ; } void calheight(int* r, int* sa, int n) { int i, j, k = 0; for(i = 1; i <= n; ++i) rank[sa[i]] = i; for(i = 0; i < n; height[rank[i++]] = k) for(k?k--:0, j = sa[rank[i]-1]; r[i+k] == r[j+k]; ++k); return ; } bool cal(int mid) { int i, cnt = 1; for(i = 2; i <= n; ++i) { if(height[i] < mid) { cnt = 1; } else { cnt++; if(cnt >= K) return true; } } return false; } int main() { //Read(); int i ; while(~scanf("%d%d", &n, &K)) { for(i = 0; i < n; ++i) scanf("%d", r + i); da(r, sa, n + 1, 200); calheight(r, sa, n); int l = 0, r = n, mid; while(r - l > 1) { mid = MID(l, r); if(cal(mid)) l = mid; else r = mid; } printf("%d\n", l); } return 0; }
URAL 1297
给一个字符串,求最长回文串。
现将字符串反转加到原串后面,中间用一个从来没有出现过的字符隔开,求后缀数组;然后枚举每一位i,求以i为中心的回文串的长度。(len为原串的长度,n为加上反转以后字符串的长度)也就是求i(0 <= i < len)位置和n - i - 1位置的最大公共前缀长度(枚举的这个串长度为计数),或者i位置和n - i位置的最大公共前缀长度。
//#pragma comment(linker,"/STACK:327680000,327680000") #include <iostream> #include <cstdio> #include <cmath> #include <vector> #include <cstring> #include <algorithm> #include <string> #include <set> #include <functional> #include <numeric> #include <sstream> //#include <stack> #include <map> #include <queue> #define CL(arr, val) memset(arr, val, sizeof(arr)) #define REP(i, n) for((i) = 0; (i) < (n); ++(i)) #define FOR(i, l, h) for((i) = (l); (i) <= (h); ++(i)) #define FORD(i, h, l) for((i) = (h); (i) >= (l); --(i)) #define L(x) (x) << 1 #define R(x) (x) << 1 | 1 #define MID(l, r) (l + r) >> 1 #define Min(x, y) (x) < (y) ? (x) : (y) #define Max(x, y) (x) < (y) ? (y) : (x) #define E(x) (1 << (x)) #define iabs(x) (x) < 0 ? -(x) : (x) #define OUT(x) printf("%I64d\n", x) #define Read() freopen("data.in", "r", stdin) #define Write() freopen("data.out", "w", stdout); typedef long long LL; const double eps = 1e-8; const double PI = acos(-1.0); const int inf = ~0u>>2; using namespace std; const int maxn = 2012; int wa[maxn], wb[maxn], wv[maxn], WS[maxn]; int r[maxn], sa[maxn], n, K; int Rank[maxn], height[maxn]; int cmp(int *r, int a, int b, int l) { return r[a] == r[b]&&r[a+l] == r[b+l]; } void da(int* r, int* sa, int n, int m) { int i, j, p, *x = wa, *y = wb, *t; for(i = 0; i < m; ++i) WS[i] = 0; for(i = 0; i < n; ++i) WS[x[i]=r[i]]++; for(i = 1; i < m; ++i) WS[i] += WS[i-1]; for(i = n - 1; i >= 0; --i) sa[--WS[x[i]]] = i; for(j = 1, p = 1; p < n; j *= 2, m = p) { for(p = 0, i = n - j; i < n; ++i) y[p++] = i; for(i = 0; i < n; ++i) if(sa[i] >= j) y[p++] = sa[i] - j; for(i = 0; i < n; ++i) wv[i] = x[y[i]]; for(i = 0; i < m; ++i) WS[i] = 0; for(i = 0; i < n; ++i) WS[wv[i]]++; for(i = 1; i < m; ++i) WS[i] += WS[i-1]; for(i = n - 1; i >= 0; --i) sa[--WS[wv[i]]] = y[i]; for(t = x, x = y, y = t, p = 1, x[sa[0]] = 0, i = 1; i < n; ++i) x[sa[i]] = cmp(y, sa[i-1], sa[i], j)?p-1:p++; } return ; } void calheight(int* r, int* sa, int n) { int i, j, k = 0; for(i = 1; i <= n; ++i) Rank[sa[i]] = i; for(i = 0; i < n; height[Rank[i++]] = k) for(k?k--:0, j = sa[Rank[i]-1]; r[i+k] == r[j+k]; ++k); return ; } int RMQ[maxn]; int mm[maxn]; int best[20][maxn]; void initRMQ(int n) { int i, j, a, b; for(i = 1; i <= n; ++i) RMQ[i] = height[i]; for(mm[0] = -1, i = 1; i <= n; ++i) mm[i] = ((i&(i-1)) == 0) ? mm[i-1] + 1 : mm[i-1]; for(i = 1; i <= n; ++i) best[0][i] = i; for(i = 1; i <= mm[n]; ++i) { for(j = 1; j <= n + 1 - (1<<i); ++j) { a = best[i-1][j]; b = best[i-1][j+(1<<(i-1))]; if(RMQ[a] < RMQ[b]) best[i][j] = a; else best[i][j] = b; } } return ; } int askRMQ(int a, int b) { int t; t = mm[b-a+1]; b -= (1<<t) - 1; a = best[t][a]; b = best[t][b]; return RMQ[a] < RMQ[b] ? a : b; } int lcp(int a, int b) { a = Rank[a]; b = Rank[b]; if(a > b) swap(a, b); return height[askRMQ(a + 1, b)]; } char ss[maxn]; int main() { //Read(); while(~scanf("%s", ss)) { int len, i; len = strlen(ss); n = 0; for(i = 0; i < len; ++i) r[n++] = ss[i]; r[n++] = '$'; for(i = len - 1; i >= 0; --i) r[n++] = ss[i]; r[n] = 0; /* for(i = 0; i < n; ++i) printf("%c", r[i]); cout << endl; printf("%d\n", n); */ da(r, sa, n + 1, 129); calheight(r, sa, n); initRMQ(n); int tmp, p = 0, ans = 0; for(i = 0; i < len; ++i) { tmp = lcp(i, n - i - 1); if((tmp<<1) - 1 > ans) { ans = (tmp<<1) - 1; p = i - tmp + 1; } tmp = lcp(i, n - i); if((tmp<<1) > ans) { ans = (tmp<<1); p = i - tmp; } } for(i = p; i < ans + p; ++i) { printf("%c", r[i]); } puts(""); } return 0; }
POJ 2406
给一个字符串L,一直这个字符串是由某个字符串S重复R次得到的,求最大的R。
枚举S的长度k,判断Suffix(0)和Suffix(k)的最长公共前缀是否为n - k。(n%k == 0);
找Suffix(k) 和 Suffix(0)的最长公共前缀时可以预处理出每个位置到Rank[0]位置的最小height值。
ps:这题只能dc3过,倍增TLE,T_T
详见代码:
//#pragma comment(linker,"/STACK:327680000,327680000") #include <iostream> #include <cstdio> #include <cmath> #include <vector> #include <cstring> #include <algorithm> #include <string> #include <set> #include <functional> #include <numeric> #include <sstream> //#include <stack> #include <map> #include <queue> #define CL(arr, val) memset(arr, val, sizeof(arr)) #define REP(i, n) for((i) = 0; (i) < (n); ++(i)) #define FOR(i, l, h) for((i) = (l); (i) <= (h); ++(i)) #define FORD(i, h, l) for((i) = (h); (i) >= (l); --(i)) #define L(x) (x) << 1 #define R(x) (x) << 1 | 1 #define MID(l, r) (l + r) >> 1 #define Min(x, y) (x) < (y) ? (x) : (y) #define Max(x, y) (x) < (y) ? (y) : (x) #define E(x) (1 << (x)) #define iabs(x) (x) < 0 ? -(x) : (x) #define OUT(x) printf("%I64d\n", x) #define Read() freopen("data.in", "r", stdin) #define Write() freopen("data.out", "w", stdout); #define F(x) ((x)/3+((x)%3==1?0:tb)) //Suffix Array dc3 #define G(x) ((x)<tb?(x)*3+1:((x)-tb)*3+2) typedef long long LL; const double eps = 1e-8; const double PI = acos(-1.0); const int inf = ~0u>>2; using namespace std; const int maxn = 3e6+9; int wa[maxn], wb[maxn], wv[maxn], WS[maxn]; int r[maxn], sa[maxn], n, K; int rank[maxn], height[maxn]; int c0(int *r,int a,int b) {return r[a]==r[b]&&r[a+1]==r[b+1]&&r[a+2]==r[b+2];} int c12(int k,int *r,int a,int b) {if(k==2) return r[a]<r[b]||r[a]==r[b]&&c12(1,r,a+1,b+1); else return r[a]<r[b]||r[a]==r[b]&&wv[a+1]<wv[b+1];} void sort(int *r,int *a,int *b,int n,int m) { int i; for(i=0;i<n;i++) wv[i]=r[a[i]]; for(i=0;i<m;i++) WS[i]=0; for(i=0;i<n;i++) WS[wv[i]]++; for(i=1;i<m;i++) WS[i]+=WS[i-1]; for(i=n-1;i>=0;i--) b[--WS[wv[i]]]=a[i]; return; } void dc3(int *r,int *sa,int n,int m) { int i,j,*rn=r+n,*san=sa+n,ta=0,tb=(n+1)/3,tbc=0,p; r[n]=r[n+1]=0; for(i=0;i<n;i++) if(i%3!=0) wa[tbc++]=i; sort(r+2,wa,wb,tbc,m); sort(r+1,wb,wa,tbc,m); sort(r,wa,wb,tbc,m); for(p=1,rn[F(wb[0])]=0,i=1;i<tbc;i++) rn[F(wb[i])]=c0(r,wb[i-1],wb[i])?p-1:p++; if(p<tbc) dc3(rn,san,tbc,p); else for(i=0;i<tbc;i++) san[rn[i]]=i; for(i=0;i<tbc;i++) if(san[i]<tb) wb[ta++]=san[i]*3; if(n%3==1) wb[ta++]=n-1; sort(r,wb,wa,ta,m); for(i=0;i<tbc;i++) wv[wb[i]=G(san[i])]=i; for(i=0,j=0,p=0;i<ta && j<tbc;p++) sa[p]=c12(wb[j]%3,r,wa[i],wb[j])?wa[i++]:wb[j++]; for(;i<ta;p++) sa[p]=wa[i++]; for(;j<tbc;p++) sa[p]=wb[j++]; return; } void calheight(int *r,int *sa,int n) { int i,j,k=0; for(i=1;i<=n;i++) rank[sa[i]]=i; for(i=0;i<n;height[rank[i++]]=k) for(k?k--:0,j=sa[rank[i]-1];r[i+k]==r[j+k];k++); return; } /* int wa[maxn], wb[maxn], wv[maxn], WS[maxn]; int r[maxn], sa[maxn], n, K; int rank[maxn], height[maxn]; int cmp(int *r, int a, int b, int l) { return r[a] == r[b]&&r[a+l] == r[b+l]; } void da(int* r, int* sa, int n, int m) { int i, j, p, *x = wa, *y = wb, *t; for(i = 0; i < m; ++i) WS[i] = 0; for(i = 0; i < n; ++i) WS[x[i]=r[i]]++; for(i = 1; i < m; ++i) WS[i] += WS[i-1]; for(i = n - 1; i >= 0; --i) sa[--WS[x[i]]] = i; for(j = 1, p = 1; p < n; j *= 2, m = p) { for(p = 0, i = n - j; i < n; ++i) y[p++] = i; for(i = 0; i < n; ++i) if(sa[i] >= j) y[p++] = sa[i] - j; for(i = 0; i < n; ++i) wv[i] = x[y[i]]; for(i = 0; i < m; ++i) WS[i] = 0; for(i = 0; i < n; ++i) WS[wv[i]]++; for(i = 1; i < m; ++i) WS[i] += WS[i-1]; for(i = n - 1; i >= 0; --i) sa[--WS[wv[i]]] = y[i]; for(t = x, x = y, y = t, p = 1, x[sa[0]] = 0, i = 1; i < n; ++i) x[sa[i]] = cmp(y, sa[i-1], sa[i], j)?p-1:p++; } return ; } void calheight(int* r, int* sa, int n) { int i, j, k = 0; for(i = 1; i <= n; ++i) rank[sa[i]] = i; for(i = 0; i < n; height[rank[i++]] = k) for(k?k--:0, j = sa[rank[i]-1]; r[i+k] == r[j+k]; ++k); return ; }*/ /* int RMQ[maxn]; int mm[maxn]; int best[20][maxn]; void initRMQ(int n) { int i, j, a, b; for(i = 1; i <= n; ++i) RMQ[i] = height[i]; for(mm[0] = -1, i = 1; i <= n; ++i) mm[i] = ((i&(i-1)) == 0) ? mm[i-1] + 1 : mm[i-1]; for(i = 1; i <= n; ++i) best[0][i] = i; for(i = 1; i <= mm[n]; ++i) { for(j = 1; j <= n + 1 - (1<<i); ++j) { a = best[i-1][j]; b = best[i-1][j+(1<<(i-1))]; if(RMQ[a] < RMQ[b]) best[i][j] = a; else best[i][j] = b; } } return ; } int askRMQ(int a, int b) { int t; t = mm[b-a+1]; b -= (1<<t) - 1; a = best[t][a]; b = best[t][b]; return RMQ[a] < RMQ[b] ? a : b; } int lcp(int a, int b) { a = rank[a]; b = rank[b]; if(a > b) swap(a, b); return height[askRMQ(a + 1, b)]; } */ char ss[maxn]; int rmq[maxn]; int main() { //Read(); while(~scanf("%s", ss)) { if(ss[0] == '.') break ; n = strlen(ss); int i, t, ans; for(i = 0; i < n; ++i) { r[i] = ss[i]; } r[n] = 0; dc3(r, sa, n + 1, 129); calheight(r, sa, n); t = rank[0]; rmq[t] = inf; for(i = t - 1; i >= 1; --i) { rmq[i] = min(rmq[i+1], height[i+1]); } for(i = t + 1; i <= n; ++i) { rmq[i] = min(rmq[i-1], height[i]); } ans = 0; for(int k = 1; k <= n; ++k) { if(n%k) continue; if(rmq[rank[k]] == n - k) { ans = n/k; break; } } printf("%d\n", ans); } return 0; }
POJ 2274 && URAL 1517
给两个字符串,求最长公共子串。
将两个串合并成一个串,中间用一个没有出现过的字符隔开。构造后缀数组,然后找height数组中的最大值height[i](这个最大值必须满足sa[i-1]和sa[i]不在同一个串内)
//#pragma comment(linker,"/STACK:327680000,327680000") #include <iostream> #include <cstdio> #include <cmath> #include <vector> #include <cstring> #include <algorithm> #include <string> #include <set> #include <functional> #include <numeric> #include <sstream> //#include <stack> #include <map> #include <queue> #define CL(arr, val) memset(arr, val, sizeof(arr)) #define REP(i, n) for((i) = 0; (i) < (n); ++(i)) #define FOR(i, l, h) for((i) = (l); (i) <= (h); ++(i)) #define FORD(i, h, l) for((i) = (h); (i) >= (l); --(i)) #define L(x) (x) << 1 #define R(x) (x) << 1 | 1 #define MID(l, r) (l + r) >> 1 #define Min(x, y) (x) < (y) ? (x) : (y) #define Max(x, y) (x) < (y) ? (y) : (x) #define E(x) (1 << (x)) #define iabs(x) (x) < 0 ? -(x) : (x) #define OUT(x) printf("%I64d\n", x) #define Read() freopen("data.in", "r", stdin) #define Write() freopen("data.out", "w", stdout); #define F(x) ((x)/3+((x)%3==1?0:tb)) //Suffix Array dc3 #define G(x) ((x)<tb?(x)*3+1:((x)-tb)*3+2) typedef long long LL; const double eps = 1e-8; const double PI = acos(-1.0); const int inf = ~0u>>2; using namespace std; const int maxn = 600010; int wa[maxn], wb[maxn], wv[maxn], WS[maxn]; int r[maxn], sa[maxn], n, K; int rank[maxn], height[maxn]; int c0(int *r,int a,int b) {return r[a]==r[b]&&r[a+1]==r[b+1]&&r[a+2]==r[b+2];} int c12(int k,int *r,int a,int b) {if(k==2) return r[a]<r[b]||r[a]==r[b]&&c12(1,r,a+1,b+1); else return r[a]<r[b]||r[a]==r[b]&&wv[a+1]<wv[b+1];} void sort(int *r,int *a,int *b,int n,int m) { int i; for(i=0;i<n;i++) wv[i]=r[a[i]]; for(i=0;i<m;i++) WS[i]=0; for(i=0;i<n;i++) WS[wv[i]]++; for(i=1;i<m;i++) WS[i]+=WS[i-1]; for(i=n-1;i>=0;i--) b[--WS[wv[i]]]=a[i]; return; } void dc3(int *r,int *sa,int n,int m) { int i,j,*rn=r+n,*san=sa+n,ta=0,tb=(n+1)/3,tbc=0,p; r[n]=r[n+1]=0; for(i=0;i<n;i++) if(i%3!=0) wa[tbc++]=i; sort(r+2,wa,wb,tbc,m); sort(r+1,wb,wa,tbc,m); sort(r,wa,wb,tbc,m); for(p=1,rn[F(wb[0])]=0,i=1;i<tbc;i++) rn[F(wb[i])]=c0(r,wb[i-1],wb[i])?p-1:p++; if(p<tbc) dc3(rn,san,tbc,p); else for(i=0;i<tbc;i++) san[rn[i]]=i; for(i=0;i<tbc;i++) if(san[i]<tb) wb[ta++]=san[i]*3; if(n%3==1) wb[ta++]=n-1; sort(r,wb,wa,ta,m); for(i=0;i<tbc;i++) wv[wb[i]=G(san[i])]=i; for(i=0,j=0,p=0;i<ta && j<tbc;p++) sa[p]=c12(wb[j]%3,r,wa[i],wb[j])?wa[i++]:wb[j++]; for(;i<ta;p++) sa[p]=wa[i++]; for(;j<tbc;p++) sa[p]=wb[j++]; return; } void calheight(int *r,int *sa,int n) { int i,j,k=0; for(i=1;i<=n;i++) rank[sa[i]]=i; for(i=0;i<n;height[rank[i++]]=k) for(k?k--:0,j=sa[rank[i]-1];r[i+k]==r[j+k];k++); return; } /* int wa[maxn], wb[maxn], wv[maxn], WS[maxn]; int r[maxn], sa[maxn], n, K; int rank[maxn], height[maxn]; int cmp(int *r, int a, int b, int l) { return r[a] == r[b]&&r[a+l] == r[b+l]; } void da(int* r, int* sa, int n, int m) { int i, j, p, *x = wa, *y = wb, *t; for(i = 0; i < m; ++i) WS[i] = 0; for(i = 0; i < n; ++i) WS[x[i]=r[i]]++; for(i = 1; i < m; ++i) WS[i] += WS[i-1]; for(i = n - 1; i >= 0; --i) sa[--WS[x[i]]] = i; for(j = 1, p = 1; p < n; j *= 2, m = p) { for(p = 0, i = n - j; i < n; ++i) y[p++] = i; for(i = 0; i < n; ++i) if(sa[i] >= j) y[p++] = sa[i] - j; for(i = 0; i < n; ++i) wv[i] = x[y[i]]; for(i = 0; i < m; ++i) WS[i] = 0; for(i = 0; i < n; ++i) WS[wv[i]]++; for(i = 1; i < m; ++i) WS[i] += WS[i-1]; for(i = n - 1; i >= 0; --i) sa[--WS[wv[i]]] = y[i]; for(t = x, x = y, y = t, p = 1, x[sa[0]] = 0, i = 1; i < n; ++i) x[sa[i]] = cmp(y, sa[i-1], sa[i], j)?p-1:p++; } return ; } void calheight(int* r, int* sa, int n) { int i, j, k = 0; for(i = 1; i <= n; ++i) rank[sa[i]] = i; for(i = 0; i < n; height[rank[i++]] = k) for(k?k--:0, j = sa[rank[i]-1]; r[i+k] == r[j+k]; ++k); return ; }*/ /* int RMQ[maxn]; int mm[maxn]; int best[20][maxn]; void initRMQ(int n) { int i, j, a, b; for(i = 1; i <= n; ++i) RMQ[i] = height[i]; for(mm[0] = -1, i = 1; i <= n; ++i) mm[i] = ((i&(i-1)) == 0) ? mm[i-1] + 1 : mm[i-1]; for(i = 1; i <= n; ++i) best[0][i] = i; for(i = 1; i <= mm[n]; ++i) { for(j = 1; j <= n + 1 - (1<<i); ++j) { a = best[i-1][j]; b = best[i-1][j+(1<<(i-1))]; if(RMQ[a] < RMQ[b]) best[i][j] = a; else best[i][j] = b; } } return ; } int askRMQ(int a, int b) { int t; t = mm[b-a+1]; b -= (1<<t) - 1; a = best[t][a]; b = best[t][b]; return RMQ[a] < RMQ[b] ? a : b; } int lcp(int a, int b) { a = rank[a]; b = rank[b]; if(a > b) swap(a, b); return height[askRMQ(a + 1, b)]; } */ char s1[maxn], s2[maxn]; int main() { //Read(); int i, len, ans, p; while(~scanf("%s", s1)) { if(s1[0] == '#') break; scanf("%s", s2); len = strlen(s1); for(n = 0, i = 0; s1[i]; ++i) r[n++] = s1[i]; r[n++] = '$'; for(i = 0; s2[i]; ++i) r[n++] = s2[i]; r[n++] = 0; dc3(r, sa, n + 1, 128); calheight(r, sa, n); ans = 0; for(i = 2; i <= n; ++i) { if(height[i] > ans && ((sa[i-1] < len && sa[i] > len) || (sa[i-1] > len && sa[i] < len))) { ans = height[i]; p = i; } } printf("%d\n", ans); } return 0; }
POJ 3415
http://www.cnblogs.com/vongang/archive/2012/11/20/2778481.html