后缀数组练习题若干
POJ 1743 不可重叠最长重复子串
二分答案,即二分子串的长度;假设当前枚举的长度为k。
利用height数组,将排序后的后缀分为若干组。
每组内的height值都不小于k。
然后只需查看组内是否有满足要求的两个不会产生重叠的子串即可。
#include <iostream>
#include <cstdio>
#include <cstring>
#include <vector>
#include <set>
#include <queue>
#include <algorithm>

#define MAXN 22222
#define MAXM 111
#define INF 1000000000

// POJ 1743: longest repeated, non-overlapping (possibly transposed) theme.
//
// NOTE: the original used `using namespace std;` together with a global
// array called `rank`; in C++11 and later the unqualified name collides with
// std::rank from <type_traits>.  The directive is gone and the array is
// called `rnk`.

int r[MAXN];                                  // text to sort, terminated by a 0 sentinel
int wa[MAXN], wb[MAXN], wv[MAXN], tmp[MAXN];  // scratch buffers for da()
int sa[MAXN];                                 // index range 1~n, value range 0~n-1 (sa[0] is the sentinel)

// True when the two suffixes a and b carry the same (first, second) key pair
// for the current doubling step l.
int cmp(int *r, int a, int b, int l) {
    return r[a] == r[b] && r[a + l] == r[b + l];
}

// Prefix-doubling suffix array construction.
//   r  : text of length n whose last element is a unique smallest sentinel
//   sa : output, sa[i] = start of the i-th smallest suffix
//   n  : length including the sentinel
//   m  : exclusive upper bound of the values in r (number of buckets)
void da(int *r, int *sa, int n, int m) {
    int i, j, p;
    int *x = wa, *y = wb, *ws = tmp;

    // Counting sort on the first character.
    for (i = 0; i < m; i++) ws[i] = 0;
    for (i = 0; i < n; i++) ws[x[i] = r[i]]++;
    for (i = 1; i < m; i++) ws[i] += ws[i - 1];
    for (i = n - 1; i >= 0; i--) sa[--ws[x[i]]] = i;

    for (j = 1, p = 1; p < n; j *= 2, m = p) {
        // y = suffixes ordered by their second key (the rank of suffix i + j).
        p = 0;
        for (i = n - j; i < n; i++) y[p++] = i;
        for (i = 0; i < n; i++)
            if (sa[i] >= j) y[p++] = sa[i] - j;

        // Stable counting sort of y by the first key.
        for (i = 0; i < n; i++) wv[i] = x[y[i]];
        for (i = 0; i < m; i++) ws[i] = 0;
        for (i = 0; i < n; i++) ws[wv[i]]++;
        for (i = 1; i < m; i++) ws[i] += ws[i - 1];
        for (i = n - 1; i >= 0; i--) sa[--ws[wv[i]]] = y[i];

        // Re-rank; equal key pairs share a rank.
        std::swap(x, y);
        p = 1;
        x[sa[0]] = 0;
        for (i = 1; i < n; i++)
            x[sa[i]] = cmp(y, sa[i - 1], sa[i], j) ? p - 1 : p++;
    }
}

int rnk[MAXN];     // index range 0~n-1, value range 1~n
int height[MAXN];  // index from 1 (height[1] = 0); LCP of suffixes sa[i-1] and sa[i]

// Fills rnk[] and height[] for a text of n real characters (sentinel at r[n]).
void calheight(int *r, int *sa, int n) {
    int i, j, k = 0;
    for (i = 1; i <= n; ++i) rnk[sa[i]] = i;
    for (i = 0; i < n; ++i) {
        if (k > 0) --k;  // the LCP can shrink by at most one when moving to suffix i
        j = sa[rnk[i] - 1];
        while (r[i + k] == r[j + k]) ++k;  // stops at the unique sentinel
        height[rnk[i]] = k;
    }
}

int n, a[MAXN];

// Returns true when some substring of length `mid` of the difference array
// occurs at two start positions p < q with q - p > mid.
//
// The strict inequality matters: a difference substring of length mid
// describes mid + 1 notes, so q - p == mid would make the two themes share
// one note.  The original compared with >= and therefore accepted
// overlapping themes.  Only height[2..n] is read.
bool check(int mid, int n) {
    int lo = sa[1], hi = sa[1];
    for (int i = 2; i <= n; i++) {
        if (height[i] < mid) {
            lo = hi = sa[i];  // a new group starts at this suffix
        } else {
            lo = std::min(lo, sa[i]);
            hi = std::max(hi, sa[i]);
            if (hi - lo > mid) return true;
        }
    }
    return false;
}

int main() {
    while (scanf("%d", &n) == 1 && n > 0) {
        for (int i = 0; i < n; i++) scanf("%d", &a[i]);

        // Work on consecutive differences so that transposed themes compare
        // equal; +89 shifts the values into 1..199.
        // NOTE(review): assumes note values lie in 1..88 -- confirm against the statement.
        for (int i = 0; i + 1 < n; i++) r[i] = a[i + 1] - a[i] + 89;
        r[--n] = 0;  // n is now the number of differences; r[n] is the sentinel

        da(r, sa, n + 1, 200);
        calheight(r, sa, n);

        // Binary search the difference length; a theme needs at least 5 notes,
        // i.e. at least 4 differences.
        int low = 4, high = n / 2, ans = 0;
        while (low <= high) {
            int mid = (low + high) >> 1;
            if (check(mid, n)) {
                ans = std::max(ans, mid);
                low = mid + 1;
            } else {
                high = mid - 1;
            }
        }
        printf("%d\n", ans >= 4 ? ans + 1 : 0);
    }
    return 0;
}
POJ 3261 可重叠的出现K次的最长重复子串
还是二分子串长度。 后缀分为若干组,然后判断是否有一个组的size不小于k
#include <iostream>
#include <cstdio>
#include <cstring>
#include <vector>
#include <set>
#include <queue>
#include <algorithm>

#define MAXN 22222
#define MAXM 111
#define INF 1000000000

// POJ 3261: longest substring that occurs at least k times (overlap allowed).
//
// NOTE: the original used `using namespace std;` together with a global
// array called `rank`; in C++11 and later the unqualified name collides with
// std::rank from <type_traits>.  The directive is gone and the array is
// called `rnk`.

int r[MAXN];                                  // text to sort, terminated by a 0 sentinel
int wa[MAXN], wb[MAXN], wv[MAXN], tmp[MAXN];  // scratch buffers for da()
int sa[MAXN];                                 // index range 1~n, value range 0~n-1 (sa[0] is the sentinel)

// True when the two suffixes a and b carry the same (first, second) key pair
// for the current doubling step l.
int cmp(int *r, int a, int b, int l) {
    return r[a] == r[b] && r[a + l] == r[b + l];
}

// Prefix-doubling suffix array construction.
//   r  : text of length n whose last element is a unique smallest sentinel
//   sa : output, sa[i] = start of the i-th smallest suffix
//   n  : length including the sentinel
//   m  : exclusive upper bound of the values in r; must not exceed MAXN
//        because tmp[] is used as the bucket array
void da(int *r, int *sa, int n, int m) {
    int i, j, p;
    int *x = wa, *y = wb, *ws = tmp;

    // Counting sort on the first character.
    for (i = 0; i < m; i++) ws[i] = 0;
    for (i = 0; i < n; i++) ws[x[i] = r[i]]++;
    for (i = 1; i < m; i++) ws[i] += ws[i - 1];
    for (i = n - 1; i >= 0; i--) sa[--ws[x[i]]] = i;

    for (j = 1, p = 1; p < n; j *= 2, m = p) {
        // y = suffixes ordered by their second key (the rank of suffix i + j).
        p = 0;
        for (i = n - j; i < n; i++) y[p++] = i;
        for (i = 0; i < n; i++)
            if (sa[i] >= j) y[p++] = sa[i] - j;

        // Stable counting sort of y by the first key.
        for (i = 0; i < n; i++) wv[i] = x[y[i]];
        for (i = 0; i < m; i++) ws[i] = 0;
        for (i = 0; i < n; i++) ws[wv[i]]++;
        for (i = 1; i < m; i++) ws[i] += ws[i - 1];
        for (i = n - 1; i >= 0; i--) sa[--ws[wv[i]]] = y[i];

        // Re-rank; equal key pairs share a rank.
        std::swap(x, y);
        p = 1;
        x[sa[0]] = 0;
        for (i = 1; i < n; i++)
            x[sa[i]] = cmp(y, sa[i - 1], sa[i], j) ? p - 1 : p++;
    }
}

int rnk[MAXN];     // index range 0~n-1, value range 1~n
int height[MAXN];  // index from 1 (height[1] = 0); LCP of suffixes sa[i-1] and sa[i]

// Fills rnk[] and height[] for a text of n real characters (sentinel at r[n]).
void calheight(int *r, int *sa, int n) {
    int i, j, k = 0;
    for (i = 1; i <= n; ++i) rnk[sa[i]] = i;
    for (i = 0; i < n; ++i) {
        if (k > 0) --k;  // the LCP can shrink by at most one when moving to suffix i
        j = sa[rnk[i] - 1];
        while (r[i + k] == r[j + k]) ++k;  // stops at the unique sentinel
        height[rnk[i]] = k;
    }
}

int n, k;

// Returns true when at least k suffixes share a common prefix of length mid,
// i.e. some run of consecutive height values >= mid spans k suffixes.
bool check(int mid) {
    int cnt = 1;                 // size of the current group of suffixes
    if (cnt >= k) return true;   // k <= 1: a single occurrence is enough
    for (int i = 2; i <= n; i++) {
        cnt = (height[i] < mid) ? 1 : cnt + 1;
        if (cnt >= k) return true;
    }
    return false;
}

int main() {
    scanf("%d%d", &n, &k);

    std::vector<int> vals(n);
    for (int i = 0; i < n; i++) scanf("%d", &vals[i]);

    // Coordinate-compress the values to 1..distinct.  The original passed
    // max(value) + 1 as the bucket count, which overruns tmp[MAXN] as soon
    // as an input value reaches MAXN.
    std::vector<int> uniq(vals);
    std::sort(uniq.begin(), uniq.end());
    uniq.erase(std::unique(uniq.begin(), uniq.end()), uniq.end());
    for (int i = 0; i < n; i++) {
        r[i] = static_cast<int>(
                   std::lower_bound(uniq.begin(), uniq.end(), vals[i]) - uniq.begin()) + 1;
    }
    r[n] = 0;  // sentinel, strictly smaller than every compressed value

    da(r, sa, n + 1, static_cast<int>(uniq.size()) + 1);
    calheight(r, sa, n);

    // Binary search the substring length.
    int low = 1, high = n;
    int ans = 0;
    while (low <= high) {
        int mid = (low + high) >> 1;
        if (check(mid)) {
            ans = std::max(ans, mid);
            low = mid + 1;
        } else {
            high = mid - 1;
        }
    }
    printf("%d\n", ans);
    return 0;
}
SPOJ SUBST1 求一个串中不同子串的个数
每个子串都是某个后缀的前缀
对于排名为k的后缀,它将产生n - sa[k]个前缀,
但是其中有height[k]个前缀与排名在它前一位的后缀的前缀相同(已经被统计过)。
故每个后缀的贡献是n - sa[k] - height[k]
求和即可
#include <iostream>
#include <cstdio>
#include <cstring>
#include <vector>
#include <set>
#include <queue>
#include <algorithm>

#define MAXN 55555
#define MAXM 111
#define INF 1000000000

// SPOJ SUBST1: number of distinct substrings of a string.
//
// NOTE: the original used `using namespace std;` together with a global
// array called `rank`; in C++11 and later the unqualified name collides with
// std::rank from <type_traits>.  The directive is gone and the array is
// called `rnk`.

int r[MAXN];                                  // text to sort, terminated by a 0 sentinel
int wa[MAXN], wb[MAXN], wv[MAXN], tmp[MAXN];  // scratch buffers for da()
int sa[MAXN];                                 // index range 1~n, value range 0~n-1 (sa[0] is the sentinel)

// True when the two suffixes a and b carry the same (first, second) key pair
// for the current doubling step l.
int cmp(int *r, int a, int b, int l) {
    return r[a] == r[b] && r[a + l] == r[b + l];
}

// Prefix-doubling suffix array construction.
//   r  : text of length n whose last element is a unique smallest sentinel
//   sa : output, sa[i] = start of the i-th smallest suffix
//   n  : length including the sentinel
//   m  : exclusive upper bound of the values in r (number of buckets)
void da(int *r, int *sa, int n, int m) {
    int i, j, p;
    int *x = wa, *y = wb, *ws = tmp;

    // Counting sort on the first character.
    for (i = 0; i < m; i++) ws[i] = 0;
    for (i = 0; i < n; i++) ws[x[i] = r[i]]++;
    for (i = 1; i < m; i++) ws[i] += ws[i - 1];
    for (i = n - 1; i >= 0; i--) sa[--ws[x[i]]] = i;

    for (j = 1, p = 1; p < n; j *= 2, m = p) {
        // y = suffixes ordered by their second key (the rank of suffix i + j).
        p = 0;
        for (i = n - j; i < n; i++) y[p++] = i;
        for (i = 0; i < n; i++)
            if (sa[i] >= j) y[p++] = sa[i] - j;

        // Stable counting sort of y by the first key.
        for (i = 0; i < n; i++) wv[i] = x[y[i]];
        for (i = 0; i < m; i++) ws[i] = 0;
        for (i = 0; i < n; i++) ws[wv[i]]++;
        for (i = 1; i < m; i++) ws[i] += ws[i - 1];
        for (i = n - 1; i >= 0; i--) sa[--ws[wv[i]]] = y[i];

        // Re-rank; equal key pairs share a rank.
        std::swap(x, y);
        p = 1;
        x[sa[0]] = 0;
        for (i = 1; i < n; i++)
            x[sa[i]] = cmp(y, sa[i - 1], sa[i], j) ? p - 1 : p++;
    }
}

int rnk[MAXN];     // index range 0~n-1, value range 1~n
int height[MAXN];  // index from 1 (height[1] = 0); LCP of suffixes sa[i-1] and sa[i]

// Fills rnk[] and height[] for a text of n real characters (sentinel at r[n]).
void calheight(int *r, int *sa, int n) {
    int i, j, k = 0;
    for (i = 1; i <= n; ++i) rnk[sa[i]] = i;
    for (i = 0; i < n; ++i) {
        if (k > 0) --k;  // the LCP can shrink by at most one when moving to suffix i
        j = sa[rnk[i] - 1];
        while (r[i + k] == r[j + k]) ++k;  // stops at the unique sentinel
        height[rnk[i]] = k;
    }
}

char s[MAXN];

int main() {
    int T;
    if (scanf("%d", &T) != 1) return 0;
    while (T--) {
        if (scanf("%s", s) != 1) break;
        const int n = static_cast<int>(strlen(s));

        int m = 0;
        for (int i = 0; i < n; i++) {
            // Go through unsigned char so that a byte >= 0x80 cannot become a
            // negative bucket index.
            r[i] = static_cast<unsigned char>(s[i]);
            m = std::max(m, r[i]);
        }
        r[n] = 0;  // sentinel

        da(r, sa, n + 1, m + 1);
        calheight(r, sa, n);

        // The suffix of rank i has n - sa[i] prefixes; height[i] of them were
        // already counted as prefixes of the suffix of rank i - 1.
        long long ans = 0;
        for (int i = 1; i <= n; i++) ans += n - sa[i] - height[i];
        printf("%lld\n", ans);
    }
    return 0;
}
URAL 1297 求最长回文串
假设原串为S,将原串倒置后是T。
建立一个新串S+“~”+T
然后对新串做后缀数组。
然后我们枚举的是回文串的中心。
假设中心的位置为i。
有两种情况
回文长度为奇数:
那么求lcp(i, n - i - 1)
回文长度为偶数:那么求lcp(i, n - i)
然后更新最优解即可
用手画一画就知道是什么意思了。
#include <iostream>
#include <cstdio>
#include <cstring>
#include <vector>
#include <set>
#include <queue>
#include <algorithm>

#define MAXN 111111
#define MAXM 111
#define INF 1000000000

// URAL 1297: longest palindromic substring via the suffix array of
// S + separator + reverse(S) and range-minimum queries over height[].
//
// NOTE: the original used `using namespace std;` together with a global
// array called `rank`; in C++11 and later the unqualified name collides with
// std::rank from <type_traits>.  The directive is gone and the array is
// called `rnk`.

int r[MAXN];                                  // text to sort, terminated by a 0 sentinel
int wa[MAXN], wb[MAXN], wv[MAXN], tmp[MAXN];  // scratch buffers for da()
int sa[MAXN];                                 // index range 1~n, value range 0~n-1 (sa[0] is the sentinel)

// True when the two suffixes a and b carry the same (first, second) key pair
// for the current doubling step l.
int cmp(int *r, int a, int b, int l) {
    return r[a] == r[b] && r[a + l] == r[b + l];
}

// Prefix-doubling suffix array construction.
//   r  : text of length n whose last element is a unique smallest sentinel
//   sa : output, sa[i] = start of the i-th smallest suffix
//   n  : length including the sentinel
//   m  : exclusive upper bound of the values in r (number of buckets)
void da(int *r, int *sa, int n, int m) {
    int i, j, p;
    int *x = wa, *y = wb, *ws = tmp;

    // Counting sort on the first character.
    for (i = 0; i < m; i++) ws[i] = 0;
    for (i = 0; i < n; i++) ws[x[i] = r[i]]++;
    for (i = 1; i < m; i++) ws[i] += ws[i - 1];
    for (i = n - 1; i >= 0; i--) sa[--ws[x[i]]] = i;

    for (j = 1, p = 1; p < n; j *= 2, m = p) {
        // y = suffixes ordered by their second key (the rank of suffix i + j).
        p = 0;
        for (i = n - j; i < n; i++) y[p++] = i;
        for (i = 0; i < n; i++)
            if (sa[i] >= j) y[p++] = sa[i] - j;

        // Stable counting sort of y by the first key.
        for (i = 0; i < n; i++) wv[i] = x[y[i]];
        for (i = 0; i < m; i++) ws[i] = 0;
        for (i = 0; i < n; i++) ws[wv[i]]++;
        for (i = 1; i < m; i++) ws[i] += ws[i - 1];
        for (i = n - 1; i >= 0; i--) sa[--ws[wv[i]]] = y[i];

        // Re-rank; equal key pairs share a rank.
        std::swap(x, y);
        p = 1;
        x[sa[0]] = 0;
        for (i = 1; i < n; i++)
            x[sa[i]] = cmp(y, sa[i - 1], sa[i], j) ? p - 1 : p++;
    }
}

int rnk[MAXN];     // index range 0~n-1, value range 1~n
int height[MAXN];  // index from 1 (height[1] = 0); LCP of suffixes sa[i-1] and sa[i]

// Fills rnk[] and height[] for a text of n real characters (sentinel at r[n]).
void calheight(int *r, int *sa, int n) {
    int i, j, k = 0;
    for (i = 1; i <= n; ++i) rnk[sa[i]] = i;
    for (i = 0; i < n; ++i) {
        if (k > 0) --k;  // the LCP can shrink by at most one when moving to suffix i
        j = sa[rnk[i] - 1];
        while (r[i + k] == r[j + k]) ++k;  // stops at the unique sentinel
        height[rnk[i]] = k;
    }
}

int Log[MAXN];     // Log[i] = floor(log2(i)); filled once in main()
int mi[MAXN][20];  // sparse table: mi[j][t] = min(height[j .. j + 2^t - 1])

// Builds the sparse table over height[1..n].
void rmqinit(int n) {
    for (int i = 1; i <= n; i++) mi[i][0] = height[i];
    const int levels = Log[n];
    for (int lv = 1; lv <= levels; lv++) {
        const int half = 1 << (lv - 1);
        for (int j = 1; j <= n; j++) {
            mi[j][lv] = mi[j][lv - 1];
            if (j + half <= n) mi[j][lv] = std::min(mi[j][lv], mi[j + half][lv - 1]);
        }
    }
}

// Longest common prefix of the suffixes starting at text positions a and b.
// The two positions must be different real positions (0 .. n-1).
int lcp(int a, int b) {
    a = rnk[a];
    b = rnk[b];
    if (a > b) std::swap(a, b);
    a++;  // height[a+1 .. b] covers the rank interval (a, b]
    const int t = Log[b - a + 1];
    return std::min(mi[a][t], mi[b - (1 << t) + 1][t]);
}

char s[MAXN * 2];

int main() {
    Log[1] = 0;
    for (int i = 2; i < MAXN; i++) Log[i] = Log[i >> 1] + 1;

    // The separator must differ from every input byte, so bytes are read as
    // unsigned (0..255) and the separator sits just above that range.
    const int kSeparator = 256;
    const int kAlphabet = 258;

    while (scanf("%s", s) != EOF) {
        const int len = static_cast<int>(strlen(s));

        // r = S + separator + reverse(S) + sentinel
        for (int i = 0; i < len; i++) r[i] = static_cast<unsigned char>(s[i]);
        r[len] = kSeparator;
        for (int i = 0; i < len; i++)
            r[len + 1 + i] = static_cast<unsigned char>(s[len - 1 - i]);
        const int n = 2 * len + 1;
        r[n] = 0;

        da(r, sa, n + 1, kAlphabet);
        calheight(r, sa, n);
        rmqinit(n);

        int ans = 0;
        int pos = 0;  // start of the best palindrome found so far
        for (int i = 0; i < len; i++) {
            // Odd length: position i is the centre; n - i - 1 is its mirror
            // image inside the reversed copy.
            int half = lcp(i, n - i - 1);
            if (half * 2 - 1 > ans) {
                ans = half * 2 - 1;
                pos = i - half + 1;
            }
            // Even length: the centre lies between i - 1 and i.  There is no
            // such centre for i == 0, and n - i would then be the sentinel
            // position, whose rank is never assigned.
            if (i > 0) {
                half = lcp(i, n - i);
                if (half * 2 > ans) {
                    ans = half * 2;
                    pos = i - half;
                }
            }
        }

        for (int i = 0; i < ans; i++) putchar(s[pos + i]);
        puts("");
    }
    return 0;
}
给定一个字符串S,已知该串是由某个串重复k次连接得到的。
求最大的k
这题的话。 貌似POJ上暴力跑的很快。
用后缀数组的做法是枚举子串的长度。
假设长度为len, 那么检查lcp(0, len)是否等于n - len即可
倍增在这里被卡掉了
用的DC3
#include <iostream>
#include <cstdio>
#include <cstring>
#include <vector>
#include <set>
#include <queue>
#include <cmath>
#include <algorithm>

#define MAXN 1111111
#define MAXM 111
#define INF 1000000000
// Position <-> index mapping between the text and the recursive DC3 instance;
// both macros read the local variable `tb` of dc3().
#define F(x) ((x)/3+((x)%3==1?0:tb))
#define G(x) ((x)<tb?(x)*3+1:((x)-tb)*3+2)

// Largest k such that the input string is some string repeated k times,
// solved with a DC3 suffix array.
//
// NOTE: the original used `using namespace std;` together with a global
// array called `rank` and a global function called `sort`; in C++11 and
// later unqualified `rank` collides with std::rank.  The directive is gone,
// the array is called `rnk` and the helper is called radix_pass.

int wa[MAXN], wb[MAXN], wv[MAXN], tmp[MAXN];  // scratch buffers for dc3()

// True when the character triples starting at a and b are equal.
int c0(int *r, int a, int b) {
    return r[a] == r[b] && r[a + 1] == r[b + 1] && r[a + 2] == r[b + 2];
}

// Merge comparison between a position a with a % 3 == 0 and a position b with
// b % 3 == k (k is 1 or 2); wv[] holds the ranks of the already sorted
// mod-1 / mod-2 suffixes.
int c12(int k, int *r, int a, int b) {
    if (k == 2)
        return r[a] < r[b] || (r[a] == r[b] && c12(1, r, a + 1, b + 1));
    return r[a] < r[b] || (r[a] == r[b] && wv[a + 1] < wv[b + 1]);
}

// One stable counting-sort pass: writes the n indices of a[] into b[], ordered
// by key r[a[i]]; keys must lie in [0, m).
void radix_pass(int *r, int *a, int *b, int n, int m) {
    int i;
    for (i = 0; i < n; i++) wv[i] = r[a[i]];
    for (i = 0; i < m; i++) tmp[i] = 0;
    for (i = 0; i < n; i++) tmp[wv[i]]++;
    for (i = 1; i < m; i++) tmp[i] += tmp[i - 1];
    for (i = n - 1; i >= 0; i--) b[--tmp[wv[i]]] = a[i];
}

// DC3 suffix array construction.
//   r  : text of length n whose last element is a unique smallest sentinel;
//        the recursion stores its reduced text at r + n, so the buffer must
//        hold roughly 3 * n ints
//   sa : output (same size requirement as r)
//   n  : length including the sentinel
//   m  : exclusive upper bound of the values in r
void dc3(int *r, int *sa, int n, int m) {
    int i, j, *rn = r + n;
    int *san = sa + n, ta = 0, tb = (n + 1) / 3, tbc = 0, p;
    r[n] = r[n + 1] = 0;  // padding read by the triple comparisons

    // Sort the suffixes at positions i % 3 != 0 by their first three characters.
    for (i = 0; i < n; i++)
        if (i % 3 != 0) wa[tbc++] = i;
    radix_pass(r + 2, wa, wb, tbc, m);
    radix_pass(r + 1, wb, wa, tbc, m);
    radix_pass(r, wa, wb, tbc, m);

    // Name the triples; recurse only when names are not yet unique.
    for (p = 1, rn[F(wb[0])] = 0, i = 1; i < tbc; i++)
        rn[F(wb[i])] = c0(r, wb[i - 1], wb[i]) ? p - 1 : p++;
    if (p < tbc) {
        dc3(rn, san, tbc, p);
    } else {
        for (i = 0; i < tbc; i++) san[rn[i]] = i;
    }

    // Sort the i % 3 == 0 suffixes using the ranks of the suffixes at i + 1.
    for (i = 0; i < tbc; i++)
        if (san[i] < tb) wb[ta++] = san[i] * 3;
    if (n % 3 == 1) wb[ta++] = n - 1;
    radix_pass(r, wb, wa, ta, m);

    // Merge the two sorted groups.
    for (i = 0; i < tbc; i++) wv[wb[i] = G(san[i])] = i;
    for (i = 0, j = 0, p = 0; i < ta && j < tbc; p++)
        sa[p] = c12(wb[j] % 3, r, wa[i], wb[j]) ? wa[i++] : wb[j++];
    for (; i < ta; p++) sa[p] = wa[i++];
    for (; j < tbc; p++) sa[p] = wb[j++];
}

// Builds sa[], rnk[] and height[] (all 0-based over n suffixes) for str[0..n-1].
// str[n] must be 0: it is the sentinel handed to dc3().
void da(int str[], int sa[], int rnk[], int height[], int n, int m) {
    dc3(str, sa, n + 1, m);
    int i, j, k;
    // Drop the sentinel suffix, which dc3 placed at sa[0].
    for (i = 0; i < n; i++) {
        sa[i] = sa[i + 1];
        rnk[sa[i]] = i;
    }
    for (i = 0, k = 0; i < n; ++i) {
        if (rnk[i] > 0) {
            if (k > 0) --k;
            j = sa[rnk[i] - 1];
            while (i + k < n && j + k < n && str[i + k] == str[j + k]) ++k;
        }
        height[rnk[i]] = k;
    }
}

int lcp[MAXN];  // lcp[rank] = LCP between that suffix and the whole string
// DC3 recurses into r + n and sa + n, so both buffers need about three times
// the text length; the original sized them MAXN and overran them on long input.
int r[MAXN * 3];
int sa[MAXN * 3], rnk[MAXN], height[MAXN];
int n;

// Fills lcp[] by sweeping outwards from the rank of suffix 0 and taking
// running minima of height[].
void getlcp() {
    int k = rnk[0];
    lcp[k] = n;
    for (int i = k; i >= 2; i--) lcp[i - 1] = std::min(lcp[i], height[i]);
    for (int i = k + 1; i <= n; i++) lcp[i] = std::min(lcp[i - 1], height[i]);
}

char s[MAXN];

// True when the suffix starting at k matches a prefix of the string over its
// whole length (n - k characters).
bool ok(int k) {
    return lcp[rnk[k]] == n - k;
}

int main() {
    // gets() was removed from the language in C++14; fgets() is the bounded
    // replacement and keeps the trailing line break, which is stripped here.
    while (fgets(s, sizeof(s), stdin)) {
        s[strcspn(s, "\r\n")] = '\0';
        if (s[0] == '.') break;
        n = static_cast<int>(strlen(s));

        for (int i = 0; i < n; i++) r[i] = static_cast<unsigned char>(s[i]);
        r[n] = 0;      // terminator, treated as part of the text by da()
        r[n + 1] = 0;  // sentinel for dc3(); the original left stale data from
                       // a previous, longer test case in this slot

        da(r, sa, rnk, height, n + 1, 256);
        getlcp();

        // Try every divisor pair (i, n / i) of n as a candidate period.
        int ans = 0;
        for (int i = 1; i * i <= n; i++) {
            if (n % i != 0) continue;
            if (ok(i)) ans = std::max(ans, n / i);
            if (ok(n / i)) ans = std::max(ans, i);
        }
        printf("%d\n", ans);
    }
    return 0;
}