[POI2000]公共串 - 后缀数组
Update 2018.1.8:sto lsy orz 给出了一个 \(O(n)\) 的做法。
Description
求若干个串的最长的公共子串的长度。
Solution
考虑将这若干个串全部拼起来,中间用一些不在字符集内的符号隔开。
然后二分答案 \(K\),如果连续的一段 \(height\) 都大于等于 \(K\),且每个串都出现了至少一次,则是可行的。
O(N)
并没有必要二分答案,前面是二分长度,然后 \(check\) 一下是否有一段连续的 \(height\) 区间使得每个值都大于等于 \(K\),且每个串都出现至少一次。
反过来,我们可以枚举左端点,然后不断向右扩展右端点,当每个串都出现至少一次时,就把答案和这一段区间内 \(height\) 的 \(\min\) 取个 \(\max\),区间的 \(\min\) 用单调队列维护即可。
然后可能是由于我的代码比较丑陋,且在洛谷上此题数据范围比较小,我的 \(O(N)\) 居然跑得比 \(O(N\log N)\) 慢。(笑)(注:这里的复杂度指求出后缀数组之后的部分,倍增求后缀数组本身仍是 \(O(N\log N)\)。)
Code
O(NlogN)
#include <bits/stdc++.h>
using namespace std;
// Capacity of every fixed-size buffer below; all of them are used 1-based.
const int _ = 100000 + 10;

int N;          // length of the concatenated symbol sequence s[1..N]
int n;          // number of input strings
int s[_];       // concatenated symbols: letters as 1..26, separators as 26 + i
int belong[_];  // index (1..n) of the string owning a position; 0 on separators
int rnk[_];     // rnk[i]: rank of the suffix that starts at position i
int sa[_];      // sa[r]: start position of the suffix with rank r
int height[_];  // height[r]: LCP of the suffixes ranked r - 1 and r
char str[_];    // scratch buffer holding the string currently being read
void SA() {
static int t[_], a[_], buc[_], fir[_], sec[_], tmp[_];
copy(s + 1, s + N + 1, t + 1);
sort(t + 1, t + N + 1);
int *end = unique(t + 1, t + N + 1);
for (int i = 1; i <= N; ++i) a[i] = lower_bound(t + 1, end, s[i]) - t;
fill(buc + 1, buc + N + 1, 0);
for (int i = 1; i <= N; ++i) ++buc[a[i]];
for (int i = 1; i <= N; ++i) buc[i] += buc[i - 1];
for (int i = 1; i <= N; ++i) rnk[i] = buc[a[i] - 1] + 1;
for (int len = 1; len <= N; len <<= 1) {
for (int i = 1; i <= N; ++i) {
fir[i] = rnk[i];
sec[i] = i + len > N ? 0 : rnk[i + len];
}
fill(buc + 1, buc + N + 1, 0);
for (int i = 1; i <= N; ++i) ++buc[sec[i]];
for (int i = 1; i <= N; ++i) buc[i] += buc[i - 1];
for (int i = 1; i <= N; ++i) tmp[N - --buc[sec[i]]] = i;
fill(buc + 1, buc + N + 1, 0);
for (int i = 1; i <= N; ++i) ++buc[fir[i]];
for (int i = 1; i <= N; ++i) buc[i] += buc[i - 1];
for (int i, j = 1; j <= N; ++j) {
i = tmp[j];
sa[buc[fir[i]]--] = i;
}
bool same = false;
for (int i, j = 1, last = 0; j <= N; ++j) {
i = sa[j];
if (!last) rnk[i] = 1;
else if (fir[i] == fir[last] && sec[i] == sec[last])
rnk[i] = rnk[last], same = true;
else rnk[i] = rnk[last] + 1;
last = i;
}
if (!same) break;
}
for (int i = 1, k = 0; i <= N; ++i) {
if (rnk[i] == 1) k = 0;
else {
if (k > 0) --k;
int j = sa[rnk[i] - 1];
while (i + k <= N && j + k <= N && a[i + k] == a[j + k]) ++k;
}
height[rnk[i]] = k;
}
}
// Returns true when some string of length k occurs as a substring of every
// one of the n input strings.
//
// Reads N, n, sa, height and belong as prepared by main() and SA().
//
// A common substring of length k shows up as a maximal run of ranks whose
// height values are all >= k; the suffixes sa[i - 1] and sa[i] of such a run
// share a prefix of length >= k, so the run is feasible once it touches all
// n strings.
//
// Fixes over the previous version:
//  * n == 1: the run-based scan needs two adjacent suffixes, so it reported
//    the longest *repeated* substring. With a single string the answer is the
//    whole string, so k is feasible iff it does not exceed that length.
//  * k <= 0: the empty string is always common; returning early also keeps
//    the scan from reading sa[0] and counting a phantom "string 0".
bool check(int k) {
  // vis[b] == tot marks string b as already seen in the current run; bumping
  // tot invalidates every mark at once without clearing the array.
  static int vis[_], tot = 0;

  if (k <= 0) return true;

  if (n == 1) {
    // Separator positions carry belong == 0; all others are the one string.
    int len = 0;
    for (int i = 1; i <= N; ++i)
      if (belong[i] != 0) ++len;
    return k <= len;
  }

  int cnt = 0;  // number of distinct strings seen in the current run
  ++tot;
  for (int i = 1; i <= N; ++i) {
    if (height[i] < k) {
      // The run is broken between ranks i - 1 and i: start over.
      // (SA() stores height[1] = 0, so i == 1 always lands here for k >= 1.)
      cnt = 0;
      ++tot;
      continue;
    }
    // Both endpoints of the adjacent pair belong to the run.
    if (vis[belong[sa[i]]] != tot) {
      vis[belong[sa[i]]] = tot;
      ++cnt;
    }
    if (vis[belong[sa[i - 1]]] != tot) {
      vis[belong[sa[i - 1]]] = tot;
      ++cnt;
    }
    if (cnt == n) return true;
  }
  return false;
}
int main() {
#ifndef ONLINE_JUDGE
freopen("string.in", "r", stdin);
freopen("string.out", "w", stdout);
#endif
scanf("%d", &n);
int now = 0;
for (int i = 1; i <= n; ++i) {
++now;
scanf("%s", str);
int len = strlen(str);
for (int j = now; j <= now + len - 1; ++j)
s[j] = str[j - now] - 'a' + 1, belong[j] = i;
now += len - 1;
s[++now] = i + 26;
}
N = now;
SA();
int l = 0, r = N;
while (l < r) {
int mid = (l + r + 1) >> 1;
if (check(mid)) l = mid;
else r = mid - 1;
}
printf("%d\n", l);
return 0;
}
O(N)
#include <bits/stdc++.h>
using namespace std;
// Capacity of every fixed-size buffer below; all of them are used 1-based.
const int _ = 100000 + 10;
const int INF = 0x3f3f3f3f;

int N;          // length of the concatenated symbol sequence s[1..N]
int n;          // number of input strings
int s[_];       // concatenated symbols: letters as 1..26, separators as 26 + i
int belong[_];  // index (1..n) of the string owning a position; 0 on separators
int rk[_];      // rk[i]: rank of the suffix that starts at position i
int sa[_];      // sa[r]: start position of the suffix with rank r
int height[_];  // height[r]: LCP of the suffixes ranked r - 1 and r
char str[_];    // scratch buffer holding the string currently being read
inline void SA() {
int M = 125, p = 0; // 字符集
static int buc[_], id[_], fir[_], t[_], oldrk[_];
copy(s + 1, s + N + 1, t + 1);
sort(t + 1, t + N + 1);
int *end = unique(t + 1, t + N + 1);
for (int i = 1; i <= N; ++i) s[i] = lower_bound(t + 1, end, s[i]) - t;
for (int i = 1; i <= N; ++i) ++buc[rk[i] = s[i]];
for (int i = 1; i <= M; ++i) buc[i] += buc[i - 1];
for (int i = N; i >= 1; --i) sa[buc[rk[i]]--] = i;
for (int w = 1; w < N; w <<= 1, M = p) {
p = 0;
for (int i = N; i > N - w; --i) id[++p] = i;
for (int i = 1; i <= N; ++i)
if (sa[i] > w) id[++p] = sa[i] - w;
fill(buc + 1, buc + M + 1, 0);
for (int i = 1; i <= N; ++i) ++buc[fir[i] = rk[id[i]]];
for (int i = 1; i <= M; ++i) buc[i] += buc[i - 1];
for (int i = N; i >= 1; --i) sa[buc[fir[i]]--] = id[i];
copy(rk + 1, rk + N + 1, oldrk + 1);
p = 0;
for (int i = 1; i <= N; ++i) {
int x = sa[i], y = sa[i - 1];
rk[sa[i]] =
(oldrk[x] == oldrk[y] && oldrk[x + w] == oldrk[y + w]) ? p : ++p;
}
if (p == N) break;
}
for (int i = 1, k = 0; i <= N; ++i) {
if (rk[i] == 1)
k = 0;
else {
if (k > 0) --k;
int j = sa[rk[i] - 1];
while (i + k <= N && j + k <= N && s[i + k] == s[j + k]) ++k;
}
height[rk[i]] = k;
}
}
// Length of the longest substring common to all n strings; filled by solve().
int ans = 0;

// Computes `ans` with a two-pointer sweep over the suffix array.
//
// Reads N, n, sa, height and belong as prepared by main() and SA().
//
// At the moment the answer is updated in iteration i, the window of suffixes
// is sa[i - 1 .. j - 1] (the suffix sa[i - 1] is only dropped at the END of
// iteration i), and their common prefix length is min(height[i .. j - 1]).
// That minimum is kept at the front of a monotone deque q of rank indices with
// non-decreasing height. For every left end, j is the smallest right end that
// covers all n strings, which gives the largest possible minimum.
//
// Fixes over the previous version:
//  * n == 1: a one-suffix window left the deque empty, so height[q[l]] read a
//    stale slot and the result was the longest *repeated* substring. With a
//    single string the answer is simply its length.
//  * separator suffixes (belong == 0) were counted as if they were one more
//    input string; they are now ignored by the coverage counter;
//  * the coverage state is reset on entry instead of living in statics that
//    are never cleared.
void solve() {
  static int vis[_];  // vis[b]: how many window suffixes start inside string b
  static int q[_];    // monotone deque of rank indices, live range [l, r)
  std::fill(vis, vis + _, 0);

  if (n == 1) {
    // Separator positions carry belong == 0; all others are the one string.
    int len = 0;
    for (int i = 1; i <= N; ++i)
      if (belong[i] != 0) ++len;
    ans = std::max(ans, len);
    return;
  }

  int cnt = 0;  // number of distinct strings present in the window
  int l = 1, r = 1;
  for (int i = 1, j = 1; i <= N; ++i) {
    // Drop height indices that fell out of [i, j - 1].
    while (l < r && q[l] < i) ++l;

    // Grow the window to the right until every string is represented.
    while (j <= N && cnt < n) {
      const int owner = belong[sa[j]];
      if (owner != 0 && vis[owner]++ == 0) ++cnt;
      while (l < r && height[q[r - 1]] >= height[j]) --r;
      q[r++] = j;
      ++j;
    }
    if (cnt < n) return;  // no window starting here or later can cover all

    if (l < r) ans = std::max(ans, height[q[l]]);

    // Slide the left end: suffix sa[i - 1] leaves the window.
    if (i > 1) {
      const int owner = belong[sa[i - 1]];
      if (owner != 0 && --vis[owner] == 0) --cnt;
    }
  }
}
int main() {
#ifndef ONLINE_JUDGE
freopen("string.in", "r", stdin);
freopen("string.out", "w", stdout);
#endif
scanf("%d", &n);
int now = 0;
for (int i = 1; i <= n; ++i) {
++now;
scanf("%s", str);
int len = strlen(str);
for (int j = now; j <= now + len - 1; ++j)
s[j] = str[j - now] - 'a' + 1, belong[j] = i;
now += len - 1;
s[++now] = i + 26;
}
N = now;
SA();
solve();
printf("%d\n", ans);
return 0;
}
既然选择了远方,便只顾风雨兼程。