SP705 SUBST1 - New Distinct Substrings
知识点: SAM,后缀树,SA
原题面 Luogu
分析题意
SAM
SAM 板子背诵检查。
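每个本质不同的子串唯一对应 SAM 中的一个状态,而状态 \(i\) 恰好包含 \(\operatorname{len}(i)-\operatorname{len}(\operatorname{link}(i))\) 个长度连续的子串,因此对所有非初始状态求和即得答案:\(ans = \sum\limits_{i}\left(\operatorname{len}(i)-\operatorname{len}(\operatorname{link}(i))\right)\)。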
后缀树
一种显然的做法是建出后缀树,答案即未压缩的后缀树(即后缀 Trie)中除根节点以外的节点个数,因为每个非根节点恰好对应一个本质不同的非空子串。
在缩链成边的同时维护边中包含的节点个数即可。
总复杂度 \(O(n)\)。
后缀数组
另一种想法是用所有子串的个数 \(\frac{n(n+1)}{2}\) 减去重复子串的个数,显然重复的串一定出现在某两个后缀的公共前缀部分。
考虑后缀树上被重复统计的部分所在的位置,如下图所示:
观察其中的单色区域的形态,可以考虑增量法统计答案,按照字典序依次将所有后缀加入到后缀树中。
考虑加入后缀 \(sa_i\) 后新增的本质不同的子串数量:显然为 \(\operatorname{dep}(sa_i) - \operatorname{dep}(\operatorname{lca}(sa_i, sa_{i-1}))\),其中 \(\operatorname{dep}(sa_i) = n - sa_i + 1\) 为该后缀的长度。这个差值即后缀 \(sa_i\) 的所有前缀中,尚未作为此前加入的后缀的前缀出现过的那些前缀的数量。
字典序相邻的节点的 \(lca\) 的深度即为 SA 中的 \(\operatorname{height}\),则最终答案即:
\[\frac{n(n+1)}{2} - \sum_{i = 2}^{n}\operatorname{height}_i
\]
SA 简单实现即可,总复杂度 \(O(n)\sim O(n\log n)\),依赖于实现。
代码实现
SAM
//知识点:SAM
/*
By:Luckyblock
*/
#include <algorithm>
#include <cctype>
#include <cstdio>
#include <cstring>
#define ll long long
// Capacity of the input buffer S; the per-state arrays below are sized at
// twice this value.
const int kMaxn = 5e4 + 10;
// Number of transition slots per state (second dimension of ch).
const int kMaxm = 300 + 10;
//=============================================================
// n: length of the current string. last: state created by the most recent
// Insert. node_num: highest state index in use (state 1 is the initial state).
// ans: running total that main prints for each test case.
int n, last = 1, node_num = 1, ans;
// ch[u][c]: state reached from u on character code c (0 means no transition).
// link[u]: suffix link of state u. len[u]: length value Insert assigns to u.
int ch[kMaxn << 1][kMaxm], link[kMaxn << 1], len[kMaxn << 1];
// Input string, stored 1-based (main reads into S + 1).
char S[kMaxn];
//=============================================================
// Reads the next decimal integer from stdin, skipping any leading non-digit
// characters. A '-' seen while skipping makes the result negative.
// Returns 0 if the end of input is reached before any digit is found.
inline int read() {
  int sign = 1, value = 0;
  // getchar() returns int so that EOF can be told apart from every real
  // character; keeping it in a char made the skip loop spin forever at EOF.
  int c = getchar();
  for (; c != EOF && !isdigit(c); c = getchar()) {
    if (c == '-') sign = -1;
  }
  for (; c != EOF && isdigit(c); c = getchar()) {
    value = value * 10 + (c - '0');
  }
  return sign * value;
}
// Raises fir to sec when sec is strictly larger; otherwise fir is unchanged.
void GetMax(int &fir, int sec) {
  fir = (fir < sec) ? sec : fir;
}
// Lowers fir to sec when sec is strictly smaller; otherwise fir is unchanged.
void GetMin(int &fir, int sec) {
  fir = (sec < fir) ? sec : fir;
}
// Extends the suffix automaton by one character with code c_.
// Creates a new state for the whole extended string, wires transitions along
// the suffix-link chain of the previous last state, and clones a state when
// the existing target of the transition cannot serve as the suffix link.
void Insert(int c_) {
  int prev = last;
  const int cur = ++ node_num;
  last = cur;
  len[cur] = len[prev] + 1;

  // Walk up the suffix links, adding the missing c_ transition to cur.
  while (prev != 0 && ch[prev][c_] == 0) {
    ch[prev][c_] = cur;
    prev = link[prev];
  }

  // Fell off the chain: no shorter suffix has a c_ transition yet.
  if (prev == 0) {
    link[cur] = 1;
    return ;
  }

  const int target = ch[prev][c_];
  // The existing target is exactly one character longer than prev, so it can
  // be used directly as the suffix link.
  if (len[target] == len[prev] + 1) {
    link[cur] = target;
    return ;
  }

  // Otherwise split target: the clone keeps target's transitions and old
  // suffix link but has the shorter length.
  const int clone = ++ node_num;
  len[clone] = len[prev] + 1;
  link[clone] = link[target];
  memcpy(ch[clone], ch[target], sizeof (ch[target]));
  link[target] = clone;
  link[cur] = clone;

  // Redirect the transitions that used to reach target over to the clone.
  while (prev != 0 && ch[prev][c_] == target) {
    ch[prev][c_] = clone;
    prev = link[prev];
  }
}
// Resets the automaton to a single initial state (index 1) and clears ans so
// that the next test case starts from scratch.
void Init() {
  // Only states 0..node_num can have been written since the previous reset,
  // so wiping that prefix is sufficient. This avoids clearing the entire
  // transition table (over 100 MB) on every test case.
  const int used = node_num + 1;
  memset(ch, 0, sizeof (ch[0]) * used);
  memset(link, 0, sizeof (link[0]) * used);
  memset(len, 0, sizeof (len[0]) * used);
  last = node_num = 1;
  ans = 0;
}
//=============================================================
// Reads the number of test cases; for each string builds its suffix automaton
// and prints the number of distinct non-empty substrings.
int main() {
  int T = read();
  while (T --) {
    Init();
    // Stop when the input ends early instead of re-processing whatever is
    // still sitting in the buffer from the previous test case.
    if (scanf("%s", S + 1) != 1) break;
    n = strlen(S + 1);
    // Go through unsigned char so that a byte >= 0x80 cannot turn into a
    // negative index into ch[][] on platforms where plain char is signed.
    for (int i = 1; i <= n; ++ i) Insert((unsigned char) S[i]);
    // Every non-initial state i contributes len[i] - len[link[i]] substrings.
    for (int i = 2; i <= node_num; ++ i) ans += len[i] - len[link[i]];
    printf("%d\n", ans);
  }
  return 0;
}
SA
//知识点:SA
/*
By:Luckyblock
*/
#include <algorithm>
#include <cctype>
#include <cstdio>
#include <cstring>
#define LL long long
// Capacity of the per-position arrays below.
const int kN = 1e5 + 10;
//=============================================================
// Input string, stored 1-based (SuffixSort reads into s + 1).
char s[kN];
// n: length of the current string. m: upper bound on key values used by the
// counting sort in SuffixSort. sa[i]: start position of the i-th smallest
// suffix. rk[p]: rank of the suffix starting at p. oldrk: copy of rk taken
// before re-ranking. rk and oldrk are double-sized because cmp reads index
// p + w, which can exceed n. height[i]: value GetHeight stores for rank i
// (common prefix length of the suffixes at sa[i] and sa[i - 1]).
int n, m, sa[kN], rk[kN << 1], oldrk[kN << 1], height[kN];
// id: positions ordered by second key. cnt: counting-sort buckets.
// rkid[i]: cached rk[id[i]].
int id[kN], cnt[kN], rkid[kN];
//=============================================================
// Reads the next decimal integer from stdin, skipping any leading non-digit
// characters. A '-' seen while skipping makes the result negative.
// Returns 0 if the end of input is reached before any digit is found.
inline int read() {
  int sign = 1, value = 0;
  // getchar() returns int so that EOF can be told apart from every real
  // character; keeping it in a char made the skip loop spin forever at EOF.
  int c = getchar();
  while (c != EOF && !isdigit(c)) {
    if (c == '-') sign = -1;
    c = getchar();
  }
  while (c != EOF && isdigit(c)) {
    value = value * 10 + (c - '0');
    c = getchar();
  }
  return sign * value;
}
// Replaces fir_ with sec_ when sec_ is strictly larger; otherwise no change.
void Chkmax(int &fir_, int sec_) {
  if (!(sec_ > fir_)) return;
  fir_ = sec_;
}
// Replaces fir_ with sec_ when sec_ is strictly smaller; otherwise no change.
void Chkmin(int &fir_, int sec_) {
  if (!(sec_ < fir_)) return;
  fir_ = sec_;
}
// Returns true when positions x_ and y_ carry the same pair of old ranks:
// the rank at the position itself and the rank w_ positions further on.
bool cmp(int x_, int y_, int w_) {
  if (oldrk[x_] != oldrk[y_]) return false;
  return oldrk[x_ + w_] == oldrk[y_ + w_];
}
void GetHeight() {
for (int i = 1, k = 0; i <= n; ++ i) {
if (rk[i] == 1) k = 0;
else {
if (k > 0) -- k;
int j = sa[rk[i] - 1];
while (i + k <= n && j + k <=n &&
s[i + k] == s[j + k]) {
++ k;
}
}
height[rk[i]] = k;
}
}
// Reads one string from stdin into s (1-based), builds its suffix array by
// prefix doubling with counting sort, then fills height[] via GetHeight().
// On return n is the string length, sa[1..n] lists the suffix start positions
// in lexicographic order and rk[p] is the rank (1..n) of the suffix at p.
// If no string can be read, n is set to 0 and nothing else is computed.
void SuffixSort() {
  if (scanf("%s", s + 1) != 1) {
    // Input exhausted: behave as an empty string so the caller's sum is 0.
    s[1] = '\0';
    n = 0;
    return ;
  }
  m = 300;
  n = strlen(s + 1);
  // cmp() reads rk[p + w] for p + w > n and relies on it being 0 ("empty
  // suffix"). Ranks left over from a longer string in an earlier test case
  // would make different suffixes compare equal and corrupt the result.
  memset(rk, 0, sizeof (rk));
  memset(cnt, 0, sizeof (cnt));
  // Initial order by first character. The unsigned char conversion keeps the
  // bucket index non-negative where plain char is signed.
  for (int i = 1; i <= n; ++ i) cnt[rk[i] = (unsigned char) s[i]] ++;
  for (int i = 1; i <= m; ++ i) cnt[i] += cnt[i - 1];
  for (int i = n; i >= 1; -- i) sa[cnt[rk[i]] --] = i;
  for (int p, w = 1; w < n; w <<= 1) {
    // Order positions by second key rk[i + w]: suffixes too short to have a
    // second half come first, the rest follow in the current sa order.
    p = 0;
    for (int i = n; i > n - w; -- i) id[++ p] = i;
    for (int i = 1; i <= n; ++ i) {
      if (sa[i] > w) id[++ p] = sa[i] - w;
    }
    // Stable counting sort of that sequence by first key rk[i].
    memset(cnt, 0, sizeof (cnt));
    for (int i = 1; i <= n; ++ i) cnt[rkid[i] = rk[id[i]]] ++;
    for (int i = 1; i <= m; ++ i) cnt[i] += cnt[i - 1];
    for (int i = n; i >= 1; -- i) sa[cnt[rkid[i]] --] = id[i];
    // Re-rank: suffixes with equal (first, second) key pairs share a rank.
    // sa[0] is 0 and oldrk[0] is 0, so the first suffix always opens rank 1.
    m = 0;
    memcpy(oldrk, rk, sizeof (rk));
    for (int i = 1; i <= n; ++ i) {
      m += (cmp(sa[i], sa[i - 1], w) ^ 1);
      rk[sa[i]] = m;
    }
  }
  // With a single character the doubling loop never runs and rk[1] would
  // still hold the raw character code; GetHeight() needs a real rank.
  if (n == 1) rk[1] = 1;
  GetHeight();
}
//=============================================================
int main() {
int T = read();
while (T --) {
SuffixSort();
LL ans = 1ll * n * (n + 1) / 2ll;
for (int i = 1; i <= n; ++ i) ans -= height[i];
printf("%lld\n", ans);
}
return 0;
}
作者@Luckyblock,转载请声明出处。