后缀自动机

记录一下这几天刷的后缀自动机的题目

Glass Beads UVA - 719
\(S\) 循环同构的最小表示
\(S\) copy 成 \(SS\) 后建出后缀自动机,按字典序走 \(n\) 步即可

<details>
<summary>代码</summary>
```cpp
const int N = 4e4 + 7;
char s[N];

namespace SAM {
int ch[N][26], len[N], link[N];
int last, tol, n;
void init() {
len[tol = last = 0] = 0; link[0] = -1;
rep (i, 0, 26) ch[0][i] = 0;
tol++;
}
int newnode() {
int p = tol++;
rep (i, 0, 26) ch[p][i] = 0;
len[p] = link[p] = 0;
return p;
}
void extend(int c) {
int cur = newnode();
len[cur] = len[last] + 1;
int p = last;
while (p != -1 && !ch[p][c]) {
ch[p][c] = cur;
p = link[p];
}
if (p == -1) {
link[cur] = 0;
} else {
int q = ch[p][c];
if (len[p] + 1 == len[q]) {
link[cur] = q;
} else {
int clone = newnode();
len[clone] = len[p] + 1;
memcpy(ch[clone], ch[q], sizeof(ch[q]));
link[clone] = link[q];
while (p != -1 && ch[p][c] == q) {
ch[p][c] = clone;
p = link[p];
}
link[q] = link[cur] = clone;
}
}
last = cur;
}
int dfs(int u, int l) {
if (l == n) return len[u];
rep (i, 0, 26) if (ch[u][i]) return dfs(ch[u][i], l + 1);
}
void solve() {
scanf("%s", s);
n = strlen(s);
init();
rep (l, 0, 2) rep (i, 0, n) extend(s[i] - 'a');
printf("%d\n", dfs(0, 0) - n + 1);
}
}

// Reads the number of test cases and solves each one.
// When LOCAL is defined, output goes to ans.out and the elapsed CPU time is
// appended at the end.
int main() {
#ifdef LOCAL
	freopen("ans.out", "w", stdout);
#endif
	int T;
	// Bail out on malformed input instead of looping on an uninitialised T.
	if (scanf("%d", &T) != 1) return 0;
	while (T--) SAM::solve();
#ifdef LOCAL
	printf("%.10f\n", (db)clock() / CLOCKS_PER_SEC);
#endif
	return 0;
}

```
</details>


[Longest Common Substring SPOJ - LCS](https://vjudge.net/problem/SPOJ-LCS)
求两个串的最长公共子串
对第一个串建后缀自动机,第二个串在自动机上匹配,若匹配上了则长度加一,并且转移到下一个状态,匹配失败则走fail节点,更新 $len$,每次都更新一下答案即可
<details>
<summary>代码</summary>
```cpp
const int N = 5e5 + 7;

// Suffix automaton over 'a'..'z' for the longest-common-substring problem.
// State 0 is the root; a transition value of 0 means "no edge" (no edge ever
// leads back to the root).
namespace SAM {
	int ch[N][26], len[N], link[N], sz[N];
	int last, tol;

	// Resets the automaton to a single root state.
	void init() {
		tol = last = 0;
		len[0] = 0;
		link[0] = -1;
		for (int i = 0; i < 26; i++) ch[0][i] = 0;
		tol++;
	}

	// Allocates a fresh state and returns its index.
	// Every one of the 26 transitions is cleared; the previous version only
	// reset ch[p][0], so edges left over from an earlier build could survive
	// a call to init().
	int newnode() {
		int p = tol++;
		for (int i = 0; i < 26; i++) ch[p][i] = 0;
		len[p] = link[p] = sz[p] = 0;
		return p;
	}

	// Appends letter c (0..25) to the text represented by the automaton.
	void extend(int c) {
		int cur = newnode();
		len[cur] = len[last] + 1;
		sz[cur] = 1;
		int p = last;
		// Give every suffix state that lacks a c-edge an edge to the new state.
		while (p != -1 && !ch[p][c]) {
			ch[p][c] = cur;
			p = link[p];
		}
		if (p == -1) {
			link[cur] = 0;
		} else {
			int q = ch[p][c];
			if (len[p] + 1 == len[q]) {
				link[cur] = q;
			} else {
				// q also holds longer strings: split off a copy limited to len[p] + 1.
				int clone = newnode();
				len[clone] = len[p] + 1;
				memcpy(ch[clone], ch[q], sizeof(ch[q]));
				link[clone] = link[q];
				while (p != -1 && ch[p][c] == q) {
					ch[p][c] = clone;
					p = link[p];
				}
				link[q] = link[cur] = clone;
			}
		}
		last = cur;
	}

	// Runs s through the automaton and prints the length of the longest
	// substring of s that is also a substring of the indexed text.
	void solve(char *s) {
		int n = strlen(s);
		int cur = 0;
		int ans = 0, l = 0;
		for (int i = 0; i < n; i++) {
			int d = s[i] - 'a';
			if (ch[cur][d]) {
				cur = ch[cur][d];
				l++;
			} else {
				// Mismatch: fall back along suffix links until a d-edge exists.
				while (cur != -1 && !ch[cur][d]) cur = link[cur];
				if (cur == -1) cur = l = 0;
				else l = len[cur] + 1, cur = ch[cur][d];
			}
			if (l > ans) ans = l;
		}
		printf("%d\n", ans);
	}
}

char s[N], t[N];

// Reads two strings, indexes the first one in the suffix automaton and lets
// SAM::solve report the longest common substring with the second one.
int main() {
#ifdef LOCAL
	freopen("ans.out", "w", stdout);
#endif
	scanf("%s%s", s, t);
	SAM::init();
	// Feed the first string letter by letter up to its terminating NUL.
	for (const char *it = s; *it; ++it) {
		SAM::extend(*it - 'a');
	}
	SAM::solve(t);
#ifdef LOCAL
	printf("%.10f\n", (db)clock() / CLOCKS_PER_SEC);
#endif
	return 0;
}

```
</details>

Substrings SPOJ - NSUBSTR
求每个长度right集合最大的大小
right集合大小通过fail树进行dfs或拓扑序求出,然后用right集合大小更新状态里最长串
最后从后往前依次用 \(f_{i+1}\) 更新 \(f_i\) 即可

代码 ```cpp const int N = 5e5 + 7, E = 5e5 + 7; int f[N]; namespace SAM { int ch[N][26], len[N], link[N], cnt[N]; int last, tol; void init() { len[tol = last = 0] = 0; link[0] = -1; rep (i, 0, 26) ch[0][i] = 0; tol++; } int newnode() { int p = tol++; rep (i, 0, 26) ch[p][0] = 0; len[p] = link[p] = cnt[p] = 0; return p; } void extend(int c) { int cur = newnode(); len[cur] = len[last] + 1; cnt[cur] = 1; int p = last; while (p != -1 && !ch[p][c]) { ch[p][c] = cur; p = link[p]; } if (p == -1) { link[cur] = 0; } else { int q = ch[p][c]; if (len[p] + 1 == len[q]) { link[cur] = q; } else { int clone = newnode(); len[clone] = len[p] + 1; memcpy(ch[clone], ch[q], sizeof(ch[q])); link[clone] = link[q]; while (p != -1 && ch[p][c] == q) { ch[p][c] = clone; p = link[p]; } link[q] = link[cur] = clone; } } last = cur; } Edg void dfs(int u) { es (u, i, v) dfs(v), cnt[u] += cnt[v]; chkmax(f[len[u]], cnt[u]); } void count() { rep (i, 1, tol) addd(link[i], i); dfs(0); } }

char s[N];
int n;

// Builds the automaton of the input string, aggregates occurrence counts and
// prints f[1..n], one value per line.
// When LOCAL is defined, output goes to ans.out and the elapsed CPU time is
// appended at the end.
int main() {
#ifdef LOCAL
	freopen("ans.out", "w", stdout);
#endif
	SAM::init();
	scanf("%s", s);
	n = strlen(s);
	rep (i, 0, n) SAM::extend(s[i] - 'a');
	SAM::count();
	// A string occurring k times has all its shorter suffixes occurring at
	// least k times, so propagate the maximum from longer lengths to shorter.
	// NOTE(review): per is assumed to iterate downwards over [1, n) -- confirm.
	per (i, 1, n) chkmax(f[i], f[i + 1]);
	rep (i, 1, n + 1) printf("%d\n", f[i]);
#ifdef LOCAL
	printf("%.10f\n", (db)clock() / CLOCKS_PER_SEC);
#endif
	return 0;
}

```
</details>


[Longest Common Substring II SPOJ - LCS2](https://vjudge.net/problem/SPOJ-LCS2)
求多个串之间的最长公共子串
首先对第一个串建后缀自动机,然后对后面每一个串进行匹配,保存在每个状态上能匹配长度的最大值,再用拓扑序更新fail能匹配的最大值(因为当前节点能匹配的长度是这个状态的一个后缀,fail节点也是该状态的后缀,那么当前状态能匹配上的长度fail也能匹配上)
然后分别取min即可(因为要满足每个串都能匹配上,那么是最短的限制了它们能匹配的最长长度)
<details>
<summary>代码</summary>
```cpp
// Capacity of every per-state array below.
const int N = 5e5 + 7;
// ch[u][c]: target of the edge labelled c from state u (0 = no edge);
// len[u]: length stored for state u; link[u]: suffix link (-1 for the root);
// cnt[u]: set to 1 by extend() for states created for a new prefix.
int ch[N][26], len[N], link[N], cnt[N];
// last: state created by the most recent extend(); tol: number of states in use.
int last, tol;
void init() {
	len[tol = last = 0] = 0; link[0] = -1;
	rep (i, 0, 26) ch[0][i] = 0;
	tol++;
}
// Allocates a fresh state with no outgoing edges and returns its index.
// All 26 transitions are cleared; the earlier code reset ch[p][0] 26 times
// and left the other letters untouched.
int newnode() {
	int p = tol++;
	rep (i, 0, 26) ch[p][i] = 0;
	len[p] = link[p] = cnt[p] = 0;
	return p;
}
// Appends letter c (0..25) to the text represented by the automaton.
void extend(int c) {
	const int cur = newnode();
	len[cur] = len[last] + 1;
	cnt[cur] = 1;

	// Give every suffix state that lacks a c-edge an edge to the new state.
	int p = last;
	for (; p != -1 && !ch[p][c]; p = link[p]) {
		ch[p][c] = cur;
	}
	last = cur;

	if (p == -1) {
		// Walked past the root: the new state hangs directly below it.
		link[cur] = 0;
		return;
	}

	const int q = ch[p][c];
	if (len[q] == len[p] + 1) {
		link[cur] = q;
		return;
	}

	// q also holds longer strings: split off a copy limited to len[p] + 1.
	const int clone = newnode();
	len[clone] = len[p] + 1;
	memcpy(ch[clone], ch[q], sizeof(ch[q]));
	link[clone] = link[q];
	for (; p != -1 && ch[p][c] == q; p = link[p]) {
		ch[p][c] = clone;
	}
	link[q] = link[cur] = clone;
}
int c[N], a[N];
int mx[N], ans[N];
char s[N];

int main() {
#ifdef LOCAL
	freopen("ans.out", "w", stdout);
#endif
	memset(ans, 0x3f, sizeof(ans));
	init();
	scanf("%s", s);
	for (int i = 0; s[i]; i++) extend(s[i] - 'a');
	rep (i, 0, tol) c[len[i]]++;
	rep (i, 1, tol) c[i] += c[i - 1];
	rep (i, 0, tol) a[--c[len[i]]] = i;
	while (~scanf("%s", s)) {
		int l = 0, p = 0;
		for (int i = 0; s[i]; i++) {
			int d = s[i] - 'a';
			while (~p && !ch[p][d]) p = link[p], l = len[p];
			if (p == -1) p = l = 0;
			else ++l, p = ch[p][d], chkmax(mx[p], l);
		}
		per (i, 0, tol) {
			int p = a[i];
			if (~link[p]) chkmax(mx[link[p]], std::min(len[link[p]], mx[p]));
			chkmin(ans[p], mx[p]);
			mx[p] = 0;
		}
	}
	int res = 0;
	rep (i, 0, tol) chkmax(res, ans[i]);
	printf("%d\n", res);
#ifdef LOCAL
	printf("%.10f\n", (db)clock() / CLOCKS_PER_SEC);
#endif
	return 0;
}

```
</details>

Lexicographical Substring Search SPOJ - SUBLEX
求出字典序第 \(k\) 小的子串
每个状态的right集合为 \(1\),再求出每个状态往 \(trans\) 边走的路径数,通过路径数类似于主席树求第 \(k\) 大的方法dfs即可
求路径数为 \(f_u = right_u + \sum \limits_{u \to v} f_v\)

代码 ```cpp const int N = 2e5 + 7; int ch[N][26], len[N], link[N], cnt[N], f[N]; int last, tol; void init() { len[tol = last = 0] = 0; link[0] = -1; rep (i, 0, 26) ch[0][i] = 0; tol++; } int newnode() { int p = tol++; rep (i, 0, 26) ch[p][0] = 0; len[p] = link[p] = cnt[p] = 0; return p; } void extend(int c) { int cur = newnode(); len[cur] = len[last] + 1; cnt[cur] = 1; int p = last; while (p != -1 && !ch[p][c]) { ch[p][c] = cur; p = link[p]; } if (p == -1) { link[cur] = 0; } else { int q = ch[p][c]; if (len[p] + 1 == len[q]) { link[cur] = q; } else { int clone = newnode(); len[clone] = len[p] + 1; memcpy(ch[clone], ch[q], sizeof(ch[q])); link[clone] = link[q]; while (p != -1 && ch[p][c] == q) { ch[p][c] = clone; p = link[p]; } link[q] = link[cur] = clone; } } last = cur; }

void dfs(int u, int x) {
if (!x) return;
rep (i, 0, 26) {
int ne = ch[u][i];
if (ne <= 0) continue;
if (f[ne] >= x) {
putchar('a' + i);
dfs(ne, x - 1);
return;
}
x -= f[ne];
}
}
char s[N];
int a[N], c[N];

int main() {

ifdef LOCAL

freopen("ans.out", "w", stdout);

endif

init();
scanf("%s", s);
for (int i = 0; s[i]; i++) extend(s[i] - 'a');
rep (i, 0, tol) c[len[i]]++;
rep (i, 1, tol) c[i] += c[i - 1];
rep (i, 0, tol) a[--c[len[i]]] = i;
per (i, 0, tol) {
	int p = a[i];
	f[p] = 1;
	rep (j, 0, 26) if (ch[p][j]) f[p] += f[ch[p][j]];
}
int T;
scanf("%d", &T);
while (T--) {
	int x;
	scanf("%d", &x);
	dfs(0, x);
	puts("");
}

ifdef LOCAL

printf("%.10f\n", (db)clock() / CLOCKS_PER_SEC);

endif

return 0;

}

```
</details>

[HDU4622 Reincarnation](http://acm.hdu.edu.cn/showproblem.php?pid=4622)
求区间本质不同子串个数
离线询问,按左端点排序,左端点相同的建后缀自动机,复杂度为 $O(n^2)$
<details>
<summary>代码</summary>
```cpp
const int N = 2e4 + 7; // capacity for the string and automaton arrays
const int Q = 2e4 + 7; // capacity for the queries

// One offline query on s[l..r]; id is its position in the input order.
struct Qu {
	int l, r, id;
	// Orders queries by left end, then by right end, so that queries sharing
	// a left end can reuse one automaton that is extended to the right.
	bool operator < (const Qu &p) const {
		if (l != p.l) return l < p.l;
		return r < p.r;
	}
} query[Q]; // sized by the query constant (was N, the string constant)
// ch[u][c]: target of the edge labelled c from state u (0 = no edge);
// len[u]: length stored for state u; link[u]: suffix link (-1 for the root);
// sz[u]: set to 1 by extend() for states created for a new prefix.
int ch[N][26], len[N], link[N], sz[N];
// last: state created by the most recent extend(); tol: number of states in use.
int last, tol;
// Running sum of len[cur] - len[link[cur]] over all extend() calls since init().
int ans;
void init() {
	len[0] = 0; link[0] = -1;
	tol = 0, last = 0, ans = 0;
	rep (i, 0, 26) ch[0][i] = 0;
	tol++;
}
int newnode() {
	int p = tol++;
	rep (i, 0, 26) ch[p][i] = 0;
	len[p] = link[p] = sz[p] = 0;
	return p;
}
// Appends letter c (0..25) and adds len[cur] - len[link[cur]] to ans, i.e.
// the number of new distinct substrings contributed by the new state.
void extend(int c) {
	const int cur = newnode();
	len[cur] = len[last] + 1;
	sz[cur] = 1;

	// Give every suffix state that lacks a c-edge an edge to the new state.
	int p = last;
	for (; p != -1 && !ch[p][c]; p = link[p]) {
		ch[p][c] = cur;
	}

	if (p != -1) {
		const int q = ch[p][c];
		if (len[q] != len[p] + 1) {
			// q also holds longer strings: split off a copy limited to len[p] + 1.
			const int clone = newnode();
			len[clone] = len[p] + 1;
			memcpy(ch[clone], ch[q], sizeof(ch[q]));
			link[clone] = link[q];
			for (; p != -1 && ch[p][c] == q; p = link[p]) {
				ch[p][c] = clone;
			}
			link[q] = link[cur] = clone;
		} else {
			link[cur] = q;
		}
	} else {
		// Walked past the root: the new state hangs directly below it.
		link[cur] = 0;
	}

	last = cur;
	ans += len[cur] - len[link[cur]];
}

char s[N];
int res[Q];
void solve() {
	scanf("%s", s + 1);
	int n = strlen(s + 1);
	int q;
	scanf("%d", &q);
	rep (i, 1, q + 1) scanf("%d%d", &query[i].l, &query[i].r), query[i].id = i;
	std::sort(query + 1, query + q + 1);
	query[0].l = -1;
	rep (i, 1, q + 1) {
		if (query[i].l != query[i - 1].l) {
			init();
			rep (j, query[i].l, query[i].r + 1) extend(s[j] - 'a');
			res[query[i].id] = ans;
		} else {
			rep (j, query[i - 1].r + 1, query[i].r + 1) extend(s[j] - 'a');
			res[query[i].id] = ans;
		}
	}
	rep (i, 1, q + 1) printf("%d\n", res[i]);
}

// Reads the number of test cases and runs solve() for each of them.
int main() {
#ifdef LOCAL
	freopen("ans.out", "w", stdout);
#endif
	int T;
	scanf("%d", &T);
	while (T != 0) {
		--T;
		solve();
	}
#ifdef LOCAL
	printf("%.10f\n", (db)clock() / CLOCKS_PER_SEC);
#endif
	return 0;
}

```
</details>

HDU4641 K-string
插入字符或求right集合不小于 \(K\) 的子串个数
离线,建出后缀自动机后,一个right集合对答案有贡献对于时间来说是一个后缀,具体为从right集合第 \(k\) 大开始贡献
在fail树上从下至上进行线段树合并,查询线段树第 \(k\) 大即可

代码 ```cpp const int N = 5e5 + 7; namespace SAM { int ch[N][26], len[N], fail[N], id[N]; int last, tol; void init() { len[0] = 0; fail[0] = -1; tol = 0, last = 0; rep (i, 0, 26) ch[0][i] = 0; tol++; } int newnode() { int p = tol++; rep (i, 0, 26) ch[p][i] = 0; len[p] = fail[p] = id[p] = 0; return p; } void extend(int c, int n) { int cur = newnode(); len[cur] = len[last] + 1; assert(len[cur] == n); id[n] = cur; int p = last; for (; p != -1 && !ch[p][c]; p = fail[p]) ch[p][c] = cur; if (p == -1) { fail[cur] = 0; } else { int q = ch[p][c]; if (len[p] + 1 == len[q]) { fail[cur] = q; } else { int clone = newnode(); len[clone] = len[p] + 1; memcpy(ch[clone], ch[q], sizeof(ch[q])); fail[clone] = fail[q]; for ( ; p != -1 && ch[p][c] == q; p = fail[p]) ch[p][c] = clone; fail[q] = fail[cur] = clone; } } last = cur; } } struct Tree { int l, r, sum; void clear() { l = r = sum = 0; } } tree[N * 16]; int tol; void init() { rep (i, 0, tol + 1) tree[i].clear(); tol = 0; } void update(int &p, int l, int r, int pos) { p = ++tol; tree[p].sum++; if (l == r) return; if (pos <= mid) update(lp, l, mid, pos); else update(rp, mid + 1, r, pos); } int merge(int x, int y, int l, int r) { if (!x || !y) return x | y; tree[x].sum = tree[x].sum + tree[y].sum; if (l < r) { tree[x].l = merge(tree[x].l, tree[y].l, l, mid); tree[x].r = merge(tree[x].r, tree[y].r, mid + 1, r); } else { tree[x].l = tree[x].r = 0; } return x; } int query(int p, int l, int r, int k) { if (l == r) return l; if (k <= tree[lp].sum) return query(lp, l, mid, k); return query(rp, mid + 1, r, k - tree[lp].sum); }

int n, m, k, que[N * 2], q, a[N], c[N], root[N];
ll ans[N];
char s[N];

void solve() {
scanf("%s", s + 1);
SAM::init();
rep (i, 1, n + 1) SAM::extend(s[i] - 'a', i);
q = 0;
rep (i, 0, m) {
int opt;
scanf("%d", &opt);
if (opt == 2) que[++q] = n;
else {
static char str[3];
scanf("%s", str);
n++;
SAM::extend(str[0] - 'a', n);
}
}
init();
rep (i, 0, SAM::tol) root[i] = 0;
rep (i, 1, n + 1) update(root[SAM::id[i]], 1, n, i), ans[i] = 0;
rep (i, 0, SAM::tol) c[i] = 0;
rep (i, 0, SAM::tol) c[SAM::len[i]]++;
rep (i, 1, SAM::tol) c[i] += c[i - 1];
rep (i, 0, SAM::tol) a[--c[SAM::len[i]]] = i;
per (i, 0, SAM::tol) {
int p = a[i];
if (tree[root[p]].sum >= k) ans[query(root[p], 1, n, k)] += SAM::len[p] - (SAM::fail[p] >= 0 ? SAM::len[SAM::fail[p]] : 0);
if (SAM::fail[p] >= 0) root[SAM::fail[p]] = merge(root[SAM::fail[p]], root[p], 1, n);
}
rep (i, 1, n + 1) ans[i] += ans[i - 1];
rep (i, 1, q + 1) printf("%lld\n", ans[que[i]]);
}

int main() {

ifdef LOCAL

freopen("ans.out", "w", stdout);

endif

while (~scanf("%d%d%d", &n, &m, &k)) solve();

ifdef LOCAL

printf("%.10f\n", (db)clock() / CLOCKS_PER_SEC);

endif

return 0;

}

```
</details>


[BZOJ 3998: [TJOI2015]弦论](http://www.lydsy.com/JudgeOnline/problem.php?id=3998)
T=0时right集合为 $1$,T=1时right集合为实际大小,拓扑序求出
再dfs得到第 $k$ 小字典序
<details>
<summary>代码</summary>
```cpp
// Capacity of every per-state array below.
const int N = 1e6 + 7;
// ch[u][c]: target of the edge labelled c from state u (0 = no edge);
// len[u]: length stored for state u; fail[u]: suffix link (-1 for the root);
// f[u]: number of strings counted on paths starting at u;
// right[u]: weight of the strings ending in u (set to 1 by extend()).
int ch[N][26], len[N], fail[N], f[N], right[N];
// Input string.
char s[N];
// last: state created by the most recent extend(); tol: number of states in use.
int last, tol;
void init() {
	len[tol = last = 0] = 0; fail[0] = -1;
	rep (i, 0, 26) ch[0][i] = 0;
	tol++;
}
int newnode() {
	int p = tol++;
	rep (i, 0, 26) ch[p][i] = 0;
	len[p] = fail[p] = right[p] = f[p] = 0;
	return p;
}
// Appends letter c (0..25); the new non-clone state starts with right = 1.
void extend(int c) {
	const int cur = newnode();
	len[cur] = len[last] + 1;
	right[cur] = 1;

	// Give every suffix state that lacks a c-edge an edge to the new state.
	int p = last;
	for (; p != -1 && !ch[p][c]; p = fail[p]) {
		ch[p][c] = cur;
	}
	last = cur;

	if (p == -1) {
		// Walked past the root: the new state hangs directly below it.
		fail[cur] = 0;
		return;
	}

	const int q = ch[p][c];
	if (len[q] == len[p] + 1) {
		fail[cur] = q;
		return;
	}

	// q also holds longer strings: split off a copy limited to len[p] + 1.
	const int clone = newnode();
	len[clone] = len[p] + 1;
	memcpy(ch[clone], ch[q], sizeof(ch[q]));
	fail[clone] = fail[q];
	for (; p != -1 && ch[p][c] == q; p = fail[p]) {
		ch[p][c] = clone;
	}
	fail[q] = fail[cur] = clone;
}
void dfs(int cur, int k) {
	if (k <= right[cur]) return;
	k -= right[cur];
	rep (i, 0, 26) if (ch[cur][i] > 0) {
		int ne = ch[cur][i];
		if (k <= f[ne]) {
			putchar('a' + i);
			dfs(ne, k);
			return;
		}
		k -= f[ne];
	}
}
int a[N], c[N]; // states ordered by len / counting-sort buckets

// Reads the string, the mode opt and the rank k, then prints the k-th
// smallest substring (opt = 0: distinct substrings, opt = 1: every
// occurrence counted) or -1 when fewer than k substrings exist.
int main() {
#ifdef LOCAL
	freopen("ans.out", "w", stdout);
#endif
	scanf("%s", s);
	init();
	for (int i = 0; s[i]; i++) extend(s[i] - 'a');
	int opt, k;
	scanf("%d%d", &opt, &k);
	// Counting sort of the states by len; a reverse scan then visits every
	// state before its fail link and after all states reachable from it.
	rep (i, 0, tol) c[len[i]]++;
	rep (i, 1, tol) c[i] += c[i - 1];
	rep (i, 0, tol) a[--c[len[i]]] = i;
	// NOTE(review): per is assumed to iterate downwards over [0, tol).
	per (i, 0, tol) {
		int p = a[i];
		if (opt) {
			if (fail[p] >= 0) right[fail[p]] += right[p];
		} else {
			right[p] = 1;
		}
	}
	right[0] = 0; // the empty string is not a substring
	per (i, 0, tol) {
		int p = a[i];
		// Sum in 64 bits and saturate at k: the counts can exceed the int
		// range, and dfs() only ever compares them with values <= k.
		long long total = right[p];
		// A transition value of 0 means "no edge"; the old ">= 0" test
		// followed it to the root and added f[0] for every missing letter.
		rep (j, 0, 26) if (ch[p][j] > 0) total += f[ch[p][j]];
		f[p] = total < k ? (int)total : k;
	}
	if (f[0] < k) return puts("-1"), 0;
	dfs(0, k);
	puts("");
#ifdef LOCAL
	printf("%.10f\n", (db)clock() / CLOCKS_PER_SEC);
#endif
	return 0;
}

```
</details>

BZOJ 4516: [Sdoi2016]生成魔咒
动态插入字符,求每个时刻的本质不同子串的个数
每次插入字符后答案加上 \(len_{cur}-len_{fail_{cur}}\) 即可

代码 ```cpp const int N = 2e5 + 7; namespace SAM { std::map ch[N]; int len[N], link[N], sz[N]; int last, tol; ll ans; void init() { len[0] = 0; link[0] = -1; tol++; last = 0; } void extend(int c) { int cur = tol++; len[cur] = len[last] + 1; sz[cur] = 1; int p = last; while (p != -1 && !ch[p].count(c)) { ch[p][c] = cur; p = link[p]; } if (p == -1) { link[cur] = 0; } else { int q = ch[p][c]; if (len[p] + 1 == len[q]) { link[cur] = q; } else { int clone = tol++; len[clone] = len[p] + 1; ch[clone] = ch[q]; link[clone] = link[q]; while (p != -1 && ch[p][c] == q) { ch[p][c] = clone; p = link[p]; } link[q] = link[cur] = clone; } } last = cur; ans += len[cur] - len[link[cur]]; } }

char s[N];

int main() {

ifdef LOCAL

freopen("ans.out", "w", stdout);

endif

SAM::init();
int n = _();
rep (i, 0, n) {
	int x = _();
	SAM::extend(x), printf("%lld\n", SAM::ans);
}

ifdef LOCAL

printf("%.10f\n", (db)clock() / CLOCKS_PER_SEC);

endif

return 0;

}

```
</details>
posted @ 2020-03-17 18:12  Mrzdtz220  阅读(129)  评论(0编辑  收藏  举报