广义后缀自动机 例题
模板题,把所有串加进去,计算不同子串的数量即可
//#pragma GCC optimize("O3")
//#pragma comment(linker, "/STACK:1024000000,1024000000")
#include<bits/stdc++.h>
using namespace std;
function<void(void)> ____ = [](){ios_base::sync_with_stdio(false); cin.tie(0); cout.tie(0);};
const int MAXN = 2e6+7;
struct EXSAM{
int len[MAXN],ch[MAXN][26],link[MAXN],tot,last;
EXSAM(){ tot = last = 1; }
void extend(int c){
int np = 0, p = last;
if(!ch[p][c]) np = last = ++tot, len[np] = len[p] + 1;
while(p and !ch[p][c]){
ch[p][c] = np;
p = link[p];
}
if(!p) link[np] = 1;
else{
int q = ch[p][c];
if(len[p]+1==len[q]) np?link[np]=q:last=q;
else{
int clone = ++tot;
len[clone] = len[p] + 1;
memcpy(ch[clone],ch[q],sizeof(ch[q]));
link[clone] = link[q];
while(p and ch[p][c]==q){
ch[p][c] = clone;
p = link[p];
}
link[q] = clone;
np?link[np]=clone:last=clone;
}
}
}
void append(char *s){
int l = strlen(s);
last = 1;
for(int i = 0; i < l; i++) extend(s[i]-'a');
}
long long solve(){
long long ret = 0;
for(int i = 2; i <= tot; i++) ret += len[i] - len[link[i]];
return ret;
}
}sam;
char s[MAXN];
int main(){
int n;
scanf("%d",&n);
while(n--) {
scanf("%s",s);
sam.append(s);
}
printf("%lld\n",sam.solve());
return 0;
}
给出一棵树,问树上任意两点的简单路径能形成的不同字符串有多少个
相当于以每个叶子节点为根的\(Trie\)都加入到\(SAM\)里去然后找不同字串个数
这里考虑在线做广义\(SAM\)
//#pragma GCC optimize("O3")
//#pragma comment(linker, "/STACK:1024000000,1024000000")
#include<bits/stdc++.h>
using namespace std;
function<void(void)> ____ = [](){ios_base::sync_with_stdio(false); cin.tie(0); cout.tie(0);};
const int MAXN = 4e6+7;
int n,m,col[MAXN];
vector<int> G[MAXN];
struct EXSAM{
int len[MAXN],link[MAXN],ch[MAXN][10],tot,last,sta[MAXN],par[MAXN];
EXSAM(){ tot = last = 1; }
void extend(int c){
int np = 0, p = last;
if(!ch[p][c]) last = np = ++tot, len[np] = len[p] + 1;
while(p and !ch[p][c]){
ch[p][c] = np;
p = link[p];
}
if(!p) link[np] = 1;
else{
int q = ch[p][c];
if(len[p] + 1 == len[q]) np?link[np]=q:last=q;
else{
int clone = ++tot;
len[clone] = len[p] + 1;
link[clone] = link[q];
memcpy(ch[clone],ch[q],sizeof(ch[q]));
while(p and ch[p][c]==q){
ch[p][c] = clone;
p = link[p];
}
link[q] = clone;
if(np) link[np] = clone;
else last = clone;
}
}
}
void bfs(int u){
queue<int> que;
que.push(u);
sta[par[u]=0] = 1;
while(!que.empty()){
u = que.front(); que.pop();
last = sta[par[u]], extend(col[u]), sta[u] = last;
for(int i = 0; i < (int)G[u].size(); i++) if(G[u][i]!=par[u]){
par[G[u][i]] = u;
que.push(G[u][i]);
}
}
}
long long query(){
long long ret = 0;
for(int i = 2; i <= tot; i++) ret += len[i] - len[link[i]];
return ret;
}
}sam;
int main(){
scanf("%d %d",&n,&m);
for(int i = 1; i <= n; i++) scanf("%d",&col[i]);
for(int i = 1; i < n; i++){
int u, v;
scanf("%d %d",&u,&v);
G[u].push_back(v);
G[v].push_back(u);
}
for(int i = 1; i <= n; i++) if(G[i].size()==1) sam.bfs(i);
printf("%lld\n",sam.query());
return 0;
}
给定n个字符串,询问每个字符串有多少子串(不包括空串)是所有n个字符串中至少k个字符串的子串?
建立广义\(SAM\),首先可以计算出自动机里每一个状态能被多少个串匹配,用变量\(cnt\)来记录
这里可以先正常建立自动机,然后对每一个串跑一遍,假设当前跑到状态\(u\),那么怎么确定当前状态是不是当前串重复出现的,那就在到这个状态之后,把这个状态标记为当前串的编号,如果遇到这个状态的时候,标号和当前串标号不一样,那么这个状态以及其所有后缀链接连上去的状态中标记和当前串标号不一样的\(cnt\)都\(+1\)
处理出来\(cnt\)数组之后,再来处理\(f\)数组(当前状态及其所有后缀链接中出现次数\ge k的子串数量),先拓扑排序,然后从\(parent\)树的根开始,将数组下传更新
对于每一个串,其答案就是它的每一个前缀的所有出现次数\(\ge k\)的后缀数量的和,只要找到每个前缀的出现次数\(\ge k\)的最长后缀在自动机里的状态就好了,记这个状态为\(u\),这个后缀的所有后缀都必然符合答案,而这个状态带来的贡献就是\(f[u]\),所以对于每个串,我们跑一遍,然后把跑的过程中遇到的状态表示的\(f\)值求和就是这个串的答案
//#pragma GCC optimize("O3")
//#pragma comment(linker, "/STACK:1024000000,1024000000")
#include<bits/stdc++.h>
using namespace std;
const int MAXN = 1e6+7;
char s[MAXN];
string str[MAXN];
int n,m;
struct EXSAM{
int len[MAXN],link[MAXN],ch[MAXN][26],tot,cnt[MAXN],last,pre[MAXN],c[MAXN],sa[MAXN],f[MAXN];
EXSAM(){ last = tot = 1; }
void extend(int c){
int np = 0, p = last;
if(!ch[p][c]) np = last = ++tot, len[np] = len[p] + 1;
while(p and !ch[p][c]){
ch[p][c] = np;
p = link[p];
}
if(!p) link[np] = 1;
else{
int q = ch[p][c];
if(len[p]+1==len[q]) np?link[np]=q:last=q;
else{
int clone = ++tot;
len[clone] = len[p] + 1;
link[clone] = link[q];
memcpy(ch[clone],ch[q],sizeof(ch[q]));
while(p and ch[p][c]==q){
ch[p][c] = clone;
p = link[p];
}
link[q] = clone;
np ? link[np] = clone : last = clone;
}
}
}
void match(string &s, int ID){
int u = 1;
for(int i = 0; i < s.length(); i++){
u = ch[u][s[i]-'a'];
int v = u;
while(v and pre[v]!=ID){
cnt[v]++, pre[v] = ID;
v = link[v];
}
}
}
void Radix_sort(){
for(int i = 0; i <= tot; i++) c[i] = 0;
for(int i = 1; i <= tot; i++) c[len[i]]++;
for(int i = 1; i <= tot; i++) c[i] += c[i-1];
for(int i = tot; i >= 1; i--) sa[c[len[i]]--] = i;
}
void solve(){
Radix_sort();
for(int i = 1; i <= tot; i++){
int u = sa[i];
f[u] = cnt[u]>=m?(len[u]-len[link[u]]):0;
f[u] += f[link[u]];
}
}
}sam;
int main(){
scanf("%d %d",&n,&m);
for(int i = 1; i <= n; i++){
scanf("%s",s);
str[i].assign(s);
sam.last = 1;
for(int j = 0; j < str[i].length(); j++) sam.extend(str[i][j]-'a');
}
for(int i = 1; i <= n; i++) sam.match(str[i],i);
sam.solve();
for(int i = 1; i <= n; i++){
long long ret = 0;
int u = 1;
for(int j = 0; j < str[i].length(); j++){
u = sam.ch[u][str[i][j]-'a'];
ret += sam.f[u];
}
printf("%lld ",ret);
}
return 0;
}
先按前\(n\)个串建\(SAM\),然后预处理出每个状态有多少字符串匹配
处理完之后对于\(m\)个询问串,每次输出询问串到达的状态有多少字符串匹配即可
//#pragma GCC optimize("O3")
//#pragma comment(linker, "/STACK:1024000000,1024000000")
#include<bits/stdc++.h>
using namespace std;
const int MAXN = 1e6+7;
int n,m;
char s[MAXN];
string str[MAXN];
struct EXSAM{
int len[MAXN],link[MAXN],ch[MAXN][26],tot,last,cnt[MAXN],pre[MAXN];
EXSAM(){ tot = last = 1; }
void extend(int c){
int np = 0, p = last;
if(!ch[p][c]) np = last = ++tot, len[np] = len[p] + 1;
while(p and !ch[p][c]){
ch[p][c] = np;
p = link[p];
}
if(!p) link[np] = 1;
else{
int q = ch[p][c];
if(len[p]+1==len[q]) np?link[np]=q:last=q;
else{
int clone = ++tot;
len[clone] = len[p] + 1;
link[clone] = link[q];
memcpy(ch[clone],ch[q],sizeof(ch[q]));
while(p and ch[p][c]==q){
ch[p][c] = clone;
p = link[p];
}
link[q] = clone;
np?link[np]=clone:last=clone;
}
}
}
void match(string &s, int ID){
int u = 1;
for(int i = 0; i < (int)s.length(); i++){
u = ch[u][s[i]-'a'];
int v = u;
while(v and pre[v]!=ID){
pre[v] = ID; cnt[v]++;
v = link[v];
}
}
}
int solve(char *s){
int l = strlen(s), u = 1;
for(int i = 0; i < l; i++) u = ch[u][s[i]-'a'];
return cnt[u];
}
}sam;
int main(){
scanf("%d %d",&n,&m);
for(int i = 1; i <= n; i++){
scanf("%s",s);
str[i].assign(s);
sam.last = 1;
for(int j = 0; j < (int)str[i].size(); j++) sam.extend(str[i][j]-'a');
}
for(int i = 1; i <= n; i++) sam.match(str[i],i);
for(int i = 1; i <= m; i++){
scanf("%s",s);
printf("%d\n",sam.solve(s));
}
return 0;
}
给出\(n\)个串,问只作为第\(i\)个串的子串出现的不同串有多少个
\(n\)个串建\(SAM\),对于自动机的每一个状态记录有多少个串出现过这个状态,最后要找的就是每个串的各个前缀所匹配的只出现一次的后缀次数,注意不能重复,要标记不要重复计算
//#pragma GCC optimize("O3")
//#pragma comment(linker, "/STACK:1024000000,1024000000")
#include<bits/stdc++.h>
using namespace std;
const int MAXN = 1e6+7;
typedef long long int LL;
struct EXSAM{
int len[MAXN],link[MAXN],ch[MAXN][26],tot,last,cnt[MAXN],pre[MAXN];
EXSAM(){ tot = last = 1; }
void extend(int c){
int np = 0, p = last;
if(!ch[p][c]) np = last = ++tot, len[np] = len[p] + 1;
while(p and !ch[p][c]){
ch[p][c] = np;
p = link[p];
}
if(!p) link[np] = 1;
else{
int q = ch[p][c];
if(len[p]+1==len[q]) np?link[np]=q:last=q;
else{
int clone = ++tot;
len[clone] = len[p] + 1;
link[clone] = link[q];
memcpy(ch[clone],ch[q],sizeof(ch[q]));
while(p and ch[p][c]==q){
ch[p][c] = clone;
p = link[p];
}
link[q] = clone;
np?link[np]=clone:last=clone;
}
}
}
void match(string &s, int ID){
int u = 1;
for(int i = 0; i < (int)s.length(); i++){
u = ch[u][s[i]-'a'];
int v = u;
while(v and pre[v]!=ID){
pre[v] = ID; cnt[v]++;
v = link[v];
}
}
}
LL query(string &s, int ID){
LL ret = 0;
int u = 1;
for(int i = 0; i < (int)s.length(); i++){
u = ch[u][s[i]-'a'];
int v = u;
while(v and cnt[v]==1 and pre[v]!=ID){
ret += len[v] - len[link[v]];
pre[v] = ID;
v = link[v];
}
}
return ret;
}
}sam;
int n;
char s[MAXN];
string str[MAXN];
int main(){
scanf("%d",&n);
for(int i = 1; i <= n; i++){
scanf("%s",s);
str[i].assign(s);
sam.last = 1;
for(int j = 0; j < (int)str[i].size(); j++) sam.extend(str[i][j]-'a');
}
for(int i = 1; i <= n; i++) sam.match(str[i],i);
for(int i = 1; i <= n; i++) printf("%lld\n",sam.query(str[i],i+n));
return 0;
}