AC自动机专题
全名: Aho-Corasick 自动机, 类似 Trie + next函数
写法: 动态 / 静态, 个人比较偏向于静态
内容: 网上一大堆,我是看的 LRJ新版白书214页
静态模板:
// Aho-Corasick automaton: static-array template.
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <string>
#include <queue>
#include <algorithm>
using namespace std;

#define Clear(x) memset(x, 0, sizeof(x))

const int N = (int)1e4;  // capacity of the node pool (root included)
const int Max = 26;      // alphabet size; symbols are 'a'..'z'

// Multi-pattern matcher over lowercase words.
//
// Usage: init() -> insert() once per pattern -> getfail() -> find(text).
// Afterwards cnt[id] holds how many times pattern `id` occurred in the text.
// The caller must keep the total number of trie nodes below N.
struct Aho_Corasick {
    int ch[N][Max];  // trie edges; after getfail() a complete goto table (0 = root)
    int last[N];     // nearest node on the failure chain that ends a pattern, 0 if none
    int f[N];        // failure link
    int val[N];      // id of the pattern ending at this node, 0 if none
    int cnt[N];      // cnt[id]: occurrences recorded by find()
    int top;         // index of the most recently allocated node

    // Resets the automaton to an empty trie and clears all match counters.
    void init() {
        Clear(ch[0]);
        val[0] = last[0] = f[0] = 0;
        Clear(cnt);
        top = 0;
    }

    // Allocates a zeroed node and returns its index.
    int NewNode() {
        int x = ++top;
        Clear(ch[x]);
        last[x] = f[x] = val[x] = 0;
        return x;  // return the node that was just cleared, not the next one
    }

    // Adds pattern T (characters 'a'..'z' only) under the non-zero id `num`.
    // Inserting the same string again overwrites the previous id.
    void insert(const char *T, int num) {
        int x = 0, n = strlen(T);
        for (int i = 0; i < n; i++) {
            int c = T[i] - 'a';
            if (!ch[x][c]) ch[x][c] = NewNode();
            x = ch[x][c];
        }
        val[x] = num;
    }

    // Builds failure links and `last` links by BFS and fills every missing
    // edge, so that find() can follow ch[][] without walking failure links.
    void getfail() {
        queue<int> Q;
        for (int c = 0; c < Max; c++) {
            int u = ch[0][c];
            if (u) {
                f[u] = 0;
                last[u] = 0;
                Q.push(u);
            }
        }
        while (!Q.empty()) {
            int r = Q.front();
            Q.pop();
            for (int c = 0; c < Max; c++) {
                int u = ch[r][c];
                if (!u) {
                    // Missing edge: borrow the transition of the failure state.
                    ch[r][c] = ch[f[r]][c];
                    continue;
                }
                Q.push(u);
                int v = f[r];
                while (v && !ch[v][c]) v = f[v];
                f[u] = ch[v][c];
                last[u] = val[f[u]] ? f[u] : last[f[u]];
            }
        }
    }

    // Scans text s and accumulates occurrences into cnt[].
    // A character outside 'a'..'z' cannot be part of any pattern, so it
    // sends the automaton back to the root instead of indexing out of range.
    void find(const char *s) {
        int x = 0, n = strlen(s);
        for (int i = 0; i < n; i++) {
            if (s[i] < 'a' || s[i] > 'z') {
                x = 0;
                continue;
            }
            x = ch[x][s[i] - 'a'];
            if (val[x]) count(x);
            else if (last[x]) count(last[x]);
        }
    }

    // Credits the pattern ending at x and every pattern that is a suffix of it.
    void count(int x) {
        while (x) {
            cnt[val[x]]++;
            x = last[x];
        }
    }
} AC;
UVALive 4670 - Dominating Patterns
模板题, 统计出现次数最多的模板串. Trick 是可能有多个模板串同时达到最大次数, 并且会出现相同的模板串, 用个 map 映射处理一下就可以了.
// UVALive 4670 - Dominating Patterns: print the patterns that occur most
// often in the text.
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <string>
#include <queue>
#include <map>
#include <algorithm>
using namespace std;

#define Clear(x) memset(x, 0, sizeof(x))

const int N = 20000;          // trie node capacity
const int M = (int)1e6 + 10;  // text buffer size

// Aho-Corasick automaton over 'a'..'z'.
struct AC {
    int ch[N][26], f[N], last[N];  // goto table, failure links, output links
    int val[N], cnt[N];            // pattern id per node; occurrences per id
    int top;                       // index of the last allocated node

    // Wipes the whole automaton, including the match counters.
    void init() {
        Clear(ch);
        Clear(f);
        Clear(last);
        Clear(val);
        Clear(cnt);
        top = 0;
    }

    // Allocates a node with no outgoing edges.
    int NewNode() {
        int x = ++top;
        Clear(ch[x]);
        return x;
    }

    // Adds pattern T under the non-zero id `num`.
    void insert(char *T, int num) {
        int n = strlen(T);
        int x = 0;
        for (int i = 0; i < n; i++) {
            int c = T[i] - 'a';
            if (!ch[x][c]) ch[x][c] = NewNode();
            x = ch[x][c];
        }
        val[x] = num;
    }

    // BFS construction of failure/output links.
    void getfail() {
        queue<int> Q;
        for (int c = 0; c < 26; c++) {
            int u = ch[0][c];
            if (u) {
                f[u] = 0;
                last[u] = 0;
                Q.push(u);
            }
        }
        while (!Q.empty()) {
            int r = Q.front();
            Q.pop();
            for (int c = 0; c < 26; c++) {
                int u = ch[r][c];
                if (!u) {
                    // Fill in every missing edge so find() never backtracks.
                    ch[r][c] = ch[f[r]][c];
                    continue;
                }
                Q.push(u);
                int v = f[r];
                while (v && !ch[v][c]) v = f[v];
                f[u] = ch[v][c];
                last[u] = val[f[u]] ? f[u] : last[f[u]];
            }
        }
    }

    // Scans T and accumulates occurrences into cnt[].
    void find(char *T) {
        int n = strlen(T);
        int x = 0;
        for (int i = 0; i < n; i++) {
            int c = T[i] - 'a';
            x = ch[x][c];  // missing edges were filled, so this is a direct jump
            if (val[x]) count(x);
            // Start from the output link itself; starting from f[x] would
            // also bump cnt[0] whenever f[x] is not a pattern end.
            else if (last[x]) count(last[x]);
        }
    }

    // Credits the pattern at x and all patterns reachable through output links.
    void count(int x) {
        while (x) {
            cnt[val[x]]++;
            x = last[x];
        }
    }
} ac;

map<string, int> mp;  // pattern text -> id, so duplicate patterns share one id
char str[200][100];
char text[M];

int main() {
    int n;
    // Stop on the terminating 0 and also on EOF / malformed input
    // (otherwise n would keep its old value and the loop would never end).
    while (scanf("%d", &n) == 1 && n) {
        mp.clear();
        ac.init();
        int tot = 0;
        for (int i = 1; i <= n; i++) {
            scanf("%s", str[i]);
            if (mp.count(str[i]) == 0) mp[str[i]] = ++tot;
            ac.insert(str[i], mp[str[i]]);
        }
        ac.getfail();
        scanf("%s", text);
        ac.find(text);

        int k = 0;
        for (int i = 1; i <= tot; i++) k = max(k, ac.cnt[i]);
        printf("%d\n", k);
        for (int i = 1; i <= n; i++) {
            if (ac.cnt[mp[str[i]]] == k) printf("%s\n", str[i]);
        }
    }
    return 0;
}
HDU 2896 病毒侵袭
也是差不多模板题,统计哪些模式串出现过,Trick是 字符集是0-127,不是a-z有木有, 还有就是模式串总长500*200,开到6*10^4就行..坑爹大作... 还有模板姿势要对...AC开全局.不然会RE或者MLE.
// HDU 2896: report which patterns (viruses) occur in each website.
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <string>
#include <queue>
#include <algorithm>
using namespace std;

const int N = (int)6e4 + 10;  // trie node capacity
const int M = (int)1e4 + 10;  // website buffer size
const int Max = 128;          // alphabet: byte values 0..127

#define Clear(x) memset(x, 0, sizeof(x))

// Aho-Corasick automaton over byte values 0..127.
struct AC {
    int ch[N][128], f[N], last[N], val[N];  // goto table, failure/output links, pattern ids
    int cnt[N], top;                        // occurrences per pattern id; last allocated node

    // Wipes the automaton; cnt is reset by find() instead.
    void init() {
        Clear(ch);
        Clear(f);
        Clear(last);
        Clear(val);
        top = 0;
    }

    int NewNode() { return ++top; }

    // Adds pattern T under the non-zero id `num`.
    // A pattern containing a byte outside 0..127 is ignored: it could never
    // be matched, and indexing ch[][] with it would be out of bounds.
    void insert(char *T, int num) {
        int n = strlen(T);
        for (int i = 0; i < n; i++) {
            if ((unsigned char)T[i] >= Max) return;
        }
        int x = 0;
        for (int i = 0; i < n; i++) {
            int c = (unsigned char)T[i];
            if (!ch[x][c]) ch[x][c] = NewNode();
            x = ch[x][c];
        }
        val[x] = num;
    }

    // BFS construction of failure/output links; also fills missing edges.
    void getfail() {
        queue<int> Q;
        for (int c = 0; c < Max; c++) {
            int u = ch[0][c];
            if (u) Q.push(u);
        }
        while (!Q.empty()) {
            int r = Q.front();
            Q.pop();
            for (int c = 0; c < Max; c++) {
                int u = ch[r][c];
                if (!u) {
                    ch[r][c] = ch[f[r]][c];
                    continue;
                }
                Q.push(u);
                int v = f[r];
                while (v && !ch[v][c]) v = f[v];
                f[u] = ch[v][c];
                last[u] = val[f[u]] ? f[u] : last[f[u]];
            }
        }
    }

    // Clears cnt[] and then counts the occurrences of every pattern in T.
    void find(char *T) {
        int x = 0, n = strlen(T);
        Clear(cnt);
        for (int i = 0; i < n; i++) {
            // Go through unsigned char so a signed char never yields a
            // negative index; bytes >= 128 just restart at the root.
            int c = (unsigned char)T[i];
            if (c >= Max) {
                x = 0;
                continue;
            }
            x = ch[x][c];
            if (val[x]) count(x);
            else if (last[x]) count(last[x]);
        }
    }

    // Credits the pattern at x and all patterns reachable through output links.
    void count(int x) {
        while (x) {
            cnt[val[x]]++;
            x = last[x];
        }
    }
} ac;  // must be a global: the tables are far too large for the stack

char s[510][210];
char text[M];

int main() {
    int n, m;
    while (scanf("%d", &n) == 1) {
        ac.init();
        for (int i = 1; i <= n; i++) {
            scanf("%s", s[i]);
            ac.insert(s[i], i);
        }
        ac.getfail();

        scanf("%d", &m);
        int tot = 0;
        for (int i = 1; i <= m; i++) {
            scanf("%s", text);
            ac.find(text);
            int cnt = 0;
            for (int j = 1; j <= n; j++)
                if (ac.cnt[j]) cnt++;
            if (cnt > 0) {
                tot++;
                printf("web %d:", i);
                for (int j = 1; j <= n; j++)
                    if (ac.cnt[j]) printf(" %d", j);
                puts("");
            }
        }
        printf("total: %d\n", tot);
    }
    return 0;
}
HDU 1277 全文检索
坑爹的字符串输入, 不过幸好涨姿势了. 格式控制比较准. 解法模板变动下..按顺序存起来,输出. GetFail指针记得进队...老是忘记...OOXX
// HDU 1277: list the keys found in a digit text, in order of first match.
#include <cstdio>
#include <cstring>
#include <cstdlib>
#include <string>
#include <queue>
#include <algorithm>
#include <vector>
using namespace std;

#define Clear(x) memset(x, 0, sizeof(x))

const int N = (int)6e5 + 10;  // trie node capacity (also the input buffer size)
const int M = (int)1e4 + 10;  // upper bound on key ids

int n, m;
vector<int> S;  // ids of matched keys, in order of first occurrence

// Aho-Corasick automaton over the digits '0'..'9'.
struct AC {
    int ch[N][10], f[N], last[N], val[N];  // goto table, failure/output links, key ids
    int vis[M], top;                       // vis[id] != 0 once key id was reported

    // Wipes the automaton; vis is reset by find().
    void init() {
        Clear(ch);
        Clear(f);
        Clear(last);
        Clear(val);
        top = 0;
    }

    int NewNode() { return ++top; }

    // Adds key T under the non-zero id `num`.
    void insert(char *T, int num) {
        int x = 0, L = strlen(T);
        for (int i = 0; i < L; i++) {
            int c = T[i] - '0';
            if (!ch[x][c]) ch[x][c] = NewNode();
            x = ch[x][c];
        }
        val[x] = num;
    }

    // BFS construction of failure/output links; also fills missing edges.
    void getfail() {
        queue<int> Q;
        for (int c = 0; c < 10; c++) {
            int u = ch[0][c];
            if (u) Q.push(u);
        }
        while (!Q.empty()) {
            int r = Q.front();
            Q.pop();
            for (int c = 0; c < 10; c++) {
                int u = ch[r][c];
                if (!u) {
                    ch[r][c] = ch[f[r]][c];
                    continue;
                }
                Q.push(u);  // easy to forget: real children must be enqueued
                int v = f[r];
                while (v && !ch[v][c]) v = f[v];
                f[u] = ch[v][c];
                last[u] = val[f[u]] ? f[u] : last[f[u]];
            }
        }
    }

    // Scans T; every key seen for the first time is appended to S.
    // Taken by const reference so the concatenated text is not copied.
    void find(const string &T) {
        Clear(vis);
        int x = 0, L = T.size();
        for (int i = 0; i < L; i++) {
            int c = T[i] - '0';
            x = ch[x][c];
            if (val[x]) count(x);
            else if (last[x]) count(last[x]);
        }
    }

    // Reports the key at x and all keys reachable through output links.
    void count(int x) {
        while (x) {
            if (!vis[val[x]]) {
                vis[val[x]] = true;
                S.push_back(val[x]);
            }
            x = last[x];
        }
    }
} ac;

string text;
char s[N];

int main() {
    // Input comes from stdin; redirecting it to a local debug file would
    // leave the program without input when that file does not exist.
    while (scanf("%d%d", &m, &n) != EOF) {
        // The text is split over m lines; glue them back together.
        text.clear();
        for (int i = 0; i < m; i++) {
            scanf("%s", s);
            text += s;
        }

        ac.init();
        for (int i = 1, x; i <= n; i++) {
            // The number in the label is parsed but keys are numbered by
            // their position in the input.
            scanf(" [Key No. %d] %s", &x, s);
            ac.insert(s, i);
        }
        ac.getfail();

        S.clear();
        ac.find(text);

        // S is non-empty exactly when at least one vis[] entry was set.
        if (!S.empty()) {
            printf("Found key:");
            for (int i = 0; i < (int)S.size(); i++)
                printf(" [Key No. %d]", S[i]);
            puts("");
        } else {
            puts("No key can be found !");
        }
    }
    return 0;
}
HDU 2222 Keywords Search
题目Trick 是模板串 会出现相同的. 也要被计算.
// HDU 2222 Keywords Search: count how many keywords (duplicates included)
// occur in the text.
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <queue>
#include <map>
#include <string>
using namespace std;

const int N = (int)5e5 + 10;  // trie node capacity
const int M = (int)1e6 + 10;  // text buffer size

#define Clear(x) memset(x, 0, sizeof(x))

int cnt[N];  // cnt[id]: occurrences of the keyword with that id

// Aho-Corasick automaton over 'a'..'z'. Nodes are cleared lazily on
// allocation, so init() is cheap even though the tables are large.
struct AC {
    int ch[N][26], f[N], last[N];  // goto table, failure links, output links
    int val[N], top, ans;          // keyword id per node; last allocated node;
                                   // ans is reset by find() and never updated

    // Resets to an empty trie (only the root is cleared).
    void init() {
        top = 0;
        Clear(ch[0]);
        last[0] = f[0] = val[0] = 0;
    }

    // Allocates a zeroed node.
    int NewNode() {
        int x = ++top;
        Clear(ch[x]);
        last[x] = f[x] = val[x] = 0;
        return x;
    }

    // Adds keyword T under the non-zero id `num`.
    void insert(char *T, int num) {
        int x = 0, n = strlen(T);
        for (int i = 0; i < n; i++) {
            int c = T[i] - 'a';
            if (!ch[x][c]) ch[x][c] = NewNode();
            x = ch[x][c];
        }
        val[x] = num;
    }

    // BFS construction of failure/output links; also fills missing edges.
    void getfail() {
        queue<int> Q;
        for (int c = 0; c < 26; c++) {
            int u = ch[0][c];
            if (u) {
                // Depth-1 nodes fail to the root (this is f[u], not f[0]).
                f[u] = 0;
                last[u] = 0;
                Q.push(u);
            }
        }
        while (!Q.empty()) {
            int r = Q.front();
            Q.pop();
            for (int c = 0; c < 26; c++) {
                int u = ch[r][c];
                if (!u) {
                    ch[r][c] = ch[f[r]][c];
                    continue;
                }
                Q.push(u);
                int v = f[r];
                while (v && !ch[v][c]) v = f[v];
                f[u] = ch[v][c];
                last[u] = val[f[u]] ? f[u] : last[f[u]];
            }
        }
    }

    // Scans T and accumulates occurrences into the global cnt[].
    // Returns ans, which stays 0; results are read from cnt[].
    int find(char *T) {
        int x = 0, n = strlen(T);
        ans = 0;
        for (int i = 0; i < n; i++) {
            int c = T[i] - 'a';
            x = ch[x][c];
            if (val[x]) count(x);
            else if (last[x]) count(last[x]);
        }
        return ans;
    }

    // Credits the keyword at x and all keywords reachable through output links.
    void count(int x) {
        while (x) {
            cnt[val[x]]++;
            x = last[x];
        }
    }
} ac;

map<string, int> mp;  // keyword text -> id; identical keywords share one id
char s[M], t[10111][55];

int main() {
    int _;
    scanf("%d", &_);
    while (_--) {
        int n;
        scanf("%d", &n);
        mp.clear();
        ac.init();
        Clear(cnt);

        int top = 0;
        for (int i = 1; i <= n; i++) {
            scanf("%s", t[i]);
            if (mp.count(t[i]) == 0) {
                mp[t[i]] = ++top;
                ac.insert(t[i], top);
            }
        }
        ac.getfail();

        scanf("%s", s);
        ac.find(s);

        // Iterate over the original list so that repeated keywords are
        // each counted.
        int res = 0;
        for (int i = 1; i <= n; i++)
            if (cnt[mp[t[i]]]) res++;
        printf("%d\n", res);
    }
    return 0;
}
HDU 3065 病毒持续侵袭中
若使用上述模板记得不要每次都直接清空全部的,而是用一个清一个,不然会MLE外带TLE....此题模板串无重复.只是文本串字符不仅仅是大写字母,特判下就可以了.
// HDU 3065: for every pattern print how many times it occurs in the text.
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <string>
#include <queue>
#include <algorithm>
using namespace std;

const int N = (int)5e5 + 10;  // trie node capacity
const int M = (int)2e6 + 10;  // text buffer size

#define Clear(x) memset(x, 0, sizeof(x));

int cnt[N];  // cnt[id]: occurrences of pattern id

// Aho-Corasick automaton over 'A'..'Z'; nodes are cleared when allocated.
struct AC {
    int ch[N][26], f[N], last[N];  // goto table, failure links, output links
    int val[N], top;               // pattern id per node; last allocated node

    // Resets to an empty trie by clearing the root only.
    void init() {
        top = 0;
        val[0] = 0;
        Clear(ch[0])
    }

    // Hands out the next node with all of its fields zeroed.
    int NewNode() {
        const int node = ++top;
        Clear(ch[node])
        val[node] = 0;
        f[node] = 0;
        last[node] = 0;
        return node;
    }

    // Adds the NUL-terminated pattern T under id `num`.
    void insert(char *T, int num) {
        int node = 0;
        for (char *p = T; *p != 0; ++p) {
            const int letter = *p - 'A';
            if (ch[node][letter] == 0) ch[node][letter] = NewNode();
            node = ch[node][letter];
        }
        val[node] = num;
    }

    // BFS construction of failure/output links; missing edges are filled in.
    void getfail() {
        queue<int> pending;
        for (int letter = 0; letter < 26; ++letter) {
            const int child = ch[0][letter];
            if (child == 0) continue;
            last[child] = f[child] = 0;
            pending.push(child);
        }
        while (!pending.empty()) {
            const int parent = pending.front();
            pending.pop();
            for (int letter = 0; letter < 26; ++letter) {
                const int child = ch[parent][letter];
                if (child == 0) {
                    ch[parent][letter] = ch[f[parent]][letter];
                    continue;
                }
                pending.push(child);
                int fallback = f[parent];
                while (fallback != 0 && ch[fallback][letter] == 0)
                    fallback = f[fallback];
                f[child] = ch[fallback][letter];
                if (val[f[child]])
                    last[child] = f[child];
                else
                    last[child] = last[f[child]];
            }
        }
    }

    // Scans T; a character outside 'A'..'Z' sends the automaton to the root.
    void find(char *T) {
        int state = 0;
        for (char *p = T; *p != 0; ++p) {
            if (*p < 'A' || *p > 'Z') {
                state = 0;
                continue;
            }
            state = ch[state][*p - 'A'];
            if (val[state])
                count(state);
            else if (last[state])
                count(last[state]);
        }
    }

    // Credits the pattern at x and all patterns reachable through output links.
    void count(int x) {
        for (; x; x = last[x]) cnt[val[x]]++;
    }
} ac;

char s[M], t[1010][55];

int main() {
    int n;
    while (scanf("%d", &n) != EOF) {
        ac.init();
        Clear(cnt)

        for (int id = 1; id <= n; ++id) {
            scanf("%s", t[id]);
            ac.insert(t[id], id);
        }
        ac.getfail();

        scanf("%s", s);
        ac.find(s);

        for (int id = 1; id <= n; ++id) {
            if (cnt[id] == 0) continue;
            printf("%s: %d\n", t[id], cnt[id]);
        }
    }
    return 0;
}
uva 11468 substring
只考虑端点是否为 模式串结尾, 然后概率计算 在AC自动机上走L步的, 根据全概率公式以及 记忆化搜索来做.不过要注意空间问题,用多少清多少.
// UVa 11468 Substring: probability that a random string of length L
// contains none of the given patterns.
#include <cstdio>
#include <cstring>
#include <cstdlib>
#include <queue>
#include <algorithm>
using namespace std;

const int N = 5010;  // trie node capacity

#define Clear(x) memset(x, 0, sizeof(x))

double p[100];  // p[symbol]: probability of drawing that symbol

// Maps a character to its symbol index: digits first, then upper case,
// then everything else relative to 'a'.
int idx(char ch) {
    if ('0' <= ch && ch <= '9') return ch - '0';
    if ('A' <= ch && ch <= 'Z') return 10 + ch - 'A';
    return 36 + ch - 'a';
}

// Trie with failure links; end[x] marks states in which a pattern has
// just been completed (directly or through a suffix).
struct Trie {
    int ch[N][65], f[N], top;
    bool end[N];

    // Resets to an empty trie by clearing the root only.
    void init() {
        top = 0;
        Clear(ch[0]);
        f[0] = end[0] = 0;
    }

    // Hands out the next node, zeroed.
    int NewNode() {
        const int node = ++top;
        Clear(ch[node]);
        f[node] = end[node] = 0;
        return node;
    }

    // Adds the NUL-terminated pattern T.
    void insert(char *T) {
        int node = 0;
        for (char *q = T; *q != 0; ++q) {
            const int sym = idx(*q);
            if (ch[node][sym] == 0) ch[node][sym] = NewNode();
            node = ch[node][sym];
        }
        end[node] = true;
    }

    // BFS over the trie: sets failure links, fills missing edges and
    // propagates the end flag along failure links.
    void getfail() {
        queue<int> pending;
        for (int sym = 0; sym < 65; ++sym) {
            if (ch[0][sym]) pending.push(ch[0][sym]);
        }
        while (!pending.empty()) {
            const int cur = pending.front();
            pending.pop();
            if (end[f[cur]]) end[cur] = true;
            for (int sym = 0; sym < 65; ++sym) {
                const int child = ch[cur][sym];
                if (child) {
                    pending.push(child);
                    f[child] = ch[f[cur]][sym];
                } else {
                    ch[cur][sym] = ch[f[cur]][sym];
                }
            }
        }
    }
} AC;

double dp[N][310];  // memo: dp[state][remaining steps]
bool vis[N][310];   // whether dp[state][remaining] has been computed

// Probability of taking L more steps from state u without ever entering
// an end state (memoised).
double getprob(int u, int L) {
    if (L == 0) return 1.0;
    if (vis[u][L]) return dp[u][L];

    vis[u][L] = true;
    double &total = dp[u][L];
    total = 0.0;
    for (int sym = 0; sym < 65; ++sym) {
        const int nxt = AC.ch[u][sym];
        if (AC.end[nxt]) continue;
        total += p[sym] * getprob(nxt, L - 1);
    }
    return total;
}

char str[50];

int main() {
    int _;
    int n, k;
    scanf("%d", &_);
    for (int Case = 1; Case <= _; ++Case) {
        scanf("%d", &k);
        AC.init();
        for (int i = 0; i < k; ++i) {
            scanf("%s", str);
            AC.insert(str);
        }
        AC.getfail();

        scanf("%d", &n);
        Clear(p);
        for (int i = 0; i < n; ++i) {
            double a;
            scanf("%s %lf", str, &a);
            p[idx(str[0])] = a;
        }

        int L;
        scanf("%d", &L);
        Clear(vis);
        const double res = getprob(0, L);
        printf("Case #%d: %.6lf\n", Case, res);
    }
    return 0;
}
uva 11019 Matrix Matcher
二维匹配, 将P按行拆分构自动机,然后 T按行匹配, 利用一个数组Count[N][N], 来组合行形成矩阵. 详细见 LRJ白书218. P中可能有多行相同.可为之建一链表存储.
// UVa 11019 Matrix Matcher: count occurrences of an X-by-Y pattern P in an
// n-by-m text T. The rows of P are the automaton's patterns; every row
// match votes for the top-left corner it would imply.
#include <cstdio>
#include <cstring>
#include <cstdlib>
#include <algorithm>
#include <queue>
using namespace std;

const int N = 1010;   // maximum side length of the matrices (with slack)
const int M = 10101;  // trie node capacity

#define Clear(x) memset(x, 0, sizeof(x))

char T[N][N], P[N][N];
int n, m, X, Y;
int Count[N][N];  // Count[r][c]: rows of P that match if P's corner is at (r, c)

// Linked-list cell: one row number of P ending at some trie node.
struct node {
    int row, nxt;
} edge[N * N];
int idx;  // number of cells of edge[] in use

// Aho-Corasick automaton over 'a'..'z'. Identical rows of P share a node,
// so each node keeps a linked list of row numbers.
struct Trie {
    int ch[M][26], f[M], end[M], last[M];  // end[x]: head of the row list, -1 if empty
    int top;

    // Resets to an empty trie and releases all list cells.
    void init() {
        Clear(ch[0]);
        f[0] = 0;
        last[0] = 0;
        end[0] = -1;
        top = 0;
        idx = 0;
    }

    // Allocates a zeroed node with an empty row list.
    int NewNode() {
        int x = ++top;
        Clear(ch[x]);
        f[x] = last[x] = 0;
        end[x] = -1;
        return x;
    }

    // Prepends `row` to the row list of node x.
    void AddEdge(int x, int row) {
        edge[idx].row = row;
        edge[idx].nxt = end[x];
        end[x] = idx++;
    }

    // Adds pattern row s, remembering that it is row number `row` of P.
    void insert(char *s, int row) {
        int x = 0;
        for (int i = 0; s[i] != 0; i++) {
            int c = s[i] - 'a';
            if (!ch[x][c]) ch[x][c] = NewNode();
            x = ch[x][c];
        }
        AddEdge(x, row);
    }

    // BFS construction of failure/output links; also fills missing edges.
    void getfail() {
        queue<int> Q;
        for (int c = 0; c < 26; c++) {
            int u = ch[0][c];
            if (u) Q.push(u);
        }
        while (!Q.empty()) {
            int r = Q.front();
            Q.pop();
            for (int c = 0; c < 26; c++) {
                int u = ch[r][c];
                if (!u) {
                    ch[r][c] = ch[f[r]][c];
                } else {
                    Q.push(u);
                    int v = f[r];
                    while (v && !ch[v][c]) v = f[v];
                    f[u] = ch[v][c];
                    last[u] = end[f[u]] != -1 ? f[u] : last[f[u]];
                }
            }
        }
    }

    // Scans text row s, which is row r (1-based) of T.
    void find(char *s, int r) {
        int x = 0;
        for (int i = 0; s[i] != 0; i++) {
            int c = s[i] - 'a', col = i + 1;
            x = ch[x][c];
            if (end[x] != -1) frac(x, r, col);
            else if (last[x]) frac(last[x], r, col);
        }
    }

    // A pattern row ends at text cell (r, c): for each row number stored at
    // x (and along the output links) vote for the implied top-left corner.
    void frac(int x, int r, int c) {
        while (x) {
            for (int j = end[x]; ~j; j = edge[j].nxt) {
                int row = edge[j].row;
                if ((r - row + 1 >= 1) && (c - Y + 1 >= 1)) {
                    Count[r - row + 1][c - Y + 1]++;
                }
            }
            x = last[x];
        }
    }
} AC;

int main() {
    // Input comes from stdin; redirecting it to a local debug file would
    // leave the program without input when that file does not exist.
    int _;
    scanf("%d", &_);
    while (_--) {
        scanf("%d%d", &n, &m);
        for (int i = 0; i < n; i++) scanf("%s", T[i]);

        scanf("%d%d", &X, &Y);
        AC.init();
        for (int i = 0; i < X; i++) {
            scanf("%s", P[i]);
            AC.insert(P[i], i + 1);
        }
        AC.getfail();

        Clear(Count);
        for (int i = 0; i < n; i++) AC.find(T[i], i + 1);

        // A corner is a full match when all X rows voted for it.
        int res = 0;
        for (int i = 1; i <= n; i++)
            for (int j = 1; j <= m; j++)
                if (Count[i][j] == X) res++;
        printf("%d\n", res);
    }
    return 0;
}