AC自动机专题

全名: Aho-Corasick 自动机, 可以理解为 Trie + 类似 KMP 的 next(失配)函数

写法: 动态 / 静态, 个人比较偏向于静态

内容: 网上一大堆,我是看的 LRJ新版白书214页

 

静态模板: 

// Aho-Corasick
#include<cstdio>
#include<cstdlib>
#include<cstring>
#include<string>
#include<queue>
#include<algorithm>
using namespace std;

#define Clear(x) memset(x,0,sizeof(x))
const int N = (int)1e4;   // capacity of the node pool (node 0 is the root)
const int Max = 26;       // alphabet size: letters 'a'..'z'

/**
 * Array-based Aho-Corasick automaton for lowercase patterns.
 *
 * Usage: init() -> insert() for every pattern -> getfail() -> find().
 *   ch[x][c]  child of node x on letter c; after getfail() the full transition
 *   f[x]      failure link of node x
 *   val[x]    pattern number stored at x, 0 when no pattern ends there
 *   last[x]   nearest node on the failure chain of x where a pattern ends (0 if none)
 *   cnt[k]    number of occurrences of pattern number k counted by find()
 *
 * Not checked: every character must be in 'a'..'z', pattern numbers must be
 * in 1..N-1 (0 means "no pattern") and at most N-1 nodes may be created.
 */
struct Aho_Corasick{
    int ch[N][Max], last[N], f[N], val[N];
    int cnt[N], top;

    /** Resets the automaton to a lone root and zeroes all occurrence counters. */
    void init(){
        Clear(ch[0]);
        val[0]=last[0]=f[0]=0;
        Clear(cnt);
        top = 0;
    }

    /** Allocates the next node, clears its row and links, and returns its index. */
    int NewNode(){
        int x = ++top;
        Clear(ch[x]);
        last[x]=f[x]=val[x]=0;
        return x;   // return the node that was just cleared, not a second fresh one
    }

    /** Adds pattern T to the trie and tags its final node with number num. */
    void insert(char *T, int num){
        int x = 0, n = strlen(T);
        for(int i = 0; i < n; i++){
            int c = T[i]-'a';
            if( !ch[x][c] ) ch[x][c] = NewNode();
            x  = ch[x][c];
        }
        val[x] = num;
    }

    /** Breadth-first computation of f[] and last[]; also fills in missing edges. */
    void getfail(){
        std::queue<int> Q;
        for(int c = 0; c < Max; c++){
            int u = ch[0][c];
            if(u) f[u]=0, last[u]=0, Q.push(u);
        }
        while( !Q.empty() ){
            int r = Q.front(); Q.pop();
            for(int c = 0; c < Max; c++){
                int u = ch[r][c];
                // A missing edge is redirected to where the failure link would go,
                // so find() never has to walk failure links itself.
                if( !u ){ ch[r][c]=ch[f[r]][c]; continue; }
                Q.push(u);   // real children must be queued, easy to forget
                int v = f[r];
                while( v&&!ch[v][c] )  v = f[v];
                f[u] = ch[v][c];
                last[u] = val[ f[u] ] ? f[u]: last[ f[u] ];
            }
        }
    }

    /** Runs text s through the automaton and counts every pattern occurrence in cnt[]. */
    void find(char *s){
        int x = 0, n = strlen(s);
        for(int i = 0; i < n; i++){
            int c = s[i]-'a';
            x = ch[x][c];
            if( val[x] ) count(x);
            else if( last[x] ) count( last[x] );
        }
    }

    /** Increments cnt[] for node x and for every node reachable through last[]. */
    void count(int x){
        for( ; x; x = last[x] )
            cnt[ val[x] ]++;
    }
}AC;
View Code

 

 

UVALive 4670 - Dominating Patterns

模板题, 统计出现次数最多的模板串. Trick 是出现次数最多的模板串可能同时有多个, 并且输入中可能会出现相同的模板串, 用一个 map 把相同的串映射到同一个编号就可以了.

#include<cstdio>
#include<cstdlib>
#include<cstring>
#include<string>
#include<queue>
#include<map>
#include<algorithm>
using namespace std;

#define Clear(x) memset(x,0,sizeof(x))
const int N = 20000;          // capacity of the node pool (node 0 is the root)
const int M = (int)1e6+10;    // size of the text buffer declared next to main

/**
 * Aho-Corasick automaton over 'a'..'z' that counts occurrences per pattern id.
 *
 *   ch[x][c]  child of x on letter c; after getfail() the full transition
 *   f[x]      failure link
 *   val[x]    pattern id ending at x (0 = none)
 *   last[x]   nearest node on the failure chain of x where a pattern ends
 *   cnt[k]    occurrences of pattern id k found by find()
 *
 * Not checked: characters must be lowercase letters, ids must be in 1..N-1
 * and at most N-1 nodes may be created.
 */
struct AC{
    int ch[N][26],f[N],last[N];
    int val[N],cnt[N];
    int top;

    /** Zeroes every table, including the counters, and leaves only the root. */
    void init(){
        Clear(ch); Clear(f); Clear(last);
        Clear(val); Clear(cnt);
        top = 0;
    }

    /** Hands out the next node index; its row is cleared again for safety. */
    int NewNode(){
        int x = ++top;
        Clear( ch[x] );
        return x;
    }

    /** Adds pattern T and stores num at its final node. */
    void insert(char *T, int num){
        int n = strlen(T);
        int x = 0;
        for(int i = 0; i < n; i++){
            int c = T[i]-'a';
            if( !ch[x][c] ) ch[x][c] = NewNode();
            x = ch[x][c];
        }
        val[x] = num;
    }

    /** Breadth-first computation of f[] and last[]; missing edges are filled in. */
    void getfail(){
        std::queue<int> Q;
        for(int c = 0; c < 26; c++){
            int u = ch[0][c];
            if(u) f[u] = 0, last[u] = 0, Q.push(u);
        }
        while( !Q.empty() ){
            int r = Q.front(); Q.pop();
            for(int c = 0; c < 26; c++){
                int u = ch[r][c];
                // Fill in every edge that does not exist so find() can step directly.
                if( !u ){ ch[r][c] = ch[ f[r] ][c]; continue; }
                Q.push(u);
                int v = f[r];
                while( v && !ch[v][c] ) v = f[v];
                f[u] = ch[v][c];
                last[u] = val[ f[u] ] ? f[u]: last[ f[u] ];
            }
        }
    }

    /** Feeds text T through the automaton, counting every pattern occurrence. */
    void find(char *T){
        int n = strlen(T);
        int x = 0;
        for(int i = 0; i < n; i++){
            int c = T[i]-'a';
            x = ch[x][c];   // edges are complete, so this lands on the right state
            if( val[x] ) count(x);
            // Start from last[x]: f[x] may carry no pattern and would bump cnt[0].
            else if( last[x] ) count( last[x] );
        }
    }

    /** Increments cnt[] for x and for every node reachable through last[]. */
    void count(int x){
        for( ; x; x = last[x] )
            cnt[ val[x] ]++;
    }
}ac;

// Maps each distinct pattern string to the id used inside the automaton.
map<string,int> mp;

char str[200][100];   // patterns of the current test case, 1-based
char text[M];         // text to be searched

/**
 * Reads test cases until a pattern count of 0 (or end of input), then prints
 * the highest occurrence count followed by every pattern reaching it, in
 * input order. Identical patterns share one id, so they share one counter.
 */
int main(){
    int n;
    // Stop on the terminating 0 and also when the input simply ends.
    while( scanf("%d", &n) == 1 && n ){
        mp.clear();
        ac.init();

        int tot = 0;
        for(int i = 1; i <= n; i++){
            if( scanf("%s", str[i] ) != 1 ) return 0;
            int &id = mp[ str[i] ];      // 0 means the string was not seen before
            if( id == 0 ) id = ++tot;
            ac.insert( str[i], id );
        }
        ac.getfail();
        if( scanf("%s", text) != 1 ) return 0;
        ac.find( text );

        int k = 0;
        for(int i = 1; i <= tot; i++)
            k = max( k, ac.cnt[i] );
        printf("%d\n", k );
        for(int i = 1; i <= n; i++){
            if( ac.cnt[ mp[str[i]] ] == k )
                printf("%s\n", str[i] );
        }
    }
    return 0;
}
View Code

 

HDU 2896 病毒侵袭

也是差不多的模板题, 统计哪些模式串出现过. Trick 是字符集为 ASCII 0-127, 而不是 a-z; 另外模式串总长最多 500*200, 节点数开到 6*10^4 就够了. 还有模板姿势要对: AC 自动机要开成全局变量, 不然会 RE 或者 MLE.

#include<cstdio>
#include<cstdlib>
#include<cstring>
#include<string>
#include<queue>
#include<algorithm>
using namespace std;
const int N = (int)6e4+10;   // capacity of the node pool (node 0 is the root)
const int M = (int)1e4+10;   // size of the text buffer declared next to main
const int Max = 128;         // alphabet size: character codes 0..127

/**
 * Aho-Corasick automaton over the 128 ASCII codes.
 *
 *   ch[x][c]  child of x on code c; after getfail() the full transition
 *   f[x]      failure link
 *   val[x]    pattern id ending at x (0 = none)
 *   last[x]   nearest node on the failure chain of x where a pattern ends
 *   cnt[k]    occurrences of pattern id k in the text of the latest find()
 *
 * The object is large; it is meant to live at namespace scope, not on the stack.
 */
struct AC{
    int ch[N][Max],f[N],last[N],val[N];
    int cnt[N], top;

    /** True when c is one of the Max codes the transition table has a column for. */
    static bool valid(char c){
        // Go through unsigned char so bytes >= 128 never become negative indices.
        return static_cast<unsigned char>(c) < Max;
    }

    /** Zeroes the trie tables and leaves only the root. cnt is reset by find(). */
    void init(){
        std::memset(ch, 0, sizeof(ch));
        std::memset(f, 0, sizeof(f));
        std::memset(last, 0, sizeof(last));
        std::memset(val, 0, sizeof(val));
        top = 0;
    }

    /** Hands out the next node index; init() already zeroed its row. */
    int NewNode(){
        return ++top;
    }

    /**
     * Adds pattern T with id num. A pattern containing a byte outside
     * 0..Max-1 is ignored, because the table cannot represent it.
     */
    void insert(char *T, int num){
        int n = std::strlen(T);
        for(int i = 0; i < n; i++)
            if( !valid(T[i]) ) return;
        int x = 0;
        for(int i = 0; i < n; i++){
            int c = T[i];
            if( !ch[x][c] ) ch[x][c] = NewNode();
            x = ch[x][c];
        }
        val[x] = num;
    }

    /** Breadth-first computation of f[] and last[]; missing edges are filled in. */
    void getfail(){
        std::queue<int> Q;
        for(int c = 0; c < Max; c++){
            int u = ch[0][c];
            if( u ) Q.push(u);
        }
        while( !Q.empty() ){
            int r = Q.front(); Q.pop();
            for(int c = 0; c < Max; c++){
                int u = ch[r][c];
                if( !u ){ ch[r][c] = ch[f[r]][c]; continue; }
                Q.push(u);
                int v = f[r];
                while( v&&!ch[v][c] ) v = f[v];
                f[u] = ch[v][c];
                last[u] = val[ f[u] ] ? f[u] : last[ f[u] ];
            }
        }
    }

    /**
     * Clears cnt[] and counts every pattern occurrence in T. A byte outside
     * the alphabet cannot be part of any pattern, so it restarts at the root.
     */
    void find(char *T){
        int x = 0, n = std::strlen(T);
        std::memset(cnt, 0, sizeof(cnt));
        for(int i = 0; i < n; i++){
            if( !valid(T[i]) ){ x = 0; continue; }
            int c = T[i];
            x = ch[x][c];
            if( val[x] ) count(x);
            else if( last[x] ) count( last[x] );
        }
    }

    /** Increments cnt[] for x and for every node reachable through last[]. */
    void count(int x){
        for( ; x; x = last[x] )
            cnt[ val[x] ]++;
    }
}ac; // keep this instance global: the tables are far too large for the stack

char s[510][210];   // patterns, 1-based
char text[M];       // text currently being scanned

/**
 * For every data set: reads n patterns, builds the automaton, then reads m
 * texts. Each text containing at least one pattern is reported as
 * "web <index>:" followed by the matching pattern ids in increasing order;
 * the number of such texts is printed at the end.
 */
int main(){

    int n, m;
    while( scanf("%d", &n) != EOF){
        ac.init();
        for(int id = 1; id <= n; ++id){
            scanf("%s", s[id] );
            ac.insert( s[id], id );
        }
        ac.getfail();

        scanf("%d", &m);
        int tot = 0;
        for(int page = 1; page <= m; ++page){
            scanf("%s", text );
            ac.find(text);

            // Count how many distinct patterns showed up in this text.
            int hits = 0;
            for(int id = 1; id <= n; ++id){
                if( ac.cnt[id] ) ++hits;
            }
            if( hits <= 0 ) continue;

            ++tot;
            printf("web %d:", page);
            for(int id = 1; id <= n; ++id){
                if( ac.cnt[id] ) printf(" %d", id);
            }
            puts("");
        }
        printf("total: %d\n", tot);
    }
    return 0;
}
View Code

 

HDU 1277 全文检索 

坑爹的字符串输入, 不过幸好涨姿势了: scanf 的格式控制可以比较准确地读入. 解法是把模板稍微改动一下, 按匹配到的先后顺序把关键字编号存起来再输出. GetFail 里记得把子节点入队... 老是忘记... OOXX

#include<cstdio>
#include<cstring>
#include<cstdlib>
#include<string>
#include<queue>
#include<algorithm>
#include<vector>
using namespace std;

#define Clear(x) memset(x,0,sizeof(x))
const int N = (int)6e5+10;   // capacity of the node pool (node 0 is the root)
const int M = (int)1e4+10;   // upper bound on pattern ids (size of vis[])

int n, m;
// Pattern ids in the order find() first encounters them.
std::vector<int> S;

/**
 * Aho-Corasick automaton over the digits '0'..'9' that records, in the global
 * vector S, each pattern id the first time it is matched.
 *
 *   ch[x][c]  child of x on digit c; after getfail() the full transition
 *   f[x]      failure link
 *   val[x]    pattern id ending at x (0 = none)
 *   last[x]   nearest node on the failure chain of x where a pattern ends
 *   vis[k]    non-zero once pattern id k was matched by the latest find()
 *
 * Not checked: patterns must consist of digits, ids must be in 1..M-1.
 */
struct AC{
    int ch[N][10], f[N], last[N], val[N];
    int vis[M], top;

    /** Zeroes the trie tables and leaves only the root. vis is reset by find(). */
    void init(){
        Clear(ch); Clear(f);
        Clear(last); Clear(val);
        top = 0;
    }

    /** Hands out the next node index; init() already zeroed its row. */
    int NewNode(){
        return ++top;
    }

    /** Adds digit string T and stores num at its final node. */
    void insert(char *T,int num){
        int x = 0, L = strlen(T);
        for(int i = 0; i < L; i++){
            int c = T[i]-'0';
            if( !ch[x][c] ) ch[x][c] = NewNode();
            x = ch[x][c];
        }
        val[x] = num;
    }

    /** Breadth-first computation of f[] and last[]; missing edges are filled in. */
    void getfail(){
        std::queue<int> Q;
        for(int c = 0; c < 10; c++){
            int u = ch[0][c];
            if(u) Q.push(u);
        }
        while( !Q.empty() ){
            int r = Q.front(); Q.pop();
            for(int c = 0; c < 10; c++){
                int u = ch[r][c];
                if(!u){ ch[r][c]=ch[f[r]][c]; continue; }
                Q.push(u);   // real children must be queued, easy to forget
                int v = f[r];
                while( v&&!ch[v][c] ) v = f[v];
                f[u] = ch[v][c];
                last[u] = val[ f[u] ] ? f[u]: last[ f[u] ];
            }
        }
    }

    /**
     * Clears vis[] and scans T, appending newly matched ids to S.
     * T is taken by reference so the (possibly long) text is not copied.
     * A non-digit cannot belong to any pattern, so it restarts at the root.
     */
    void find(const std::string &T){
        Clear(vis);
        int x = 0, L = T.size();
        for(int i = 0; i < L; i++){
            if( T[i] < '0' || T[i] > '9' ){ x = 0; continue; }
            int c = T[i]-'0';
            x = ch[x][c];
            if( val[x] ) count(x);
            else if( last[x] ) count(last[x]);
        }
    }

    /** Records x and every node reachable through last[] that is not yet in S. */
    void count(int x){
        for( ; x; x = last[x] ){
            if( !vis[ val[x] ] )
                vis[ val[x] ] = true, S.push_back( val[x] );
        }
    }
}ac;

string text;        // all text lines of one data set, concatenated
char s[N], t[N];    // s: scratch buffer for one token; t is not used by main

/**
 * For every data set: reads m text lines and joins them, reads n keys of the
 * form "[Key No. k] digits", then prints the keys found in the text in order
 * of first occurrence, or a fixed message when none occurs.
 * Keys are numbered by their input position (1..n) when printed.
 */
int main(){
    // Reads from standard input; no file redirection.
    while( scanf("%d%d",&m,&n) == 2 ){
        text.clear();
        for(int i = 0; i < m; i++){
            scanf("%s", s);
            text += s;
        }
        ac.init();
        for(int i = 1, x; i <= n; i++){
            // The leading blank skips the newline before "[Key No. k]".
            scanf(" [Key No. %d] %s", &x,s);
            ac.insert(s, i);
        }
        ac.getfail();
        S.clear();
        ac.find(text);
        if( !S.empty() ){
            printf("Found key:");
            for(int i = 0; i < (int)S.size(); i++)
                printf(" [Key No. %d]", S[i]);
            puts("");
        }
        else
            puts("No key can be found !");
    }
    return 0;
}
View Code

 

HDU 2222 Keywords Search

题目的 Trick 是模板串可能会出现相同的, 相同的模板串也要分别被计算.

#include<cstdio>
#include<cstdlib>
#include<cstring>
#include<queue>
#include<map>
#include<string>
using namespace std;

const int N = (int)5e5+10;   // capacity of the node pool (node 0 is the root)
const int M = (int)1e6+10;   // size of the text buffer declared next to main
#define Clear(x) memset(x,0,sizeof(x))

// cnt[k]: occurrences of pattern id k. Not touched by AC::init(); the caller clears it.
int cnt[N];

/**
 * Aho-Corasick automaton over 'a'..'z' that counts matches in the global cnt[].
 *
 *   ch[x][c]  child of x on letter c; after getfail() the full transition
 *   f[x]      failure link
 *   val[x]    pattern id ending at x (0 = none)
 *   last[x]   nearest node on the failure chain of x where a pattern ends
 *   ans       result of the latest find(), see there
 *
 * Nodes are cleared lazily in NewNode(), so init() is cheap.
 */
struct AC{
    int ch[N][26],f[N],last[N];
    int val[N],top, ans;

    /** Resets the automaton to a lone root. */
    void init(){
        top = 0;
        ans = 0;
        Clear(ch[0]);
        last[0] = f[0] = val[0] = 0;
    }

    /** Allocates the next node, clears its row and links, and returns its index. */
    int NewNode(){
        int x = ++top;
        Clear(ch[x]);
        last[x] = f[x] = val[x] = 0;
        return x;
    }

    /** Adds pattern T and stores num at its final node. */
    void insert(char *T,int num){
        int x = 0, n = strlen(T);
        for(int i = 0; i < n; i++){
            int c = T[i]-'a';
            if( !ch[x][c] ) ch[x][c] = NewNode();
            x = ch[x][c];
        }
        val[x] = num;
    }

    /** Breadth-first computation of f[] and last[]; missing edges are filled in. */
    void getfail(){
        std::queue<int>Q;
        for(int c = 0; c < 26; c++){
            int u = ch[0][c];
            if(u) Q.push(u), last[u] = 0, f[u] = 0;   // depth-1 nodes fail to the root
        }
        while( !Q.empty() ){
            int r = Q.front(); Q.pop();
            for(int c = 0; c < 26; c++){
                int u = ch[r][c];
                if( !u ){ ch[r][c]=ch[f[r]][c]; continue; }
                Q.push(u);
                int v = f[r];
                while( v&&!ch[v][c] ) v = f[v];
                f[u] = ch[v][c];
                last[u] = val[ f[u] ] ? f[u]: last[ f[u] ];
            }
        }
    }

    /**
     * Scans T and adds every pattern occurrence to cnt[].
     * @return how many pattern ids had a zero counter before this call and
     *         were matched at least once during it.
     */
    int find(char *T){
        int x = 0, n = strlen(T);
        ans = 0;
        for(int i = 0; i < n; i++){
            int c = T[i]-'a';
            x = ch[x][c];
            if( val[x] ) count(x);
            else if( last[x] ) count( last[x] );
        }
        return ans;
    }

    /** Bumps cnt[] for x and every node reachable through last[]; tracks new ids in ans. */
    void count(int x){
        for( ; x; x = last[x] ){
            if( cnt[ val[x] ]++ == 0 ) ans++;
        }
    }
}ac;

// Maps each distinct keyword to the id it was inserted with.
map<string,int> mp;
char s[M], t[10111][55];   // s: the text; t: keywords, 1-based

/**
 * For each test case: reads n keywords (inserting every distinct one once),
 * reads the text, and prints how many of the n keywords occur in it.
 * Repeated keywords share an id but are each counted in the answer.
 */
int main(){
    int cases;
    scanf("%d", &cases);
    for( ; cases != 0; --cases ){
        int n;
        scanf("%d", &n);

        mp.clear();
        ac.init();
        Clear(cnt);

        int distinct = 0;
        for(int i = 1; i <= n; ++i){
            scanf("%s", t[i]);
            if( mp.count(t[i]) != 0 ) continue;   // duplicate keyword, reuse its id
            ++distinct;
            mp[ t[i] ] = distinct;
            ac.insert( t[i], distinct );
        }
        ac.getfail();

        scanf("%s", s);
        ac.find(s);

        int res = 0;
        for(int i = 1; i <= n; ++i){
            if( cnt[ mp[t[i]] ] != 0 ) ++res;
        }
        printf("%d\n", res );
    }
    return 0;
}
View Code

 

HDU 3065 病毒持续侵袭中

若使用上述模板, 记得不要每次都直接清空全部数组, 而是用到一个节点清一个节点, 不然会 MLE 外带 TLE. 此题模板串无重复, 只是文本串中的字符不仅仅是大写字母, 特判一下就可以了.

#include<cstdio>
#include<cstdlib>
#include<cstring>
#include<string>
#include<queue>
#include<algorithm>
using namespace std;
const int N = (int)5e5+10;
const int M = (int)2e6+10;
#define Clear(x) memset(x,0,sizeof(x));

// cnt[k]: occurrences of pattern id k, filled by AC::count(). Not reset by AC::init().
int cnt[N];

/**
 * Aho-Corasick automaton over 'A'..'Z'. Nodes are cleared when they are
 * allocated, so init() only has to reset the root.
 *
 *   ch[x][c]  child of x on letter c; after getfail() the full transition
 *   f[x]      failure link
 *   val[x]    pattern id ending at x (0 = none)
 *   last[x]   nearest node on the failure chain of x where a pattern ends
 */
struct AC{
    int ch[N][26],f[N],last[N];
    int val[N], top;

    /** Resets the automaton to a lone root. */
    void init(){
        top = 0;
        val[0] = 0;
        Clear(ch[0]);
    }

    /** Allocates the next node with an empty row and zeroed links. */
    int NewNode(){
        ++top;
        Clear(ch[top]);
        last[top] = 0;
        f[top] = 0;
        val[top] = 0;
        return top;
    }

    /** Adds the uppercase pattern T and stores num at its final node. */
    void insert(char *T,int num){
        int cur = 0;
        for(const char *it = T; *it != 0; ++it){
            int &slot = ch[cur][*it - 'A'];
            if( slot == 0 ) slot = NewNode();
            cur = slot;
        }
        val[cur] = num;
    }

    /** Breadth-first computation of f[] and last[]; missing edges are filled in. */
    void getfail(){
        queue<int> bfs;
        for(int c = 0; c < 26; ++c){
            int kid = ch[0][c];
            if( kid == 0 ) continue;
            f[kid] = 0;
            last[kid] = 0;
            bfs.push(kid);
        }
        while( !bfs.empty() ){
            int cur = bfs.front();
            bfs.pop();
            for(int c = 0; c < 26; ++c){
                int kid = ch[cur][c];
                if( kid != 0 ){
                    bfs.push(kid);
                    int fb = f[cur];
                    while( fb != 0 && ch[fb][c] == 0 ) fb = f[fb];
                    f[kid] = ch[fb][c];
                    if( val[ f[kid] ] != 0 ) last[kid] = f[kid];
                    else last[kid] = last[ f[kid] ];
                }
                else{
                    // Redirect the missing edge to the failure target's edge.
                    ch[cur][c] = ch[ f[cur] ][c];
                }
            }
        }
    }

    /** Scans T; any character outside 'A'..'Z' sends the automaton back to the root. */
    void find(char *T){
        int cur = 0;
        for(const char *it = T; *it != 0; ++it){
            if( *it < 'A' || *it > 'Z' ){
                cur = 0;
                continue;
            }
            cur = ch[cur][*it - 'A'];
            if( val[cur] != 0 ) count(cur);
            else if( last[cur] != 0 ) count( last[cur] );
        }
    }

    /** Increments cnt[] for x and for every node reachable through last[]. */
    void count(int x){
        for( ; x != 0; x = last[x] ){
            ++cnt[ val[x] ];
        }
    }
}ac;

char s[M], t[1010][55];   // s: the text; t: patterns, 1-based

/**
 * For every data set: reads n patterns and one text token, then prints
 * "<pattern>: <count>" for each pattern that occurs at least once,
 * in input order.
 */
int main(){
    int n;
    for(;;){
        if( scanf("%d", &n) == EOF ) break;

        ac.init();
        Clear(cnt);

        for(int id = 1; id <= n; ++id){
            scanf("%s", t[id]);
            ac.insert(t[id], id);
        }
        ac.getfail();

        // The text is read as one whitespace-delimited token.
        scanf("%s", s);
        ac.find(s);

        for(int id = 1; id <= n; ++id){
            if( cnt[id] == 0 ) continue;
            printf("%s: %d\n", t[id], cnt[id] );
        }
    }
    return 0;
}
View Code

 

uva 11468 substring

只需要考虑走到的节点是否为某个模式串的结尾(包括沿失配指针能到达的结尾). 然后计算在 AC 自动机上走 L 步都不经过这类节点的概率, 根据全概率公式用记忆化搜索来做. 不过要注意空间问题, 用多少清多少.

#include<cstdio>
#include<cstring>
#include<cstdlib>
#include<queue>
#include<algorithm>
using namespace std;

const int N = 5010;
#define Clear(x) memset(x,0,sizeof(x))

// p[c]: probability of the character whose idx() code is c.
double p[100];

/**
 * Maps a character to its column in the transition table:
 * digits to 0..9, uppercase letters to 10..35, and everything else is
 * treated as a lowercase letter, giving 36 + (ch - 'a').
 */
int idx(char ch){
    if( '0' <= ch && ch <= '9' ) return ch - '0';
    if( 'A' <= ch && ch <= 'Z' ) return 10 + ch - 'A';
    return 36 + ch - 'a';
}
struct Trie{
    int ch[N][65], f[N], top;
    bool end[N];
    void init(){
        Clear(ch[0]); 
        f[0] = end[0] = 0;
        top = 0;    
    }
    int NewNode(){
        int x = ++top;
        Clear(ch[x]);
        f[x] = end[x] = 0;
        return x;
    }
    void insert(char *T){
        int x = 0;
        for(int i = 0; T[i] != 0; i++){
            int c = idx( T[i] );
            if( !ch[x][c] ) ch[x][c] = NewNode();
            x = ch[x][c];
        }
        end[x] = true;    
    }
    void getfail(){
        queue<int> Q;
        for(int c = 0; c < 65; c++){
            int u = ch[0][c];
            if(u) Q.push(u);
        }    
        while( !Q.empty() ){
            int r = Q.front(); Q.pop();
            end[r] |= end[ f[r] ];
            for(int c = 0; c < 65; c++){
                int u = ch[r][c];
                if( !u ){ ch[r][c] = ch[f[r]][c]; }
                else{
                    Q.push(u); f[u] = ch[f[r]][c];
                }    
            }
        }    
    }
}AC;

double dp[N][310];   // dp[u][L]: memoised result of getprob(u, L)
bool vis[N][310];    // vis[u][L]: true once dp[u][L] has been computed

/**
 * Probability that L further random characters, starting from automaton
 * state u, never move into a state flagged in AC.end. Character code c is
 * drawn with probability p[c]. Results are memoised in dp/vis.
 */
double getprob(int u,int L){
    if( L == 0 ) return 1.0;
    if( !vis[u][L] ){
        vis[u][L] = true;
        double &slot = dp[u][L];
        slot = 0.0;
        for(int c = 0; c < 65; ++c){
            int nxt = AC.ch[u][c];
            if( AC.end[nxt] ) continue;   // stepping here would complete a pattern
            slot += p[c]*getprob( nxt, L-1 );
        }
    }
    return dp[u][L];
}

char str[50];   // scratch buffer for one pattern or one character token

/**
 * For each test case: reads k patterns, n (character, probability) pairs and
 * a length L, then prints "Case #i: " followed by getprob(0, L) with six
 * decimals.
 */
int main(){
    int cases;
    int n, k;
    scanf("%d", &cases);
    int Case = 1;
    while( Case <= cases ){
        scanf("%d", &k);
        AC.init();
        for(int left = k; left > 0; --left){
            scanf("%s", str);
            AC.insert(str);
        }
        AC.getfail();

        scanf("%d", &n);
        Clear(p);
        for(int left = n; left > 0; --left){
            double a;
            scanf("%s %lf", str, &a);
            p[ idx(str[0]) ] = a;
        }

        int L;
        scanf("%d", &L);
        Clear(vis);   // memo entries of the previous case are stale
        double res = getprob( 0, L );
        printf("Case #%d: %.6lf\n", Case, res );
        ++Case;
    }
    return 0;
}
View Code

 

uva 11019 Matrix Matcher 

二维匹配: 将 P 按行拆分来构造自动机, 然后对 T 逐行匹配, 利用一个数组 Count[N][N] 把各行的匹配结果组合成矩阵. 详细见 LRJ 白书 218 页. P 中可能有多行相同, 可以为每个节点建一个链表来存储对应的行号.

#include<cstdio>
#include<cstring>
#include<cstdlib>
#include<algorithm>
#include<queue>
using namespace std;
const int N = 1010;
const int M = 10101;
#define Clear(x) memset(x,0,sizeof(x))
#define count Count
char T[N][N], P[N][N];   // text matrix and pattern matrix, one row per string
int n, m, X, Y;          // text is n x m, pattern is X x Y
// count[r][c]: number of pattern rows found in the right place for a match
// whose top-left corner is at (r, c), both 1-based.
int count[N][N];

// Singly linked list cell: one pattern row number, nxt = next cell or -1.
struct node{
    int row, nxt;
}edge[N*N];
int idx;   // number of list cells in use

/**
 * Aho-Corasick automaton over 'a'..'z' built from the rows of the pattern.
 * Several pattern rows may be equal, so every node keeps a linked list of
 * the row numbers that end there.
 *
 *   ch[x][c]  child of x on letter c; after getfail() the full transition
 *   f[x]      failure link
 *   end[x]    head of the row list of x in edge[], -1 when empty
 *   last[x]   nearest node on the failure chain of x with a non-empty list
 */
struct Trie{
    int ch[M][26], f[M], end[M], last[M];
    int top;

    /** Resets the automaton to a lone root and empties the list pool. */
    void init(){
        top = 0;
        idx = 0;
        end[0] = -1;
        last[0] = 0;
        f[0] = 0;
        Clear(ch[0]);
    }

    /** Allocates the next node with an empty row, zeroed links and an empty list. */
    int NewNode(){
        ++top;
        Clear(ch[top]);
        end[top] = -1;
        last[top] = 0;
        f[top] = 0;
        return top;
    }

    /** Prepends pattern row number `row` to the list of node x. */
    void AddEdge(int x,int row){
        edge[idx].nxt = end[x];
        edge[idx].row = row;
        end[x] = idx;
        ++idx;
    }

    /** Adds string s and attaches row number `row` to its final node. */
    void insert(char *s,int row){
        int cur = 0;
        for(char *it = s; *it != 0; ++it){
            int c = *it - 'a';
            if( ch[cur][c] == 0 ) ch[cur][c] = NewNode();
            cur = ch[cur][c];
        }
        AddEdge( cur, row );
    }

    /** Breadth-first computation of f[] and last[]; missing edges are filled in. */
    void getfail(){
        queue<int> bfs;
        for(int c = 0; c < 26; ++c){
            if( ch[0][c] != 0 ) bfs.push( ch[0][c] );
        }
        while( !bfs.empty() ){
            int cur = bfs.front();
            bfs.pop();
            for(int c = 0; c < 26; ++c){
                int kid = ch[cur][c];
                if( kid == 0 ){
                    ch[cur][c] = ch[ f[cur] ][c];
                    continue;
                }
                bfs.push(kid);
                int fb = f[cur];
                while( fb != 0 && ch[fb][c] == 0 ) fb = f[fb];
                f[kid] = ch[fb][c];
                if( end[ f[kid] ] != -1 ) last[kid] = f[kid];
                else last[kid] = last[ f[kid] ];
            }
        }
    }

    /** Scans text row s (row number r, 1-based) and reports every pattern row ending in it. */
    void find(char *s, int r){
        int cur = 0;
        for(int i = 0; s[i] != 0; ++i){
            int col = i + 1;   // 1-based column where the match ends
            cur = ch[cur][ s[i] - 'a' ];
            if( end[cur] != -1 ) frac( cur, r, col );
            else if( last[cur] != 0 ) frac( last[cur], r, col );
        }
    }

    /**
     * A pattern row ended at text cell (r, c). For node x and every node
     * reachable through last[], credit the top-left corner that each listed
     * row number implies, provided that corner lies inside the text.
     */
    void frac(int x,int r,int c){
        for( ; x != 0; x = last[x] ){
            for(int e = end[x]; e != -1; e = edge[e].nxt ){
                int cornerRow = r - edge[e].row + 1;
                int cornerCol = c - Y + 1;
                if( cornerRow >= 1 && cornerCol >= 1 ){
                    count[cornerRow][cornerCol]++;
                }
            }
        }
    }
}AC;

/**
 * For each test case: reads the n x m text and the X x Y pattern, matches
 * every text row against the pattern rows, and prints how many top-left
 * corners collected all X rows, i.e. how many times the pattern occurs.
 */
int main(){
    // Reads from standard input; no file redirection.
    int _;
    if( scanf("%d",&_) != 1 ) return 0;
    while( _-- > 0 ){
        scanf("%d%d", &n,&m);
        for(int i = 0; i < n; i++)
            scanf("%s", T[i] );
        scanf("%d%d",&X,&Y);
        AC.init();
        for(int i = 0; i < X; i++)
        {
            scanf("%s", P[i] );
            AC.insert( P[i], i+1 );   // pattern rows are numbered from 1
        }
        AC.getfail();
        Clear( count );
        for(int i = 0; i < n; i++)
            AC.find( T[i], i+1 );     // text rows are numbered from 1
        int res = 0;
        for(int i = 1; i <= n; i++)
            for(int j = 1; j <= m; j++)
                if( count[i][j] == X ) res++;
        printf("%d\n", res);
    }
    return 0;
}
View Code

 

posted @ 2013-05-23 11:43  yefeng1627  阅读(935)  评论(0编辑  收藏  举报

Launch CodeCogs Equation Editor