AC automaton(AC自动机)模板

  1 /*
  2 1.对n个字符串构造trie树                        insertWord(node *root, char *word);
  3 2.bfs构造fail指针                    makeFail(node *root);
  4 3.基于以上两点的查询                query(node *root, char *str);
  5 */
  6 #include <stdio.h>
  7 #include <string.h>
  8 #include <queue>
  9 using namespace std;
 10 const int N1 = 50 + 10;
 11 const int N2 = 1000000 + 10;
 12 char key[N1];
 13 char desc[N2];
 14 struct node
 15 {
 16     node *next[26];
 17     int cnt;
 18     node *fail;
 19     node(){for(int i=0; i<26; ++i) next[i] = NULL; fail = NULL; cnt = 0;}
 20 };
 21 void insertWord(node *root)//构造trie树
 22 {
 23     node *cur = root;
 24     int n = strlen(key);
 25     for(int i=0; i<n; ++i)
 26     {
 27         int index = key[i] - 'a';
 28         if(cur->next[index] != NULL)
 29             cur = cur->next[index];
 30         else
 31         {
 32             cur->next[index] = new node();
 33             cur = cur->next[index];
 34         }
 35     }
 36     cur->cnt++;
 37 }
 38 void makeFail(node *root)//构造fail指针
 39 {
 40     queue<node*> q;
 41     q.push(root);
 42     node *cur;
 43     while(!q.empty())
 44     {    
 45         cur = q.front();
 46         q.pop();
 47         for(int i=0; i<26; ++i)
 48         {
 49             if(cur->next[i] != NULL)
 50             {
 51                 if(cur == root)//与root相连的结点,即第二层的结点的fail指针都是root
 52                     cur->next[i]->fail = root;
 53                 else
 54                 {
 55                     node *tmp = cur;
 56                     while(tmp->fail != NULL)// why while?
 57                     {
 58                         if(tmp->fail->next[i] != NULL)
 59                         {
 60                             cur->next[i]->fail = tmp->fail->next[i];
 61                             break;
 62                         }                            
 63                         tmp = tmp->fail;
 64                     }
 65                     if(tmp->fail == NULL)
 66                         cur->next[i]->fail = root;
 67                 }
 68                 q.push(cur->next[i]);
 69             }
 70         }
 71     }
 72 }
 73 int query(node *root)//查询
 74 {
 75     node *cur = root;
 76     node *tmp = NULL;
 77     int i=0,cnt=0;
 78     while(desc[i])
 79     {
 80         int index = desc[i] - 'a';
 81         while(cur!=root && cur->next[index] == NULL)
 82             cur = cur->fail;
 83         if(cur->next[index] != NULL)
 84         {
 85             cur = cur->next[index];
 86             tmp = cur;
 87             while(tmp != root && tmp->cnt!=0)
 88             {
 89                 cnt += tmp->cnt;
 90                 tmp->cnt = 0;
 91                 tmp = tmp->fail;
 92             }
 93         }
 94         i++;
 95     }
 96     return cnt;
 97 }
 98 int main()
 99 {
100     int t,n;
101     scanf("%d",&t);
102     while(t--)
103     {
104         node *root = new node();
105         scanf("%d",&n);
106         for(int i=0; i<n; ++i)
107         {
108             scanf("%s",key);
109             insertWord(root);
110         }
111         makeFail(root);
112         scanf("%s",desc);
113         int ans = query(root);
114         printf("%d\n",ans);
115     }
116     return 0;
117 }
View Code
  1 //多串匹配,n个模式字符串构成AC自动机,然后目标串去匹配,看目标串中有多少个模式串
  2 #include <stdio.h>
  3 #include <string.h>
  4 #include <queue>
  5 using namespace std;
  6 /*
  7 根结点的fail指针为NULL,根结点的直接子结点的fail指针为root,很明显,当一个字符都不匹配时,从根结点再开始匹配
  8 每个结点的fail指针都是由它父结点的fail指针决定的。
  9 */
 10 const int N = 1000000 + 10;
 11 struct node
 12 {
 13     node *next[26],*fail;
 14     int cnt;
 15     node(){for(int i=0; i<26; ++i) next[i] = NULL; fail = NULL; cnt = 0;}
 16 };
 17 void insertWord(node *root, char *word)
 18 {
 19     node *cur = root;
 20     int i = 0;
 21     while(word[i])
 22     {
 23         int index = word[i] - 'a';
 24         if(cur->next[index] == NULL)
 25             cur->next[index] = new node();
 26         cur = cur->next[index];
 27         ++i;
 28     }
 29     cur->cnt ++;
 30 }
 31 char str[N];
 32 void makeFail(node *root)
 33 {
 34     node *cur,*tmp;
 35     queue<node*> q;
 36     q.push(root);
 37     while(!q.empty())
 38     {
 39         cur = q.front();
 40         q.pop();
 41         for(int i=0; i<26; ++i)
 42         {
 43             if(cur->next[i] != NULL)
 44             {
 45                 q.push(cur->next[i]);
 46                 if(cur == root)//如果当前结点是root,那么它的直接孩子结点的fail指针指向root
 47                     cur->next[i]->fail = root;
 48                 else
 49                 {
 50                     tmp = cur;
 51                     while(tmp->fail != NULL)//because root->fail == NULL,如果到了这个地步,说明当前字符串没有后缀
 52                     {
 53                         if(tmp->fail->next[i] != NULL)
 54                         {
 55                             cur->next[i]->fail = tmp->fail->next[i];
 56                             break;
 57                         }
 58                         tmp = tmp->fail;
 59                     }
 60                     if(tmp->fail == NULL)
 61                         cur->next[i]->fail = root;
 62                 }
 63             }
 64         }
 65     }
 66 }
 67 
 68 // how to query???
 69 int query(node *root, char *str)
 70 {
 71     node *cur = root;
 72     node *tmp = NULL;
 73     int i = 0, cnt = 0;
 74     while(str[i])
 75     {
 76         int index = str[i] - 'a';
 77         while(cur!=root && cur->next[index]==NULL)//如果失配,那么直接跳到fail指针处去匹配
 78             cur = cur->fail;
 79         if(cur->next[index] != NULL)
 80         {
 81             cur = cur->next[index];//如果当前字符匹配成功,则跳到那个字符,
 82             tmp = cur;
 83             //这就是为什么Ac自动机效率高的缘故,根据fail指针,跳到当前字符串的最长后缀去
 84             //如果tmp->cnt != 0 说明存在该最长后缀形成的字符串
 85             while(tmp->cnt!=0)
 86             {
 87                 cnt += tmp->cnt;
 88                 tmp->cnt = 0;
 89                 tmp = tmp->fail;
 90             }
 91         }
 92         ++i;
 93     }
 94     return cnt;
 95 }
 96 int main()
 97 {
 98     int t,n;
 99     scanf("%d",&t);
100     char word[55];
101     while(t--)
102     {
103         node *root = new node();
104         scanf("%d",&n);
105         for(int i=0; i<n; ++i)
106         {
107             scanf("%s",word);
108             insertWord(root, word);
109         }
110         scanf("%s",str);
111         makeFail(root);
112         int ans = query(root, str);
113         printf("%d\n",ans);
114     }
115     return 0;

给定n个模式串,长度均不超过m,和一个目标串(长度为L),问目标串中包含多少个模式串(可重叠
的)。
暴力算法是一个个模式串去与目标串匹配,时间复杂度是O(n*m*L)
更好的算法是AC自动机:构建trie树和fail指针的时间是O(n*m)(即所有模式串的总长度),扫描目标串的均摊时间是O(L),所以总时间复杂度是O(n*m + L)

AC自动机分为两步,1.构建trie树。2.构建fail指针。正是这个fail指针将时间复杂度给大大缩小了

fail指针表示匹配失败时,该跳到哪个结点去重新匹配
fail指针是指向当前字符串的最长后缀,比如she的fail指针应该指向he或e或root(即指向存在的最长后
缀)
所以当前结点的fail指针由父结点的fail指针所决定

 

 学习资料:http://www.cppblog.com/menjitianya/archive/2014/07/10/207604.html

posted @ 2014-09-10 11:05  justPassBy  阅读(281)  评论(0)  编辑  收藏  举报