AC 自动机(Aho-Corasick automaton)模板
/*
 * Aho-Corasick automaton template (multi-pattern matching over 'a'..'z').
 *   1. insertWord(node *root) - add the pattern stored in `key` to the trie.
 *   2. makeFail(node *root)   - BFS over the trie to set every fail pointer.
 *   3. query(node *root)      - scan `desc` and count how many of the
 *                               inserted patterns occur in it.
 */
#include <stdio.h>
#include <string.h>
#include <queue>
using namespace std;

const int N1 = 50 + 10;       // capacity of key[], terminator included
const int N2 = 1000000 + 10;  // capacity of desc[], terminator included
char key[N1];                 // pattern currently being inserted
char desc[N2];                // text to be searched

struct node
{
    node *next[26];  // child for each letter 'a'..'z', NULL when absent
    int cnt;         // number of patterns ending here; query() sets it to -1 once counted
    node *fail;      // set by makeFail(): longest proper suffix present in the trie; stays NULL for the root
    node()
    {
        for (int i = 0; i < 26; ++i)
            next[i] = NULL;
        fail = NULL;
        cnt = 0;
    }
};

/*
 * Inserts the NUL-terminated pattern held in the global `key` into the trie.
 * A pattern containing any character outside 'a'..'z' cannot be represented
 * in the 26-way node and is ignored (the trie is left untouched).
 */
void insertWord(node *root)
{
    // Validate first so that a rejected word never leaves partial nodes behind.
    for (int i = 0; key[i]; ++i)
    {
        if (key[i] < 'a' || key[i] > 'z')
            return;
    }

    node *cur = root;
    for (int i = 0; key[i]; ++i)
    {
        int index = key[i] - 'a';
        if (cur->next[index] == NULL)
            cur->next[index] = new node();
        cur = cur->next[index];
    }
    cur->cnt++;
}

/*
 * Sets the fail pointer of every node below `root` by a breadth-first walk,
 * so a node's parent (and every shallower node) is finished before the node.
 * The root's own fail pointer stays NULL.
 */
void makeFail(node *root)
{
    queue<node*> q;
    q.push(root);
    while (!q.empty())
    {
        node *cur = q.front();
        q.pop();
        for (int i = 0; i < 26; ++i)
        {
            node *child = cur->next[i];
            if (child == NULL)
                continue;

            // Why a loop: the parent's longest suffix may have no child for
            // letter i, so keep falling back to shorter and shorter suffixes
            // until one can be extended by i or the chain runs past the root.
            node *f = cur->fail;
            while (f != NULL && f->next[i] == NULL)
                f = f->fail;

            // f == NULL covers both the root's direct children and the case
            // where no suffix can be extended: fall back to the root.
            child->fail = (f != NULL) ? f->next[i] : root;
            q.push(child);
        }
    }
}

/*
 * Scans the global `desc` and returns how many inserted patterns occur in it.
 * Each inserted pattern is counted at most once no matter how often it occurs;
 * a pattern inserted k times contributes k.
 *
 * makeFail(root) must have been called first. The call is destructive: counted
 * nodes get cnt = -1, so a second call on the same trie returns 0.
 */
int query(node *root)
{
    node *cur = root;
    int total = 0;
    for (int i = 0; desc[i]; ++i)
    {
        // A character outside the alphabet cannot extend any match.
        if (desc[i] < 'a' || desc[i] > 'z')
        {
            cur = root;
            continue;
        }

        int index = desc[i] - 'a';
        while (cur != root && cur->next[index] == NULL)
            cur = cur->fail;
        if (cur->next[index] == NULL)
            continue;
        cur = cur->next[index];

        // Every node on the fail chain is a suffix of the text read so far.
        // The walk must NOT stop at a node with cnt == 0: such a node can be a
        // mere prefix of a longer pattern while a shorter suffix further down
        // the chain is a real pattern. -1 marks "already counted"; once such a
        // node is met, the rest of its chain has been counted as well.
        for (node *tmp = cur; tmp != root && tmp->cnt != -1; tmp = tmp->fail)
        {
            total += tmp->cnt;
            tmp->cnt = -1;
        }
    }
    return total;
}

/* Frees every node of the trie rooted at `root` (NULL is allowed). */
static void destroyTrie(node *root)
{
    if (root == NULL)
        return;
    queue<node*> q;
    q.push(root);
    while (!q.empty())
    {
        node *cur = q.front();
        q.pop();
        for (int i = 0; i < 26; ++i)
        {
            if (cur->next[i] != NULL)
                q.push(cur->next[i]);
        }
        delete cur;
    }
}

/*
 * Input: t test cases; each is n, then n patterns, then the text.
 * Output: one line per test case with the number of patterns found.
 * Stops quietly at the first malformed or truncated test case.
 */
int main()
{
    int t, n;
    if (scanf("%d", &t) != 1)
        return 0;
    while (t-- > 0)
    {
        if (scanf("%d", &n) != 1)
            break;

        node *root = new node();
        bool complete = true;
        for (int i = 0; i < n && complete; ++i)
        {
            // Field width is N1 - 1 so an over-long token cannot overflow key[].
            if (scanf("%59s", key) == 1)
                insertWord(root);
            else
                complete = false;
        }

        // Field width is N2 - 1 so an over-long token cannot overflow desc[].
        if (complete && scanf("%1000009s", desc) == 1)
        {
            makeFail(root);
            printf("%d\n", query(root));
        }
        else
        {
            complete = false;
        }

        destroyTrie(root);  // the original leaked one trie per test case
        if (!complete)
            break;
    }
    return 0;
}
// Multi-pattern matching: n pattern strings are compiled into an Aho-Corasick
// automaton, then the target string is run through it to find out how many of
// the patterns it contains.
#include <stdio.h>
#include <string.h>
#include <queue>
using namespace std;
/*
 * The root's fail pointer is NULL and the fail pointer of each direct child of
 * the root is the root itself: when not even one character matches, matching
 * restarts from the root.
 * Every other node's fail pointer is derived from its parent's fail pointer.
 */
const int N = 1000000 + 10;  // capacity of str[], terminator included

struct node
{
    node *next[26];  // child for each letter 'a'..'z', NULL when absent
    node *fail;      // set by makeFail(): longest proper suffix present in the trie; stays NULL for the root
    int cnt;         // number of patterns ending here; query() sets it to -1 once counted
    node()
    {
        for (int i = 0; i < 26; ++i)
            next[i] = NULL;
        fail = NULL;
        cnt = 0;
    }
};

/*
 * Inserts the NUL-terminated pattern `word` into the trie rooted at `root`.
 * A pattern containing any character outside 'a'..'z' cannot be represented
 * in the 26-way node and is ignored (the trie is left untouched).
 */
void insertWord(node *root, char *word)
{
    // Validate first so that a rejected word never leaves partial nodes behind.
    for (int i = 0; word[i]; ++i)
    {
        if (word[i] < 'a' || word[i] > 'z')
            return;
    }

    node *cur = root;
    for (int i = 0; word[i]; ++i)
    {
        int index = word[i] - 'a';
        if (cur->next[index] == NULL)
            cur->next[index] = new node();
        cur = cur->next[index];
    }
    cur->cnt++;
}

char str[N];  // target text read by main()

/*
 * Sets the fail pointer of every node below `root` by a breadth-first walk,
 * so a node's parent (and every shallower node) is finished before the node.
 * The root's own fail pointer stays NULL.
 */
void makeFail(node *root)
{
    queue<node*> q;
    q.push(root);
    while (!q.empty())
    {
        node *cur = q.front();
        q.pop();
        for (int i = 0; i < 26; ++i)
        {
            node *child = cur->next[i];
            if (child == NULL)
                continue;
            q.push(child);

            // Follow the parent's fail chain until some suffix can be extended
            // by letter i. Because root->fail == NULL, running off the chain
            // means the current string has no proper suffix in the trie.
            node *f = cur->fail;
            while (f != NULL && f->next[i] == NULL)
                f = f->fail;

            // f == NULL covers both the root's direct children and the
            // "no suffix found" case: fall back to the root.
            child->fail = (f != NULL) ? f->next[i] : root;
        }
    }
}

/*
 * Scans `str` and returns how many inserted patterns occur in it.
 * Each inserted pattern is counted at most once no matter how often it occurs;
 * a pattern inserted k times contributes k.
 *
 * makeFail(root) must have been called first. The call is destructive: counted
 * nodes get cnt = -1, so a second call on the same trie returns 0.
 */
int query(node *root, char *str)
{
    node *cur = root;
    int total = 0;
    for (int i = 0; str[i]; ++i)
    {
        // A character outside the alphabet cannot extend any match.
        if (str[i] < 'a' || str[i] > 'z')
        {
            cur = root;
            continue;
        }

        int index = str[i] - 'a';
        // On a mismatch, jump along fail pointers instead of restarting.
        while (cur != root && cur->next[index] == NULL)
            cur = cur->fail;
        if (cur->next[index] == NULL)
            continue;
        cur = cur->next[index];  // the current character matched; step into it

        // Every node on the fail chain is a suffix of the text read so far, so
        // each one that ends a pattern is a match. The walk must NOT stop at a
        // node with cnt == 0: such a node can be a mere prefix of a longer
        // pattern while a shorter suffix further down the chain is a real
        // pattern. -1 marks "already counted"; once such a node is met, the
        // rest of its chain has been counted as well. The root check keeps the
        // walk from following the root's NULL fail pointer.
        for (node *tmp = cur; tmp != root && tmp->cnt != -1; tmp = tmp->fail)
        {
            total += tmp->cnt;
            tmp->cnt = -1;
        }
    }
    return total;
}

/* Frees every node of the trie rooted at `root` (NULL is allowed). */
static void destroyTrie(node *root)
{
    if (root == NULL)
        return;
    queue<node*> q;
    q.push(root);
    while (!q.empty())
    {
        node *cur = q.front();
        q.pop();
        for (int i = 0; i < 26; ++i)
        {
            if (cur->next[i] != NULL)
                q.push(cur->next[i]);
        }
        delete cur;
    }
}

/*
 * Input: t test cases; each is n, then n patterns, then the text.
 * Output: one line per test case with the number of patterns found.
 * Stops quietly at the first malformed or truncated test case.
 */
int main()
{
    int t, n;
    if (scanf("%d", &t) != 1)
        return 0;
    char word[55];
    while (t-- > 0)
    {
        if (scanf("%d", &n) != 1)
            break;

        node *root = new node();
        bool complete = true;
        for (int i = 0; i < n && complete; ++i)
        {
            // Field width is sizeof(word) - 1 so a long token cannot overflow.
            if (scanf("%54s", word) == 1)
                insertWord(root, word);
            else
                complete = false;
        }

        // Field width is N - 1 so an over-long token cannot overflow str[].
        if (complete && scanf("%1000009s", str) == 1)
        {
            makeFail(root);
            printf("%d\n", query(root, str));
        }
        else
        {
            complete = false;
        }

        destroyTrie(root);  // the original leaked one trie per test case
        if (!complete)
            break;
    }
    return 0;
}
给定n个模式串,长度均不超过m,和一个目标串(长度为L),问目标串中包含多少个模式串(可重叠
的)。
暴力算法是一个个模式串去与目标串匹配,时间复杂度是O(n*m*L)
更好的算法是AC自动机:构建trie树和fail指针的时间与模式串总长度成正比,即O(n*m)(再乘以字符集大小26);匹配时目标串的每个字符均摊O(1),即O(L);所以总时间复杂度约为O(n*m + L)
AC自动机分为两步,1.构建trie树。2.构建fail指针。正是这个fail指针将时间复杂度给大大缩小了
fail指针表示匹配失败时,应该跳到哪个结点去重新匹配
fail指针指向trie树中存在的、当前字符串的最长真后缀所对应的结点,比如she的fail指针应该指向he;
若he不存在则指向e,若e也不存在则指向root(即指向存在的最长后缀)
所以当前结点的fail指针由父结点的fail指针所决定
学习资料:http://www.cppblog.com/menjitianya/archive/2014/07/10/207604.html