hdu 2222(AC自动机模板题)
Keywords Search
Time Limit: 2000/1000 MS (Java/Others) Memory Limit: 131072/131072 K (Java/Others)
Total Submission(s): 78085 Accepted Submission(s): 27110
Problem Description
In the modern time, Search engine came into the life of everybody like Google, Baidu, etc.
Wiskey also wants to bring this feature to his image retrieval system.
Every image has a long description. When users type some keywords to find an image, the system matches the keywords against the image descriptions and shows the image whose description matches the most keywords.
To simplify the problem, you are given the description of one image and some keywords, and you should report how many of the keywords are matched.
Wiskey also wants to bring this feature to his image retrieval system.
Every image has a long description. When users type some keywords to find an image, the system matches the keywords against the image descriptions and shows the image whose description matches the most keywords.
To simplify the problem, you are given the description of one image and some keywords, and you should report how many of the keywords are matched.
Input
The first line contains one integer, the number of test cases that follow.
Each case starts with one integer N, the number of keywords, followed by N keywords. (N <= 10000)
Each keyword contains only the characters 'a'-'z', and its length is at most 50.
The last line of each case is the description, and its length is at most 1000000.
Each case starts with one integer N, the number of keywords, followed by N keywords. (N <= 10000)
Each keyword contains only the characters 'a'-'z', and its length is at most 50.
The last line of each case is the description, and its length is at most 1000000.
Output
Print how many keywords are contained in the description.
Sample Input
1
5
she
he
say
shr
her
yasherhs
Sample Output
3
AC代码1:链表版
#include<iostream>
#include<cstdio>
#include<cstring>
#include<queue>
using namespace std;
#define Is_Lower(x) (((x)>='a')&&((x)<='z'))

// Number of child slots per node. Slot 0 is unused; slots 1..26 hold 'a'..'z'
// (a character c is stored at index c-'a'+1).
const int maxn=27;

// Limits taken from the problem statement. The scanf field widths in main()
// are written as literals and must be kept in sync with these values.
const int MAX_WORD_LEN=50;
const int MAX_TEXT_LEN=1000000;

// Buffer for the description. It lives in static storage because a 1 MB
// array is too large to place safely on the stack.
static char text_buf[MAX_TEXT_LEN+1];

// One trie node of the Aho-Corasick automaton.
// next[]  : child branches of this node (owned by this node)
// fail    : failure link (non-owning; points at another node of the same trie)
// is_word : number of keywords that end at this node; Query() overwrites it
//           with -1 once the node has been counted
struct Node
{
    Node *next[maxn];
    Node *fail;
    int is_word;

    // Creates a node with no children, no failure link and no keyword.
    Node() : fail(NULL), is_word(0)
    {
        for(int i=0;i<maxn;i++)
            next[i]=NULL;
    }

    // Releases the whole subtree below this node, so that `delete root`
    // frees the complete trie. The recursion depth equals the trie depth,
    // which is bounded by the longest keyword inserted.
    ~Node()
    {
        for(int i=0;i<maxn;i++)
            delete next[i];   // deleting NULL is a no-op
    }

private:
    // A node owns its children, so copying is disabled (declared, not defined).
    Node(const Node &);
    Node &operator=(const Node &);
};

// Inserts the keyword s into the trie rooted at root.
// Insertion stops at the terminating '\0' or at the first character that is
// not a lowercase letter; the node reached at that point is marked as a
// keyword end.
void Build_Tire(Node *root,const char *s)
{
    Node *p=root;
    while(*s != '\0' && Is_Lower(*s))
    {
        const int idx=*s-'a'+1;
        if(p->next[idx] == NULL)
            p->next[idx]=new Node();
        p=p->next[idx];
        s++;
    }
    // one more keyword ends at this node
    p->is_word++;
}

// Builds the failure links of every node by a breadth-first traversal.
// root->fail is set to NULL; every other node gets a non-NULL failure link.
void Build_fail(Node *root)
{
    // BFS queue; a node is pushed only after its own failure link is set
    queue<Node *> pending;

    root->fail=NULL;
    pending.push(root);

    while(!pending.empty())
    {
        Node *cur=pending.front();
        pending.pop();

        // visit every existing child of cur
        for(int i=1;i<maxn;i++)
        {
            Node *child=cur->next[i];
            if(child == NULL)
                continue;

            // Let C be the letter on the edge cur -> child. Follow the
            // failure links starting from the parent's failure link until a
            // node with a child C is found or the chain runs past root.
            // For cur == root the chain is empty, so the children of root
            // fail to root.
            Node *p=cur->fail;
            while(p != NULL && p->next[i] == NULL)
                p=p->fail;

            child->fail=(p == NULL ? root : p->next[i]);
            pending.push(child);
        }
    }
}

/*
Walk-through of Query() on the sample input (keywords she, he, say, shr, her;
text "yasherhs"):
For i=0,1 the trie has no matching path, so nothing happens.
For i=2,3,4 the pointer p walks down to the node 'e' of "she".
That node has is_word == 1, so cnt is increased by 1 and is_word is set to -1
to mark the keyword as already seen and prevent double counting. temp then
moves to the node that the failure link of 'e' points to and continues there.
Going on like this, temp finally reaches root and the inner loop ends; during
this step cnt grew by 2, i.e. the two keywords "she" and "he" were found.
For i=5, p has no child 'r', so it follows its failure link to the other 'e'
node (the one of "he") and then steps to its child 'r'. That node has
is_word == 1, so cnt grows by 1, and the loop runs until temp reaches root.
For i=6,7 no further match is found and the matching ends.
*/
// Counts how many inserted keywords occur in the text s.
// Build_fail(root) must have been called before. Every keyword is counted
// once: is_word of each visited node is overwritten with -1, so the trie
// cannot be queried a second time with meaningful results.
int Query(Node *root,const char *s)
{
    int cnt=0;
    Node *p=root;
    for(;*s != '\0';s++)
    {
        // A character outside 'a'..'z' has no slot in next[] and cannot be
        // part of any keyword, so the match restarts from root.
        if(!Is_Lower(*s))
        {
            p=root;
            continue;
        }

        const int idx=*s-'a'+1;
        while(p != root && p->next[idx] == NULL)
            p=p->fail;
        p=p->next[idx];
        if(p == NULL)
            p=root;

        // Collect every keyword that ends at the current position by walking
        // the failure chain. The stop test is "is_word == -1" (already
        // visited) and not "is_word > 0": a node with is_word == 0 may still
        // have keyword nodes further up its failure chain. Try the sample at
        // the end of this file, where "h" is only reachable through the
        // node "sh".
        for(Node *temp=p;temp != root && temp->is_word != -1;temp=temp->fail)
        {
            cnt += temp->is_word;
            temp->is_word=-1;
        }
    }
    return cnt;
}

// Reads the test cases from stdin and prints one answer per case.
int main()
{
    int T;
    if(scanf("%d",&T) != 1)
        return 0;
    while(T--)
    {
        int N;
        if(scanf("%d",&N) != 1)
            break;
        Node *root=new Node();
        for(int i=1;i<=N;i++)
        {
            char word[MAX_WORD_LEN+1];
            // field width keeps an over-long token from overflowing word[]
            if(scanf("%50s",word) != 1)
                break;
            Build_Tire(root,word);
        }
        Build_fail(root);
        int ans=0;
        if(scanf("%1000000s",text_buf) == 1)
            ans=Query(root,text_buf);
        printf("%d\n",ans);

        // ~Node() frees the whole trie of this test case
        delete root;
    }
    return 0;
}
/*
1
6
she
he
say
shr
her
h
yashers
*/
AC代码2:数组版
参考kuangbin大神的代码
/**
  Completed: 2018.9.11
  Author: 依言
  Algorithm: Aho-Corasick automaton
*/
#include<iostream>
#include<cstdio>
#include<queue>
using namespace std;

// Alphabet size: keywords consist of 'a'..'z' only.
const int ALPHABET=26;
// Capacity of the node pool. The problem allows at most 10000 keywords of
// length 50, i.e. at most 500000 trie nodes plus the root.
const int MAX_NODES=500010;

// Array-based Aho-Corasick automaton. Nodes are identified by their index
// into the arrays below.
struct AC_Auto
{
    int next[MAX_NODES][ALPHABET];// child / transition table; -1 = no child (until Build_Fail)
    int fail[MAX_NODES];          // failure links
    int is_word[MAX_NODES];       // number of keywords ending at node i; -1 once counted by Query
    int root;                     // index of the root node; Initial() sets it to 0
    int L;                        // number of nodes allocated so far = index of the next free node

    // Allocates one node with no children and no keyword, and returns its
    // index. No capacity check is done; callers must stay within MAX_NODES.
    int New_Node()
    {
        for(int i=0;i < ALPHABET;++i)
            next[L][i]=-1;
        is_word[L]=0;
        return L++;// L is always one more than the returned index
    }

    // Resets the automaton to a single root node (afterwards L=1, root=0).
    void Initial()
    {
        L=0;
        root=New_Node();
    }

    // Inserts the keyword s into the trie. Must be called before Build_Fail().
    // Insertion stops at '\0' or at the first character outside 'a'..'z',
    // because such a character has no column in next[][].
    void Build_Trie(const char *s)
    {
        int now=root;
        while(*s >= 'a' && *s <= 'z')
        {
            const int index=*s-'a';
            if(next[now][index] == -1)
                next[now][index]=New_Node();// create the missing child
            now=next[now][index];
            s++;
        }
        is_word[now]++;
    }

    // Builds the failure links by BFS and, at the same time, replaces every
    // missing child (-1) by the transition the failure link would lead to,
    // so that next[][] becomes a complete transition table.
    // It helps to trace this by hand on a small alphabet (say 4 letters),
    // drawing every node together with all of its child slots.
    void Build_Fail()
    {
        queue<int> pending;
        fail[root]=root;
        for(int i=0;i < ALPHABET;++i)
        {
            if(next[root][i] == -1)
                next[root][i]=root;
            else
            {
                fail[next[root][i]]=root;
                pending.push(next[root][i]);
            }
        }
        while(!pending.empty())
        {
            const int now=pending.front();
            pending.pop();
            // fail[now] was dequeued earlier, so its row of next[][] is
            // already complete and can be read directly.
            for(int i=0;i < ALPHABET;++i)
            {
                if(next[now][i] == -1)
                    next[now][i]=next[fail[now]][i];
                else
                {
                    fail[next[now][i]]=next[fail[now]][i];
                    pending.push(next[now][i]);
                }
            }
        }
    }

    // Counts how many inserted keywords occur in the text s.
    // Build_Fail() must have been called before. Every keyword is counted
    // once; the counters in is_word[] are consumed by the call.
    int Query(const char *s)
    {
        int now=root;
        int res=0;
        for(;*s != '\0';s++)
        {
            // A character outside 'a'..'z' cannot be part of a keyword and
            // has no column in next[][]; restart matching from the root.
            if(*s < 'a' || *s > 'z')
            {
                now=root;
                continue;
            }
            now=next[now][*s-'a'];
            // Walk the failure chain and collect the keywords ending here.
            // A node marked -1 has been visited before, and so has the rest
            // of its failure chain, so the walk can stop there.
            for(int temp=now;temp != root && is_word[temp] != -1;temp=fail[temp])
            {
                res += is_word[temp];
                is_word[temp]=-1;
            }
        }
        return res;
    }
};
// Shared input buffer for keywords and for the description (length <= 1000000).
char buf[1000010];
AC_Auto ac;

// Reads the test cases from stdin and prints one answer per case.
int main()
{
    int T;
    if(scanf("%d",&T) != 1)
        return 0;
    while(T--)
    {
        int n;
        if(scanf("%d",&n) != 1)
            break;
        ac.Initial();
        for(int i=1;i <= n;++i)
        {
            // field width keeps an over-long token inside buf
            if(scanf("%1000000s",buf) != 1)
                break;
            ac.Build_Trie(buf);
        }
        ac.Build_Fail();
        int ans=0;
        if(scanf("%1000000s",buf) == 1)
            ans=ac.Query(buf);
        printf("%d\n",ans);
    }
    return 0;
}