hdu 2222 Keywords Search - Aho-Corasick自动机

 Time Limit: 2000/1000 MS (Java/Others)    Memory Limit: 131072/131072 K (Java/Others)
Total Submission(s): 51758    Accepted Submission(s): 16671

Problem Description
In the modern time, Search engine came into the life of everybody like Google, Baidu, etc.
Wiskey also wants to bring this feature to his image retrieval system.
Every image have a long description, when users type some keywords to find the image, the system will match the keywords with description of image and show the image which the most keywords be matched.
To simplify the problem, giving you a description of image, and some keywords, you should tell me how many keywords will be match.
 
Input
First line will contain one integer means how many cases will follow by.
Each case will contain two integers N means the number of keywords and N keywords follow. (N <= 10000)
Each keyword will only contains characters 'a'-'z', and the length will be not longer than 50.
The last line is the description, and the length will be not longer than 1000000.
 
Output
Print how many keywords are contained in the description.
 
Sample Input
1
5
she
he
say
shr
her
yasherhs
 
Sample Output
3
 
Author
Wiskey
 
Recommend
lcy   |   We have carefully selected several similar problems for you:  2896 3065 2243 2825 3341 
(转自 http://acm.hdu.edu.cn/showproblem.php?pid=2222)

  先讲讲大意,就是给一大堆模板串,再给一串很长的文本串,求有几个模板串在这个文本串中出现过
  首先,模板串长度较短而数量很多,非常符合AC自动机的适用场景,于是就这么愉快地决定使用AC自动机(其实也并不是特别愉快,虽说是裸题)
  接着注意几个事项
  1.模板串可能重复出现,重复的模板串要分别计数,比如下面这组数据(正确输出应为3)
1
3
aa
bb
aa
aabbcc

  2.多组数据,第一个输入的数是数据组数(这个问题应该不大)

  3.如果用数组实现,不要在每组数据开始时用memset把整个数组清零,这样十分浪费时间;只需重置实际用到的结点即可

接着附上用指针写的AC自动机(虽说我用数组写了一个,调试了2天都没有调出来,果断重写)

Code

  1 /*
  2  * acm.hdu.edu.cn
  3  * Problem#2222
  4  * Accepted
  5  * Time:296ms
  6  * Memory:58152k 
  7  */
  8 #include<iostream>
  9 #include<cstdio>
 10 #include<queue>
 11 #include<cstring>
 12 using namespace std;
 13 #define SEG_SIZE 26
 14 typedef class TrieNode{
 15     public:
 16         TrieNode* next[SEG_SIZE];
 17         TrieNode* fail;                    //失配指针 
 18         TrieNode* last;                    //后缀链接 
 19         int value;
 20         TrieNode():fail(NULL),last(NULL),value(0){
 21             memset(next, 0, sizeof(next));
 22         }
 23 }TrieNode;
 24 typedef class Trie {
 25     public:
 26         TrieNode* root;
 27         Trie(){
 28             root = new TrieNode();
 29         }
 30         static int cti(char ch){        //将字符转换成Trie树中next数组的下标 
 31             return ch - 'a';
 32         }
 33         void insert(string s){
 34             TrieNode *p = root;
 35             for(int i = 0;i < s.length();i++){
 36                 int c = cti(s[i]);
 37                 if(p->next[c] == NULL){
 38                     TrieNode *newNode = new TrieNode();
 39                     p->next[c] = newNode;     //链接结点 
 40                 }
 41                 p = p->next[c];
 42             }
 43             p->value++;                        //用结点的特殊值来存储有多少个单词是在这结尾 
 44         }
 45 }Trie;
 46 typedef class AhoMachine{
 47     public:    
 48         Trie trie;
 49         int count;
 50         AhoMachine(){
 51             trie = Trie();
 52             count = 0;
 53         }
 54         void getFail(){
 55             queue<TrieNode*> que;                    //以bfs序构造状态转移图 
 56             trie.root->fail = trie.root;
 57             for(int i = 0;i < SEG_SIZE;i++){
 58                 TrieNode* pNode = trie.root->next[i];
 59                 if(pNode != NULL){
 60                     que.push(pNode);
 61                     pNode->fail = trie.root;        //根节点的直接子节点的失配指针都是指向根节点 
 62                 }
 63             }
 64             while(!que.empty()){
 65                 TrieNode* p = que.front();
 66                 que.pop();
 67                 for(int i = 0;i < SEG_SIZE;i++){
 68                     TrieNode* pNode = p->next[i];
 69                     if(pNode == NULL)    continue;
 70                     que.push(pNode);
 71                     TrieNode* pFail = p->fail;
 72                     //直到匹配,或者已经指向了根节点 
 73                     while(pFail != trie.root && pFail->next[i] == NULL)    pFail = pFail->fail;
 74                     pNode->fail = pFail->next[i];
 75                     if(pNode->fail == NULL) pNode->fail = trie.root;
 76                     //后缀链接的链接,指向失配后的结点或失配后的结点的后缀链接 
 77                     pNode->last = (pNode->fail->value != 0)?(pNode->fail):(pNode->fail->last);
 78                 }
 79             }
 80         }
 81         //当匹配完,或者是在一条链的中途仍然可能存在有匹配的结点 
 82         void rfind(TrieNode *p){
 83             if(p != NULL){
 84                 count += p->value;
 85                 rfind(p->last);
 86                 p->value = 0;
 87             }
 88         }
 89         void find(string s){
 90             TrieNode *pNode = trie.root;
 91             for(int i = 0;i < s.length();i++){
 92                 int c = Trie::cti(s[i]);
 93                 while(pNode != trie.root && pNode->next[c] == NULL)    pNode = pNode->fail;
 94                 pNode = pNode->next[c];
 95                 if(pNode == NULL) pNode = trie.root;
 96                 if(pNode->value != 0)    rfind(pNode);            //判断有没有可能匹配完其它的模板 
 97                 else if(pNode->last != NULL) rfind(pNode->last);
 98             }
 99         }
100 }AC;
101 AC *ac;
102 int cases;
103 string buf;
104 int n;
105 int main(){
106     ios::sync_with_stdio(false);            //取消同步,加快cin读取字符串的速度 
107     cin>>cases;
108     while(cases--){
109         ac = new AC();
110         cin>>n;
111         for(int i = 0;i < n;i++){
112             cin>>buf;
113             ac->trie.insert(buf);
114         }
115         cin>>buf;
116         ac->getFail();
117         ac->find(buf);
118         cout<<ac->count<<endl;
119         delete ac;
120     }
121     return 0;
122 }

[后记]
  附上自己调程序时所用的数据生成器。如果完全用随机数生成文本串,可能刷上1000、2000组数据也找不出问题,
因为纯随机的文本串恰好包含模板串的概率比较低;所以这个生成器会把已生成的模板串随机拼接进文本串中
 1 #include<iostream>
 2 #include<fstream>
 3 #include<cstdio>
 4 #include<cstdlib>
 5 #include<ctime>
 6 #include<sstream>
 7 using namespace std;
 8 ofstream fout("ks.in");    // the generated test data is written to "ks.in"
 9 // Minimum number of test cases
10 #define CASE_LOW 3
11 // Width of the random range for the number of test cases (the largest count is CASE_LOW + CASE_LIMIT - 1)
12 #define CASE_LIMIT 10
13 // Minimum number of keywords per test case
14 #define KEYWORD_LOW 3
15 #define KEYWORD_LIMIT 10    // width of the random range for the keyword count
16 #define KEYWORD_MAX_LEN 5    // maximum length of a generated keyword
17 // Upper bound of the random value `times` that controls how many pieces the description is built from
18 #define STR_TIMES 10
19 #define SUBSTR_LEN 5        // maximum length of one run of random letters in the description
20 string buf[KEYWORD_LIMIT + KEYWORD_LOW];    // keywords of the current case; main uses indices 1.._count
21 string str;                    // description of the current case while it is being built
22 int _count;                    // number of keywords in the current case
/**
 * Returns a copy of `str` with the character `c` appended.
 *
 * `str` is taken by value, so the caller's string is left untouched. The
 * character is appended directly instead of formatting both operands through
 * a stringstream, which produced the same text at a much higher cost.
 */
std::string operator +(std::string str, char c){
    str.push_back(c);
    return str;
}
28 int main(){
29     
30     srand((unsigned)time(NULL));
31     
32     int cases = rand()%CASE_LIMIT + CASE_LOW;
33     fout<<cases<<endl;
34     
35     for(int i = 0;i < cases;i++){
36         
37         _count = rand()%KEYWORD_LIMIT + KEYWORD_LOW;
38         fout<<_count<<endl;
39         for(int j = 1; j <= _count;j++){
40             int len = rand()%KEYWORD_MAX_LEN + 1;
41             buf[j] = "";
42             for(int k = 1; k <= len;k++){
43                 buf[j] = buf[j] + (char)(rand()%26 + 'a');
44             }
45             fout<<buf[j]<<endl;
46         }
47         
48         int times = rand()%STR_TIMES + 1;
49         str = "";
50         for(int j = 0;j <= times;j++){
51             
52             int v = rand()%3;
53             if(v == 0){
54                 str += buf[rand()%_count + 1];
55             }else{
56                 int len = rand()%SUBSTR_LEN + 1;
57                 for(int i = 1;i <= len;i++){
58                     str = str + (char)(rand()%26 + 'a');
59                 } 
60             }
61             
62         }
63         
64         fout<<str<<endl;
65         
66     }
67     
68     fout.close();
69     return 0;
70 }
md_ks.cpp
posted @ 2016-07-26 17:52  阿波罗2003  阅读(222)  评论(0编辑  收藏  举报