wenbao与ac自动机

 

 

--------------------------------------------

 

http://acm.hdu.edu.cn/showproblem.php?pid=2222

给定若干单词和一个文本串T,求其中有多少个单词在T中出现过

 

存模板。。

 vis[]数组优化

#include <string.h>

#include <cstdio>
#include <iostream>
#include <queue>
using namespace std;
 5 
 6 const int sizech = 26;
 7 const int sizenode = 5e5+10;
 8 const int maxn = 1000009;
 9 bool vis[sizenode];
10 
11 struct AC{
12     int num;
13     int ch[sizenode][sizech], val[sizenode], fail[sizenode];
14     void init(){
15         num = 1;
16         memset(ch[0], 0, sizeof(ch[0]));
17         memset(val, 0, sizeof(val));
18     }
19     void insert(char *str){
20         int root = 0;
21         for(int i = 0; str[i]; ++i){
22             int xx = str[i] - 'a';
23             if(!ch[root][xx]){
24                 memset(ch[num], 0, sizeof(ch[num]));
25                 val[num] = 0;
26                 ch[root][xx] = num ++;
27             }
28             root = ch[root][xx];
29         }
30         val[root]++;
31     }
32     void getfail(){
33         queue<int> q;
34         fail[0] = 0;
35         for(int i = 0; i < sizech; ++i){
36             int xx = ch[0][i];
37             if(xx) fail[xx] = 0, q.push(xx);
38         }
39         while(!q.empty()){
40             int c = q.front();
41             q.pop();
42             for(int i = 0; i < sizech; ++i){
43                 int xx = ch[c][i];
44                 if(!xx){
45                     ch[c][i] = ch[fail[c]][i];
46                     continue;
47                 }
48                 q.push(xx);
49                 fail[xx] = ch[fail[c]][i];
50             }
51         }
52     }
53     int find(char *str){
54         int root = 0, cnt = 0;
55         for(int i = 0; str[i]; ++i){
56             int xx = str[i] - 'a';
57             root = ch[root][xx];
58             int tmp = root;
59             while(tmp && !vis[tmp]){
60                 vis[tmp] = true;
61                 cnt += val[tmp];
62                 val[tmp] = 0;
63                 tmp = fail[tmp];
64             }
65         }
66         return cnt;
67     }
68 }ac;
69 char str[maxn];
70 int main(){
71     int t, n;
72     scanf("%d", &t);
73     while(t--){
74         ac.init();
75         memset(vis, false, sizeof(vis));
76         scanf("%d", &n);
77         for(int i = 1; i <= n; ++i){
78             scanf("%s", str);
79             ac.insert(str);
80         }
81         ac.getfail();
82         scanf("%s", str);
83         printf("%d\n", ac.find(str));
84     }
85     return 0;
86 }

 

 

 

-------------------------------------------

 

la 4670

有n个小写字母组成的字符串和一个文本串,求哪些字符串出现的次数最多

 

  1 // LA4670 Dominating Patterns
  2 // Rujia Liu
  3 #include <cstring>
  4 #include <queue>
  5 #include <cstdio>
  6 #include <map>
  7 #include <string>
  8 using namespace std;
  9 
 10 const int SIGMA_SIZE = 26;
 11 const int MAXNODE = 11000;
 12 const int MAXS = 150 + 10;
 13 
 14 map<string,int> ms;
 15 
 16 struct AhoCorasickAutomata {
 17   int ch[MAXNODE][SIGMA_SIZE];
 18   int f[MAXNODE];    // fail函数
 19   int val[MAXNODE];  // 每个字符串的结尾结点都有一个非0的val
 20   int last[MAXNODE]; // 输出链表的下一个结点
 21   int cnt[MAXS];
 22   int sz;
 23 
 24   void init() {
 25     sz = 1;
 26     memset(ch[0], 0, sizeof(ch[0]));
 27     memset(cnt, 0, sizeof(cnt));
 28     ms.clear();
 29   }
 30 
 31   // 字符c的编号
 32   int idx(char c) {
 33     return c-'a';
 34   }
 35 
 36   // 插入字符串。v必须非0
 37   void insert(char *s, int v) {
 38     int u = 0, n = strlen(s);
 39     for(int i = 0; i < n; i++) {
 40       int c = idx(s[i]);
 41       if(!ch[u][c]) {
 42         memset(ch[sz], 0, sizeof(ch[sz]));
 43         val[sz] = 0;
 44         ch[u][c] = sz++;
 45       }
 46       u = ch[u][c];
 47     }
 48     val[u] = v;
 49     ms[string(s)] = v;
 50   }
 51 
 52   // 递归打印以结点j结尾的所有字符串
 53   void print(int j) {
 54     if(j) {
 55       cnt[val[j]]++;
 56       print(last[j]);
 57     }
 58   }
 59 
 60   // 在T中找模板
 61   int find(char* T) {
 62     int n = strlen(T);
 63     int j = 0; // 当前结点编号,初始为根结点
 64     for(int i = 0; i < n; i++) { // 文本串当前指针
 65       int c = idx(T[i]);
 66       while(j && !ch[j][c]) j = f[j]; // 顺着细边走,直到可以匹配
 67       j = ch[j][c];
 68       if(val[j]) print(j);
 69       else if(last[j]) print(last[j]); // 找到了!
 70     }
 71   }
 72 
 73   // 计算fail函数
 74   void getFail() {
 75     queue<int> q;
 76     f[0] = 0;
 77     // 初始化队列
 78     for(int c = 0; c < SIGMA_SIZE; c++) {
 79       int u = ch[0][c];
 80       if(u) { f[u] = 0; q.push(u); last[u] = 0; }
 81     }
 82     // 按BFS顺序计算fail
 83     while(!q.empty()) {
 84       int r = q.front(); q.pop();
 85       for(int c = 0; c < SIGMA_SIZE; c++) {
 86         int u = ch[r][c];
 87         if(!u) continue;
 88         q.push(u);
 89         int v = f[r];
 90         while(v && !ch[v][c]) v = f[v];
 91         f[u] = ch[v][c];
 92         last[u] = val[f[u]] ? f[u] : last[f[u]];
 93       }
 94     }
 95   }
 96 
 97 };
 98 
 99 AhoCorasickAutomata ac;
100 char text[1000001], P[151][80];
101 int n, T;
102 
103 int main() {
104   while(scanf("%d", &n) == 1 && n) {
105     ac.init();
106     for(int i = 1; i <= n; i++) {
107       scanf("%s", P[i]);
108       ac.insert(P[i], i);
109     }
110     ac.getFail();
111     scanf("%s", text);
112     ac.find(text);
113     int best =  -1;
114     for(int i = 1; i <= n; i++)
115       if(ac.cnt[i] > best) best = ac.cnt[i];
116     printf("%d\n", best);
117     for(int i = 1; i <= n; i++)
118       if(ac.cnt[ms[string(P[i])]] == best) printf("%s\n", P[i]);
119   }
120   return 0;
121 }

 

 

-------------------------------------------

 

只有不断学习才能进步!

 

posted @ 2018-04-14 13:42  wenbao  阅读(184)  评论(0)  编辑  收藏  举报