hdu 2222 Keywords Search
http://acm.hdu.edu.cn/showproblem.php?pid=2222
毫无遮掩的AC自动机一枚!刚开始的时候不知道这题是用AC自动机做的,于是我就用了后缀数组,大概算了一下,貌似是勉强可以通过少数的大数据的。不过实际情况是超时了。。。。。
这是后缀数组的代码:
Suffix Array
1 /* 2 * Author: Lyon 3 * Problem: hdu 2222 4 * Method: Suffix Array 5 */ 6 #include <cstdio> 7 #include <cstring> 8 #include <algorithm> 9 10 #define FOR(i, a, n) for (int i = a; i < n; i++) 11 #define REP(i, n) for (int i = 0; i < n; i++) 12 13 using namespace std; 14 15 const int maxn = 1000001; 16 int wa[maxn], wb[maxn], wv[maxn], wc[maxn]; 17 18 int cmp(int *r, int a, int b, int l){ 19 return r[a] == r[b] && r[a + l] == r[b + l]; 20 } 21 22 void work(int *r, int *sa, int n, int m){ 23 int *x = wa, *y = wb; 24 25 REP(i, m) wc[i] = 0; 26 REP(i, n) wc[x[i] = r[i]]++; 27 FOR(i, 1, m) wc[i] += wc[i - 1]; 28 for (int i = n - 1; i >= 0; i--){ 29 sa[--wc[x[i]]] = i; 30 } 31 for (int j = 1, p = 1; p < n; j *= 2, m = p){ 32 p = 0; 33 for (int i = n - j; i < n; i++){ 34 y[p++] = i; 35 } 36 REP(i, n){ 37 if (sa[i] >= j) y[p++] = sa[i] - j; 38 } 39 REP(i, n){ 40 wv[i] = x[y[i]]; 41 } 42 REP(i, m){ 43 wc[i] = 0; 44 } 45 REP(i, n){ 46 wc[wv[i]]++; 47 } 48 FOR(i, 1, m){ 49 wc[i] += wc[i - 1]; 50 } 51 for (int i = n - 1; i >= 0; i--){ 52 sa[--wc[wv[i]]] = y[i]; 53 } 54 swap(x, y); 55 p = 1; 56 x[sa[0]] = 0; 57 for (int i = 1; i < n; i++){ 58 x[sa[i]] = cmp(y, sa[i - 1], sa[i], j) ? p - 1 : p++; 59 } 60 } 61 62 return ; 63 } 64 65 int rk[maxn], ht[maxn]; 66 67 void saCal(int *r, int *sa, int n){ 68 int k = 0; 69 70 for (int i = 1; i <= n; i++){ 71 rk[sa[i]] = i; 72 } 73 for (int i = 0, j; i < n; ht[rk[i++]] = k){ 74 for (k ? k-- : 0, j = sa[rk[i] - 1]; r[i + k] == r[j + k]; k++); 75 } 76 77 return ; 78 } 79 80 char s[maxn]; 81 int r[maxn], sa[maxn]; 82 char sub[10001][55]; 83 84 int judge(char *str, char *sb){ 85 int i = 0; 86 87 while (*(str + i) && *(sb + i)){ 88 if (*(str + i) != *(sb + i)){ 89 break; 90 } 91 i++; 92 } 93 if (!(*(sb + i))) return 0; // sub is fully paired 94 return *(str + i) - *(sb + i); 95 } 96 97 bool bs(int n, char *sb){ 98 int h = 0, t = n - 1; 99 int m; 100 101 while (h <= t){ 102 m = (h + t) >> 1; 103 104 int dif = judge(&s[sa[m]], sb); 105 106 if (!dif) return true; 107 if (dif < 0) h = m + 1; 108 else t = m - 1; 109 } 110 111 return false; 112 } 113 114 int deal(int n){ 115 int ret = 0; 116 int len = strlen(s); 117 118 work(r, sa, len, 128); 119 /* 120 for (int i = 0; i < len; i++){ 121 printf("String %d:\n", i); 122 printf("%s\n", &s[sa[i]]); 123 printf("sa %d\n", sa[i]); 124 puts(""); 125 } 126 */ 127 for (int i = 0; i < n; i++){ 128 if (bs(len, sub[i])){ 129 ret++; 130 //printf("%s\n", sub[i]); 131 } 132 } 133 134 return ret; 135 } 136 137 int main(){ 138 int n, T; 139 140 scanf("%d", &T); 141 while (T-- && ~scanf("%d", &n)){ 142 for (int i = 0; i < n; i++){ 143 scanf("%s", sub[i]); 144 } 145 scanf("%s", s); 146 147 int len = strlen(s); 148 149 r[len] = 0; 150 while (len--){ 151 r[len] = s[len]; 152 } 153 printf("%d\n", deal(n)); 154 } 155 156 return 0; 157 }
然后我就突然想起了一直被我忽略的AC自动机算法,我在网上找了一下相关的资料,原来AC自动机是可以通过状态转移,在O(n)的复杂度下同时匹配多个较小规模的子串的。我粗略的看了一下它的原理,联系上上学期学过的数字逻辑的课,电路状态的转移和AC自动机的转移可谓是同出一辙!两者都是通过构建自动机,从而判断串行数据的是否满足给定的条件。有了数字逻辑课的基础,看这个就觉得相当简单了。
简单讲一下步骤,在子串输入的同时,构建tri树。输入完毕以后,用build函数,将结点fail的状态转移关系构建出来。在构建的过程中,如果学过KMP算法,很快就可以看出这个过程跟KMP是十分相似的,因为两者的原理都是自动机,也就是状态的转移。然后就是query的时候了,不断的输入被查找的字符串的字母,然后通过之前构建好的自动机,不断的判断并转移到下一个状态。不过根据题意,这里要注意将输出过的状态设置为不可再次访问。
下面的是AC自动机的代码(动态空间):
View Code
1 /* 2 * Author: SCAU_Lyon 3 * Problem: hdu 2222 4 * Method: AC-auto 5 */ 6 7 #include <cstdio> 8 #include <cstring> 9 #include <algorithm> 10 #include <cstdlib> 11 12 using namespace std; 13 14 const int kind = 26; 15 const int maxn = 51 * 10001; 16 17 struct node{ 18 node *fail; 19 node *next[kind]; 20 int count; 21 node(){ 22 fail = NULL; 23 count = 0; 24 memset(next, 0, sizeof(next)); 25 } 26 }*q[maxn]; 27 28 char keyword[51]; 29 char str[1000001]; 30 int head, tail; 31 32 void insert(char *s, node *root){ 33 node *p = root; 34 int i = 0, index; 35 36 while (s[i]){ 37 index = s[i] - 'a'; 38 if (p->next[index] == NULL) p->next[index] = new node(); 39 p = p->next[index]; 40 i++; 41 } 42 p->count++; 43 } 44 45 void build(node *root){ 46 int i; 47 48 root->fail = NULL; 49 head = tail = 0; 50 q[head++] = root; 51 while (head != tail){ 52 node *temp = q[tail++]; 53 node *p = NULL; 54 55 for (i = 0; i < kind; i++){ 56 if (temp->next[i] != NULL){ 57 if (temp == root) temp->next[i]->fail = root; 58 else{ 59 p = temp->fail; 60 while (p != NULL){ 61 if (p->next[i] != NULL){ 62 temp->next[i]->fail = p->next[i]; 63 break; 64 } 65 p = p->fail; 66 } 67 if (p == NULL) temp->next[i]->fail = root; 68 } 69 q[head++] = temp->next[i]; 70 } 71 } 72 } 73 } 74 75 int query(node *root){ 76 int i = 0, cnt = 0, index; 77 node *p = root; 78 79 while (str[i]){ 80 index = str[i] - 'a'; 81 while (p->next[index] == NULL && p != root) p = p->fail; 82 p = p->next[index]; 83 p = (p == NULL) ? root : p; 84 85 node *temp = p; 86 87 while (temp != root && temp->count != -1){ 88 cnt += temp->count; 89 temp->count = -1; 90 temp = temp->fail; 91 } 92 i++; 93 } 94 95 return cnt; 96 } 97 98 void traverse(node *root){ 99 if (root == NULL) return ; 100 printf("root %d\n", root); 101 printf("fail %d\n", root->fail); 102 printf("count %d\n", root->count); 103 for (int i = 0; i < kind; i++){ 104 if (root->next[i] == NULL) continue; 105 printf("%c %d\n", i + 'a', root->next[i]); 106 } 107 puts(""); 108 for (int i = 0; i < kind; i++){ 109 traverse(root->next[i]); 110 } 111 } 112 113 int main(){ 114 int n, T; 115 116 scanf("%d", &T); 117 while (T--){ 118 scanf("%d", &n); 119 120 node *root = new node(); 121 122 for(int i = 0; i < n; i++){ 123 scanf("%s", keyword); 124 insert(keyword, root); 125 } 126 build(root); 127 //traverse(root); 128 //puts("Built!"); 129 scanf("%s", str); 130 printf("%d\n", query(root)); 131 } 132 133 return 0; 134 }
静态链表:
View Code
1 #include <cstdio> 2 #include <cstring> 3 #include <algorithm> 4 #include <cstdlib> 5 6 using namespace std; 7 8 const int kind = 26; 9 const int maxn = 51 * 10001; 10 11 struct Node{ 12 int fail; 13 int next[kind]; 14 int count; 15 void init(){ 16 fail = -1; 17 count = 0; 18 memset(next, -1, sizeof(next)); 19 } 20 }node[maxn]; 21 22 char keyword[51]; 23 char str[1000001]; 24 int head, tail, nodeUsed; 25 int q[maxn]; 26 27 void insert(char *s, int root){ 28 int p = root; 29 int i = 0, index; 30 31 while (s[i]){ 32 index = s[i] - 'a'; 33 if (node[p].next[index] == -1){ 34 node[p].next[index] = nodeUsed; 35 node[nodeUsed].init(); 36 nodeUsed++; 37 } 38 p = node[p].next[index]; 39 i++; 40 } 41 node[p].count++; 42 } 43 44 void build(int root){ 45 int i; 46 47 node[root].fail = -1; 48 head = tail = 0; 49 q[head++] = root; 50 while (head != tail){ 51 int temp = q[tail++]; 52 int p = -1; 53 54 for (i = 0; i < kind; i++){ 55 if (node[temp].next[i] != -1){ 56 if (temp == root) node[node[temp].next[i]].fail = root; 57 else{ 58 p = node[temp].fail; 59 while (p != -1){ 60 if (node[p].next[i] != -1){ 61 node[node[temp].next[i]].fail = node[p].next[i]; 62 break; 63 } 64 p = node[p].fail; 65 } 66 if (p == -1) node[node[temp].next[i]].fail = root; 67 } 68 q[head++] = node[temp].next[i]; 69 } 70 } 71 } 72 } 73 74 int query(int root){ 75 int i = 0, cnt = 0, index; 76 int p = root; 77 78 while (str[i]){ 79 index = str[i] - 'a'; 80 while (node[p].next[index] == -1 && p != root) p = node[p].fail; 81 p = node[p].next[index]; 82 p = (p == -1) ? root : p; 83 84 int temp = p; 85 86 while (temp != root && node[temp].count != -1){ 87 cnt += node[temp].count; 88 node[temp].count = -1; 89 temp = node[temp].fail; 90 } 91 i++; 92 } 93 94 return cnt; 95 } 96 97 void traverse(int root){ 98 if (root == -1) return ; 99 printf("root %d\n", root); 100 printf("fail %d\n", node[root].fail); 101 printf("count %d\n", node[root].count); 102 for (int i = 0; i < kind; i++){ 103 if (node[root].next[i] == -1) continue; 104 printf("%c %d\n", i + 'a', node[root].next[i]); 105 } 106 puts(""); 107 for (int i = 0; i < kind; i++){ 108 traverse(node[root].next[i]); 109 } 110 } 111 112 int main(){ 113 int n, T; 114 115 scanf("%d", &T); 116 while (T--){ 117 scanf("%d", &n); 118 119 int root = 0; 120 121 node[0].init(); 122 nodeUsed = 1; 123 124 for(int i = 0; i < n; i++){ 125 scanf("%s", keyword); 126 insert(keyword, root); 127 } 128 build(root); 129 //traverse(root); 130 //puts("Built!"); 131 scanf("%s", str); 132 printf("%d\n", query(root)); 133 } 134 135 return 0; 136 }
UPD(2013.8.24):
1 #include <iostream> 2 #include <algorithm> 3 #include <cstring> 4 #include <cstdio> 5 6 using namespace std; 7 8 const int N = 11111; 9 const int M = 55; 10 const int K = 26; 11 const int L = 1111111; 12 13 struct Node { 14 Node *nx[K], *fail; 15 int cnt; 16 bool pre; 17 void init() { 18 for (int i = 0; i < K; i++) nx[i] = NULL; 19 fail = NULL; 20 cnt = 0; 21 pre = false; 22 } 23 } node[N * M]; 24 int ttnode; 25 26 struct Trie { 27 Node *RT; 28 void init() { 29 RT = node + ttnode, node[ttnode++].init(); 30 } 31 void insert(char *s) { 32 Node *p = RT; 33 int idx; 34 while (*s) { 35 idx = *s - 'a'; 36 if (p->nx[idx] == NULL) { 37 p->nx[idx] = node + ttnode, node[ttnode++].init(); 38 } 39 p = p->nx[idx], s++; 40 } 41 p->cnt++, p->pre = true; 42 } 43 Node *q[N * M]; 44 void build() { 45 RT->fail = RT; 46 int qh, qt; 47 qh = qt = 0; 48 for (int i = 0; i < K; i++) { 49 if (RT->nx[i]) { 50 RT->nx[i]->fail = RT; 51 q[qt++] = RT->nx[i]; 52 } 53 } 54 //cout << "pass" << endl; 55 while (qh < qt) { 56 Node *p = q[qh++]; 57 for (int i = 0; i < K; i++) { 58 if (p->nx[i]) { 59 Node *t = p->fail; 60 while (t->nx[i] == NULL && t != RT) t = t->fail; 61 t = t->nx[i]; 62 if (t) p->nx[i]->fail = t, p->nx[i]->pre |= t->pre; 63 else p->nx[i]->fail = RT; 64 q[qt++] = p->nx[i]; 65 } 66 } 67 } 68 } 69 int query(char *s) { 70 int cnt = 0, idx; 71 Node *p = RT; 72 while (*s) { 73 idx = *s - 'a'; 74 while (p->nx[idx] == NULL && p != RT) p = p->fail; 75 p = p->nx[idx]; 76 if (p) { 77 Node *t = p; 78 while (t != RT && t->pre) cnt += t->cnt, t->pre = false, t = t->fail; 79 } else p = RT; 80 //cout << *s << ' ' << cnt << endl; 81 s++; 82 } 83 return cnt; 84 } 85 } trie; 86 87 char str[L], buf[M]; 88 89 int main() { 90 //freopen("in", "r", stdin); 91 int T, n; 92 scanf("%d", &T); 93 while (T--) { 94 scanf("%d", &n); 95 ttnode = 0; 96 trie.init(); 97 for (int i = 0; i < n; i++) { 98 scanf("%s", buf); 99 trie.insert(buf); 100 //cout << buf << endl; 101 } 102 trie.build(); 103 //cout << "built" << endl; 104 scanf("%s", str); 105 printf("%d\n", trie.query(str)); 106 } 107 return 0; 108 }
详细解释: http://www.cppblog.com/mythit/archive/2009/04/21/80633.html
——written by Lyon