HDU 3901 Wildcard

题目大意:

给两个长度不超过100000的字符串, 一个是带有通配符?和*的模式串, 问能否匹配. 通配符不超过10个.

 

简要分析:

翻了网上盛传的一份讲AC自动机的论文(总觉得是生物论文), 学习了带通配符的匹配.

因为*可以是任意长度, 所以我们可以去掉*, 得到若干子串, 我们找出这些子串能匹配的位置, 并尽量往前放, 看是否满足要求即可. 这里需要注意首为*或尾为*的情况, 其实就是子串匹配时是否需要匹配到头或是尾.

于是问题变成了, 设文本串为S, 某一只含?和小写字母的模式串T, 去掉问号后得到若干子串P1, P2, P3, ...Pk, 每个子串结束的下标为L1, L2, L3, ...Lk, 则把Pi丢到AC自动机里面, 开始匹配, 一旦S匹配到j时匹配到某个Pi的末尾时, 向cnt[j - Li]累加1. 最后所有满足cnt[x] == k的x都是S中满足条件的子串开头.

写的时候注意细节就行了. 一个小优化: 构造AC自动机时, 把每个节点fail指针所指节点中的信息(即该节点上记录的子串结束下标列表)整合到该节点上, 这样匹配时就不必再沿fail链回溯, 能快不少(其实不这样写会TLE...).

PS: j - Li可能小于0...坑爹啊...

 

代码实现:

View Code
  1 #include <cstdio>
2 #include <cstdlib>
3 #include <cstring>
4 #include <utility>
5 #include <queue>
6 #include <vector>
7 #include <algorithm>
8 using namespace std;
9
// Shorthand for the containers' push_back member, used throughout the file.
#define pb push_back

// Pair of ints; the file stores placed segments as (start, end) positions in s.
typedef std::pair<int, int> pt;

// Maximum number of characters held by each input buffer (excluding the NUL).
const int BUF_SIZE = 100000;
// Number of outgoing trie edges per node: one per letter 'a'..'z'.
const int SON = 26;

char s[BUF_SIZE + 1];  // text to be matched
char t[BUF_SIZE + 1];  // pattern containing '?' and '*'
int lens;              // strlen(s)
int lent;              // strlen(t)
std::vector<pt> vec;   // segments placed so far, in placement order
bool fir;              // true when the pattern starts with '*'
bool las;              // true when the pattern ends with '*'
18
19 struct node_t {
20 node_t *son[SON], *fail;
21 vector <int> lis;
22 } node_pool[BUF_SIZE + 1], *node_idx, *root;
23
24 node_t *node_alloc() {
25 node_t *ret = node_idx ++;
26 memset(ret -> son, 0, sizeof(ret -> son));
27 ret -> fail = NULL;
28 ret -> lis.clear();
29 return ret;
30 }
31
32 void init() {
33 node_idx = node_pool;
34 root = node_alloc();
35 }
36
37 void ins(char *st, char *ed, int p) {
38 node_t *pos = root;
39 while (st != ed) {
40 int t = *(st ++) - 'a';
41 if (!pos -> son[t]) pos -> son[t] = node_alloc();
42 pos = pos -> son[t];
43 }
44 pos -> lis.pb(p);
45 }
46
47 void build() {
48 static queue <node_t *> q;
49 for (int i = 0; i < SON; i ++)
50 if (root -> son[i]) {
51 root -> son[i] -> fail = root;
52 q.push(root -> son[i]);
53 }
54 else root -> son[i] = root;
55 while (q.size()) {
56 node_t *u = q.front();
57 q.pop();
58 for (int i = 0; i < SON; i ++)
59 if (u -> son[i]) {
60 u -> son[i] -> fail = u -> fail -> son[i];
61 for (vector <int>::iterator it = u -> fail -> son[i] -> lis.begin(); it != u -> fail -> son[i] -> lis.end(); it ++)
62 u -> son[i] -> lis.pb(*it);
63 q.push(u -> son[i]);
64 }
65 else u -> son[i] = u -> fail -> son[i];
66 }
67 }
68
69 bool solve(int lb, int rb, bool st, bool ed) { // [lb, rb)
70 init();
71 int pat = 0;
72 static int cnt[BUF_SIZE];
73 memset(cnt, 0, sizeof(cnt));
74 for (int i = lb; i < rb; )
75 if (t[i] == '?') i ++;
76 else {
77 int j = i;
78 while (j < rb && t[j] != '?') j ++;
79 ins(t + i, t + j, j - 1 - lb);
80 i = j;
81 pat ++;
82 }
83 build();
84
85 node_t *pos = root;
86 for (int i = 0; i < lens; i ++) {
87 int p = s[i] - 'a';
88 pos = pos -> son[p];
89 node_t *tmp = pos;
90 for (vector <int>::iterator it = tmp -> lis.begin(); it != tmp -> lis.end(); it ++) {
91 int q = *it;
92 if (i - q >= 0)
93 cnt[i - q] ++;
94 }
95 }
96
97 for (int i = 0; i < lens; i ++)
98 if (cnt[i] == pat) {
99 if (vec.empty()) {
100 if (!fir && st && i != 0) continue;
101 if (!las && ed && i + rb - lb != lens) continue;
102 vec.pb(make_pair(i, i + rb - lb));
103 return 1;
104 }
105 else {
106 vector <pair <int, int> >::reverse_iterator it = vec.rbegin();
107 if (i >= it -> second) {
108 if (ed && !las && i + rb - lb != lens) continue;
109 vec.pb(make_pair(i, i + rb - lb));
110 return 1;
111 }
112 }
113 }
114
115 return 0;
116 }
117
118 int main() {
119 while (scanf("%s%s", s, t) != EOF) {
120 lens = strlen(s), lent = strlen(t);
121 fir = (t[0] == '*'), las = (t[lent - 1] == '*');
122 vec.clear();
123 bool fail = 0;
124 int split = 0;
125 int real_end = lent;
126 while (real_end && t[real_end - 1] == '*') real_end --;
127
128 for (int i = 0; i < lent; )
129 if (t[i] == '*') i ++;
130 else {
131 int j = i;
132 while (j < lent && t[j] != '*') j ++;
133 if (!solve(i, j, split == 0, j == real_end)) {
134 fail = 1;
135 split ++;
136 break;
137 }
138 i = j;
139 split ++;
140 }
141
142 if (split == 0) printf("YES\n");
143 else if (fail) printf("NO\n");
144 else printf("YES\n");
145 }
146 return 0;
147 }
posted @ 2012-03-10 21:21  zcwwzdjn  阅读(1342)  评论(0编辑  收藏  举报