BZOJ3172 单词 Fail树
题目大意:求一篇论文中每个单词分别在论文中出现多少次。
本题用AC自动机太慢,应该用Fail树将AC自动机中所有的Fail指针反向得到一个新树,这就是Fail树。对长度为x的字符串a和长度为y的字符串b,如果a是b的子串,则a可能与位于b[0,a],b[0,a+1],b[0,a+2]...b[0,y]中的后缀相等。根据fail指针的定义,只要沿着反向Fail边走,走到的节点所代表的字符串必然存在与a(前缀)相等的后缀。因此,一遍DFS,返回加上子节点的总Cnt值的当前节点的Cnt值,即可。注意,Trie树中,有些节点是多个字符串公用的,因此每次构造Trie树时,都要对每个节点的Cnt++,以等价于此处存在多个字符串。
#include <cstdio>
#include <cstring>
#include <cassert>
#include <algorithm>
#include <cmath>
#include <queue>
#include <vector>
using namespace std;

const int MAX_CHAR = 26, MAX_LEN = 1000000 + 10, MAX_STR = 210;

// Aho-Corasick trie over 'a'..'z' whose reversed Fail links form an explicit
// tree (the "fail tree").
//
// Usage: Insert() every word, call SetFail() once, then Dfs(Root) once.
// Afterwards Tail[i]->Cnt is the number of times word i occurs as a substring
// of all inserted words together.
struct FailTree
{
    struct Edge;

    struct Node
    {
        Node *Next[MAX_CHAR]; // trie children, indexed by letter
        Node *Fail;           // Aho-Corasick failure link (NULL for the root)
        int Cnt;              // words whose path passes through this node;
                              // after Dfs(): total over the fail-tree subtree
        Edge *Head;           // first fail-tree child edge (singly linked list)

        // Members are initialised in declaration order.
        Node() : Fail(NULL), Cnt(0), Head(NULL)
        {
            for (int i = 0; i < MAX_CHAR; i++)
                Next[i] = NULL;
        }
    };

    // One fail-tree edge: parent -> To, chained through Next.
    struct Edge
    {
        Node *To;
        Edge *Next;
        Edge(Node *to, Edge *next) : To(to), Next(next) {}
    };

    vector<Node*> _nodes; // owns every trie node; _nodes[0] is the root
    vector<Node*> Tail;   // Tail[i] = node at which the i-th inserted word ends
    vector<Edge*> _edges; // owns every fail-tree edge
    Node *Root;           // same pointer as _nodes[0]

    FailTree() : Root(new Node())
    {
        _nodes.push_back(Root);
    }

    // Releases all nodes and edges allocated by this object.
    ~FailTree()
    {
        for (size_t i = 0; i < _nodes.size(); i++)
            delete _nodes[i];
        for (size_t i = 0; i < _edges.size(); i++)
            delete _edges[i];
    }

    // Maps a lowercase letter to its child index.
    static int Org(char c)
    {
        return c - 'a';
    }

    // Adds the fail-tree edge from -> to (from is the Fail target of to).
    void AddEdge(Node *from, Node *to)
    {
        Edge *e = new Edge(to, from->Head);
        from->Head = e;
        _edges.push_back(e);
    }

    // Inserts s into the trie and returns the node where it ends.
    // Every node on the path gets Cnt++ because a node shared by several words
    // stands for one prefix occurrence per word.
    // s must consist of the letters 'a'..'z' only.
    Node *BuildTrie(const char *s)
    {
        Node *cur = Root;
        for (; *s; ++s)
        {
            const int c = Org(*s);
            assert(0 <= c && c < MAX_CHAR);
            if (!cur->Next[c])
            {
                Node *child = new Node();
                _nodes.push_back(child);
                cur->Next[c] = child;
            }
            cur = cur->Next[c];
            cur->Cnt++;
        }
        return cur;
    }

    // Inserts s and remembers its end node in Tail.
    void Insert(const char *s)
    {
        Tail.push_back(BuildTrie(s));
    }

    // Breadth-first computation of all Fail links; each link is also recorded
    // as a reversed edge so that Dfs() can walk the fail tree top-down.
    // Call exactly once, after the last Insert().
    void SetFail()
    {
        queue<Node*> q;
        q.push(Root);
        while (!q.empty())
        {
            Node *cur = q.front();
            q.pop();
            for (int i = 0; i < MAX_CHAR; i++)
            {
                Node *child = cur->Next[i];
                if (!child)
                    continue;

                // Longest proper suffix of child's string that is also a path
                // from the root: follow cur's Fail chain until a node with an
                // i-transition is found.
                Node *temp = cur->Fail;
                while (temp && !temp->Next[i])
                    temp = temp->Fail;

                child->Fail = temp ? temp->Next[i] : Root;
                AddEdge(child->Fail, child);
                q.push(child);
            }
        }
    }

    // Adds to every node in the fail-tree subtree of u the Cnt of all its
    // descendants and returns the resulting u->Cnt.
    //
    // This is the post-order DFS of the fail tree written without recursion:
    // for a long periodic word the fail tree is a chain as deep as the word,
    // which would overflow the call stack. Call once; a second call would
    // accumulate the counts again.
    int Dfs(Node *u)
    {
        // Collect the subtree so that every parent precedes its children.
        vector<Node*> order(1, u);
        for (size_t i = 0; i < order.size(); i++)
            for (Edge *e = order[i]->Head; e; e = e->Next)
                order.push_back(e->To);

        // Walking backwards, all children of a node are final before the node
        // itself is visited.
        for (size_t i = order.size(); i-- > 0; )
            for (Edge *e = order[i]->Head; e; e = e->Next)
                order[i]->Cnt += e->To->Cnt;

        return u->Cnt;
    }

private:
    // The object owns raw pointers, so copying is disabled.
    FailTree(const FailTree &);
    FailTree &operator=(const FailTree &);
};

FailTree g;

int main()
{
#ifdef _DEBUG
    freopen("c:\\noi\\source\\input.txt", "r", stdin);
#endif
    // Static storage: a ~1 MB automatic array can exhaust the stack.
    static char s[MAX_LEN];

    // "%<MAX_LEN-1>s" so that an over-long token can never overrun s.
    char fmt[16];
    snprintf(fmt, sizeof(fmt), "%%%ds", MAX_LEN - 1);

    int tot;
    if (scanf("%d", &tot) != 1)
        return 0; // empty or malformed input: nothing to count

    int words = 0;
    for (; words < tot; words++)
    {
        if (scanf(fmt, s) != 1)
            break; // input ended early; work with the words read so far
        g.Insert(s);
    }

    g.SetFail();
    g.Dfs(g.Root);

    for (int i = 0; i < words; i++)
        printf("%d\n", g.Tail[i]->Cnt);
    return 0;
}
或者不用反向Fail指针也可以,站在后缀上去找其所包含的前缀。这样编程复杂度低一些。
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <vector>
#include <queue>
#include <cassert>
#include <cmath>
#include <algorithm>
using namespace std;

const int MAX_CHAR = 26, MAX_NODE = 500000 + 1, MAX_LEN = 1000000 + 1;

// Trie node of the Aho-Corasick automaton. All nodes live in the static pool
// Nodes[], so every field starts out zero / NULL.
struct Node
{
    int Sum;               // number of input words that end exactly here
    int Id;                // id of the last word that ended here
    int Cnt;               // answer: occurrences of this node's word
    Node *Fail;            // failure link (stays NULL for the root)
    Node *Next[MAX_CHAR];  // trie children, indexed by letter
} Nodes[MAX_NODE];

int Nodes_Cnt = 1;         // index of the last used pool slot; slot 1 is the root
char P[MAX_LEN];           // input buffer for one word
Node *WordNode[MAX_NODE];  // WordNode[id] = node where word id ends

// Reports a fatal error on stderr and terminates the program.
static void Die(const char *msg)
{
    fprintf(stderr, "%s\n", msg);
    exit(EXIT_FAILURE);
}

// Maps a lowercase letter to its child index.
int Ord(char c)
{
    return c - 'a';
}

// Hands out the next unused node of the pool.
// Terminates the program instead of writing past the end of Nodes[].
Node *NewNode()
{
    if (Nodes_Cnt + 1 >= MAX_NODE)
        Die("trie node pool exhausted (MAX_NODE too small for this input)");
    return Nodes + ++Nodes_Cnt;
}

// The root is pool slot 1; slot 0 is unused.
Node *Root()
{
    return Nodes + 1;
}

// Inserts word s (letters 'a'..'z' only) as word number id.
void BuildTrie(const char *s, int id)
{
    if (id < 0 || id >= MAX_NODE)
        Die("word id out of range");

    Node *cur = Root();
    for (; *s; ++s)
    {
        const int c = Ord(*s);
        assert(0 <= c && c < MAX_CHAR);
        if (!cur->Next[c])
            cur->Next[c] = NewNode();
        cur = cur->Next[c];
    }
    cur->Sum++;
    cur->Id = id;
    WordNode[id] = cur;
}

// Breadth-first computation of the Fail link of every node.
void SetFail()
{
    queue<Node*> q;
    q.push(Root());
    while (!q.empty())
    {
        Node *cur = q.front();
        q.pop();
        for (int i = 0; i < MAX_CHAR; i++)
        {
            Node *child = cur->Next[i];
            if (!child)
                continue;

            // Follow cur's Fail chain until a node with an i-transition is
            // found; fall back to the root if there is none.
            Node *temp = cur->Fail;
            while (temp && !temp->Next[i])
                temp = temp->Fail;

            child->Fail = temp ? temp->Next[i] : Root();
            q.push(child);
        }
    }
}

// For every node v in the trie subtree of cur: let w be the number of words
// ending inside v's subtree (the words that have v's string as a prefix), then
// add w to the Cnt of every word node on v's Fail chain, v included and the
// root excluded. Returns the number of words ending in cur's subtree.
// SetFail() must have been called first.
//
// The subtree is processed without recursion because the trie is as deep as
// the longest word, which would overflow the call stack. The Fail chain is
// still walked once per node, so the cost is the sum of all chain lengths.
int Dfs1(Node *cur)
{
    // List the subtree so that every parent precedes its children;
    // parent[i] is the position of order[i]'s trie parent inside order.
    vector<Node*> order(1, cur);
    vector<int> parent(1, -1);
    for (size_t i = 0; i < order.size(); i++)
        for (int c = 0; c < MAX_CHAR; c++)
            if (order[i]->Next[c])
            {
                order.push_back(order[i]->Next[c]);
                parent.push_back((int)i);
            }

    // words[i] = number of words ending in the subtree of order[i].
    // Going backwards, a node is complete before it is added to its parent.
    vector<int> words(order.size());
    for (size_t i = 0; i < order.size(); i++)
        words[i] = order[i]->Sum;
    for (size_t i = order.size(); i-- > 1; )
        words[parent[i]] += words[i];

    // Credit every word that is a suffix of the prefix order[i].
    for (size_t i = 0; i < order.size(); i++)
        for (Node *temp = order[i]; temp && temp != Root(); temp = temp->Fail)
            if (temp->Sum)
                temp->Cnt += words[i];

    return words[0];
}

int main()
{
    //freopen("c:\\noi\\source\\input.txt", "r", stdin);

    // "%<MAX_LEN-1>s" so that an over-long token can never overrun P.
    char fmt[16];
    snprintf(fmt, sizeof(fmt), "%%%ds", MAX_LEN - 1);

    int totP;
    if (scanf("%d", &totP) != 1)
        return 0; // empty or malformed input: nothing to count

    int words = 0;
    for (; words < totP; words++)
    {
        if (scanf(fmt, P) != 1)
            break; // input ended early; work with the words read so far
        BuildTrie(P, words);
    }

    SetFail();
    Dfs1(Root());

    for (int i = 0; i < words; i++)
        printf("%d\n", WordNode[i]->Cnt);
    return 0;
}