AC自动机(Trie图)复习笔记

友情提示:

本文仅是本菜鸡用来自我复习\(Trie\)图的,也许并不适合作为初学者的学习笔记.

什么是Trie图?

\(\quad\)这里称之为Trie图而非\(AC\)自动机是因为本菜鸡认为,这更像是一个每个点有\(\Sigma\)条转移边的\(Trie\),而非自动机.因为大多数时候我们并不关心其接受什么状态.由于Trie树的性质,每个点代表原字符串集的一个前缀.

一些定义和约定

  • fail[u]:失配指针,指向节点u所代表的字符串在Trie树上存在的最长真后缀所对应的节点.

  • vis[u][c]:转移函数,表示在节点u所代表的字符串后追加字符'c'之后,不舍弃前缀或只舍弃尽可能短的前缀所能到达的节点.

构建

首先建\(Trie\)树,然后bfs构建出\(Trie\)图.具体构建如下:

void bfs(){
	int l = 1,r = 0;
	for(int i = 0; i < 26; ++i){
		int x = ac[root].vis[i];
		if(x){
			ac[x].fail = 0;
			q[++r] = x;
		}
	}
	while(l <= r){
		int x = q[l++];
		int fafail = ac[x].fail;
		for(int i = 0; i < 26; ++i){
			int y = ac[x].vis[i];
			if(y){
				ac[y].fail = ac[fafail].vis[i];
				q[++r] = y;
			}
			else
				ac[x].vis[i] = ac[fafail].vis[i];
		}
	}
	for(int i = 1; i <= cnt; ++i)
		add(ac[i].fail,i);
}

性质

  • 每个点的fail指针指向其最长后缀,类似SAM的,我们可以建一棵fail树,用法类似

基本用处

  • 将一个文本串往\(Trie\)图上跑匹配,显然我们可以得到,每个匹配串(即构成\(Trie\)树的若干个字符串)在文本串的出现次数.

一些例题

  1. BJOI2019奥术神杖

题意简述:实在简述不来

\(\quad\)题目里那个连乘再开根号的式子实在太丑了,考虑取\(\ln\).由\(\ln\)函数的单调性,问题就转化成求\(\frac{\sum \ln v_i}{c}\)的最大值.考虑分数规划,套路地二分答案,然后设\(dp_{i,j}\)表示当前\(dp\)到第\(i\)个字符、位于\(Trie\)图第\(j\)个节点时的最大值.之所以把\(Trie\)图的状态也存下来是为了方便转移.预处理每个点在\(fail\)树上所有祖先的\(V_i\)(即\(\ln v_i\))之和,因为到达一个点就等价于到达了其\(fail\)树上的所有祖先.输出方案只要用二分得到的答案再\(dp\)一遍,并记录下转移即可.
时间复杂度\(O(n*s*log)\)
垃圾卡常题

/*BJOI2019奥术神杖*/
#include<bits/stdc++.h>
using namespace std;
#define ll long long
/**
 * Reads the next non-negative decimal integer from stdin.
 *
 * Every character that is not a decimal digit is skipped, then the following
 * run of digits is accumulated. Signs are not interpreted and overflow is not
 * checked.
 *
 * @return the parsed value, or 0 when the input ends before any digit is seen.
 */
int read(){
	// getchar() returns an int so that EOF can be told apart from every real
	// character; storing it in a char would make the skip loop spin forever
	// once the input is exhausted.
	int c = getchar();
	while(c != EOF && (c < '0' || c > '9'))	c = getchar();
	int x = 0;
	while(c >= '0' && c <= '9'){
		x = x * 10 + (c - '0');
		c = getchar();
	}
	return x;
}
// Floating-point type used for every real-valued quantity in this program.
#define ld double
// Shorthand for the `register` storage-class hint used on loop counters.
#define re register 
// Capacity of every array below (text positions, trie nodes, patterns).
const int N = 2010;
// dp[i][j][o]: best value found by check() after i text characters, standing on
//              trie-graph node j, with flag o (0/1) carried along the path.
// f[x]:        per-node score `now[x] - mid * cur[x]`, recomputed by every check().
// V[i]:        log(val[i]) of pattern i, filled in main().
ld dp[N][N][2],f[N],V[N];
// n, m:   the two integers read first in main(); check() iterates n text positions,
//         main() reads m patterns.
// T:      the text, stored 1-indexed (read into T+1); '.' positions are tried with
//         every digit in check() and overwritten by main() during reconstruction.
// val[i]: integer read after pattern i.
// cur[p]: number of patterns ending at node p (ins), then summed over fail-tree ancestors (dfs).
// now[p]: sum of V over those same patterns.
int n,m;char T[N];int val[N],cur[N];ld now[N];
// Back-pointers recorded by check() for each dp state: predecessor node ...
int tran[N][N][2];
// ... digit placed at that position ...
int zf[N][N][2];
// ... and the flag value of the predecessor state.
int op[N][N][2];
// The binary search in main() stops once the interval is no wider than eps.
const ld eps = 1e-6;
// Input buffer for one pattern, stored 1-indexed (read into s+1).
char s[N];
// Shorthand for vector::push_back.
#define pb push_back
// Linked adjacency list ("forward star"): edges leaving the same node are
// chained through `nxt`, and head[u] holds the index of the most recently
// added edge of u (0 means the chain is empty).
struct Edge{
	int nxt;	// index of the next edge with the same source, 0 if none
	int point;	// target node of this edge
};
Edge edge[N<<1];
int tot;
int head[N];

// Prepends the directed edge u -> v to u's chain.
void add_edge(int u,int v){
	const int id = ++tot;
	edge[id].point = v;
	edge[id].nxt = head[u];
	head[u] = id;
}
namespace AC_Node{
	int ch[N][12],fail[N];
	int cnt = 0;vector<int>End[N];
	void ins(char *s,int id){
		int n = strlen(s+1);
		int p = 0;
		for(int i = 1; i <= n; ++i){
			int c = s[i] - '0';
			if(!ch[p][c])	ch[p][c] = ++cnt;
			p = ch[p][c];
		}
		End[p].pb(id);
		cur[p]++;now[p] += V[id];
	}
	void build(){
		int rt = 0;/*置为0,即失配后依然回到这里*/
		int l = 1,r = 0;
		static int q[N];
		for(int c = 0; c < 10; ++c){
			int x = ch[rt][c];
			if(x){
				q[++r] = x;
				fail[x] = rt;
			}
		} 
		while(l <= r){
			int x = q[l++];int fafail = fail[x];
			for(int c = 0; c < 10; ++c){
				int y = ch[x][c];
				if(y){
					fail[y] = ch[fafail][c];
					q[++r] = y;
				}
				else{
					ch[x][c] = ch[fafail][c];
				}
			}
		}
		for(int i = 1; i <= cnt; ++i)	add_edge(fail[i],i);
	}
}
using namespace AC_Node;
void dfs(int u,int fa){
	if(fa != -1)
		now[u] += now[fa],cur[u] += cur[fa];
	for(re int i = head[u]; i; i = edge[i].nxt){
		int v = edge[i].point;
		dfs(v,u);
	}
}
// Node at which the last check() call found the best final dp value with the
// flag set; main() starts the reconstruction of the answer from it.
int np;
// Debug helper: prints x with ten decimals followed by a newline.
// The value is converted to double and printed with %f; the `L` length
// modifier must only be paired with a long double argument.
#define print(x)	printf("%.10f\n",static_cast<double>(x))
/**
 * Feasibility test used by the binary search in main().
 *
 * Every node x is scored f[x] = now[x] - mid * cur[x]. A DP over the text then
 * maximises the sum of the scores of the nodes visited:
 *   dp[i][p][o] = best sum after i characters, standing on node p, where o is 1
 *                 iff some visited node had cur != 0 (at least one pattern matched).
 * A fixed digit in T allows a single transition, a '.' allows all ten digits.
 * For every improved state the predecessor node, the digit used and the
 * predecessor flag are stored in tran / zf / op.
 *
 * Side effects: overwrites f, dp, tran, zf, op and sets np to the node with the
 * largest dp[n][.][1] (the first such node on ties).
 *
 * @param mid candidate value being tested
 * @return true iff the best dp[n][.][1] is non-negative
 */
bool check(ld mid){
	for(int x = 1; x <= cnt; ++x)
		f[x] = now[x] - mid * cur[x];

	for(int i = 0; i <= n; ++i)
		for(int j = 0; j <= cnt; ++j)
			dp[i][j][0] = dp[i][j][1] = -1e9;
	dp[0][0][0] = 0;

	for(int i = 0; i < n; ++i){
		// Digits that may be placed at text position i+1.
		const bool wildcard = (T[i+1] == '.');
		const int lo = wildcard ? 0 : T[i+1] - '0';
		const int hi = wildcard ? 9 : lo;
		for(int o = 0; o < 2; ++o)
		for(int p = 0; p <= cnt; ++p){
			const ld base = dp[i][p][o];
			for(int c = lo; c <= hi; ++c){
				const int to = ch[p][c];
				// "A pattern has been matched" is decided by the integer count;
				// testing f[to] != 0 instead would miss the case where
				// now[to] - mid * cur[to] happens to be exactly zero.
				const int flag = o | (cur[to] != 0);
				const ld cand = base + f[to];
				if(dp[i+1][to][flag] < cand){
					dp[i+1][to][flag] = cand;
					tran[i+1][to][flag] = p;
					zf[i+1][to][flag] = c;
					op[i+1][to][flag] = o;
				}
			}
		}
	}

	// Pick the best final state among those with the flag set.
	np = -1;
	ld ans = -1e9;
	for(int p = 0; p <= cnt; ++p){
		if(np == -1 || dp[n][np][1] < dp[n][p][1])
			ans = dp[n][p][1], np = p;
	}
	return (ans >= 0);
}
int main(){
	n = read(),m = read();
	scanf("%s",T+1);
//	cout<<strlen(T+1)<<endl;
	ld l = 0,r = log(1e9+7);
	for(re int i = 1; i <= m; ++i){
		scanf("%s",s+1);val[i] = read(); 
		V[i] = log(val[i]);
		ins(s,i);
	}
	build();
	dfs(0,-1);
	ld ans = 0;
	while(r - l > eps){
		ld mid = (l + r) / 2.0;
		if(check(mid)){
			ans = mid;
			l = mid;
		} 
		else	r = mid;
	}
//	printf("%.10LF\n",ans);
	if(ans == 0){
		for(re int i = 1; i <= n; ++i){
			if(T[i] == '.')	putchar('0');
			else	putchar(T[i]);
		}
		return 0;
	}
	check(ans);
	int p = np,o = 1;
	for(re int i = n; i >= 1; --i){
		int c = zf[i][p][o];T[i] = c + '0';
		int _p = tran[i][p][o];
		int _o = op[i][p][o];
		p = _p,o = _o;
	}
	for(re int i = 1; i <= n; ++i){
		printf("%c",T[i]);
	}
	return 0;
}
posted @ 2021-03-09 22:38  y_dove  阅读(134)  评论(0编辑  收藏  举报