Bzoj 3145 - [Feyat cup 1.5]Str

bzoj 3145 - [Feyat cup 1.5]Str

Description

给你两个长度\(10^5\)级别的串\(S, T\)

求\(S,T\)的最长模糊匹配公共子串的长度

模糊匹配 : 至多一个位置上的字符不同

Analysis

屯了好久的题, 之前看过3次都看不懂, 一直觉得这道题非常恐怖

现在重新翻出来看很快就有思路了, 也不是很恐怖嘛 (写了一个下午系列)

定义模糊点 : 两个子串对应位置上字符不同的那个位置

情况1. 模糊点均在两串的中间部分

那么我们可以分为\({A + * + B}\) , 其中\(*\)为模糊点

一个想法是对\(S\)建自动机, 然后从左往右扫\(T\)串, 在\(S\)中得到\(A\)部分的最大匹配的状态

然后那个状态有个\(right\)集, \(right + 1\)是模糊点\(*\)的位置, 所以\(right + 2\)就是\(B\)部分的开头

我们用后缀数组来匹配\(B\)部分, 根据贪心, rk一定会是相邻的

我们在\(SAM\)(前缀逆序树)上维护两棵平衡树 , 一棵存出现在\(S\)串中的那些rk, 一棵存\(T\)的, 启发式合并的时候两两找相邻点在\(SA\)里求\(LCP\)

后来想了一下发现不行, 我这样\(A\)部分相当于贪心地找了最长的, 但是有可能该状态往上跳几下得到一对更优的\(B\)

我们发现可以对\(S,T\)建广义后缀自动机, 这样每个状态对应了一种长度的\(A\)部分公共子串

然后照样做就好了

Claris的方法是用反串的后缀数组替代广义自动机, 然后枚举公共\(A\)部分长度, 在rk上合并相邻\(lcp >= len\)的点

并查集 + 启发式合并

情况2. 至少一个模糊点在串头/串尾

在实现过程中特判处理即可

串头的用 后缀数组 匹配

串尾的用 前缀逆序树 匹配

Code

#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <cctype>
#include <cmath>
#include <algorithm>
#include <set>
// Loop helpers used throughout the file.
// rep: ascending int loop over the closed range [a, b].
#define rep(i,a,b) for (int i = (a); i <= (b); ++ i )
// per: descending int loop from a down to b, both inclusive.
#define per(i,a,b) for (int i = (a); i >= (b); -- i )
// For: ascending int loop over the half-open range [a, b).
#define For(i,a,b) for (int i = (a); i < (b); ++ i )
// fore: iterates container `a` from begin() to end(). The first macro argument
// is ignored: the iterator is always named `it`, and its type `its` must be
// declared at the point of use.
#define fore(i,a) for (its it = (a).begin(); it != (a).end(); ++ it )
using namespace std;
// Capacity of the two input buffers; the namespaces below size their tables
// as N = M << 1.
const int M = 2e5 + 7;
// Sentinel larger than any text position; SAM stores it in rt[][] for states
// that have no end position recorded yet.
const int INF = 1e9 + 7;

// Input strings; main() reads them starting at index 1.
char s[M], t[M];
// Lengths of s and t as measured by strlen in main().
int ls, lt;
// Final answer printed by main(); SAM::topu() only ever raises it, so the
// result is never below 1.
int ans = 1;

namespace SA{
	static const int N = M << 1;
	char s[N]; int n, mid;
	int sa[N], t[N];
	int rk[N], f[N];
	int st[N][20], ln[N];
	int h[N];

	void getsa(){
		static int sum[M];
		int p, nw, i, j;
		for (i=1; i<=n; ++i) sum[s[i]]++;
		for (i=0; i<=255; ++i) sum[i] += sum[i-1];
		for (i=1; i<=n; ++i) sa[sum[s[i]]--] = i;
		for (p=0, i=1; i<=n; ++i) rk[sa[i]] = (p += (s[sa[i]] != s[sa[i-1]]));
		for (nw=p, j=1; nw!=n; j<<=1, nw=p) {
			memset(sum, 0, (1+nw)<<2);
			memcpy(f, rk, sizeof rk);
			for (p=0, i=n-j+1; i<=n; ++i) t[++p] = i;
			for (i=1; i<=n; ++i) if (sa[i] > j) t[++p] = sa[i] - j;
			for (i=1; i<=n; ++i) sum[f[i]]++;
			for (i=1; i<=nw; ++i) sum[i] += sum[i-1];
			for (i=n; i>=1; --i) sa[sum[f[t[i]]]--] = t[i];
			for (p=0, i=1; i<=n; ++i) rk[sa[i]] = (p += (f[sa[i]] != f[sa[i-1]] || f[sa[i]+j] != f[sa[i-1]+j]));
		}
	}

	void geth(){
		int i, j, p;
		for (p=0, i=1; i<=n; ++i){
			j = sa[rk[i]-1];
			for (; s[i+p] == s[j+p]; ++p);
			h[rk[i]] = p;
			if (p > 0) --p;
		}
	}

	void init(){
		rep (i, 1, n) st[i][0] = h[i];
		int i; for (ln[0]=ln[1]=0, i=2; i<=n; ++i) ln[i] = ln[i>>1] + 1;
		per (i, n, 1)
		rep (j, 1, ln[n-i+1])
			st[i][j] = min(st[i][j-1], st[i + (1 << j-1)][j-1]);
	}

	inline int LCP(int x, int y){
		if (x == y) return n-y+1;
		if (x > y) swap(x, y);
		++x; int l = ln[y-x+1];
		return min(st[x][l], st[y - (1 << l) + 1][l]);
	}

	void build(char *a, char *b, int la, int lb){
		n = 0;
		rep (i, 1, la) s[++n] = a[i];
		s[mid = ++n] = '+';
		rep (i, 1, lb) s[++n] = b[i];

		getsa();
		geth();
		init();
	}
}

namespace Set{
	static const int N = M << 1;
	int ans[N];
	int rt[2][N];
	set<int> s[2][N];
	typedef set<int> :: iterator its;

	int find(int x, int k, int p){
		int res = -1;
		its rt = s[k][x].upper_bound(p);
		if (rt != s[k][x].end())
			res = max(res, SA::LCP(*rt, p));
		if (rt != s[k][x].begin())
			res = max(res, SA::LCP(*(--rt), p));
		return res;
	}
	
	void ins(int x, int k, int p){
		ans[x] = max(ans[x], find(x, k^1, p));
		s[k][x].insert(p);
	}

	void merge(int &x, int y, int k){
		if (s[k][x].size() < s[k][y].size()) {
			fore (it, s[k][x]) s[k][y].insert(*it);
			x = y;
		}
		else
			fore (it, s[k][y]) s[k][x].insert(*it);
	}

	int upd(int x, int y, int kx, int ky){
		int res = -1;
		if (s[kx][x].size() < s[ky][y].size()) 
			fore (it, s[kx][x]) res = max(res, find(y, ky, *it));
		else
			fore (it, s[ky][y]) res = max(res, find(x, kx, *it));
		return res;
	}

	void Merge(int x, int y){
		rep (i, 0, 1) ans[x] = max(ans[x], upd(rt[i][x], rt[i^1][y], i, i^1));
		rep (i, 0, 1) merge(rt[i][x], rt[i][y], i);
		ans[x] = max(ans[x], ans[y]);		
	}
}

namespace SAM{
	static const int N = M << 1;
	int last, tot;
	int ch[N][26];
	int stp[N], fa[N];
	int rt[2][N], ed[2];

	inline int newnode(int l){
		stp[++tot] = l;
		rt[0][tot] = rt[1][tot] = INF;
		return tot;
	}

	int ext(int p, int q, int c){
		int nq = newnode(stp[p] + 1);
		fa[nq] = fa[q], fa[q] = nq;
		memcpy(ch[nq], ch[q], sizeof ch[q]);
		for (; p && ch[p][c] == q; p = fa[p]) ch[p][c] = nq;
		return nq;
	}

	int sam(int p, int c){
		int np = ch[p][c];
		if (np) return (stp[p] + 1 == stp[np]) ? np : ext(p, np, c);

		np = newnode(stp[p] + 1);
		for (; p && ch[p][c] == 0; p = fa[p]) ch[p][c] =np;
		if (!p) fa[np] = 1;
		else {
			int q = ch[p][c];
			fa[np] = (stp[p] + 1 == stp[q]) ? q : ext(p, q, c);
		}
		return np;
	}

	void trans(int &p, int c, int &len){
		if (!ch[p][c]) {
			for (; p && ch[p][c] == 0; p = fa[p]);
			len = stp[p];
		}
		if (ch[p][c]) p = ch[p][c], ++len;
		else p = 1, len = 0;
	}

	void build(char *a, char *b, int la, int lb){
		last = tot = 1;
		rep (i, 1, la)
			rt[0][ last = sam(last, a[i]-'a') ] = i;
		last = 1;
		rep (i, 1, lb)
			rt[1][ last = sam(last, b[i]-'a') ] = la + 1 + i;

		ed[0] = la, ed[1] = la + 1 + lb;
		rt[0][1] = 0;
		rt[1][1] = la + 1;
	}

	void topu(){
		static int sum[N], pid[N];
		rep (i, 1, tot) sum[stp[i]]++;
		rep (i, 1, tot) sum[i] += sum[i-1];
		rep (i, 1, tot) pid[sum[stp[i]]--] = i;
		rep (i, 1, tot) rep (j, 0, 1) {
			Set::rt[j][i] = i;
			Set::ans[i] = -1;
			if (rt[j][i] != INF && rt[j][i]+2 <= ed[j])
				Set::ins(i, j, SA::rk[rt[j][i] + 2]);
		}
		per (i, tot, 1) {
			int x = pid[i];
			if (rt[0][x] != INF && rt[1][x] != INF)
				ans = max(ans, stp[x] + 1 + Set::ans[x]),
				ans = max(ans, stp[x] + (rt[0][x] < ed[0] && rt[1][x] < ed[1]) );
			if (x != 1){
				rep (j, 0, 1)
					rt[j][fa[x]] = min(rt[j][fa[x]], rt[j][x]);
				Set::Merge(fa[x], x);
			}
		}
	}
}

int main(){
#ifndef ONLINE_JUDGE	
	freopen("a.in", "r", stdin);
#endif
	int i;

	scanf("%s", s+1); ls = strlen(s+1);
	scanf("%s", t+1); lt = strlen(t+1);

	SAM::build(s, t, ls, lt);
	SA::build(s, t, ls, lt);
	SAM::topu();

	printf("%d\n", ans);

	return 0;
}
posted @ 2017-09-10 20:26  _zwl  阅读(309)  评论(0)  编辑  收藏  举报