Bzoj 3145 - [Feyat cup 1.5]Str
bzoj 3145 - [Feyat cup 1.5]Str
Description
给你两个长度\(10^5\)级别的串\(S, T\)
求\(S,T\)的最长模糊匹配公共子串
模糊匹配 : 至多一个位置上的字符不同
Analysis
屯了好久的题, 之前看过3次都看不懂, 一直觉得这道题非常恐怖
现在重新翻出来看很快就有思路了, 也不是很恐怖嘛 (写了一个下午系列)
定义模糊点 : 位置不同的字符
情况1. 模糊点均在两串的中间部分
那么我们可以分为\({A + * + B}\) , 其中\(*\)为模糊点
一个想法是对\(S\)建自动机, 然后从左往右扫\(T\)串, 在\(S\)中得到\(A\)部分的最大匹配的状态
然后那个状态有个\(right\)集, \(right + 2\)就是\(B\)部分的开头
我们用后缀数组来匹配\(B\)部分, 根据贪心, 最优的一对后缀在 rk 上一定是相邻的
我们在\(SAM\)(前缀逆序树)上维护两棵平衡树, 一棵存出现在\(S\)串中的那些 rk, 一棵存\(T\)的, 启发式合并的时候两两找相邻点在\(SA\)里求\(LCP\)
后来想了一下发现不行, 我这样\(A\)部分相当于贪心地找了最长的, 但是有可能该状态往上跳几下能得到一对更优的\(B\)
我们发现可以对\(S,T\)建广义自动机, 这样每个状态对应了一种长度的\(A\)部分公共子串
然后照样做就好了
Claris的方法是用反串的后缀数组替代广义自动机, 然后枚举公共\(A\)部分长度, 在 rk 上合并相邻\(lcp >= len\)的点
并查集 + 启发式合并
情况2. 至少一个模糊点在串头/串尾
在实现过程中特判处理即可
串头的用 后缀数组 匹配
串尾的用 前缀逆序树 匹配
Code
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <cctype>
#include <cmath>
#include <algorithm>
#include <set>
// Loop helpers used throughout the file.
// rep: i runs a, a+1, ..., b (inclusive).
#define rep(i,a,b) for (int i = (a); i <= (b); ++ i )
// per: i runs a, a-1, ..., b (inclusive).
#define per(i,a,b) for (int i = (a); i >= (b); -- i )
// For: i runs a, a+1, ..., b-1 (half-open).
#define For(i,a,b) for (int i = (a); i < (b); ++ i )
// fore: iterates `i` over container a. Needs an iterator typedef named `its`
// in scope at the point of use. The loop variable is the macro's first
// argument (it used to be hard-coded as `it`, silently ignoring the argument).
#define fore(i,a) for (its i = (a).begin(); i != (a).end(); ++ i )
using namespace std;
const int M = 2e5 + 7;      // capacity of each input buffer
const int INF = 1e9 + 7;    // "no value" marker for minimum-tracking arrays
char s[M], t[M];            // the two input strings, stored from index 1
int ls, lt;                 // their lengths
int ans = 1;                // best answer found so far
namespace SA{
static const int N = M << 1;
char s[N]; int n, mid;
int sa[N], t[N];
int rk[N], f[N];
int st[N][20], ln[N];
int h[N];
void getsa(){
static int sum[M];
int p, nw, i, j;
for (i=1; i<=n; ++i) sum[s[i]]++;
for (i=0; i<=255; ++i) sum[i] += sum[i-1];
for (i=1; i<=n; ++i) sa[sum[s[i]]--] = i;
for (p=0, i=1; i<=n; ++i) rk[sa[i]] = (p += (s[sa[i]] != s[sa[i-1]]));
for (nw=p, j=1; nw!=n; j<<=1, nw=p) {
memset(sum, 0, (1+nw)<<2);
memcpy(f, rk, sizeof rk);
for (p=0, i=n-j+1; i<=n; ++i) t[++p] = i;
for (i=1; i<=n; ++i) if (sa[i] > j) t[++p] = sa[i] - j;
for (i=1; i<=n; ++i) sum[f[i]]++;
for (i=1; i<=nw; ++i) sum[i] += sum[i-1];
for (i=n; i>=1; --i) sa[sum[f[t[i]]]--] = t[i];
for (p=0, i=1; i<=n; ++i) rk[sa[i]] = (p += (f[sa[i]] != f[sa[i-1]] || f[sa[i]+j] != f[sa[i-1]+j]));
}
}
void geth(){
int i, j, p;
for (p=0, i=1; i<=n; ++i){
j = sa[rk[i]-1];
for (; s[i+p] == s[j+p]; ++p);
h[rk[i]] = p;
if (p > 0) --p;
}
}
void init(){
rep (i, 1, n) st[i][0] = h[i];
int i; for (ln[0]=ln[1]=0, i=2; i<=n; ++i) ln[i] = ln[i>>1] + 1;
per (i, n, 1)
rep (j, 1, ln[n-i+1])
st[i][j] = min(st[i][j-1], st[i + (1 << j-1)][j-1]);
}
inline int LCP(int x, int y){
if (x == y) return n-y+1;
if (x > y) swap(x, y);
++x; int l = ln[y-x+1];
return min(st[x][l], st[y - (1 << l) + 1][l]);
}
void build(char *a, char *b, int la, int lb){
n = 0;
rep (i, 1, la) s[++n] = a[i];
s[mid = ++n] = '+';
rep (i, 1, lb) s[++n] = b[i];
getsa();
geth();
init();
}
}
namespace Set{
static const int N = M << 1;
int ans[N];
int rt[2][N];
set<int> s[2][N];
typedef set<int> :: iterator its;
int find(int x, int k, int p){
int res = -1;
its rt = s[k][x].upper_bound(p);
if (rt != s[k][x].end())
res = max(res, SA::LCP(*rt, p));
if (rt != s[k][x].begin())
res = max(res, SA::LCP(*(--rt), p));
return res;
}
void ins(int x, int k, int p){
ans[x] = max(ans[x], find(x, k^1, p));
s[k][x].insert(p);
}
void merge(int &x, int y, int k){
if (s[k][x].size() < s[k][y].size()) {
fore (it, s[k][x]) s[k][y].insert(*it);
x = y;
}
else
fore (it, s[k][y]) s[k][x].insert(*it);
}
int upd(int x, int y, int kx, int ky){
int res = -1;
if (s[kx][x].size() < s[ky][y].size())
fore (it, s[kx][x]) res = max(res, find(y, ky, *it));
else
fore (it, s[ky][y]) res = max(res, find(x, kx, *it));
return res;
}
void Merge(int x, int y){
rep (i, 0, 1) ans[x] = max(ans[x], upd(rt[i][x], rt[i^1][y], i, i^1));
rep (i, 0, 1) merge(rt[i][x], rt[i][y], i);
ans[x] = max(ans[x], ans[y]);
}
}
namespace SAM{
static const int N = M << 1;
int last, tot;
int ch[N][26];
int stp[N], fa[N];
int rt[2][N], ed[2];
inline int newnode(int l){
stp[++tot] = l;
rt[0][tot] = rt[1][tot] = INF;
return tot;
}
int ext(int p, int q, int c){
int nq = newnode(stp[p] + 1);
fa[nq] = fa[q], fa[q] = nq;
memcpy(ch[nq], ch[q], sizeof ch[q]);
for (; p && ch[p][c] == q; p = fa[p]) ch[p][c] = nq;
return nq;
}
int sam(int p, int c){
int np = ch[p][c];
if (np) return (stp[p] + 1 == stp[np]) ? np : ext(p, np, c);
np = newnode(stp[p] + 1);
for (; p && ch[p][c] == 0; p = fa[p]) ch[p][c] =np;
if (!p) fa[np] = 1;
else {
int q = ch[p][c];
fa[np] = (stp[p] + 1 == stp[q]) ? q : ext(p, q, c);
}
return np;
}
void trans(int &p, int c, int &len){
if (!ch[p][c]) {
for (; p && ch[p][c] == 0; p = fa[p]);
len = stp[p];
}
if (ch[p][c]) p = ch[p][c], ++len;
else p = 1, len = 0;
}
void build(char *a, char *b, int la, int lb){
last = tot = 1;
rep (i, 1, la)
rt[0][ last = sam(last, a[i]-'a') ] = i;
last = 1;
rep (i, 1, lb)
rt[1][ last = sam(last, b[i]-'a') ] = la + 1 + i;
ed[0] = la, ed[1] = la + 1 + lb;
rt[0][1] = 0;
rt[1][1] = la + 1;
}
void topu(){
static int sum[N], pid[N];
rep (i, 1, tot) sum[stp[i]]++;
rep (i, 1, tot) sum[i] += sum[i-1];
rep (i, 1, tot) pid[sum[stp[i]]--] = i;
rep (i, 1, tot) rep (j, 0, 1) {
Set::rt[j][i] = i;
Set::ans[i] = -1;
if (rt[j][i] != INF && rt[j][i]+2 <= ed[j])
Set::ins(i, j, SA::rk[rt[j][i] + 2]);
}
per (i, tot, 1) {
int x = pid[i];
if (rt[0][x] != INF && rt[1][x] != INF)
ans = max(ans, stp[x] + 1 + Set::ans[x]),
ans = max(ans, stp[x] + (rt[0][x] < ed[0] && rt[1][x] < ed[1]) );
if (x != 1){
rep (j, 0, 1)
rt[j][fa[x]] = min(rt[j][fa[x]], rt[j][x]);
Set::Merge(fa[x], x);
}
}
}
}
int main(){
#ifndef ONLINE_JUDGE
freopen("a.in", "r", stdin);
#endif
int i;
scanf("%s", s+1); ls = strlen(s+1);
scanf("%s", t+1); lt = strlen(t+1);
SAM::build(s, t, ls, lt);
SA::build(s, t, ls, lt);
SAM::topu();
printf("%d\n", ans);
return 0;
}