后缀数组
追随蔡大神的脚步,开始后缀数组的学习。
http://www.cnblogs.com/EC-Ecstasy/
//时间不够不定时不定期完善
一、后缀数组的定义
把一个字符串的后缀全部搞出来,比如“aabaaaab”的后缀就是"aabaaaab”,“abaaaab”,“baaaab”,“aaaab”,“aaab”,“aab”,“ab”,“b”,分别编号为1,2,3,4,5,6,7,8。
然后就有两个数组,一个是rank[],一个是sa[]。rank[i]表示第i个后缀排在第几名,sa[i]表示排第i名是哪个后缀。显然这两个数组为逆运算。(sa[rank[i]]=i,rank[sa[i]]=i)
基排倍增写法。
每次倍增,分两个关键字。
模版1(远古写法)
var
s:ansistring;
n,tot:longint;
c,x,y,rank,sa:array[0..1000]of longint;
procedure first;
var
i:longint;
begin
readln(s);
n:=length(s);
for i:=1 to n do x[i]:=ord(s[i]);
fillchar(c,sizeof(c),0);
for i:=1 to n do inc(c[x[i]]);
for i:=1 to 128 do inc(c[i],c[i-1]);
for i:=1 to n do begin
sa[c[x[i]]]:=i;
dec(c[x[i]]);
end;
tot:=1;
rank[sa[1]]:=1;
for i:=2 to n do begin
if x[sa[i]]<>x[sa[i-1]] then inc(tot);
rank[sa[i]]:=tot;
end;
end;
procedure calcsa;
var
i,p:longint;
begin
p:=1;
while p<n do begin
for i:=1 to n-p do y[i]:=rank[i+p];
for i:=n-p+1 to n do y[i]:=0;
fillchar(c,sizeof(c),0);
for i:=1 to n do inc(c[y[i]]);
for i:=1 to n do inc(c[i],c[i-1]);
for i:=1 to n do begin
sa[c[y[i]]]:=i;
dec(c[y[i]]);
end;
for i:=1 to n do x[i]:=rank[i];
fillchar(c,sizeof(c),0);
for i:=1 to n do inc(c[x[i]]);
for i:=1 to n do inc(c[i],c[i-1]);
for i:=n downto 1 do begin
y[sa[i]]:=c[x[sa[i]]];
dec(c[x[sa[i]]]);
end;
for i:=1 to n do sa[y[i]]:=i;
tot:=1;
rank[sa[1]]:=1;
for i:=2 to n do begin
if (x[sa[i]]<>x[sa[i-1]]) or (x[sa[i]+p]<>x[sa[i-1]+p]) then inc(tot);
rank[sa[i]]:=tot;
end;
if tot=n then break;
p:=p<<1;
end;
for i:=1 to n do
sa[rank[i]]:=i;
for i:=1 to n do write(sa[i],' ');
writeln;
for i:=1 to n do write(rank[i],' ');
end;
begin
first;
calcsa;
readln;
readln;
end.
跪了论文后……
模版2(巨快)
var
s:ansistring;
n,tot:longint;
c,x,y,rank,sa:array[0..1000]of longint;
procedure swap(var j,k:longint);
var
i:longint;
begin
i:=j;
j:=k;
k:=i;
end;
procedure qsort(l,r:longint);
var
i,j,mid:longint;
begin
i:=l;
j:=r;
mid:=x[(l+r) div 2];
repeat
while x[i]<mid do inc(i);
while x[j]>mid do dec(j);
if i<=j then begin
swap(x[i],x[j]);
swap(y[i],y[j]);
inc(i);
dec(j);
end;
until i>j;
if i<r then qsort(i,r);
if l<j then qsort(l,j);
end;
procedure first;
var
i:longint;
begin
readln(s);
n:=length(s);
{ //如果n太大用快排据说会快?
for i:=1 to n do begin
x[i]:=ord(s[i]);
y[i]:=i;
end;
qsort(1,n);
for i:=1 to n do sa[i]:=y[i];
tot:=1;
rank[sa[1]]:=1;
for i:=2 to n do begin
if x[i]<>x[i-1] then inc(tot);
rank[sa[i]]:=tot;
end;
}
for i:=1 to n do x[i]:=ord(s[i]);
fillchar(c,sizeof(c),0);
for i:=1 to n do inc(c[x[i]]);
for i:=1 to 128 do inc(c[i],c[i-1]);
for i:=1 to n do begin
sa[c[x[i]]]:=i;
dec(c[x[i]]);
end;
tot:=1;
rank[sa[1]]:=1;
for i:=2 to n do begin
if x[sa[i]]<>x[sa[i-1]] then inc(tot);
rank[sa[i]]:=tot;
end;
end;
procedure calcsa;
var
i,p,sum:longint;
begin
p:=1;
while p<n do begin
sum:=0;
for i:=n-p+1 to n do begin
inc(sum);
y[sum]:=i;
end;
for i:=1 to n do
if sa[i]>p then begin
inc(sum);
y[sum]:=sa[i]-p;
if sum=n then break;
end;
for i:=1 to n do x[i]:=rank[y[i]];
fillchar(c,sizeof(c),0);
for i:=1 to n do inc(c[x[i]]);
for i:=1 to n do inc(c[i],c[i-1]);
for i:=n downto 1 do begin
sa[c[x[i]]]:=y[i];
dec(c[x[i]]);
end;
tot:=1;
x[sa[1]]:=1;
for i:=2 to n do begin
if (rank[sa[i]]<>rank[sa[i-1]]) or (rank[sa[i]+p]<>rank[sa[i-1]+p]) then inc(tot);
x[sa[i]]:=tot;
end;
for i:=1 to n do rank[i]:=x[i];
if tot=n then break;
p:=p<<1;
end;
for i:=1 to n do sa[rank[i]]:=i;
for i:=1 to n do write(sa[i],' ');
writeln;
for i:=1 to n do write(rank[i],' ');
end;
begin
first;
calcsa;
readln;
readln;
end.
Staginner大神的博客讲的非常好……以及某个笔记(然后2B了两小时看了这个东西还得再半小时才搞出来了……蒟蒻果然是太弱了)
height[i]表示排名i的后缀和前一个后缀从头开始相同字符的个数。
然后有个性质自己看论文。
结论是可以for一遍求。每次last-1(last至少为0),然后去sa[rank[i]-1],然后尽量找(可以的话last++),最后height[rank[i]]:=last。
procedure makeheight;
var
last,i,j:longint;
begin
h[1]:=0;
last:=0;
for i:=1 to n do begin
last:=max(last-1,0);
if rank[i]=1 then continue;
j:=sa[rank[i]-1];
while (i+last<=n) and (j+last<=n) and (s[i+last]=s[j+last]) do inc(last);
h[rank[i]]:=last;
end;
end;
由height[]数组的定义,我们可以用于求最长公共子串(基本上后缀数组的应用都在于这个height[]数组的应用!!)
比如找两个后缀的最长公共子串(这个名称英语缩写叫lcp)那么就直接找到他们在height[]数组的位置,然后两个位置间height[]的最小值就是lcp。那么就可以转成rmp问题来求两个后缀的lcp。时间复杂度为(nlogn)。
预处理
procedure first;
var
i,j:longint;
begin
for i:=1 to n do f[i,0]:=h[i];
for j:=1 to trunc(ln(n)/ln(2)) do
for i:=1 to n-1<<j+1 do
f[i,j]:=min(f[i,j-1],f[i+1<<(j-1),j-1]);
end;
询问
function lcp(x,y:longint):longint;
var
i:longint;
begin
x:=rank[x];
y:=rank[y];
if x>y then swap(x,y);
if x<y then inc(x);
i:=trunc(ln(y-x+1)/ln(2));
exit(min(f[x,i],f[y-1<<i+1,i]));
end;
常数优化:不断取对数运算是有点费时的,所以先预处理出一个ft[]来记录每个数log2后的结果
for tt:=1 to 50000 do ft[tt]:=trunc(ln(tt)/ln(2));
然后就开始做题了啦啦啦啦
2.2.1重复子串
可重叠最长重复子串
就直接找height[]的最大值就好了。(不给代码&…………)
不可重复的最长重复子串(pku1743)
按height[]分组是height[]数组应用的常用方案。
二分答案。判断一下同一组中sa[]值最大和sa[]最小的差有没有>=答案。
var
c,x,y,rank,sa,s,h:array[0..203000]of longint;
n,tot:longint;
function max(x,y:longint):longint;
begin
if x<y then exit(y);
exit(x);
end;
function min(x,y:longint):longint;
begin
if x<y then exit(x);
exit(y);
end;
procedure makesa;
var
i,j,p:longint;
begin
p:=1;
while p<n do begin
for i:=1 to p do y[i]:=n-p+i;
j:=p;
for i:=1 to n do
if sa[i]>p then begin
inc(j);
y[j]:=sa[i]-p;
if j=n then break;
end;
for i:=1 to n do x[i]:=rank[y[i]];
for i:=0 to tot do c[i]:=0;
for i:=1 to n do inc(c[x[i]]);
for i:=1 to tot do inc(c[i],c[i-1]);
for i:=n downto 1 do begin
sa[c[x[i]]]:=y[i];
dec(c[x[i]]);
end;
tot:=1;
x[sa[1]]:=1;
for i:=2 to n do begin
if (rank[sa[i]]<>rank[sa[i-1]]) or (rank[sa[i]+p]<>rank[sa[i-1]+p]) then inc(tot);
x[sa[i]]:=tot;
end;
for i:=1 to n do rank[i]:=x[i];
if tot=n then break;
p:=p<<1;
end;
for i:=1 to n do
sa[rank[i]]:=i;
end;
procedure makeheight;
var
i,j,last:longint;
begin
h[1]:=0;
last:=0;
for i:=1 to n do begin
last:=max(last-1,0);
if rank[i]=1 then continue;
j:=sa[rank[i]-1];
while (i+last<=n) and (j+last<=n) and (s[i+last]=s[j+last]) do inc(last);
h[rank[i]]:=last;
end;
end;
function check(x:longint):boolean;
var
i,j,ans1,ans2:longint;
begin
i:=1;
while i<=n do begin
ans1:=sa[i];
ans2:=sa[i];
j:=i+1;
while (h[j]>=x) and (j<=n) do begin
ans1:=max(ans1,sa[j]);
ans2:=min(ans2,sa[j]);
inc(j);
end;
if ans1-ans2>=x then exit(true);
i:=j;
end;
exit(false);
end;
procedure into;
var
i,j,k:longint;
begin
read(j);
for i:=2 to n do begin
read(k);
s[i-1]:=k-j+88;
j:=k;
end;
n:=n-1;
for i:=1 to n do x[i]:=s[i];
for i:=1 to 200 do c[i]:=0;
for i:=1 to n do inc(c[x[i]]);
for i:=1 to 200 do inc(c[i],c[i-1]);
for i:=n downto 1 do begin
sa[c[x[i]]]:=i;
dec(c[x[i]]);
end;
rank[sa[1]]:=1;
tot:=1;
for i:=2 to n do begin
if x[sa[i]]<>x[sa[i-1]] then inc(tot);
rank[sa[i]]:=tot;
end;
if tot<>n then makesa;
makeheight;
// for i:=1 to n do write(sa[i],' ');
end;
procedure work;
var
l,r,mid:longint;
begin
l:=0;
r:=n<<1+1;
while l+1<r do begin
mid:=(l+r)>>1;
if check(mid) then l:=mid
else r:=mid;
end;
if l<4 then writeln(0)
else writeln(l+1);
end;
begin
while true do begin
readln(n);
if n=0 then break;
into;
work;
end;
readln;
readln;
end.
可重复k次的最长重复子串(pku3261)
按height[]分组。
二分答案。判断有没有一个组中后缀个数>=答案。
第一次基排写法280+ms
const
mm=1000000;
var
x,y,sa,h,rank,s:array[0..50300]of longint;
c:array[0..mm]of longint;
n,m,tot:longint;
function max(x,y:longint):longint;
begin
if x<y then exit(y);
exit(x)
end;
function check(x:longint):boolean;
var
i,j:longint;
begin
i:=1;
while i<=n do begin
j:=i+1;
while (h[j]>=x) and (j<=n) do inc(j);
if j-i>=m then exit(true);
i:=j;
end;
exit(false)
end;
procedure makeheight;
var
last,i,j:longint;
begin
h[1]:=0;
last:=0;
for i:=1 to n do begin
last:=max(last-1,0);
if rank[i]=1 then continue;
j:=sa[rank[i]-1];
while (i+last<=n) and (j+last<=n) and (s[i+last]=s[j+last]) do inc(last);
h[rank[i]]:=last;
end;
end;
procedure makesa;
var
i,j,p:longint;
begin
p:=1;
while p<n do begin
for i:=1 to p do y[i]:=n-p+i;
j:=p;
for i:=1 to n do
if sa[i]>p then begin
inc(j);
y[j]:=sa[i]-p;
if j=n then break;
end;
for i:=1 to n do x[i]:=rank[y[i]];
fillchar(c,sizeof(c),0);
for i:=1 to n do inc(c[x[i]]);
for i:=1 to tot do inc(c[i],c[i-1]);
for i:=n downto 1 do begin
sa[c[x[i]]]:=y[i];
dec(c[x[i]]);
end;
tot:=1;
x[sa[1]]:=1;
for i:=2 to n do begin
if (rank[sa[i]]<>rank[sa[i-1]]) or (rank[sa[i]+p]<>rank[sa[i-1]+p]) then inc(tot);
x[sa[i]]:=tot;
end;
for i:=1 to n do rank[i]:=x[i];
if tot=n then break;
p:=p<<1;
end;
for i:=1 to n do sa[rank[i]]:=i;
end;
procedure into;
var
i:longint;
begin
readln(n,m);
for i:=1 to n do read(s[i]);
for i:=1 to n do x[i]:=s[i];
fillchar(c,sizeof(c),0);
for i:=1 to n do inc(c[x[i]]);
for i:=1 to mm do inc(c[i],c[i-1]);
for i:=n downto 1 do begin
sa[c[x[i]]]:=i;
dec(c[x[i]]);
end;
rank[sa[1]]:=1;
tot:=1;
for i:=2 to n do begin
if x[sa[i]]<>x[sa[i-1]] then inc(tot);
rank[sa[i]]:=tot;
end;
if tot<>n then makesa;
//for i:=1 to n do write(sa[i],' ');
//writeln;
makeheight;
//for i:=1 to n do write(h[i],' ');
end;
procedure work;
var
l,r,mid:longint;
begin
l:=0;
r:=n+1;
while l+1<r do begin
mid:=(l+r)>>1;
if check(mid) then l:=mid
else r:=mid;
end;
writeln(l);
end;
begin
into;
work;
readln;
readln;
end.
第一次快排写法60+ms
const
mm=20000;
var
x,y,sa,h,rank,s:array[0..50300]of longint;
c:array[0..mm]of longint;
n,m,tot:longint;
function max(x,y:longint):longint;
begin
if x<y then exit(y);
exit(x)
end;
procedure swap(var j,k:longint);
var
i:longint;
begin
i:=j;
j:=k;
k:=i;
end;
procedure qsort(l,r:longint);
var
i,j,mid:longint;
begin
i:=l;
j:=r;
mid:=x[(l+r) div 2];
repeat
while x[i]<mid do inc(i);
while x[j]>mid do dec(j);
if i<=j then begin
swap(x[i],x[j]);
swap(y[i],y[j]);
inc(i);
dec(j);
end;
until i>j;
if i<r then qsort(i,r);
if l<j then qsort(l,j);
end;
function check(x:longint):boolean;
var
i,j:longint;
begin
i:=1;
while i<=n do begin
j:=i+1;
while (h[j]>=x) and (j<=n) do inc(j);
if j-i>=m then exit(true);
i:=j;
end;
exit(false)
end;
procedure makeheight;
var
last,i,j:longint;
begin
h[1]:=0;
last:=0;
for i:=1 to n do begin
last:=max(last-1,0);
if rank[i]=1 then continue;
j:=sa[rank[i]-1];
while (i+last<=n) and (j+last<=n) and (s[i+last]=s[j+last]) do inc(last);
h[rank[i]]:=last;
end;
end;
procedure makesa;
var
i,j,p:longint;
begin
p:=1;
while p<n do begin
for i:=1 to p do y[i]:=n-p+i;
j:=p;
for i:=1 to n do
if sa[i]>p then begin
inc(j);
y[j]:=sa[i]-p;
if j=n then break;
end;
for i:=1 to n do x[i]:=rank[y[i]];
fillchar(c,sizeof(c),0);
for i:=1 to n do inc(c[x[i]]);
for i:=1 to tot do inc(c[i],c[i-1]);
for i:=n downto 1 do begin
sa[c[x[i]]]:=y[i];
dec(c[x[i]]);
end;
tot:=1;
x[sa[1]]:=1;
for i:=2 to n do begin
if (rank[sa[i]]<>rank[sa[i-1]]) or (rank[sa[i]+p]<>rank[sa[i-1]+p]) then inc(tot);
x[sa[i]]:=tot;
end;
for i:=1 to n do rank[i]:=x[i];
if tot=n then break;
p:=p<<1;
end;
for i:=1 to n do sa[rank[i]]:=i;
end;
procedure into;
var
i:longint;
begin
readln(n,m);
for i:=1 to n do read(s[i]);
for i:=1 to n do begin
x[i]:=s[i];
y[i]:=i;
end;
qsort(1,n);
for i:=1 to n do sa[i]:=y[i];
tot:=1;
rank[sa[1]]:=1;
for i:=2 to n do begin
if x[i]<>x[i-1] then inc(tot);
rank[sa[i]]:=tot;
end;
{ for i:=1 to n do x[i]:=s[i];
fillchar(c,sizeof(c),0);
for i:=1 to n do inc(c[x[i]]);
for i:=1 to mm do inc(c[i],c[i-1]);
for i:=n downto 1 do begin
sa[c[x[i]]]:=i;
dec(c[x[i]]);
end;
rank[sa[1]]:=1;
tot:=1;
for i:=2 to n do begin
if x[sa[i]]<>x[sa[i-1]] then inc(tot);
rank[sa[i]]:=tot;
end; }
if tot<>n then makesa;
//for i:=1 to n do write(sa[i],' ');
//writeln;
makeheight;
//for i:=1 to n do write(h[i],' ');
end;
procedure work;
var
l,r,mid:longint;
begin
l:=0;
r:=n+1;
while l+1<r do begin
mid:=(l+r)>>1;
if check(mid) then l:=mid
else r:=mid;
end;
writeln(l);
end;
begin
into;
work;
readln;
readln;
end.
也就是如果第一次基排的那个基太大(基本上就是排的是数字而不是字符时)第一次排序时用快排会比基排快。
2.2.2子串个数
不相同子串的个数(spoj694,spoj705)
论文话:
每个子串一定是某个后缀的前缀,那么原问题等价于求所有后缀之间的不相
同的前缀的个数。如果所有的后缀按照 suffix(sa[1]), suffix(sa[2]),
suffix(sa[3]), …… ,suffix(sa[n])的顺序计算,不难发现,对于每一次新加
进来的后缀 suffix(sa[k]),它将产生 n-sa[k]+1 个新的前缀。但是其中有
height[k]个是和前面的字符串的前缀是相同的。所以 suffix(sa[k])将“贡献”
出 n-sa[k]+1- height[k]个不同的子串。累加后便是原问题的答案。这个做法
的时间复杂度为 O(n)。
两道题就是多组和单组数据的区别罢了。只贴一个
var
c,x,y,rank,sa,h:array[0..100000]of longint;
n,t,tot:longint;
s:ansistring;
function max(x,y:longint):longint;
begin
if x<y then exit(y);
exit(x);
end;
procedure makeheight;
var
last,i,j:longint;
begin
last:=0;
for i:=1 to n do begin
last:=max(last-1,0);
if rank[i]=1 then continue;
j:=sa[rank[i]-1];
while (i+last<=n) and (j+last<=n) and (s[i+last]=s[j+last]) do inc(last);
h[rank[i]]:=last;
end;
end;
procedure makesa;
var
i,j,p:longint;
begin
p:=1;
while p<n do begin
for i:=1 to p do y[i]:=n-p+i;
j:=p;
for i:=1 to n do
if sa[i]>p then begin
inc(j);
y[j]:=sa[i]-p;
if j=n then break;
end;
for i:=1 to n do x[i]:=rank[y[i]];
for i:=1 to tot do c[i]:=0;
for i:=1 to n do inc(c[x[i]]);
for i:=1 to tot do inc(c[i],c[i-1]);
for i:=n downto 1 do begin
sa[c[x[i]]]:=y[i];
dec(c[x[i]]);
end;
tot:=1;
x[sa[1]]:=1;
for i:=2 to n do begin
if (rank[sa[i]]<>rank[sa[i-1]]) or (rank[sa[i]+p]<>rank[sa[i-1]+p]) then inc(tot);
x[sa[i]]:=tot;
end;
for i:=1 to n do rank[i]:=x[i];
if tot=n then break;
p:=p<<1;
end;
for i:=1 to n do sa[rank[i]]:=i;
end;
procedure into;
var
i:longint;
begin
readln(s);
n:=length(s);
for i:=1 to n do x[i]:=ord(s[i]);
fillchar(c,sizeof(c),0);
for i:=1 to n do inc(c[x[i]]);
for i:=1 to 128 do inc(c[i],c[i-1]);
for i:=n downto 1 do begin
sa[c[x[i]]]:=i;
dec(c[x[i]]);
end;
tot:=1;
rank[sa[1]]:=1;
for i:=2 to n do begin
if x[sa[i]]<>x[sa[i-1]] then inc(tot);
rank[sa[i]]:=tot;
end;
if tot<>n then makesa;
makeheight;
end;
procedure work;
var
ans,i:longint;
begin
ans:=0;
for i:=1 to n do inc(ans,n-sa[i]+1-h[i]);
writeln(ans);
end;
begin
readln(t);
while t>0 do begin
dec(t);
into;
work;
end;
end.
2.2.3回文子串
最长回文子串
(为什么不去写马拉车算法呢?又快又短)
把原串复制一边,中间用一个没出现过的字符隔开。
然后穷举每一位,比如当前是i,找他反转后的位置(n-i+1),然后分奇数和偶数。
奇数的话,那么就是suffix(i)和suffix(n-i+1)的lcp。长度:lcp<<1-1;
如果是偶数,那么就是suffix(i)和suffix(n-i+2)的lcp。长度:lcp<<1
var
x,y,sa,c,rank,h:array[0..3000]of longint;
f:array[0..3000,0..15]of longint;
n,tot:longint;
s,s1:ansistring;
function min(x,y:longint):longint;
begin
if x<y then exit(x);
exit(y);
end;
function max(x,y:longint):longint;
begin
if x<y then exit(y);
exit(x);
end;
procedure swap(var x,y:longint);
var
i:longint;
begin
i:=x;
x:=y;
y:=i;
end;
procedure first;
var
i,j,k:longint;
pow:array[0..20]of longint;
begin
fillchar(f,sizeof(f),$7f);
for i:=1 to n do f[i,0]:=h[i];
k:=trunc(ln(n)/ln(2));
for i:=1 to k do
for j:=1 to n do
if j+1<<i<=n then
f[j,i]:=min(f[j,i-1],f[j+1<<(i-1),i-1]);
{ for i:=1 to n do begin
writeln(i);
for j:=0 to k do
write(f[i,j],' ');
writeln;
end;}
end;
function lcp(x,y:longint):longint;
var
i:longint;
begin
x:=rank[x];
y:=rank[y];
if x>y then swap(x,y);
if x<y then inc(x);
i:=trunc(ln(y-x+1)/ln(2));
exit(min(f[x,i],f[y-1<<i+1,i]));
end;
procedure makeheight;
var
last,i,j:longint;
begin
h[1]:=0;
last:=0;
for i:=1 to n do begin
last:=max(last-1,0);
if rank[i]=1 then continue;
j:=sa[rank[i]-1];
while (i+last<=n) and (j+last<=n) and (s[i+last]=s[j+last]) do inc(last);
h[rank[i]]:=last;
end;
end;
procedure makesa;
var
i,j,p:longint;
begin
p:=1;
while p<n do begin
for i:=1 to p do y[i]:=n-p+i;
j:=p;
for i:=1 to n do
if sa[i]>p then begin
inc(j);
y[j]:=sa[i]-p;
if j=n then break;
end;
for i:=1 to n do x[i]:=rank[y[i]];
fillchar(c,sizeof(c),0);
for i:=1 to n do inc(c[x[i]]);
for i:=1 to tot do inc(c[i],c[i-1]);
for i:=n downto 1 do begin
sa[c[x[i]]]:=y[i];
dec(c[x[i]]);
end;
tot:=1;
x[sa[1]]:=1;
for i:=2 to n do begin
if (rank[sa[i]]<>rank[sa[i-1]]) or (rank[sa[i]+p]<>rank[sa[i-1]+p]) then inc(tot);
x[sa[i]]:=tot;
end;
for i:=1 to n do rank[i]:=x[i];
if tot=n then break;
p:=p<<1;
end;
for i:=1 to n do sa[rank[i]]:=i;
end;
procedure into;
var
i:longint;
begin
readln(s);
//s:=s1+'#'+s1;
s:=s+'#';
for i:= length(s)-1 downto 1 do s:=s+s[i];
n:=length(s);
for i:=1 to n do x[i]:=ord(s[i]);
for i:=1 to n do inc(c[x[i]]);
for i:=1 to 128 do inc(c[i],c[i-1]);
for i:=n downto 1 do begin
sa[c[x[i]]]:=i;
dec(c[x[i]]);
end;
tot:=1;
rank[sa[1]]:=1;
for i:=2 to n do begin
if x[sa[i]]<>x[sa[i-1]] then inc(tot);
rank[sa[i]]:=tot;
end;
if tot<>n then makesa;
makeheight;
first;
end;
procedure work;
var
i,j,k,ans,st:longint;
begin
ans:=0;
for i:=1 to n do begin
k:=lcp(i,n-i+1);
if k<<1-1>ans then begin
ans:=k<<1-1;
st:=i-k+1;
end;
if i=1 then continue;
k:=lcp(i,n-i+2);
if k<<1>ans then begin
ans:=k<<1;
st:=i-k;
end;
end;
for i:=st to st+ans-1 do write(s[i]);
writeln;
end;
begin
into;
work;
end.
2.2.4连续重复子串
连续重复子串(pku2406)
按照论文写法……然后光荣tle。后来发现这题不适合用后缀数组,大材小用,用kmp就行了。主要是为了之后的加强版热身吧。
做法就是枚举重复字符串的长度,比如当前是l(当然一定得整除字符串长度n),然后看suffix(1)和suffix(l+1)是否为n-l。
后缀数组版(tle):
var
x,y,rank,sa,c,rmp,h:array[0..1200000]of longint;
s:ansistring;
n,tot:longint;
function min(x,y:longint):longint;
begin
if x<y then exit(x);
exit(y);
end;
function max(x,y:longint):longint;
begin
if x<y then exit(y);
exit(x);
end;
procedure makesa;
var
i,j,p:longint;
begin
p:=1;
while p<n do begin
for i:=1 to p do y[i]:=n-p+i;
j:=p;
for i:=1 to n do
if sa[i]>p then begin
inc(j);
y[j]:=sa[i]-p;
if j=n then break;
end;
for i:=1 to n do x[i]:=rank[y[i]];
fillchar(c,sizeof(c),0);
for i:=1 to n do inc(c[x[i]]);
for i:=1 to tot do inc(c[i],c[i-1]);
for i:=n downto 1 do begin
sa[c[x[i]]]:=y[i];
dec(c[x[i]]);
end;
tot:=1;
x[sa[1]]:=1;
for i:=2 to n do begin
if (rank[sa[i]]<>rank[sa[i-1]]) or (rank[sa[i]+p]<>rank[sa[i-1]+p]) then inc(tot);
x[sa[i]]:=tot;
end;
for i:=1 to n do rank[i]:=x[i];
if tot=n then break;
p:=p<<1;
end;
for i:=1 to n do sa[rank[i]]:=i;
end;
procedure makeheight;
var
last,i,j:longint;
begin
h[1]:=0;
last:=0;
for i:=1 to n do begin
if last>1 then dec(last)
else last:=0;
if rank[i]=1 then continue;
j:=sa[rank[i]-1];
while (i+last<=n) and (j+last<=n) and (s[i+last]=s[j+last]) do inc(last);
h[rank[i]]:=last;
end;
end;
procedure first;
var
i:longint;
begin
rmp[rank[1]]:=0;
if rank[1]<n then begin
rmp[rank[1]+1]:=h[rank[1]+1];
for i:=rank[1]+2 to n do
rmp[i]:=min(rmp[i-1],h[i]);
end;
if rank[1]>1 then begin
rmp[rank[1]-1]:=h[rank[1]];
for i:=rank[1]-2 downto 1 do
rmp[i]:=min(rmp[i+1],h[i]);
end;
end;
procedure into;
var
i:longint;
begin
n:=length(s);
for i:=1 to n do x[i]:=ord(s[i]);
fillchar(c,sizeof(c),0);
for i:=1 to n do inc(c[x[i]]);
for i:=1 to 128 do inc(c[i],c[i-1]);
for i:=n downto 1 do begin
sa[c[x[i]]]:=i;
dec(c[x[i]]);
end;
tot:=1;
rank[sa[1]]:=1;
for i:=2 to n do begin
if x[sa[i]]<>x[sa[i-1]] then inc(tot);
rank[sa[i]]:=tot;
end;
if tot<>n then makesa;
makeheight;
first;
end;
procedure work;
var
i:longint;
begin
for i:=1 to n do begin
if n mod i<>0 then continue;
if n-i=rmp[rank[i+1]] then begin
writeln(n div i);
exit;
end;
end;
end;
begin
while true do begin
readln(s);
if s[1]='.' then break;
into;
work;
end;
end.
kmp版:
var
p:array[0..1000000]of longint;
s:ansistring;
procedure work;
var
i,j,n:longint;
begin
n:=length(s);
p[1]:=0;
for i:=2 to n do begin
j:=p[i-1];
while (j>0) and (s[j+1]<>s[i]) do j:=p[j];
if s[i]=s[j+1] then inc(j);
p[i]:=j;
end;
if n mod (n-p[n])=0 then writeln(n div (n-p[n]))
else writeln(1);
end;
begin
while true do begin
readln(s);
if s[1]='.' then break;
work;
end;
readln;
end.
重复次数最多的连续重复子串(spoj687,pku3693)
两道题区别在于是否输出方案(关于输出方案有个小地方要注意写在程序中了)
见论文……然后我的程序有两个小小的优化,一个是枚举时如果两个后缀连第一个字符都不一样的话就不要算lcp了,另一个是当前答案+1都小于等于最优答案就不要在算啦。
关于poj3693求同样有多少个,一开始是边找最长边记录,后来发现这样慢的要死,所以就直接求出最长后再找符合答案的字符串。
poj3693
var
c,x,y,rank,sa,h,num:array[0..100000]of longint;
f:array[0..100000,0..20]of longint;
n,tot,tt:longint;
s:ansistring;
function min(x,y:longint):longint;
begin
if x<y then exit(x);
exit(y);
end;
procedure swap(var x,y:longint);
var
i:longint;
begin
i:=x;
x:=y;
y:=i;
end;
function lcp(x,y:longint):longint;
var
i:longint;
begin
x:=rank[x];
y:=rank[y];
if x>y then swap(x,y);
if x<y then inc(x);
i:=trunc(ln(y-x+1)/ln(2));
exit(min(f[x,i],f[y-1<<i+1,i]));
end;
function check(x,y:ansistring):boolean;
var
i,j,k:longint;
begin
//writeln(x);
//writeln(y);
k:=length(x);
j:=length(y);
for i:=1 to min(k,j) do begin
if x[i]>y[i] then exit(false);
if x[i]<y[i] then exit(true);
end;
end;
procedure makesa;
var
i,j,p:longint;
begin
p:=1;
while p<n do begin
for i:=1 to p do y[i]:=n-p+i;
j:=p;
for i:=1 to n do
if sa[i]>p then begin
inc(j);
y[j]:=sa[i]-p;
if j=n then break;
end;
for i:=1 to n do x[i]:=rank[y[i]];
fillchar(c,sizeof(c),0);
for i:=1 to n do inc(c[x[i]]);
for i:=1 to tot do inc(c[i],c[i-1]);
for i:=n downto 1 do begin
sa[c[x[i]]]:=y[i];
dec(c[x[i]]);
end;
tot:=1;
x[sa[1]]:=1;
for i:=2 to n do begin
if (rank[sa[i]]<>rank[sa[i-1]]) or (rank[sa[i]+p]<>rank[sa[i-1]+p]) then inc(tot);
x[sa[i]]:=tot;
end;
for i:=1 to n do rank[i]:=x[i];
if tot=n then break;
p:=p<<1;
end;
for i:=1 to n do sa[rank[i]]:=i;
end;
procedure makehi;
var
i,j,last:longint;
begin
h[1]:=0;
last:=0;
for i:=1 to n do begin
if last>1 then dec(last)
else last:=0;
if rank[i]=1 then continue;
j:=sa[rank[i]-1];
while (i+last<=n) and (j+last<=n) and (s[i+last]=s[j+last]) do inc(last);
h[rank[i]]:=last;
end;
end;
procedure first;
var
i,j:longint;
begin
for i:=1 to n do f[i,0]:=h[i];
for j:=1 to trunc(ln(n)/ln(2)) do
for i:=1 to n-1<<j+1 do
f[i,j]:=min(f[i,j-1],f[i+1<<(j-1),j-1]);
end;
procedure into;
var
i:longint;
begin
n:=length(s);
s:=s+'#';
for i:=1 to n do x[i]:=ord(s[i]);
fillchar(c,sizeof(c),0);
for i:=1 to n do inc(c[x[i]]);
for i:=1 to 128 do inc(c[i],c[i-1]);
for i:=n downto 1 do begin
sa[c[x[i]]]:=i;
dec(c[x[i]]);
end;
tot:=1;
rank[sa[1]]:=1;
for i:=2 to n do begin
if x[sa[i]]<>x[sa[i-1]] then inc(tot);
rank[sa[i]]:=tot;
end;
if tot<>n then makesa;
makehi;
first;
end;
procedure work;
var
i,j,k,l,ans,long,more,best,total:longint;
begin
best:=0;
total:=0;
for i:=1 to n-1 do begin
j:=1;
while i+j<=n do begin
if s[j]=s[j+i] then begin
long:=lcp(j,j+i);
ans:=long div i+1;
if (ans+1>=best) and (ans>1) then begin
if (long mod i<>0) then begin
more:=i-long mod i;
if (j-more>0) and (lcp(j-more,j+i-more)>=more) then inc(ans);
end;
if ans>best then begin
best:=ans;
total:=0;
end;
if ans=best then begin
inc(total);
num[total]:=i;
end;
end;
end;
inc(j,i);
end;
end;
for i:=1 to n do //一定要再找一次,不能直接在上面算最长时计算,反例babababaccaccaccaccac,上面找到的长度是caccaccaccac(构造了好久出来的反例),这样字典序并不是最小,所以上面找到的字符串只是长度最长,而没有包括所以情况
for j:=1 to total do begin
k:=num[j];
if lcp(sa[i],sa[i]+k)>=(best-1)*k then begin
write('Case ',tt,': ');
for l:=sa[i] to sa[i]+best*k -1do write(s[l]);
writeln;
exit;
end;
end;
end;
begin
tt:=0;
while true do begin
readln(s);
if s[1]='#' then break;
inc(tt);
into;
work;
end;
end.
spoj687
var
x,y,rank,sa,h,c:array[0..50000]of longint;
f:array[0..50000,0..15]of longint;
ft:array[0..50000]of longint;
n,tt,tot:longint;
s:array[0..50000]of char;
function min(x,y:longint):longint;
begin
if x<y then exit(x);
exit(y)
end;
procedure swap(var x,y:longint);
var
i:longint;
begin
i:=x;
x:=y;
y:=i;
end;
function lcp(x,y:longint):longint;
var
i:longint;
begin
x:=rank[x];
y:=rank[y];
if x>y then swap(x,y);
inc(x);
i:=ft[y-x+1];
exit(min(f[x,i],f[y-1<<i+1,i]))
end;
procedure makesa;
var
i,j,p:longint;
begin
p:=1;
while p<n do begin
for i:=1 to p do y[i]:=n-p+i;
j:=p;
for i:=1 to n do
if sa[i]>p then begin
inc(j);
y[j]:=sa[i]-p;
if j=n then break;
end;
for i:=1 to n do x[i]:=rank[y[i]];
fillchar(c,sizeof(c),0);
for i:=1 to n do inc(c[x[i]]);
for i:=1 to tot do inc(c[i],c[i-1]);
for i:=n downto 1 do begin
sa[c[x[i]]]:=y[i];
dec(c[x[i]]);
end;
tot:=1;
x[sa[1]]:=1;
for i:=2 to n do begin
if (rank[sa[i]]<>rank[sa[i-1]]) or (rank[sa[i]+p]<>rank[sa[i-1]+p]) then inc(tot);
x[sa[i]]:=tot;
end;
for i:=1 to n do rank[i]:=x[i];
if tot=n then break;
p:=p<<1;
end;
for i:=1 to n do sa[rank[i]]:=i;
end;
procedure makeheigh;
var
i,j,last:longint;
begin
last:=0;
h[1]:=0;
for i:=1 to n do begin
if last>1 then dec(last)
else last:=0;
if rank[i]=1 then continue;
j:=sa[rank[i]-1];
while (i+last<=n) and (j+last<=n) and (s[i+last]=s[j+last]) do inc(last);
h[rank[i]]:=last;
end;
end;
procedure first;
var
i,j,k:longint;
begin
for i:=1 to n do f[i,0]:=h[i];
k:=ft[n];
for i:=1 to k do
for j:=1 to n-1<<i+1 do
f[j,i]:=min(f[j,i-1],f[j+1<<(i-1),i-1]);
end;
procedure into;
var
i:longint;
begin
readln(n);
for i:=1 to n do readln(s[i]);
for i:=1 to n do x[i]:=ord(s[i]);
fillchar(c,sizeof(c),0);
for i:=1 to n do inc(c[x[i]]);
for i:=1 to 128 do inc(c[i],c[i-1]);
for i:=n downto 1 do begin
sa[c[x[i]]]:=i;
dec(c[x[i]]);
end;
tot:=1;
rank[sa[1]]:=1;
for i:=2 to n do begin
if x[sa[i]]<>x[sa[i-1]] then inc(tot);
rank[sa[i]]:=tot;
end;
if tot<>n then makesa;
makeheigh;
first;
end;
procedure work;
var
i,j,k,best,long,ans,more:longint;
begin
best:=1;
for i:=1 to n-1 do begin
j:=1;
while i+j<=n do begin
if s[j]=s[i+j] then begin
long:=lcp(j,j+i);
ans:=long div i+1;
if (ans+1>=best) and (ans>1) then begin
if long mod i<>0 then begin
more:=i-long mod i;
if (j-more>0) and (lcp(j-more,j+i-more)>=more) then inc(ans);
end;
if ans>best then best:=ans;
end;
end;
inc(j,i);
end;
end;
writeln(best);
end;
begin
for tt:=1 to 50000 do ft[tt]:=trunc(ln(tt)/ln(2));
readln(tt);
while tt>0 do begin
dec(tt);
into;
work;
end
end.
2.3两个字符串的相关问题
2.3.1公共子串
最长公共子串(pku2774,ural1517)
求两个字符串的最长公共子串。连接在一起后找排名相邻且属不同串的h[]的最大值。
pku2774
var
x,y,rank,sa,h,c:array[0..200300]of longint;
s,s1,s2:ansistring;
n,tot,n1:longint;
procedure makesa;
var
i,j,p:longint;
begin
p:=1;
while p<n do begin
for i:=1 to p do y[i]:=n-p+i;
j:=p;
for i:=1 to n do
if sa[i]>p then begin
inc(j);
y[j]:=sa[i]-p;
if j=n then break;
end;
for i:=1 to n do x[i]:=rank[y[i]];
fillchar(c,sizeof(c),0);
for i:=1 to n do inc(c[x[i]]);
for i:=1 to tot do inc(c[i],c[i-1]);
for i:=n downto 1 do begin
sa[c[x[i]]]:=y[i];
dec(c[x[i]]);
end;
tot:=1;
x[sa[1]]:=1;
for i:=2 to n do begin
if (rank[sa[i]]<>rank[sa[i-1]]) or (rank[sa[i]+p]<>rank[sa[i-1]+p]) then inc(tot);
x[sa[i]]:=tot;
end;
for i:=1 to n do rank[i]:=x[i];
if tot=n then break;
p:=p<<1;
end;
for i:=1 to n do sa[rank[i]]:=i;
end;
procedure makeheight;
var
i,j,last:longint;
begin
last:=0;
h[1]:=0;
for i:=1 to n do begin
if last>1 then dec(last)
else last:=0;
if rank[i]=1 then continue;
j:=sa[rank[i]-1];
while {(i+last<=n) and (j+last<=n) and} (s[i+last]=s[j+last])do inc(last);
h[rank[i]]:=last;
end;
end;
procedure into;
var
i:longint;
begin
readln(s1);
readln(s2);
s:=s1+'$'+s2+'*';
n:=length(s);
n1:=length(s1);
for i:=1 to n do x[i]:=ord(s[i]);
fillchar(c,sizeof(c),0);
for i:=1 to n do inc(c[x[i]]);
for i:=1 to 128 do inc(c[i],c[i-1]);
for i:=n downto 1 do begin
sa[c[x[i]]]:=i;
dec(c[x[i]]);
end;
tot:=1;
rank[sa[1]]:=1;
for i:=2 to n do begin
if x[sa[i]]<>x[sa[i-1]] then inc(tot);
rank[sa[i]]:=tot;
end;
if tot<>n then makesa;
// for i:=1 to n do write(sa[i],' ');writeln;
// for i:=1 to n do write(rank[i],' ');writeln;
makeheight;
// for i:=1 to n do write(h[i],' ');writeln;
end;
procedure work;
var
i,max:longint;
begin
max:=0;
for i:=2 to n do
if ( not ((sa[i]<=n1) xor (sa[i-1]>n1)) ) and (h[i]>max) then max:=h[i];
writeln(max);
end;
begin
into;
work;
end.
ural1517
var
x,y,rank,sa,c,h:array[0..200300]of longint;
n,tot,n1:longint;
s,s1,s2:ansistring;
procedure makesa;
var
i,j,p:longint;
begin
p:=1;
while p<n do begin
for i:=1 to p do y[i]:=n-p+i;
j:=p;
for i:=1 to n do
if sa[i]>p then begin
inc(j);
y[j]:=sa[i]-p;
if j=n then break;
end;
for i:=1 to n do x[i]:=rank[y[i]];
fillchar(c,sizeof(c),0);
for i:=1 to n do inc(c[x[i]]);
for i:=1 to tot do inc(c[i],c[i-1]);
for i:=n downto 1 do begin
sa[c[x[i]]]:=y[i];
dec(c[x[i]]);
end;
tot:=1;
x[sa[1]]:=1;
for i:=2 to n do begin
if (rank[sa[i]]<>rank[sa[i-1]]) or (rank[sa[i]+p]<>rank[sa[i-1]+p]) then inc(tot);
x[sa[i]]:=tot;
end;
for i:=1 to n do rank[i]:=x[i];
if tot=n then break;
p:=p<<1;
end;
for i:=1 to n do sa[rank[i]]:=i;
end;
procedure makeheight;
var
i,j,last:longint;
begin
h[1]:=0;
last:=0;
for i:=1 to n do begin
if last>1 then dec(last)
else last:=0;
if rank[i]=1 then continue;
j:=sa[rank[i]-1];
while s[i+last]=s[j+last] do inc(last);
h[rank[i]]:=last;
end;
end;
procedure into;
var
i:longint;
begin
readln(n1);
readln(s1);
readln(s2);
s:=s1+'$'+s2+'%';
n:=n1<<1+2;
for i:=1 to n do x[i]:=ord(s[i]);
fillchar(c,sizeof(c),0);
for i:=1 to n do inc(c[x[i]]);
for i:=1 to 128 do inc(c[i],c[i-1]);
for i:=n downto 1 do begin
sa[c[x[i]]]:=i;
dec(c[x[i]]);
end;
tot:=1;
rank[sa[1]]:=1;
for i:=2 to n do begin
if x[sa[i]]<>x[sa[i-1]] then inc(tot);
rank[sa[i]]:=tot;
end;
if tot<>n then makesa;
makeheight;
end;
procedure work;
var
max,top,i:longint;
begin
max:=0;
top:=0;
for i:=2 to n do
if not ((sa[i]<=n1) xor (sa[i-1]>n1)) then
if h[i]>max then begin
max:=h[i];
top:=sa[i];
end;
writeln(copy(s,top,max));
end;
begin
into;
work;
end.
2.3.2子串的个数
长度不小于 k 的公共子串的个数(pku3415)
论文里面没有写清楚。
蔡大神找到不错的题解http://www.cnblogs.com/EC-Ecstasy/p/4174671.html,做法是用容斥做。
后来我跪论文提供的程序,写出了用b求a用a求b的。
无论是两种,都是得需要单调栈维护。
就是把单调栈里面大于h[i]的都变成h[i]。
写法一:
var
x,y,sa,saa,rank,h,p,c:array[0..200333]of longint;
n,tot,kk:longint;
s1,s2,s:ansistring;
procedure makesa;
var
i,j,p:longint;
begin
p:=1;
while p<n do begin
for i:=1 to p do y[i]:=n-p+i;
j:=p;
for i:=1 to n do
if sa[i]>p then begin
inc(j);
y[j]:=sa[i]-p;
if j=n then break;
end;
for i:=1 to n do x[i]:=rank[y[i]];
fillchar(c,sizeof(c),0);
for i:=1 to n do inc(c[x[i]]);
for i:=1 to tot do inc(c[i],c[i-1]);
for i:=n downto 1 do begin
sa[c[x[i]]]:=y[i];
dec(c[x[i]]);
end;
tot:=1;
x[sa[1]]:=1;
for i:=2 to n do begin
if (rank[sa[i]]<>rank[sa[i-1]]) or (rank[sa[i]+p]<>rank[sa[i-1]+p]) then inc(tot);
x[sa[i]]:=tot;
end;
for i:=1 to n do rank[i]:=x[i];
if tot=n then break;
p:=p<<1;
end;
for i:=1 to n do sa[rank[i]]:=i;
end;
procedure into;
var
i:longint;
begin
readln(s1);
readln(s2);
s:=s1+'$'+s2;
fillchar(c,sizeof(c),0);
n:=length(s);
for i:=1 to n do x[i]:=ord(s[i]);
for i:=1 to n do inc(c[x[i]]);
for i:=1 to 128 do inc(c[i],c[i-1]);
for i:=n downto 1 do begin
sa[c[x[i]]]:=i;
dec(c[x[i]]);
end;
tot:=1;
rank[sa[1]]:=1;
for i:=1 to n do begin
if x[sa[i]]<>x[sa[i-1]] then inc(tot);
rank[sa[i]]:=tot;
end;
if tot<>n then makesa;
end;
procedure makeheight;
var
i,j,last:longint;
begin
last:=0;
h[1]:=0;
for i:=1 to n do begin
if last>1 then dec(last)
else last:=0;
if rank[i]=1 then continue;
j:=sa[rank[i]-1];
while (i+last<=n) and (j+last<=n) and (s[i+last]=s[j+last]) do inc(last);
h[rank[i]]:=last;
end;
end;
function calc:int64;
var
i,ht,more,top,m:longint;
ans:int64;
begin
for i:=1 to n do rank[sa[i]]:=i;
makeheight;
//for i:=1 to n do write(h[i],' ');writeln;
ans:=0;
h[0]:=kk-1;
h[n+1]:=kk-1;
top:=0;
i:=1;
p[0]:=0;
while i<=n+1 do begin
ht:=h[p[top]];
if ((h[i]<kk) and (top=0)) or (h[i]=ht) then inc(i)
else
if h[i]>ht then begin
inc(top);
p[top]:=i;
inc(i);
end
else begin
m:=i-p[top]+1;
if (h[i]>=kk) and (h[i]>h[p[top-1]]) then begin
more:=ht-h[i];
h[p[top]]:=h[i];
end
else begin
more:=ht-h[p[top-1]];
dec(top);
end;
inc(ans,int64(m)*(m-1)>>1*more);
end;
end;
exit(ans);
end;
procedure work;
var
sum,i,j:longint;
ans1,ans2:int64;
begin
sum:=n;
for i:=1 to n do saa[i]:=sa[i];
ans1:=calc;
s:=s1;
n:=length(s1);
j:=0;
for i:=1 to sum do
if saa[i]<=n then begin
inc(j);
sa[j]:=saa[i];
if j=n then break;
end;
ans2:=calc;
s:=s2;
n:=length(s2);
j:=0;
for i:=1 to sum do
if saa[i]>sum-n then begin
inc(j);
sa[j]:=saa[i]-(sum-n);
if j=n then break;
end;
writeln(ans1-ans2-calc);
end;
begin
while true do begin
readln(kk);
if kk=0 then break;
into;
work;
end;
end.
写法二:
var
x,y,p,rank,sa,h,c,num1,num2:array[0..200333]of longint;
n,tot,n1,kk:longint;
s,s1,s2:ansistring;
procedure makeheight;
var
i,j,last:longint;
begin
last:=0;
h[1]:=0;
for i:=1 to n do begin
if last>1 then dec(last)
else last:=0;
if rank[i]=1 then continue;
j:=sa[rank[i]-1];
while (i+last<=n) and (j+last<=n) and (s[i+last]=s[j+last]) do inc(last);
h[rank[i]]:=last;
end;
end;
procedure makesa;
var
i,j,p:longint;
begin
p:=1;
while p<n do begin
for i:=1 to p do y[i]:=n-p+i;
j:=p;
for i:=1 to n do
if sa[i]>p then begin
inc(j);
y[j]:=sa[i]-p;
if j=n then break;
end;
for i:=1 to n do x[i]:=rank[y[i]];
fillchar(c,sizeof(c),0);
for i:=1 to n do inc(c[x[i]]);
for i:=1 to tot do inc(c[i],c[i-1]);
for i:=n downto 1 do begin
sa[c[x[i]]]:=y[i];
dec(c[x[i]]);
end;
tot:=1;
x[sa[1]]:=1;
for i:=2 to n do begin
if (rank[sa[i]]<>rank[sa[i-1]]) or (rank[sa[i]+p]<>rank[sa[i-1]+p]) then inc(tot);
x[sa[i]]:=tot;
end;
for i:=1 to n do rank[i]:=x[i];
if tot=n then break;
p:=p<<1;
end;
for i:=1 to n do sa[rank[i]]:=i;
end;
procedure into;
var
i:longint;
begin
readln(s1);
readln(s2);
s:=s1+'$'+s2;
n1:=length(s1);
n:=length(s);
for i:=1 to n do x[i]:=ord(s[i]);
fillchar(c,sizeof(c),0);
for i:=1 to n do inc(c[x[i]]);
for i:=1 to 128 do inc(c[i],c[i-1]);
for i:=n downto 1 do begin
sa[c[x[i]]]:=i;
dec(c[x[i]]);
end;
tot:=1;
rank[sa[1]]:=1;
for i:=2 to n do begin
if x[sa[i]]<>x[sa[i-1]] then inc(tot);
rank[sa[i]]:=tot;
end;
if tot<>n then makesa;
makeheight;
end;
procedure work;
var
i,top,tota,totb:longint;
ans,suma,sumb:int64;
begin
for i:=1 to n do
if h[i]>=kk then dec(h[i],kk-1)
else h[i]:=0;
h[n+1]:=0;
top:=0;
suma:=0;
sumb:=0;
ans:=0;
p[0]:=0;
for i:=2 to n do begin
if sa[i]<=n1 then ans:=sumb+ans
else ans:=suma+ans;
if h[i+1]<1 then begin
top:=0;
suma:=0;
sumb:=0;
continue;
end;
tota:=0;
totb:=0;
while (h[i+1]<h[p[top]]) and (top>0) do begin
suma:=suma-int64(h[p[top]]-h[i+1])*num1[top];
sumb:=sumb-int64(h[p[top]]-h[i+1])*num2[top];
tota:=tota+num1[top];
totb:=totb+num2[top];
dec(top);
end;
if sa[i]<=n1 then begin
inc(tota);
suma:=suma+h[i+1];
end
else begin
inc(totb);
sumb:=sumb+h[i+1];
end;
if (h[i+1]=h[p[top]]) and (top>0) then begin
inc(num1[top],tota);
inc(num2[top],totb);
end
else begin
inc(top);
num1[top]:=tota;
num2[top]:=totb;
p[top]:=i+1;
end;
end;
writeln(ans);
end;
begin
while true do begin
readln(kk);
if kk=0 then break;
into;
work;
end;
end.
记得用int64!!!!
2.4多个字符串
跟两个字符串一样的做法,用不同串直接隔开。
然后多个字符串有两个优化,一个是先给每个字符串都染色颜色,这样之后容易处理;另一个是很神的优化(跪kmp大神代码学到的!),算每个字符串最多延长多长(就是到字符串末尾的距离),最后算出h[]后在跟d[sa[i]]和d[sa[i-1]]中保存最小的,这样就巧妙的去掉很多把连接符也算入答案的错误!!
不小于 k 个字符串中的最长子串(pku3294)
其实和求单个差不多。染色,height[]数组分组,统计颜色就行了。
(第二优化后直接从2400+ms变成400+ms)
var
x,y,c,rank,sa,h,p,col,d:array[0..100300]of longint;
chose:array[0..200]of longint;
s,s1:ansistring;
n,total,tot,nn,kk,time,right:longint;
function min(x,y:longint):longint;
begin
if x<y then exit(x);
exit(y);
end;
procedure makesa;
var
i,j,p:longint;
begin
p:=1;
while p<n do begin
for i:=1 to p do y[i]:=n-p+i;
j:=p;
for i:=1 to n do
if sa[i]>p then begin
inc(j);
y[j]:=sa[i]-p;
if j=n then break;
end;
for i:=1 to n do x[i]:=rank[y[i]];
fillchar(c,sizeof(c),0);
for i:=1 to n do inc(c[x[i]]);
for i:=1 to tot do inc(c[i],c[i-1]);
for i:=n downto 1 do begin
sa[c[x[i]]]:=y[i];
dec(c[x[i]]);
end;
tot:=1;
x[sa[1]]:=1;
for i:=2 to n do begin
if (rank[sa[i]]<>rank[sa[i-1]]) or (rank[sa[i]+p]<>rank[sa[i-1]+p]) then inc(tot);
x[sa[i]]:=tot;
end;
for i:=1 to n do rank[i]:=x[i];
if tot=n then break;
p:=p<<1;
end;
for i:=1 to n do sa[rank[i]]:=i;
end;
procedure makeheight;
var
i,j,last:longint;
begin
h[1]:=0;
last:=0;
for i:=1 to n do begin
if last>1 then dec(last)
else last:=0;
if rank[i]=1 then continue;
j:=sa[rank[i]-1];
while s[i+last]=s[j+last] do inc(last);
h[rank[i]]:=last;
end;
end;
function check(x:longint):boolean;
var
all,i,j,k,tot:longint;
flag:boolean;
begin
i:=2;
all:=0;
while i<=n do begin
while (h[i]<x) and (i<=n) do inc(i);
j:=i;
tot:=0;
inc(time);
chose[col[sa[i-1]]]:=time;
while (h[j]>=x) and (j<=n) do begin
if (chose[col[sa[j]]]<time) then begin
chose[col[sa[j]]]:=time;
inc(tot);
end;
inc(j);
end;
if tot>=kk then begin
inc(all);
p[all]:=sa[i];
end;
i:=j+1;
end;
if all>0 then begin
total:=all;
exit(true);
end;
exit(false);
end;
procedure into;
var
i,sum,n1,j:longint;
begin
total:=0;
kk:=nn>>1;
right:=maxlongint;
sum:=0;
s:='';
for i:=1 to nn do begin
readln(s1);
s:=s+s1+'$';
n1:=length(s1);
if right>n1 then right:=n1;
for j:=1 to n1 do begin
col[sum+j]:=i;
d[sum+j]:=n1-j+1;
end;
sum:=sum+n1+1;
col[sum]:=0;
end;
s:=s+'#';
n:=sum+1;
col[n]:=0;
for i:=1 to n do x[i]:=ord(s[i]);
fillchar(c,sizeof(c),0);
for i:=1 to n do inc(c[x[i]]);
for i:=1 to 128 do inc(c[i],c[i-1]);
for i:=n downto 1 do begin
sa[c[x[i]]]:=i;
dec(c[x[i]]);
end;
tot:=1;
rank[sa[1]]:=1;
for i:=2 to n do begin
if x[sa[i]]<>x[sa[i-1]] then inc(tot);
rank[sa[i]]:=tot;
end;
if tot<>n then makesa;
makeheight;
for i:=2 to n do h[i]:=min(h[i],min(d[sa[i]],d[sa[i-1]]));
//for i:=2 to n do writeln(h[i]);
end;
procedure work;
var
left,mid,i,j:longint;
begin
//for i:=1 to n do writeln(copy(s,sa[i],n-sa[i]+1));
left:=0;
inc(right);
while left+1<right do begin
mid:=(left+right)>>1;
if check(mid) then left:=mid
else right:=mid
end;
if left=0 then writeln('?')
else
for i:=1 to total do
writeln(copy(s,p[i],left))
end;
begin
time:=0;
while true do begin
readln(nn);
if nn=0 then break;
into;
work;
writeln;
end
end.
每个字符串至少出现两次且不重叠的最长子串(spoj220)
都写到这道题了,应该知道怎么写了吧?!做饭和上面类似
const
mm=10000000;
var
x,y,rank,sa,h,c,d,col:array[0..200000]of longint;
num1,num2:array[0..100]of longint;
n,tot,sum,right,tt:longint;
s,s1:ansistring;
function min(x,y:longint):longint;
begin
if x<y then exit(x);
exit(y);
end;
function check(x:longint):boolean;
var
i,j:longint;
flag:boolean;
begin
for j:=1 to sum do begin
num1[j]:=mm;
num2[j]:=-mm;
end;
for i:=sum+2 to n do begin
j:=sa[i];
if col[j]<>0 then begin
if j<num1[col[j]] then num1[col[j]]:=j;
if j>num2[col[j]] then num2[col[j]]:=j;
end;
if i=n then break;
if h[i+1]<x then begin
flag:=true;
for j:=1 to sum do
if (num1[j]=mm) or (num2[j]-num1[j]<x) then begin
flag:=false;
break;
end;
if flag then exit(true);
for j:=1 to sum do begin
num1[j]:=mm;
num2[j]:=-mm;
end;
end;
end;
flag:=true;
for j:=1 to sum do
if (num1[j]=mm) or (num2[j]-num1[j]<x) then begin
flag:=false;
break;
end;
if flag then exit(true);
exit(false);
end;
procedure makesa;
var
i,j,p:longint;
begin
p:=1;
while p<n do begin
for i:=1 to p do y[i]:=n-p+i;
j:=p;
for i:=1 to n do
if sa[i]>p then begin
inc(j);
y[j]:=sa[i]-p;
if j=n then break;
end;
for i:=1 to n do x[i]:=rank[y[i]];
fillchar(c,sizeof(c),0);
for i:=1 to n do inc(c[x[i]]);
for i:=1 to tot do inc(c[i],c[i-1]);
for i:=n downto 1 do begin
sa[c[x[i]]]:=y[i];
dec(c[x[i]]);
end;
tot:=1;
x[sa[1]]:=1;
for i:=2 to n do begin
if (rank[sa[i]]<>rank[sa[i-1]]) or (rank[sa[i]+p]<>rank[sa[i-1]+p]) then inc(tot);
x[sa[i]]:=tot;
end;
for i:=1 to n do rank[i]:=x[i];
if tot=n then break;
p:=p<<1;
end;
for i:=1 to n do sa[rank[i]]:=i;
end;
procedure makeheight;
var
i,j,last:longint;
begin
h[1]:=0;
last:=0;
for i:=1 to n do begin
if last>0 then dec(last)
else last:=0;
if rank[i]=1 then continue;
j:=sa[rank[i]-1];
while s[j+last]=s[i+last] do inc(last);
h[rank[i]]:=last;
end;
end;
procedure into;
var
i,n1,j:longint;
begin
readln(sum);
n:=0;
s:='';
right:=maxlongint;
for j:=1 to sum do begin
readln(s1);
n1:=length(s1);
if n1<right then right:=n1;
s:=s+s1+'$';
for i:=1 to n1 do begin
col[n+i]:=j;
d[n+i]:=n1-i+1;
end;
n:=n+n1+1;
col[n]:=0;
d[n]:=0;
end;
inc(n);
col[n]:=0;
d[n]:=0;
s:=s+'#';
for i:=1 to n do x[i]:=ord(s[i]);
fillchar(c,sizeof(c),0);
for i:=1 to n do inc(c[x[i]]);
for i:=1 to 128 do inc(c[i],c[i-1]);
for i:=n downto 1 do begin
sa[c[x[i]]]:=i;
dec(c[x[i]]);
end;
tot:=1;
rank[sa[1]]:=1;
for i:=2 to n do begin
if x[sa[i]]<>x[sa[i-1]] then inc(tot);
rank[sa[i]]:=tot;
end;
if tot<>n then makesa;
makeheight;
for i:=2 to n do h[i]:=min(h[i],min(d[sa[i]],d[sa[i-1]]));
// for i:=1 to n do writeln(h[i],' ',copy(s,sa[i],n-sa[i]+1));
end;
procedure work;
var
left,mid:longint;
begin
left:=0;
while left+1<right do begin
mid:=(left+right)>>1;
if check(mid) then left:=mid
else right:=mid;
end;
writeln(left);
end;
begin
readln(tt);
while tt>0 do begin
dec(tt);
into;
work;
end;
end.
出现或反转后出现在每个字符串中的最长子串(pku1226)
要反转就跟着再反转一次。然后没了。
var
x,y,rank,c,h,d,sa,col:array[0..200300]of longint;
chose:array[0..200]of longint;
n,tot,time,tt,sum,right:longint;
s,s1:ansistring;
function min(x,y:longint):longint;
begin
if x<y then exit(x);
exit(y);
end;
procedure makesa;
var
i,j,p:longint;
begin
p:=1;
while p<n do begin
for i:=1 to p do y[i]:=n-p+i;
j:=p;
for i:=1 to n do
if sa[i]>p then begin
inc(j);
y[j]:=sa[i]-p;
if j=n then break;
end;
for i:=1 to n do x[i]:=rank[y[i]];
fillchar(c,sizeof(c),0);
for i:=1 to n do inc(c[x[i]]);
for i:=1 to tot do inc(c[i],c[i-1]);
for i:=n downto 1 do begin
sa[c[x[i]]]:=y[i];
dec(c[x[i]]);
end;
tot:=1;
x[sa[1]]:=1;
for i:=2 to n do begin
if (rank[sa[i]]<>rank[sa[i-1]]) or (rank[sa[i]+p]<>rank[sa[i-1]+p]) then inc(tot);
x[sa[i]]:=tot;
end;
for i:=1 to n do rank[i]:=x[i];
if tot=n then break;
p:=p<<1;
end;
for i:=1 to n do sa[rank[i]]:=i;
end;
procedure makeheight;
var
i,j,last:longint;
begin
last:=0;
h[1]:=0;
for i:=1 to n do begin
if last>1 then dec(last)
else last:=0;
if rank[i]=1 then continue;
j:=sa[rank[i]-1];
while s[j+last]=s[i+last] do inc(last);
h[rank[i]]:=last;
end;
end;
function check(x:longint):boolean;
var
i,all:longint;
begin
inc(time);
all:=0;
for i:=sum<<1+2 to n-1 do begin
if chose[col[sa[i]]]<time then begin
chose[col[sa[i]]]:=time;
inc(all);
if all=sum then exit(true);
end;
if h[i+1]<x then begin
inc(time);
all:=0;
end;
end;
exit(false);
end;
procedure into;
var
i,j,n1:longint;
begin
readln(sum);
s:='';
n:=0;
right:=maxlongint;
for j:=1 to sum do begin
readln(s1);
n1:=length(s1);
if n1<right then right:=n1;
s1:=s1+'@';
d[n1+1]:=0;
col[n1+1]:=0;
for i:=1 to n1 do begin
s1:=s1+s1[n1-i+1];
d[n+i]:=n1-i+1;
d[n+n1+i+1]:=n1-i+1;
col[n+i]:=j;
col[n+n1++1+i]:=j;
end;
n:=n+n1<<1+2;
s:=s+s1+'#';
col[n]:=0;
d[n]:=0;
end;
s:=s+'$';
inc(n);
for i:=1 to n do x[i]:=ord(s[i]);
fillchar(c,sizeof(c),0);
for i:=1 to n do inc(c[x[i]]);
for i:=1 to 128 do inc(c[i],c[i-1]);
for i:=n downto 1 do begin
sa[c[x[i]]]:=i;
dec(c[x[i]]);
end;
tot:=1;
rank[sa[1]]:=1;
for i:=2 to n do begin
if x[sa[i]]<>x[sa[i-1]] then inc(tot);
rank[sa[i]]:=tot;
end;
if tot<>n then makesa;
makeheight;
for i:=1 to n do h[i]:=min(h[i],min(d[sa[i]],d[sa[i-1]]));
//for i:=1 to n do writeln(h[i],' ',copy(s,sa[i],n-sa[i]+1));
end;
procedure work;
var
left,mid:longint;
begin
inc(right);
left:=0;
while left+1<right do begin
mid:=(left+right)>>1;
if check(mid) then left:=mid
else right:=mid;
end;
writeln(left)
end;
begin
readln(tt);
while tt>0 do begin
dec(tt);
into;
work;
end
end.
========================
暂时告一段落吧,完结撒花!!2014.1.23