后缀数组

追随蔡大神的脚步,开始后缀数组的学习。

http://www.cnblogs.com/EC-Ecstasy/

//时间不够不定时不定期完善

 

一、后缀数组的定义

   把一个字符串的后缀全部搞出来,比如“aabaaaab”的后缀就是"aabaaaab”,“abaaaab”,“baaaab”,“aaaab”,“aaab”,“aab”,“ab”,“b”,分别编号为1,2,3,4,5,6,7,8。

   然后就有两个数组,一个是rank[],一个是sa[]。rank[i]表示第i个后缀排在第几名,sa[i]表示排第i名是哪个后缀。显然这两个数组为逆运算。(sa[rank[i]]=i,rank[sa[i]]=i)

 

基排倍增写法。

每次倍增,分两个关键字。

模版1(远古写法)

var
  s:ansistring;
  n,tot:longint;
  c,x,y,rank,sa:array[0..1000]of longint;

procedure first;
var
  i:longint;
begin
  readln(s);
  n:=length(s);
  for i:=1 to n do x[i]:=ord(s[i]);
  fillchar(c,sizeof(c),0);
  for i:=1 to n do inc(c[x[i]]);
  for i:=1 to 128 do inc(c[i],c[i-1]);
  for i:=1 to n do begin
    sa[c[x[i]]]:=i;
    dec(c[x[i]]);
  end;
  tot:=1;
  rank[sa[1]]:=1;
  for i:=2 to n do begin
    if x[sa[i]]<>x[sa[i-1]] then inc(tot);
    rank[sa[i]]:=tot;
  end;
end;


procedure calcsa;
var
  i,p:longint;
begin
  p:=1;
  while p<n do begin
    for i:=1 to n-p do y[i]:=rank[i+p];
    for i:=n-p+1 to n do y[i]:=0;
    fillchar(c,sizeof(c),0);
    for i:=1 to n do inc(c[y[i]]);
    for i:=1 to n do inc(c[i],c[i-1]);
    for i:=1 to n do begin
      sa[c[y[i]]]:=i;
      dec(c[y[i]]);
    end;
    for i:=1 to n do x[i]:=rank[i];
    fillchar(c,sizeof(c),0);
    for i:=1 to n do inc(c[x[i]]);
    for i:=1 to n do inc(c[i],c[i-1]);
    for i:=n downto 1 do begin
      y[sa[i]]:=c[x[sa[i]]];
      dec(c[x[sa[i]]]);
    end;
    for i:=1 to n do sa[y[i]]:=i;
    tot:=1;
    rank[sa[1]]:=1;
    for i:=2 to n do begin
      if (x[sa[i]]<>x[sa[i-1]]) or (x[sa[i]+p]<>x[sa[i-1]+p]) then inc(tot);
      rank[sa[i]]:=tot;
    end;
    if tot=n then break;
    p:=p<<1;
  end;
  for i:=1 to n do
    sa[rank[i]]:=i;
  for i:=1 to n do write(sa[i],' ');
  writeln;
  for i:=1 to n do write(rank[i],' ');
end;


begin
  first;
  calcsa;
  readln;
  readln;
end.
View Code

跪了论文后……

模版2(巨快)

var
  s:ansistring;
  n,tot:longint;
  c,x,y,rank,sa:array[0..1000]of longint;


procedure swap(var j,k:longint);
var
  i:longint;
begin
  i:=j;
  j:=k;
  k:=i;
end;

procedure qsort(l,r:longint);
var
  i,j,mid:longint;
begin
  i:=l;
  j:=r;
  mid:=x[(l+r) div 2];
  repeat
    while x[i]<mid do inc(i);
    while x[j]>mid do dec(j);
    if i<=j then begin
      swap(x[i],x[j]);
      swap(y[i],y[j]);
      inc(i);
      dec(j);
    end;
  until i>j;
  if i<r then qsort(i,r);
  if l<j then qsort(l,j);
end;

procedure first;
var
  i:longint;
begin
  readln(s);
  n:=length(s);

  {  //如果n太大用快排据说会快?
  for i:=1 to n do begin
    x[i]:=ord(s[i]);
    y[i]:=i;
  end;
  qsort(1,n);
  for i:=1 to n do sa[i]:=y[i];
  tot:=1;
  rank[sa[1]]:=1;
  for i:=2 to n do begin
    if x[i]<>x[i-1] then inc(tot);
    rank[sa[i]]:=tot;
  end;
  }

  for i:=1 to n do x[i]:=ord(s[i]);
  fillchar(c,sizeof(c),0);
  for i:=1 to n do inc(c[x[i]]);
  for i:=1 to 128 do inc(c[i],c[i-1]);
  for i:=1 to n do begin
    sa[c[x[i]]]:=i;
    dec(c[x[i]]);
  end;
  tot:=1;
  rank[sa[1]]:=1;
  for i:=2 to n do begin
    if x[sa[i]]<>x[sa[i-1]] then inc(tot);
    rank[sa[i]]:=tot;
  end;
end;

procedure calcsa;
var
  i,p,sum:longint;
begin
  p:=1;
  while p<n do begin
    sum:=0;
    for i:=n-p+1 to n do begin
      inc(sum);
      y[sum]:=i;
    end;
    for i:=1 to n do
      if sa[i]>p then begin
        inc(sum);
        y[sum]:=sa[i]-p;
        if sum=n then break;
      end;

    for i:=1 to n do x[i]:=rank[y[i]];
    fillchar(c,sizeof(c),0);
    for i:=1 to n do inc(c[x[i]]);
    for i:=1 to n do inc(c[i],c[i-1]);
    for i:=n downto 1 do begin
      sa[c[x[i]]]:=y[i];
      dec(c[x[i]]);
    end;
    tot:=1;
    x[sa[1]]:=1;
    for i:=2 to n do begin
      if (rank[sa[i]]<>rank[sa[i-1]]) or (rank[sa[i]+p]<>rank[sa[i-1]+p]) then inc(tot);
      x[sa[i]]:=tot;
    end;
    for i:=1 to n do rank[i]:=x[i];
    if tot=n then break;
    p:=p<<1;
  end;
  for i:=1 to n do sa[rank[i]]:=i;
  for i:=1 to n do write(sa[i],' ');
  writeln;
  for i:=1 to n do write(rank[i],' ');
end;


begin
  first;
  calcsa;
  readln;
  readln;
end.
View Code

Staginner大神的博客讲的非常好……以及某个笔记(然后2B了两小时看了这个东西还得再半小时才搞出来了……蒟蒻果然是太弱了)

 

height[i]表示排名i的后缀和前一个后缀从头开始相同字符的个数。

然后有个性质自己看论文。

结论是可以for一遍求。每次last-1(last至少为0),然后去sa[rank[i]-1],然后尽量找(可以的话last++),最后height[rank[i]]:=last。

procedure makeheight;
var
  last,i,j:longint;
begin
  h[1]:=0;
  last:=0;
  for i:=1 to n do begin
    last:=max(last-1,0);
    if rank[i]=1 then continue;
    j:=sa[rank[i]-1];
    while (i+last<=n) and (j+last<=n) and (s[i+last]=s[j+last]) do inc(last);
    h[rank[i]]:=last;
  end;
end;
View Code

 由height[]数组的定义,我们可以用于求最长公共子串(基本上后缀数组的应用都在于这个height[]数组的应用!!)

比如找两个后缀的最长公共子串(这个名称英语缩写叫lcp)那么就直接找到他们在height[]数组的位置,然后两个位置间height[]的最小值就是lcp。那么就可以转成rmp问题来求两个后缀的lcp。时间复杂度为(nlogn)。

预处理
procedure first;
var
  i,j:longint;
begin
  for i:=1 to n do f[i,0]:=h[i];
  for j:=1 to trunc(ln(n)/ln(2)) do
    for i:=1 to n-1<<j+1 do
      f[i,j]:=min(f[i,j-1],f[i+1<<(j-1),j-1]);
end;

询问
function lcp(x,y:longint):longint;
var
  i:longint;
begin
  x:=rank[x];
  y:=rank[y];
  if x>y then swap(x,y);
  if x<y then inc(x);
  i:=trunc(ln(y-x+1)/ln(2));
  exit(min(f[x,i],f[y-1<<i+1,i]));
end;

常数优化:不断取对数运算是有点费时的,所以先预处理出一个ft[]来记录每个数log2后的结果
 for tt:=1 to 50000 do ft[tt]:=trunc(ln(tt)/ln(2));
View Code

 

然后就开始做题了啦啦啦啦

 

2.2.1重复子串

 

可重叠最长重复子串

就直接找height[]的最大值就好了。(不给代码&…………)

 

不可重复的最长重复子串(pku1743)

按height[]分组是height[]数组应用的常用方案。

二分答案。判断一下同一组中sa[]值最大和sa[]最小的差有没有>=答案。

var
  c,x,y,rank,sa,s,h:array[0..203000]of longint;
  n,tot:longint;

function max(x,y:longint):longint;
begin
  if x<y then exit(y);
  exit(x);
end;

function min(x,y:longint):longint;
begin
  if x<y then exit(x);
  exit(y);
end;

procedure makesa;
var
  i,j,p:longint;
begin
  p:=1;
  while p<n do begin
    for i:=1 to p do y[i]:=n-p+i;
    j:=p;
    for i:=1 to n do
      if sa[i]>p then begin
        inc(j);
        y[j]:=sa[i]-p;
        if j=n then break;
      end;
    for i:=1 to n do x[i]:=rank[y[i]];
    for i:=0 to tot do c[i]:=0;
    for i:=1 to n do inc(c[x[i]]);
    for i:=1 to tot do inc(c[i],c[i-1]);
    for i:=n downto 1 do begin
      sa[c[x[i]]]:=y[i];
      dec(c[x[i]]);
    end;
    tot:=1;
    x[sa[1]]:=1;
    for i:=2 to n do begin
      if (rank[sa[i]]<>rank[sa[i-1]]) or (rank[sa[i]+p]<>rank[sa[i-1]+p]) then inc(tot);
      x[sa[i]]:=tot;
    end;
    for i:=1 to n do rank[i]:=x[i];
    if tot=n then break;
    p:=p<<1;
  end;
  for i:=1 to n do
    sa[rank[i]]:=i;
end;

procedure makeheight;
var
  i,j,last:longint;
begin
  h[1]:=0;
  last:=0;
  for i:=1 to n do begin
    last:=max(last-1,0);
    if rank[i]=1 then continue;
    j:=sa[rank[i]-1];
    while (i+last<=n) and (j+last<=n) and (s[i+last]=s[j+last]) do inc(last);
    h[rank[i]]:=last;
  end;
end;

function check(x:longint):boolean;
var
  i,j,ans1,ans2:longint;
begin
  i:=1;
  while i<=n do begin
    ans1:=sa[i];
    ans2:=sa[i];
    j:=i+1;
    while (h[j]>=x) and (j<=n) do begin
      ans1:=max(ans1,sa[j]);
      ans2:=min(ans2,sa[j]);
      inc(j);
    end;
    if ans1-ans2>=x then exit(true);
    i:=j;
  end;
  exit(false);
end;

procedure into;
var
  i,j,k:longint;
begin
  read(j);
  for i:=2 to n do begin
    read(k);
    s[i-1]:=k-j+88;
    j:=k;
  end;
  n:=n-1;
  for i:=1 to n do x[i]:=s[i];
  for i:=1 to 200 do c[i]:=0;
  for i:=1 to n do inc(c[x[i]]);
  for i:=1 to 200 do inc(c[i],c[i-1]);
  for i:=n downto 1 do begin
    sa[c[x[i]]]:=i;
    dec(c[x[i]]);
  end;
  rank[sa[1]]:=1;
  tot:=1;
  for i:=2 to n do begin
    if x[sa[i]]<>x[sa[i-1]] then inc(tot);
    rank[sa[i]]:=tot;
  end;
  if tot<>n then makesa;
  makeheight;
 // for i:=1 to n do write(sa[i],' ');
end;

procedure work;
var
  l,r,mid:longint;
begin
  l:=0;
  r:=n<<1+1;
  while l+1<r do begin
    mid:=(l+r)>>1;
    if check(mid) then l:=mid
      else r:=mid;
  end;
  if l<4 then writeln(0)
    else writeln(l+1);
end;


begin
  while true do begin
    readln(n);
    if n=0 then break;
    into;
    work;
  end;
    readln;
    readln;
end.
View Code

 

可重复k次的最长重复子串(pku3261)

按height[]分组。

二分答案。判断有没有一个组中后缀个数>=答案。

第一次基排写法280+ms

const
  mm=1000000;
var
  x,y,sa,h,rank,s:array[0..50300]of longint;
  c:array[0..mm]of longint;
  n,m,tot:longint;

function max(x,y:longint):longint;
begin
  if x<y then exit(y);
  exit(x)
end;

function check(x:longint):boolean;
var
  i,j:longint;
begin
  i:=1;
  while i<=n do begin
    j:=i+1;
    while (h[j]>=x) and (j<=n) do inc(j);
    if j-i>=m then exit(true);
    i:=j;
  end;
  exit(false)
end;

procedure makeheight;
var
  last,i,j:longint;
begin
  h[1]:=0;
  last:=0;
  for i:=1 to n do begin
    last:=max(last-1,0);
    if rank[i]=1 then continue;
    j:=sa[rank[i]-1];
    while (i+last<=n) and (j+last<=n) and (s[i+last]=s[j+last]) do inc(last);
    h[rank[i]]:=last;
  end;
end;

procedure makesa;
var
  i,j,p:longint;
begin
  p:=1;
  while p<n do begin
    for i:=1 to p do y[i]:=n-p+i;
    j:=p;
    for i:=1 to n do
      if sa[i]>p then begin
        inc(j);
        y[j]:=sa[i]-p;
        if j=n then break;
      end;
    for i:=1 to n do x[i]:=rank[y[i]];
    fillchar(c,sizeof(c),0);
    for i:=1 to n do inc(c[x[i]]);
    for i:=1 to tot do inc(c[i],c[i-1]);
    for i:=n downto 1 do begin
      sa[c[x[i]]]:=y[i];
      dec(c[x[i]]);
    end;
    tot:=1;
    x[sa[1]]:=1;
    for i:=2 to n do begin
      if (rank[sa[i]]<>rank[sa[i-1]]) or (rank[sa[i]+p]<>rank[sa[i-1]+p]) then inc(tot);
      x[sa[i]]:=tot;
    end;
    for i:=1 to n do rank[i]:=x[i];
    if tot=n then break;
    p:=p<<1;
  end;
  for i:=1 to n do sa[rank[i]]:=i;
end;

procedure into;
var
  i:longint;
begin
  readln(n,m);
  for i:=1 to n do read(s[i]);
  for i:=1 to n do x[i]:=s[i];
  fillchar(c,sizeof(c),0);
  for i:=1 to n do inc(c[x[i]]);
  for i:=1 to mm do inc(c[i],c[i-1]);
  for i:=n downto 1 do begin
    sa[c[x[i]]]:=i;
    dec(c[x[i]]);
  end;
  rank[sa[1]]:=1;
  tot:=1;
  for i:=2 to n do begin
    if x[sa[i]]<>x[sa[i-1]] then inc(tot);
    rank[sa[i]]:=tot;
  end;
  if tot<>n then makesa;
  //for i:=1 to n do write(sa[i],' ');
  //writeln;
  makeheight;
  //for i:=1 to n do write(h[i],' ');
end;

procedure work;
var
  l,r,mid:longint;
begin
  l:=0;
  r:=n+1;
  while l+1<r do begin
    mid:=(l+r)>>1;
    if check(mid) then l:=mid
      else r:=mid;
  end;
  writeln(l);
end;

begin
  into;
  work;
  readln;
  readln;
end.
View Code

第一次快排写法60+ms

const
  mm=20000;
var
  x,y,sa,h,rank,s:array[0..50300]of longint;
  c:array[0..mm]of longint;
  n,m,tot:longint;


function max(x,y:longint):longint;
begin
  if x<y then exit(y);
  exit(x)
end;

procedure swap(var j,k:longint);
var
  i:longint;
begin
  i:=j;
  j:=k;
  k:=i;
end;

procedure qsort(l,r:longint);
var
  i,j,mid:longint;
begin
  i:=l;
  j:=r;
  mid:=x[(l+r) div 2];
  repeat
    while x[i]<mid do inc(i);
    while x[j]>mid do dec(j);
    if i<=j then begin
      swap(x[i],x[j]);
      swap(y[i],y[j]);
      inc(i);
      dec(j);
    end;
  until i>j;
  if i<r then qsort(i,r);
  if l<j then qsort(l,j);
end;

function check(x:longint):boolean;
var
  i,j:longint;
begin
  i:=1;
  while i<=n do begin
    j:=i+1;
    while (h[j]>=x) and (j<=n) do inc(j);
    if j-i>=m then exit(true);
    i:=j;
  end;
  exit(false)
end;

procedure makeheight;
var
  last,i,j:longint;
begin
  h[1]:=0;
  last:=0;
  for i:=1 to n do begin
    last:=max(last-1,0);
    if rank[i]=1 then continue;
    j:=sa[rank[i]-1];
    while (i+last<=n) and (j+last<=n) and (s[i+last]=s[j+last]) do inc(last);
    h[rank[i]]:=last;
  end;
end;

procedure makesa;
var
  i,j,p:longint;
begin
  p:=1;
  while p<n do begin
    for i:=1 to p do y[i]:=n-p+i;
    j:=p;
    for i:=1 to n do
      if sa[i]>p then begin
        inc(j);
        y[j]:=sa[i]-p;
        if j=n then break;
      end;
    for i:=1 to n do x[i]:=rank[y[i]];
    fillchar(c,sizeof(c),0);
    for i:=1 to n do inc(c[x[i]]);
    for i:=1 to tot do inc(c[i],c[i-1]);
    for i:=n downto 1 do begin
      sa[c[x[i]]]:=y[i];
      dec(c[x[i]]);
    end;
    tot:=1;
    x[sa[1]]:=1;
    for i:=2 to n do begin
      if (rank[sa[i]]<>rank[sa[i-1]]) or (rank[sa[i]+p]<>rank[sa[i-1]+p]) then inc(tot);
      x[sa[i]]:=tot;
    end;
    for i:=1 to n do rank[i]:=x[i];
    if tot=n then break;
    p:=p<<1;
  end;
  for i:=1 to n do sa[rank[i]]:=i;
end;

procedure into;
var
  i:longint;
begin
  readln(n,m);
  for i:=1 to n do read(s[i]);
  for i:=1 to n do begin
    x[i]:=s[i];
    y[i]:=i;
  end;
  qsort(1,n);
  for i:=1 to n do sa[i]:=y[i];
  tot:=1;
  rank[sa[1]]:=1;
  for i:=2 to n do begin
    if x[i]<>x[i-1] then inc(tot);
    rank[sa[i]]:=tot;
  end;
 { for i:=1 to n do x[i]:=s[i];
  fillchar(c,sizeof(c),0);
  for i:=1 to n do inc(c[x[i]]);
  for i:=1 to mm do inc(c[i],c[i-1]);
  for i:=n downto 1 do begin
    sa[c[x[i]]]:=i;
    dec(c[x[i]]);
  end;
  rank[sa[1]]:=1;
  tot:=1;
  for i:=2 to n do begin
    if x[sa[i]]<>x[sa[i-1]] then inc(tot);
    rank[sa[i]]:=tot;
  end;      }
  if tot<>n then makesa;
  //for i:=1 to n do write(sa[i],' ');
  //writeln;
  makeheight;
  //for i:=1 to n do write(h[i],' ');
end;

procedure work;
var
  l,r,mid:longint;
begin
  l:=0;
  r:=n+1;
  while l+1<r do begin
    mid:=(l+r)>>1;
    if check(mid) then l:=mid
      else r:=mid;
  end;
  writeln(l);
end;

begin
  into;
  work;
  readln;
  readln;
end.
View Code

也就是如果第一次基排的那个基太大(基本上就是排的是数字而不是字符时)第一次排序时用快排会比基排快。

 

2.2.2子串个数

 

不相同子串的个数(spoj694,spoj705)

论文话:

每个子串一定是某个后缀的前缀,那么原问题等价于求所有后缀之间的不相
同的前缀的个数。如果所有的后缀按照 suffix(sa[1]), suffix(sa[2]),
suffix(sa[3]), …… ,suffix(sa[n])的顺序计算,不难发现,对于每一次新加
进来的后缀 suffix(sa[k]),它将产生 n-sa[k]+1 个新的前缀。但是其中有
height[k]个是和前面的字符串的前缀是相同的。所以 suffix(sa[k])将“贡献”
出 n-sa[k]+1- height[k]个不同的子串。累加后便是原问题的答案。这个做法
的时间复杂度为 O(n)。

两道题就是多组和单组数据的区别罢了。只贴一个

var
  c,x,y,rank,sa,h:array[0..100000]of longint;
  n,t,tot:longint;
  s:ansistring;

function max(x,y:longint):longint;
begin
  if x<y then exit(y);
  exit(x);
end;

procedure makeheight;
var
  last,i,j:longint;
begin
  last:=0;
  for i:=1 to n do begin
    last:=max(last-1,0);
    if rank[i]=1 then continue;
    j:=sa[rank[i]-1];
    while (i+last<=n) and (j+last<=n) and (s[i+last]=s[j+last]) do inc(last);
    h[rank[i]]:=last;
  end;
end;

procedure makesa;
var
  i,j,p:longint;
begin
  p:=1;
  while p<n do begin
    for i:=1 to p do y[i]:=n-p+i;
    j:=p;
    for i:=1 to n do
      if sa[i]>p then begin
        inc(j);
        y[j]:=sa[i]-p;
        if j=n then break;
      end;
    for i:=1 to n do x[i]:=rank[y[i]];
    for i:=1 to tot do c[i]:=0;
    for i:=1 to n do inc(c[x[i]]);
    for i:=1 to tot do inc(c[i],c[i-1]);
    for i:=n downto 1 do begin
      sa[c[x[i]]]:=y[i];
      dec(c[x[i]]);
    end;
    tot:=1;
    x[sa[1]]:=1;
    for i:=2 to n do begin
      if (rank[sa[i]]<>rank[sa[i-1]]) or (rank[sa[i]+p]<>rank[sa[i-1]+p]) then inc(tot);
      x[sa[i]]:=tot;
    end;
    for i:=1 to n do rank[i]:=x[i];
    if tot=n then break;
    p:=p<<1;
  end;
  for i:=1 to n do sa[rank[i]]:=i;
end;

procedure into;
var
  i:longint;
begin
  readln(s);
  n:=length(s);
  for i:=1 to n do x[i]:=ord(s[i]);
  fillchar(c,sizeof(c),0);
  for i:=1 to n do inc(c[x[i]]);
  for i:=1 to 128 do inc(c[i],c[i-1]);
  for i:=n downto 1 do begin
    sa[c[x[i]]]:=i;
    dec(c[x[i]]);
  end;
  tot:=1;
  rank[sa[1]]:=1;
  for i:=2 to n do begin
    if x[sa[i]]<>x[sa[i-1]] then inc(tot);
    rank[sa[i]]:=tot;
  end;
  if tot<>n then makesa;
  makeheight;
end;

procedure work;
var
  ans,i:longint;
begin
  ans:=0;
  for i:=1 to n do inc(ans,n-sa[i]+1-h[i]);
  writeln(ans);
end;

begin
  readln(t);
  while t>0 do begin
    dec(t);
    into;
    work;
  end;
end.
View Code

 

2.2.3回文子串

最长回文子串

(为什么不去写马拉车算法呢?又快又短)

把原串复制一边,中间用一个没出现过的字符隔开。

然后穷举每一位,比如当前是i,找他反转后的位置(n-i+1),然后分奇数和偶数。

奇数的话,那么就是suffix(i)和suffix(n-i+1)的lcp。长度:lcp<<1-1;

如果是偶数,那么就是suffix(i)和suffix(n-i+2)的lcp。长度:lcp<<1

var
  x,y,sa,c,rank,h:array[0..3000]of longint;
  f:array[0..3000,0..15]of longint;
  n,tot:longint;
  s,s1:ansistring;

function min(x,y:longint):longint;
begin
  if x<y then exit(x);
  exit(y);
end;

function max(x,y:longint):longint;
begin
  if x<y then exit(y);
  exit(x);
end;

procedure swap(var x,y:longint);
var
  i:longint;
begin
  i:=x;
  x:=y;
  y:=i;
end;

procedure first;
var
  i,j,k:longint;
  pow:array[0..20]of longint;
begin
  fillchar(f,sizeof(f),$7f);
  for i:=1 to n do f[i,0]:=h[i];
  k:=trunc(ln(n)/ln(2));
  for i:=1 to k do
    for j:=1 to n do
      if j+1<<i<=n then
        f[j,i]:=min(f[j,i-1],f[j+1<<(i-1),i-1]);
 { for i:=1 to n do begin
    writeln(i);
    for j:=0 to k do
      write(f[i,j],' ');
    writeln;
  end;}
end;

function lcp(x,y:longint):longint;
var
  i:longint;
begin
  x:=rank[x];
  y:=rank[y];
  if x>y then swap(x,y);
  if x<y then inc(x);
  i:=trunc(ln(y-x+1)/ln(2));
  exit(min(f[x,i],f[y-1<<i+1,i]));
end;

procedure makeheight;
var
  last,i,j:longint;
begin
  h[1]:=0;
  last:=0;
  for i:=1 to n do begin
    last:=max(last-1,0);
    if rank[i]=1 then continue;
    j:=sa[rank[i]-1];
    while (i+last<=n) and (j+last<=n) and (s[i+last]=s[j+last]) do inc(last);
    h[rank[i]]:=last;
  end;
end;

procedure makesa;
var
  i,j,p:longint;
begin
  p:=1;
  while p<n do begin
    for i:=1 to p do y[i]:=n-p+i;
    j:=p;
    for i:=1 to n do
      if sa[i]>p then begin
        inc(j);
        y[j]:=sa[i]-p;
        if j=n then break;
      end;
    for i:=1 to n do x[i]:=rank[y[i]];
    fillchar(c,sizeof(c),0);
    for i:=1 to n do inc(c[x[i]]);
    for i:=1 to tot do inc(c[i],c[i-1]);
    for i:=n downto 1 do begin
      sa[c[x[i]]]:=y[i];
      dec(c[x[i]]);
    end;
    tot:=1;
    x[sa[1]]:=1;
    for i:=2 to n do begin
      if (rank[sa[i]]<>rank[sa[i-1]]) or (rank[sa[i]+p]<>rank[sa[i-1]+p]) then inc(tot);
      x[sa[i]]:=tot;
    end;
    for i:=1 to n do rank[i]:=x[i];
    if tot=n then break;
    p:=p<<1;
  end;
  for i:=1 to n do sa[rank[i]]:=i;
end;

procedure into;
var
  i:longint;
begin
  readln(s);
  //s:=s1+'#'+s1;
  s:=s+'#';
  for i:= length(s)-1 downto 1 do s:=s+s[i];
  n:=length(s);
  for i:=1 to n do x[i]:=ord(s[i]);
  for i:=1 to n do inc(c[x[i]]);
  for i:=1 to 128 do inc(c[i],c[i-1]);
  for i:=n downto 1 do begin
    sa[c[x[i]]]:=i;
    dec(c[x[i]]);
  end;
  tot:=1;
  rank[sa[1]]:=1;
  for i:=2 to n do begin
    if x[sa[i]]<>x[sa[i-1]] then inc(tot);
    rank[sa[i]]:=tot;
  end;
  if tot<>n then makesa;
  makeheight;
  first;
end;

procedure work;
var
  i,j,k,ans,st:longint;
begin
  ans:=0;
  for i:=1 to n do begin
    k:=lcp(i,n-i+1);
    if k<<1-1>ans then begin
      ans:=k<<1-1;
      st:=i-k+1;
    end;
    if i=1 then continue;
    k:=lcp(i,n-i+2);
    if k<<1>ans then begin
      ans:=k<<1;
      st:=i-k;
    end;
  end;
  for i:=st to st+ans-1 do write(s[i]);
  writeln;
end;

begin
  into;
  work;
end.
View Code

 

 

2.2.4连续重复子串

连续重复子串(pku2406)

按照论文写法……然后光荣tle。后来发现这题不适合用后缀数组,大材小用,用kmp就行了。主要是为了之后的加强版热身吧。

做法就是枚举重复字符串的长度,比如当前是l(当然一定得整除字符串长度n),然后看suffix(1)和suffix(l+1)是否为n-l。

后缀数组版(tle):

var
  x,y,rank,sa,c,rmp,h:array[0..1200000]of longint;
  s:ansistring;
  n,tot:longint;

function min(x,y:longint):longint;
begin
  if x<y then exit(x);
  exit(y);
end;

function max(x,y:longint):longint;
begin
  if x<y then exit(y);
  exit(x);
end;


procedure makesa;
var
  i,j,p:longint;
begin
  p:=1;
  while p<n do begin
    for i:=1 to p do y[i]:=n-p+i;
    j:=p;
    for i:=1 to n do
      if sa[i]>p then begin
        inc(j);
        y[j]:=sa[i]-p;
        if j=n then break;
      end;
    for i:=1 to n do x[i]:=rank[y[i]];
    fillchar(c,sizeof(c),0);
    for i:=1 to n do inc(c[x[i]]);
    for i:=1 to tot do inc(c[i],c[i-1]);
    for i:=n downto 1 do begin
      sa[c[x[i]]]:=y[i];
      dec(c[x[i]]);
    end;
    tot:=1;
    x[sa[1]]:=1;
    for i:=2 to n do begin
      if (rank[sa[i]]<>rank[sa[i-1]]) or (rank[sa[i]+p]<>rank[sa[i-1]+p]) then inc(tot);
      x[sa[i]]:=tot;
    end;
    for i:=1 to n do rank[i]:=x[i];
    if tot=n then break;
    p:=p<<1;
  end;
  for i:=1 to n do sa[rank[i]]:=i;
end;

procedure makeheight;
var
  last,i,j:longint;
begin
  h[1]:=0;
  last:=0;
  for i:=1 to n do begin
    if last>1 then dec(last)
      else last:=0;
    if rank[i]=1 then continue;
    j:=sa[rank[i]-1];
    while (i+last<=n) and (j+last<=n) and (s[i+last]=s[j+last]) do inc(last);
    h[rank[i]]:=last;
  end;
end;

procedure first;
var
  i:longint;
begin
  rmp[rank[1]]:=0;
  if rank[1]<n then begin
    rmp[rank[1]+1]:=h[rank[1]+1];
    for i:=rank[1]+2 to n do
      rmp[i]:=min(rmp[i-1],h[i]);
  end;
  if rank[1]>1 then begin
    rmp[rank[1]-1]:=h[rank[1]];
    for i:=rank[1]-2 downto 1 do
      rmp[i]:=min(rmp[i+1],h[i]);
  end;
end;

procedure into;
var
  i:longint;
begin
  n:=length(s);
  for i:=1 to n do x[i]:=ord(s[i]);
  fillchar(c,sizeof(c),0);
  for i:=1 to n do inc(c[x[i]]);
  for i:=1 to 128 do inc(c[i],c[i-1]);
  for i:=n downto 1 do begin
    sa[c[x[i]]]:=i;
    dec(c[x[i]]);
  end;
  tot:=1;
  rank[sa[1]]:=1;
  for i:=2 to n do begin
    if x[sa[i]]<>x[sa[i-1]] then inc(tot);
    rank[sa[i]]:=tot;
  end;
  if tot<>n then makesa;
  makeheight;
  first;
end;

procedure work;
var
  i:longint;
begin
  for i:=1 to n do begin
    if n mod i<>0 then continue;
    if n-i=rmp[rank[i+1]] then begin
      writeln(n div i);
      exit;
    end;
  end;
end;

begin
  while true do begin
    readln(s);
    if s[1]='.' then break;
    into;
    work;
  end;
end.
View Code

kmp版:

var
  p:array[0..1000000]of longint;
  s:ansistring;



procedure work;
var
  i,j,n:longint;
begin
  n:=length(s);
  p[1]:=0;
  for i:=2 to n do begin
    j:=p[i-1];
    while (j>0) and (s[j+1]<>s[i]) do j:=p[j];
    if s[i]=s[j+1] then inc(j);
    p[i]:=j;
  end;
  if n mod (n-p[n])=0 then writeln(n div (n-p[n]))
    else writeln(1);
end;

begin
  while true do begin
    readln(s);
    if s[1]='.' then break;
    work;
  end;
  readln;
end.
View Code

 

重复次数最多的连续重复子串(spoj687,pku3693)

两道题区别在于是否输出方案(关于输出方案有个小地方要注意写在程序中了)

见论文……然后我的程序有两个小小的优化,一个是枚举时如果两个后缀连第一个字符都不一样的话就不要算lcp了,另一个是当前答案+1都小于等于最优答案就不要在算啦。

关于poj3693求同样有多少个,一开始是边找最长边记录,后来发现这样慢的要死,所以就直接求出最长后再找符合答案的字符串。

poj3693

var
  c,x,y,rank,sa,h,num:array[0..100000]of longint;
  f:array[0..100000,0..20]of longint;
  n,tot,tt:longint;
  s:ansistring;

function min(x,y:longint):longint;
begin
  if x<y then exit(x);
  exit(y);
end;

procedure swap(var x,y:longint);
var
  i:longint;
begin
  i:=x;
  x:=y;
  y:=i;
end;

function lcp(x,y:longint):longint;
var
  i:longint;
begin
  x:=rank[x];
  y:=rank[y];
  if x>y then swap(x,y);
  if x<y then inc(x);
  i:=trunc(ln(y-x+1)/ln(2));
  exit(min(f[x,i],f[y-1<<i+1,i]));
end;

function check(x,y:ansistring):boolean;
var
  i,j,k:longint;
begin
  //writeln(x);
  //writeln(y);
  k:=length(x);
  j:=length(y);
  for i:=1 to min(k,j) do begin
    if x[i]>y[i] then exit(false);
    if x[i]<y[i] then exit(true);
  end;
end;

procedure makesa;
var
  i,j,p:longint;
begin
  p:=1;
  while p<n do begin
    for i:=1 to p do y[i]:=n-p+i;
    j:=p;
    for i:=1 to n do
      if sa[i]>p then begin
        inc(j);
        y[j]:=sa[i]-p;
        if j=n then break;
      end;
    for i:=1 to n do x[i]:=rank[y[i]];
    fillchar(c,sizeof(c),0);
    for i:=1 to n do inc(c[x[i]]);
    for i:=1 to tot do inc(c[i],c[i-1]);
    for i:=n downto 1 do begin
      sa[c[x[i]]]:=y[i];
      dec(c[x[i]]);
    end;
    tot:=1;
    x[sa[1]]:=1;
    for i:=2 to n do begin
      if (rank[sa[i]]<>rank[sa[i-1]]) or (rank[sa[i]+p]<>rank[sa[i-1]+p]) then inc(tot);
      x[sa[i]]:=tot;
    end;
    for i:=1 to n do rank[i]:=x[i];
    if tot=n then break;
    p:=p<<1;
  end;
  for i:=1 to n do sa[rank[i]]:=i;
end;

procedure makehi;
var
  i,j,last:longint;
begin
  h[1]:=0;
  last:=0;
  for i:=1 to n do begin
    if last>1 then dec(last)
      else last:=0;
    if rank[i]=1 then continue;
    j:=sa[rank[i]-1];
    while (i+last<=n) and (j+last<=n) and (s[i+last]=s[j+last]) do inc(last);
    h[rank[i]]:=last;
  end;
end;

procedure first;
var
  i,j:longint;
begin
  for i:=1 to n do f[i,0]:=h[i];
  for j:=1 to trunc(ln(n)/ln(2)) do
    for i:=1 to n-1<<j+1 do
      f[i,j]:=min(f[i,j-1],f[i+1<<(j-1),j-1]);
end;

procedure into;
var
  i:longint;
begin
  n:=length(s);
  s:=s+'#';
  for i:=1 to n do x[i]:=ord(s[i]);
  fillchar(c,sizeof(c),0);
  for i:=1 to n do inc(c[x[i]]);
  for i:=1 to 128 do inc(c[i],c[i-1]);
  for i:=n downto 1 do begin
    sa[c[x[i]]]:=i;
    dec(c[x[i]]);
  end;
  tot:=1;
  rank[sa[1]]:=1;
  for i:=2 to n do begin
    if x[sa[i]]<>x[sa[i-1]] then inc(tot);
    rank[sa[i]]:=tot;
  end;
  if tot<>n then makesa;
  makehi;
  first;
end;

procedure work;
var
  i,j,k,l,ans,long,more,best,total:longint;
begin
  best:=0;
  total:=0;
  for i:=1 to n-1 do begin
    j:=1;
    while i+j<=n do begin
      if s[j]=s[j+i] then begin
        long:=lcp(j,j+i);
        ans:=long div i+1;
        if (ans+1>=best) and (ans>1) then begin
          if (long mod i<>0) then begin
            more:=i-long mod i;
            if (j-more>0) and (lcp(j-more,j+i-more)>=more) then inc(ans);
          end;
          if ans>best then begin
            best:=ans;
            total:=0;
          end;
          if ans=best then begin
            inc(total);
            num[total]:=i;
          end;
        end;
      end;
      inc(j,i);
    end;
  end;
  for i:=1 to n do  //一定要再找一次,不能直接在上面算最长时计算,反例babababaccaccaccaccac,上面找到的长度是caccaccaccac(构造了好久出来的反例),这样字典序并不是最小,所以上面找到的字符串只是长度最长,而没有包括所以情况
    for j:=1 to total do begin
      k:=num[j];
      if lcp(sa[i],sa[i]+k)>=(best-1)*k then begin
        write('Case ',tt,': ');
        for l:=sa[i] to sa[i]+best*k -1do write(s[l]);
        writeln;
        exit;
      end;
    end;
end;

begin
  tt:=0;
  while true do begin
    readln(s);
    if s[1]='#' then break;
    inc(tt);
    into;
    work;
  end;
end.
View Code

 spoj687

var
  x,y,rank,sa,h,c:array[0..50000]of longint;
  f:array[0..50000,0..15]of longint;
  ft:array[0..50000]of longint;
  n,tt,tot:longint;
  s:array[0..50000]of char;

function min(x,y:longint):longint;
begin
  if x<y then exit(x);
  exit(y)
end;

procedure swap(var x,y:longint);
var
  i:longint;
begin
  i:=x;
  x:=y;
  y:=i;
end;

function lcp(x,y:longint):longint;
var
  i:longint;
begin
  x:=rank[x];
  y:=rank[y];
  if x>y then swap(x,y);
  inc(x);
  i:=ft[y-x+1];
  exit(min(f[x,i],f[y-1<<i+1,i]))
end;

procedure makesa;
var
  i,j,p:longint;
begin
  p:=1;
  while p<n do begin
    for i:=1 to p do y[i]:=n-p+i;
    j:=p;
    for i:=1 to n do
      if sa[i]>p then begin
        inc(j);
        y[j]:=sa[i]-p;
        if j=n then break;
      end;
    for i:=1 to n do x[i]:=rank[y[i]];
    fillchar(c,sizeof(c),0);
    for i:=1 to n do inc(c[x[i]]);
    for i:=1 to tot do inc(c[i],c[i-1]);
    for i:=n downto 1 do begin
      sa[c[x[i]]]:=y[i];
      dec(c[x[i]]);
    end;
    tot:=1;
    x[sa[1]]:=1;
    for i:=2 to n do begin
      if (rank[sa[i]]<>rank[sa[i-1]]) or (rank[sa[i]+p]<>rank[sa[i-1]+p]) then inc(tot);
      x[sa[i]]:=tot;
    end;
    for i:=1 to n do rank[i]:=x[i];
    if tot=n then break;
    p:=p<<1;
  end;
  for i:=1 to n do sa[rank[i]]:=i;
end;

procedure makeheigh;
var
  i,j,last:longint;
begin
  last:=0;
  h[1]:=0;
  for i:=1 to n do begin
    if last>1 then dec(last)
      else last:=0;
    if rank[i]=1 then continue;
    j:=sa[rank[i]-1];
    while (i+last<=n) and (j+last<=n) and (s[i+last]=s[j+last]) do inc(last);
    h[rank[i]]:=last;
  end;
end;

procedure first;
var
  i,j,k:longint;
begin
  for i:=1 to n do f[i,0]:=h[i];
  k:=ft[n];
  for i:=1 to k do
    for j:=1 to n-1<<i+1 do
      f[j,i]:=min(f[j,i-1],f[j+1<<(i-1),i-1]);
end;

procedure into;
var
  i:longint;
begin
  readln(n);
  for i:=1 to n do readln(s[i]);
  for i:=1 to n do x[i]:=ord(s[i]);
  fillchar(c,sizeof(c),0);
  for i:=1 to n do inc(c[x[i]]);
  for i:=1 to 128 do inc(c[i],c[i-1]);
  for i:=n downto 1 do begin
    sa[c[x[i]]]:=i;
    dec(c[x[i]]);
  end;
  tot:=1;
  rank[sa[1]]:=1;
  for i:=2 to n do begin
    if x[sa[i]]<>x[sa[i-1]] then inc(tot);
    rank[sa[i]]:=tot;
  end;
  if tot<>n then makesa;
  makeheigh;
  first;
end;

procedure work;
var
  i,j,k,best,long,ans,more:longint;
begin
  best:=1;
  for i:=1 to n-1 do begin
    j:=1;
    while i+j<=n do begin
      if s[j]=s[i+j] then begin
        long:=lcp(j,j+i);
        ans:=long div i+1;
        if (ans+1>=best) and (ans>1) then begin
          if long mod i<>0 then begin
            more:=i-long mod i;
            if (j-more>0) and (lcp(j-more,j+i-more)>=more) then inc(ans);
          end;
          if ans>best then best:=ans;
        end;
      end;
      inc(j,i);
    end;
  end;
  writeln(best);
end;

begin
  for tt:=1 to 50000 do ft[tt]:=trunc(ln(tt)/ln(2));
  readln(tt);
  while tt>0 do begin
    dec(tt);
    into;
    work;
  end
end.
View Code

 

2.3两个字符串的相关问题

 

2.3.1公共子串

最长公共子串(pku2774,ural1517)

求两个字符串的最长公共子串。连接在一起后找排名相邻且属不同串的h[]的最大值。

pku2774

var
  x,y,rank,sa,h,c:array[0..200300]of longint;
  s,s1,s2:ansistring;
  n,tot,n1:longint;


procedure makesa;
var
  i,j,p:longint;
begin
  p:=1;
  while p<n do begin
    for i:=1 to p do y[i]:=n-p+i;
    j:=p;
    for i:=1 to n do
      if sa[i]>p then begin
        inc(j);
        y[j]:=sa[i]-p;
        if j=n then break;
      end;
    for i:=1 to n do x[i]:=rank[y[i]];
    fillchar(c,sizeof(c),0);
    for i:=1 to n do inc(c[x[i]]);
    for i:=1 to tot do inc(c[i],c[i-1]);
    for i:=n downto 1 do begin
      sa[c[x[i]]]:=y[i];
      dec(c[x[i]]);
    end;
    tot:=1;
    x[sa[1]]:=1;
    for i:=2 to n do begin
      if (rank[sa[i]]<>rank[sa[i-1]]) or (rank[sa[i]+p]<>rank[sa[i-1]+p]) then inc(tot);
      x[sa[i]]:=tot;
    end;
    for i:=1 to n do rank[i]:=x[i];
    if tot=n then break;
    p:=p<<1;
  end;
  for i:=1 to n do sa[rank[i]]:=i;
end;

procedure makeheight;
var
  i,j,last:longint;
begin
  last:=0;
  h[1]:=0;
  for i:=1 to n do begin
    if last>1 then dec(last)
      else last:=0;
    if rank[i]=1 then continue;
    j:=sa[rank[i]-1];
    while {(i+last<=n) and (j+last<=n) and} (s[i+last]=s[j+last])do inc(last);
    h[rank[i]]:=last;
  end;
end;

procedure into;
var
  i:longint;
begin
  readln(s1);
  readln(s2);
  s:=s1+'$'+s2+'*';
  n:=length(s);
  n1:=length(s1);
  for i:=1 to n do x[i]:=ord(s[i]);
  fillchar(c,sizeof(c),0);
  for i:=1 to n do inc(c[x[i]]);
  for i:=1 to 128 do inc(c[i],c[i-1]);
  for i:=n downto 1 do begin
    sa[c[x[i]]]:=i;
    dec(c[x[i]]);
  end;
  tot:=1;
  rank[sa[1]]:=1;
  for i:=2 to n do begin
    if x[sa[i]]<>x[sa[i-1]] then inc(tot);
    rank[sa[i]]:=tot;
  end;
  if tot<>n then makesa;
 // for i:=1 to n do write(sa[i],' ');writeln;
//  for i:=1 to n do write(rank[i],' ');writeln;
  makeheight;
 // for i:=1 to n do write(h[i],' ');writeln;
end;

procedure work;
var
  i,max:longint;
begin
  max:=0;
  for i:=2 to n do
    if ( not ((sa[i]<=n1) xor (sa[i-1]>n1)) ) and (h[i]>max) then max:=h[i];
  writeln(max);
end;

begin
  into;
  work;
end.
View Code

ural1517

var
  x,y,rank,sa,c,h:array[0..200300]of longint;
  n,tot,n1:longint;
  s,s1,s2:ansistring;


procedure makesa;
var
  i,j,p:longint;
begin
  p:=1;
  while p<n do begin
    for i:=1 to p do y[i]:=n-p+i;
    j:=p;
    for i:=1 to n do
      if sa[i]>p then begin
        inc(j);
        y[j]:=sa[i]-p;
        if j=n then break;
      end;
    for i:=1 to n do x[i]:=rank[y[i]];
    fillchar(c,sizeof(c),0);
    for i:=1 to n do inc(c[x[i]]);
    for i:=1 to tot do inc(c[i],c[i-1]);
    for i:=n downto 1 do begin
      sa[c[x[i]]]:=y[i];
      dec(c[x[i]]);
    end;
    tot:=1;
    x[sa[1]]:=1;
    for i:=2 to n do begin
      if (rank[sa[i]]<>rank[sa[i-1]]) or (rank[sa[i]+p]<>rank[sa[i-1]+p]) then inc(tot);
      x[sa[i]]:=tot;
    end;
    for i:=1 to n do rank[i]:=x[i];
    if tot=n then break;
    p:=p<<1;
  end;
  for i:=1 to n do sa[rank[i]]:=i;
end;

procedure makeheight;
var
  i,j,last:longint;
begin
  h[1]:=0;
  last:=0;
  for i:=1 to n do begin
    if last>1 then dec(last)
      else last:=0;
    if rank[i]=1 then continue;
    j:=sa[rank[i]-1];
    while s[i+last]=s[j+last] do inc(last);
    h[rank[i]]:=last;
  end;
end;

procedure into;
var
  i:longint;
begin
  readln(n1);
  readln(s1);
  readln(s2);
  s:=s1+'$'+s2+'%';
  n:=n1<<1+2;
  for i:=1 to n do x[i]:=ord(s[i]);
  fillchar(c,sizeof(c),0);
  for i:=1 to n do inc(c[x[i]]);
  for i:=1 to 128 do inc(c[i],c[i-1]);
  for i:=n downto 1 do begin
    sa[c[x[i]]]:=i;
    dec(c[x[i]]);
  end;
  tot:=1;
  rank[sa[1]]:=1;
  for i:=2 to n do begin
    if x[sa[i]]<>x[sa[i-1]] then inc(tot);
    rank[sa[i]]:=tot;
  end;
  if tot<>n then makesa;
  makeheight;
end;

procedure work;
var
  max,top,i:longint;
begin
  max:=0;
  top:=0;
  for i:=2 to n do
    if not ((sa[i]<=n1) xor (sa[i-1]>n1)) then
      if h[i]>max then begin
        max:=h[i];
        top:=sa[i];
      end;
  writeln(copy(s,top,max));
end;

begin
  into;
  work;
end.
View Code

 

2.3.2子串的个数

长度不小于 k 的公共子串的个数(pku3415)

论文里面没有写清楚。

蔡大神找到不错的题解http://www.cnblogs.com/EC-Ecstasy/p/4174671.html,做法是用容斥做。

后来我跪论文提供的程序,写出了用b求a用a求b的。

无论是两种,都是得需要单调栈维护。

就是把单调栈里面大于h[i]的都变成h[i]。

写法一:

var
  x,y,sa,saa,rank,h,p,c:array[0..200333]of longint;
  n,tot,kk:longint;
  s1,s2,s:ansistring;


procedure makesa;
var
  i,j,p:longint;
begin
  p:=1;
  while p<n do begin
    for i:=1 to p do y[i]:=n-p+i;
    j:=p;
    for i:=1 to n do
      if sa[i]>p then begin
        inc(j);
        y[j]:=sa[i]-p;
        if j=n then break;
      end;
    for i:=1 to n do x[i]:=rank[y[i]];
    fillchar(c,sizeof(c),0);
    for i:=1 to n do inc(c[x[i]]);
    for i:=1 to tot do inc(c[i],c[i-1]);
    for i:=n downto 1 do begin
      sa[c[x[i]]]:=y[i];
      dec(c[x[i]]);
    end;
    tot:=1;
    x[sa[1]]:=1;
    for i:=2 to n do begin
      if (rank[sa[i]]<>rank[sa[i-1]]) or (rank[sa[i]+p]<>rank[sa[i-1]+p]) then inc(tot);
      x[sa[i]]:=tot;
    end;
    for i:=1 to n do rank[i]:=x[i];
    if tot=n then break;
    p:=p<<1;
  end;
  for i:=1 to n do sa[rank[i]]:=i;
end;

procedure into;
var
  i:longint;
begin
  readln(s1);
  readln(s2);
  s:=s1+'$'+s2;
  fillchar(c,sizeof(c),0);
  n:=length(s);
  for i:=1 to n do x[i]:=ord(s[i]);
  for i:=1 to n do inc(c[x[i]]);
  for i:=1 to 128 do inc(c[i],c[i-1]);
  for i:=n downto 1 do begin
    sa[c[x[i]]]:=i;
    dec(c[x[i]]);
  end;
  tot:=1;
  rank[sa[1]]:=1;
  for i:=1 to n do begin
    if x[sa[i]]<>x[sa[i-1]] then inc(tot);
    rank[sa[i]]:=tot;
  end;
  if tot<>n then makesa;
end;

procedure makeheight;
var
  i,j,last:longint;
begin
  last:=0;
  h[1]:=0;
  for i:=1 to n do begin
    if last>1 then dec(last)
      else last:=0;
    if rank[i]=1 then continue;
    j:=sa[rank[i]-1];
    while (i+last<=n) and (j+last<=n) and (s[i+last]=s[j+last]) do inc(last);
    h[rank[i]]:=last;
  end;
end;

function calc:int64;
var
  i,ht,more,top,m:longint;
  ans:int64;
begin
  for i:=1 to n do rank[sa[i]]:=i;
  makeheight;
  //for i:=1 to n do write(h[i],' ');writeln; 
  ans:=0;
  h[0]:=kk-1;
  h[n+1]:=kk-1;
  top:=0;
  i:=1;
  p[0]:=0;
  while i<=n+1 do begin
    ht:=h[p[top]];
    if ((h[i]<kk) and (top=0)) or (h[i]=ht) then inc(i)
      else
        if h[i]>ht then begin
          inc(top);
          p[top]:=i;
          inc(i);
        end
        else begin
          m:=i-p[top]+1;
          if (h[i]>=kk) and (h[i]>h[p[top-1]]) then begin
            more:=ht-h[i];
            h[p[top]]:=h[i];
          end
          else begin
            more:=ht-h[p[top-1]];
            dec(top);
          end;
          inc(ans,int64(m)*(m-1)>>1*more);
        end;
  end;
  exit(ans);
end;

procedure work;
var
  sum,i,j:longint;
  ans1,ans2:int64;
begin
  sum:=n;
  for i:=1 to n do saa[i]:=sa[i];
  ans1:=calc;
  s:=s1;
  n:=length(s1);
  j:=0;
  for i:=1 to sum do
    if saa[i]<=n then begin
      inc(j);
      sa[j]:=saa[i];
      if j=n then break;
    end;
  ans2:=calc;
  s:=s2;
  n:=length(s2);
  j:=0;
  for i:=1 to sum do
    if saa[i]>sum-n then begin
      inc(j);
      sa[j]:=saa[i]-(sum-n);
      if j=n then break;
    end;
  writeln(ans1-ans2-calc);
end;


begin
  while true do begin
    readln(kk);
    if kk=0 then break;
    into;
    work;
  end;
end.
View Code

写法二:

var
  x,y,p,rank,sa,h,c,num1,num2:array[0..200333]of longint;
  n,tot,n1,kk:longint;
  s,s1,s2:ansistring;


procedure makeheight;
var
  i,j,last:longint;
begin
  last:=0;
  h[1]:=0;
  for i:=1 to n do begin
    if last>1 then dec(last)
      else last:=0;
    if rank[i]=1 then continue;
    j:=sa[rank[i]-1];
    while (i+last<=n) and (j+last<=n) and (s[i+last]=s[j+last]) do inc(last);
    h[rank[i]]:=last;
  end;
end;

procedure makesa;
var
  i,j,p:longint;
begin
  p:=1;
  while p<n do begin
    for i:=1 to p do y[i]:=n-p+i;
    j:=p;
    for i:=1 to n do
      if sa[i]>p then begin
        inc(j);
        y[j]:=sa[i]-p;
        if j=n then break;
      end;
    for i:=1 to n do x[i]:=rank[y[i]];
    fillchar(c,sizeof(c),0);
    for i:=1 to n do inc(c[x[i]]);
    for i:=1 to tot do inc(c[i],c[i-1]);
    for i:=n downto 1 do begin
      sa[c[x[i]]]:=y[i];
      dec(c[x[i]]);
    end;
    tot:=1;
    x[sa[1]]:=1;
    for i:=2 to n do begin
      if (rank[sa[i]]<>rank[sa[i-1]]) or (rank[sa[i]+p]<>rank[sa[i-1]+p]) then inc(tot);
      x[sa[i]]:=tot;
    end;
    for i:=1 to n do rank[i]:=x[i];
    if tot=n then break;
    p:=p<<1;
  end;
  for i:=1 to n do sa[rank[i]]:=i;
end;

procedure into;
var
  i:longint;
begin
  readln(s1);
  readln(s2);
  s:=s1+'$'+s2;
  n1:=length(s1);
  n:=length(s);
  for i:=1 to n do x[i]:=ord(s[i]);
  fillchar(c,sizeof(c),0);
  for i:=1 to n do inc(c[x[i]]);
  for i:=1 to 128 do inc(c[i],c[i-1]);
  for i:=n downto 1 do begin
    sa[c[x[i]]]:=i;
    dec(c[x[i]]);
  end;
  tot:=1;
  rank[sa[1]]:=1;
  for i:=2 to n do begin
    if x[sa[i]]<>x[sa[i-1]] then inc(tot);
    rank[sa[i]]:=tot;
  end;
  if tot<>n then makesa;
  makeheight;
end;

procedure work;
var
  i,top,tota,totb:longint;
  ans,suma,sumb:int64;
begin
  for i:=1 to n do
    if h[i]>=kk then dec(h[i],kk-1)
      else h[i]:=0;
  h[n+1]:=0;
  top:=0;
  suma:=0;
  sumb:=0;
  ans:=0;
  p[0]:=0;
  for i:=2 to n do begin
    if sa[i]<=n1 then ans:=sumb+ans
      else ans:=suma+ans;
    if h[i+1]<1 then begin
      top:=0;
      suma:=0;
      sumb:=0;
      continue;
    end;
    tota:=0;
    totb:=0;
    while (h[i+1]<h[p[top]]) and (top>0) do begin
      suma:=suma-int64(h[p[top]]-h[i+1])*num1[top];
      sumb:=sumb-int64(h[p[top]]-h[i+1])*num2[top];
      tota:=tota+num1[top];
      totb:=totb+num2[top];
      dec(top);
    end;
    if sa[i]<=n1 then begin
      inc(tota);
      suma:=suma+h[i+1];
    end
    else begin
      inc(totb);
      sumb:=sumb+h[i+1];
    end;
    if (h[i+1]=h[p[top]]) and (top>0) then begin
      inc(num1[top],tota);
      inc(num2[top],totb);
    end
    else begin
      inc(top);
      num1[top]:=tota;
      num2[top]:=totb;
      p[top]:=i+1;
    end;
  end;
  writeln(ans);
end;


begin
  while true do begin
    readln(kk);
    if kk=0 then break;
    into;
    work;
  end;
end.
View Code

记得用int64!!!!

 

2.4多个字符串

跟两个字符串一样的做法,用不同串直接隔开。

然后多个字符串有两个优化,一个是先给每个字符串都染色颜色,这样之后容易处理;另一个是很神的优化(跪kmp大神代码学到的!),算每个字符串最多延长多长(就是到字符串末尾的距离),最后算出h[]后在跟d[sa[i]]和d[sa[i-1]]中保存最小的,这样就巧妙的去掉很多把连接符也算入答案的错误!!

 

不小于 k 个字符串中的最长子串(pku3294)

其实和求单个差不多。染色,height[]数组分组,统计颜色就行了。

(第二优化后直接从2400+ms变成400+ms)

var
  x,y,c,rank,sa,h,p,col,d:array[0..100300]of longint;
  chose:array[0..200]of longint;
  s,s1:ansistring;
  n,total,tot,nn,kk,time,right:longint;

function min(x,y:longint):longint;
begin
  if x<y then exit(x);
  exit(y);
end;

procedure makesa;
var
  i,j,p:longint;
begin
  p:=1;
  while p<n do begin
    for i:=1 to p do y[i]:=n-p+i;
    j:=p;
    for i:=1 to n do
      if sa[i]>p then begin
        inc(j);
        y[j]:=sa[i]-p;
        if j=n then break;
      end;
    for i:=1 to n do x[i]:=rank[y[i]];
    fillchar(c,sizeof(c),0);
    for i:=1 to n do inc(c[x[i]]);
    for i:=1 to tot do inc(c[i],c[i-1]);
    for i:=n downto 1 do begin
      sa[c[x[i]]]:=y[i];
      dec(c[x[i]]);
    end;
    tot:=1;
    x[sa[1]]:=1;
    for i:=2 to n do begin
      if (rank[sa[i]]<>rank[sa[i-1]]) or (rank[sa[i]+p]<>rank[sa[i-1]+p]) then inc(tot);
      x[sa[i]]:=tot;
    end;
    for i:=1 to n do rank[i]:=x[i];
    if tot=n then break;
    p:=p<<1;
  end;
  for i:=1 to n do sa[rank[i]]:=i;
end;

procedure makeheight;
var
  i,j,last:longint;
begin
  h[1]:=0;
  last:=0;
  for i:=1 to n do begin
    if last>1 then dec(last)
      else last:=0;
    if rank[i]=1 then continue;
    j:=sa[rank[i]-1];
    while s[i+last]=s[j+last] do inc(last);
    h[rank[i]]:=last;
  end;
end;

function check(x:longint):boolean;
var
  all,i,j,k,tot:longint;
  flag:boolean;
begin
  i:=2;
  all:=0;
  while i<=n do begin
    while (h[i]<x) and (i<=n) do inc(i);
    j:=i;
    tot:=0;
    inc(time);
    chose[col[sa[i-1]]]:=time;
    while (h[j]>=x) and (j<=n) do begin
      if (chose[col[sa[j]]]<time) then begin
        chose[col[sa[j]]]:=time;
        inc(tot);
      end;
      inc(j);
    end;
    if tot>=kk then begin
      inc(all);
      p[all]:=sa[i];
    end;
    i:=j+1;
  end;
  if all>0 then begin
    total:=all;
    exit(true);
  end;
  exit(false);
end;

procedure into;
var
  i,sum,n1,j:longint;
begin
  total:=0;
  kk:=nn>>1;
  right:=maxlongint;
  sum:=0;
  s:='';
  for i:=1 to nn do begin
    readln(s1);
    s:=s+s1+'$';
    n1:=length(s1);
    if right>n1 then right:=n1;
    for j:=1 to n1 do begin
      col[sum+j]:=i;
      d[sum+j]:=n1-j+1;
    end;
    sum:=sum+n1+1;
    col[sum]:=0;
  end;
  s:=s+'#';
  n:=sum+1;
  col[n]:=0;
  for i:=1 to n do x[i]:=ord(s[i]);
  fillchar(c,sizeof(c),0);
  for i:=1 to n do inc(c[x[i]]);
  for i:=1 to 128 do inc(c[i],c[i-1]);
  for i:=n downto 1 do begin
    sa[c[x[i]]]:=i;
    dec(c[x[i]]);
  end;
  tot:=1;
  rank[sa[1]]:=1;
  for i:=2 to n do begin
    if x[sa[i]]<>x[sa[i-1]] then inc(tot);
    rank[sa[i]]:=tot;
  end;
  if tot<>n then makesa;
  makeheight;
  for i:=2 to n do h[i]:=min(h[i],min(d[sa[i]],d[sa[i-1]]));
  //for i:=2 to n do writeln(h[i]);
end;

procedure work;
var
  left,mid,i,j:longint;
begin
  //for i:=1 to n do writeln(copy(s,sa[i],n-sa[i]+1));
  left:=0;
  inc(right);
  while left+1<right do begin
    mid:=(left+right)>>1;
    if check(mid) then left:=mid
      else right:=mid
  end;
  if left=0 then writeln('?')
    else
      for i:=1 to total do
        writeln(copy(s,p[i],left))
end;

begin
  time:=0;
  while true do begin
    readln(nn);
    if nn=0 then break;
    into;
    work;
    writeln;
  end
end.
View Code

 

每个字符串至少出现两次且不重叠的最长子串(spoj220)

都写到这道题了,应该知道怎么写了吧?!做饭和上面类似

const
  mm=10000000;

var
  x,y,rank,sa,h,c,d,col:array[0..200000]of longint;
  num1,num2:array[0..100]of longint;
  n,tot,sum,right,tt:longint;
  s,s1:ansistring;

function min(x,y:longint):longint;
begin
  if x<y then exit(x);
  exit(y);
end;

function check(x:longint):boolean;
var
  i,j:longint;
  flag:boolean;
begin
  for j:=1 to sum do begin
    num1[j]:=mm;
    num2[j]:=-mm;
  end;
  for i:=sum+2 to n do begin
    j:=sa[i];
    if col[j]<>0 then begin
      if j<num1[col[j]] then num1[col[j]]:=j;
      if j>num2[col[j]] then num2[col[j]]:=j;
    end;
    if i=n then break;
    if h[i+1]<x then begin
      flag:=true;
      for j:=1 to sum do
        if (num1[j]=mm) or (num2[j]-num1[j]<x) then begin
          flag:=false;
          break;
        end;
      if flag then exit(true);
      for j:=1 to sum do begin
        num1[j]:=mm;
        num2[j]:=-mm;
      end;
    end;
  end;
  flag:=true;
  for j:=1 to sum do
    if (num1[j]=mm) or (num2[j]-num1[j]<x) then begin
      flag:=false;
      break;
    end;
  if flag then exit(true);
  exit(false);
end;

procedure makesa;
var
  i,j,p:longint;
begin
  p:=1;
  while p<n do begin
    for i:=1 to p do y[i]:=n-p+i;
    j:=p;
    for i:=1 to n do
      if sa[i]>p then begin
        inc(j);
        y[j]:=sa[i]-p;
        if j=n then break;
      end;
    for i:=1 to n do x[i]:=rank[y[i]];
    fillchar(c,sizeof(c),0);
    for i:=1 to n do inc(c[x[i]]);
    for i:=1 to tot do inc(c[i],c[i-1]);
    for i:=n downto 1 do begin
      sa[c[x[i]]]:=y[i];
      dec(c[x[i]]);
    end;
    tot:=1;
    x[sa[1]]:=1;
    for i:=2 to n do begin
      if (rank[sa[i]]<>rank[sa[i-1]]) or (rank[sa[i]+p]<>rank[sa[i-1]+p]) then inc(tot);
      x[sa[i]]:=tot;
    end;
    for i:=1 to n do rank[i]:=x[i];
    if tot=n then break;
    p:=p<<1;
  end;
  for i:=1 to n do sa[rank[i]]:=i;
end;

procedure makeheight;
var
  i,j,last:longint;
begin
  h[1]:=0;
  last:=0;
  for i:=1 to n do begin
    if last>0 then dec(last)
      else last:=0;
    if rank[i]=1 then continue;
    j:=sa[rank[i]-1];
    while s[j+last]=s[i+last] do inc(last);
    h[rank[i]]:=last;
  end;
end;

procedure into;
var
  i,n1,j:longint;
begin
  readln(sum);
  n:=0;
  s:='';
  right:=maxlongint;
  for j:=1 to sum do begin
    readln(s1);
    n1:=length(s1);
    if n1<right then right:=n1;
    s:=s+s1+'$';
    for i:=1 to n1 do begin
      col[n+i]:=j;
      d[n+i]:=n1-i+1;
    end;
    n:=n+n1+1;
    col[n]:=0;
    d[n]:=0;
  end;
  inc(n);
  col[n]:=0;
  d[n]:=0;
  s:=s+'#';
  for i:=1 to n do x[i]:=ord(s[i]);
  fillchar(c,sizeof(c),0);
  for i:=1 to n do inc(c[x[i]]);
  for i:=1 to 128 do inc(c[i],c[i-1]);
  for i:=n downto 1 do begin
    sa[c[x[i]]]:=i;
    dec(c[x[i]]);
  end;
  tot:=1;
  rank[sa[1]]:=1;
  for i:=2 to n do begin
    if x[sa[i]]<>x[sa[i-1]] then inc(tot);
    rank[sa[i]]:=tot;
  end;
  if tot<>n then makesa;
  makeheight;
  for i:=2 to n do h[i]:=min(h[i],min(d[sa[i]],d[sa[i-1]]));
 // for i:=1 to n do writeln(h[i],' ',copy(s,sa[i],n-sa[i]+1));
end;

procedure work;
var
  left,mid:longint;
begin
  left:=0;
  while left+1<right do begin
    mid:=(left+right)>>1;
    if check(mid) then left:=mid
      else right:=mid;
  end;
  writeln(left);
end;

begin
  readln(tt);
  while tt>0 do begin
    dec(tt);
    into;
    work;
  end;
end.
View Code

 

出现或反转后出现在每个字符串中的最长子串(pku1226)

要反转就跟着再反转一次。然后没了。

var
  x,y,rank,c,h,d,sa,col:array[0..200300]of longint;
  chose:array[0..200]of longint;
  n,tot,time,tt,sum,right:longint;
  s,s1:ansistring;

function min(x,y:longint):longint;
begin
  if x<y then exit(x);
  exit(y);
end;

procedure makesa;
var
  i,j,p:longint;
begin
  p:=1;
  while p<n do begin
    for i:=1 to p do y[i]:=n-p+i;
    j:=p;
    for i:=1 to n do
      if sa[i]>p then begin
        inc(j);
        y[j]:=sa[i]-p;
        if j=n then break;
      end;
    for i:=1 to n do x[i]:=rank[y[i]];
    fillchar(c,sizeof(c),0);
    for i:=1 to n do inc(c[x[i]]);
    for i:=1 to tot do inc(c[i],c[i-1]);
    for i:=n downto 1 do begin
      sa[c[x[i]]]:=y[i];
      dec(c[x[i]]);
    end;
    tot:=1;
    x[sa[1]]:=1;
    for i:=2 to n do begin
      if (rank[sa[i]]<>rank[sa[i-1]]) or (rank[sa[i]+p]<>rank[sa[i-1]+p]) then inc(tot);
      x[sa[i]]:=tot;
    end;
    for i:=1 to n do rank[i]:=x[i];
    if tot=n then break;
    p:=p<<1;
  end;
  for i:=1 to n do sa[rank[i]]:=i;
end;

procedure makeheight;
var
  i,j,last:longint;
begin
  last:=0;
  h[1]:=0;
  for i:=1 to n do begin
    if last>1 then dec(last)
      else last:=0;
    if rank[i]=1 then continue;
    j:=sa[rank[i]-1];
    while s[j+last]=s[i+last] do inc(last);
    h[rank[i]]:=last;
  end;
end;

function check(x:longint):boolean;
var
  i,all:longint;
begin
  inc(time);
  all:=0;
  for i:=sum<<1+2 to n-1 do begin
    if chose[col[sa[i]]]<time then begin
      chose[col[sa[i]]]:=time;
      inc(all);
      if all=sum then exit(true);
    end;
    if h[i+1]<x then begin
      inc(time);
      all:=0;
    end;
  end;
  exit(false);
end;

procedure into;
var
  i,j,n1:longint;
begin
  readln(sum);
  s:='';
  n:=0;
  right:=maxlongint;
  for j:=1 to sum do begin
    readln(s1);
    n1:=length(s1);
    if n1<right then right:=n1;
    s1:=s1+'@';
    d[n1+1]:=0;
    col[n1+1]:=0;
    for i:=1 to n1 do begin
      s1:=s1+s1[n1-i+1];
      d[n+i]:=n1-i+1;
      d[n+n1+i+1]:=n1-i+1;
      col[n+i]:=j;
      col[n+n1++1+i]:=j;
    end;
    n:=n+n1<<1+2;
    s:=s+s1+'#';
    col[n]:=0;
    d[n]:=0;
  end;
  s:=s+'$';
  inc(n);
  for i:=1 to n do x[i]:=ord(s[i]);
  fillchar(c,sizeof(c),0);
  for i:=1 to n do inc(c[x[i]]);
  for i:=1 to 128 do inc(c[i],c[i-1]);
  for i:=n downto 1 do begin
    sa[c[x[i]]]:=i;
    dec(c[x[i]]);
  end;
  tot:=1;
  rank[sa[1]]:=1;
  for i:=2 to n do begin
    if x[sa[i]]<>x[sa[i-1]] then inc(tot);
    rank[sa[i]]:=tot;
  end;
  if tot<>n then makesa;
  makeheight;
  for i:=1 to n do h[i]:=min(h[i],min(d[sa[i]],d[sa[i-1]]));
  //for i:=1 to n do writeln(h[i],' ',copy(s,sa[i],n-sa[i]+1));
end;

procedure work;
var
  left,mid:longint;
begin
  inc(right);
  left:=0;
  while left+1<right do begin
    mid:=(left+right)>>1;
    if check(mid) then left:=mid
      else right:=mid;
  end;
  writeln(left)
end;

begin
  readln(tt);
  while tt>0 do begin
    dec(tt);
    into;
    work;
  end
end.
View Code

 

 

 

========================

暂时告一段落吧,完结撒花!!2014.1.23

posted @ 2015-01-11 22:09  Macaulish  阅读(641)  评论(0编辑  收藏  举报