后缀数组专题

后缀数组

按照惯例,先上模板:

/*
*suffix array
*倍增算法  O(n*logn)
*待排序数组长度为n,放在0~n-1中,在最后面补一个0
*build_sa( ,n+1, );//注意是n+1;
*getHeight(,n);
*例如:
*n   = 8;
*num[]   = { 1, 1, 2, 1, 1, 1, 1, 2, $ };注意num最后一位为0,其他大于0
*rank[]  = { 4, 6, 8, 1, 2, 3, 5, 7, 0 };rank[0~n-1]为有效值,rank[n]必定为0无效值
*sa[]    = { 8, 3, 4, 5, 0, 6, 1, 7, 2 };sa[1~n]为有效值,sa[0]必定为n是无效值
*height[]= { 0, 0, 3, 2, 3, 1, 2, 0, 1 };height[2~n]为有效值
*
*/

int sa[MAXN];// Suffix array: sa[i] is the start index of the i-th smallest suffix of s
             // (all suffixes sorted in ascending order).
int t1[MAXN],t2[MAXN],c[MAXN];// Scratch buffers used while building sa; no initialisation required.
int rank[MAXN],height[MAXN];// rank[i]: position of suffix i inside sa; height[i]: LCP of suffixes sa[i-1] and sa[i].
//待排序的字符串放在s数组中,从s[0]到s[n-1],长度为n,且最大值小于m,
//除s[n-1]外的所有s[i]都大于0,r[n-1]=0
//函数结束以后结果放在sa数组中
void build_sa(int s[],int n,int m)
{
    int i,j,p,*x=t1,*y=t2;
    //第一轮基数排序,如果s的最大值很大,可改为快速排序
    for(i=0;i<m;i++)c[i]=0;
    for(i=0;i<n;i++)c[x[i]=s[i]]++;
    for(i=1;i<m;i++)c[i]+=c[i-1];
    for(i=n-1;i>=0;i--)sa[--c[x[i]]]=i;
    for(j=1;j<=n;j<<=1)
    {
        p=0;
        //直接利用sa数组排序第二关键字
        for(i=n-j;i<n;i++)y[p++]=i;//后面的j个数第二关键字为空的最小
        for(i=0;i<n;i++)if(sa[i]>=j)y[p++]=sa[i]-j;
        //这样数组y保存的就是按照第二关键字排序的结果
        //基数排序第一关键字
        for(i=0;i<m;i++)c[i]=0;
        for(i=0;i<n;i++)c[x[y[i]]]++;
        for(i=1;i<m;i++)c[i]+=c[i-1];
        for(i=n-1;i>=0;i--)sa[--c[x[y[i]]]]=y[i];
        //根据sa和x数组计算新的x数组
        swap(x,y);
        p=1;x[sa[0]]=0;
        for(i=1;i<n;i++)
            x[sa[i]]=y[sa[i-1]]==y[sa[i]] && y[sa[i-1]+j]==y[sa[i]+j]?p-1:p++;
        if(p>=n)break;
        m=p;//下次基数排序的最大值
    }
}
void getHeight(int s[],int n)
{
    int i,j,k=0;
    for(i=0;i<=n;i++)rank[sa[i]]=i;
    for(i=0;i<n;i++)
    {
        if(k)k--;
        j=sa[rank[i]-1];
        while(s[i+k]==s[j+k])k++;
        height[rank[i]]=k;
    }
}
View Code

 

参考资料:

(某神论文): http://wenku.baidu.com/link?url=en6joFZZqv55q8AfAh8abnHbSyE6lu7C5maUFR5fTTOSUvus-tdZazcFOMxwv9nv-DvxxkGhFzhtSo4gen2l0ozeCTVywD52-1nOjCMjZdq

height[i]=LCP(suf(sa[i]),suf(sa[i-1])).

 1,询问两个子串的最长公共前缀。

由height数组的性质,设rank[i]<rank[j],则LCP(suf(i),suf(j))=min(height[rank[i]+1..rank[j]])。转化为RMQ问题,O(nlogn)预处理,O(1)查询。

2,求最长可重叠的重复子串。

由于后缀数组按字典序构造,最长重复子串一定出现在名次相邻的两个后缀之间,因此只需求height的最大值即可。

3,求最长不可重叠的重复子串长度。

二分查找长度len,把height>=len的相邻后缀分为一组,看组内是否存在两个不重叠的后缀,即max(sa)-min(sa)>=len。(下面的代码针对poj1743的差分数组,所以要求严格大于:sa[j]-sa[k]>len。)

bool check(int k)
{
    int Max=sa[1],Min=sa[1];
    REP(i,2,n){
        if(height[i]<k) Max=Min=sa[i];
        else{
            Max=max(sa[i],Max);
            Min=min(sa[i],Min);
            if(Max-Min>k) return 1; ///注意这里的>号而不是>=
        }
    }
    return 0;
}

        // Binary search on the candidate length m in [1, n/2]; check(m) decides feasibility.
        int l=1,r=n/2;
        while(l<=r){
            int m=(l+r)>>1;
            if(check(m)) ans=max(ans,m),l=m+1; // feasible: remember m and try a longer length
            else r=m-1;                        // infeasible: try a shorter length
        }
View Code

 

poj1743:

求某一变化出现多次的子串长度。

前后作差,转化为求最长不可重叠的重复子串长度。

#include<iostream>
#include<cstdio>
#include<cstring>
#include<cstdlib>
#include<algorithm>
#include<vector>
#include<stack>
#include<queue>
#include<set>
#include<map>
#include<string>
#include<math.h>
#include<cctype>
#define ll long long
#define REP(i,a,b) for(int (i)=(a);(i)<=(b);(i)++)
#define REPP(i,a,b,t) for(int (i)=(a);(i)<=(b);(i)+=(t))
#define rep(i,a,b) for(int (i)=(a);(i)>=(b);(i)--)
#define repp(i,a,b,t) for(int (i)=(a);(i)>=(b);(i)-=(t))
#define PII pair<int,int>
#define fst first
#define snd second
#define MP make_pair
#define PB push_back
#define RI(x) scanf("%d",&(x))
#define RII(x,y) scanf("%d%d",&(x),&(y))
#define RIII(x,y,z) scanf("%d%d%d",&(x),&(y),&(z))
#define DRI(x) int (x);scanf("%d",&(x))
#define DRII(x,y) int (x),(y);scanf("%d%d",&(x),&(y))
#define DRIII(x,y,z) int (x),(y),(z);scanf("%d%d%d",&(x),&(y),&(z))
#define RS(x) scanf("%s",x)
#define RSS(x,y) scanf("%s%s",x,y)
#define DRS(x) char x[maxn];scanf("%s",x)
#define DRSS(x,y) char x[maxn],y[maxn];scanf("%s%s",x,y)
#define MS0(a) memset((a),0,sizeof((a)))
#define MS1(a) memset((a),-1,sizeof((a)))
#define MS(a,b) memset((a),(b),sizeof((a)))
#define ALL(v) v.begin(),v.end()
#define SZ(v) (int)(v).size()

using namespace std;

const int maxn=1000100; // capacity of every global array below
const int INF=(1<<29);
const double EPS=0.0000000001;
const double Pi=acos(-1.0);

int a[maxn],n;          // a[1..n]: input values read in main; n: their count
int str[maxn];          // difference sequence handed to build_sa
int sa[maxn],height[maxn],rank[maxn]; // suffix array, LCP of neighbours in sa, inverse of sa

int t1[maxn],t2[maxn],c[maxn];        // scratch buffers for build_sa
void build_sa(int *s,int n,int m)
{
    int i,j,p,*x=t1,*y=t2;
    REP(i,0,m-1) c[i]=0;
    REP(i,0,n-1) c[x[i]=s[i]]++;
    REP(i,1,m-1) c[i]+=c[i-1];
    rep(i,n-1,0) sa[--c[x[i]]]=i;
    for(j=1;j<=n;j<<=1){
        p=0;
        REP(i,n-j,n-1) y[p++]=i;
        REP(i,0,n-1) if(sa[i]>=j) y[p++]=sa[i]-j;
        REP(i,0,m-1) c[i]=0;
        REP(i,0,n-1) c[x[y[i]]]++;
        REP(i,1,m-1) c[i]+=c[i-1];
        rep(i,n-1,0) sa[--c[x[y[i]]]]=y[i];
        swap(x,y);
        p=1;x[sa[0]]=0;
        REP(i,1,n-1) x[sa[i]]=y[sa[i-1]]==y[sa[i]]&&y[sa[i-1]+j]==y[sa[i]+j]?p-1:p++;
        if(p>=n) break;
        m=p;
    }
}

void getHeight(int *s,int n)
{
    int i,j,k=0;
    REP(i,0,n) rank[sa[i]]=i;
    REP(i,0,n-1){
        if(k) k--;
        j=sa[rank[i]-1];
        while(s[i+k]==s[j+k]) k++;
        height[rank[i]]=k;
    }
}

bool check(int k)
{
    int Max=sa[1],Min=sa[1];
    REP(i,2,n){
        if(height[i]<k) Max=Min=sa[i];
        else{
            Max=max(sa[i],Max);
            Min=min(sa[i],Min);
            if(Max-Min>k) return 1;
        }
    }
    return 0;
}

int main()
{
    freopen("in.txt","r",stdin);
    while(cin>>n,n){
        REP(i,1,n) RI(a[i]);
        REP(i,2,n) str[i-2]=a[i]-a[i-1]+100;
        n--;
        str[n]=0;
        build_sa(str,n+1,200);
        getHeight(str,n);
        int ans=0;
        int l=1,r=n/2;
        while(l<=r){
            int m=(l+r)>>1;
            if(check(m)) ans=max(ans,m),l=m+1;
            else r=m-1;
        }
        if(ans+1>=5) cout<<ans+1<<endl;
        else cout<<0<<endl;
    }
    return 0;
}
View Code

 poj3261:

求至少出现k次的子串,可重叠。

二分查找长度,看长度是否符合条件即出现k次或以上。对height分组,是否有一组区间长度>=k-1(k-1个height对应k个后缀)。

#include<iostream>
#include<cstdio>
#include<cstring>
#include<cstdlib>
#include<algorithm>
#include<vector>
#include<stack>
#include<queue>
#include<set>
#include<map>
#include<string>
#include<math.h>
#include<cctype>
#define ll long long
#define REP(i,a,b) for(int (i)=(a);(i)<=(b);(i)++)
#define REPP(i,a,b,t) for(int (i)=(a);(i)<=(b);(i)+=(t))
#define rep(i,a,b) for(int (i)=(a);(i)>=(b);(i)--)
#define repp(i,a,b,t) for(int (i)=(a);(i)>=(b);(i)-=(t))
#define PII pair<int,int>
#define fst first
#define snd second
#define MP make_pair
#define PB push_back
#define RI(x) scanf("%d",&(x))
#define RII(x,y) scanf("%d%d",&(x),&(y))
#define RIII(x,y,z) scanf("%d%d%d",&(x),&(y),&(z))
#define DRI(x) int (x);scanf("%d",&(x))
#define DRII(x,y) int (x),(y);scanf("%d%d",&(x),&(y))
#define DRIII(x,y,z) int (x),(y),(z);scanf("%d%d%d",&(x),&(y),&(z))
#define RS(x) scanf("%s",x)
#define RSS(x,y) scanf("%s%s",x,y)
#define DRS(x) char x[maxn];scanf("%s",x)
#define DRSS(x,y) char x[maxn],y[maxn];scanf("%s%s",x,y)
#define MS0(a) memset((a),0,sizeof((a)))
#define MS1(a) memset((a),-1,sizeof((a)))
#define MS(a,b) memset((a),(b),sizeof((a)))
#define ALL(v) v.begin(),v.end()
#define SZ(v) (int)(v).size()

using namespace std;

const int maxn=1000100; // capacity of every global array below
const int INF=(1<<29);
const double EPS=0.0000000001;
const double Pi=acos(-1.0);

int n,k;                // n: sequence length; k: required number of occurrences
int a[maxn];            // input sequence, a[n] is set to the 0 sentinel in main
int sa[maxn],height[maxn],rank[maxn]; // suffix array, LCP of neighbours in sa, inverse of sa
int t1[maxn],t2[maxn],c[maxn];        // scratch buffers for build_sa

void build_sa(int *s,int n,int m)
{
    int i,j,p,*x=t1,*y=t2;
    REP(i,0,m-1) c[i]=0;
    REP(i,0,n-1) c[x[i]=s[i]]++;
    REP(i,1,m-1) c[i]+=c[i-1];
    rep(i,n-1,0) sa[--c[x[i]]]=i;
    for(j=1;j<=n;j<<=1){
        p=0;
        REP(i,n-j,n-1) y[p++]=i;
        REP(i,0,n-1) if(sa[i]>=j) y[p++]=sa[i]-j;
        REP(i,0,m-1) c[i]=0;
        REP(i,0,n-1) c[x[y[i]]]++;
        REP(i,1,m-1) c[i]+=c[i-1];
        rep(i,n-1,0) sa[--c[x[y[i]]]]=y[i];
        swap(x,y);
        p=1;x[sa[0]]=0;
        REP(i,1,n-1) x[sa[i]]=y[sa[i-1]]==y[sa[i]]&&y[sa[i-1]+j]==y[sa[i]+j]?p-1:p++;
        if(p>=n) break;
        m=p;
    }
}

void getHeight(int *s,int n)
{
    int i,j,k=0;
    REP(i,0,n) rank[sa[i]]=i;
    REP(i,0,n-1){
        if(k) k--;
        j=sa[rank[i]-1];
        while(s[i+k]==s[j+k]) k++;
        height[rank[i]]=k;
    }
}

bool check(int len)
{
    int cnt=0;
    REP(i,2,n){
        if(height[i]>=len) cnt++;
        else cnt=0;
        if(cnt+1>=k) return 1;
    }
    return 0;
}

// poj3261 driver: for each (n,k) case, finds the longest pattern occurring at
// least k times (overlaps allowed) by binary search over the length.
int main()
{
    // NOTE(review): debugging redirect; if in.txt is missing freopen fails and
    // nothing can be read from stdin. Remove before submitting to a judge.
    freopen("in.txt","r",stdin);
    while(cin>>n>>k){
        // build_sa needs the trailing 0 to be the unique smallest symbol, so
        // every input value is shifted up by one: an input of 0 would otherwise
        // be indistinguishable from the sentinel. Order and equality between
        // values are unchanged, hence so are sa and height.
        for(int i=0;i<n;++i){
            scanf("%d",&a[i]);
            ++a[i];
        }
        a[n]=0;
        build_sa(a,n+1,1000010);
        getHeight(a,n);
        int ans=0;
        int l=1,r=n;
        while(l<=r){
            int m=(l+r)>>1;
            if(check(m)) ans=max(m,ans),l=m+1;
            else r=m-1;
        }
        cout<<ans<<endl;
    }
    return 0;
}
View Code

SPOJ DISUBSTR && SPOJ SUBST1:

 求一个串中的所有不同的子串个数。

子串就是后缀的前缀,height数组就是LCP,后面的就是不同的了,因此只要求所有的n-sa-height的和就行了。详情见上面某神论文。

#include<iostream>
#include<cstdio>
#include<cstring>
#include<cstdlib>
#include<algorithm>
#include<vector>
#include<stack>
#include<queue>
#include<set>
#include<map>
#include<string>
#include<math.h>
#include<cctype>
#define ll long long
#define REP(i,a,b) for(int (i)=(a);(i)<=(b);(i)++)
#define REPP(i,a,b,t) for(int (i)=(a);(i)<=(b);(i)+=(t))
#define rep(i,a,b) for(int (i)=(a);(i)>=(b);(i)--)
#define repp(i,a,b,t) for(int (i)=(a);(i)>=(b);(i)-=(t))
#define PII pair<int,int>
#define fst first
#define snd second
#define MP make_pair
#define PB push_back
#define RI(x) scanf("%d",&(x))
#define RII(x,y) scanf("%d%d",&(x),&(y))
#define RIII(x,y,z) scanf("%d%d%d",&(x),&(y),&(z))
#define DRI(x) int (x);scanf("%d",&(x))
#define DRII(x,y) int (x),(y);scanf("%d%d",&(x),&(y))
#define DRIII(x,y,z) int (x),(y),(z);scanf("%d%d%d",&(x),&(y),&(z))
#define RS(x) scanf("%s",x)
#define RSS(x,y) scanf("%s%s",x,y)
#define DRS(x) char x[maxn];scanf("%s",x)
#define DRSS(x,y) char x[maxn],y[maxn];scanf("%s%s",x,y)
#define MS0(a) memset((a),0,sizeof((a)))
#define MS1(a) memset((a),-1,sizeof((a)))
#define MS(a,b) memset((a),(b),sizeof((a)))
#define ALL(v) v.begin(),v.end()
#define SZ(v) (int)(v).size()

using namespace std;

const int maxn=1000100; // capacity of every global array below
const int INF=(1<<29);
const double EPS=0.0000000001;
const double Pi=acos(-1.0);

char s[maxn];           // input string
int a[maxn],n;          // a: s widened to int (including the terminating NUL); n: strlen(s)
int sa[maxn],height[maxn],rank[maxn]; // suffix array, LCP of neighbours in sa, inverse of sa
int t1[maxn],t2[maxn],c[maxn];        // scratch buffers for build_sa

void build_sa(int *s,int n,int m)
{
    int i,j,p,*x=t1,*y=t2;
    REP(i,0,m-1) c[i]=0;
    REP(i,0,n-1) c[x[i]=s[i]]++;
    REP(i,1,m-1) c[i]+=c[i-1];
    rep(i,n-1,0) sa[--c[x[i]]]=i;
    for(j=1;j<=n;j<<=1){
        p=0;
        REP(i,n-j,n-1) y[p++]=i;
        REP(i,0,n-1) if(sa[i]>=j) y[p++]=sa[i]-j;
        REP(i,0,m-1) c[i]=0;
        REP(i,0,n-1) c[x[y[i]]]++;
        REP(i,1,m-1) c[i]+=c[i-1];
        rep(i,n-1,0) sa[--c[x[y[i]]]]=y[i];
        swap(x,y);
        p=1;x[sa[0]]=0;
        REP(i,1,n-1) x[sa[i]]=y[sa[i-1]]==y[sa[i]]&&y[sa[i-1]+j]==y[sa[i]+j]?p-1:p++;
        if(p>=n) break;
        m=p;
    }
}

void getHeight(int *s,int n)
{
    int i,j,k=0;
    REP(i,0,n) rank[sa[i]]=i;
    REP(i,0,n-1){
        if(k) k--;
        j=sa[rank[i]-1];
        while(s[i+k]==s[j+k]) k++;
        height[rank[i]]=k;
    }
}

int main()
{
    freopen("in.txt","r",stdin);
    DRI(T);
    while(T--){
        scanf("%s",s);
        n=strlen(s);
        REP(i,0,n) a[i]=s[i];
        build_sa(a,n+1,300);
        getHeight(a,n);
        ll ans=0;
        REP(i,1,n) ans+=n-sa[i]-height[i];
        cout<<ans<<endl;
    }
    return 0;
}
View Code

 poj2406:

找字符串的循环节。

kmp水过。。。

#include<iostream>
#include<cstdio>
#include<cstring>
#include<cstdlib>
#include<algorithm>
#include<vector>
#include<stack>
#include<queue>
#include<set>
#include<map>
#include<string>
#include<math.h>
#include<cctype>
#define ll long long
#define REP(i,a,b) for(int (i)=(a);(i)<=(b);(i)++)
#define REPP(i,a,b,t) for(int (i)=(a);(i)<=(b);(i)+=(t))
#define rep(i,a,b) for(int (i)=(a);(i)>=(b);(i)--)
#define repp(i,a,b,t) for(int (i)=(a);(i)>=(b);(i)-=(t))
#define PII pair<int,int>
#define fst first
#define snd second
#define MP make_pair
#define PB push_back
#define RI(x) scanf("%d",&(x))
#define RII(x,y) scanf("%d%d",&(x),&(y))
#define RIII(x,y,z) scanf("%d%d%d",&(x),&(y),&(z))
#define DRI(x) int (x);scanf("%d",&(x))
#define DRII(x,y) int (x),(y);scanf("%d%d",&(x),&(y))
#define DRIII(x,y,z) int (x),(y),(z);scanf("%d%d%d",&(x),&(y),&(z))
#define RS(x) scanf("%s",x)
#define RSS(x,y) scanf("%s%s",x,y)
#define DRS(x) char x[maxn];scanf("%s",x)
#define DRSS(x,y) char x[maxn],y[maxn];scanf("%s%s",x,y)
#define MS0(a) memset((a),0,sizeof((a)))
#define MS1(a) memset((a),-1,sizeof((a)))
#define MS(a,b) memset((a),(b),sizeof((a)))
#define ALL(v) v.begin(),v.end()
#define SZ(v) (int)(v).size()

using namespace std;

const int maxn=1000100; // capacity of the global arrays below
const int INF=(1<<29);
const double EPS=0.0000000001;
const double Pi=acos(-1.0);

char s[maxn];           // input string
int Next[maxn];         // KMP failure table filled by getNext (Next[0] = -1)

// Fills the global Next[0..n] with the KMP failure table of s[0..n-1]:
// Next[0] is -1 and Next[i] is the length of the longest proper border of
// the prefix of length i.
void getNext(char *s,int n)
{
    Next[0]=-1;
    int pos=0,border=-1;
    while(pos<n&&border<n){
        if(border!=-1&&s[pos]!=s[border]){
            // mismatch: fall back to the next shorter border
            border=Next[border];
            continue;
        }
        ++pos;
        ++border;
        Next[pos]=border;
    }
}

// poj2406 driver (KMP): for every input line up to "." prints how many times
// the smallest period repeats, or 1 when the string is not a whole number of periods.
int main()
{
    // NOTE(review): debugging redirect left in by the author.
    freopen("in.txt","r",stdin);
    while(~scanf("%s",s)){
        if(strcmp(s,".")==0) break;
        int n=strlen(s);
        getNext(s,n);
        const int period=n-Next[n]; // candidate smallest period
        if(n%period==0) cout<<n/period<<endl;
        else puts("1");
    }
    return 0;
}
View Code

 后缀数组解法,很简单,枚举长度k,如果能构成循环,则LCP(suf(0),suf(k))==n-k。由于suf(0)是固定的,所以不需要用RMQ,预处理一下就行了。

然而倍增的nlogn预处理被卡了。。。

#include<iostream>
#include<cstdio>
#include<cstring>
#include<cstdlib>
#include<algorithm>
#include<vector>
#include<stack>
#include<queue>
#include<set>
#include<map>
#include<string>
#include<math.h>
#include<cctype>
#define ll long long
#define REP(i,a,b) for(int (i)=(a);(i)<=(b);(i)++)
#define REPP(i,a,b,t) for(int (i)=(a);(i)<=(b);(i)+=(t))
#define rep(i,a,b) for(int (i)=(a);(i)>=(b);(i)--)
#define repp(i,a,b,t) for(int (i)=(a);(i)>=(b);(i)-=(t))
#define PII pair<int,int>
#define fst first
#define snd second
#define MP make_pair
#define PB push_back
#define RI(x) scanf("%d",&(x))
#define RII(x,y) scanf("%d%d",&(x),&(y))
#define RIII(x,y,z) scanf("%d%d%d",&(x),&(y),&(z))
#define DRI(x) int (x);scanf("%d",&(x))
#define DRII(x,y) int (x),(y);scanf("%d%d",&(x),&(y))
#define DRIII(x,y,z) int (x),(y),(z);scanf("%d%d%d",&(x),&(y),&(z))
#define RS(x) scanf("%s",x)
#define RSS(x,y) scanf("%s%s",x,y)
#define DRS(x) char x[maxn];scanf("%s",x)
#define DRSS(x,y) char x[maxn],y[maxn];scanf("%s%s",x,y)
#define MS0(a) memset((a),0,sizeof((a)))
#define MS1(a) memset((a),-1,sizeof((a)))
#define MS(a,b) memset((a),(b),sizeof((a)))
#define ALL(v) v.begin(),v.end()
#define SZ(v) (int)(v).size()

using namespace std;

const int maxn=1000100; // capacity of every global array below
const int INF=(1<<29);
const double EPS=0.0000000001;
const double Pi=acos(-1.0);

char s[maxn];           // input string
int str[maxn],n;        // str: s widened to int (including the terminating NUL); n: strlen(s)
int sa[maxn],height[maxn],rank[maxn]; // suffix array, LCP of neighbours in sa, inverse of sa
int t1[maxn],t2[maxn],c[maxn];        // scratch buffers for build_sa
int LCP[maxn];          // LCP[r]: LCP of suffix 0 with the suffix of rank r (filled in main)

void build_sa(int *s,int n,int m)
{
    int i,j,p,*x=t1,*y=t2;
    REP(i,0,m-1) c[i]=0;
    REP(i,0,n-1) c[x[i]=s[i]]++;
    REP(i,1,m-1) c[i]+=c[i-1];
    rep(i,n-1,0) sa[--c[x[i]]]=i;
    for(j=1;j<=n;j<<=1){
        p=0;  
        REP(i,n-j,n-1) y[p++]=i;
        REP(i,0,n-1) if(sa[i]>=j) y[p++]=sa[i]-j;
        REP(i,0,m-1) c[i]=0;
        REP(i,0,n-1) c[x[y[i]]]++;
        REP(i,1,m-1) c[i]+=c[i-1];
        rep(i,n-1,0) sa[--c[x[y[i]]]]=y[i];
        swap(x,y);
        p=1;x[sa[0]]=0;
        p=1;x[sa[0]]=0;
        REP(i,1,n-1)
            x[sa[i]]=y[sa[i-1]]==y[sa[i]]&&y[sa[i-1]+j]==y[sa[i]+j]?p-1:p++;
        if(p>=n) break;
        m=p;
    }
}

void getHeight(int *s,int n)
{
    int i,j,k=0;
    REP(i,0,n) rank[sa[i]]=i;
    REP(i,0,n-1){
        if(k) k--;
        j=sa[rank[i]-1];
        while(s[i+k]==s[j+k]) k++;
        height[rank[i]]=k;
    }
}
 
int main()
{
    freopen("in.txt","r",stdin);
    while(~RS(s)){
        if(strcmp(s,".")==0) return 0;
        n=strlen(s);
        REP(i,0,n) str[i]=s[i];
        build_sa(str,n+1,300);
        getHeight(str,n);
        int Min=height[rank[0]+1];
        REP(i,rank[0]+1,n){
            LCP[i]=Min=min(height[i],Min);
        }
        Min=height[rank[0]];
        rep(i,rank[0],2){
            LCP[i-1]=Min=min(height[i],Min);
        }
        int ans=1;
        REP(k,1,n/2){
            if(n%k==0){
                if(LCP[rank[k]]==n-k){
                    ans=n/k;break;
                }
            }
        }
        cout<<ans<<endl;
    }
}
View Code

 SPOJ REPEATS :

求字符串中连续出现次数最多的串的次数。

枚举长度,详解见上面的某神论文。

后缀数组+RMQ查询。不会ST,怒建一颗线段树!!!1890ms线段树暴力水过。。

#include<iostream>
#include<cstdio>
#include<cstring>
#include<cstdlib>
#include<algorithm>
#include<vector>
#include<stack>
#include<queue>
#include<set>
#include<map>
#include<string>
#include<math.h>
#include<cctype>
#define ll long long
#define REP(i,a,b) for(int (i)=(a);(i)<=(b);(i)++)
#define REPP(i,a,b,t) for(int (i)=(a);(i)<=(b);(i)+=(t))
#define rep(i,a,b) for(int (i)=(a);(i)>=(b);(i)--)
#define repp(i,a,b,t) for(int (i)=(a);(i)>=(b);(i)-=(t))
#define PII pair<int,int>
#define fst first
#define snd second
#define MP make_pair
#define PB push_back
#define RI(x) scanf("%d",&(x))
#define RII(x,y) scanf("%d%d",&(x),&(y))
#define RIII(x,y,z) scanf("%d%d%d",&(x),&(y),&(z))
#define DRI(x) int (x);scanf("%d",&(x))
#define DRII(x,y) int (x),(y);scanf("%d%d",&(x),&(y))
#define DRIII(x,y,z) int (x),(y),(z);scanf("%d%d%d",&(x),&(y),&(z))
#define RS(x) scanf("%s",x)
#define RSS(x,y) scanf("%s%s",x,y)
#define DRS(x) char x[maxn];scanf("%s",x)
#define DRSS(x,y) char x[maxn],y[maxn];scanf("%s%s",x,y)
#define MS0(a) memset((a),0,sizeof((a)))
#define MS1(a) memset((a),-1,sizeof((a)))
#define MS(a,b) memset((a),(b),sizeof((a)))
#define ALL(v) v.begin(),v.end()
#define SZ(v) (int)(v).size()

using namespace std;

const int maxn=1000100; // capacity of every global array below
const int INF=(1<<29);  // "no value" result for range-minimum queries
const double EPS=0.0000000001;
const double Pi=acos(-1.0);

char s[maxn];           // input characters
int str[maxn],n;        // str: s widened to int with a 0 sentinel at str[n]; n: text length
int sa[maxn],height[maxn],rank[maxn]; // suffix array, LCP of neighbours in sa, inverse of sa
int t1[maxn],t2[maxn],c[maxn];        // scratch buffers for build_sa

void build_sa(int *s,int n,int m)
{
    int i,j,p,*x=t1,*y=t2;
    REP(i,0,m-1) c[i]=0;
    REP(i,0,n-1) c[x[i]=s[i]]++;
    REP(i,1,m-1) c[i]+=c[i-1];
    rep(i,n-1,0) sa[--c[x[i]]]=i;
    for(j=1;j<=n;j<<=1){
        p=0;
        REP(i,n-j,n-1) y[p++]=i;
        REP(i,0,n-1) if(sa[i]>=j) y[p++]=sa[i]-j;
        REP(i,0,m-1) c[i]=0;
        REP(i,0,n-1) c[x[y[i]]]++;
        REP(i,1,m-1) c[i]+=c[i-1];
        rep(i,n-1,0) sa[--c[x[y[i]]]]=y[i];
        swap(x,y);
        p=1;x[sa[0]]=0;
        REP(i,1,n-1) x[sa[i]]=y[sa[i-1]]==y[sa[i]]&&y[sa[i-1]+j]==y[sa[i]+j]?p-1:p++;
        if(p>=n) break;
        m=p;
    }
}

void getHeight(int *s,int n)
{
    int i,j,k=0;
    REP(i,0,n) rank[sa[i]]=i;
    REP(i,0,n-1){
        if(k) k--;
        j=sa[rank[i]-1];
        while(s[i+k]==s[j+k]) k++;
        height[rank[i]]=k;
    }
}

// Segment tree over height[1..n]: Min[rt] is the minimum of the node's range.
int Min[maxn<<2];
// Argument triples (l, r, rt) for the left / right child; both expect locals
// named l, r, m and rt at the point of use.
#define lson l,m,rt<<1
#define rson m+1,r,rt<<1|1

// Recomputes node rt of the segment tree as the minimum of its two children.
void push_up(int rt)
{
    const int leftMin=Min[rt<<1];
    const int rightMin=Min[rt<<1|1];
    Min[rt]=leftMin<rightMin?leftMin:rightMin;
}

// Builds the segment tree node rt covering height[l..r] (recursively).
void build(int l,int r,int rt)
{
    if(l==r){
        // leaf: a single height value
        Min[rt]=height[l];
        return;
    }
    const int mid=(l+r)>>1;
    build(l,mid,rt<<1);
    build(mid+1,r,rt<<1|1);
    push_up(rt);
}

// Returns the minimum of height over [L,R] restricted to node rt, which
// covers [l,r]. Yields INF when the node contributes nothing to the query.
int query(int L,int R,int l,int r,int rt)
{
    if(L<=l&&r<=R) return Min[rt]; // node fully inside the query range
    const int mid=(l+r)>>1;
    int best=INF;
    if(L<=mid){
        const int leftMin=query(L,R,l,mid,rt<<1);
        if(leftMin<best) best=leftMin;
    }
    if(R>mid){
        const int rightMin=query(L,R,mid+1,r,rt<<1|1);
        if(rightMin<best) best=rightMin;
    }
    return best;
}

int lcp(int a,int b)
{
    a=rank[a];b=rank[b];
    //cout<<a<<" "<<b<<endl;
    if(a>b) swap(a,b);
    return query(a+1,b,1,n,1);
}

int main()
{
   // freopen("in.txt","r",stdin);
    DRI(T);
    while(T--){
        RI(n);
        REP(i,0,n-1) cin>>s[i];
        REP(i,0,n-1) str[i]=s[i];
        str[n]=0;
        build_sa(str,n+1,300);
        getHeight(str,n);
        build(1,n,1);
        int ans=1;
        REP(L,1,n){
            for(int i=0;i+L<n;i+=L){
                int K=0;
                if(str[i]==str[i+L]){
                    K=lcp(i,i+L);
                }
                int cnt=K/L+1;
                if(K%L){
                    int t=i-(L-K%L);
                    //cout<<"t="<<t<<endl;
                    if(t>=0&&lcp(t,t+L)>=K) cnt++;
                }
                //if(cnt>ans) cout<<"L="<<L<<endl;
                ans=max(ans,cnt);
            }
        }
        cout<<ans<<endl;
    }
    return 0;
}
View Code

 抄了个ST的模板,330ms,果然快了很多

#include<iostream>
#include<cstdio>
#include<cstring>
#include<cstdlib>
#include<algorithm>
#include<vector>
#include<stack>
#include<queue>
#include<set>
#include<map>
#include<string>
#include<math.h>
#include<cctype>
#define ll long long
#define REP(i,a,b) for(int (i)=(a);(i)<=(b);(i)++)
#define REPP(i,a,b,t) for(int (i)=(a);(i)<=(b);(i)+=(t))
#define rep(i,a,b) for(int (i)=(a);(i)>=(b);(i)--)
#define repp(i,a,b,t) for(int (i)=(a);(i)>=(b);(i)-=(t))
#define PII pair<int,int>
#define fst first
#define snd second
#define MP make_pair
#define PB push_back
#define RI(x) scanf("%d",&(x))
#define RII(x,y) scanf("%d%d",&(x),&(y))
#define RIII(x,y,z) scanf("%d%d%d",&(x),&(y),&(z))
#define DRI(x) int (x);scanf("%d",&(x))
#define DRII(x,y) int (x),(y);scanf("%d%d",&(x),&(y))
#define DRIII(x,y,z) int (x),(y),(z);scanf("%d%d%d",&(x),&(y),&(z))
#define RS(x) scanf("%s",x)
#define RSS(x,y) scanf("%s%s",x,y)
#define DRS(x) char x[maxn];scanf("%s",x)
#define DRSS(x,y) char x[maxn],y[maxn];scanf("%s%s",x,y)
#define MS0(a) memset((a),0,sizeof((a)))
#define MS1(a) memset((a),-1,sizeof((a)))
#define MS(a,b) memset((a),(b),sizeof((a)))
#define ALL(v) v.begin(),v.end()
#define SZ(v) (int)(v).size()

using namespace std;

const int maxn=1000100; // capacity of every global array below
const int INF=(1<<29);
const double EPS=0.0000000001;
const double Pi=acos(-1.0);

char s[maxn];           // input characters
int str[maxn],n;        // str: s widened to int with a 0 sentinel at str[n]; n: text length
int sa[maxn],height[maxn],rank[maxn]; // suffix array, LCP of neighbours in sa, inverse of sa
int t1[maxn],t2[maxn],c[maxn];        // scratch buffers for build_sa

void build_sa(int *s,int n,int m)
{
    int i,j,p,*x=t1,*y=t2;
    REP(i,0,m-1) c[i]=0;
    REP(i,0,n-1) c[x[i]=s[i]]++;
    REP(i,1,m-1) c[i]+=c[i-1];
    rep(i,n-1,0) sa[--c[x[i]]]=i;
    for(j=1;j<=n;j<<=1){
        p=0;
        REP(i,n-j,n-1) y[p++]=i;
        REP(i,0,n-1) if(sa[i]>=j) y[p++]=sa[i]-j;
        REP(i,0,m-1) c[i]=0;
        REP(i,0,n-1) c[x[y[i]]]++;
        REP(i,1,m-1) c[i]+=c[i-1];
        rep(i,n-1,0) sa[--c[x[y[i]]]]=y[i];
        swap(x,y);
        p=1;x[sa[0]]=0;
        REP(i,1,n-1) x[sa[i]]=y[sa[i-1]]==y[sa[i]]&&y[sa[i-1]+j]==y[sa[i]+j]?p-1:p++;
        if(p>=n) break;
        m=p;
    }
}

void getHeight(int *s,int n)
{
    int i,j,k=0;
    REP(i,0,n) rank[sa[i]]=i;
    REP(i,0,n-1){
        if(k) k--;
        j=sa[rank[i]-1];
        while(s[i+k]==s[j+k]) k++;
        height[rank[i]]=k;
    }
}

// Sparse table: dp[i][j] is the minimum of the 2^j entries starting at index i.
// Note the footprint: maxn rows of 20 ints each.
int dp[maxn][20];
// mm[len] is floor(log2(len)) for len >= 1, with mm[0] = -1.
int mm[maxn];

void initRMQ(int n,int *b)
{
    mm[0]=-1;
    REP(i,1,n){
        mm[i]=((i&(i-1))==0)?mm[i-1]+1:mm[i-1];
        dp[i][0]=b[i];
    }
    REP(j,1,mm[n]){
        for(int i=1;i+(1<<j)-1<=n;i++){
            dp[i][j]=min(dp[i][j-1],dp[i+(1<<(j-1))][j-1]);
        }
    }
}

// Minimum of the table's source array over [x,y] (requires x <= y), obtained
// from two overlapping power-of-two blocks.
int rmq(int x,int y)
{
    const int lvl=mm[y-x+1];
    const int leftBlock=dp[x][lvl];
    const int rightBlock=dp[y-(1<<lvl)+1][lvl];
    return leftBlock<rightBlock?leftBlock:rightBlock;
}

int lcp(int a,int b)
{
    a=rank[a];b=rank[b];
    if(a>b) swap(a,b);
    return rmq(a+1,b);
}

int main()
{
   // freopen("in.txt","r",stdin);
    DRI(T);
    while(T--){
        RI(n);
        REP(i,0,n-1) cin>>s[i];
        REP(i,0,n-1) str[i]=s[i];
        str[n]=0;
        build_sa(str,n+1,300);
        getHeight(str,n);
        initRMQ(n,height);
        int ans=1;
        REP(L,1,n){
            for(int i=0;i+L<n;i+=L){
                int K=0;
                if(str[i]==str[i+L]){
                    K=lcp(i,i+L);
                }
                int cnt=K/L+1;
                if(K%L){
                    int t=i-(L-K%L);
                    if(t>=0&&lcp(t,t+L)>=K) cnt++;
                }
                ans=max(ans,cnt);
            }
        }
        cout<<ans<<endl;
    }
    return 0;
}
View Code

 poj3693:

求字符串中具有循环节且循环次数最多的子串,输出字典序最小的。

找循环次数最多的方法和上面一样,关键是找字典序最小的,而字典序首先应该想到sa数组,因为sa数组就是后缀的字典序排名。方法是找到循环次数最大的所有可能的循环节长度L,然后按sa数组的顺序枚举,判断lcp(sa[i],sa[i]+L)是否不小于(>=)循环节长度*(循环次数-1),枚举到的第一个符合条件的一定是字典序最小的。

#include<iostream>
#include<cstdio>
#include<cstring>
#include<cstdlib>
#include<algorithm>
#include<set>
#define REP(i,a,b) for(int i=a;i<=b;i++)
#define rep(i,a,b) for(int i=a;i>=b;i--)
#define MS0(a) memset(a,0,sizeof(a))

using namespace std;

const int maxn=1000100; // capacity of every global array below

char s[maxn];           // input string
int str[maxn],n;        // str: s widened to int (including the terminating NUL); n: strlen(s)
int sa[maxn],height[maxn],rank[maxn]; // suffix array, LCP of neighbours in sa, inverse of sa
int t1[maxn],t2[maxn],c[maxn];        // scratch buffers for build_sa

// True when positions a and b carry the same (first key, second key) pair,
// i.e. r[a]==r[b] and r[a+l]==r[b+l]. The second pair is only read when the
// first keys match.
bool cmp(int *r,int a,int b,int l)
{
    if(r[a]!=r[b]) return false;
    return r[a+l]==r[b+l];
}

void build_sa(int *s,int n,int m)
{
    int i,j,p,*x=t1,*y=t2;
    REP(i,0,m-1) c[i]=0;
    REP(i,0,n-1) c[x[i]=s[i]]++;
    REP(i,1,m-1) c[i]+=c[i-1];
    rep(i,n-1,0) sa[--c[x[i]]]=i;
    for(j=1;j<=n;j<<=1){
        p=0;
        REP(i,n-j,n-1) y[p++]=i;
        REP(i,0,n-1) if(sa[i]>=j) y[p++]=sa[i]-j;
        REP(i,0,m-1) c[i]=0;
        REP(i,0,n-1) c[x[y[i]]]++;
        REP(i,1,m-1) c[i]+=c[i-1];
        rep(i,n-1,0) sa[--c[x[y[i]]]]=y[i];
        swap(x,y);
        p=1;x[sa[0]]=0;
        REP(i,1,n-1)
            x[sa[i]]=cmp(y,sa[i-1],sa[i],j)?p-1:p++;
        if(p>=n) break;
        m=p;
    }
}

void getHeight(int *s,int n)
{
    int i,j,k=0;
    REP(i,0,n) rank[sa[i]]=i;
    REP(i,0,n-1){
        if(k) k--;
        j=sa[rank[i]-1];
        while(s[i+k]==s[j+k]) k++;
        height[rank[i]]=k;
    }
}

// Sparse table: dp[i][j] is the minimum of the 2^j entries starting at index i.
// Note the footprint: maxn rows of 20 ints each.
int dp[maxn][20];
// mm[len] is floor(log2(len)) for len >= 1, with mm[0] = -1.
int mm[maxn];
void initRMQ(int n,int *b)
{
    mm[0]=-1;
    REP(i,1,n){
        mm[i]=((i&(i-1))==0)?mm[i-1]+1:mm[i-1];
        dp[i][0]=b[i];
    }
    REP(j,1,mm[n]){
        for(int i=1;i+(1<<j)-1<=n;i++)
            dp[i][j]=min(dp[i][j-1],dp[i+(1<<(j-1))][j-1]);
    }
}
// Minimum of the table's source array over [x,y] (requires x <= y), obtained
// from two overlapping power-of-two blocks.
int rmq(int x,int y)
{
    const int lvl=mm[y-x+1];
    const int leftBlock=dp[x][lvl];
    const int rightBlock=dp[y-(1<<lvl)+1][lvl];
    return leftBlock<rightBlock?leftBlock:rightBlock;
}

int lcp(int a,int b)
{
    a=rank[a];b=rank[b];
    if(a>b) swap(a,b);
    return rmq(a+1,b);
}

// poj3693 driver: for every input string (until "#") prints the substring
// with the largest number of consecutive repetitions, choosing the
// lexicographically smallest one on ties.
//
// Phase 1 finds the maximum repetition count and every period L that attains
// it; phase 2 walks the suffixes in sorted order and outputs the first start
// position at which some recorded period really repeats maxcnt times.
int main()
{
    // NOTE(review): debugging redirect left in by the author.
    freopen("in.txt","r",stdin);
    int casen=1;
    while(~scanf("%s",s)&&strcmp(s,"#")){
        n=strlen(s);
        for(int i=0;i<=n;++i) str[i]=s[i];
        build_sa(str,n+1,300);
        getHeight(str,n);
        initRMQ(n,height);

        int maxcnt=1;
        set<int> maxL; // periods reaching maxcnt repetitions (only kept for maxcnt >= 2)
        for(int L=1;L<=n;++L){
            for(int i=0;i+L<n;i+=L){
                if(str[i]!=str[i+L]) continue;
                const int K=lcp(i,i+L);
                int cnt=K/L+1;
                // Left extension: moving the start `back` characters to the left adds
                // one repetition only if those characters match as well, i.e.
                // LCP(t, t+L) >= back. Testing against K is not sufficient because
                // the match may stop before it reaches position i.
                const int back=L-K%L;
                if(K%L&&i-back>=0&&lcp(i-back,i-back+L)>=back) ++cnt;
                if(cnt>maxcnt){
                    // strictly better count: periods recorded for smaller counts are obsolete
                    maxcnt=cnt;
                    maxL.clear();
                }
                if(cnt>=2&&cnt==maxcnt) maxL.insert(L);
            }
        }

        printf("Case %d: ",casen++);
        if(maxcnt==1){
            // no repetition at all: the answer is the smallest single character
            char ans='z';
            for(int i=0;i<n;++i){
                if(s[i]<ans) ans=s[i];
            }
            cout<<ans<<endl;
        }
        else{
            bool flag=0;
            string ans="";
            for(int i=1;i<=n&&!flag;++i){
                for(set<int>::iterator it=maxL.begin();it!=maxL.end();++it){
                    const int L=*it;
                    // The whole repetition must fit inside the string; this also keeps
                    // sa[i]+L a valid index for lcp(). Periods are visited in
                    // increasing order, so larger ones cannot fit either.
                    if(sa[i]+L*maxcnt>n) break;
                    if(lcp(sa[i],sa[i]+L)>=L*maxcnt-L){
                        ans.assign(s+sa[i],L*maxcnt);
                        flag=1;
                        break;
                    }
                }
            }
            cout<<ans<<endl;
        }
    }
    return 0;
}
View Code

 poj2774:

求两个串的最长连续公共子串。

把两个串接在一起,中间加个'}',合成新串,再处理,本来加'#'的,居然WA了。。

一开始的思路是对长度进行二分,复杂度nlogn,因为如果直接找height数组的话不一定有两个排名相邻的串是原来在两个串中的,然而完全没有必要,因为一定至少存在排名相邻的两个串在不同的原串中,因此只要扫一遍height数组就可以了,复杂度n。

二分解法:

#include<iostream>
#include<cstdio>
#include<cstdlib>
#include<cstring>
#include<algorithm>
#define REP(i,a,b) for(int i=a;i<=b;i++)
#define rep(i,a,b) for(int i=a;i>=b;i--)
#define MS0(a) memset(a,0,sizeof(a))

using namespace std;

const int maxn=1000100; // capacity of every global array below

char s[maxn],t[maxn];   // the two input strings; main appends '}' and t to s
int str[maxn],n;        // str: joined string widened to int; n: its length
int sa[maxn],height[maxn],rank[maxn]; // suffix array, LCP of neighbours in sa, inverse of sa
int t1[maxn],t2[maxn],c[maxn];        // scratch buffers for build_sa
int ls,lt;              // lengths of s and t before joining; ls is also the separator index

// True when positions a and b carry the same (first key, second key) pair,
// i.e. r[a]==r[b] and r[a+l]==r[b+l]. The second pair is only read when the
// first keys match.
bool cmp(int *r,int a,int b,int l)
{
    if(r[a]!=r[b]) return false;
    return r[a+l]==r[b+l];
}

void build_sa(int *s,int n,int m)
{
    int i,j,p,*x=t1,*y=t2;
    REP(i,0,m-1) c[i]=0;
    REP(i,0,n-1) c[x[i]=s[i]]++;
    REP(i,1,m-1) c[i]+=c[i-1];
    rep(i,n-1,0) sa[--c[x[i]]]=i;
    for(j=1;j<=n;j<<=1){
        p=0;
        REP(i,n-j,n-1) y[p++]=i;
        REP(i,0,n-1) if(sa[i]>=j) y[p++]=sa[i]-j;
        REP(i,0,m-1) c[i]=0;
        REP(i,0,n-1) c[x[y[i]]]++;
        REP(i,1,m-1) c[i]+=c[i-1];
        rep(i,n-1,0) sa[--c[x[y[i]]]]=y[i];
        swap(x,y);
        p=1;x[sa[0]]=0;
        REP(i,1,n-1) x[sa[i]]=cmp(y,sa[i-1],sa[i],j)?p-1:p++;
        if(p>=n) break;
        m=p;
    }
}

void getHeight(int *s,int n)
{
    int i,j,k=0;
    REP(i,0,n) rank[sa[i]]=i;
    REP(i,0,n-1){
        if(k) k--;
        j=sa[rank[i]-1];
        while(s[i+k]==s[j+k]) k++;
        height[rank[i]]=k;
    }
}

bool check(int k)
{
    int Max=sa[1],Min=sa[1];
    REP(i,2,n){
        if(height[i]>=k){
            Max=max(sa[i],Max);
            Min=min(sa[i],Min);
        }
        else{
            if(Max>ls&&Min<ls) return 1;
            Max=sa[i];
            Min=sa[i];
        }
    }
    return 0;
}

// poj2774 driver (binary search): joins the two strings with a '}' separator
// and binary-searches the length of their longest common substring.
int main()
{
    // NOTE(review): debugging redirect left in by the author.
    freopen("in.txt","r",stdin);
    while(~scanf("%s%s",s,t)){
        ls=strlen(s);
        lt=strlen(t);
        s[ls]='}';s[ls+1]='\0';
        strcat(s,t);
        n=strlen(s);
        for(int i=0;i<=n;++i) str[i]=s[i];
        build_sa(str,n+1,300);
        getHeight(str,n);
        // Closed-interval search so that the upper bound itself is tested
        // (with `while(l<r)` and `r=m` the largest candidate was never checked,
        // e.g. when both strings are identical). A common substring cannot be
        // longer than the shorter input.
        int l=1,r=min(ls,lt),ans=0;
        while(l<=r){
            int m=(l+r)>>1;
            if(check(m)) ans=max(m,ans),l=m+1;
            else r=m-1;
        }
        cout<<ans<<endl;
    }
    return 0;
}
View Code

O(n)正解(指扫描height数组的部分):

#include<iostream>
#include<cstdio>
#include<cstdlib>
#include<cstring>
#include<algorithm>
#define REP(i,a,b) for(int i=a;i<=b;i++)
#define rep(i,a,b) for(int i=a;i>=b;i--)
#define MS0(a) memset(a,0,sizeof(a))

using namespace std;

const int maxn=1000100; // capacity of every global array below

char s[maxn],t[maxn];   // the two input strings; main appends '}' and t to s
int str[maxn],n;        // str: joined string widened to int; n: its length
int sa[maxn],height[maxn],rank[maxn]; // suffix array, LCP of neighbours in sa, inverse of sa
int t1[maxn],t2[maxn],c[maxn];        // scratch buffers for build_sa
int ls,lt;              // lengths of s and t before joining; ls is also the separator index

// True when positions a and b carry the same (first key, second key) pair,
// i.e. r[a]==r[b] and r[a+l]==r[b+l]. The second pair is only read when the
// first keys match.
bool cmp(int *r,int a,int b,int l)
{
    if(r[a]!=r[b]) return false;
    return r[a+l]==r[b+l];
}

void build_sa(int *s,int n,int m)
{
    int i,j,p,*x=t1,*y=t2;
    REP(i,0,m-1) c[i]=0;
    REP(i,0,n-1) c[x[i]=s[i]]++;
    REP(i,1,m-1) c[i]+=c[i-1];
    rep(i,n-1,0) sa[--c[x[i]]]=i;
    for(j=1;j<=n;j<<=1){
        p=0;
        REP(i,n-j,n-1) y[p++]=i;
        REP(i,0,n-1) if(sa[i]>=j) y[p++]=sa[i]-j;
        REP(i,0,m-1) c[i]=0;
        REP(i,0,n-1) c[x[y[i]]]++;
        REP(i,1,m-1) c[i]+=c[i-1];
        rep(i,n-1,0) sa[--c[x[y[i]]]]=y[i];
        swap(x,y);
        p=1;x[sa[0]]=0;
        REP(i,1,n-1) x[sa[i]]=cmp(y,sa[i-1],sa[i],j)?p-1:p++;
        if(p>=n) break;
        m=p;
    }
}

void getHeight(int *s,int n)
{
    int i,j,k=0;
    REP(i,0,n) rank[sa[i]]=i;
    REP(i,0,n-1){
        if(k) k--;
        j=sa[rank[i]-1];
        while(s[i+k]==s[j+k]) k++;
        height[rank[i]]=k;
    }
}

int main()
{
    freopen("in.txt","r",stdin);
    while(~scanf("%s%s",s,t)){
        ls=strlen(s);
        lt=strlen(t);
        s[ls]='}';s[ls+1]='\0';
        strcat(s,t);
        n=strlen(s);
        REP(i,0,n) str[i]=s[i];
        build_sa(str,n+1,300);
        getHeight(str,n);
        int ans=0;
        REP(i,2,n){
            if(height[i]>ans){
                int L=sa[i],R=sa[i-1];
                if(L>R) swap(L,R);
                if(L<ls&&R>ls){
                    ans=height[i];
                }
            }
        }
        cout<<ans<<endl;
    }
    return 0;
}
View Code

 poj3294:

给N个同样长度的字符串,求在N/2以上各字符串中出现的最长子串。若有多组解,按字典序输出。

二分求出最大长度,判断长度是否合法。判断合法的方法是,给height按长度分组,同组内的次数大于N/2,说明长度合法。

接着扫一遍height数组,输出答案,由于height数组是按sa数组来的,而sa数组是按字典序来的,正好符合题意的字典序。

注意点,最后一组要特判,因为可能没有进入循环。

#include<iostream>
#include<cstdio>
#include<cstring>
#include<cstdlib>
#include<algorithm>

using namespace std;

const int maxm=120;     // capacity for the number of input strings
const int LEN=1200;     // capacity for the length of one input string
const int maxn=1200100; // capacity for the joined text and the suffix-array buffers

char s[maxm][LEN];      // the input strings, s[1..N]
int N;                  // number of input strings
int ls;                 // strlen(s[1]); check() uses ls+1 as the width of one string's slot in t
char t[maxn];           // all strings joined, each followed by its own separator character
int str[maxn],n;        // str: t widened to int; n: strlen(t)
bool vis[maxm];         // per-string "already counted" marks used by check()
int sa[maxn],height[maxn],rank[maxn]; // suffix array, LCP of neighbours in sa, inverse of sa
int t1[maxn],t2[maxn],c[maxn];        // scratch buffers for build_sa

// True when positions a and b carry the same (first key, second key) pair,
// i.e. r[a]==r[b] and r[a+l]==r[b+l]. The second pair is only read when the
// first keys match.
bool cmp(int *r,int a,int b,int l)
{
    if(r[a]!=r[b]) return false;
    return r[a+l]==r[b+l];
}

void build_sa(int *str,int n,int m)
{
    int i,j,p,*x=t1,*y=t2;
    for(int i=0;i<m;i++) c[i]=0;
    for(int i=0;i<n;i++) c[x[i]=str[i]]++;
    for(int i=1;i<m;i++) c[i]+=c[i-1];
    for(int i=n-1;i>=0;i--) sa[--c[x[i]]]=i;
    for(int j=1;j<=n;j<<=1){
        p=0;
        for(int i=n-j;i<n;i++) y[p++]=i;
        for(int i=0;i<n;i++) if(sa[i]>=j) y[p++]=sa[i]-j;
        for(int i=0;i<m;i++) c[i]=0;
        for(int i=0;i<n;i++) c[x[y[i]]]++;
        for(int i=1;i<m;i++) c[i]+=c[i-1];
        for(int i=n-1;i>=0;i--) sa[--c[x[y[i]]]]=y[i];
        swap(x,y);
        p=1;x[sa[0]]=0;
        for(int i=1;i<n;i++) x[sa[i]]=cmp(y,sa[i-1],sa[i],j)?p-1:p++;
        if(p>=n) break;
        m=p;
    }
}

// Fills rank[] (the inverse of sa[0..n]) and height[], where height[r] is the
// length of the common prefix of the suffixes starting at sa[r] and sa[r-1].
//   str : the symbol array that was given to build_sa
//   n   : text length without the trailing 0
// The comparison loop has no bounds check; it presumably relies on the
// terminating 0 in str[n] being distinct from every other symbol.
void getHeight(int *str,int n)
{
    for(int r=0;r<=n;++r) rank[sa[r]]=r;

    int common=0;   // reused across iterations, dropping by at most one each step
    for(int p=0;p<n;++p){
        if(common>0) --common;
        const int before=sa[rank[p]-1];
        while(str[p+common]==str[before+common]) ++common;
        height[rank[p]]=common;
    }
}

// Returns true when some maximal run of height[] entries >= m touches suffixes
// from more than N/2 different strings. A suffix's string index is taken as
// its start position divided by (ls+1).
bool check(int m)
{
    const int stride=ls+1;
    int distinct=0;
    memset(vis,0,sizeof(vis));
    for(int r=2;r<=n;++r){
        if(height[r]<m){
            // Run ended: decide it, then start a fresh one.
            if(distinct>N/2) return true;
            distinct=0;
            memset(vis,0,sizeof(vis));
            continue;
        }
        const int owner[2]={sa[r-1]/stride,sa[r]/stride};
        for(int k=0;k<2;++k){
            if(vis[owner[k]]) continue;
            vis[owner[k]]=1;
            ++distinct;
        }
    }
    return distinct>N/2;
}

int main()
{
    freopen("in.txt","r",stdin);
    while(cin>>N&&N){
        for(int i=1;i<=N;i++) scanf("%s",s[i]);
        ls=strlen(s[1]);
        t[0]='\0';
        int ch=1;
        for(int i=1;i<=N;i++){
            strcat(t,s[i]);
            int lt=strlen(t);
            t[lt]=ch++;t[lt+1]='\0';
            if(ch==97) ch+=27;
        }
        n=strlen(t);
        for(int i=0;i<=n;i++) str[i]=t[i];
        build_sa(str,n+1,400);
        getHeight(str,n);
        int maxL=0;
        int l=1,r=ls;
        while(l<=r){
            int m=(l+r)>>1;
            //cout<<"l="<<l<<" r="<<r<<" m="<<m<<endl;
            if(check(m)) maxL=max(m,maxL),l=m+1;
            else r=m-1;
        }
        if(!maxL){
            puts("?\n");
            continue;
        }
        memset(vis,0,sizeof(vis));
        int cnt=0;
        for(int i=2;i<=n;i++){
            if(height[i]>=maxL){
                int x=sa[i-1]/(ls+1);
                int y=sa[i]/(ls+1);
                if(!vis[x]) cnt++,vis[x]=1;
                if(!vis[y]) cnt++,vis[y]=1;
            }
            else{
                if(cnt>N/2){
                    for(int j=sa[i-1];j<sa[i-1]+maxL;j++) putchar(t[j]);
                    puts("");
                }
                cnt=0;
                memset(vis,0,sizeof(vis));
            }
        }
        if(cnt>N/2){
            for(int j=sa[n];j<sa[n]+maxL;j++) putchar(t[j]);
            puts("");
        }
        puts("");
    }
    return 0;
}
View Code

 SPOJ PHRASES

给N个不同长度的字符串,求在每个字符串中都出现至少两次且不重叠的最长子串长度。

二分答案(子串长度)后输出最大值即可。为了处理“出现至少两次且不重叠”,再开两个数组:一个记录每个字符串在当前组内的出现次数,另一个记录出现的位置;同一字符串的两次出现的起始位置之差大于等于子串长度,即说明两次出现不重叠。

#include<iostream>
#include<cstdio>
#include<cstring>
#include<cstdlib>
#include<algorithm>

using namespace std;

// Capacity limits for this program.
const int maxn=1000100;  // capacity of the concatenated text and all suffix-array buffers
const int INF=(1<<29);   // not referenced in the visible part of this program
const int maxm=20;       // rows of s[] and size of the per-string tables vis[] / pos[]
const int LEN=11000;     // capacity of one input string in s[]

char s[maxm][LEN];       // input strings; main fills s[1..N]
int N;                   // number of strings in the current test case
char t[maxn];            // all input strings concatenated, each followed by a separator byte
int str[maxn],n;         // str: int copy of t handed to build_sa/getHeight; n: length of t
// NOTE(review): with `using namespace std;` the name `rank` may clash with
// std::rank on compilers that pull in <type_traits> - confirm on the target toolchain.
int sa[maxn],height[maxn],rank[maxn];  // suffix array, adjacent-suffix common-prefix lengths, inverse of sa
int id[maxn];            // id[p]: 1-based index of the input string owning text position p (separator included)
int vis[maxm];           // per-string bookkeeping used and reset by check()
int t1[maxn],t2[maxn],c[maxn];         // scratch buffers and counting-sort buckets for build_sa
int pos[maxm][3];        // per-string recorded suffix positions, used and reset by check()

// Tells whether positions a and b carry the same doubled key in r:
// equal first halves (r[a], r[b]) and equal second halves (r[a+l], r[b+l]).
// The second halves are only inspected when the first halves match.
bool cmp(int *r,int a,int b,int l)
{
    const bool sameHead=(r[a]==r[b]);
    if(!sameHead) return false;
    const bool sameTail=(r[a+l]==r[b+l]);
    return sameTail;
}

// Builds the suffix array of str[0..n-1] into the global sa[] by prefix doubling.
//   str : input symbols; each value indexes c[], so each must lie in [0, m)
//   n   : number of symbols to sort (main passes the text length plus one so the
//         trailing 0 takes part in the sort)
//   m   : number of counting-sort buckets for the first pass
// Scratch space: the global arrays t1, t2 and c.
void build_sa(int *str,int n,int m)
{
    int p,*x=t1,*y=t2;   // x: current keys per suffix start; y: starts ordered by second key

    // First pass: counting sort by the first symbol.
    for(int i=0;i<m;i++) c[i]=0;
    for(int i=0;i<n;i++) c[x[i]=str[i]]++;
    for(int i=1;i<m;i++) c[i]+=c[i-1];
    for(int i=n-1;i>=0;i--) sa[--c[x[i]]]=i;

    for(int j=1;j<=n;j<<=1){
        // Second key: suffixes with an empty second half first, then the rest
        // in the order of the previous sa[].
        p=0;
        for(int i=n-j;i<n;i++) y[p++]=i;
        for(int i=0;i<n;i++) if(sa[i]>=j) y[p++]=sa[i]-j;

        // Stable counting sort on the first key.
        for(int i=0;i<m;i++) c[i]=0;
        for(int i=0;i<n;i++) c[x[y[i]]]++;
        // The prefix sum must start at 1: starting at 0 reads c[-1], which is
        // outside the array.
        for(int i=1;i<m;i++) c[i]+=c[i-1];
        for(int i=n-1;i>=0;i--) sa[--c[x[y[i]]]]=y[i];

        // y now holds the old keys; x receives the keys for the doubled prefix.
        swap(x,y);
        p=1;x[sa[0]]=0;
        for(int i=1;i<n;i++) x[sa[i]]=cmp(y,sa[i-1],sa[i],j)?p-1:p++;
        if(p>=n) break;   // all keys unique: order is final
        m=p;              // bucket count for the next round
    }
}

// Fills rank[] (the inverse of sa[0..n]) and height[], where height[r] is the
// length of the common prefix of the suffixes starting at sa[r] and sa[r-1].
//   str : the symbol array that was given to build_sa
//   n   : text length without the trailing 0
// The comparison loop has no bounds check; it presumably relies on the
// terminating 0 in str[n] being distinct from every other symbol.
void getHeight(int *str,int n)
{
    int r=0;
    while(r<=n){
        rank[sa[r]]=r;
        ++r;
    }

    int shared=0;   // kept between iterations, decreasing by at most one per step
    for(int from=0;from<n;++from){
        if(shared!=0) --shared;
        const int neighbour=sa[rank[from]-1];
        while(str[from+shared]==str[neighbour+shared]) ++shared;
        height[rank[from]]=shared;
    }
}

// Records that the suffix starting at text position p belongs to the current
// height group. pos[x][0] / pos[x][1] hold the smallest / largest start seen so
// far for string x = id[p]; vis[x] marks that x already has two starts at least
// m apart. `satisfied` counts the strings marked that way.
static void record_occurrence(int p,int m,int &satisfied)
{
    const int x=id[p];
    if(pos[x][0]==-1){
        // First occurrence of this string in the group: nothing to pair it with yet.
        pos[x][0]=p;
        pos[x][1]=p;
        return;
    }
    if(p<pos[x][0]) pos[x][0]=p;
    if(p>pos[x][1]) pos[x][1]=p;
    if(!vis[x]&&pos[x][1]-pos[x][0]>=m){
        vis[x]=1;
        ++satisfied;
    }
}

// Returns true when some maximal run of height[] entries >= m contains, for
// every one of the N strings, two suffixes whose start positions differ by at
// least m (two non-overlapping occurrences of a common length-m prefix).
//
// The extreme starts per string are tracked rather than comparing each suffix
// only against the first one recorded: with starts visited in the order
// 5, 0, 8 and m = 6, the pair (0, 8) qualifies although neither is 6 away from 5.
bool check(int m)
{
    int satisfied=0;
    memset(vis,0,sizeof(vis));
    memset(pos,-1,sizeof(pos));
    for(int i=2;i<=n;i++){
        // sa[i-1] always belongs to the group that is open at this point.
        record_occurrence(sa[i-1],m,satisfied);
        if(height[i]<m){
            // sa[i-1] was the last suffix of its group: decide it and reset.
            if(satisfied==N) return 1;
            satisfied=0;
            memset(vis,0,sizeof(vis));
            memset(pos,-1,sizeof(pos));
        }
    }
    // sa[n] closes the final group, which the loop never terminates.
    record_occurrence(sa[n],m,satisfied);
    return satisfied==N;
}

// Reads T test cases. For each: reads N strings, concatenates them with a
// distinct separator byte after each one while recording in id[] which string
// owns every text position, builds the suffix array, and prints the largest
// length accepted by check().
int main()
{
    // Debug input redirection, intentionally disabled:
    // freopen("in.txt","r",stdin);
    int T;
    cin>>T;
    while(T--){
        cin>>N;
        for(int k=1;k<=N;++k) scanf("%s",s[k]);

        memset(id,0,sizeof(id));
        t[0]='\0';
        int used=0;   // number of bytes of t filled so far
        int sep=1;    // next separator byte
        for(int k=1;k<=N;++k){
            const int first=used;
            strcpy(t+used,s[k]);
            used+=strlen(s[k]);
            t[used]=sep++;
            t[used+1]='\0';
            if(sep==97) sep+=27;   // jump from 97 to 124 so separators never equal 'a'..'z'
            ++used;
            // The string's characters and its separator all belong to string k.
            for(int j=first;j<used;++j) id[j]=k;
        }
        n=used;

        // Copy including the terminating 0, which build_sa sorts as the last symbol.
        for(int j=0;j<=n;++j) str[j]=t[j];
        build_sa(str,n+1,400);
        getHeight(str,n);

        // Binary search over the half-open range [lo, hi) for the largest accepted length.
        int ans=0;
        int lo=0,hi=n;
        while(lo<hi){
            const int mid=(lo+hi)>>1;
            if(!check(mid)){
                hi=mid;
                continue;
            }
            if(mid>ans) ans=mid;
            lo=mid+1;
        }
        cout<<ans<<endl;
    }
    return 0;
}
View Code

 

posted @ 2015-08-13 15:51  __560  阅读(248)  评论(0编辑  收藏  举报