Longest Common Substring(最长公共子序列)

Longest Common Substring

Time Limit: 8000/4000 MS (Java/Others) Memory Limit: 65536/32768 K (Java/Others)
Total Submission(s): 37 Accepted Submission(s): 28
 
Problem Description
Given two strings, you have to tell the length of the Longest Common Substring of them.

For example:
str1 = banana
str2 = cianaic

So the Longest Common Substring is "ana", and the length is 3.
 
Input
The input contains several test cases. Each test case contains two strings, each string will have at most 100000 characters. All the characters are in lower-case.

Process to the end of file.
 
Output
For each test case, you have to tell the length of the Longest Common Substring of them.
 
Sample Input
banana
cianaic
 
Sample Output
3
 
Author
Ignatius.L
 
/*----------------------------------------------
File: F:\ACM源代码\数据结构--后缀数组\Longest_Common_Substring.cpp
Date: 2017/5/30 16:55:36
Author: LyuCheng
----------------------------------------------*/
/*
题意:最长公共子序列

思路:问题很多,DP基本不用考虑,因为时间复杂度空间复杂度都不允许,NlogN的算法也不行,最坏的情况
    转化成LIS的数组是1e10空间复杂的不允许,所以只能利用后缀数组的性质,将两个连接,然后前后两个
    前缀在两个不同的字符串中的时候,更新height的值,因为后缀加前缀,刚好是公共子序列
*/
#include <bits/stdc++.h>
#define MAXN 100005
using namespace std;
char s1[MAXN],s2[MAXN];
/****************************************后缀数组模板****************************************/
const int maxn=1000000+100;
struct SuffixArray
{
    char s[maxn];
    int sa[maxn],rank[maxn],height[maxn];
    int t1[maxn],t2[maxn],c[maxn],n;
    int dmin[maxn][20];
    void build_sa(int m)
    {
        int i,*x=t1,*y=t2;
        for(i=0;i<m;i++) c[i]=0;
        for(i=0;i<n;i++) c[x[i]=s[i]]++;
        for(i=1;i<m;i++) c[i]+=c[i-1];
        for(i=n-1;i>=0;i--) sa[--c[x[i]]]=i;
        for(int k=1;k<=n;k<<=1)
        {
            int p=0;
            for(i=n-k;i<n;i++) y[p++]=i;
            for(i=0;i<n;i++)if(sa[i]>=k) y[p++]=sa[i]-k;
            for(i=0;i<m;i++) c[i]=0;
            for(i=0;i<n;i++) c[x[y[i]]]++;
            for(i=1;i<m;i++) c[i]+=c[i-1];
            for(i=n-1;i>=0;i--) sa[--c[x[y[i]]]] = y[i];
            swap(x,y);
            p=1,x[sa[0]]=0;
            for(i=1;i<n;i++)
                x[sa[i]]= y[sa[i]]==y[sa[i-1]]&&y[sa[i]+k]==y[sa[i-1]+k]? p-1:p++;
            if(p>=n) break;
            m=p;
        }
    }
    void build_height()//n不能等于1,否则出BUG
    {
        int i,j,k=0;
        for(i=0;i<n;i++)rank[sa[i]]=i;
        for(i=0;i<n;i++)
        {
            if(k)k--;
            j=sa[rank[i]-1];
            while(s[i+k]==s[j+k])k++;
            height[rank[i]]=k;
        }
    }
    void initMin()
    {
        for(int i=1;i<=n;i++) dmin[i][0]=height[i];
        for(int j=1;(1<<j)<=n;j++)
            for(int i=1;i+(1<<j)-1<=n;i++)
                dmin[i][j]=min(dmin[i][j-1] , dmin[i+(1<<(j-1))][j-1]);
    }
    int RMQ(int L,int R)//取得范围最小值
    {
        int k=0;
        while((1<<(k+1))<=R-L+1)k++;
        return min(dmin[L][k] , dmin[R-(1<<k)+1][k]);
    }
    int LCP(int i,int j)//求后缀i和j的LCP最长公共前缀
    {
        int L=rank[i],R=rank[j];
        if(L>R) swap(L,R);
        L++;//注意这里
        return RMQ(L,R);
    }
}sa;
/****************************************后缀数组模板****************************************/

int main(){
    // freopen("in.txt","r",stdin);
    while(scanf("%s%s",s1,s2)!=EOF){
        int n=strlen(s1);
        int m=strlen(s2);
        for(int i=0;i<n;i++){
            sa.s[i]=s1[i];
        }
        sa.s[n]='$';
        for(int i=n;i<n+m;i++){
            sa.s[i]=s2[i-n];
        }
        sa.n=m+n+1;
        sa.build_sa(MAXN);
        sa.build_height();
        int maxLCS=-1;
        for(int i=0;i<m+n+1;i++){
            if(i==0){
                maxLCS=max(maxLCS,sa.height[i]);
            }else{
                if((sa.sa[i]-n)*(sa.sa[i-1]-n)<0)//保证两后缀是来自不同的字符串的
                    maxLCS=max(maxLCS,sa.height[i]);
            }
        }
        printf("%d\n",maxLCS);
    }
    return 0;
}

 

posted @ 2017-05-30 22:02  勿忘初心0924  阅读(1663)  评论(0编辑  收藏  举报