后缀数组应用5: 求两个不同字符串的最长公共子串

求两个不同字符串的最长公共子串

依然是将两个字符串拼接在一起(中间用一个分隔符隔开),然后求 sa、height 数组,最后枚举 height 值:只有当排名相邻的两个后缀分别来自不同的字符串时,对应的 height 才参与取最大值。

View Code
#include<stdio.h>
#include<stdlib.h>
#include<string.h>
#include<iostream>
#include<vector>
#include<string>
#include<math.h>
#include<map>
#include<set>
#include<algorithm>
using namespace std;
// Upper bound on the length of the combined text (both inputs, the
// separator and the end sentinel); every table below is sized by it.
#define MAXN 10010

// sa[i]     : start position of the suffix with rank i
// rank[p]   : rank of the suffix starting at position p
// sum[]     : counting-sort buckets
// height[i] : longest common prefix of the suffixes ranked i-1 and i
int sa[MAXN], rank[MAXN], sum[MAXN], height[MAXN];
// Scratch arrays for the doubling sort (rank buffers, keys, buckets).
int wa[MAXN], wb[MAXN], wx[MAXN], wsum[MAXN];
// Combined text: first string, separator, second string, sentinel.
char str[MAXN];
// Sparse table for range-minimum queries over height[]. It needs one row
// per position, so it is sized by MAXN like the other tables (it used to
// have only 1010 rows, which pre() overran for longer texts).
// 2^14 > MAXN, so 20 levels are more than enough.
int dp[MAXN][20];
/*
RMQ (range-minimum query) via a sparse table:
dp[i][j] = min(dp[i][j-1], dp[i + 2^(j-1)][j-1])
dp[i][0] = A[i];

Minimum over the closed interval [i, j]:
int L = floor(log2(j - i + 1))
return min(dp[i][L], dp[j + 1 - 2^L][L]);
*/
// Builds the range-minimum sparse table dp[][] over height[0..n].
//
// Level 0 copies height[0..n]; level j (j >= 1) stores, for every start
// i in [1, n + 1 - 2^j], the minimum of height[i .. i + 2^j - 1].
//
// n must satisfy 1 <= n < (number of rows of dp). A value outside that
// range is rejected and the table is left untouched; the original code
// evaluated log2 of a non-positive number or wrote past the end of dp.
void pre(int n)
{
  const int rows = (int) (sizeof(dp) / sizeof(dp[0]));
  const int levels = (int) (sizeof(dp[0]) / sizeof(dp[0][0]));
  if( n < 1 || n >= rows )
    return;

  for( int i = 0; i <= n; i++)
    dp[i][0] = height[i];

  // Integer arithmetic replaces the floating-point log2: a level exists
  // exactly while a block of 2^j entries still fits into n entries.
  for( int j = 1; j < levels && (1 << j) <= n; j++)
  {
    const int half = 1 << (j - 1);
    for( int i = 1; i <= n + 1 - (1 << j); i++)
    {
      const int left = dp[i][j-1];
      const int right = dp[i + half][j-1];
      dp[i][j] = left < right ? left : right;
    }
  }
}

// Returns the minimum of height[] over the closed index range [a, b],
// read from the sparse table filled by pre().
//
// The bounds may be given in either order. Both must lie inside the range
// that pre() prepared; this function does not check that.
int get_min( int a, int b)
{
  if( a > b )
  {
    const int t = a;
    a = b;
    b = t;
  }

  // Largest L with 2^L <= b - a + 1, computed without floating point.
  const int span = b - a + 1;
  int L = 0;
  while( (1 << (L + 1)) <= span )
    L++;

  // Two blocks of length 2^L, possibly overlapping, cover [a, b].
  const int left = dp[a][L];
  const int right = dp[b + 1 - (1 << L)][L];
  return left < right ? left : right;
}

// Tells whether positions a and b carry the same key pair in r:
// the first key is r[pos], the second key is r[pos + l].
// Returns 1 when both keys match, 0 otherwise. The second key is only
// read when the first keys are equal.
int cmp( int *r, int a, int b, int l)
{
  if( r[a] != r[b] )
    return 0;
  const int second_a = r[a + l];
  const int second_b = r[b + l];
  return second_a == second_b ? 1 : 0;
}

// Builds the suffix array of r[0..n-1] with the prefix-doubling algorithm.
//
//   r  : text; bytes are interpreted as unsigned char (0..255)
//   sa : output, n entries; sa[k] is the start of the k-th smallest suffix
//   n  : number of characters to sort (include the sentinel if one is used)
//   m  : exclusive upper bound on the character values; values below 256
//        are raised to 256 so that every byte has a bucket
//
// Differences from the previous version, all of them bug fixes:
//   * the bucket count follows the number of distinct ranks after every
//     round; it used to stay at m, so as soon as more than m distinct
//     ranks existed the counting sort used counters that were neither
//     reset nor accumulated;
//   * bytes >= 0x80 no longer become negative bucket indices;
//   * a second key that lies beyond the end of the text compares as -1,
//     so the result is correct even when r carries no unique sentinel;
//   * scratch space is local and sized from n, so the function no longer
//     depends on fixed-size global work arrays.
void get_sa(char *r, int *sa, int n, int m)
{
   if( n <= 0 )
       return;

   int buckets = m > 256 ? m : 256;
   std::vector<int> x(n), y(n), key(n);
   std::vector<int> cnt(buckets > n ? buckets : n, 0);
   int i, j, p;

   // Round 0: counting sort of the suffixes by their first character.
   // x plays the role of a rank array, although the values are not yet
   // dense ranks.
   for( i = 0; i < n; i++)
        cnt[ x[i] = static_cast<unsigned char>(r[i]) ]++;
   for( i = 1; i < buckets; i++)
        cnt[i] += cnt[i-1];
   for( i = n - 1; i >= 0; i--)
        sa[--cnt[x[i]]] = i;

   // Each round sorts by the first 2*j characters, using the order by
   // the first j characters that is already known.
   for( j = 1, p = 1; p < n && j < n; j *= 2, buckets = p)
   {
      // Order by the second key: suffixes whose second half is empty
      // come first, the others follow in the order of their second half.
      p = 0;
      for( i = n - j; i < n; i++)
           y[p++] = i;
      for( i = 0; i < n; i++)
           if( sa[i] >= j )
               y[p++] = sa[i] - j;

      // Stable counting sort by the first key keeps the second-key order
      // within each bucket.
      for( i = 0; i < n; i++)
           key[i] = x[y[i]];
      for( i = 0; i < buckets; i++)
           cnt[i] = 0;
      for( i = 0; i < n; i++)
           cnt[ key[i] ]++;
      for( i = 1; i < buckets; i++)
           cnt[i] += cnt[i-1];
      for( i = n - 1; i >= 0; i--)
           sa[--cnt[key[i]]] = y[i];

      // Re-rank: y now holds the previous ranks, x receives the new ones.
      x.swap(y);
      x[sa[0]] = 0;
      for( i = 1, p = 1; i < n; i++)
      {
           const int a = sa[i-1];
           const int b = sa[i];
           const int a2 = a + j < n ? y[a + j] : -1;
           const int b2 = b + j < n ? y[b + j] : -1;
           const bool same = y[a] == y[b] && a2 == b2;
           x[b] = same ? p - 1 : p++;
      }
   }
}

//h[i] = height[rank[i]], h[i] >= h[i-1] - 1
void get_height(char *r, int n)
{
  int i, j, k = 0;//sa[0] = len 就是我们补的那个0 
  for(i = 1; i <= n; i++)
    rank[sa[i]] = i;
  for(i = 0; i < n ; height[rank[i++]] = k )  
    for( k ? k-- : 0, j = sa[rank[i]-1]; r[i+k] == r[j+k]; k++);
}

int main( )
{
  int a, b, n, m, q;
  char st[1000];
  while( scanf("%s%s",str,st) != EOF )
  {
    int len = strlen(str);
    int len2 = strlen(st);
    str[len] = '$';
    q = len;
    for( int i = len + 1, k = 0; k < len2; i++, k++)
       str[i] = st[k];
    len += len2;
    str[len+1] = '#';
    str[len+2] = 0;
    memset(wa,0,sizeof(wa));
    memset(wb,0,sizeof(wb));
    memset(sa,0,sizeof(sa));
    memset(height,0,sizeof(height));
    get_sa(str, sa, len + 1, 255);
    get_height( str, len ); 
    //枚举每个height值 
    int ans = 0, k;
    for( int i = 1; i <= len; i++)
    {
        if( height[i] > ans && ( (sa[i-1] < q && sa[i] > q) || (sa[i-1] > q && sa[i] < q ) ) )
        {
            ans = height[i];
            k = i;
        } 
        
    }
    printf("%d\n",ans);
    for( int i = sa[k]; i <= sa[k] + ans - 1; i++)
       printf("%c",str[i]);
    puts("");
  }
}

posted on 2012-09-26 16:32  more think, more gains  阅读(175)  评论(0编辑  收藏  举报

导航