SPOJ - PHRASES Relevant Phrases of Annihilation (后缀数组)

You are the King of Byteland. Your agents have just intercepted a batch of encrypted enemy messages concerning the date of the planned attack on your island. You immediately send for the Bytelandian Cryptographer, but he is currently busy eating popcorn and claims that he may only decrypt the most important part of the text (since the rest would be a waste of his time). You decide to select the fragment of the text which the enemy has strongly emphasised, evidently regarding it as the most important. So, you are looking for a fragment of text which appears in all the messages disjointly at least twice. Since you are not overfond of the cryptographer, try to make this fragment as long as possible.

Input

The first line of input contains a single positive integer t<=10, the number of test cases. t test cases follow. Each test case begins with integer n (n<=10), the number of messages. The next n lines contain the messages, consisting only of between 2 and 10000 characters 'a'-'z', possibly with some additional trailing white space which should be ignored.

Output

For each test case output the length of the longest string which appears disjointly at least twice in all of the messages.

Example

Input:
1
4
abbabba
dabddkababa
bacaba
baba

Output:
2

(in the example above, the longest substring which fulfills the requirements is 'ba')

 

题意:

求在每个字符串中都至少出现两次且互不重叠的最长子串的长度。

思路:

二分答案 mid:按 height 将后缀数组分组(组内相邻后缀的 height 均不小于 mid),对每一组记录属于同一字符串的后缀起点的最大值和最小值,再判断每个字符串的最大值减最小值是否都不小于 mid 即可。

 

#include<iostream>
#include<algorithm>
#include<vector>
#include<stack>
#include<queue>
#include<map>
#include<set>
#include<cstdio>
#include<cstring>
#include<cmath>
#include<ctime>

// Debug helper: prints the expression text and its value to stderr.
#define fuck(x) cerr<<#x<<" = "<<x<<endl;
// Debug helper: prints one array element as name[index] = value to stderr.
#define debug(a, x) cerr<<#a<<"["<<x<<"] = "<<a[x]<<endl;
// 2*t and 2*t+1 for a variable named t; not referenced in the visible code
// (presumably left over from a segment-tree template).
#define ls (t<<1)
#define rs ((t<<1)|1)
using namespace std;
typedef long long ll;
typedef unsigned long long ull;
// Capacity of the global arrays below (text, suffix array, ranks, heights).
const int maxn = 100186;
// Not referenced in the visible code.
const int maxm = 100086;
// Large int sentinel; its negation is used as the rank of "past the end" positions.
const int inf = 0x3f3f3f3f;
// The remaining constants are not referenced in the visible code.
const ll Inf = 999999999999999999;
const int mod = 1000000007;
const double eps = 1e-6;
const double pi = acos(-1);

// Text the suffix array is built over, stored as integers: main() writes each
// letter as 1..26 and appends a distinct separator 200+i after message i.
int s[maxn];
// len   - number of entries currently in s.
// Rank  - rank of each suffix start; written by construct_sa().
// sa    - suffix array (suffix starts in sorted order); written by construct_sa().
// tlen  - prefix length used by compare_sa() during the current doubling round.
// tmp   - scratch buffer for the next round's ranks.
int len, Rank[maxn], sa[maxn], tlen, tmp[maxn];

// Ordering predicate for the prefix-doubling rounds of construct_sa().
// Suffixes are compared by the pair (Rank[x], Rank[x + tlen]). When x + tlen
// lies beyond len the second key is -inf, so among suffixes with equal first
// keys the one that ends sooner sorts first.
bool compare_sa(int i, int j) {
    if (Rank[i] == Rank[j]) {
        const int tail_i = (i + tlen > len) ? -inf : Rank[i + tlen];
        const int tail_j = (j + tlen > len) ? -inf : Rank[j + tlen];
        return tail_i < tail_j;
    }
    return Rank[i] < Rank[j];
}

// Builds the suffix array of s[0..len) into sa[0..len] by prefix doubling and
// leaves the final ranks in Rank[0..len]. Index len stands for the empty
// suffix, which always receives the smallest rank.
void construct_sa() {
    // Round zero: every suffix is ranked by its first symbol; the empty
    // suffix gets -inf so it sorts before everything else.
    for (int pos = 0; pos <= len; ++pos) {
        sa[pos] = pos;
        if (pos < len) {
            Rank[pos] = s[pos];
        } else {
            Rank[pos] = -inf;
        }
    }

    tlen = 1;
    while (tlen <= len) {
        // Sort by the first 2*tlen symbols using the ranks of the two halves.
        sort(sa, sa + len + 1, compare_sa);

        // Re-rank: walk the sorted order and bump the rank only when a suffix
        // is strictly greater than its predecessor; equal keys share a rank.
        tmp[sa[0]] = 0;
        for (int k = 1; k <= len; ++k) {
            const int before = sa[k - 1];
            const int current = sa[k];
            tmp[current] = tmp[before];
            if (compare_sa(before, current)) {
                ++tmp[current];
            }
        }

        // Publish the new ranks only after all comparisons used the old ones.
        for (int pos = 0; pos <= len; ++pos) {
            Rank[pos] = tmp[pos];
        }

        tlen *= 2;
    }
}

// height[r] is the length of the common prefix of the suffixes at sa[r-1] and
// sa[r]; filled by construct_lcp(), which also sets height[0] to 0.
int height[maxn];

// Fills height[] from s, sa and Rank. Relies on Rank being the inverse of sa
// (sa[Rank[i]] == i), because sa[Rank[i] - 1] is used as the sorted-order
// predecessor of suffix i.
void construct_lcp() {
    height[0] = 0;
    int matched = 0;  // common-prefix length carried over from the previous start
    for (int i = 0; i < len; ++i) {
        const int slot = Rank[i];
        const int prev = sa[slot - 1];  // suffix immediately before i in sorted order
        // Moving the start one symbol right can shorten the match by at most one.
        if (matched > 0) {
            --matched;
        }
        while (prev + matched < len && i + matched < len
               && s[prev + matched] == s[i + matched]) {
            ++matched;
        }
        height[slot] = matched;
    }
}

// Sparse table over height[]: rmq_init() stores in st[i][k] the minimum of
// height[i .. i + 2^k - 1]; ask_min() reads it.
int st[maxn][20];

// Builds the sparse table st over height[1..len] so that st[j][k] holds
// min(height[j .. j + 2^k - 1]).
//
// Only windows that fit entirely inside [1, len] are filled. The earlier bound
// (j + 2^(k-1) <= len) also built entries for windows running past len, which
// folded in cells never initialised for the current text.
void rmq_init() {
    for (int i = 1; i <= len; i++) {
        st[i][0] = height[i];
    }
    for (int k = 1; (1 << k) <= len; k++) {
        const int width = 1 << k;
        const int half = width >> 1;
        for (int j = 1; j + width - 1 <= len; j++) {
            // A window of 2^k is the union of its two halves of 2^(k-1).
            st[j][k] = min(st[j][k - 1], st[j + half][k - 1]);
        }
    }
}

// Returns min(height[i..j]) from the sparse table built by rmq_init().
// Precondition: 1 <= i <= j <= len.
//
// floor(log2(j - i + 1)) is computed with integers: the floating-point
// log()/log() form is sensitive to rounding and is undefined for an empty
// range, where log(0) is converted to int.
int ask_min(int i, int j) {
    const int span = j - i + 1;
    int k = 0;
    while ((1 << (k + 1)) <= span) {
        ++k;
    }
    // Two windows of 2^k, anchored at each end, together cover [i, j].
    return min(st[i][k], st[j - (1 << k) + 1][k]);
}

// Longest common prefix of the suffixes starting at text positions a and b
// (indices into the original string, not ranks).
// Precondition: a != b. For equal positions the range passed to ask_min()
// would be empty, which ask_min() does not support.
int lcp(int a, int b)
{
    int lo = Rank[a];
    int hi = Rank[b];
    if (hi < lo) {
        swap(lo, hi);
    }
    // The answer is the minimum height strictly after the lower rank, up to the higher.
    return ask_min(lo + 1, hi);
}


// Input buffer for one message as read by main().
char str[maxn];
// intr[i] is the index in s of the separator that main() appends after message i.
int intr[maxn];
// Per-message largest / smallest suffix start seen in the current height group
// (indexed by message number; maintained by check()).
int mx[105];
int mn[105];
// Number of messages in the current test case.
int n;
// Clears the per-message extremes so that a new height group can be collected.
// mn is filled with 0x3f bytes (a large positive int), mx with zeros, so an
// untouched message yields a negative mx - mn.
static void reset_group_extremes(){
    memset(mx,0,sizeof(mx));
    memset(mn,0x3f,sizeof(mn));
}

// Records suffix start `pos` in the extremes of the message that contains it.
// The owning message is the first one whose separator index is >= pos.
static void add_to_group(int pos){
    int owner = lower_bound(intr+1,intr+1+n,pos)-intr;
    mn[owner]=min(mn[owner],pos);
    mx[owner]=max(mx[owner],pos);
}

// True when every message 1..n has two suffix starts in the current group that
// lie at least `mid` apart, i.e. two non-overlapping occurrences.
static bool group_covers_all(int mid){
    for(int j=1;j<=n;j++){
        if(mx[j]-mn[j]<mid){ return false;}
    }
    return true;
}

// Decides whether some substring of length `mid` occurs at least twice without
// overlap in every one of the n messages.
//
// The suffix array is cut into maximal runs of adjacent suffixes whose height
// is >= mid; all suffixes in a run share a prefix of length mid. A run succeeds
// when, for every message, the spread of its suffix starts is >= mid.
//
// Returns true if such a substring exists (always true for mid == 0).
bool check(int mid){
    if(mid==0){ return  true;}
    reset_group_extremes();
    for(int i=1;i<=len;i++){
        if(height[i]>=mid){
            // Both neighbours belong to the run; the owner lookup is only
            // performed when it is actually needed.
            add_to_group(sa[i]);
            add_to_group(sa[i-1]);
        }else{
            // The run ended just before i: evaluate it, then start afresh.
            if(group_covers_all(mid)){return true;}
            reset_group_extremes();
        }
    }
    // Evaluate a run that is still open when the suffix array ends, instead of
    // relying on the last suffix always closing it.
    return group_covers_all(mid);
}

// Reads T test cases from stdin. Each case has n lowercase messages; they are
// concatenated into s with a distinct separator after each one, the suffix
// array and height array are built, and the answer is found by binary search
// on the substring length using check(). Prints one integer per test case.
int main() {
    int T;
    // Stop cleanly on missing or malformed input instead of using an uninitialised count.
    if (scanf("%d", &T) != 1) { return 0; }
    while (T--) {
        if (scanf("%d", &n) != 1) { break; }
        len = 0;
        int shortest = 0;  // length of the shortest message in this test case
        for (int i = 1; i <= n; i++) {
            // On a failed read, treat the message as empty rather than reusing stale data.
            if (scanf("%s", str) != 1) { str[0] = '\0'; }
            int l = static_cast<int>(strlen(str));
            if (i == 1 || l < shortest) { shortest = l; }
            for (int j = 0; j < l; j++) {
                s[len++] = str[j] - 'a' + 1;  // letters become 1..26
            }
            // A separator unique to message i keeps common prefixes from
            // spanning two messages.
            s[len++] = 200 + i;
            intr[i] = len - 1;
        }

        construct_sa();
        construct_lcp();

        // Two disjoint occurrences must fit inside every message, so the
        // answer cannot exceed half the shortest message.
        int l = 0, r = shortest / 2;
        int ans = 0;
        while (l <= r) {
            int mid = (l + r) / 2;
            if (check(mid)) {
                ans = mid;
                l = mid + 1;
            } else {
                r = mid - 1;
            }
        }
        printf("%d\n", ans);
    }

    return 0;
}
View Code
posted @ 2019-07-13 19:09  断腿三郎  阅读(253)  评论(0编辑  收藏  举报