POJ-3294 Life Forms:求在超过 n/2 个字符串中出现的最长子串(若有多个,按字典序依次输出)

 思路与之前求两个字符串的最长公共子串类似:把所有串拼接到一起。为了避免LCP跨越多个串时需要特别处理,这里用互不相同的字符把各个串隔开(因为char只有128个取值,很可能不够用,更推荐统一写成某一特殊字符,在后缀数组初始化时再把每个分隔符映射成不同的int值)。然后二分答案长度并遍历Height数组,按SA判断相邻后缀分别属于哪个原串。我一开始觉得统计不同串的个数有点麻烦,其实可以利用这样的性质:固定起点后,随着终点在后缀数组中向后延伸,区间的LCP(即区间内Height的最小值)不会上升;所以一旦Height小于所要求的长度,就结束当前计数、切换到下一组(后面的后缀不可能再与之前计数的那组后缀有足够长的公共前缀了)。

#include <iostream>
#include <cstring>
#include <string>
#include <cmath>
#include <vector>
#include <algorithm>
#include <cstdio>
#define debug puts("debug")
#define LL long long
using namespace std;

// Capacity of every fixed-size array in this file.
const int N=111005;
// s: all input strings of the current case concatenated, each followed by '#'.
// temp: scratch buffer used while reading one input string.
string s,temp;
// Number of strings in the current test case.
int nn;
// pos[i]: length of the i-th input string.
int pos[N];
// Scratch marks used by SF::ok to tell which strings were already counted
// in the group of suffixes currently being scanned.
int vis[N];
// phs[p]: index of the input string that owns position p of s;
// separator positions are tagged with nn.
int phs[N];
// endp[i]: filled in main from the string lengths; not read anywhere in this file.
int endp[N];
// stx[0..sp-1]: start positions (in s) of the answers found by the last SF::ok call.
int stx[N],sp;


class SF
{
    //N:数组大小
public:
    int x[N], y[N], c[N];
    int Height[N], str[N], SA[N], Rank[N];//Height数组从2开始,SA记录Rank=i的下标
    int slen;
    int m;//字符集处理大小(传入如果不是数字,需要做位移转换)
    bool cmp(int* r, int a, int b, int l)
    {
        return r[a] == r[b] && r[a + l] == r[b + l];
    }
    void Suffix(int n)
    {
        ++n;
        int i, j, p;
        for (i = 0; i < m; ++i) c[i] = 0;
        for (i = 0; i < n; ++i) c[x[i] = str[i]]++;
        for (i = 1; i < m; ++i) c[i] += c[i - 1];
        for (i = n - 1; i >= 0; --i) SA[--c[x[i]]] = i;

        for (j = 1; j <= n; j <<= 1)
        {
            p = 0;
            for (i = n - j; i < n; ++i) y[p++] = i;
            for (i = 0; i < n; ++i) if (SA[i] >= j) y[p++] = SA[i] - j;
            for (i = 0; i < m; ++i) c[i] = 0;
            for (i = 0; i < n; ++i) c[x[y[i]]]++;

            for (i = 1; i < m; ++i) c[i] += c[i - 1];
            for (i = n - 1; i >= 0; --i) SA[--c[x[y[i]]]] = y[i];

            swap(x, y);
            p = 1;
            x[SA[0]] = 0;
            for (i = 1; i < n; ++i)
            {
                x[SA[i]] = cmp(y, SA[i - 1], SA[i], j) ? p - 1 : p++;
            }
            if (p >= n)break;
            m = p;
        }

        int k = 0;
        n--;
        for (i = 0; i <= n; ++i) Rank[SA[i]] = i;

        for (i = 0; i < n; ++i)
        {
            if (k)--k;
            j = SA[Rank[i] - 1];
            while (str[i + k] == str[j + k])++k;
            Height[Rank[i]] = k;
            //cout << k << endl;
        }
    }
    static const int bitlen = 35;
    LL lg2(LL p)//计算log2(n)
    {
        return (LL)(log(p) / log(2));
    }
    LL dp[bitlen][N];
    LL bit[bitlen];
    void initRMQ()//初始化
    {
        bit[0] = 1;
        for (int i = 1; i < bitlen; i++) bit[i] = 2 * bit[i - 1];
        for (int i = 0; i <= slen; i++)
            dp[0][i] = Height[i];
        dp[0][0] = dp[0][1] = 0;
        for (LL i = 1; bit[i] < slen + 1; i++)
            for (LL j = 0; j + bit[i] <= slen + 1; j++)
                dp[i][j] = min(dp[i - 1][j], dp[i - 1][j + bit[i - 1]]);
    }
    LL query(LL l, LL r)//查询两个Rank之间的lcp
    {
        if (r == l) return slen - SA[l];
        if (l > r) swap(l, r);
        l++;
        LL mig = lg2(r - l + 1.0);
        return min(dp[mig][l], dp[mig][r - bit[mig] + 1]);
    }
    void init(string s)
    {
        slen = s.size();
        m=1050;//每次都需要初始化m
        int fuck=1;
        for (int i = 0; i < slen; i++)
        {
            if(s[i]!='#')
                str[i] = s[i] - 'a' + 2;//如果是字符,映射成从1开始的序列
            else str[i]=100+(fuck++);
        }
        str[slen] = 0;//1作为结束符,防止越界
        Suffix(slen);

        initRMQ();
    }
    bool ok(int lx)
    {
        sp=0;
        fill(vis,vis+slen+1,0);
        int cnt=1;
        vis[phs[SA[1]]]=1;
        //cout<<phs[1]<<endl;
        for(int i=2; i<=slen; i++)
        {
            //cout<<Height[i]<<endl;
            //cout<<phs[i]<<endl;
            if(Height[i]>=lx)
            {
                if(vis[phs[SA[i]]]==0)
                    cnt++,vis[phs[SA[i]]]=1;
            }
            else
            {
                if(cnt>nn/2)
                    stx[sp++]=SA[i-1];
                cnt=1;
                fill(vis,vis+nn+1,0);
                vis[phs[SA[i]]]=1;
            }
        }
        if(cnt>nn/2)
            stx[sp++]=SA[slen-1];
        return sp;
    }
    int bins(int l,int r)
    {
        while(r-l>=3)
        {
            int mid=(l+r)/2;
            if(ok(mid))l=mid;
            else r=mid-1;
        }
        //cout<<l<<' '<<r<<endl;
        for(; r>=l; r--)
        {
            //cout<<r<<endl;
            if(ok(r))return r;
        }
        return 0;
    }

    void solve()
    {
        int f=bins(1,slen);
        if(!f)cout<<"?"<<endl;
        else
        {
            //sort(stx,stx+f,cmp);
            for(int i=0; i<sp; i++)
            {
                //cout<<stx[i]<<endl;
                for(int j=0; j<f; j++)
                    cout<<s[stx[i]+j];
                cout<<endl;
            }
        }
    }
} sf;
// Reads test cases until a count of 0 (or end of input), printing a blank
// line between consecutive cases and delegating the search to sf.
int main()
{
    cin.sync_with_stdio(false);
    int caseIndex = 0;
    while (cin >> nn)
    {
        if (nn == 0)
            break;

        // Blank separator line before every case except the first.
        if (caseIndex > 0)
            cout << endl;
        ++caseIndex;

        // Concatenate the strings, closing each one with '#'.
        s.clear();
        for (int k = 0; k < nn; ++k)
        {
            cin >> temp;
            pos[k] = temp.length();
            s += temp;
            s += '#';
        }

        // A single string is its own answer: print it without the trailing '#'.
        if (nn == 1)
        {
            s = s.substr(0, s.length() - 1);
            cout << s << endl;
            continue;
        }

        // Tag every position of s with the index of its string; separators get nn.
        int cursor = 0;
        for (int k = 0; k < nn; ++k)
        {
            for (int j = 0; j < pos[k]; ++j)
                phs[cursor++] = k;
            phs[cursor++] = nn;

            if (k == 0)
                endp[k] = pos[k];
            else
                endp[k] = endp[k - 1] + pos[k] + 1;
        }

        sf.init(s);
        sf.solve();
    }
    return 0;
}

 

posted @ 2017-11-23 08:26  Luke_Ye  阅读(228)  评论(0编辑  收藏  举报