Poj 3294 Life Forms (后缀数组 + 二分 + Hash)

题目链接:

  Poj 3294 Life Forms

题目描述:

  有n个文本串,问在一半以上的文本串出现过的最长连续子串?

解题思路:

  可以把文本串用没有出现过的不同字符连起来,然后求新文本串的height。然后二分答案串的长度K,根据K把新文本串的后缀串分块,统计每块中的原文本串出现的次数,大于原文本串数目的一半就作为答案记录下来,对于输出字典序,height就是排好序的后缀数组,只要按照顺序输出即可。

  1 #include <cstdio>
  2 #include <cstring>
  3 #include <iostream>
  4 #include <algorithm>
  5 using namespace std;
  6 typedef long long LL;
  7 const int maxn = 110000;
  8 
  9 int sa[maxn], rank[maxn], height[maxn], vis[110], res[maxn];
 10 int t1[maxn], t2[maxn], r[maxn], flag[maxn], c[maxn];
 11 
 12 bool cmp (int *str, int a, int b, int k)
 13 {
 14     return str[a]==str[b] && str[a+k]==str[b+k];
 15 }
 16 
 17 void da (int *str, int n, int m)
 18 {
 19     n ++;
 20     int *x = t1, *y = t2, i, j;
 21     
 22     for (i=0; i<m; i++) c[i] = 0;
 23     for (i=0; i<n; i++) c[x[i]=str[i]] ++;
 24     for (i=1; i<m; i++) c[i] += c[i-1];
 25     for (i=n-1; i>=0; i--) sa[-- c[x[i]]] = i;
 26     
 27     for (j=1; j<=n; j*=2)
 28     {
 29         int p = 0;
 30         for (i=n-j; i<n; i++) y[p++] = i;
 31         for (i=0; i<n; i++) if (sa[i] >= j) y[p++] = sa[i] - j;
 32 
 33         for (i=0; i<m; i++) c[i] = 0;
 34         for (i=0; i<n; i++) c[x[y[i]]] ++;
 35         for (i=1; i<m; i++) c[i] += c[i-1];
 36         for (i=n-1; i>=0; i--) sa[-- c[x[y[i]]]] = y[i];
 37 
 38         swap (x, y);
 39         p = 1;
 40         x[sa[0]] = 0;
 41         for (int i=1; i<n; i++)//i是rank
 42             x[sa[i]] = cmp(y, sa[i-1], sa[i], j)?p-1:p++;
 43         if (p >= n)
 44             break;
 45         m = p;
 46     }
 47     
 48     for (i=1; i<n; i++)
 49         rank[sa[i]] = i;
 50         
 51     int k = 0;
 52     n --;
 53     for (int i=0; i<n; i++)
 54     {
 55         if (k)  k --;
 56         int j = sa[rank[i] - 1];
 57         while (str[i+k] == str[j+k])    k++;
 58         height[rank[i]] = k;
 59     }
 60 }
 61 
 62 bool Bin_sreach (int x, int k, int n)
 63 {
 64     int ans, num;
 65     ans = num = 0;
 66     memset (vis, 0, sizeof(vis));
 67     
 68     for (int i=2; i<=k; i++)
 69     {
 70         if (height[i] >= x)
 71         {
 72             ans += vis[flag[sa[i-1]]]?0:1;
 73             vis[flag[sa[i-1]]] = 1;
 74             
 75             ans += vis[flag[sa[i]]]?0:1;
 76             vis[flag[sa[i]]] = 1;
 77         }
 78         else
 79         {
 80             if (ans*2 > n)
 81                 res[++ num] = sa[i-1];
 82                 
 83             ans = 0;
 84             memset (vis, 0, sizeof(vis));
 85         }
 86     }
 87     if (ans*2 > n)
 88         res[++ num] = sa[k-1];
 89         
 90     if (num)
 91     {
 92         res[0] = num;
 93         return true;
 94     }
 95     return false;
 96 }
 97 
 98 int main ()
 99 {
100     int n, l = 0;
101     char str[1010];
102     while (scanf ("%d", &n), n)
103     {
104         if (l ++)
105             printf ("\n");
106             
107         int k = 0;
108         for (int i=0; i<n; i++)
109         {
110             scanf ("%s", str);
111             for (int j=0; str[j]; j++)
112             {
113                 r[k] = str[j];
114                 flag[k++] = i;//记录k字母所在的字符串
115             }
116             r[k] = 130 + i;
117             flag[k++] = -1;
118         }
119         
120         r[k] = 0;
121         da (r, k, 250);
122         
123         int low = 1, high = k, mid, ans = 0;
124         while (low <= high)
125         {//二分枚举
126             mid = (low + high) / 2;
127             if (Bin_sreach(mid, k, n))
128             {
129                 ans = mid;
130                 low = mid + 1;
131             }
132             else
133                 high = mid - 1;
134         }
135         
136         if (low == 1)
137         {
138             printf ("?\n");
139             continue;
140         }
141         
142         for (int i=1; i<=res[0]; i++)
143             {
144                 for (int j=res[i]; j<res[i]+ans; j ++)
145                     printf ("%c", r[j]);
146                 printf ("\n");
147             }
148     }
149     return 0;
150 }

 

posted @ 2015-09-05 14:26  罗茜  阅读(408)  评论(0编辑  收藏  举报