POJ3294 Life Forms —— 后缀数组 最长公共子串

题目链接:https://vjudge.net/problem/POJ-3294

 

Life Forms
Time Limit: 5000MS   Memory Limit: 65536K
Total Submissions: 16905   Accepted: 4970

Description

You may have wondered why most extraterrestrial life forms resemble humans, differing by superficial traits such as height, colour, wrinkles, ears, eyebrows and the like. A few bear no human resemblance; these typically have geometric or amorphous shapes like cubes, oil slicks or clouds of dust.

The answer is given in the 146th episode of Star Trek - The Next Generation, titled The Chase. It turns out that the vast majority of the quadrant's life forms ended up with a large fragment of common DNA.

Given the DNA sequences of several life forms represented as strings of letters, you are to find the longest substring that is shared by more than half of them.

Input

Standard input contains several test cases. Each test case begins with 1 ≤ n ≤ 100, the number of life forms. n lines follow; each contains a string of lower case letters representing the DNA sequence of a life form. Each DNA sequence contains at least one and not more than 1000 letters. A line containing 0 follows the last test case.

Output

For each test case, output the longest string or strings shared by more than half of the life forms. If there are many, output all of them in alphabetical order. If there is no solution with at least one letter, output "?". Leave an empty line between test cases.

Sample Input

3
abcdefg
bcdefgh
cdefghi
3
xxx
yyy
zzz
0

Sample Output

bcdefg
cdefgh

?

Source

 

题意:

给出n个字符串,问是否存在至少出现于n/2+1个字符串中的公共子串。如果存在,输出长度最大的;如果有多个答案,按字典序输出所有。

 

题解:

1.将n个字符串拼接在一起,并且相邻两个之间用分隔符隔开,并且分隔符应各异。因此得到新串。

2.求出新串的后缀数组,然后二分公共子串的长度mid:可知当前的mid可将新串的后缀按排名的顺序将其分成若干组,且每一组的最长公共前缀都大于等于mid,于是就在每一组内统计出现了多少个字符串,如果>n/2,即表明当前mid合法,否则不合法,因此可以根据此规则最终求得长度。

3.由于题目还要求按字典序输出所有答案。所以,在求得长度之后,再遍历一遍sa[]数组,并且判断每个分组是否满足要求,若满足,则输出答案。

 

注意点:

1.每个分隔符应该不一样,如果一样,在求后缀数组的时候就很可能从当前字符串匹配到下一个字符串,而这是不允许的,因为每个后缀的匹配最多只能到其所在字符串的串尾。

2.输出答案时,为了避免同一组内多次输出(每一组对应着一个子串),应该加个标记。

 

代码如下:

  1 #include <iostream>
  2 #include <cstdio>
  3 #include <cstring>
  4 #include <algorithm>
  5 #include <vector>
  6 #include <cmath>
  7 #include <queue>
  8 #include <stack>
  9 #include <map>
 10 #include <string>
 11 #include <set>
 12 using namespace std;
 13 typedef long long LL;
 14 const int INF = 2e9;
 15 const LL LNF = 9e18;
 16 const int MOD = 1e9+7;
 17 const int MAXN = 2e5+100;
 18 
 19 int id[MAXN];   //记录属于哪个字符串
 20 int r[MAXN], sa[MAXN], Rank[MAXN], height[MAXN];
 21 int t1[MAXN], t2[MAXN], c[MAXN];
 22 
 23 bool cmp(int *r, int a, int b, int l)
 24 {
 25     return r[a]==r[b] && r[a+l]==r[b+l];
 26 }
 27 
 28 void DA(int str[], int sa[], int Rank[], int height[], int n, int m)
 29 {
 30     n++;
 31     int i, j, p, *x = t1, *y = t2;
 32     for(i = 0; i<m; i++) c[i] = 0;
 33     for(i = 0; i<n; i++) c[x[i] = str[i]]++;
 34     for(i = 1; i<m; i++) c[i] += c[i-1];
 35     for(i = n-1; i>=0; i--) sa[--c[x[i]]] = i;
 36     for(j = 1; j<=n; j <<= 1)
 37     {
 38         p = 0;
 39         for(i = n-j; i<n; i++) y[p++] = i;
 40         for(i = 0; i<n; i++) if(sa[i]>=j) y[p++] = sa[i]-j;
 41 
 42         for(i = 0; i<m; i++) c[i] = 0;
 43         for(i = 0; i<n; i++) c[x[y[i]]]++;
 44         for(i = 1; i<m; i++) c[i] += c[i-1];
 45         for(i = n-1; i>=0; i--) sa[--c[x[y[i]]]] = y[i];
 46 
 47         swap(x, y);
 48         p = 1; x[sa[0]] = 0;
 49         for(i = 1; i<n; i++)
 50             x[sa[i]] = cmp(y, sa[i-1], sa[i], j)?p-1:p++;
 51 
 52         if(p>=n) break;
 53         m = p;
 54     }
 55 
 56     int k = 0;
 57     n--;
 58     for(i = 0; i<=n; i++) Rank[sa[i]] = i;
 59     for(i = 0; i<n; i++)
 60     {
 61         if(k) k--;
 62         j = sa[Rank[i]-1];
 63         while(str[i+k]==str[j+k]) k++;
 64         height[Rank[i]] = k;
 65     }
 66 }
 67 
 68 bool vis[110];
 69 bool test(int n, int len, int k)
 70 {
 71     int cnt = 0;
 72     memset(vis, false, sizeof(vis));
 73     for(int i = 2; i<=len; i++)
 74     {
 75         if(height[i]<k)
 76         {
 77             cnt = 0;
 78             memset(vis, false, sizeof(vis));
 79         }
 80         else
 81         {
 82             if(!vis[id[sa[i-1]]]) vis[id[sa[i-1]]] = true, cnt++;
 83             if(!vis[id[sa[i]]]) vis[id[sa[i]]] = true, cnt++;
 84             if(cnt>n/2) return true;
 85         }
 86     }
 87     return false;
 88 }
 89 
 90 void Print(int n, int len, int k)
 91 {
 92     int cnt = 0, flag = false;
 93     memset(vis, false, sizeof(vis));
 94     for(int i = 2; i<=len; i++)
 95     {
 96         if(height[i]<k)
 97         {
 98             flag = false;
 99             cnt = 0;
100             memset(vis, false, sizeof(vis));
101         }
102         else
103         {
104             if(!vis[id[sa[i-1]]]) vis[id[sa[i-1]]] = true, cnt++;
105             if(!vis[id[sa[i]]]) vis[id[sa[i]]] = true, cnt++;
106             if(cnt>n/2 &&!flag)
107             {
108                 flag = true;    //表明当前组已经输出了
109                 for(int j = sa[i]; j<sa[i]+k; j++)
110                     putchar(r[j]+'a'-1);
111                 putchar('\n');
112             }
113         }
114     }
115 }
116 
117 char str[MAXN];
118 int main()
119 {
120     int n, firCase = false;
121     while(scanf("%d", &n)&&n)
122     {
123         int len = 0;
124         for(int i = 0; i<n; i++)
125         {
126             scanf("%s", str);
127             int LEN = strlen(str);
128             for(int j = 0; j<LEN; j++)
129             {
130                 r[len] = str[j]-'a'+1;
131                 id[len++] = i;
132             }
133             r[len] = 30+i;  //分隔符要各异
134             id[len++] = i;
135         }
136         r[len] = 0;
137         DA(r,sa,Rank,height,len,200);
138 
139         int l = 0, r = 1000;
140         while(l<=r)
141         {
142             int mid = (l+r)>>1;
143             if(test(n,len,mid))
144                 l = mid + 1;
145             else
146                 r = mid - 1;
147         }
148 
149         if(firCase) printf("\n");
150         firCase = true;
151         if(r==0) puts("?");
152         else Print(n, len, r);
153     }
154 }
View Code

 

posted on 2018-02-26 15:11  h_z_cong  阅读(324)  评论(0)  编辑  收藏  举报

导航