SPOJ - PHRASES Relevant Phrases of Annihilation —— 后缀数组 出现于所有字符串中两次且不重叠的最长公共子串

题目链接:https://vjudge.net/problem/SPOJ-PHRASES

 

PHRASES - Relevant Phrases of Annihilation

no tags 

 

You are the King of Byteland. Your agents have just intercepted a batch of encrypted enemy messages concerning the date of the planned attack on your island. You immediately send for the Bytelandian Cryptographer, but he is currently busy eating popcorn and claims that he may only decrypt the most important part of the text (since the rest would be a waste of his time). You decide to select the fragment of the text which the enemy has strongly emphasised, evidently regarding it as the most important. So, you are looking for a fragment of text which appears in all the messages disjointly at least twice. Since you are not overfond of the cryptographer, try to make this fragment as long as possible.

Input

The first line of input contains a single positive integer t<=10, the number of test cases. t test cases follow. Each test case begins with integer n (n<=10), the number of messages. The next n lines contain the messages, consisting only of between 2 and 10000 characters 'a'-'z', possibly with some additional trailing white space which should be ignored.

Output

For each test case output the length of longest string which appears disjointly at least twice in all of the messages.

Example

Input:
1
4
abbabba
dabddkababa
bacaba
baba

Output:
2

(in the example above, the longest substring which fulfills the requirements is 'ba')

 

 

题意:

给出n个字符串,求出现于所有字符串中两次且不重叠的最长公共子串,输出长度。

 

题解:

1.将所有字符串拼接在一起,相邻两个之间用各异的分隔符隔开,得到新串。

2.求出新串的后缀数组,然后二分mid:mid将新串的后缀分成若干组,每一组对应着一个公共子串,且长度大于等于mid。在每一组中,统计公共子串出现于每个字符串中的最小和最大下标,如果最大下标-最小下标>=mid,即表明公共子串出现在该字符串内两次且不重叠。如果在同一组内,所有字符串都满足最大下标-最小下标>=mid,那么表明当前mid合法,否则不合法,因此根据此规则求出答案。

 

代码如下:

  1 #include <iostream>
  2 #include <cstdio>
  3 #include <cstring>
  4 #include <algorithm>
  5 #include <vector>
  6 #include <cmath>
  7 #include <queue>
  8 #include <stack>
  9 #include <map>
 10 #include <string>
 11 #include <set>
 12 using namespace std;
 13 typedef long long LL;
 14 const int INF = 2e9;
 15 const LL LNF = 9e18;
 16 const int MOD = 1e9+7;
 17 const int MAXN = 2e5+100;
 18 
 19 int id[MAXN];
 20 int r[MAXN], sa[MAXN], Rank[MAXN], height[MAXN];
 21 int t1[MAXN], t2[MAXN], c[MAXN];
 22 
 23 bool cmp(int *r, int a, int b, int l)
 24 {
 25     return r[a]==r[b] && r[a+l]==r[b+l];
 26 }
 27 
 28 void DA(int str[], int sa[], int Rank[], int height[], int n, int m)
 29 {
 30     n++;
 31     int i, j, p, *x = t1, *y = t2;
 32     for(i = 0; i<m; i++) c[i] = 0;
 33     for(i = 0; i<n; i++) c[x[i] = str[i]]++;
 34     for(i = 1; i<m; i++) c[i] += c[i-1];
 35     for(i = n-1; i>=0; i--) sa[--c[x[i]]] = i;
 36     for(j = 1; j<=n; j <<= 1)
 37     {
 38         p = 0;
 39         for(i = n-j; i<n; i++) y[p++] = i;
 40         for(i = 0; i<n; i++) if(sa[i]>=j) y[p++] = sa[i]-j;
 41 
 42         for(i = 0; i<m; i++) c[i] = 0;
 43         for(i = 0; i<n; i++) c[x[y[i]]]++;
 44         for(i = 1; i<m; i++) c[i] += c[i-1];
 45         for(i = n-1; i>=0; i--) sa[--c[x[y[i]]]] = y[i];
 46 
 47         swap(x, y);
 48         p = 1; x[sa[0]] = 0;
 49         for(i = 1; i<n; i++)
 50             x[sa[i]] = cmp(y, sa[i-1], sa[i], j)?p-1:p++;
 51 
 52         if(p>=n) break;
 53         m = p;
 54     }
 55 
 56     int k = 0;
 57     n--;
 58     for(i = 0; i<=n; i++) Rank[sa[i]] = i;
 59     for(i = 0; i<n; i++)
 60     {
 61         if(k) k--;
 62         j = sa[Rank[i]-1];
 63         while(str[i+k]==str[j+k]) k++;
 64         height[Rank[i]] = k;
 65     }
 66 }
 67 
 68 //pos用于记录在当前组中,字符串i的子串出现的最小下标以及最大下标
 69 //vis用于记录在当前组中,字符串i是否已经出现了两个不重叠的子串
 70 int pos[15][2], vis[15];
 71 bool test(int n, int len, int k)
 72 {
 73     int cnt = 0;
 74     memset(pos, -1, sizeof(pos));
 75     memset(vis, false, sizeof(vis));
 76     for(int i = 2; i<=len; i++)
 77     {
 78         if(height[i]<k)
 79         {
 80             cnt = 0;
 81             memset(pos, -1, sizeof(pos));
 82             memset(vis, false, sizeof(vis));
 83         }
 84         else
 85         {
 86             int b1 = id[sa[i-1]], b2 = id[sa[i]];
 87             pos[b1][0] = pos[b1][0]==-1?sa[i-1]:min(pos[b1][0], sa[i-1]);   //最小下标
 88             pos[b1][1] = pos[b1][1]==-1?sa[i-1]:max(pos[b1][1], sa[i-1]);   //最大下标
 89             pos[b2][0] = pos[b2][0]==-1?sa[i]:min(pos[b2][0], sa[i]);
 90             pos[b2][1] = pos[b2][1]==-1?sa[i]:max(pos[b2][1], sa[i]);
 91 
 92             if(!vis[b1] && pos[b1][0]!=-1 && pos[b1][1]!=-1 && pos[b1][1]-pos[b1][0]>=k)
 93                 vis[b1] = true, cnt++;
 94             if(!vis[b2] && pos[b2][0]!=-1 && pos[b2][1]!=-1 && pos[b2][1]-pos[b2][0]>=k)
 95                 vis[b2] = true, cnt++;
 96             if(cnt==n) return true;
 97         }
 98     }
 99     return false;
100 }
101 
102 char str[MAXN];
103 int main()
104 {
105     int T, n;
106     scanf("%d", &T);
107     while(T--)
108     {
109         scanf("%d", &n);
110         int len = 0;
111         for(int i = 0; i<n; i++)
112         {
113             scanf("%s", str);
114             int LEN = strlen(str);
115             for(int j = 0; j<LEN; j++)
116             {
117                 r[len] = str[j];
118                 id[len++] = i;
119             }
120             r[len] = 130+i;
121             id[len++] = i;
122         }
123         r[len] = 0;
124         DA(r,sa,Rank,height,len,150);
125 
126         int l = 0, r = len;
127         while(l<=r)
128         {
129             int mid = (l+r)>>1;
130             if(test(n,len,mid))
131                 l = mid + 1;
132             else
133                 r = mid - 1;
134         }
135         printf("%d\n", r);
136     }
137 }
View Code

 

posted on 2018-02-26 16:37  h_z_cong  阅读(194)  评论(0)  编辑  收藏  举报

导航