Life Forms POJ - 3294 (后缀数组)

You may have wondered why most extraterrestrial life forms resemble humans, differing by superficial traits such as height, colour, wrinkles, ears, eyebrows and the like. A few bear no human resemblance; these typically have geometric or amorphous shapes like cubes, oil slicks or clouds of dust.

The answer is given in the 146th episode of Star Trek - The Next Generation, titled The Chase. It turns out that the vast majority of the quadrant's life forms ended up with a large fragment of common DNA.

Given the DNA sequences of several life forms represented as strings of letters, you are to find the longest substring that is shared by more than half of them.

Input

Standard input contains several test cases. Each test case begins with 1 ≤ n ≤ 100, the number of life forms. n lines follow; each contains a string of lower case letters representing the DNA sequence of a life form. Each DNA sequence contains at least one and not more than 1000 letters. A line containing 0 follows the last test case.

Output

For each test case, output the longest string or strings shared by more than half of the life forms. If there are many, output all of them in alphabetical order. If there is no solution with at least one letter, output "?". Leave an empty line between test cases.

Sample Input

3
abcdefg
bcdefgh
cdefghi
3
xxx
yyy
zzz
0

Sample Output

bcdefg
cdefgh

?


题意:给定 n 个字符串,找出在超过 n/2 个不同字符串中出现的最长子串;若有多个,按字典序全部输出。

思路:由于是最长,我们可以很快想到ht数组
那么我们就对ht数组扫描,设立区间左端点L,右端点R,如果区间包含的串种类达到 ⌊n/2⌋+1(即大于n/2),那么我们就看看左端点所对应的串在区间内是否出现超过1次,出现的话L++,然后用区间内ht的最小值更新答案,取最大值就好
(我写麻烦了 )


  1 #include<iostream>
  2 #include<cstdio>
  3 #include<cstring>
  4 #include<algorithm>
  5 #include<cmath>
  6 using namespace std;
  7 
  8 const int maxn = 2e5+5;
  9 char s[maxn];
 10 int sa[maxn],t[maxn],t2[maxn],c[maxn],n,k;
 11 char ans[105][1005];
 12 void build_sa(int n,int m)
 13 {
 14     int i,*x=t,*y=t2;
 15     for(i=0;i<m;i++)c[i]=0;
 16     for(i=0;i<n;i++)c[x[i]=s[i]]++;
 17     for(i=1;i<m;i++)c[i]+=c[i-1];
 18     for(i=n-1;i>=0;i--)sa[--c[x[i]]]=i;
 19     for(int k=1;k<=n;k<<=1)
 20     {
 21         int p=0;
 22         for(i=n-k;i<n;i++)y[p++]=i;
 23         for(i=0;i<n;i++)if(sa[i] >= k)y[p++]=sa[i]-k;
 24         for(i=0;i<m;i++)c[i] = 0;
 25         for(i=0;i<n;i++)c[x[y[i]]]++;
 26         for(i=1;i<m;i++)c[i]+=c[i-1];
 27         for(i=n-1;i>=0;i--)sa[--c[x[y[i]]]] = y[i];
 28         swap(x,y);
 29         p=1,x[sa[0]]=0;
 30         for(i=1;i<n;i++)
 31             x[sa[i]] = y[sa[i-1]] == y[sa[i]] && y[sa[i-1]+k] == y[sa[i]+k]?p-1:p++;
 32         if(p>=n)break;
 33         m=p;
 34     }
 35 }
 36 
 37 int ht[maxn],rk[maxn];
 38 
 39 void getHeight(int n,int m)
 40 {
 41     int i,j,k=0;
 42     for(i=0;i<n;i++)rk[sa[i]] = i;
 43     for(i=0;i<n-1;i++)
 44     {
 45         if(k)k--;
 46         if(s[i] == '$')continue;
 47         int j = sa[rk[i]-1];
 48         while(s[i+k] == s[j+k] && s[i+k] != '$')k++;
 49        // printf("%d   %d    %d\n",i,j,k);
 50         ht[rk[i]] = k;
 51     }
 52 }
 53 
 54 int len[105];
 55 int pos[maxn];
 56 int vis[105];
 57 int pre[maxn];
 58 int f[maxn][20];
 59 void st_pre(int n)
 60 {
 61     for(int i=1;i<=n;i++)f[i][0] = ht[i];
 62     int t = log2(n)+1;
 63     for(int j=1;j<t;j++)
 64     {
 65         for(int i=1;i<=n;i++)
 66         {
 67             f[i][j] = min(f[i][j-1],f[i+(1<<(j-1))][j-1]);
 68         }
 69     }
 70 }
 71 
 72 int RMQ(int i,int j)
 73 {
 74     if(i > j)swap(i,j);
 75     i++;
 76     int k = pre[j-i+1];
 77     //printf("%d     %d       %d\n",k,f[i][k],f[j-(1<<k)+1][k]);
 78     return min(f[i][k],f[j-(1<<k)+1][k]);
 79 }
 80 
 81 int sta[maxn];
 82 int main()
 83 {
 84     for(int i=1;i<maxn;i++)pre[i] = log2(i);
 85     while(~scanf("%d",&k) && k)
 86     {
 87         memset(len,0,sizeof(len));
 88         memset(vis,0,sizeof(vis));
 89         int top = 0,ltop = 0,tot = 0;
 90         for(int i=1;i<=k;i++)
 91         {
 92             scanf("%s",s+len[i-1]);
 93             len[i] = strlen(s);
 94             s[len[i]] = '$';
 95             len[i]++;
 96         }
 97         if(k == 1)
 98         {
 99             for(int i=0;i<len[1]-1;i++)printf("%c",s[i]);
100             puts("");
101             continue;
102         }
103         build_sa(len[k],130);
104         getHeight(len[k],k);
105         st_pre(len[k]-1);
106         pos[k] = upper_bound(len+1,len+1+k,sa[k]+1)-len;
107         vis[pos[k]] = 1;
108         int ans_num = 0;
109         int cnt =  1;
110         ltop = 1;
111         sta[++top] = pos[k];
112         int limit = ceil(k*1.0/2);
113         if(limit * 2 == k)limit++;
114 //        for(int i=5;i<len[k];i++)
115 //        {
116 //            printf("%d  %d %d ",i,sa[i],upper_bound(len+1,len+1+k,sa[i]+1)-len);
117 //            for(int j=sa[i];j<len[k];j++)
118 //            {
119 //                if(s[j] != '$')printf("%c",s[j]);
120 //            }
121 //            puts("");
122 //        }
123         for(int i=k+1;i<len[k];i++)
124         {
125             pos[i] = upper_bound(len+1,len+1+k,sa[i]+1)-len;
126             sta[++top] = pos[i];
127             if(!vis[pos[i]])
128             {
129                 vis[pos[i]] = 1;
130                 cnt++;
131             }
132             else
133             {
134                 vis[pos[i]]++;
135                 int tmp = 0;
136                 while(sta[top-tmp] == sta[ltop] && top - tmp > ltop)
137                 {
138                   //  printf("%d    %d   %d     %d    \n",ltop,top-tmp,sta[ltop],sta[top-tmp]);
139                     tmp++;
140                  //   printf("%d    ------------- %d       %d\n",ltop,pos[ltop+k-1],vis[pos[ltop+k-1]]);
141                     vis[pos[ltop+k-1]]--;
142                     ltop++;
143                 }
144             }
145             if(cnt > limit)
146             {
147                 vis[pos[ltop+k-1]]--;
148                 ltop++;
149                 cnt--;
150                 //if(i == 26)printf("%d   %d    %d    %d ========================\n",ltop,pos[ltop+k-1],vis[pos[ltop+k-1]],cnt);
151                 while(vis[pos[ltop+k-1]] > 1)
152                 {
153                     vis[pos[ltop+k-1]]--;
154                     ltop++;
155                 }
156             }
157 
158             if(cnt == limit)
159             {
160                 while(vis[pos[ltop+k-1]] > 1)
161                 {
162                     vis[pos[ltop+k-1]]--;
163                     ltop++;
164                 }
165                 int tmp = RMQ(ltop+k-1,i);
166                 if(tmp > ans_num)
167                 {
168                     ans_num = tmp;
169                     tot = 0;
170                     strncpy(ans[++tot],s+sa[i],ans_num);
171                     ans[tot][ans_num] = '\0';
172                    // printf("%c ===      %d\n",s[sa[i]],sa[i]);
173                 }
174                 else if(tmp == ans_num && tmp )
175                 {
176                      //printf("%c ===      %d\n",s[sa[i]],sa[i]);
177                      if(strncmp(ans[tot],s+sa[i],ans_num))strncpy(ans[++tot],s+sa[i],ans_num),ans[tot][ans_num] = '\0';
178                 }
179             }
180            // printf("%d      %d   ===   %d",i,ltop+k-1,cnt);
181             //printf("                      %d\n",ans_num);
182            // printf("%d          %d                  %d\n",ltop+k-1,i,cnt);
183         }
184        // printf("%d ---\n",ans_num);
185         if(!tot)puts("?");
186         else for(int i=1;i<=tot;i++)printf("%s\n",ans[i]);
187         puts("");
188     }
189 }
View Code

 

 
posted @ 2019-09-07 22:13  进击的黑仔  阅读(208)  评论(0编辑  收藏  举报