Life Forms POJ - 3294 (后缀数组)
You may have wondered why most extraterrestrial life forms resemble humans, differing by superficial traits such as height, colour, wrinkles, ears, eyebrows and the like. A few bear no human resemblance; these typically have geometric or amorphous shapes like cubes, oil slicks or clouds of dust.
The answer is given in the 146th episode of Star Trek - The Next Generation, titled The Chase. It turns out that the vast majority of the quadrant's life forms ended up with a large fragment of common DNA.
Given the DNA sequences of several life forms represented as strings of letters, you are to find the longest substring that is shared by more than half of them.
Input
Standard input contains several test cases. Each test case begins with 1 ≤ n ≤ 100, the number of life forms. n lines follow; each contains a string of lower case letters representing the DNA sequence of a life form. Each DNA sequence contains at least one and not more than 1000 letters. A line containing 0 follows the last test case.
Output
For each test case, output the longest string or strings shared by more than half of the life forms. If there are many, output all of them in alphabetical order. If there is no solution with at least one letter, output "?". Leave an empty line between test cases.
Sample Input
3
abcdefg
bcdefgh
cdefghi
3
xxx
yyy
zzz
0
Sample Output
bcdefg
cdefgh

?
题意:给定n个字符串,找出在超过n/2个不同字符串中出现的最长子串;若有多个,按字典序全部输出
思路:由于是最长,我们可以很快想到ht数组
那么我们就对ht数组扫描,设立区间左端点L,右端点R,如果区间包含的串种类大于n/2,那么我们就看看左端点所对应的串在区间内是否出现超过1次,出现的话L++;每个合法区间的答案是区间内ht的最小值,最后在所有区间中取最大值就好
(我写麻烦了 )
#include<iostream>
#include<cstdio>
#include<cstring>
#include<algorithm>
#include<cmath>
using namespace std;

// POJ 3294 "Life Forms": print every longest substring that occurs in more
// than half of the given strings, in alphabetical order.
//
// Approach: concatenate all strings (each followed by '$'), build a suffix
// array plus a height array whose LCP never crosses a '$', then slide a
// window over the suffix array.  For every right end the window is the
// shortest one that still touches floor(k/2)+1 different strings; the common
// prefix of that window is the minimum height inside it.

const int maxk = 100;       // maximum number of strings per test case
const int maxn = 2e5+5;     // >= maxk * (1000 letters + 1 separator), with slack

char s[maxn];               // all strings joined, each terminated by '$'
int sa[maxn],t[maxn],t2[maxn],c[maxn];
int ht[maxn],rk[maxn];      // ht[r] = truncated LCP of suffixes sa[r-1] and sa[r]
int len[maxk+5];            // len[i] = end offset (exclusive) of string i in s, '$' included
int pos[maxn];              // pos[r] = 1-based index of the string owning suffix sa[r]
int vis[maxk+5];            // vis[j] = number of window suffixes owned by string j
int dq[maxn];               // monotone queue of ranks with increasing ht values
int ans_at[maxn];           // start offsets (in s) of the answers found so far
int k;                      // number of strings in the current test case

// Builds the suffix array of s[0..n-1] into sa by prefix doubling with a
// counting sort.  m is an exclusive upper bound on the character codes.
void build_sa(int n,int m)
{
    int i,*x=t,*y=t2;
    for(i=0;i<m;i++)c[i]=0;
    for(i=0;i<n;i++)c[x[i]=s[i]]++;
    for(i=1;i<m;i++)c[i]+=c[i-1];
    for(i=n-1;i>=0;i--)sa[--c[x[i]]]=i;
    for(int w=1;w<=n;w<<=1)
    {
        int p=0;
        // Suffixes shorter than w have an empty second key and sort first.
        for(i=n-w;i<n;i++)y[p++]=i;
        for(i=0;i<n;i++)if(sa[i]>=w)y[p++]=sa[i]-w;
        for(i=0;i<m;i++)c[i]=0;
        for(i=0;i<n;i++)c[x[y[i]]]++;
        for(i=1;i<m;i++)c[i]+=c[i-1];
        for(i=n-1;i>=0;i--)sa[--c[x[y[i]]]]=y[i];
        swap(x,y);
        p=1;
        x[sa[0]]=0;
        for(i=1;i<n;i++)
        {
            int a=sa[i-1],b=sa[i];
            // The separators are all the same character, so two suffixes can
            // agree on the first key even when one of them ends before a+w.
            // Treat a missing second half as -1 instead of reading rank
            // slots past n, which may hold data from an earlier test case.
            int a2=(a+w<n)?y[a+w]:-1;
            int b2=(b+w<n)?y[b+w]:-1;
            x[b]=(y[a]==y[b]&&a2==b2)?p-1:p++;
        }
        if(p>=n)break;
        m=p;
    }
}

// Fills rk (inverse of sa) and ht for s[0..n-1].  The comparison stops at
// '$', so a reported common prefix never spans two input strings.  Ranks of
// suffixes that start with '$' are left untouched; the caller never reads them.
void getHeight(int n)
{
    int h=0;
    for(int i=0;i<n;i++)rk[sa[i]]=i;
    for(int i=0;i<n-1;i++)
    {
        if(h)h--;
        if(s[i]=='$')continue;
        int j=sa[rk[i]-1];
        while(s[i+h]==s[j+h]&&s[i+h]!='$')h++;
        ht[rk[i]]=h;
    }
}

int main()
{
    while(scanf("%d",&k)==1&&k>0&&k<=maxk)
    {
        memset(len,0,sizeof(len));
        memset(vis,0,sizeof(vis));

        // Read the strings back to back; measure each one from where it was
        // appended instead of rescanning the whole buffer.
        int total=0;
        bool complete=true;
        for(int i=1;i<=k;i++)
        {
            if(scanf("%1000s",s+total)!=1)
            {
                complete=false;
                break;
            }
            total+=(int)strlen(s+total);
            s[total++]='$';
            len[i]=total;
        }
        if(!complete)break;

        // A single string is trivially shared by "more than half".
        if(k==1)
        {
            printf("%.*s\n",len[1]-1,s);
            puts("");
            continue;
        }

        build_sa(total,130);
        getHeight(total);

        // '$' sorts before every letter, so ranks 0..k-1 are the k separator
        // suffixes and the real suffixes occupy ranks k..total-1.
        int need=k/2+1;             // "more than half" of k strings
        int best=0,tot=0;
        int left=k,distinct=0;
        int head=0,tail=0;
        for(int r=k;r<total;r++)
        {
            pos[r]=upper_bound(len+1,len+1+k,sa[r])-len;
            if(vis[pos[r]]++==0)distinct++;

            // Keep ht values in the queue increasing from front to back.
            while(tail>head&&ht[dq[tail-1]]>=ht[r])tail--;
            dq[tail++]=r;

            // Advance the left end while the window still covers `need`
            // different strings without its leftmost suffix.
            while(distinct>need||(distinct==need&&vis[pos[left]]>1))
            {
                if(--vis[pos[left]]==0)distinct--;
                left++;
            }
            if(distinct<need)continue;

            // Common prefix of ranks left..r is min(ht[left+1..r]); need >= 2
            // guarantees left < r, so rank r itself is always a candidate.
            while(dq[head]<=left)head++;
            int common=ht[dq[head]];

            if(common>best)
            {
                best=common;
                tot=0;
                ans_at[tot++]=sa[r];
            }
            else if(common==best&&best>0)
            {
                // Suffixes sharing a prefix are contiguous in sa, so comparing
                // with the most recent answer is enough to drop duplicates.
                if(strncmp(s+ans_at[tot-1],s+sa[r],best)!=0)ans_at[tot++]=sa[r];
            }
        }

        if(!tot)puts("?");
        else for(int i=0;i<tot;i++)printf("%.*s\n",best,s+ans_at[i]);
        puts("");
    }
    return 0;
}