后缀数组的学习不得不告一段落了,还有好多要学的东西呢,最重要的是没有那么多时间了,人生苦短啊!
总的来说学得还比较顺利,除了没有深入研究以外个人感觉还行。做题的时候发现了一个奇怪的现象:别人认为很简单的题我就是做不会,别人认为很难的题我却能很快找到思路,比如pku3294就是后者。好吧,就从这儿说起。
这一题的意思是给你n个字符串,让你求在超过一半的串中出现的最长子串。一看到这个题就想到了height数组分组:先二分答案,找到最大的len值,然后再输出满足len的所有组的子串(很简单的思路,不知道为啥网上给这道题定位为难,感觉3729的思路都比它难想)。
想想也没啥好说的了,代码如下:
#pragma warning(disable:4786)
#include<stdio.h>
#include<string.h>
#include<string>
#include<iostream>
#include<set>
using namespace std;

set<string> s;  // not used by the algorithm below; only cleared at the end of each test case

#define maxm 110000
#define maxn 110000

// sa[i]      : start position of the i-th smallest suffix of the text.
// height[i]  : length of the common prefix of the suffixes sa[i-1] and sa[i].
// bar        : counting-sort buckets used by get_sa.
// Rank, Rank_f, Result_s : work buffers for get_sa; get_height leaves the
//              final rank of every suffix in Rank.
int sa[maxn],height[maxn],bar[maxm],Rank[maxn],Rank_f[maxn],Result_s[maxn];
// visit[id]  : number of the last height group in which string `id` was seen.
// map[pos]   : 1-based index of the input string that owns text position `pos`
//              (filled in by main for letter positions only).
// NOTE: every use below is spelled `::map`, so the name cannot become ambiguous
// with std::map if a standard header happens to declare it.
int visit[1005],map[maxn];

/*
 * Returns true when positions a and b have the same key pair
 * (r[a], r[a+len]) and (r[b], r[b+len]).
 * The second element is only read when the first ones are equal.
 */
bool cmp(int *r,int a,int b,int len)
{
    return r[a] == r[b]&&r[a+len] == r[b+len];
}

/*
 * Builds the suffix array of r[0..n-1] into the global `sa` by prefix doubling
 * with counting sorts.
 *
 * r : text; every value is used as a bucket index, so it must lie in
 *     [0, maxm-2].
 * n : number of symbols in r, including the terminator.
 *
 * The caller (main) terminates the text with a 0 that is smaller than every
 * other symbol and occurs only once; get_height relies on that suffix being
 * sa[0].
 */
void get_sa(int *r,int n)
{
    int i,j,p,*rank = Rank,*rank_f = Rank_f,*result_s = Result_s,*t,m = maxm-2;

    // Initial pass: counting sort of the suffixes by their first symbol.
    for (i = 0; i <= m; i++) bar[i] = 0;
    for (i = 0; i < n; i++) bar[rank[i] = r[i]]++;
    for (i = 0; i < m; i++) bar[i+1] += bar[i];
    for (i = n-1; i >= 0; i--) sa[--bar[rank[i]]] = i;

    // Each round doubles the compared prefix length j; p counts the distinct
    // ranks produced, and the loop stops once all n suffixes are distinct.
    for (j = 1,p = 1; p < n; j *= 2,m = p){
        // Order by the second key: suffixes with no second half come first,
        // the rest follow in the order of the current sa shifted by j.
        for (p = 0,i = n-j; i < n; i++) result_s[p++] = i;
        for (i = 0; i < n; i++) if (sa[i] >= j) result_s[p++] = sa[i]-j;

        // Stable counting sort by the first key.
        for (i = 0; i < n; i++) rank_f[i] = rank[result_s[i]];
        for (i = 0; i <= m; i++) bar[i] = 0;
        for (i = 0; i < n; i++) bar[rank_f[i]]++;
        for (i = 0; i < m; i++) bar[i+1] += bar[i];
        for (i = n-1; i >= 0; i--) sa[--bar[rank_f[i]]] = result_s[i];

        // Swap buffers: result_s now holds the previous ranks, rank gets the new ones.
        t = result_s; result_s = rank; rank = t;
        for (rank[sa[0]] = 0,i = 1,p = 1; i < n; i++)
            rank[sa[i]] = cmp(result_s,sa[i],sa[i-1],j)?p-1:p++;
    }
}

/*
 * Fills the global `height` array from `sa` and leaves the rank of every
 * suffix in Rank.
 *
 * r : the text that was passed to get_sa.
 * n : its length, including the terminator.
 *
 * Position n-1 (the terminator) is skipped: it is expected to be sa[0], so
 * every other suffix has a predecessor sa[rank-1].
 */
void get_height(int *r,int n)
{
    int i,j,*rank = Rank,len = 0;
    for (i = 0; i < n; i++) rank[sa[i]] = i;
    height[0] = 0;
    for (i = 0; i < n-1; i++){
        // Start from the previous match length minus one instead of from zero.
        if (len != 0) len--;
        for (j = sa[rank[i]-1]; r[j+len] == r[i+len]; len++);
        height[rank[i]] = len;
    }
}

/*
 * Tests whether some run of consecutive suffixes sa[1..len-1] whose pairwise
 * height is at least `mid` contains suffixes from at least `n` different
 * input strings.
 *
 * mid : candidate substring length.
 * len : one past the last index of sa to scan.
 * n   : required number of distinct strings. A run of a single suffix is
 *       never tested against it, so n == 1 is not supported here; main
 *       handles that case itself.
 */
bool check(int mid,int len,int n)
{
    int num,i,k = 1;
    memset(visit,0,sizeof(visit));
    num = 1;
    visit[::map[sa[1]]] = 1;
    for (i = 2; i < len; i++){
        if (height[i] >= mid){
            // Same group: count the owning string once per group.
            if (visit[::map[sa[i]]] != k) {num++; visit[::map[sa[i]]] = k;}
            if (num >= n) return true;
        }
        else{
            // A new group starts at sa[i]; k is the group number.
            num = 1;
            k++;
            visit[::map[sa[i]]] = k;
        }
    }
    return false;
}

/* Prints `length` symbols of r starting at `start` as letters, followed by a newline. */
static void print_prefix(const int *r,int start,int length)
{
    for (int j = start; j < start + length; j++) printf ("%c",r[j] - 1 + 'a');
    printf ("\n");
}

/*
 * Prints, one per line and in suffix-array order, the length-`mid` prefix of
 * every height group (see check) that covers at least `n` different strings.
 *
 * r   : the text.
 * mid : substring length to report.
 * len : one past the last index of sa to scan.
 * n   : required number of distinct strings (n >= 2, as for check).
 */
void solve(int *r,int mid,int len,int n)
{
    int num,i,k = 1;
    if (len < 2) return;  // no suffix to scan
    memset(visit,0,sizeof(visit));
    num = 1;
    visit[::map[sa[1]]] = 1;
    for (i = 2; i < len; i++){
        if (height[i] >= mid){
            if (visit[::map[sa[i]]] != k) {num++; visit[::map[sa[i]]] = k;}
        }
        else{
            // The group ending at sa[i-1] is complete: report it if it qualifies.
            if (num >= n) print_prefix(r,sa[i-1],mid);
            num = 1;
            k++;
            visit[::map[sa[i]]] = k;
        }
    }
    // The last group has no following suffix to close it, so flush it here.
    if (num >= n) print_prefix(r,sa[len-1],mid);
}

/*
 * Reads test cases until a count of 0 (or end of input). Each case is a
 * count n followed by n strings. Prints every longest substring that occurs
 * in more than half of the strings, or "?" when there is none; cases are
 * separated by a blank line.
 *
 * Strings are assumed to consist of lowercase letters a-z only
 * (NOTE(review): not validated - confirm against the input specification).
 */
int main()
{
    static int r[maxn];  // concatenated text; static to keep it off the stack
    char str[1100];
    int n;
    bool f = false;
    while (scanf ("%d",&n) == 1 && n > 0){
        int i,j,k,len,longest = 0;

        // visit[] is indexed by the 1-based string number.
        if (n >= static_cast<int>(sizeof(visit)/sizeof(visit[0]))){
            fprintf (stderr,"too many strings: %d\n",n);
            return 1;
        }

        // Build the text: letters become 1..26, strings are separated by the
        // distinct values 1000+i, and a single 0 terminates the text.
        for (j = 0,i = 1; i <= n; i++){
            memset(str,0,sizeof(str));
            if (scanf ("%1099s",str) != 1) return 0;  // truncated input
            len = static_cast<int>(strlen(str));
            // Room for an optional separator, the letters and the final 0.
            if (j + len + 2 > maxn){
                fprintf (stderr,"input too long\n");
                return 1;
            }
            if (len > longest) longest = len;
            if (i > 1) r[j++] = 1000+i;
            for (k = 0; k < len; k++){
                r[j] = str[k] - 'a' + 1;
                ::map[j] = i;
                j++;
            }
        }
        r[j++] = 0;
        len = j;

        if (f) printf ("\n");
        else f = true;

        if (n == 1){
            // A single string is trivially shared by more than half of the
            // input, so it is its own answer.
            printf ("%s\n",str);
        }
        else{
            get_sa(r,len);
            get_height(r,len);
            // sa[0] is the terminator and the n-1 separator suffixes sort
            // last, so only sa[1..len-n] start with a letter.
            len = len-n+1;
            // Binary search for the largest length accepted by check.
            int lo = 1,hi = longest,mid,sum = n/2+1;
            while (lo <= hi){
                mid = (lo+hi)/2;
                if (check(mid,len,sum)) lo = mid+1;
                else hi = mid-1;
            }
            if (hi == 0) printf ("?\n");
            else solve(r,hi,len,sum);
        }
        s.clear();
    }
    return 0;
}