后缀数组的学习不得不告一段落了,还有好多要学的东西呢,最重要的是没有那么多时间了,人生苦短啊!

  总的来说学得还比较顺利,除了没有深入研究以外个人感觉还行。做题的时候发现了一个奇怪的现象:别人认为很简单的题我就是做不会,别人认为很难的题我却能很快找到思路,比如pku3294就是后者,好吧,就从这儿说起。

  这一题的意思是给你n个字符串,让你求在超过一半的串中出现的最长子串。一看到这个题就想到了height数组分组:先二分答案,找到最大的len值,然后再输出满足len的所有组的子串(很简单的思路,不知道为啥网上给这道题定位为难,感觉3729的思路都比它难想)。

  想想也没啥好说的了,代码如下:

#pragma warning(disable:4786)
#include<stdio.h>
#include<string.h>
#include<string>
#include<iostream>
#include<set>
using namespace std;
// Global string set; within this file it is only cleared at the end of each
// test case in main() and never filled.
set<string> s;
// Capacity of the counting-sort bucket array `bar`.
#define maxm 110000
// Capacity of the per-position arrays below.
#define maxn 110000
// sa       : suffix array, filled by get_sa() (sa[k] = start of the k-th smallest suffix).
// height   : filled by get_height(); height[k] is the length of the common
//            prefix of the suffixes starting at sa[k-1] and sa[k].
// bar      : bucket counters for the counting sorts inside get_sa().
// Rank, Rank_f, Result_s : work buffers used by get_sa(); get_height() reuses
//            Rank as the inverse of sa.
int sa[maxn],height[maxn],bar[maxm],Rank[maxn],Rank_f[maxn],Result_s[maxn];
// visit : per-string marker indexed by map[] values; check()/solve() store the
//         id of the group in which that string was last seen.
// map   : for every letter position of the concatenated text, the 1-based
//         index of the input string it came from (written in main()).
int visit[1005],map[maxn];
// Returns true when the pairs (r[a], r[a+len]) and (r[b], r[b+len]) are equal.
// The second elements are only read when the first elements already match.
bool cmp(int *r,int a,int b,int len)
{
	if (r[a] != r[b]) return false;
	return r[a+len] == r[b+len];
}
void get_sa(int *r,int n)
{
	int i,j,p,*rank = Rank,*rank_f = Rank_f,*result_s = Result_s,*t,m = maxm-2;
	for (i = 0; i <= m; i++) bar[i] = 0;
	for (i = 0; i < n; i++) bar[rank[i] = r[i]]++;
	for (i = 0; i< m; i++) bar[i+1] += bar[i];
	for (i = n-1; i>= 0; i--) sa[--bar[rank[i]]] = i;
	for (j = 1,p = 1; p < n; j *= 2,m = p){
		for (p = 0,i = n-j; i < n; i++)  result_s[p++] = i;
		for (i = 0; i< n; i++) if (sa[i] >= j) result_s[p++] = sa[i] -j;

		for (i = 0; i < n; i++) rank_f[i] = rank[result_s[i]];
		for (i = 0; i<=m; i++) bar[i] = 0;
		for (i = 0; i< n; i++) bar[rank_f[i]]++;
		for (i = 0; i< m; i++) bar[i+1] += bar[i];
		for (i = n-1; i >= 0; i--) sa[--bar[rank_f[i]]] = result_s[i];

		t = result_s; result_s = rank; rank = t;
		for (rank[sa[0]] = 0,i = 1,p = 1; i < n; i++) 
			rank[sa[i]] = cmp(result_s,sa[i],sa[i-1],j)?p-1:p++;
	}
}
void get_height(int *r,int n)
{
	int i,j,*rank = Rank,len = 0;
	for (i = 0; i< n; i++) rank[sa[i]] = i;
	height[0] = 0;
	for (i = 0; i < n-1; i++){
		if (len != 0) len--;
		for (j = sa[rank[i] -1]; r[j+len] == r[i+len]; len++);
		height[rank[i]] = len;
	}
}
// Decides whether some run of consecutive ranks in [1, len) whose height
// values are all >= mid contains suffixes from at least n different strings
// (strings are identified through map[sa[rank]]).
//
// visit[] remembers, for each string, the id of the last run it was counted
// in, so a string is counted once per run. The count is only tested right
// after a suffix joins an existing run, never for a run of a single suffix.
bool check(int mid,int len,int n)
{
	memset(visit,0,sizeof(visit));
	int group = 1;      // id of the run currently being scanned
	int distinct = 1;   // number of different strings seen in that run
	visit[map[sa[1]]] = group;

	for (int pos = 2; pos < len; ++pos){
		int owner = map[sa[pos]];
		if (height[pos] < mid){
			// The common prefix is too short: start a new run at this suffix.
			++group;
			distinct = 1;
			visit[owner] = group;
			continue;
		}
		if (visit[owner] != group){
			++distinct;
			visit[owner] = group;
		}
		if (distinct >= n) return true;
	}
	return false;
}
// Prints, one per line, the length-`mid` prefix shared by every run of
// consecutive ranks in [1, len) whose height values are all >= mid and whose
// suffixes come from at least n different strings (per map[sa[rank]]).
//
//   r   : integer text; symbols are decoded for printing as value - 1 + 'a'
//   mid : length of the substrings to print
//   len : exclusive upper bound of the ranks to scan (rank 0 is skipped)
//   n   : minimum number of different strings a run must cover
//
// Runs are visited in suffix-array order, so output is in that order.
// The loop runs one step past the last rank so that the final run is also
// closed and printed; previously a qualifying run ending at rank len-1 was
// silently dropped.
void solve(int *r,int mid,int len,int n)
{
	int num,i,j,k = 1; memset(visit,0,sizeof(visit));
	num = 1; visit[map[sa[1]]] = 1;
	for (i = 2; i <= len; i++){
		if (i < len && height[i] >= mid){
			// Same run: count the owning string once per run.
			if (visit[map[sa[i]]] != k) {num++; visit[map[sa[i]]] = k;}
			continue;
		}
		// The run ended at rank i-1 (either broken by a short common prefix
		// or because the scanned range is exhausted).
		if (num >= n){
			for (j = sa[i-1]; j < sa[i-1] + mid; j++)
				printf ("%c",r[j] - 1 + 'a');
			printf ("\n");
		}
		// Open the next run, unless we are on the sentinel step past the end.
		if (i < len) {num = 1; k++; visit[map[sa[i]]] = k;}
	}
}
int main()
{
	int n,i,j,k,len,r[110000],max = 0;
	char str[1100]; bool f=false;
	while (scanf ("%d",&n) && n){
		if (f) printf ("\n");
		else f = true;
		for (j = 0,i = 1; i<= n; i++){
			memset(str,0,sizeof(str));
			scanf ("%s",&str); len = strlen(str); max = max>len?max:len;
			if (i > 1) r[j++] = 1000+i;
			for (k = 0; k < len; k++){
				r[j++] = str[k] - 'a' + 1;
				map[j-1] = i;
			}
		}
		r[j++] = 0; len = j;
		get_sa(r,len); get_height(r,len); len = len-n+1;
		int start = 1,end = max,mid,sum = n/2+1;//二分查找函数
		while (start <= end){
			mid =  (start+end)/2;
			if (check(mid,len,sum)) start = mid+1;
			else end = mid-1;
		}
		if (end == 0) printf ("?\n");
		else solve(r,end,len,sum);
		s.clear();
	}
	return 0;
}

 

posted on 2010-07-24 17:46  looker  阅读(242)  评论(0)  编辑  收藏  举报