hdu 3518 Boring counting (后缀数组)
Boring counting
Time Limit: 2000/1000 MS (Java/Others) Memory Limit: 65536/32768 K (Java/Others)
Total Submission(s): 1331 Accepted Submission(s): 538
Problem Description
035 is now faced with a tough problem: his English teacher gives him a string consisting of n lowercase letters, and he must figure out how many substrings appear at least twice; moreover, such appearances cannot overlap each other.
Take aaaa as an example. "a" appears four times, and "aa" appears two times without overlapping. However, "aaa" can't appear more than once without overlapping, since we can only get "aaa" from [0-2] (string positions begin at 0) and [1-3], and the intervals [0-2] and [1-3] overlap each other. So "aaa" cannot be taken into account. Therefore, the answer is 2 ("a" and "aa").
Take aaaa as an example. "a" appears four times, and "aa" appears two times without overlapping. However, "aaa" can't appear more than once without overlapping, since we can only get "aaa" from [0-2] (string positions begin at 0) and [1-3], and the intervals [0-2] and [1-3] overlap each other. So "aaa" cannot be taken into account. Therefore, the answer is 2 ("a" and "aa").
The input data consists of several test cases. The input ends with a line "#". Each test case contains a string consisting of lowercase letters; the length n won't exceed 1000 (n <= 1000).
For each test case, output an integer ans, which represents the answer for the test case. You'd better use int64 to avoid unnecessary trouble.
Sample Input
Sample Output
/*
 * HDU 3518 "Boring counting" -- judged 78MS, 252K, 2341 B, G++.
 *
 * Problem:
 *   Count the distinct substrings that occur at least twice in the input
 *   string such that two of the occurrences do not overlap.
 *
 * Example:
 *   input:  aabaaaab
 *   output: 5
 *   (the qualifying substrings are a, b, aa, ab and aab)
 *
 * Approach: suffix array.
 *   sa[i]   - start index of the suffix that is i-th in sorted order
 *   rank[i] - position of the suffix starting at i in sorted order
 *   For the details of the construction see Luo Suiqian's paper on
 *   suffix arrays; this problem is one application of it.
 */
#include <stdio.h>
#include <string.h>

#define N 1005

/* Scratch buffers used by DA(). */
int wa[N], wb[N], wv[N], ws[N];
/* Rank array and height array, filled by get_height(). */
int rank[N], height[N];

/* Returns the larger of a and b. */
int Max(int a, int b)
{
    return a > b ? a : b;
}

/* Returns the smaller of a and b. */
int Min(int a, int b)
{
    return a < b ? a : b;
}

/*
 * Returns non-zero when positions a and b have equal keys in r both at
 * the position itself and l places further on.  No bounds checking is
 * done on a+l / b+l.
 */
int cmp(int *r, int a, int b, int l)
{
    return r[a] == r[b] && r[a + l] == r[b + l];
}

/*
 * Builds the suffix array of r[0..n-1] into sa[0..n-1] by prefix doubling.
 *
 * r  - input symbols, each in [0, m)
 * sa - output: suffix start indices in sorted order
 * n  - number of symbols (main() passes the string length plus one, with
 *      a trailing 0 that is smaller than every real symbol)
 * m  - initial size of the key range used by the counting sort
 */
void DA(int *r, int *sa, int n, int m)
{
    int i, j, p;
    int *x = wa, *y = wb, *t;

    /* Counting sort of the suffixes by their first symbol. */
    for (i = 0; i < m; i++)
        ws[i] = 0;
    for (i = 0; i < n; i++)
        ws[x[i] = r[i]]++;
    for (i = 1; i < m; i++)
        ws[i] += ws[i - 1];
    for (i = n - 1; i >= 0; i--)
        sa[--ws[x[i]]] = i;

    /* Each round doubles the compared prefix length j; p is the number
     * of distinct keys found, and the loop ends once all n are distinct. */
    for (p = 1, j = 1; p < n; j *= 2, m = p) {
        /* Order by the second key: suffixes whose second half runs off
         * the end come first, then the rest follow the previous sa. */
        p = 0;
        for (i = n - j; i < n; i++)
            y[p++] = i;
        for (i = 0; i < n; i++) {
            if (sa[i] >= j)
                y[p++] = sa[i] - j;
        }

        /* Stable counting sort by the first key. */
        for (i = 0; i < n; i++)
            wv[i] = x[y[i]];
        for (i = 0; i < m; i++)
            ws[i] = 0;
        for (i = 0; i < n; i++)
            ws[wv[i]]++;
        for (i = 1; i < m; i++)
            ws[i] += ws[i - 1];
        for (i = n - 1; i >= 0; i--)
            sa[--ws[wv[i]]] = y[i];

        /* Swap the buffers so y holds the old keys, then assign new keys:
         * neighbours in sa that compare equal share a key. */
        t = x;
        x = y;
        y = t;
        p = 1;
        x[sa[0]] = 0;
        for (i = 1; i < n; i++) {
            if (cmp(y, sa[i - 1], sa[i], j))
                x[sa[i]] = p - 1;
            else
                x[sa[i]] = p++;
        }
    }
}

/*
 * Fills the global rank[] and height[] arrays.
 *
 * r  - the symbols that sa was built from (n real symbols plus the
 *      trailing 0 that main() appends)
 * sa - suffix array with n+1 entries, sa[0] being the trailing-0 suffix
 * n  - number of real symbols
 *
 * After the call height[i] is the length of the common prefix of the
 * suffixes sa[i-1] and sa[i], for i in 1..n.
 */
void get_height(int *r, int *sa, int n)
{
    int i, j;
    int k = 0;

    for (i = 1; i <= n; i++)
        rank[sa[i]] = i;

    for (i = 0; i < n; i++) {
        /* The match length drops by at most one from suffix i-1 to i,
         * so resume from k-1 instead of from zero. */
        if (k > 0)
            k--;
        j = sa[rank[i] - 1];
        while (r[i + k] == r[j + k])
            k++;
        height[rank[i]] = k;
    }
}

/*
 * Counts the distinct substrings that have two non-overlapping
 * occurrences, using sa[0..len] and the global height[1..len].
 */
static int count_repeats(const int *sa, int len)
{
    int cnt = 0;

    /* Two non-overlapping copies of length l need 2*l <= len. */
    for (int l = 1; l <= len / 2; l++) {
        int minn = N;
        int maxn = -1;

        /* A maximal run of height[j] >= l groups the suffixes that share
         * one particular prefix of length l.  That prefix counts when the
         * leftmost and rightmost starts are at least l apart. */
        for (int j = 1; j <= len; j++) {
            if (height[j] >= l) {
                minn = Min(minn, Min(sa[j - 1], sa[j]));
                maxn = Max(maxn, Max(sa[j - 1], sa[j]));
            } else {
                if (minn + l <= maxn)
                    cnt++;
                minn = N;
                maxn = -1;
            }
        }
        /* Close the run that reaches the end of the suffix array. */
        if (minn + l <= maxn)
            cnt++;
    }
    return cnt;
}

int main(void)
{
    char str[N];
    int s[N], sa[N]; /* sa is the suffix array */

    /* The field width is N-1 so that an over-long token cannot write
     * past the end of str. */
    while (scanf("%1004s", str) == 1) {
        if (str[0] == '#')
            break;

        int len = (int)strlen(str);

        /* Map 'a'..'z' to 1..26 and append 0 as the end marker. */
        for (int i = 0; i < len; i++)
            s[i] = str[i] - 'a' + 1;
        s[len] = 0;

        DA(s, sa, len + 1, 27);
        get_height(s, sa, len);

        printf("%d\n", count_repeats(sa, len));
    }
    return 0;
}