pat 1005 Programming Pattern (35 分)
Programmers often have a preference among program constructs. For example, some may prefer `if(0==a)`, while others may prefer `if(!a)`. Analyzing such patterns can help to narrow down a programmer's identity, which is useful for detecting plagiarism.
Now given some text sampled from someone's program, can you find the person's most commonly used pattern of a specific length?
Input Specification:
Each input file contains one test case. For each case, there is one line consisting of the pattern length N (1 ≤ N ≤ 1048576), followed by one line no less than N and no more than 1048576 characters in length, terminated by a carriage return `\n`. The entire input is case sensitive.
Output Specification:
For each test case, print in one line the length-N substring that occurs most frequently in the input, followed by a space and the number of times it has occurred in the input. If there are multiple such substrings, print the lexicographically smallest one.
Whitespace characters in the input should be printed as they are. Also note that there may be multiple occurrences of the same substring overlapping each other.
Sample Input 1:
4
//A can can can a can.
Sample Output 1:
 can 4
Sample Input 2:
3
int a=~~~~~~~~~~~~~~~~~~~~~0;
Sample Output 2:
~~~ 19
这题是字符串很常规的题目,可以用后缀数组来做,但是我是用hash硬搞的,使用了自然溢出,我甚至连字典序都没盘,就过了,再次说明pat的数据实在是太水了。
另外,pat的g++编译器不能用gets是什么鬼,求g++的版本???
// PAT (Top Level) 1005 "Programming Pattern" -- polynomial-hash solution.
//
// Reads the pattern length n and one line of text, then prints the length-n
// substring that occurs most often (overlaps allowed), a space, and its count.
// Ties on the count are broken by choosing the lexicographically smallest
// substring, as the problem statement requires.
//
// Hashes use natural unsigned 64-bit overflow (arithmetic modulo 2^64), so two
// different substrings could in principle collide; no collision check is done.
#include<cstdio>
#include<cstring>
#include<cmath>
#include<iostream>
#include<algorithm>
#include<cstdlib>
using namespace std;
#define ui unsigned long long
int const N=1048576+100;   // maximum text length plus slack
ui const p=100007;         // hash base
int n;                     // pattern length read from the input
ui h[N],pw[N];             // h[i] = hash of s[1..i], pw[i] = p^i
char s[N];                 // text, 1-indexed: s[1..len]
struct node{
    ui v;                  // hash of the window s[id-n+1..id]
    int id;                // index of the window's last character
}a[N];

// Orders windows by hash value so equal windows become adjacent after sorting.
bool cmp(const node& x,const node& y){
    return x.v<y.v;
}

// Hash of s[l..r] (1-indexed, inclusive). An empty range (r == l-1) yields 0.
static ui range_hash(int l,int r){
    return h[r]-h[l-1]*pw[r-l+1];
}

// Returns true when the length-n window starting at x is lexicographically
// smaller than the length-n window starting at y. The longest common prefix is
// located by binary search over prefix hashes, so one call costs O(log n).
static bool window_less(int x,int y){
    int lo=0,hi=n;         // invariant: the first lo characters match
    while(lo<hi){
        int mid=lo+(hi-lo+1)/2;
        if(range_hash(x,x+mid-1)==range_hash(y,y+mid-1)) lo=mid;
        else hi=mid-1;
    }
    if(lo==n) return false;   // identical windows
    return (unsigned char)s[x+lo]<(unsigned char)s[y+lo];
}

int main(){
    if(scanf("%d",&n)!=1) return 0;
    // Discard whatever remains of the first line, including its newline.
    int c;
    while((c=getchar())!='\n' && c!=EOF){}
    // Bounded read of the text line; fgets keeps the newline, so strip it.
    if(!fgets(s+1,N-1,stdin)) return 0;
    int len=static_cast<int>(strlen(s+1));
    if(len>0 && s[len]=='\n') s[len--]='\0';
    // Nothing sensible to print when no window of length n exists.
    if(n<1 || n>len) return 0;

    pw[0]=1;
    for(int i=1;i<=len;i++){
        pw[i]=pw[i-1]*p;
        h[i]=h[i-1]*p+(unsigned char)s[i];
    }
    for(int i=n;i<=len;i++){
        a[i].id=i;
        a[i].v=range_hash(i-n+1,i);
    }
    sort(a+n,a+len+1,cmp);

    int cnt=0;   // best occurrence count so far
    int x=1;     // start index of the best window so far
    for(int i=n;i<=len;){
        // [i, j) is one run of equal hashes, i.e. one distinct substring.
        int j=i;
        while(j<=len && a[j].v==a[i].v) j++;
        int sum=j-i;
        int start=a[i].id-n+1;
        // Prefer a higher count; on equal counts prefer the smaller string.
        if(sum>cnt || (sum==cnt && window_less(start,x))){
            cnt=sum;
            x=start;
        }
        i=j;
    }
    fwrite(s+x,1,n,stdout);
    printf(" %d\n",cnt);
    return 0;
}
后缀数组
// PAT (Top Level) 1005 "Programming Pattern" -- suffix-array solution.
//
// Builds the suffix array of the text with the prefix-doubling algorithm and
// the LCP (height) array, then looks for the longest run of adjacent suffixes
// sharing a common prefix of at least n characters. Suffixes are visited in
// sorted order, so the first run reaching the maximum length is also the
// lexicographically smallest answer.
#include<algorithm>
#include<cstdio>
#include<cstring>
#include<utility>
using namespace std;
int const N=(1<<20)+10;
// wa/wb: rank buffers (double-sized because ranks are read at offset +j),
// num: counting-sort buckets, rk: rank of each suffix, sa: suffix array,
// h: h[i] = LCP of suffixes sa[i] and sa[i-1], wv: temporary sort keys,
// n: requested pattern length.
int wa[N<<1],wb[N<<1],num[N],n,rk[N],sa[N],h[N],wv[N];
char s[N];

// True when positions x and y have equal rank pairs (r[.], r[.+z]).
int inline cmp(int *r,int x,int y,int z){
    return r[x]==r[y] && r[x+z]==r[y+z];
}

// Builds sa[] and rk[] for the first n characters of r (the caller includes
// the terminating '\0' so it acts as a unique smallest sentinel). m is an
// exclusive upper bound on the initial character codes.
void build_sa(char *r,int n,int m){
    int *x=wa,*y=wb;
    for(int i=0;i<m;i++) num[i]=0;
    // Cast through unsigned char so bytes >= 0x80 never produce a negative index.
    for(int i=0;i<n;i++) num[x[i]=(unsigned char)r[i]]++;
    for(int i=1;i<m;i++) num[i]+=num[i-1];
    for(int i=n-1;i>=0;i--) sa[--num[x[i]]]=i;
    for(int j=1,p=0;p<n;j<<=1,m=p){
        // Order by second key: suffixes with no second half come first.
        p=0;
        for(int i=n-j;i<n;i++) y[p++]=i;
        for(int i=0;i<n;i++) if(sa[i]>=j) y[p++]=sa[i]-j;
        // Stable counting sort by first key.
        for(int i=0;i<m;i++) num[i]=0;
        for(int i=0;i<n;i++) num[wv[i]=x[y[i]]]++;
        for(int i=1;i<m;i++) num[i]+=num[i-1];
        for(int i=n-1;i>=0;i--) sa[--num[wv[i]]]=y[i];
        // Re-rank; p ends as the number of distinct ranks.
        swap(x,y);
        p=1;x[sa[0]]=0;
        for(int i=1;i<n;i++)
            x[sa[i]]=cmp(y,sa[i],sa[i-1],j)? p-1:p++;
    }
    for(int i=0;i<n;i++) rk[i]=x[i];
}

// Fills h[] for the n real characters of r. Relies on rk[i] >= 1 for every
// i < n, which holds because the sentinel suffix occupies rank 0.
void build_h(char *r,int n){
    int k=0;
    for(int i=0;i<n;i++){
        if(k) k--;
        int j=sa[rk[i]-1];
        while (r[i+k]==r[j+k]) k++;
        h[rk[i]]=k;
    }
}

// Prints the most frequent length-n substring of s[0..len-1] and its count.
// Prints nothing when no suffix has at least n characters.
void solve(int len){
    int ans=0,id=-1,cnt=0;
    for(int i=1;i<=len;i++){
        if(h[i]>=n){
            // A run of k qualifying LCP entries covers k+1 suffixes.
            if(cnt==0) cnt=2;
            else cnt++;
        }else cnt=0;
        if(cnt>ans) {
            ans=cnt;
            id=sa[i];
        }
    }
    if(ans==0){
        // Every length-n substring is unique: take the smallest suffix that is
        // long enough. sa[1] itself may be shorter than n characters.
        for(int i=1;i<=len;i++){
            if(sa[i]+n<=len){
                id=sa[i];
                ans=1;
                break;
            }
        }
    }
    if(id<0) return;
    for(int i=id;i<=id+n-1;i++)
        putchar(s[i]);
    putchar(' ');
    printf("%d\n",ans);
}

int main(){
    if(scanf("%d",&n)!=1) return 0;
    // Discard whatever remains of the first line, including its newline.
    int c;
    while((c=getchar())!='\n' && c!=EOF){}
    // Bounded read of the text line; fgets keeps the newline, so strip it.
    if(!fgets(s,N,stdin)) return 0;
    int len=static_cast<int>(strlen(s));
    if(len>0 && s[len-1]=='\n') s[--len]='\0';
    if(n<1 || n>len) return 0;
    build_sa(s,len+1,256);
    build_h(s,len);
    solve(len);
    return 0;
}