HDU 4622 Reincarnation 后缀自动机 // BKDRHash(最优hash)
Reincarnation
Time Limit: 6000/3000 MS (Java/Others) Memory Limit: 131072/65536 K (Java/Others)
Problem Description
Now you are back,and have a task to do:
Given a string s consisting of lower-case English letters only, denote by f(s) the number of distinct substrings of s.
You are also given some queries; for each one you should calculate f(s[l...r]), where s[l...r] means the substring of s that starts at position l and ends at position r.
Given you a string s consist of lower-case English letters only,denote f(s) as the number of distinct sub-string of s.
And you have some query,each time you should calculate f(s[l...r]), s[l...r] means the sub-string of s start from l end at r.
Input
The first line contains integer T(1<=T<=5), denote the number of the test cases.
For each test case, the first line contains a string s (1 <= length of s <= 2000).
Denote the length of s by n.
The second line contains an integer Q(1 <= Q <= 10000),denote the number of queries.
Then Q lines follow, each containing two integers l, r (1 <= l <= r <= n), denoting a query.
For each test cases,the first line contains a string s(1 <= length of s <= 2000).
Denote the length of s by n.
The second line contains an integer Q(1 <= Q <= 10000),denote the number of queries.
Then Q lines follows,each lines contains two integer l, r(1 <= l <= r <= n), denote a query.
Output
For each test case, print the answer to each query on its own line.
Sample Input
2
bbaba
5
3 4
2 2
2 5
2 4
1 4
baaba
5
3 3
3 4
1 4
3 5
5 5
Sample Output
3
1
7
5
8
1
3
8
5
1
Hint
I won't do anything against hashing because I am nice. Of course, this problem also has a solution that doesn't rely on hashing.
题意:
给你一个母串,
有 Q 个询问,每次询问母串的区间 [L,R] 所对应的子串中,不同子串的个数是多少
题解:
考虑离线处理
把询问按左端点 L 分组,L 相同的询问划分为一类,组内再按右端点 R 从小到大排序
对每个不同的 L,从 s[L] 开始逐个字符建立后缀自动机,并在插入过程中维护当前不同子串的个数;插入到 s[R] 时即可回答询问 (L,R)
这样最多只需要建立 2000 个后缀自动机
// HDU 4622 "Reincarnation": offline solution with a suffix automaton (SAM).
//
// Queries are sorted by (L, R).  For every distinct L a fresh automaton is
// built over s[L..], one character at a time.  `now` always holds the number
// of distinct substrings of the text inserted so far, so the answer to a
// query (L, R) can be read off as soon as s[R] has been appended.
#include <algorithm>
#include <cstdio>

using namespace std;

const int N = 2e3 + 7;   // |s| <= 2000 per the statement, plus slack
const int MAXQ = 20000;  // capacity of the query / answer arrays

// Running sum of (maxlen - minlen + 1) over all non-root states.
long long now;

int tot;  // index of the most recently allocated state; state 1 is the root
int pre;  // state returned by the latest add_char call
// Per-state data.  Entries of state 1 (the root) are never written and rely
// on the zero-initialisation of globals (maxlen[1] == 0, slink[1] == 0).
int slink[2 * N], trans[2 * N][28], minlen[2 * N], maxlen[2 * N];

// Allocates a new state and returns its index.  When _trans is non-null its
// 26 transitions are copied (used for cloning); otherwise the new row of
// trans is left as is and is expected to be zero already (see init_sam).
int newstate(int _maxlen, int _minlen, int *_trans, int _slink) {
    ++tot;
    maxlen[tot] = _maxlen;
    minlen[tot] = _minlen;
    slink[tot] = _slink;
    if (_trans) {
        for (int i = 0; i < 26; i++) trans[tot][i] = _trans[i];
    }
    return tot;
}

// Number of substring lengths represented by state u.
long long update(int u) {
    return 1LL * (maxlen[u] - minlen[u] + 1);
}

// Appends character ch to the automaton whose last state is u, keeps `now`
// in sync with every minlen/maxlen change, and returns the new last state.
int add_char(char ch, int u) {
    int c = ch - 'a';
    int v = u;
    int z = newstate(maxlen[u] + 1, -1, NULL, 0);

    // Walk the suffix links, adding the missing c-transitions.
    while (v && !trans[v][c]) {
        trans[v][c] = z;
        v = slink[v];
    }

    // Case 1: ran past the root -- z hangs directly below the root.
    if (!v) {
        minlen[z] = 1;
        slink[z] = 1;
        now += update(z);
        return z;
    }

    // Case 2: the existing target x can serve as the suffix link unchanged.
    int x = trans[v][c];
    if (maxlen[v] + 1 == maxlen[x]) {
        slink[z] = x;
        minlen[z] = maxlen[x] + 1;
        now += update(z);
        return z;
    }

    // Case 3: split x by cloning it into y.  x's contribution is removed
    // before its minlen changes and re-added afterwards.
    int y = newstate(maxlen[v] + 1, -1, trans[x], slink[x]);
    now -= update(x);
    slink[z] = slink[x] = y;
    minlen[x] = minlen[z] = maxlen[y] + 1;
    now += update(x);
    while (v && trans[v][c] == x) {
        trans[v][c] = y;
        v = slink[v];
    }
    minlen[y] = maxlen[slink[y]] + 1;
    now += update(y);
    now += update(z);
    return z;
}

// Resets the automaton to a lone root by clearing the transitions of every
// state used so far.
void init_sam() {
    for (int i = 1; i <= tot; ++i) {
        for (int j = 0; j < 26; ++j) trans[i][j] = 0;
    }
    pre = tot = 1;
}

int T, n;  // number of test cases; number of queries in the current test
long long ans[MAXQ];
char a[N * 2];  // 1-based input string

struct ss {
    int L, R, id;
} Q[MAXQ];

// Orders queries by left end, then by right end.
bool cmp(const ss &s1, const ss &s2) {
    if (s1.L == s2.L) return s1.R < s2.R;
    return s1.L < s2.L;
}

int main() {
    if (scanf("%d", &T) != 1) return 0;
    while (T--) {
        // The width keeps the read inside a[1 .. 2N-1], terminator included.
        if (scanf("%4012s%d", a + 1, &n) != 2) break;
        if (n < 0 || n >= MAXQ) break;

        bool ok = true;
        for (int i = 1; i <= n; ++i) {
            if (scanf("%d%d", &Q[i].L, &Q[i].R) != 2) {
                ok = false;
                break;
            }
            Q[i].id = i;
        }
        if (!ok) break;

        sort(Q + 1, Q + n + 1, cmp);

        int r = 0;  // right end of the text currently held by the automaton
        for (int i = 1; i <= n; ++i) {
            // A new left end starts a new automaton.
            if (i == 1 || Q[i].L != Q[i - 1].L) {
                init_sam();
                r = Q[i].L - 1;
                now = 0;
            }
            while (r < Q[i].R) {
                pre = add_char(a[++r], pre);
            }
            ans[Q[i].id] = now;
        }

        for (int i = 1; i <= n; ++i) printf("%lld\n", ans[i]);
    }
    return 0;
}
BKDRHash
// HDU 4622 "Reincarnation": rolling-hash (BKDRHash) solution.
//
// For every substring length the substrings are hashed from left to right.
// Each occurrence [i, r] adds +1 at ans[i][r]; if the same hash was last seen
// starting at pos, -1 is added at ans[pos][r].  A 2-D suffix/prefix sum then
// turns ans[L][R] into the number of distinct substrings of s[L..R].
// Equality is decided by the 64-bit hash value alone, so the result is only
// as reliable as the hash is collision-free.
#include <cstdio>
#include <cstdlib>
#include <cstring>

typedef unsigned long long int ULL;

// BKDRHash multiplier (31, 131, 1313, 13131, 131313, ... are the usual picks).
// The hash starts at 0.
const int seed = 13131;
const int maxn = 2000 + 10;

char str[maxn];     // 1-based input string
ULL powseed[maxn];  // powseed[i] = seed^i, wrapping modulo 2^64
ULL sumHash[maxn];  // prefix hashes, wrapping modulo 2^64
int ans[maxn][maxn];  // after work(): ans[L][R] = distinct substrings of s[L..R]

const int MOD = 10007;  // number of buckets in the chained hash table

struct StringHash {
    int first[MOD + 2], num;   // bucket heads; number of stored entries
    ULL EdgeNum[maxn];         // full hash value stored in entry i
    int next[maxn];            // next entry in the same bucket (0 ends the chain)
    // close[i] is the most recent (largest) start position at which the value
    // of entry i was seen.  E.g. for "baba" and length 2, "ba" is first seen
    // at 1, and when it shows up again the stored position becomes 3.
    int close[maxn];

    void init() {
        num = 0;
        memset(first, 0, sizeof first);
    }

    // Records that hash value `val` occurs at start position `id`.
    // Returns the previously recorded start position for the same value,
    // or 0 if the value has not been seen since the last init().
    int insert(ULL val, int id) {
        int u = val % MOD;
        // Sharing a bucket does not mean equality: compare the full value.
        for (int i = first[u]; i; i = next[i]) {
            if (val == EdgeNum[i]) {
                int t = close[i];
                close[i] = id;  // remember the latest start position
                return t;
            }
        }
        ++num;
        EdgeNum[num] = val;
        close[num] = id;
        next[num] = first[u];
        first[u] = num;
        return 0;
    }
} H;

// Fills ans[L][R] for all 1 <= L <= R <= lenstr from str[1..lenstr].
static void build_table(int lenstr) {
    for (int i = 1; i <= lenstr; ++i) {
        sumHash[i] = sumHash[i - 1] * seed + str[i];
    }
    memset(ans, 0, sizeof(ans));

    // Enumerate substring lengths; the table is rebuilt for every length.
    for (int L = 1; L <= lenstr; ++L) {
        H.init();
        for (int i = 1; i + L - 1 <= lenstr; ++i) {
            int r = i + L - 1;
            int pos = H.insert(sumHash[r] - powseed[L] * sumHash[i - 1], i);
            ans[i][r]++;              // this occurrence counts once
            if (pos) ans[pos][r]--;   // cancels out when the earlier one is in range too
        }
    }

    // Accumulate over start >= i and end <= j (inclusion-exclusion).
    for (int i = lenstr; i >= 1; i--) {
        for (int j = i; j <= lenstr; j++) {
            ans[i][j] += ans[i + 1][j] + ans[i][j - 1] - ans[i + 1][j - 1];
        }
    }
}

// Handles one test case: reads the string, builds the table, answers queries.
void work() {
    // The width keeps the read inside str[1 .. maxn-1], terminator included.
    if (scanf("%2008s", str + 1) != 1) return;
    int lenstr = strlen(str + 1);

    build_table(lenstr);

    int m;
    if (scanf("%d", &m) != 1) return;
    while (m--) {
        int L, R;
        if (scanf("%d%d", &L, &R) != 2) return;
        printf("%d\n", ans[L][R]);
    }
}

int main() {
    // Powers are needed for every possible substring length (up to 2000),
    // so fill the whole table; stopping short leaves zeros that break the
    // window hash for the longest substrings.
    powseed[0] = 1;
    for (int i = 1; i < maxn; ++i) powseed[i] = powseed[i - 1] * seed;

    int t;
    if (scanf("%d", &t) != 1) return 0;
    while (t--) work();
    return 0;
}