字符串哈希
hash[l...r] = (hash[r] - hash[l - 1] * (p ^ (r - l + 1))) % mod;求中间字符串的哈希值
假设一个串s,那么字串s[i, j]的Hash值就是H[i, j]=s[i]+s[i+1]*x+s[i+2]*(x^2)+...+s[j]*(x^(i-j))。
https://vjudge.net/contest/242212#problem/B
哈希值有时和二分一起出题,这一题就是把每一个字符串转成一个hash值,每次找这个字符串的时候就二分去找这个字符串的hash值。
哈希值的乘数一般取131与13331,取模的数一般可以去1e9 + 7与1e9 + 9,有时候可能要用双哈希,就是两种取模值,两个都相同才算这个字符串相同、
A - Crazy Search
//求一个字符串中有多少个长度为n的子串,所以就用hash值来区分,然后题目还输入了只有几种字母,所以哈希的乘数可以就取这个数,把他当做一个x进制数。
#include<cstdio> #include<cstring> #include<algorithm> using namespace std; const int maxm = 16000000; int has[maxm]; char a[maxm]; int num[300]; int n, m; int main() { while(~scanf("%d%d", &n, &m)) { scanf("%s", a); int len = strlen(a); memset(has, 0, sizeof(has)); memset(num, 0, sizeof(num)); int cnt = 0; for(int i = 0; i < len; i++) { if(!num[a[i] ]) num[a[i] ] = cnt++; } int res = 0; for(int i = 0; i <= len - n; i++) { int sum = 0; for(int j = i; j < n + i; j++) { sum = sum * m + num[a[j] ]; } if(!has[sum]) { res++; has[sum] = 1; } } printf("%d\n", res); } return 0; }
D - Subpalindromes
//这一题可以用bits或者线段树做,下面是树状数组的做法
//题目是要判断l到r是不是回文串,或者单点更新,回文串判断就是把字符翻转过来,取两个树状数组,然后在来比较hash值
#include <cstdio> #include <cstring> #include <algorithm> #define ULL unsigned long long using namespace std; const int N = 100005; int n, m, l, r, pos; char str[N], q[20], c; ULL C[N][2], Hash[N]; int lowbit(int x) { return x & (-x); } ULL Sum(int x, int y) { ULL ret = 0; while (x > 0) { ret += C[x][y]; x -= lowbit(x); } return ret; } void Add(int x, ULL d, int y) { while (x <= n) { C[x][y] += d; x += lowbit(x); } } int main() { Hash[0] = 1; for (int i = 1; i < N; i++) Hash[i] = Hash[i - 1] * 27;//都是小写字母 while (scanf("%s", str) == 1) { memset(C, 0, sizeof(C)); n = strlen(str); for (int i = 0; i < n; i++) { Add(i + 1, (str[i] - 'a') * Hash[i], 0); Add(i + 1, (str[n - i - 1] - 'a') * Hash[i], 1); } scanf("%d", &m); while (m--) { scanf("%s", q); if (q[0] == 'p') { scanf("%d%d", &l, &r); ULL h1 = (Sum(r, 0) - Sum(l - 1, 0)) * Hash[n - r]; ULL h2 = (Sum(n - l + 1, 1) - Sum(n - r, 1)) * Hash[l - 1];
//这个还额外乘一个hash值是因为头和尾剩余的串长度不同,所以要乘。如果是正向的,那么他其实有了前面l的数量级,而反向的就是后面r的数量级,所以就都把剩余的补上。 if (h1 == h2) printf("Yes\n"); else printf("No\n"); } else { scanf("%d %c", &pos, &c); int x = c - str[pos - 1]; Add(pos, x * Hash[pos - 1], 0); Add(n - pos + 1, x * Hash[n - pos], 1); str[pos - 1] = c; } } } return 0; }
Subpalindromes
https://vjudge.net/contest/242212#problem/D
一个字符串是否能够分成三个回文串,hash做法。
#include<cstdio> #include<cstring> #include<algorithm> #include<iostream> #include<cmath> //#define rep(i,l,r) for (int i=l;i<=r;i++) #define maxn 200500 #define p 1000000007 using namespace std; typedef unsigned long long ll; ll a[maxn],b[maxn],k[maxn]; int c[maxn],d[maxn],t1,t2,cnt,n,t; char s[maxn]; int main() { scanf("%d",&t); k[0]=1; // rep(i,1,20000) k[i]=k[i-1]*p; for(int i = 1; i <= 20000; i++) k[i] = k[i - 1] * p; while (t--) { scanf("%s",s+1); n=strlen(s+1); t1=t2=cnt=b[n+1]=0; for (int i=1; i<=n; i++) a[i]=a[i-1]*p+s[i]; for (int i=n; i>=1; i--) b[i]=b[i+1]*p+s[i]; for (int i=1; i<=n; i++) { ll h=b[1]-b[i+1]*k[i]; if (h==a[i]) c[++t1]=i; h=a[n]-a[n-i]*k[i]; if (h==b[n-i+1]) d[++t2]=n-i+1; }
//对于字符串延伸到端点的字符串,可以这样计算。 reverse(c+1,c+1+t1); int l,r,flag=0; // rep(i,1,t1) for(int i = 1; i <= t1; i++) { if (flag) break; // rep(j,1,t2) for(int j = 1; j <= t2; j++) { l=c[i]+1; r=d[j]-1; if (l>r||cnt>2e6) break; ++cnt; ll h1=a[r]-a[l-1]*k[r-l+1]; ll h2=b[l]-b[r+1]*k[r-l+1]; if (h1==h2) flag=1; } } if (flag) printf("Yes\n"); else printf("No\n"); } return 0; }
二维哈希模板
https://cn.vjudge.net/contest/299832#problem/K
题目是给出一个矩形模式串,然后给出一个矩形文本串,求模式串矩阵最多在文本串中出现多少次,直接二维哈希(蓝书上这题是用ac自动机做得,复杂度明显比不过哈希)。
#include<cstdio> #include<iostream> using namespace std; #define maxn 2005 #define ULL unsigned long long ULL maxSize = 4000007; ULL seedx = 131; ULL seedy = 13331; struct HashString { ULL xp[maxn]; ULL yp[maxn]; ULL H[maxn][maxn]; char s[maxn][maxn]; int x,y; void init(int n, int m) { xp[0] = yp[0] = 1; x = n,y = m; for(int i = 1; i <= x; i++) xp[i] = xp[i - 1]*seedx; for(int i = 1; i <= y; i++) yp[i] = yp[i - 1]*seedy; return; } void initHash() { for(int i = 0; i < x; i++) { H[i][0] = s[i][0] - 'A' + 1; for(int j = 1; j < y; j++) H[i][j] = H[i][j - 1]*seedy + s[i][j] - 'A' + 1; } for(int i = 1; i < x; i++)//从1开始, 第0行是不变的 for(int j = 0; j < y; j++) H[i][j] += H[i - 1][j]*seedx; return; } ULL askHash(int x1, int y1, int x2, int y2) { //询问以(x1, y1)为左上角, (x2, y2)为右上角的字符矩阵的hash值 ULL ret = H[x2][y2]; if(x1 > 0) ret -= H[x1 - 1][y2]*xp[x2 - x1 + 1]; if(y1 > 0) ret -= H[x2][y1 - 1]*yp[y2 - y1 + 1]; if(x1 > 0 && y1 > 0) ret += H[x1 - 1][y1 - 1]*xp[x2 - x1 + 1]*yp[y2 - y1 + 1]; return ret; } void read() { for(int i=0;i<x;i++) scanf("%s",s[i]); } }s1,s2; int t; int main() { // #ifdef ACBang // freopen("1.in","r",stdin); // #endif // ACBang int x,y,n,m; scanf("%d", &t); while(t--) { scanf("%d%d", &n, &m); s1.init(n,m); s1.read(); s1.initHash(); scanf("%d%d", &x, &y); s2.init(x,y); s2.read(); s2.initHash(); ULL tt = s2.askHash(0,0,x-1,y-1); int ans = 0; for(int i = 0;i<n-x+1;i++) { for(int j = 0;j<m-y+1;j++) { if(tt == s1.askHash(i,j,i+x-1,j+y-1)) ans ++; } } printf("%d\n",ans); } return 0; }
求两个字符串的最长公共回文串长度。
https://ac.nowcoder.com/acm/contest/908/H
字符串哈希做法,可以用后缀数组做。
#include <bits/stdc++.h> #define dbg(x) std::cerr << #x << " = " << x << "\n" using namespace std; typedef long long ll; const int N = 100000 + 10; char x[N], y[N]; unsigned long long px[N], px1[N], py[N], pw[N]; int n, m; unordered_map<unsigned long long, int> mp; bool ok(int l, int r) { return (px[r] - px[l - 1] * pw[r - l + 1]) == (px1[l] - px1[r + 1] * pw[r - l + 1]); } bool check(int len) {
//这里改成vector也是可行的,排序之后二分去找哈希值。 mp.clear(); for(int i = len; i <= n; i++) if(ok(i - len + 1, i)) mp[px[i] - px[i - len] * pw[len]] = 1; for(int i = len; i <= m; i++) { if(mp[py[i] - py[i - len] * pw[len]]) return 1; } return 0; } int main() { pw[0] = 1; for(int i = 1; i <= 100000; i++) pw[i] = pw[i - 1] * 131; while(~scanf("%s%s", (x + 1), (y + 1))) { n = strlen(x + 1), m = strlen(y + 1); for(int i = 1; i <= n; i++) { px[i] = px[i - 1] * 131 + x[i]; } for(int i = 1; i <= m; i++) py[i] = py[i - 1] * 131 + y[i]; for(int i = n; i >= 1; i--) px1[i] = px1[i + 1] * 131 + x[i]; int ans = 0; int l = 1, r = min(n, m) / 2, ans2 = 0; while(l <= r) { int mid = (l + r) >> 1; if(check(2 * mid)) ans2 = 2 * mid, l = mid + 1; else r = mid - 1; } ans = max(ans, ans2); l = 0, r = min(n - 1, m - 1) / 2, ans2 = 0; while(l <= r) { int mid = (l + r) >> 1; if(check(2 * mid + 1)) ans2 = 2 * mid + 1, l = mid + 1; else r = mid - 1; } ans = max(ans, ans2); printf("%d\n", ans); } return 0; }
挂链哈希,更快
#include <bits/stdc++.h> #define dbg(x) std::cerr << #x << " = " << x << "\n" using namespace std; typedef long long ll; const int N = 100000 + 10; char x[N], y[N]; unsigned long long px[N], px1[N], py[N], pw[N]; int n, m; bool ok(int l, int r) { return (px[r] - px[l - 1] * pw[r - l + 1]) == (px1[l] - px1[r + 1] * pw[r - l + 1]); } struct HashSet{ static const int mo = 9997; int head[mo + 100],total; int nex[N]; unsigned long long true_hash[N]; void Clear() { memset(head,0,sizeof(head)); total = 0; } void inset(unsigned long long ha) { int x = ha % mo; nex[++total] = head[x]; true_hash[total] = ha; head[x] = total; } bool fid(unsigned long long ha) { int x = ha % mo; for(int i = head[x]; i; i = nex[i]) if(true_hash[i] == ha) return true; return false; } } mp; bool check(int len) { mp.Clear(); for(int i = len; i <= n; i++) if(ok(i - len + 1, i)) mp.inset(px[i] - px[i - len] * pw[len]); for(int i = len; i <= m; i++) { if(mp.fid(py[i] - py[i - len] * pw[len])) return true; } return false; } int main() { pw[0] = 1; for(int i = 1; i <= 100000; i++) pw[i] = pw[i - 1] * 131; while(~scanf("%s%s", (x + 1), (y + 1))) { n = strlen(x + 1), m = strlen(y + 1); for(int i = 1; i <= n; i++) { px[i] = px[i - 1] * 131 + x[i]; } for(int i = 1; i <= m; i++) py[i] = py[i - 1] * 131 + y[i]; for(int i = n; i >= 1; i--) px1[i] = px1[i + 1] * 131 + x[i]; int ans = 0; int l = 1, r = min(n, m) / 2, ans2 = 0; while(l <= r) { int mid = (l + r) >> 1; if(check(2 * mid)) ans2 = 2 * mid, l = mid + 1; else r = mid - 1; } ans = max(ans, ans2); l = 0, r = min(n - 1, m - 1) / 2, ans2 = 0; while(l <= r) { int mid = (l + r) >> 1; if(check(2 * mid + 1)) ans2 = 2 * mid + 1, l = mid + 1; else r = mid - 1; } ans = max(ans, ans2); printf("%d\n", ans); } return 0; }
http://poj.org/problem?id=1743
一个序列,求变化趋势相同的子串,可以先差分一下,检查出现两次以上且不重合的最长长度。
这题正解是用 后缀自动机做。
用hash写主要是学一下挂链哈希。
#include<cstdio> #include<cstring> #include<queue> #include<vector> #include<algorithm> #include<map> #include<cstdlib> #include<iostream> using namespace std; typedef unsigned long long ll; const int seed = 131; const int N = 2e4 + 5; int n; int a[N]; ll pw[N], px[N]; struct HashSet{ static const int mo = 9997; int head[mo + 100],total; int next[N],pos[N]; unsigned long long true_hash[N]; void Clear() { memset(head,0,sizeof(head)); total = 0; } bool Insert(unsigned long long ha,int _pos,int ans) { int x = ha % mo; for(int i = head[x];i;i = next[i]) if(true_hash[i] == ha && _pos - pos[i] >= ans) return true; next[++total] = head[x]; true_hash[total] = ha; pos[total] = _pos; head[x] = total; return false; } } mp; bool check(int x) { mp.Clear(); for(int i = x; i <= n; i++) { if(mp.Insert(px[i] - px[i - x] * pw[x], i, x)) return true; } return false; } int main() { pw[0] = 1; for(int i = 1; i < N; i++) pw[i] = pw[i - 1] * seed; while(~scanf("%d", &n) && n) { for(int i = 1; i <= n; i++) scanf("%d", &a[i]); for(int i = 1; i < n; i++) a[i] = a[i + 1] - a[i] + 88; pw[0] = 1; n--; for(int i = 1; i <= n; i++) px[i] = (px[i - 1] * seed + a[i]); int l = 4, r = n / 2; int ans = 0; while(l <= r) { int mid = (l + r) >> 1; if(check(mid)) { l = mid + 1; ans = mid; } else r = mid - 1; } if(ans >= 4)printf("%d\n", ans + 1); else printf("0\n"); } return 0; }