字符串hash

似乎没写过多少字符串hash
今天补一补

字符串hash重要思想就是把字符串看做一个N进制大整数,进行取模后直接比较
这样子做的优劣很直观:很快很简单,也有取模后蜜汁碰撞的风险

对于i位置的hash值,可以这样求:

    for (int i = 1; i <= n; i++) H[i] = H[i - 1] * p + s[i];

我们要取出子串[l,r]的hash值时,显然就是\(H[r] - H[l - 1] * p^{r - l + 1}\)

来道【正解SAM】的例题:
最长公共子串
当然对串a建SAM,用串b在上边匹配就可以了

SAM太深奥了,我们来看看简单暴力的字符串hash
我们二分长度len,对A串的所有位置的长度为len的hash排序,拿B串所有位置长度为len的hash去查找
复杂度O(n log^2 n)【似乎SAM接近O(n)?】

#include<iostream>
#include<cstdio>
#include<cmath>
#include<cstring>
#include<algorithm>
// Shorthand for the signed `long long int` type.
#define LL long long int
// Walks k along a linked adjacency list of u. Expects arrays h[] and ed[]
// (with an .nxt field), which are not defined in the visible program.
#define Redge(u) for (int k = h[u]; k; k = ed[k].nxt)
// Loops i over 1..n inclusive.
#define REP(i,n) for (int i = 1; i <= (n); i++)
// Unsigned type used for all hash values. The hashing code applies no
// explicit modulus, so it relies on unsigned arithmetic wrapping on overflow.
#define ULL unsigned long long int
using namespace std;
// maxn sizes the string and hash arrays; maxm and INF are not referenced
// by the visible code of this program.
const int maxn = 200005,maxm = 100005,INF = 1000000000;
/**
 * Reads the next decimal integer from stdin.
 *
 * Skips every character that is not a digit; if a '-' is seen while
 * skipping, the result is negated. Then consumes the run of digits and one
 * character after it.
 *
 * @return the parsed value, or 0 if end of input is reached before any digit.
 */
inline int read(){
	int out = 0,flag = 1;
	// Keep the character in an int: stored in a char, EOF would look like an
	// ordinary non-digit and the skip loop below would never terminate.
	int c = getchar();
	while (c != EOF && (c < '0' || c > '9')){
		if (c == '-') flag = -1;
		c = getchar();
	}
	while (c >= '0' && c <= '9'){
		// Add the digit value, not the raw character code, so the running
		// total never exceeds the number being parsed.
		out = out * 10 + (c - '0');
		c = getchar();
	}
	return out * flag;
}
// The two input strings; main reads them starting at index 1.
char A[maxn];
char B[maxn];
// Lengths of A and B as measured by strlen in main.
int lena;
int lenb;
// Number of window hashes currently stored in b[1..n] (reset by check).
int n;
// Prefix hashes: Ha[i] / Hb[i] cover the first i characters of A / B.
ULL Ha[maxn];
ULL Hb[maxn];
// Scratch array holding the sorted window hashes of A inside check.
ULL b[maxn];
bool check(int len){
	n = 0;
	ULL P = 1;
	for (int i = 1; i <= len; i++) P *= 27;
	for (int i = len; i <= lena; i++) b[++n] = Ha[i] - Ha[i - len] * P;
	sort(b + 1,b + 1 + n);
	for (int i = len; i <= lenb; i++){
		ULL temp = Hb[i] - Hb[i - len] * P;
		if (b[lower_bound(b + 1,b + 1 + n,temp) - b] == temp) return true;
	}
	return false;
}
int main(){
	scanf("%s",A + 1); lena = strlen(A + 1);
	scanf("%s",B + 1); lenb = strlen(B + 1);
	for (int i = 1; i <= lena; i++) Ha[i] = Ha[i - 1] * 27 + A[i];
	for (int i = 1; i <= lenb; i++) Hb[i] = Hb[i - 1] * 27 + B[i];
	int l = 0,r = min(lena,lenb),mid;
	while (l < r){
		mid = l + r + 1 >> 1;
		if (check(mid)) l = mid;
		else r = mid - 1;
	}
	printf("%d\n",l);
	return 0;
}

BZOJ3207
此题K很小,我们用上hash之后,每个位置就对应一个hash值,问题就转化为了一个区间内是否存在某个值,用可持久化线段树就可以了

#include<iostream>
#include<cmath>
#include<cstdio>
#include<cstring>
#include<algorithm>
// Shorthand for the signed `long long int` type.
#define LL long long int
// Loops i over 1..n inclusive.
#define REP(i,n) for (int i = 1; i <= (n); i++)
// Walks k along a linked adjacency list of u and declares a scratch int `to`.
// Expects arrays h[] and ed[] (with an .nxt field), not defined in the visible program.
#define Redge(u) for (int k = h[u],to; k; k = ed[k].nxt)
// Debug helper: prints s[1..n] separated by spaces, then a newline.
// NOTE: expands to two statements, so only the for-loop would fall under a
// preceding unbraced if/else.
#define BUG(s,n) for (int i = 1; i <= (n); i++) cout<<s[i]<<' '; puts("");
// Largest 64-bit unsigned value (2^64 - 1); main passes it as the upper end
// of the value range covered by the segment tree.
#define inf 18446744073709551615UL
// Unsigned type used for hash values and segment-tree coordinates.
#define uLL unsigned long long int
using namespace std;
// maxn sizes the per-position arrays; maxm sizes the segment-tree node arrays.
const int maxn = 100010,maxm = 8000005;
/**
 * Reads the next decimal integer from stdin.
 *
 * Non-digit characters are skipped; a '-' encountered while skipping makes
 * the result negative. The digit run and the character following it are
 * consumed.
 *
 * @return the parsed value, or 0 when input ends before any digit is found.
 */
inline int read(){
	int out = 0,flag = 1;
	// getchar() returns int; keeping it as int lets EOF end the skip loop
	// instead of being treated as just another non-digit forever.
	int c = getchar();
	while (c != EOF && (c < '0' || c > '9')) {
		if (c == '-') flag = -1;
		c = getchar();
	}
	while (c >= '0' && c <= '9') {
		// Convert to the digit value first so the sum cannot overshoot the
		// number actually being read.
		out = out * 10 + (c - '0');
		c = getchar();
	}
	return out * flag;
}
// Segment-tree node storage: left child, right child and occurrence count.
// Node 0 is never allocated and acts as the empty tree.
int ls[maxm];
int rs[maxm];
int sum[maxm];
// rt[i] is the root of the tree version built after position i.
int rt[maxn];
// Sequence length, number of queries, window length, and nodes allocated so far.
int n;
int m;
int K;
int cnt;
// Input sequence (filled from index 1); T is not referenced by the visible code.
int A[maxn];
int T[maxn];
// Prefix hashes of A: H[i] covers A[1..i].
uLL H[maxn];
void modify(int& u,int pre,uLL l,uLL r,uLL pos){
	u = ++cnt; sum[u] = sum[pre] + 1; ls[u] = ls[pre]; rs[u] = rs[pre];
	if (l == r) return;
	uLL mid = l / 2 + r / 2;
	if (mid >= pos) modify(ls[u],ls[pre],l,mid,pos);
	else modify(rs[u],rs[pre],mid + 1,r,pos);
}
int query(int u,int v,uLL l,uLL r,uLL pos){
	if (l == r) return sum[u] - sum[v];
	uLL mid = l / 2 + r / 2;
	if (mid >= pos) return query(ls[u],ls[v],l,mid,pos);
	else return query(rs[u],rs[v],mid + 1,r,pos);
}
/**
 * Answers m queries of the form "does this length-K pattern occur inside
 * positions [x, y] of the sequence?".
 *
 * Every length-K window ending at position i is hashed (base 107) and
 * inserted into tree version rt[i]. A query hashes its pattern the same way
 * and counts that hash among windows ending in [x + K - 1, y].
 * Prints "No" when the pattern occurs and "Yes" otherwise
 * (NOTE(review): presumably the problem's inverted output convention — confirm).
 */
int main(){
	n = read(); m = read(); K = read();
	REP(i,n) A[i] = read();
	REP(i,n) H[i] = H[i - 1] * 107 + A[i];
	uLL P = 1; REP(i,K) P *= 107;	// P = 107^K, for removing the prefix before a window
	for (int i = K; i <= n; i++)
		modify(rt[i],rt[i - 1],0,inf,H[i] - H[i - K] * P);
	while (m--){
		// Shift the left end to the first position where a window fully
		// inside the interval can end.
		int l = read() + K - 1,r = read();
		uLL val = 0;
		// Always consume the pattern, even if the interval turns out too short.
		for (int i = 1; i <= K; i++) val = val * 107 + read();
		// An interval shorter than K holds no window. Without the guard,
		// rt[l - 1] could refer past the built versions and the difference
		// could be negative, which would wrongly count as a hit.
		const bool found = l <= r && query(rt[r],rt[l - 1],0,inf,val) > 0;
		if (found) puts("No");
		else puts("Yes");
	}
	return 0;
}

posted @ 2018-03-04 13:16  Mychael  阅读(248)  评论(0编辑  收藏  举报