Loading

Hash

概念

通过一个hash函数H,将一组数据(包括字符串,较大的数等)转化成能够用变量表示或直接可以作为下标的数,可以通过hash函数转化得到的数值成为hash值,hash可以实现快速查找和匹配,常用的有字符串hash哈希表

字符串hash

题目

给定一个字符串 \(A\) 和一个字符串 \(B\),求在 \(B\) 中的出现次数。 \(A\)\(B\)中的字符均为英语大写字母或小写字母。

\(A\) 中不同位置出现的 \(B\) 可重叠

我们选取两个合适的互质的数 \(b\)\(h\) (b < h)假设字符串\(C = c_1c_2c_3……c_m\)

\(H(C) = (c_1b^{m-1} + c_2b^{m-2} + c_mb^{0})~mod~h;\)

\(b\) 代表的是基数,相当于把字符串看做 b 进制数——《一本通提高篇》(很诡异的东西

\(H(C,K)\) 为前 \(K\) 个字符组成的字符串的哈希值

\(H(C,K + 1) = H(C,K)*b + C_{k + 1}\)

举个栗子:

如果字符串\(C = “ACDA”\)(令A表示1,B表示2)则

\(H(C,1) = 1;\)

\(H(C,2) = 1*b + 3;\)

\(H(C,3) = 1*b^2 + 3*b + 4;\)

\(H(C,4) = 1*b^3 + 3*b^2 + 4 * b + 1;\)

判断主串的一个字符和另一个字符是够匹配

即判断字符串\(C = c_1c_2……c_m\)从位置\(k+1\)开始的长度为\(n\)的子串\(C^, = c_{k + 1}c_{k + 2}……c_{k + n}\)的哈希值与另一个匹串\(S = s_1s_2……s_n\)是否相等

\(H(C') = H(C, k + n) - H(C, K)*b^n\)

因此可得出求字符串区间hash值(l为左边界,r为右边界)

\(H(C') = H(C, r) - H(C, l)*b^(r - l + 1)\)

int get(int l,int r){ 
   return hs[r] - hs[l - 1] * pre[r - l + 1];
}

字符串区间删去一个字符后的hash值类比可以得到

int del(int l,int r,int pos){
	return get(l, pos - 1) * pre[r-pos] + get(pos + 1, r);
}

然后就是习题了= =

T1 子串查找

模板题

#include <iostream>
#include <cstdio>
#include <cstring>
#include <string>
#include <algorithm>
using namespace std;
const int M = 1e6 + 10;
typedef unsigned long long ull;
int read(){
  int x = 0,f = 1;char c = getchar();
  while(c < '0'||c > '9'){if(c == '-')f = -1;c = getchar();}
  while(c >= '0'&&c <= '9'){x = x*10 + c - '0';c = getchar();}
  return x * f;
}
char sa[M],sb[M];
ull base = 155,sum[M],pow[M],falg;
int ans;
int main(){
   pow[0] = 1;
   scanf("%s%s",sa + 1,sb + 1);
   
   int lena = strlen(sa + 1),lenb = strlen(sb + 1);
   
   for(int i = 1;i < 1000000; i++)
   	   pow[i] = pow[i - 1]*base;//处理进位
   	   
   sum[0] = 0;
    
   for(int i = 1;i <= lena; i++){
   	  sum[i] = sum[i - 1] * base + (ull)(sa[i] - 'A' + 1);
   }
   for(int i = 1; i <= lenb; i++){
   	   falg = falg * base + (ull)(sb[i] - 'A' + 1);
   }
   for(int i = 0;i <= lena - lenb; i++){
   	 if(falg == sum[i + lenb] - sum[i] * pow[lenb]) ans++; 
   }
   cout<<ans;
}

T2 图书管理

#include <iostream>
#include <cstdio>
#include <cstring>
#include <string>
#include <algorithm>
using namespace std;
typedef unsigned long long ull;
const int mod1 = 1e7 + 7;
const int mod2 = 1e7 + 9;
const int base = 1e9;
const int M = 1e7;
int n;
char flag[5], S[M];
bool A[M], B[M];
int main() {
    cin>>n;

    for (int i = 1; i <= n; i++) {
        cin >> flag;
        ull sum1 = 1, sum2 = 1;
        gets(S);

        for (int j = 0; j < strlen(S); j++)
            sum1 = (sum1 * base % mod1 + S[j]) % mod1, sum2 = (sum2 * base % mod2 + S[j]) % mod2;

        if (flag[0] == 'a')
           A[sum1] = 1, B[sum2] = 1;

        if (flag[0] == 'f'){
            if (A[sum1] && B[sum2])
                puts("yes");
            else
                puts("no");
        }
    }

    return 0;
}

T3 Power Strings

#include <iostream>
#include <cstdio>
#include <queue>
#include <cstring>
#include <vector>
#include <cmath>
#include <algorithm>
using namespace std;
const int A = 1e3 + 2;
const int B = 1e4 + 2;
const int C = 1e5 + 2;
const int D = 1e6 + 2;
const int inf = 0x3f3f3f3f;
typedef unsigned long long ull;
int read(){
	int x = 0,f = 1;char c = getchar();
	while(c < '0'||c > '9'){if(c == '-')f = -1;c = getchar();}
	while(c >= '0'&&c <= '9'){x = x*10 + c - '0';c = getchar();}
	return x*f;
}
char c[D];
ull hs[D],power[D],base = 37;
int main(){
	power[0] = 1; 
	for(int i = 1;i <= D; i++)
	  power[i] = power[i - 1]*base;
	  
	while (scanf("%s", c)){
	  	
	  if (!strcmp(c, "."))break;
	  
		ull ans = 0;int len = strlen(c);
		hs[len] = 0;
		
		for (int i = len - 1; i >= 0; i--){
			
			hs[i] = hs[i + 1] * base + c[i] - 'a' + 1;
		}
		for (int k = 1; k <= len; k++)
		{
			if (len % k != 0)
				continue;
			 
			 ull tomp = hs[0] - hs[k] * power[k];
			 int j = 0;
			 for(j = k;j < len; j = j + k){
			 	
			 	 if(tomp != hs[j] - hs[k + j]*power[k]) break;
			 	 else tomp = hs[j] - hs[k + j]*power[k];
			 }
			 if(j == len){
			 	ans = len / k;break;
			 }	
		}
		cout<<ans<<"\n";	
	}
	return 0;
}

T4 Seek the Name, Seek the Fame

#include <iostream>
#include <cstring>
#include <cstdio>
#include <queue>
#include <vector>
#include <cmath>
#include <algorithm>
using namespace std;
const int A = 1e3 + 2;
const int B = 1e4 + 2;
const int C = 1e5 + 2;
const int D = 1e6 + 2;
const int inf = 0x3f3f3f3f;
const int mod = 1e9 + 7;
typedef unsigned long long ull;
int read(){
	int x = 0,f = 1;char c = getchar();
	while(c < '0'||c > '9'){if(c == '-')f = -1;c = getchar();}
	while(c >= '0'&&c <= '9'){x = x*10 + c - '0';c = getchar();}
	return x*f;
}
char s[D];
ull base = 34,power[D],hs[D];
int main(){
	
   power[0] = 1;
   
   for(int i = 1;i <= D; i++) power[i] = power[i - 1] * base;
   
   while(scanf ("%s", s + 1) != EOF){
   	   int len = strlen(s + 1);
   	   hs[0] = 0;
   	   for(int i = 1; i <= len; i++){
   	        hs[i] = hs[i - 1] * base + s[i] - 'a' + 1; 	  
	   }
	   for(int i = 1;i <= len; i++){
	   	   if(hs[i] == hs[len] - hs[len - i] * power[i]){
	   	   	      printf("%d ",i);
			}
	   }
	   printf("\n");
   }
}

T5 「BalticOI 2014 Day 1」三个朋友

求区间hash和删去字符后的hash

#include <iostream>
#include <cstdio>
#include <queue>
#include <cstring>
#include <string>
#include <cmath>
#include <map>
#define int unsigned long long
using namespace std;
const int A = 1e3 + 2;
const int B = 1e4 + 2;
const int C = 1e5 + 2;
const int D = 2e6 + 5;
const int inf = 0x3f3f3f3f;
const int mod = 99984198447;
int read(){
	int x = 0,f = 1;char c = getchar();
	while(c < '0'||c > '9'){if(c == '-')f = -1;c = getchar();}
	while(c >= '0'&&c <= '9'){x = x*10 + c - '0';c = getchar();}
	return x*f;
}

char s[D];
int pre[D],base = 999983,hs[D],ans,ll,rr,flag,n,mid,mark;;
map<unsigned long long, int> vis;
int get(int l,int r){
	return hs[r] - hs[l - 1] * pre[r - l + 1];
}

int del(int l,int r,int pos){
	return get(l, pos - 1) * pre[r-pos] + get(pos + 1, r);
}

bool check(int pos){

	if(pos == mid){
		ll = get(1, pos - 1);
		rr = get(pos + 1, n);
		return ll == rr;
	}

	else if(pos < mid){
		ll = del(1, mid, pos);
		rr = get(mid + 1, n);
		return ll == rr;
	}

	else{
		ll = get(1, mid - 1);
		rr = del(mid, n, pos);
		return ll == rr;
	}

}
void itit(){
   pre[0]=1;
  for(int i = 1;i <= n; i++){
		pre[i] = pre[i - 1] * base;hs[i] = hs[i - 1] * base + s[i];
	}
}
signed main(){
	
	cin >> n >> (s + 1);
	
	mid = (n + 1) >> 1; //取字符串的中点下标
	itit();
		
	for(int i = 1;i <= n;i++){
		
		if(check(i) == 1){ //删掉下标i的元素之后,能够得到俩个一样的子串
		
            mark = i;
            
			if(mark <= mid)
				flag = rr;
			
	    	else{
	    		flag = ll;	
			}
			
			if(vis[flag] > 0) continue;
			vis[flag] = 1;
			ans++; 
			if(ans > 1){
				cout<<"NOT UNIQUE"<<endl;return 0;
			}
        }
	}

	if(!ans){
		cout<<"NOT POSSIBLE"<<endl;
	}
	else{
	   if(mark <= mid){
	   	 for(int i = mid + 1;i <= n; i++)cout<<s[i];
	   	 printf("\n");
	   }
	   else{
	   	 for(int i = 1;i <= mid - 1; i++)cout<<s[i];
	   	  printf("\n");
	   }
	}
	return 0;
}

T6 A Horrible Poem

被困一上午,只因进制没取质数

暴力枚举循环节,如果循环节长度不能被区间整除,直接跳过,不过显然T了

正解是数论??

假设最短循环节长度为len

则原串长度显然为len*k。若只考虑k,

并且将k的质因数依次分解,每次试除k,

则得到的k。和len的乘积仍是循环节,

利用这个性质。依次用质因数 i 试除n,

若除去后仍是循环节,说明i属于k,将其除去,结果就留下了len

#include <iostream>
#include <cstdio>
#include <queue>
#include <cstring>
#include <string>
#include <cmath>
#include <map>
#define int unsigned long long
using namespace std;
const int A = 1e3 + 2;
const int B = 1e4 + 2;
const int C = 5e5 + 2;
const int D = 5e5 + 10;
const int inf = 0x3f3f3f3f;
const int mod = 99984198447;
int read(){
	int x = 0,f = 1;char c = getchar();
	while(c < '0'||c > '9'){if(c == '-')f = -1;c = getchar();}
	while(c >= '0'&&c <= '9'){x = x*10 + c - '0';c = getchar();}
	return x*f;
}
int  q, n, pre[D], base = 63, hs[D],prim[D],ans,len;
bool vis[D],flag;
char s[D];
void calc(){
    for (int i = 2; i <= n; ++i) {
        if (vis[i]) continue;
        for (int j = 1; i * j <= n; ++j) {
            int t = i * j;
            if (vis[t]) continue;
            vis[t] = 1;
            prim[t] = i;
        }
    }
}

signed main(){
	n = read();calc();
    scanf("%s", s + 1);
	pre[0] = 1;
	for(int i = 1;i <= n; i++){
		 pre[i] = pre[i - 1] * base;hs[i] = hs[i - 1] * base + s[i];
	}
		
	int q = read();
	while(q--){
		int l = read(), r = read();
	   len = ans = r - l + 1;
		while(len > 1){
			int k = ans / prim[len];
			len /= prim[len];
			if(hs[r - k] - hs[l - 1] * pre[r - k - l + 1] == hs[r] - hs[l - 1 + k] * pre[r - k - l + 1])
				ans = k;
		}
		printf("%d\n", ans);
	}
}
posted @ 2021-01-10 14:53  Dita  阅读(142)  评论(5编辑  收藏  举报