字符串Hash

字符串Hash用于判断两个字符串是否相同。

字符串Hash的哈希函数构造：

设长度为 m 的字符串 c 的哈希值为 H(c)：

$$H(c) = \left(c_1 \cdot base^{m-1} + c_2 \cdot base^{m-2} + \dots + c_m \cdot base^{0}\right) \bmod mod$$

其中 $c_i$ 是第 i 个字符映射成的整数，一般取 1-26 或 1-52（不要把任何字符映射为 0，否则前导的该字符不会影响哈希值，例如 "0a" 与 "a" 会得到相同的结果）；base 取自己喜欢的质数，比如 131；mod 取 2147483647（即 0x7fffffff），或者我们可以使用 unsigned long long

unsigned long long 可以支持自然溢出。

字符串哈希的性质：

性质1：设 $H(c,k)$ 表示字符串 c 的前 k 个字符的哈希值（$H(c,0)=0$），则 $H(c,k)=H(c,k-1)\cdot base+c_k$；

有了这个性质，我们就可以通过递推求解字符串的字符串哈希值。

性质2：设 $H(c,k)$ 表示字符串 c 的前 k 个字符的哈希值，则子串 $c_{k+1}c_{k+2}\dots c_{k+n}$ 的哈希值为 $H(c,k+n)-H(c,k)\cdot base^{n}$（使用取模时，结果需要再对 mod 取余并调整为非负数）；

有了这个性质，我们可以从主串中摘出一部分连续串与匹配串比较。

为了降低哈希冲突的概率，有时我们会采用双模数（即用两个不同的模数各算一次哈希值，两个值都相同才认为字符串相同）。

一般字符串匹配运用的是KMP算法，若匹配串与主串来自同一个串，我们采用字符串Hash算法。

洛谷字符串哈希模板

代码1：

 1 #include<cstdio>
 2 #include<iostream>
 3 #include<cstring>
 4 #include<algorithm>
 5 
 6 using namespace std;
 7 
 8 typedef unsigned long long ULL;
 9 ULL base=131;
10 ULL a[10010];
11 char s[1010];
12 
13 inline ULL hashe(char s[])
14 {
15     int len=strlen(s);
16     ULL ans=0;
17     for(int i=0;i<len;i++)
18         ans=(ans*base+(ULL)(s[i]-'0'));
19     return ans;
20 }
21 
22 int main()
23 {
24     int n;
25     scanf("%d",&n);
26     for(int i=1;i<=n;i++)
27     {
28         scanf("%s",s);
29         a[i]=hashe(s);
30     }
31     sort(a+1,a+n+1);
32     int res=1;
33     for(int i=2;i<=n;i++)
34     {
35         if(a[i]!=a[i-1]) res++;
36     }
37     printf("%d",res);
38     return 0;
39 }

代码2：

 1 #include<cstdio>
 2 #include<iostream>
 3 #include<cstring>
 4 #include<algorithm>
 5 
 6 using namespace std;
 7 
 8 typedef unsigned long long ULL;
 9 ULL base=131;
10 ULL a[10010];
11 ULL mod=0x7fffffff;
12 char s[1010];
13 
14 inline ULL hashe(char s[])
15 {
16     int len=strlen(s);
17     ULL ans=0;
18     for(int i=0;i<len;i++)
19         ans=(ans*base+(ULL)(s[i]-'0'))%mod;
20     return ans;
21 }
22 
23 int main()
24 {
25     int n;
26     scanf("%d",&n);
27     for(int i=1;i<=n;i++)
28     {
29         scanf("%s",s);
30         a[i]=hashe(s);
31     }
32     sort(a+1,a+n+1);
33     int res=1;
34     for(int i=2;i<=n;i++)
35     {
36         if(a[i]!=a[i-1]) res++;
37     }
38     printf("%d",res);
39     return 0;
40 }

posted @ 2019-08-29 17:31 Hoyoak 阅读(526) 评论(0) 编辑收藏举报

刷新页面返回顶部

Hoyoak

伏笔止留白，放空至物外

字符串Hash

公告