Hash与Trie树

填坑(((

哈希(Hash) 与 踹树(Trie)


 

引入:

hxs: 我是踹树 , 我可以踹任何人


 

Hash

哈希 , 一种字符串算法 , 可以把一段字符串映射成一个数字

通常是这样写

1 inline int getHash(char *s){
2     int len = strlen(s + 1);
3     int Hash = 0;
4     for(int i = 1; i <= len; i++)
5         Hash = (Hash * seed + s[i] - 40) % ha;
6     return Hash;
7 }

其中 ha 和 seed 分别是自己定的质数

这样就可以将字符串映射成一个 0~ha-1这个区间中的一个数 , 但是并不一定唯一 , 不唯一的情况请使用链表或者别的方法(以后看到题了再填这个坑)

例题

会了Hash以后这就是一道sb题 , Hash完排个序就完事儿了

 1 //#pragma GCC optimize(2)
 2 #include<cmath>
 3 #include<cstdio>
 4 #include<cstring>
 5 #include<cstdlib>
 6 #include<queue>
 7 #include<vector>
 8 #include<iostream>
 9 #include<algorithm>
10 #define N 510
11 #define debug 1
12 #define osu auto
13 #define FILETEST 1
14 #define inf 100010
15 #define int long long
16 #define ha 1019260817
17 #define INF 0x7fffffff
18 #define pii std::pair <int, int>
19 #define INF_T 9223372036854775807
20 #define APART puts("----------------------")
21 #define DEBUG printf("%s %d\n",__FUNCTION__,__LINE__)
22 
23 namespace chino{
24 
25 const int seed = 131;
26 
27 inline void setting(){
28 #if FILETEST
29     freopen("_test.in", "r", stdin);
30     freopen("_test.me.out", "w", stdout);
31 #endif
32     return;
33 }
34 
35 inline int read(){
36     register char c = getchar(), up = c; register int num = 0;
37     while(c < '0' || c > '9') up = c, c = getchar();
38     while(c >= '0' && c <= '9') num = (num << 3) + (num << 1) + (c ^ '0'), c = getchar();
39     return up == '-' ? -num : num;
40 }
41 
42 int n;
43 int hash[inf];
44 char s[inf];
45 
46 inline bool cmp(const int &a, const int &b){ return a < b;}
47 
48 inline int getHash(char *s){
49     int len = strlen(s + 1);
50     int Hash = 0;
51     for(int i = 1; i <= len; i++)
52         Hash = (Hash * seed + s[i] - 40) % ha;
53     return Hash;
54 }
55 
56 inline int main(){
57     n = read();
58     for(int i = 1; i <= n; i++){
59         scanf("%s", s + 1);
60         hash[i] = getHash(s);
61     }
62     std::sort(hash + 1, hash + 1 + n, cmp);
63     int ans = n;
64     for(int i = 1; i <= n; i++){
65         if(hash[i] == hash[i - 1]) --ans;
66     }
67     printf("%lld\n", ans);
68     return 0;
69 }
70 
71 }//namespace chino
72 
73 signed main(){return chino::main();}

附:双模Hash

冲突率会降低 , 但是慢的很

 1 //#pragma GCC optimize(2)
 2 #include<cmath>
 3 #include<cstdio>
 4 #include<cstring>
 5 #include<cstdlib>
 6 #include<queue>
 7 #include<vector>
 8 #include<iostream>
 9 #include<algorithm>
10 #define N 510
11 #define debug 1
12 #define osu auto
13 #define FILETEST 1
14 #define inf 100010
15 #define int long long
16 #define ha 998244353
17 #define INF 0x7fffffff
18 #define pii std::pair <int, int>
19 #define INF_T 9223372036854775807
20 #define APART puts("----------------------")
21 #define DEBUG printf("%s %d\n",__FUNCTION__,__LINE__)
22 
23 namespace chino{
24 
25 const int seed = 131;
26 const int mod1 = 1019260817;
27 const int mod2 = 19491001;
28 
29 inline void setting(){
30 #if FILETEST
31     freopen("_test.in", "r", stdin);
32     freopen("_test.me.out", "w", stdout);
33 #endif
34     return;
35 }
36 
37 inline int read(){
38     register char c = getchar(), up = c; register int num = 0;
39     while(c < '0' || c > '9') up = c, c = getchar();
40     while(c >= '0' && c <= '9') num = (num << 3) + (num << 1) + (c ^ '0'), c = getchar();
41     return up == '-' ? -num : num;
42 }
43 
44 int n;
45 char s[inf];
46 std::pair <int, int> hash[inf];
47 
48 inline std::pair <int, int> getHash(char *s){
49     int len = strlen(s + 1);
50     std::pair <int, int> Hash = {0, 0};
51     for(int i = 1; i <= len; i++)
52         Hash.first = (Hash.first * seed + s[i] - 40) % mod1;
53     for(int i = 1; i <= len; i++)
54         Hash.second = (Hash.second * seed + s[i] - 40) % mod2;
55     return Hash;
56 }
57 
58 inline int main(){
59     n = read();
60     for(int i = 1; i <= n; i++){
61         scanf("%s", s + 1);
62         hash[i] = getHash(s);
63     }
64     std::sort(hash + 1, hash + 1 + n);
65     int ans = n;
66     for(int i = 1; i <= n; i++){
67         if(hash[i].first == hash[i - 1].first && hash[i].second == hash[i - 1].second)
68             --ans;
69     }
70     printf("%lld\n", ans);
71     return 0;
72 }
73 
74 }//namespace chino
75 
76 signed main(){return chino::main();}

再附:自然溢出Hash

不mod ha了 , 定义成unsigned int 等着她爆掉

因为没有mod ha操作所以会快很多

 1 //#pragma GCC optimize(2)
 2 #include<cmath>
 3 #include<cstdio>
 4 #include<cstring>
 5 #include<cstdlib>
 6 #include<queue>
 7 #include<vector>
 8 #include<iostream>
 9 #include<algorithm>
10 #define N 510
11 #define debug 1
12 #define osu auto
13 #define FILETEST 1
14 #define inf 100010
15 #define ll long long
16 #define ull unsigned int
17 #define ha 998244353
18 #define INF 0x7fffffff
19 #define pii std::pair <int, int>
20 #define INF_T 9223372036854775807
21 #define APART puts("----------------------")
22 #define DEBUG printf("%s %d\n",__FUNCTION__,__LINE__)
23 
24 namespace chino{
25 
26 const int seed = 131;
27 const int mod1 = 1019260817;
28 const int mod2 = 19491001;
29 
30 inline void setting(){
31 #if FILETEST
32     freopen("_test.in", "r", stdin);
33     freopen("_test.me.out", "w", stdout);
34 #endif
35     return;
36 }
37 
38 inline int read(){
39     register char c = getchar(), up = c; register int num = 0;
40     while(c < '0' || c > '9') up = c, c = getchar();
41     while(c >= '0' && c <= '9') num = (num << 3) + (num << 1) + (c ^ '0'), c = getchar();
42     return up == '-' ? -num : num;
43 }
44 
45 int n;
46 ull hash[inf];
47 char s[inf];
48 
49 inline ull getHash(char *s){
50     int len = strlen(s + 1);
51     ull Hash = 0;
52     for(int i = 1; i <= len; i++)
53         Hash = Hash * seed + s[i] - 40;
54     return Hash;
55 }
56 
57 inline int main(){
58     n = read();
59     for(int i = 1; i <= n; i++){
60         scanf("%s", s + 1);
61         hash[i] = getHash(s);
62     }
63     std::sort(hash + 1, hash + 1 + n);
64     ull ans = n;
65     for(int i = 1; i <= n; i++){
66         if(hash[i] == hash[i - 1])
67             --ans;
68     }
69     printf("%u\n", ans);
70     return 0;
71 }
72 
73 }//namespace chino
74 
75 signed main(){return chino::main();}

 


踹树(Trie)

每个点 u , 她的父亲为 v , v->u 这条边记录一个字符

对于一个字符串 , 很显然她一定有一条唯一的路径从根到树的某一个节点

很显然 , 踹树是一个空间换时间的做法

因为我太菜了所以本文章的 踹 是非指针的

踹树例题

加入字符串操作

定义 index 初始为 1 

定义 child[i = 30][j = inf] 数组

表示 j 点下一个字符如果是 i+'a' 那么下一个点的下标是 child[i][j]

(把小的一维放在前面为了节约时间 , 虽然有时候有点反人类)

 1 inline void add(char *s){
 2     int len = strlen(s + 1);
 3     int now = 1;
 4     for(int i = 1; i <= len; i++){
 5         int c = s[i] - 'a';
 6         if(child[c][now] == 0)
 7             child[c][now] = ++index;
 8         now = child[c][now];
 9     }
10     return;
11 }

 在本题中的的查询操作

mes数组记录附加信息 , 在本题中也就是是否被查过

 1 inline int query(char *s){
 2     int len = strlen(s + 1);
 3     int now = 1;
 4     for(int i = 1; i <= len; i++){
 5         int c = s[i] - 'a';
 6         if(child[c][now] == 0)
 7             return 0;
 8         now = child[c][now];
 9     }
10     if(mes[now] == 0){
11         ++mes[now];
12         return 1;
13     } else return 2;
14 }

然后这道题就做完了


 

例题:[HNOI2004]L语言

这道题的附加信息就是当前字符是否是一个字典中的字符串的结尾字符

如果是的话就可以更新答案 , 并且从下一个字符再开始踹

就这样

(本题我用的结构体)

  1 //#pragma GCC optimize(2)
  2 #include<cmath>
  3 #include<cstdio>
  4 #include<cstring>
  5 #include<cstdlib>
  6 #include<queue>
  7 #include<vector>
  8 #include<iostream>
  9 #include<algorithm>
 10 #define N 510
 11 #define debug 1
 12 #define osu auto
 13 #define FILETEST 1
 14 #define inf 1000010
 15 #define ll long long
 16 #define ha 998244353
 17 #define INF 0x7fffffff
 18 #define pii std::pair <int, int>
 19 #define INF_T 9223372036854775807
 20 #define APART puts("----------------------")
 21 #define DEBUG printf("%s %d\n",__FUNCTION__,__LINE__)
 22 
 23 namespace chino{
 24 
 25 inline void setting(){
 26 #if FILETEST
 27     freopen("_test.in", "r", stdin);
 28     freopen("_test.me.out", "w", stdout);
 29 #endif
 30     return;
 31 }
 32 
 33 inline int read(){
 34     register char c = getchar(), up = c; register int num = 0;
 35     while(c < '0' || c > '9') up = c, c = getchar();
 36     while(c >= '0' && c <= '9') num = (num << 3) + (num << 1) + (c ^ '0'), c = getchar();
 37     return up == '-' ? -num : num;
 38 }
 39 
 40 int n, m;
 41 int index = 1;
 42 bool mark[inf];
 43 char s[inf];
 44 struct Trie{
 45     int mark;
 46     int son[30];
 47 }trie[inf];
 48 
 49 inline void add(char *s){
 50     int len = strlen(s + 1);
 51     int now = 1;
 52     for(int i = 1; i <= len; i++){
 53         int c = s[i] - 'a';
 54         if(trie[now].son[c] == 0)
 55             trie[now].son[c] = ++index;
 56         now = trie[now].son[c];
 57     }
 58     trie[now].mark = 1;
 59     return;
 60 }
 61 
 62 inline int query(char *s){
 63     int len = strlen(s + 1);
 64     int now = 1, ans = 0;
 65     memset(mark, 0, sizeof mark);
 66     for(int i = 1; i <= len; i++){ // 首先开始的时候会为了寻找第一个mark非0的点而先踹一下
 67         int c = s[i] - 'a';
 68         now = trie[now].son[c];
 69         if(now == 0)
 70             break;
 71         if(trie[now].mark)
 72             mark[i] = 1;
 73     }
 74     for(int i = 1; i <= len; i++){
 75         if(mark[i])
 76             ans = i;
 77         else continue;
 78         int now = 1;
 79         for(int j = i + 1; j <= len; j++){
 80             int c = s[j] - 'a';
 81             now = trie[now].son[c];
 82             if(now == 0)
 83                 break;
 84             if(trie[now].mark) // 踹到了一个字符串
 85                 mark[j] = 1;
 86         }
 87     }
 88     return ans;
 89 }
 90 
 91 inline int main(){
 92     n = read(), m = read();
 93     for(int i = 1; i <= n; i++){
 94         scanf("%s", s + 1);
 95         add(s);
 96     }
 97     for(int i = 1; i <= m; i++){
 98         scanf("%s", s + 1);
 99         printf("%d\n", query(s));
100     }
101     return 0;
102 }
103 
104 }//namespace chino
105 
106 signed main(){return chino::main();}

 

posted @ 2019-11-08 21:32  ChiaroShiro  阅读(301)  评论(0编辑  收藏  举报