Hash与Trie树
填坑(((
哈希(Hash) 与 踹树(Trie)
引入:
hxs: 我是踹树 , 我可以踹任何人
Hash
哈希 , 一种字符串算法 , 可以把一段字符串映射成一个数字
通常是这样写
1 inline int getHash(char *s){ 2 int len = strlen(s + 1); 3 int Hash = 0; 4 for(int i = 1; i <= len; i++) 5 Hash = (Hash * seed + s[i] - 40) % ha; 6 return Hash; 7 }
其中 ha 和 seed 分别是自己定的质数
这样就可以将字符串映射成一个 0~ha-1这个区间中的一个数 , 但是并不一定唯一 , 不唯一的情况请使用链表或者别的方法(以后看到题了再填这个坑)
会了Hash以后这就是一道sb题 , Hash完排个序就完事儿了
1 //#pragma GCC optimize(2) 2 #include<cmath> 3 #include<cstdio> 4 #include<cstring> 5 #include<cstdlib> 6 #include<queue> 7 #include<vector> 8 #include<iostream> 9 #include<algorithm> 10 #define N 510 11 #define debug 1 12 #define osu auto 13 #define FILETEST 1 14 #define inf 100010 15 #define int long long 16 #define ha 1019260817 17 #define INF 0x7fffffff 18 #define pii std::pair <int, int> 19 #define INF_T 9223372036854775807 20 #define APART puts("----------------------") 21 #define DEBUG printf("%s %d\n",__FUNCTION__,__LINE__) 22 23 namespace chino{ 24 25 const int seed = 131; 26 27 inline void setting(){ 28 #if FILETEST 29 freopen("_test.in", "r", stdin); 30 freopen("_test.me.out", "w", stdout); 31 #endif 32 return; 33 } 34 35 inline int read(){ 36 register char c = getchar(), up = c; register int num = 0; 37 while(c < '0' || c > '9') up = c, c = getchar(); 38 while(c >= '0' && c <= '9') num = (num << 3) + (num << 1) + (c ^ '0'), c = getchar(); 39 return up == '-' ? -num : num; 40 } 41 42 int n; 43 int hash[inf]; 44 char s[inf]; 45 46 inline bool cmp(const int &a, const int &b){ return a < b;} 47 48 inline int getHash(char *s){ 49 int len = strlen(s + 1); 50 int Hash = 0; 51 for(int i = 1; i <= len; i++) 52 Hash = (Hash * seed + s[i] - 40) % ha; 53 return Hash; 54 } 55 56 inline int main(){ 57 n = read(); 58 for(int i = 1; i <= n; i++){ 59 scanf("%s", s + 1); 60 hash[i] = getHash(s); 61 } 62 std::sort(hash + 1, hash + 1 + n, cmp); 63 int ans = n; 64 for(int i = 1; i <= n; i++){ 65 if(hash[i] == hash[i - 1]) --ans; 66 } 67 printf("%lld\n", ans); 68 return 0; 69 } 70 71 }//namespace chino 72 73 signed main(){return chino::main();}
附:双模Hash
冲突率会降低 , 但是慢的很
1 //#pragma GCC optimize(2) 2 #include<cmath> 3 #include<cstdio> 4 #include<cstring> 5 #include<cstdlib> 6 #include<queue> 7 #include<vector> 8 #include<iostream> 9 #include<algorithm> 10 #define N 510 11 #define debug 1 12 #define osu auto 13 #define FILETEST 1 14 #define inf 100010 15 #define int long long 16 #define ha 998244353 17 #define INF 0x7fffffff 18 #define pii std::pair <int, int> 19 #define INF_T 9223372036854775807 20 #define APART puts("----------------------") 21 #define DEBUG printf("%s %d\n",__FUNCTION__,__LINE__) 22 23 namespace chino{ 24 25 const int seed = 131; 26 const int mod1 = 1019260817; 27 const int mod2 = 19491001; 28 29 inline void setting(){ 30 #if FILETEST 31 freopen("_test.in", "r", stdin); 32 freopen("_test.me.out", "w", stdout); 33 #endif 34 return; 35 } 36 37 inline int read(){ 38 register char c = getchar(), up = c; register int num = 0; 39 while(c < '0' || c > '9') up = c, c = getchar(); 40 while(c >= '0' && c <= '9') num = (num << 3) + (num << 1) + (c ^ '0'), c = getchar(); 41 return up == '-' ? -num : num; 42 } 43 44 int n; 45 char s[inf]; 46 std::pair <int, int> hash[inf]; 47 48 inline std::pair <int, int> getHash(char *s){ 49 int len = strlen(s + 1); 50 std::pair <int, int> Hash = {0, 0}; 51 for(int i = 1; i <= len; i++) 52 Hash.first = (Hash.first * seed + s[i] - 40) % mod1; 53 for(int i = 1; i <= len; i++) 54 Hash.second = (Hash.second * seed + s[i] - 40) % mod2; 55 return Hash; 56 } 57 58 inline int main(){ 59 n = read(); 60 for(int i = 1; i <= n; i++){ 61 scanf("%s", s + 1); 62 hash[i] = getHash(s); 63 } 64 std::sort(hash + 1, hash + 1 + n); 65 int ans = n; 66 for(int i = 1; i <= n; i++){ 67 if(hash[i].first == hash[i - 1].first && hash[i].second == hash[i - 1].second) 68 --ans; 69 } 70 printf("%lld\n", ans); 71 return 0; 72 } 73 74 }//namespace chino 75 76 signed main(){return chino::main();}
再附:自然溢出Hash
不mod ha了 , 定义成unsigned int 等着她爆掉
因为没有mod ha操作所以会快很多
1 //#pragma GCC optimize(2) 2 #include<cmath> 3 #include<cstdio> 4 #include<cstring> 5 #include<cstdlib> 6 #include<queue> 7 #include<vector> 8 #include<iostream> 9 #include<algorithm> 10 #define N 510 11 #define debug 1 12 #define osu auto 13 #define FILETEST 1 14 #define inf 100010 15 #define ll long long 16 #define ull unsigned int 17 #define ha 998244353 18 #define INF 0x7fffffff 19 #define pii std::pair <int, int> 20 #define INF_T 9223372036854775807 21 #define APART puts("----------------------") 22 #define DEBUG printf("%s %d\n",__FUNCTION__,__LINE__) 23 24 namespace chino{ 25 26 const int seed = 131; 27 const int mod1 = 1019260817; 28 const int mod2 = 19491001; 29 30 inline void setting(){ 31 #if FILETEST 32 freopen("_test.in", "r", stdin); 33 freopen("_test.me.out", "w", stdout); 34 #endif 35 return; 36 } 37 38 inline int read(){ 39 register char c = getchar(), up = c; register int num = 0; 40 while(c < '0' || c > '9') up = c, c = getchar(); 41 while(c >= '0' && c <= '9') num = (num << 3) + (num << 1) + (c ^ '0'), c = getchar(); 42 return up == '-' ? -num : num; 43 } 44 45 int n; 46 ull hash[inf]; 47 char s[inf]; 48 49 inline ull getHash(char *s){ 50 int len = strlen(s + 1); 51 ull Hash = 0; 52 for(int i = 1; i <= len; i++) 53 Hash = Hash * seed + s[i] - 40; 54 return Hash; 55 } 56 57 inline int main(){ 58 n = read(); 59 for(int i = 1; i <= n; i++){ 60 scanf("%s", s + 1); 61 hash[i] = getHash(s); 62 } 63 std::sort(hash + 1, hash + 1 + n); 64 ull ans = n; 65 for(int i = 1; i <= n; i++){ 66 if(hash[i] == hash[i - 1]) 67 --ans; 68 } 69 printf("%u\n", ans); 70 return 0; 71 } 72 73 }//namespace chino 74 75 signed main(){return chino::main();}
踹树(Trie)
每个点 u , 她的父亲为 v , v->u 这条边记录一个字符
对于一个字符串 , 很显然她一定有一条唯一的路径从根到树的某一个节点
很显然 , 踹树是一个空间换时间的做法
因为我太菜了所以本文章的 踹 是非指针的
加入字符串操作
定义 index 初始为 1
定义 child[i = 30][j = inf] 数组
表示 j 点下一个字符如果是 i+'a' 那么下一个点的下标是 child[i][j]
(把小的一维放在前面为了节约时间 , 虽然有时候有点反人类)
1 inline void add(char *s){ 2 int len = strlen(s + 1); 3 int now = 1; 4 for(int i = 1; i <= len; i++){ 5 int c = s[i] - 'a'; 6 if(child[c][now] == 0) 7 child[c][now] = ++index; 8 now = child[c][now]; 9 } 10 return; 11 }
在本题中的的查询操作
mes数组记录附加信息 , 在本题中也就是是否被查过
1 inline int query(char *s){ 2 int len = strlen(s + 1); 3 int now = 1; 4 for(int i = 1; i <= len; i++){ 5 int c = s[i] - 'a'; 6 if(child[c][now] == 0) 7 return 0; 8 now = child[c][now]; 9 } 10 if(mes[now] == 0){ 11 ++mes[now]; 12 return 1; 13 } else return 2; 14 }
然后这道题就做完了
这道题的附加信息就是当前字符是否是一个字典中的字符串的结尾字符
如果是的话就可以更新答案 , 并且从下一个字符再开始踹
就这样
(本题我用的结构体)
1 //#pragma GCC optimize(2) 2 #include<cmath> 3 #include<cstdio> 4 #include<cstring> 5 #include<cstdlib> 6 #include<queue> 7 #include<vector> 8 #include<iostream> 9 #include<algorithm> 10 #define N 510 11 #define debug 1 12 #define osu auto 13 #define FILETEST 1 14 #define inf 1000010 15 #define ll long long 16 #define ha 998244353 17 #define INF 0x7fffffff 18 #define pii std::pair <int, int> 19 #define INF_T 9223372036854775807 20 #define APART puts("----------------------") 21 #define DEBUG printf("%s %d\n",__FUNCTION__,__LINE__) 22 23 namespace chino{ 24 25 inline void setting(){ 26 #if FILETEST 27 freopen("_test.in", "r", stdin); 28 freopen("_test.me.out", "w", stdout); 29 #endif 30 return; 31 } 32 33 inline int read(){ 34 register char c = getchar(), up = c; register int num = 0; 35 while(c < '0' || c > '9') up = c, c = getchar(); 36 while(c >= '0' && c <= '9') num = (num << 3) + (num << 1) + (c ^ '0'), c = getchar(); 37 return up == '-' ? -num : num; 38 } 39 40 int n, m; 41 int index = 1; 42 bool mark[inf]; 43 char s[inf]; 44 struct Trie{ 45 int mark; 46 int son[30]; 47 }trie[inf]; 48 49 inline void add(char *s){ 50 int len = strlen(s + 1); 51 int now = 1; 52 for(int i = 1; i <= len; i++){ 53 int c = s[i] - 'a'; 54 if(trie[now].son[c] == 0) 55 trie[now].son[c] = ++index; 56 now = trie[now].son[c]; 57 } 58 trie[now].mark = 1; 59 return; 60 } 61 62 inline int query(char *s){ 63 int len = strlen(s + 1); 64 int now = 1, ans = 0; 65 memset(mark, 0, sizeof mark); 66 for(int i = 1; i <= len; i++){ // 首先开始的时候会为了寻找第一个mark非0的点而先踹一下 67 int c = s[i] - 'a'; 68 now = trie[now].son[c]; 69 if(now == 0) 70 break; 71 if(trie[now].mark) 72 mark[i] = 1; 73 } 74 for(int i = 1; i <= len; i++){ 75 if(mark[i]) 76 ans = i; 77 else continue; 78 int now = 1; 79 for(int j = i + 1; j <= len; j++){ 80 int c = s[j] - 'a'; 81 now = trie[now].son[c]; 82 if(now == 0) 83 break; 84 if(trie[now].mark) // 踹到了一个字符串 85 mark[j] = 1; 86 } 87 } 88 return ans; 89 } 90 91 inline int main(){ 92 n = read(), m = read(); 93 for(int i = 1; i <= n; i++){ 94 scanf("%s", s + 1); 95 add(s); 96 } 97 for(int i = 1; i <= m; i++){ 98 scanf("%s", s + 1); 99 printf("%d\n", query(s)); 100 } 101 return 0; 102 } 103 104 }//namespace chino 105 106 signed main(){return chino::main();}