hash
今天看代码的时候注意了一下 hash 以及看了下读书时候的笔记, 主要就是说: hash桶大小以及hash 算法
1、目前hash桶的大小都是素数(和2倍相近的一个素数)
设有一个哈希函数
H( c ) = c % N;
当N取一个合数时,最简单的例子是取2^n,比如说取2^3=8,这时候
H( 11100(二进制) ) = H( 28 ) = 4
H( 10100(二进制) ) = H( 20 )= 4
这时候c的二进制第4位开始(从右向左数)就”失效”了,也就是说,无论第c的4位取什么值,都会导致H( c )的值一样.这时候c的第四位就根本不参与H( c )的运算,这样H( c )就无法完整地反映c的特性,增大了导致冲突的几率.
7ul, 23ul, \ 53ul, 97ul, 193ul, 389ul, 769ul, \ 1543ul, 3079ul, 6151ul, 12289ul, 24593ul, \ 49157ul, 98317ul, 196613ul, 393241ul, 786433ul, \ 1572869ul, 3145739ul, 6291469ul, 12582917ul, 25165843ul, \ 50331653ul, 100663319ul, 201326611ul, 402653189ul, 805306457ul,\ 1610612741ul, 3221225473ul, 4294967291ul \
但是也有一个问题:如果桶的大小是2的n次方, 那么(n - 1) & hash实际上是计算出 key 在 tab 中索引位置; 此公式满足了:(n - 1) & hash = hash % n
- &运算速度快,至少比%取模运算块
2、hash 算法:
1、DJB Hash 也叫 Time 33 hash哈希算法
Times33的算法很简单,就是不断的乘33;hash(i) = hash(i-1) * 33 + str[i] Time33在效率和随机性两方面上俱佳;
unsigned int DJBHash(const char* str, unsigned int length){ unsigned int hash = 5381; unsigned int i = 0; for (i = 0; i < length; ++str, ++i) { hash = ((hash << 5) + hash) + (*str); }//((hash << 5) + hash) 就是位操作实现的hash * 32+hash,即 hash * 33。 return hash; }
Kernighan 和 Ritchie 在《The C Programming Language》提出BKDR Hash,采用/ 31 131 1313 13131 131313 etc.. / 作为种子计算hash;这个就是数学家的事了
MurmurHash算法:高运算性能,低碰撞率
/* * The following hash function is based on MurmurHash64A(), placed into the * public domain by Austin Appleby. See http://murmurhash.googlepages.com/ for * details. */ JEMALLOC_INLINE uint64_t hash(const void *key, size_t len, uint64_t seed) { const uint64_t m = UINT64_C(0xc6a4a7935bd1e995); const int r = 47; uint64_t h = seed ^ (len * m); const uint64_t *data = (const uint64_t *)key; const uint64_t *end = data + (len/8); const unsigned char *data2; assert(((uintptr_t)key & 0x7) == 0); while(data != end) { uint64_t k = *data++; k *= m; k ^= k >> r; k *= m; h ^= k; h *= m; } data2 = (const unsigned char *)data; switch(len & 7) { case 7: h ^= ((uint64_t)(data2[6])) << 48; case 6: h ^= ((uint64_t)(data2[5])) << 40; case 5: h ^= ((uint64_t)(data2[4])) << 32; case 4: h ^= ((uint64_t)(data2[3])) << 24; case 3: h ^= ((uint64_t)(data2[2])) << 16; case 2: h ^= ((uint64_t)(data2[1])) << 8; case 1: h ^= ((uint64_t)(data2[0])); h *= m; } h ^= h >> r; h *= m; h ^= h >> r; return (h); }
常见的散列算法有:CRC-32、MD5、SHA-1,SM3,以及广泛使用 SHA-2(SHA-224、SHA-356、SHA-384、SHA-512)
目前使用dpdk 以及内核进行三层转发等时, 发现dpdk 、内核 目前使用jhash以及 hash_long比较多,
hash_long实现:

#ifndef _LINUX_HASH_H #define _LINUX_HASH_H /* Fast hashing routine for ints, longs and pointers. (C) 2002 Nadia Yvette Chambers, IBM */ /* * Knuth recommends primes in approximately golden ratio to the maximum * integer representable by a machine word for multiplicative hashing. * Chuck Lever verified the effectiveness of this technique: * http://www.citi.umich.edu/techreports/reports/citi-tr-00-1.pdf * * These primes are chosen to be bit-sparse, that is operations on * them can use shifts and additions instead of multiplications for * machines where multiplications are slow. */ #include <asm/types.h> #include <linux/compiler.h> /* 2^31 + 2^29 - 2^25 + 2^22 - 2^19 - 2^16 + 1 */ #define GOLDEN_RATIO_PRIME_32 0x9e370001UL /* 2^63 + 2^61 - 2^57 + 2^54 - 2^51 - 2^18 + 1 */ #define GOLDEN_RATIO_PRIME_64 0x9e37fffffffc0001UL #if BITS_PER_LONG == 32 #define GOLDEN_RATIO_PRIME GOLDEN_RATIO_PRIME_32 #define hash_long(val, bits) hash_32(val, bits) #elif BITS_PER_LONG == 64 #define hash_long(val, bits) hash_64(val, bits) #define GOLDEN_RATIO_PRIME GOLDEN_RATIO_PRIME_64 #else #error Wordsize not 32 or 64 #endif /* * The above primes are actively bad for hashing, since they are * too sparse. The 32-bit one is mostly ok, the 64-bit one causes * real problems. Besides, the "prime" part is pointless for the * multiplicative hash. * * Although a random odd number will do, it turns out that the golden * ratio phi = (sqrt(5)-1)/2, or its negative, has particularly nice * properties. * * These are the negative, (1 - phi) = (phi^2) = (3 - sqrt(5))/2. * (See Knuth vol 3, section 6.4, exercise 9.) */ #define GOLDEN_RATIO_32 0x61C88647 #define GOLDEN_RATIO_64 0x61C8864680B583EBull static __always_inline u64 hash_64(u64 val, unsigned int bits) { u64 hash = val; #if BITS_PER_LONG == 64 hash = hash * GOLDEN_RATIO_64; #else /* Sigh, gcc can't optimise this alone like it does for 32 bits. */ u64 n = hash; n <<= 18; hash -= n; n <<= 33; hash -= n; n <<= 3; hash += n; n <<= 3; hash -= n; n <<= 4; hash += n; n <<= 2; hash += n; #endif /* High bits are more random, so use them. */ return hash >> (64 - bits); } static inline u32 hash_32(u32 val, unsigned int bits) { /* On some cpus multiply is faster, on others gcc will do shifts */ u32 hash = val * GOLDEN_RATIO_PRIME_32; /* High bits are more random, so use them. */ return hash >> (32 - bits); } static inline unsigned long hash_ptr(const void *ptr, unsigned int bits) { return hash_long((unsigned long)ptr, bits); } static inline u32 hash32_ptr(const void *ptr) { unsigned long val = (unsigned long)ptr; #if BITS_PER_LONG == 64 val ^= (val >> 32); #endif return (u32)val; } #endif /* _LINUX_HASH_H */
jhash 实现:

#ifndef _LINUX_JHASH_H #define _LINUX_JHASH_H /* jhash.h: Jenkins hash support. * * Copyright (C) 2006. Bob Jenkins (bob_jenkins@burtleburtle.net) * * http://burtleburtle.net/bob/hash/ * * These are the credits from Bob's sources: * * lookup3.c, by Bob Jenkins, May 2006, Public Domain. * * These are functions for producing 32-bit hashes for hash table lookup. * hashword(), hashlittle(), hashlittle2(), hashbig(), mix(), and final() * are externally useful functions. Routines to test the hash are included * if SELF_TEST is defined. You can use this free for any purpose. It's in * the public domain. It has no warranty. * * Copyright (C) 2009-2010 Jozsef Kadlecsik (kadlec@blackhole.kfki.hu) * * I've modified Bob's hash to be useful in the Linux kernel, and * any bugs present are my fault. * Jozsef */ #include <linux/bitops.h> #include <linux/unaligned/packed_struct.h> /* Best hash sizes are of power of two */ #define jhash_size(n) ((u32)1<<(n)) /* Mask the hash value, i.e (value & jhash_mask(n)) instead of (value % n) */ #define jhash_mask(n) (jhash_size(n)-1) /* __jhash_mix -- mix 3 32-bit values reversibly. */ #define __jhash_mix(a, b, c) \ { \ a -= c; a ^= rol32(c, 4); c += b; \ b -= a; b ^= rol32(a, 6); a += c; \ c -= b; c ^= rol32(b, 8); b += a; \ a -= c; a ^= rol32(c, 16); c += b; \ b -= a; b ^= rol32(a, 19); a += c; \ c -= b; c ^= rol32(b, 4); b += a; \ } /* __jhash_final - final mixing of 3 32-bit values (a,b,c) into c */ #define __jhash_final(a, b, c) \ { \ c ^= b; c -= rol32(b, 14); \ a ^= c; a -= rol32(c, 11); \ b ^= a; b -= rol32(a, 25); \ c ^= b; c -= rol32(b, 16); \ a ^= c; a -= rol32(c, 4); \ b ^= a; b -= rol32(a, 14); \ c ^= b; c -= rol32(b, 24); \ } /* An arbitrary initial parameter */ #define JHASH_INITVAL 0xdeadbeef /* jhash - hash an arbitrary key * @k: sequence of bytes as key * @length: the length of the key * @initval: the previous hash, or an arbitray value * * The generic version, hashes an arbitrary sequence of bytes. * No alignment or length assumptions are made about the input key. * * Returns the hash value of the key. The result depends on endianness. */ static inline u32 jhash(const void *key, u32 length, u32 initval) { u32 a, b, c; const u8 *k = key; /* Set up the internal state */ a = b = c = JHASH_INITVAL + length + initval; /* All but the last block: affect some 32 bits of (a,b,c) */ while (length > 12) { a += __get_unaligned_cpu32(k); b += __get_unaligned_cpu32(k + 4); c += __get_unaligned_cpu32(k + 8); __jhash_mix(a, b, c); length -= 12; k += 12; } /* Last block: affect all 32 bits of (c) */ /* All the case statements fall through */ switch (length) { case 12: c += (u32)k[11]<<24; case 11: c += (u32)k[10]<<16; case 10: c += (u32)k[9]<<8; case 9: c += k[8]; case 8: b += (u32)k[7]<<24; case 7: b += (u32)k[6]<<16; case 6: b += (u32)k[5]<<8; case 5: b += k[4]; case 4: a += (u32)k[3]<<24; case 3: a += (u32)k[2]<<16; case 2: a += (u32)k[1]<<8; case 1: a += k[0]; __jhash_final(a, b, c); case 0: /* Nothing left to add */ break; } return c; } /* jhash2 - hash an array of u32's * @k: the key which must be an array of u32's * @length: the number of u32's in the key * @initval: the previous hash, or an arbitray value * * Returns the hash value of the key. */ static inline u32 jhash2(const u32 *k, u32 length, u32 initval) { u32 a, b, c; /* Set up the internal state */ a = b = c = JHASH_INITVAL + (length<<2) + initval; /* Handle most of the key */ while (length > 3) { a += k[0]; b += k[1]; c += k[2]; __jhash_mix(a, b, c); length -= 3; k += 3; } /* Handle the last 3 u32's: all the case statements fall through */ switch (length) { case 3: c += k[2]; case 2: b += k[1]; case 1: a += k[0]; __jhash_final(a, b, c); case 0: /* Nothing left to add */ break; } return c; } /* __jhash_nwords - hash exactly 3, 2 or 1 word(s) */ static inline u32 __jhash_nwords(u32 a, u32 b, u32 c, u32 initval) { a += initval; b += initval; c += initval; __jhash_final(a, b, c); return c; } static inline u32 jhash_3words(u32 a, u32 b, u32 c, u32 initval) { return __jhash_nwords(a, b, c, initval + JHASH_INITVAL + (3 << 2)); } static inline u32 jhash_2words(u32 a, u32 b, u32 initval) { return __jhash_nwords(a, b, 0, initval + JHASH_INITVAL + (2 << 2)); } static inline u32 jhash_1word(u32 a, u32 initval) { return __jhash_nwords(a, 0, 0, initval + JHASH_INITVAL + (1 << 2)); } #endif /* _LINUX_JHASH_H */
http代理服务器(3-4-7层代理)-网络事件库公共组件、内核kernel驱动 摄像头驱动 tcpip网络协议栈、netfilter、bridge 好像看过!!!!
但行好事 莫问前程
--身高体重180的胖子
【推荐】国内首个AI IDE,深度理解中文开发场景,立即下载体验Trae
【推荐】编程新体验,更懂你的AI,立即体验豆包MarsCode编程助手
【推荐】抖音旗下AI助手豆包,你的智能百科全书,全免费不限次数
【推荐】轻量又高性能的 SSH 工具 IShell:AI 加持,快人一步
· AI与.NET技术实操系列:基于图像分类模型对图像进行分类
· go语言实现终端里的倒计时
· 如何编写易于单元测试的代码
· 10年+ .NET Coder 心语,封装的思维:从隐藏、稳定开始理解其本质意义
· .NET Core 中如何实现缓存的预热?
· 25岁的心里话
· 闲置电脑爆改个人服务器(超详细) #公网映射 #Vmware虚拟网络编辑器
· 基于 Docker 搭建 FRP 内网穿透开源项目(很简单哒)
· 零经验选手,Compose 一天开发一款小游戏!
· 一起来玩mcp_server_sqlite,让AI帮你做增删改查!!
2019-07-30 linux 异步I/O 信号