Bob Hash
Bob Jenkins 研究了许多哈希函数,并实现了自己的高效哈希算法。我测试了一下:对 5 万多条数据,time33 出现了 6 条冲突,而 bobhash 没有任何冲突。不过 time33 的速度比 bobhash 快。
http://burtleburtle.net/bob/hash/doobs.html
源码
/* Word type used for all hash state (assumes unsigned int is 32 bits wide). */
typedef unsigned int uint32_t;

/* hashsize(n): 1 shifted left by n, i.e. 2^n as a uint32_t. */
#define hashsize(n) ((uint32_t)1<<(n))
/* hashmask(n): hashsize(n) - 1, a mask selecting the low n bits. */
#define hashmask(n) (hashsize(n)-1)

/*
 * mix(a,b,c): scramble the three state words a, b and c in place using
 * nine subtract / xor-shift rounds.
 *
 * The arguments must be modifiable lvalues and are evaluated several
 * times, so they must not have side effects.  The body is wrapped in
 * do { ... } while (0) so that "mix(a,b,c);" behaves as one statement,
 * including inside an unbraced if/else.
 */
#define mix(a,b,c) \
do { \
  a -= b; a -= c; a ^= (c>>13); \
  b -= c; b -= a; b ^= (a<<8); \
  c -= a; c -= b; c ^= (b>>13); \
  a -= b; a -= c; a ^= (c>>12); \
  b -= c; b -= a; b ^= (a<<16); \
  c -= a; c -= b; c ^= (b>>5); \
  a -= b; a -= c; a ^= (c>>3); \
  b -= c; b -= a; b ^= (a<<10); \
  c -= a; c -= b; c ^= (b>>15); \
} while (0)

/* mix2: same as mix, but slower; works on systems that might have 8 byte ub4's */
/*
 * mix2(a,b,c): the same nine subtract / xor-shift rounds as mix(), with
 * extra "& 0xffffffff" masking so that only the low 32 bits of each word
 * take part when the word type is wider than 32 bits.
 *
 * The arguments must be modifiable lvalues; each is evaluated several times.
 */
#define mix2(a,b,c) \
{ \
  a -= b;  a -= c;  a ^= c >> 13; \
  b -= c;  b -= a;  b ^= a << 8; \
  c -= a;  c -= b;  c ^= (b & 0xffffffff) >> 13; \
  a -= b;  a -= c;  a ^= (c & 0xffffffff) >> 12; \
  b -= c;  b -= a;  b ^= a << 16;  b &= 0xffffffff; \
  c -= a;  c -= b;  c ^= b >> 5;   c &= 0xffffffff; \
  a -= b;  a -= c;  a ^= c >> 3;   a &= 0xffffffff; \
  b -= c;  b -= a;  b ^= a << 10;  b &= 0xffffffff; \
  c -= a;  c -= b;  c ^= b >> 15;  c &= 0xffffffff; \
}
/**
 * bob_hash - hash a byte string to a 32-bit value.
 *
 * @k:       key bytes; only the first @length bytes are read
 * @length:  number of bytes in @k; a negative value is treated as 0
 * @initval: seed, e.g. a previous hash value when chaining
 *
 * The key is consumed three bytes at a time (one byte added to each of
 * a, b and c) with a mix() after every triple; the remaining 0-2 bytes and
 * the length are folded in before a final mix().
 *
 * Declared static inline: a plain C99/C11 `inline` definition emits no
 * external symbol, so a call that is not inlined fails to link.
 *
 * NOTE(review): key bytes are read through plain `char`, so bytes >= 0x80
 * are sign-extended where char is signed and the result differs between
 * platforms for such keys.  Left unchanged to keep existing hash values.
 *
 * Returns the final value of the c state word.
 */
static inline
uint32_t bob_hash(char const *k, int length, uint32_t initval)
{
    uint32_t a, b, c, len;

    /* A negative length would wrap to a huge unsigned count and read far
     * past the key, so treat it as an empty key instead. */
    if (length < 0)
        length = 0;

    /* Set up the internal state */
    len = (uint32_t)length;
    a = b = 0x9e3779b9;  /* the golden ratio; an arbitrary value */
    c = initval;         /* the previous hash value */

    /*---------------------------------------- handle most of the key */
    while (len >= 3) {
        a += k[0];
        b += k[1];
        c += k[2];
        mix(a,b,c);
        k += 3;
        len -= 3;
    }

    /*-------------------------------------- handle the last 2 bytes */
    /* <<2 to produce the same results as hash(); shift in unsigned
     * arithmetic so that large lengths cannot overflow a signed int. */
    c += (uint32_t)length << 2;
    switch (len) {
    /* c is reserved for the length */
    case 2:
        b += k[1];
        /* fallthrough */
    case 1:
        a += k[0];
        /* fallthrough */
    default:
        /* case 0: nothing left to add */
        break;
    }
    mix(a,b,c);

    /*-------------------------------------------- report the result */
    return c;
}
测试
使用 53 字节的 key,运行 1000 万次,Bob 哈希耗时
real 0m3.935s
user 0m3.912s
sys 0m0.000s
time33 耗时
real 0m1.047s
user 0m1.036s
sys 0m0.000s
所以 Bob 哈希的耗时约为 time33 的 3.8 倍(3.935s 对 1.047s)。Bob 哈希的优点是散列性好(冲突概率大约为十万分之一)。