Bob Hash

Bob 研究了很多哈希,并实现了自己的高效hash。 我测试一下了,5w多的数据,time33出现了6条重复,bobhash没有一条重复。不过time33的速度是比bobhash快的。

http://burtleburtle.net/bob/hash/doobs.html

源码

typedef  unsigned int uint32_t;

#define hashsize(n) ((uint32_t)1<<(n))
#define hashmask(n) (hashsize(n)-1)

#define mix(a,b,c) \
{ \
  a -= b; a -= c; a ^= (c>>13); \
  b -= c; b -= a; b ^= (a<<8); \
  c -= a; c -= b; c ^= (b>>13); \
  a -= b; a -= c; a ^= (c>>12);  \
  b -= c; b -= a; b ^= (a<<16); \
  c -= a; c -= b; c ^= (b>>5); \
  a -= b; a -= c; a ^= (c>>3);  \
  b -= c; b -= a; b ^= (a<<10); \
  c -= a; c -= b; c ^= (b>>15); \
}

/* same, but slower, works on systems that might have 8 byte ub4's */
#define mix2(a,b,c) \
{ \
  a -= b; a -= c; a ^= (c>>13); \
  b -= c; b -= a; b ^= (a<< 8); \
  c -= a; c -= b; c ^= ((b&0xffffffff)>>13); \
  a -= b; a -= c; a ^= ((c&0xffffffff)>>12); \
  b -= c; b -= a; b = (b ^ (a<<16)) & 0xffffffff; \
  c -= a; c -= b; c = (c ^ (b>> 5)) & 0xffffffff; \
  a -= b; a -= c; a = (a ^ (c>> 3)) & 0xffffffff; \
  b -= c; b -= a; b = (b ^ (a<<10)) & 0xffffffff; \
  c -= a; c -= b; c = (c ^ (b>>15)) & 0xffffffff; \
}


inline
uint32_t bob_hash(char const *k, int length, uint32_t initval)
{
   uint32_t a,b,c,len;

   /* Set up the internal state */
   len = length;
   a = b = 0x9e3779b9;  /* the golden ratio; an arbitrary value */
   c = initval;           /* the previous hash value */

   /*---------------------------------------- handle most of the key */
   while (len >= 3)
   {
      a += k[0];
      b += k[1];
      c += k[2];
      mix(a,b,c);
      k += 3; len -= 3;
   }

   /*-------------------------------------- handle the last 2 ub4's */
   c += (length<<2);   /* <<2 to produce the same results as hash() */
   switch(len)              /* all the case statements fall through */
   {
     /* c is reserved for the length */
   case 2 : b+=k[1];
   case 1 : a+=k[0];
     /* case 0: nothing left to add */
   }
   mix(a,b,c);
   /*-------------------------------------------- report the result */
   return c;
}

测试

使用53个字节的key, 运行1000w次

real    0m3.935s
user    0m3.912s
sys     0m0.000s

time33 耗时

real    0m1.047s
user    0m1.036s
sys     0m0.000s

 

所以慢了3倍. Bob哈希的优点是散列性好(大概有十万分之一的概率冲突)。

posted @ 2010-12-22 14:15  napoleon_liu  阅读(1189)  评论(0编辑  收藏  举报