常用的Hash算法

1、RSHash
/**
 * RS hash: multiplicative string hash whose multiplier itself changes
 * after every character.
 *
 * @param str  Text to hash. An empty string produces 0.
 * @return     The accumulated hash; all arithmetic wraps in unsigned int.
 */
unsigned int RSHash(const std::string& str)
{
   const unsigned int multiplierStep = 378551;
   unsigned int multiplier = 63689;
   unsigned int result = 0;

   for (std::string::const_iterator it = str.begin(); it != str.end(); ++it)
   {
      // *it is a plain char and is promoted to int before the addition,
      // so a negative char value (signed-char platforms) sign-extends.
      result = result * multiplier + *it;
      // The multiplier is advanced only after it has been used.
      multiplier *= multiplierStep;
   }

   return result;
}
 
2、JSHash
/**
 * JS hash: shift-add-xor string hash.
 *
 * @param str  Text to hash. An empty string produces the initial value 1315423911.
 * @return     The hash after every character has been folded in.
 */
unsigned int JSHash(const std::string& str)
{
   unsigned int digest = 1315423911;

   std::string::const_iterator cursor = str.begin();
   const std::string::const_iterator finish = str.end();
   while (cursor != finish)
   {
      // Combine a left-shifted copy, the character and a right-shifted copy,
      // then XOR that mix back into the running value.
      const unsigned int mix = (digest << 5) + *cursor + (digest >> 2);
      digest ^= mix;
      ++cursor;
   }

   return digest;
}
 
3、PJWHash
/**
 * PJW hash: shifts the running value left by one eighth of the width of
 * unsigned int per character and folds any bits that reach the top eighth
 * back into the lower part.
 *
 * @param str  Text to hash. An empty string produces 0.
 * @return     The hash; the top one-eighth of its bits is cleared whenever
 *             the fold step runs.
 */
unsigned int PJWHash(const std::string& str)
{
   // All shift amounts are derived from the width of unsigned int.
   const unsigned int totalBits = static_cast<unsigned int>(sizeof(unsigned int) * 8);
   const unsigned int foldShift = static_cast<unsigned int>((totalBits * 3) / 4);
   const unsigned int stepShift = static_cast<unsigned int>(totalBits / 8);
   const unsigned int topMask   = static_cast<unsigned int>(0xFFFFFFFF) << (totalBits - stepShift);

   unsigned int acc = 0;

   for (std::string::const_iterator it = str.begin(); it != str.end(); ++it)
   {
      acc = (acc << stepShift) + *it;

      const unsigned int spill = acc & topMask;
      if (spill != 0)
      {
         // Fold the spilled top bits down, then drop them from the top.
         acc = (acc ^ (spill >> foldShift)) & ~topMask;
      }
   }

   return acc;
}
 
4、ELFHash
/**
 * ELF hash: shifts the running value left by four bits per character and
 * folds the bits selected by the mask 0xF0000000 back into the low part.
 *
 * @param str  Text to hash. An empty string produces 0.
 * @return     The hash; the bits under 0xF0000000 are cleared after each step.
 */
unsigned int ELFHash(const std::string& str)
{
   unsigned int acc = 0;

   for (std::string::const_iterator it = str.begin(); it != str.end(); ++it)
   {
      acc = (acc << 4) + *it;

      const unsigned int top = acc & 0xF0000000L;
      if (top != 0)
      {
         acc ^= (top >> 24);
         // Clearing is only needed when some masked bit is set; with
         // top == 0 the mask ~top would leave acc untouched anyway.
         acc &= ~top;
      }
   }

   return acc;
}
 
5、BKDRHash
/**
 * BKDR hash: polynomial string hash with a fixed multiplier.
 *
 * @param str  Text to hash. An empty string produces 0.
 * @return     The hash; arithmetic wraps in unsigned int.
 */
unsigned int BKDRHash(const std::string& str)
{
   // Other multipliers of the same family: 31 131 1313 13131 131313 etc..
   const unsigned int multiplier = 131;
   unsigned int acc = 0;

   for (std::string::const_iterator it = str.begin(); it != str.end(); ++it)
   {
      acc *= multiplier;
      acc += *it;
   }

   return acc;
}
 
6、SDBMHash
/**
 * SDBM hash: each step combines the character with the previous value
 * shifted left by 6 and by 16, minus the previous value itself.
 *
 * @param str  Text to hash. An empty string produces 0.
 * @return     The hash; arithmetic wraps in unsigned int.
 */
unsigned int SDBMHash(const std::string& str)
{
   unsigned int acc = 0;

   for (std::string::const_iterator it = str.begin(); it != str.end(); ++it)
   {
      const unsigned int previous = acc;
      // Term order does not matter: unsigned arithmetic is modular.
      acc = (previous << 6) + (previous << 16) - previous + *it;
   }

   return acc;
}
 
7、DJBHash(times33)-这个用得非常多,很多库都用它。
/**
 * DJB ("times 33") hash: multiplies the running value by 33 and adds the
 * next character.
 *
 * @param str  Text to hash. An empty string produces the initial value 5381.
 * @return     The hash; arithmetic wraps in unsigned int.
 */
unsigned int DJBHash(const std::string& str)
{
   unsigned int acc = 5381;

   for (std::string::const_iterator it = str.begin(); it != str.end(); ++it)
   {
      // acc * 33 is the same value as (acc << 5) + acc in unsigned arithmetic.
      acc = acc * 33u + *it;
   }

   return acc;
}
 
8、DEKHash
/**
 * DEK hash: starts from the string length and, for each character, XORs
 * the value shifted left by 5 with the value shifted right by 27, then
 * XORs in the character.
 *
 * @param str  Text to hash. An empty string produces 0 (its length).
 * @return     The hash after every character has been mixed in.
 */
unsigned int DEKHash(const std::string& str)
{
   // static_cast needs an explicit target type; the length is narrowed to
   // unsigned int to seed the hash.
   unsigned int hash = static_cast<unsigned int>(str.length());

   for(std::size_t i = 0; i < str.length(); i++)
   {
      // str[i] is a plain char and is promoted to int before the XOR.
      hash = ((hash << 5) ^ (hash >> 27)) ^ str[i];
   }

   return hash;
}
 
9、BPHash
/**
 * BP hash: shifts the running value left by 7 bits and XORs in the next
 * character.
 *
 * @param str  Text to hash. An empty string produces 0.
 * @return     The hash after every character has been folded in.
 */
unsigned int BPHash(const std::string& str)
{
   unsigned int acc = 0;

   for (std::string::const_iterator it = str.begin(); it != str.end(); ++it)
   {
      // The shift is applied first, then the XOR with the character.
      acc <<= 7;
      acc ^= *it;
   }

   return acc;
}
 
10、FNVHash
/**
 * FNV-style hash: multiplies the running value by a fixed constant and
 * XORs in the next character.
 *
 * NOTE(review): the multiplier 0x811C9DC5 matches the published 32-bit
 * FNV *offset basis*, not the 32-bit FNV prime (0x01000193), and the
 * running value starts at 0. The results therefore presumably differ from
 * standard FNV-1; confirm before relying on interoperability. The values
 * are left unchanged here so existing hash outputs stay the same.
 *
 * @param str  Text to hash. An empty string produces 0.
 * @return     The hash; arithmetic wraps in unsigned int.
 */
unsigned int FNVHash(const std::string& str)
{
   const unsigned int multiplier = 0x811C9DC5;
   unsigned int acc = 0;

   for (std::string::const_iterator it = str.begin(); it != str.end(); ++it)
   {
      acc = (acc * multiplier) ^ *it;
   }

   return acc;
}
 
11、APHash
/**
 * AP hash: alternates between two different mixing formulas depending on
 * whether the character sits at an even or an odd index.
 *
 * @param str  Text to hash. An empty string produces the initial value 0xAAAAAAAA.
 * @return     The hash after every character has been mixed in.
 */
unsigned int APHash(const std::string& str)
{
   unsigned int acc = 0xAAAAAAAA;
   const std::size_t count = str.length();

   for (std::size_t pos = 0; pos < count; ++pos)
   {
      const char ch = str[pos];

      if (pos % 2 == 0)
      {
         // Multiplication binds tighter than XOR: the character scales
         // (acc >> 3) first, and that product is XORed with (acc << 7).
         acc ^= (acc << 7) ^ (ch * (acc >> 3));
      }
      else
      {
         acc ^= ~((acc << 11) + (ch ^ (acc >> 5)));
      }
   }

   return acc;
}
 
12、MurmurHash - 非常新的一个哈希算法,应该是目前效率最高的一个哈希算法,使用率很高。(伪代码来自维基百科)
Murmur2(key, len, seed)
    m <- 0x5bd1e995
    r <- 24
    seed <- 0x9747b28c
    hash <- seed XOR len
    for each fourByteChunk of key
        k <- fourByteChunk
        k <- k * m
        k <- k XOR (k >> r)
        k <- k * m
 
        hash <- hash * m
        hash <- hash XOR k
 
    with any remainingBytesInKey
        remainingBytes <- SwapEndianOrderOf(remainingBytesInKey)
 
        hash <- hash XOR remainingBytes
        hash <- hash * m
 
    hash <- hash XOR (hash >> 13)
    hash <- hash * m
    hash <- hash XOR (hash >> 15)
 
 
 
MurmurHash算法:高运算性能,低碰撞率,由Austin Appleby创建于2008年,现已应用到Hadoop、libstdc++、nginx、libmemcached等开源系统。2011年Appleby被Google雇佣,随后Google推出其变种的CityHash算法。 

官方网站:https://sites.google.com/site/murmurhash/ 

MurmurHash算法,自称超级快的hash算法,是FNV的4-5倍。官方数据如下: 

OneAtATime – 354.163715 mb/sec 
FNV – 443.668038 mb/sec 
SuperFastHash – 985.335173 mb/sec 
lookup3 – 988.080652 mb/sec 
MurmurHash 1.0 – 1363.293480 mb/sec 
MurmurHash 2.0 – 2056.885653 mb/sec 

但也有文章声称,只有当key的长度大于10字节的时候,MurmurHash的运算速度才快于DJB。“从计算速度上来看,MurmurHash只适用于已知长度的、长度比较长的字符”。 
 
 

posted on 2016-04-24 11:15  舒润  阅读(1101)  评论(0)  编辑  收藏  举报

导航