记录几个经典的字符串hash算法

记录几个经典的字符串hash算法,方便以后查看:

推荐一篇文章:

http://www.partow.net/programming/hashfunctions/#

(1)暴雪字符串hash

#include <ctype.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
  6 
  7 #define MAXTABLELEN 102400000
  8 
  9 typedef struct  _HASHTABLE
 10 {  
 11     long nHashA;  
 12     long nHashB;  
 13     bool bExists;  
 14 }HASHTABLE, *PHASHTABLE;
 15 
 16 const unsigned long nTableLength = MAXTABLELEN;
 17 unsigned long m_tablelength;    // 哈希索引表长度  
 18 HASHTABLE *m_HashIndexTable;
 19 unsigned long cryptTable[0x500];
 20 
 21 int collc = 0;
 22 int errstr = 0;
 23 
 24 void InitCryptTable()  
 25 {   
 26     unsigned long seed = 0x00100001, index1 = 0, index2 = 0, i;  
 27  
 28     for( index1 = 0; index1 < 0x100; index1++ )  
 29     {   
 30         for( index2 = index1, i = 0; i < 5; i++, index2 += 0x100 )  
 31         {   
 32             unsigned long temp1, temp2;  
 33             seed = (seed * 125 + 3) % 0x2AAAAB;  
 34             temp1 = (seed & 0xFFFF) << 0x10;  
 35             seed = (seed * 125 + 3) % 0x2AAAAB;  
 36             temp2 = (seed & 0xFFFF);  
 37             cryptTable[index2] = ( temp1 | temp2 );   
 38         }   
 39     }   
 40 }  
 41  
 42 /************************************************************************/
 43 /*函数名:HashString
 44  *功  能:求取哈希值   
 45  *返回值:返回hash值
 46  ************************************************************************/
 47 unsigned long HashString(char *lpszString, unsigned long dwHashType)
 48 {
 49     unsigned char *key = (unsigned char *)lpszString;
 50     unsigned long seed1 = 0x7FED7FED, seed2 = 0xEEEEEEEE;
 51     int ch; 
 52 
 53     while(*key != 0)
 54     {   
 55         ch = toupper(*key++);
 56 
 57         seed1 = cryptTable[(dwHashType << 8) + ch] ^ (seed1 + seed2);
 58         seed2 = ch + seed1 + seed2 + (seed2 << 5) + 3;
 59     }   
 60     return seed1;
 61 }
 62 /************************************************************************/
 63 /*函数名:Hashed
 64  *功  能:检测一个字符串是否被hash过
 65  *返回值:如果存在,返回位置;否则,返回-1
 66  ************************************************************************/
 67 unsigned long Hashed(char * lpszString)  
 68  
 69 {   
 70     const unsigned long HASH_OFFSET = 0, HASH_A = 1, HASH_B = 2;  
 71     //不同的字符串三次hash还会碰撞的率无限接近于不可能
 72     unsigned long nHash = HashString(lpszString, HASH_OFFSET);  
 73     unsigned long nHashA = HashString(lpszString, HASH_A);  
 74     unsigned long nHashB = HashString(lpszString, HASH_B);  
 75     unsigned long nHashStart = nHash % m_tablelength;  
 76     unsigned long nHashPos = nHashStart;  
 77  
 78     while (m_HashIndexTable[nHashPos].bExists)  
 79     {   
 80         if (m_HashIndexTable[nHashPos].nHashA == nHashA && m_HashIndexTable[nHashPos].nHashB == nHashB)   
 81             return nHashPos;   
 82         else  
 83             nHashPos = (nHashPos + 1) % m_tablelength;  
 84  
 85         if (nHashPos == nHashStart)   
 86             break;   
 87     }  
 88     errstr++;
 89  
 90     return -1; //没有找到  
 91 }  
 92  
 93 /************************************************************************/
 94 /*函数名:Hash
 95  *功  能:hash一个字符串 
 96  *返回值:成功,返回true;失败,返回false
 97  ************************************************************************/
 98 bool Hash(char * lpszString)
 99 {  
100     const unsigned long HASH_OFFSET = 0, HASH_A = 1, HASH_B = 2;  
101     unsigned long nHash = HashString(lpszString, HASH_OFFSET);  
102     unsigned long nHashA = HashString(lpszString, HASH_A);  
103     unsigned long nHashB = HashString(lpszString, HASH_B);  
104     unsigned long nHashStart = nHash % m_tablelength, 
105     nHashPos = nHashStart;  
106  
107     while (m_HashIndexTable[nHashPos].bExists)  
108     {    
109         nHashPos = (nHashPos + 1) % m_tablelength;  
110         if (nHashPos == nHashStart) //一个轮回  
111         { 
112             collc ++; 
113             //hash表中没有空余的位置了,无法完成hash
114             return false;   
115         }  
116     }  
117     m_HashIndexTable[nHashPos].bExists = true;  
118     m_HashIndexTable[nHashPos].nHashA = nHashA;  
119     m_HashIndexTable[nHashPos].nHashB = nHashB;  
120  
121     return true;  
122 }
123 
124 int InitHashTable()
125 {
126     int i;
127     
128     InitCryptTable();  
129     m_tablelength = nTableLength;
130     
131     m_HashIndexTable = (HASHTABLE *)malloc(nTableLength * sizeof(HASHTABLE));
132     if (NULL == m_HashIndexTable) {
133         printf("Init HashTable failure!!\n");
134         return -1;
135     }
136 
137     for (i = 0; i < nTableLength; i++ )  
138     {  
139         m_HashIndexTable[i].nHashA = 0;  
140         m_HashIndexTable[i].nHashB = 0;  
141         m_HashIndexTable[i].bExists = false;  
142     }
143     
144     return 0;    
145 }
146 
/*
 * do_test - read the file "urllist" line by line, store every line in the
 * hash table with Hash(), and print how many lines were read.
 *
 * Returns silently when the file cannot be opened or the table cannot be
 * initialised.
 */
void do_test()
{
    int count = 0;
    FILE *fp;
    char url[2048] = {0};

    /* The file is only read, so do not require write permission on it. */
    fp = fopen("urllist", "rb");
    if (NULL == fp) {
        return;
    }

    if (InitHashTable()) {
        fclose(fp);     /* do not leak the stream on the error path */
        return;
    }

    /*
     * Loop on the result of fgets() itself.  feof() only becomes true after
     * a read has already failed, so testing it before reading would process
     * the stale buffer one extra time at end of file.
     */
    while (fgets(url, sizeof url, fp) != NULL) {
        Hash(url);
        count++;
    }

    printf("count: %d\n", count);

    fclose(fp);
}
172 
173 /*test main*/
174 int main()
175 {
176     do_test();
177 
178     printf("conflict: %d\n", collc);
179     printf("not find: %d\n", errstr);
180 
181     return 0;
182 }

 (2)字符串hash算法 ELFhash 

#include <stdio.h>
#include <string.h>
#include <stdlib.h>

/* Number of buckets: ELFhash() reduces its hash modulo this value. */
#define MOD 10

/*
 * ELFhash - ELF-style string hash reduced to a bucket index.
 *
 * key: NUL-terminated string to hash.
 *
 * Returns a value in the range 0 .. MOD-1.
 *
 * Bytes are read as unsigned char: with a plain (possibly signed) char a
 * byte >= 0x80 would be added as a negative number and corrupt the hash.
 * The accumulator is masked to 32 bits after each step so the result is the
 * same whether unsigned long is 32 or 64 bits wide.
 */
int ELFhash(char*key)
{
    const unsigned char *p = (const unsigned char *)key;
    unsigned long h = 0;

    while (*p)
    {
        unsigned long g;

        h = ((h << 4) + *p++) & 0xFFFFFFFFUL;

        /* fold the top four bits back into the low byte, then clear them */
        g = h & 0xF0000000UL;
        if (g)
            h ^= g >> 24;
        h &= ~g;
    }
    return (int)(h % MOD);
}

/*
 * Print the ELFhash bucket of the first command-line argument.
 * Without an argument, print a usage line and return -1.
 */
int main(int argc, char **argv)
{
    if (argc >= 2) {
        printf("num is %d\n", ELFhash(argv[1]));
        return 0;
    }

    printf("using %s <string>\n", argv[0]);
    return -1;
}

 

posted @ 2017-04-25 14:47  zhangwju  阅读(6967)  评论(1)  编辑  收藏  举报