redis源码笔记－dict.c

这篇blog介绍dict的实现。
dict.c
  1 #include "fmacros.h"
  2 
  3 #include <stdio.h>
  4 #include <stdlib.h>
  5 #include <string.h>
  6 #include <stdarg.h>
  7 #include <assert.h>
  8 #include <limits.h>
  9 #include <sys/time.h>
 10 #include <ctype.h>
 11 
 12 #include "dict.h"
 13 #include "zmalloc.h"
 14 
 15 /* Using dictEnableResize() / dictDisableResize() we make possible to
 16  * enable/disable resizing of the hash table as needed. This is very important
 17  * for Redis, as we use copy-on-write and don't want to move too much memory
 18  * around when there is a child performing saving operations.
 19  *
 20  * Note that even when dict_can_resize is set to 0, not all resizes are
 21  * prevented: an hash table is still allowed to grow if the ratio between
 22  * the number of elements and the buckets > dict_force_resize_ratio. */
     注释已经说的很清楚了。redis有些后台进程会做一些工作，比如rdb save、aof rewrite之类的，所以如果在后台进程运行过程中，将允许对hash table进行resize操作的话，会造成大量的内存拷贝（默认是copy-on-write，只在内存有变化时才拷贝）。注释中说，也没有完全避免，如果存的entry过多，比如存的entry是slot数的五倍，则也会强制进行resize
 23 static int dict_can_resize = 1;
 24 static unsigned int dict_force_resize_ratio = 5;
 25 
 26 /* -------------------------- private prototypes ---------------------------- */
 27 
 28 static int _dictExpandIfNeeded(dict *ht);     //如果需要的话，将hash表进行扩展
 29 static unsigned long _dictNextPower(unsigned long size);  
 30 static int _dictKeyIndex(dict *ht, const void *key);
 31 static int _dictInit(dict *ht, dictType *type, void *privDataPtr);
 32 //这4个函数的具体含义，到后边实现以及用到的时候再讨论

 33 /* -------------------------- hash functions -------------------------------- */
 34 
 35 /* Thomas Wang's 32 bit Mix Function */
 36 unsigned int dictIntHashFunction(unsigned int key)
 37 {
 38     key += ~(key << 15);
 39     key ^=  (key >> 10);
 40     key +=  (key << 3);
 41     key ^=  (key >> 6);
 42     key += ~(key << 11);
 43     key ^=  (key >> 16);
 44     return key;
 45 }
 46 
 47 /* Identity hash function for integer keys */
 48 unsigned int dictIdentityHashFunction(unsigned int key)
 49 {
 50     return key;
 51 }
 52 
 53 /* Generic hash function (a popular one from Bernstein).
 54  * I tested a few and this was the best. */
 55 unsigned int dictGenHashFunction(const unsigned char *buf, int len) {
 56     unsigned int hash = 5381;
 57 
 58     while (len--)
 59         hash = ((hash << 5) + hash) + (*buf++); /* hash * 33 + c */
 60     return hash;
 61 }
 62 
 63 /* And a case insensitive version */
 64 unsigned int dictGenCaseHashFunction(const unsigned char *buf, int len) {
 65     unsigned int hash = 5381;
 66 
 67     while (len--)
 68         hash = ((hash << 5) + hash) + (tolower(*buf++)); /* hash * 33 + c */
 69     return hash;
 70 }
 71 //给出针对整数和字符串的hash函数，因为redis中做dict key的只有这两种类型 
   
 72 /* ----------------------------- API implementation ------------------------- */
 73 
 74 /* Reset an hashtable already initialized with ht_init().
 75  * NOTE: This function should only called by ht_destroy(). */ //Note: and _dictInit
 76 static void _dictReset(dictht *ht)
 77 {
 78     ht->table = NULL;
 79     ht->size = 0;
 80     ht->sizemask = 0;
 81     ht->used = 0;
 82 }
 83 
 84 /* Create a new hash table */
 85 dict *dictCreate(dictType *type,
 86         void *privDataPtr)
 87 {
 88     dict *d = zmalloc(sizeof(*d));
 89 
 90     _dictInit(d,type,privDataPtr);
 91     return d;
 92 }
 93 //解释下参数中的privDataPtr，它是dict struct的一个成员，然后分别在dictType的keyDup函数、valDup函数、keyCompare函数、keyDestructor函数、valDestructor函数中作为第一个参数使用。dict的对外API通过dictCreate函数对其进行初始化。我在看dict.h时也很疑惑这个接口设计。这里揭底，在redis当前版本对dictCreate的所有调用中（networking.c一次、object.c两次、redis.c七次、t_hash.c一次、t_set.c一次、t_zset.c一次），此参数均被赋为了NULL，可能是历史原因或是考虑后续兼容性？求大牛指点。anyway，现在可以无视之了。

 94 /* Initialize the hash table */
 95 int _dictInit(dict *d, dictType *type,
 96         void *privDataPtr)
 97 {
 98     _dictReset(&d->ht[0]);
 99     _dictReset(&d->ht[1]);
100     d->type = type;
101     d->privdata = privDataPtr;
102     d->rehashidx = -1;
103     d->iterators = 0;
104     return DICT_OK;
105 }
106 
107 /* Resize the table to the minimal size that contains all the elements,
108  * but with the invariant of a USER/BUCKETS ratio near to <= 1 */
109 int dictResize(dict *d)
110 {
111     int minimal;
112 
113     if (!dict_can_resize || dictIsRehashing(d)) return DICT_ERR;
114     minimal = d->ht[0].used;
115     if (minimal < DICT_HT_INITIAL_SIZE)
116         minimal = DICT_HT_INITIAL_SIZE;
117     return dictExpand(d, minimal);
118 }
119 
120 /* Expand or create the hashtable */
121 int dictExpand(dict *d, unsigned long size)
122 {
123     dictht n; /* the new hashtable */
124     unsigned long realsize = _dictNextPower(size);  //dict的size是2的倍数，所以这里记录了真是的记录长度，需要比size长
125 
126     /* the size is invalid if it is smaller than the number of
127      * elements already inside the hashtable */
128     if (dictIsRehashing(d) || d->ht[0].used > size)
129         return DICT_ERR;
130 
131     /* Allocate the new hashtable and initialize all pointers to NULL */
132     n.size = realsize;
133     n.sizemask = realsize-1;
134     n.table = zcalloc(realsize*sizeof(dictEntry*));
135     n.used = 0;
136 
137     /* Is this the first initialization? If so it's not really a rehashing
138      * we just set the first hash table so that it can accept keys. */
139     if (d->ht[0].table == NULL) {
140         d->ht[0] = n;
141         return DICT_OK;
142     }
143 
144     /* Prepare a second hash table for incremental rehashing */
145     d->ht[1] = n;
146     d->rehashidx = 0;  //只有在第二次调用dictExpand时，才是rehash的前奏
147     return DICT_OK;
148 }
149 
150 /* Performs N steps of incremental rehashing. Returns 1 if there are still
151  * keys to move from the old to the new hash table, otherwise 0 is returned.
152  * Note that a rehashing step consists in moving a bucket (that may have more
153  * thank one key as we use chaining) from the old to the new hash table. */
154 int dictRehash(dict *d, int n) {
155     if (!dictIsRehashing(d)) return 0;
156 
157     while(n--) {
158         dictEntry *de, *nextde;
159 
160         /* Check if we already rehashed the whole table... */
161         if (d->ht[0].used == 0) {
162             zfree(d->ht[0].table);
163             d->ht[0] = d->ht[1];
164             _dictReset(&d->ht[1]);
165             d->rehashidx = -1;
166             return 0;
167         }
168 
169         /* Note that rehashidx can't overflow as we are sure there are more
170          * elements because ht[0].used != 0 */
171         while(d->ht[0].table[d->rehashidx] == NULL) d->rehashidx++;
172         de = d->ht[0].table[d->rehashidx];
173         /* Move all the keys in this bucket from the old to the new hash HT */
174         while(de) {
175             unsigned int h;
176 
177             nextde = de->next;
178             /* Get the index in the new hash table */
179             h = dictHashKey(d, de->key) & d->ht[1].sizemask;
180             de->next = d->ht[1].table[h];
181             d->ht[1].table[h] = de;
182             d->ht[0].used--;
183             d->ht[1].used++;
184             de = nextde;
185         }
186         d->ht[0].table[d->rehashidx] = NULL;
187         d->rehashidx++;
188     }
189     return 1;
190 }
191 
192 long long timeInMilliseconds(void) {
193     struct timeval tv;
194 
195     gettimeofday(&tv,NULL);
196     return (((long long)tv.tv_sec)*1000)+(tv.tv_usec/1000);
197 }     //时间单位是ms
198 
199 /* Rehash for an amount of time between ms milliseconds and ms+1 milliseconds */
200 int dictRehashMilliseconds(dict *d, int ms) {
201     long long start = timeInMilliseconds();
202     int rehashes = 0;
203 
204     while(dictRehash(d,100)) {
205         rehashes += 100;
206         if (timeInMilliseconds()-start > ms) break;
207     }
208     return rehashes;
209 } //返回值是rehash数目，为100的整数倍，传进参数的ms只是个下限，并不严格限制
210 
211 /* This function performs just a step of rehashing, and only if there are
212  * no safe iterators bound to our hash table. When we have iterators in the
213  * middle of a rehashing we can't mess with the two hash tables otherwise
214  * some element can be missed or duplicated.
215  *
216  * This function is called by common lookup or update operations in the
217  * dictionary so that the hash table automatically migrates from H1 to H2
218  * while it is actively used. */
219 static void _dictRehashStep(dict *d) {
220     if (d->iterators == 0) dictRehash(d,1);
221 }  //在dict上存在遍历用的iterator时，不会尽心rehash操作，否则遍历将会丢数据或出现重复。这个函数是在lookup和update时调用，使得rehash的过程伴随着针对dict操作的过程慢慢完成
222 
223 /* Add an element to the target hash table */
224 int dictAdd(dict *d, void *key, void *val)
225 {
226     int index;
227     dictEntry *entry;
228     dictht *ht;
229 
230     if (dictIsRehashing(d)) _dictRehashStep(d);
231 
232     /* Get the index of the new element, or -1 if
233      * the element already exists. */
234     if ((index = _dictKeyIndex(d, key)) == -1)
235         return DICT_ERR;
236 
237     /* Allocates the memory and stores key */
238     ht = dictIsRehashing(d) ? &d->ht[1] : &d->ht[0]; //如果dict正在进行rehash，则将新来的数据存在d->ht[1]上，因为d->ht[0]是“旧表”,不再接受新的数据
239     entry = zmalloc(sizeof(*entry));
240     entry->next = ht->table[index];
241     ht->table[index] = entry;
242     ht->used++;
243 
244     /* Set the hash entry fields. */
245     dictSetHashKey(d, entry, key);
246     dictSetHashVal(d, entry, val);
247     return DICT_OK;
248 }
249 
250 /* Add an element, discarding the old if the key already exists.
251  * Return 1 if the key was added from scratch, 0 if there was already an
252  * element with such key and dictReplace() just performed a value update
253  * operation. */
254 int dictReplace(dict *d, void *key, void *val)
255 {
256     dictEntry *entry, auxentry;
257 
258     /* Try to add the element. If the key
259      * does not exists dictAdd will suceed. */
260     if (dictAdd(d, key, val) == DICT_OK)
261         return 1;
262     /* It already exists, get the entry */
263     entry = dictFind(d, key);
264     /* Free the old value and set the new one */
265     /* Set the new value and free the old one. Note that it is important
266      * to do that in this order, as the value may just be exactly the same
267      * as the previous one. In this context, think to reference counting,
268      * you want to increment (set), and then decrement (free), and not the
269      * reverse. */
270     auxentry = *entry;
271     dictSetHashVal(d, entry, val);
272     dictFreeEntryVal(d, &auxentry);
273     return 0;
274 }
275 
276 /* Search and remove an element */
277 static int dictGenericDelete(dict *d, const void *key, int nofree)
278 {
279     unsigned int h, idx;
280     dictEntry *he, *prevHe;
281     int table;
282 
283     if (d->ht[0].size == 0) return DICT_ERR; /* d->ht[0].table is NULL */
284     if (dictIsRehashing(d)) _dictRehashStep(d);
285     h = dictHashKey(d, key);
286 
287     for (table = 0; table <= 1; table++) {
288         idx = h & d->ht[table].sizemask;
289         he = d->ht[table].table[idx];
290         prevHe = NULL;
291         while(he) {
292             if (dictCompareHashKeys(d, key, he->key)) {
293                 /* Unlink the element from the list */
294                 if (prevHe)
295                     prevHe->next = he->next;
296                 else
297                     d->ht[table].table[idx] = he->next;
298                 if (!nofree) {
299                     dictFreeEntryKey(d, he);
300                     dictFreeEntryVal(d, he);
301                 }
302                 zfree(he);
303                 d->ht[table].used--;
304                 return DICT_OK;
305             }
306             prevHe = he;
307             he = he->next;
308         }
309         if (!dictIsRehashing(d)) break;   //如果在查找的时刻，dict没有正在进行rehash操作，则此时dict->ht[1]未使用，所以只在dict->ht[0]进行查找操作就可以了
310     }
311     return DICT_ERR; /* not found */
312 }
313 
314 int dictDelete(dict *ht, const void *key) {
315     return dictGenericDelete(ht,key,0);
316 }
317 
318 int dictDeleteNoFree(dict *ht, const void *key) {
319     return dictGenericDelete(ht,key,1);  
320 } //。。。redis的src目录下没有任何一处调用这个不料理自己后事的delete函数
321 
322 /* Destroy an entire dictionary */
323 int _dictClear(dict *d, dictht *ht)  //clear的粒度是hash table而不是dict
324 {
325     unsigned long i;
326 
327     /* Free all the elements */
328     for (i = 0; i < ht->size && ht->used > 0; i++) {
329         dictEntry *he, *nextHe;
330 
331         if ((he = ht->table[i]) == NULL) continue;
332         while(he) {
333             nextHe = he->next;
334             dictFreeEntryKey(d, he);
335             dictFreeEntryVal(d, he);
336             zfree(he);
337             ht->used--;
338             he = nextHe;
339         }
340     }
341     /* Free the table and the allocated cache structure */
342     zfree(ht->table);
343     /* Re-initialize the table */
344     _dictReset(ht);
345     return DICT_OK; /* never fails */
346 }
347 
348 /* Clear & Release the hash table */
349 void dictRelease(dict *d)
350 {
351     _dictClear(d,&d->ht[0]);
352     _dictClear(d,&d->ht[1]);
353     zfree(d);
354 }   //release的粒度是dict，_dictClear只是一个内部接口
355 
356 dictEntry *dictFind(dict *d, const void *key)
357 {
358     dictEntry *he;
359     unsigned int h, idx, table;
360 
361     if (d->ht[0].size == 0) return NULL; /* We don't have a table at all */
362     if (dictIsRehashing(d)) _dictRehashStep(d);
363     h = dictHashKey(d, key);
364     for (table = 0; table <= 1; table++) {
365         idx = h & d->ht[table].sizemask;
366         he = d->ht[table].table[idx];
367         while(he) {
368             if (dictCompareHashKeys(d, key, he->key))     //为何要传dict参数，因为需要知道具体用什么compare函数作比较操作
369                 return he;
370             he = he->next;
371         }
372         if (!dictIsRehashing(d)) return NULL;  //没找到，返回NULL，函数就结束了
373     }
374     return NULL;
375 }
376 
377 void *dictFetchValue(dict *d, const void *key) {
378     dictEntry *he;
379 
380     he = dictFind(d,key);
381     return he ? dictGetEntryVal(he) : NULL;
382 }
383 
384 dictIterator *dictGetIterator(dict *d)
385 {
386     dictIterator *iter = zmalloc(sizeof(*iter));
387 
388     iter->d = d;
389     iter->table = 0;
390     iter->index = -1;
391     iter->safe = 0;
392     iter->entry = NULL;
393     iter->nextEntry = NULL;
394     return iter;
395 } //在调用GetIterator函数时，并没有将这个iter在对应的dict上进行注册
396 
397 dictIterator *dictGetSafeIterator(dict *d) {
398     dictIterator *i = dictGetIterator(d);
399 
400     i->safe = 1;
401     return i;
402 }
403 
404 dictEntry *dictNext(dictIterator *iter)
405 {
406     while (1) {
407         if (iter->entry == NULL) {
408             dictht *ht = &iter->d->ht[iter->table];   
409             if (iter->safe && iter->index == -1 && iter->table == 0)
410                 iter->d->iterators++;   //在对应的dict上进行注册
411             iter->index++;
412             if (iter->index >= (signed) ht->size) { //在对第一个ht遍历完成后，转到第二张表
413                 if (dictIsRehashing(iter->d) && iter->table == 0) {
414                     iter->table++;
415                     iter->index = 0;
416                     ht = &iter->d->ht[1];
417                 } else {
418                     break;
419                 }
420             }
421             iter->entry = ht->table[iter->index];
422         } else {
423             iter->entry = iter->nextEntry;
424         }
425         if (iter->entry) {
426             /* We need to save the 'next' here, the iterator user
427              * may delete the entry we are returning. */
428             iter->nextEntry = iter->entry->next;
429             return iter->entry;
430         }
431     }
432     return NULL;
433 }
434 
435 void dictReleaseIterator(dictIterator *iter)
436 {
437     if (iter->safe && !(iter->index == -1 && iter->table == 0))
438         iter->d->iterators--;
439     zfree(iter);
440 }
441 
442 /* Return a random entry from the hash table. Useful to
443  * implement randomized algorithms */
444 dictEntry *dictGetRandomKey(dict *d)
445 {
446     dictEntry *he, *orighe;
447     unsigned int h;
448     int listlen, listele;
449 
450     if (dictSize(d) == 0) return NULL;
451     if (dictIsRehashing(d)) _dictRehashStep(d);
452     if (dictIsRehashing(d)) {
453         do {
454             h = random() % (d->ht[0].size+d->ht[1].size);
455             he = (h >= d->ht[0].size) ? d->ht[1].table[h - d->ht[0].size] :
456                                       d->ht[0].table[h];
457         } while(he == NULL);
458     } else {
459         do {
460             h = random() & d->ht[0].sizemask;
461             he = d->ht[0].table[h];
462         } while(he == NULL);
463     }
464 
465     /* Now we found a non empty bucket, but it is a linked
466      * list and we need to get a random element from the list.
467      * The only sane way to do so is counting the elements and
468      * select a random index. */
469     listlen = 0;
470     orighe = he;
471     while(he) {
472         he = he->next;
473         listlen++;
474     }
475     listele = random() % listlen;
476     he = orighe;
477     while(listele--) he = he->next;
478     return he;
479 }   //这个函数效率很低啊。
480 
481 /* ------------------------- private functions ------------------------------ */
482 
483 /* Expand the hash table if needed */
484 static int _dictExpandIfNeeded(dict *d)
485 {
486     /* Incremental rehashing already in progress. Return. */
487     if (dictIsRehashing(d)) return DICT_OK;
488 
489     /* If the hash table is empty expand it to the intial size. */
490     if (d->ht[0].size == 0) return dictExpand(d, DICT_HT_INITIAL_SIZE);
491 
492     /* If we reached the 1:1 ratio, and we are allowed to resize the hash
493      * table (global setting) or we should avoid it but the ratio between
494      * elements/buckets is over the "safe" threshold, we resize doubling
495      * the number of buckets. */
496     if (d->ht[0].used >= d->ht[0].size &&
497         (dict_can_resize ||
498          d->ht[0].used/d->ht[0].size > dict_force_resize_ratio))
499     {
500         return dictExpand(d, ((d->ht[0].size > d->ht[0].used) ?
501                                     d->ht[0].size : d->ht[0].used)*2);
502     }
503     return DICT_OK;
504 }
505 
506 /* Our hash table capability is a power of two */
507 static unsigned long _dictNextPower(unsigned long size)
508 {
509     unsigned long i = DICT_HT_INITIAL_SIZE;
510 
511     if (size >= LONG_MAX) return LONG_MAX;
512     while(1) {
513         if (i >= size)
514             return i;
515         i *= 2;
516     }
517 }
518 
519 /* Returns the index of a free slot that can be populated with
520  * an hash entry for the given 'key'.
521  * If the key already exists, -1 is returned.
522  *
523  * Note that if we are in the process of rehashing the hash table, the
524  * index is always returned in the context of the second (new) hash table. */
525 static int _dictKeyIndex(dict *d, const void *key)
526 {
527     unsigned int h, idx, table;
528     dictEntry *he;
529 
530     /* Expand the hashtable if needed */
531     if (_dictExpandIfNeeded(d) == DICT_ERR)
532         return -1;
533     /* Compute the key hash value */
534     h = dictHashKey(d, key);
535     for (table = 0; table <= 1; table++) {
536         idx = h & d->ht[table].sizemask;
537         /* Search if this slot does not already contain the given key */
538         he = d->ht[table].table[idx];
539         while(he) {
540             if (dictCompareHashKeys(d, key, he->key))
541                 return -1;
542             he = he->next;
543         }
544         if (!dictIsRehashing(d)) break;
545     }
546     return idx;
547 }
548 
549 void dictEmpty(dict *d) {
550     _dictClear(d,&d->ht[0]);
551     _dictClear(d,&d->ht[1]);
552     d->rehashidx = -1;
553     d->iterators = 0;
554 }
555 
556 #define DICT_STATS_VECTLEN 50
557 static void _dictPrintStatsHt(dictht *ht) {
558     unsigned long i, slots = 0, chainlen, maxchainlen = 0;
559     unsigned long totchainlen = 0;
560     unsigned long clvector[DICT_STATS_VECTLEN];  //这个clvector是统计hash table中长度为其下标的槽位的个数
561 
562     if (ht->used == 0) {
563         printf("No stats available for empty dictionaries\n");
564         return;
565     }
566 
567     for (i = 0; i < DICT_STATS_VECTLEN; i++) clvector[i] = 0;
568     for (i = 0; i < ht->size; i++) {
569         dictEntry *he;
570 
571         if (ht->table[i] == NULL) {
572             clvector[0]++;
573             continue;
574         }
575         slots++;
576         /* For each hash entry on this slot... */
577         chainlen = 0;
578         he = ht->table[i];
579         while(he) {
580             chainlen++;
581             he = he->next;
582         }
583         clvector[(chainlen < DICT_STATS_VECTLEN) ? chainlen : (DICT_STATS_VECTLEN-1)]++;
584         if (chainlen > maxchainlen) maxchainlen = chainlen;
585         totchainlen += chainlen;
586     }
587     printf("Hash table stats:\n");
588     printf(" table size: %ld\n", ht->size);
589     printf(" number of elements: %ld\n", ht->used);
590     printf(" different slots: %ld\n", slots);   //非空槽位数
591     printf(" max chain length: %ld\n", maxchainlen);
592     printf(" avg chain length (counted): %.02f\n", (float)totchainlen/slots);
593     printf(" avg chain length (computed): %.02f\n", (float)ht->used/slots);
594     printf(" Chain length distribution:\n");
595     for (i = 0; i < DICT_STATS_VECTLEN-1; i++) {
596         if (clvector[i] == 0) continue;
597         printf("   %s%ld: %ld (%.02f%%)\n",(i == DICT_STATS_VECTLEN-1)?">= ":"", i, clvector[i], ((float)clvector[i]/ht->size)*100);
598     }
599 }
600 
601 void dictPrintStats(dict *d) {
602     _dictPrintStatsHt(&d->ht[0]);
603     if (dictIsRehashing(d)) {
604         printf("-- Rehashing into ht[1]:\n");
605         _dictPrintStatsHt(&d->ht[1]);
606     }
607 }
608 
609 void dictEnableResize(void) {
610     dict_can_resize = 1;
611 }
612 
613 void dictDisableResize(void) {
614     dict_can_resize = 0;
615 }