redis 5.0.2 源码阅读——字典dict

redis中字典相关的文件为:dict.h与dict.c

与其说是一个字典,倒不如说是一个哈希表。

一、数据结构

1.1 dictEntry结构体

 1 /**
 2  * dictEntry是一个kv对的单向链表,其中v是一个联合体,支持数字,或者是指向一块内存的指针。
 3  */
 4 typedef struct dictEntry {
 5     void *key;
 6     union {
 7         void *val;
 8         uint64_t u64;
 9         int64_t s64;
10         double d;
11     } v;
12     struct dictEntry *next;
13 } dictEntry;

具体结构形如

 1 /*
 2 +---------------+
 3 |void *key      |
 4 +---------------+
 5 |union{...} v   |
 6 +---------------+
 7 |dictEntry *next|---+
 8 +---------------+   |
 9                     |
10 +---------------+ <-+
11 |void *key      |
12 +---------------+
13 |union{...} v   |
14 +---------------+
15 |dictEntry *next|
16 +---------------+
17 */

为了节约篇幅,后续用以下结构表示

1 /*
2 +---+  +---+
3 |K|V|->|K|V|->NULL
4 +---+  +---+
5 */

1.2 dictht结构体

 1 /**
 2  * This is our hash table structure. Every dictionary has two of this as we
 3  * implement incremental rehashing, for the old to the new table.
 4  * 这是我们的哈希表结构。 每个字典都有两个这样的,因为我们实现了增量重新散列,从旧表到新表。
 5  * 使用开链法解决冲突问题
 6  *
 7  * 其中,table指向大小为sizeof(dictEntry*) * size的一片内存空间,每个dictEntry*可以视为一个bucket,
 8  * 每个bucket下挂着一个dictEntry单向链表。
 9  * size的值始终为2的幂,而sizemask的值始终为size-1,其作用是决定kv对要挂在哪个bucket上。
10  * 举个例子,size=4时,sizemask=3,其二进制为 0011,若通过hash函数计算出来key对应的hash值hash_value为5,
11  * 二进制为0101,则通过位运算 sizemask & hash_value = 0011 & 0101 = 0001,十进制为1,则将会挂在idx = 1的bucket上。
12  */
13 typedef struct dictht {
14     //dictEntry*类型的数组
15     dictEntry **table;
16     //dictEntry*数组的长度
17     unsigned long size;
18     /**
19      * 这样写可能更容易理解
20      * const unsigned long size = 4;
21      * dictEntry *table[size];
22      */
23 
24     //sizemask,始终为size-1
25     unsigned long sizemask;
26 
27     //当前总dictEntry数量
28     unsigned long used;
29 } dictht;

dictht是一个hash table,整体结构大致为

 1 /*
 2 +----------------------+   +---> +-----------------+  +---+
 3 |dictEntry **table     |---+     |dictEntry *bucket|->|K|V|->NULL
 4 +----------------------+         +-----------------+  +---+
 5 |unsigned long size = 4|         |dictEntry *bucket|->NULL
 6 +----------------------+         +-----------------+
 7 |unsigned long sizemask|         |dictEntry *bucket|->NULL
 8 +----------------------+         +-----------------+
 9 |unsigned long used    |         |dictEntry *bucket|->NULL
10 +----------------------+         +-----------------+
11 */

1.3 dictType结构体

 1 /**
 2  * dictType用于自定义一些操作的方法,如hash函数、拷贝key、拷贝value、比较key、销毁key、销毁value。
 3  */
 4 typedef struct dictType {
 5     uint64_t (*hashFunction)(const void *key);
 6     void *(*keyDup)(void *privdata, const void *key);
 7     void *(*valDup)(void *privdata, const void *obj);
 8     int (*keyCompare)(void *privdata, const void *key1, const void *key2);
 9     void (*keyDestructor)(void *privdata, void *key);
10     void (*valDestructor)(void *privdata, void *obj);
11 } dictType;

1.4 dict结构体

 1 typedef struct dict {
 2     dictType *type;
 3     //type中函数的传入参数
 4     void *privdata;
 5     dictht ht[2];
 6     /**
 7      * rehashidx,是与ht[2]配合实现渐进式rehash操作的。若使用一步到位的方式,
 8      * 当key的数量非常大的时候,rehashing期间,是会卡死所有操作的。
 9      */
10     long rehashidx; /* rehashing not in progress if rehashidx == -1 */
11     /**
12      * iterators,是用于记录当前使用的迭代器数量,与rehashing操作有关。
13      */
14     unsigned long iterators; /* number of iterators currently running */
15 } dict;

之前提到的dictType与dictht都是dict的成员变量。除此之外,还有privdata,是在创建dict的时候调用者传入,用于特定操作时回传给函数的。如

 1 /**
 2  * 利用宏定义实现函数的调用
 3  * 依次是设置有符号整型值、设置无符号整型值、设置double类型的值、
 4  *      释放key、设置key、判断key是否相等
 5  */
 6 #define dictSetSignedIntegerVal(entry, _val_) \
 7     do { (entry)->v.s64 = _val_; } while(0)
 8 
 9 #define dictSetUnsignedIntegerVal(entry, _val_) \
10     do { (entry)->v.u64 = _val_; } while(0)
11 
12 #define dictSetDoubleVal(entry, _val_) \
13     do { (entry)->v.d = _val_; } while(0)
14 
15 #define dictFreeKey(d, entry) \
16     if ((d)->type->keyDestructor) \
17         (d)->type->keyDestructor((d)->privdata, (entry)->key)
18 
19 #define dictSetKey(d, entry, _key_) do { \
20     if ((d)->type->keyDup) \
21         (entry)->key = (d)->type->keyDup((d)->privdata, _key_); \
22     else \
23         (entry)->key = (_key_); \
24 } while(0)
25 
26 #define dictCompareKeys(d, key1, key2) \
27     (((d)->type->keyCompare) ? \
28         (d)->type->keyCompare((d)->privdata, key1, key2) : \
29         (key1) == (key2))

1.5 迭代器

iterators,是用于记录当前使用的安全迭代器数量,与rehashing操作有关。

 1 /**
 2  * If safe is set to 1 this is a safe iterator, that means, you can call
 3  * dictAdd, dictFind, and other functions against the dictionary even while
 4  * iterating. Otherwise it is a non safe iterator, and only dictNext()
 5  * should be called while iterating.
 6  * 如果是个安全的迭代器,即safe == 1,则在迭代中可以调用dictAdd、dictFind等方法,否则只能调用dictNext。
 7  * index表示,ht[table]对应的bucket的idx。
 8  */
 9 typedef struct dictIterator {
10     dict *d;
11     long index;
12     int table, safe;
13     dictEntry *entry, *nextEntry;
14     /* unsafe iterator fingerprint for misuse detection. */
15     long long fingerprint;
16 } dictIterator;

整体结构如下:

 1 /*
 2 +---------+    /+-----------+   +-->+----------+  +---+
 3 |dictType*|   / |dictEntry**|---+   |dictEntry*|->|K|V|->NULL
 4 +---------+  /  +-----------+       +----------+  +---+
 5 |privdata | /   |size       |       |dictEntry*|->NULL
 6 +---------+/    +-----------+       +----------+
 7 |ht[2]    |     |sizemask   |       |dictEntry*|->NULL
 8 +---------+\    +-----------+       +----------+
 9 |rehashidx| \   |used       |       |dictEntry*|->NULL
10 +---------+  \  +-----------+       +----------+
11 |iterators|   \
12 +---------+    \+-----------+
13                 |dictEntry**|-->NULL
14                 +-----------+
15                 |size       |
16                 +-----------+
17                 |sizemask   |
18                 +-----------+
19                 |used       |
20                 +-----------+
21 */

二、创建

2.1 创建和复位函数

 1 /**
 2  * Reset a hash table already initialized with ht_init().
 3  * NOTE: This function should only be called by ht_destroy().
 4  * 重置一个使用ht_init函数初始化的哈希表
 5  * 只能通过ht_destroy函数调用
 6  */
 7 
 8 static void _dictReset(dictht *ht)
 9 {
10     ht->table = NULL;
11     ht->size = 0;
12     ht->sizemask = 0;
13     ht->used = 0;
14 }
15 
16 /* Create a new hash table 创建一个新的哈希表*/
17 dict *dictCreate(dictType *type,
18         void *privDataPtr)
19 {
20     //分配内存空间
21     dict *d = zmalloc(sizeof(*d));
22 
23     //对哈希表进行初始化
24     _dictInit(d,type,privDataPtr);
25     return d;
26 }
27 
28 /* Initialize the hash table 哈希表的初始化*/
29 int _dictInit(dict *d, dictType *type,
30         void *privDataPtr)
31 {
32     _dictReset(&d->ht[0]);
33     _dictReset(&d->ht[1]);
34     d->type = type;
35     d->privdata = privDataPtr;
36     d->rehashidx = -1;
37     d->iterators = 0;
38     return DICT_OK;
39 }

  可以调用dictCreate创建一个空的dict,它会分配好dict的空间,并初始化所有成员变量。在这里把privdata传入并保存。搜了一下整个redis源码的dictCreate调用,看到传入的值全为NULL。目前的理解暂时不清楚这个变量是什么时候赋值的。

2.2 结构图

初始化后的dict结构如下:

 1 /*
 2 +------------+    /+-----------+
 3 |dictType*   |   / |dictEntry**|-->NULL
 4 +------------+  /  +-----------+
 5 |privdata    | /   |size=0     |
 6 +------------+/    +-----------+
 7 |ht[2]       |     |sizemask=0 |
 8 +------------+\    +-----------+
 9 |rehashidx=-1| \   |used=0     |
10 +------------+  \  +-----------+
11 |iterators=0 |   \
12 +------------+    \+-----------+
13                    |dictEntry**|-->NULL
14                    +-----------+
15                    |size=0     |
16                    +-----------+
17                    |sizemask=0 |
18                    +-----------+
19                    |used=0     |
20                    +-----------+
21 */

刚创建好的dict是存不了任何数据的,其两个hash table的size都为0

2.3 resize函数

 1 /**
 2  * Resize the table to the minimal size that contains all the elements,
 3  * but with the invariant of a USED/BUCKETS ratio near to <= 1
 4  * 重新设置哈希表的大小,重新设置后的大小能保存所有的元素
 5  * 保持used/buckets的比例<=1不变
 6 */
 7 int dictResize(dict *d)
 8 {
 9     int minimal;
10 
11     /**
12      * #define dictIsRehashing(d) ((d)->rehashidx != -1)
13      * 当dict_can_resize为0或(d)->rehashidx不为-1时,直接返回1,也就是失败
14      */
15     if (!dict_can_resize || dictIsRehashing(d))
16         return DICT_ERR;
17     //得到当前元素的个数
18     minimal = d->ht[0].used;
19     if (minimal < DICT_HT_INITIAL_SIZE) //DICT_HT_INITIAL_SIZE:4
20         minimal = DICT_HT_INITIAL_SIZE;
21     return dictExpand(d, minimal);
22 }
23 
24 /**
25  * Expand or create the hash table
26  * 扩容或者创建哈希表
27  * d:原来的封装哈希表
28  * size:期望的哈希表桶数
29  *
30  * 这个函数主要是产生一个新的哈希表(dictht)。若ht[0]已存在,则把新表放入ht[1]并将dict.rehashidx置为0,表示开始进行rehash动作
31  */
32 int dictExpand(dict *d, unsigned long size)
33 {
34     /* the size is invalid if it is smaller than the number of
35      * elements already inside the hash table */
36     if (dictIsRehashing(d) || d->ht[0].used > size)
37         return DICT_ERR;
38 
39     dictht n; /* the new hash table */
40     //得到合适大小的哈希表的桶数
41     unsigned long realsize = _dictNextPower(size);
42 
43     /* Rehashing to the same table size is not useful. 如果容量大小没有发生变化,返回DICT_ERR,也就是1*/
44     if (realsize == d->ht[0].size) return DICT_ERR;
45 
46     /**
47      * Allocate the new hash table and initialize all pointers to NULL
48      * 初始化新的哈希表的size和sizemask,为table分配内存空间
49      */
50     n.size = realsize;
51     n.sizemask = realsize-1;
52     n.table = zcalloc(realsize*sizeof(dictEntry*));
53     n.used = 0;
54 
55     /**
56      * Is this the first initialization? If so it's not really a rehashing
57      * we just set the first hash table so that it can accept keys.
58      * 判断是否是第一次初始化,如果是,那就不是rehashing操作,我们只需要设置ht的第一个哈希表(ht[0])
59      * 然后返回DICT_OK,也就是0,成功。也就是说如果ht[0].table == NULL,说明是第一次初始化,
60      * 那不是真正的重新哈希,相当于创建哈希表的操作,只需要设置第一个哈希表即可
61      */
62     if (d->ht[0].table == NULL) {
63         d->ht[0] = n;
64         return DICT_OK;
65     }
66 
67     /**
68      * Prepare a second hash table for incremental rehashing
69      * 假设 ht[0] 不为空。那么这就是一次扩展字典的行为
70      * 将新哈希表设置为 ht[1] ,并打开 rehash 标识
71      */
72     d->ht[1] = n;
73     d->rehashidx = 0;
74     return DICT_OK;
75 }
76 
77 /* Our hash table capability is a power of two 哈希表的容量是2的幂*/
78 static unsigned long _dictNextPower(unsigned long size)
79 {
80     //#define DICT_HT_INITIAL_SIZE     4
81     unsigned long i = DICT_HT_INITIAL_SIZE;
82 
83     //如果size大于等于LONG_MAX,设置为LONG_MAX + 1LU
84     if (size >= LONG_MAX)
85         return LONG_MAX + 1LU;
86     while(1) {
87         if (i >= size)
88             return i;
89         //以两倍的速度扩大
90         i *= 2;
91     }
92 }

  _dictNextPower用于获取当前要分配给hash table的size,得到的值一定是2的幂,初始值(也是最小值)为4。

  dictExpand,从源码注释上看,它是为了扩容hash table,或者创建一个。它不允许与rehashing操作同时进行,也不能强制缩容。在使用_dictNextPower得到需要的size之后,它先是使用一个临时变量n去分配空间,然后进行判断,若ht[0].table的值为NULL,则认为是刚create出来的dict,直接把n赋值给ht[0],否则给ht[1],并开始rehashing操作。

三、rehashing操作

3.1 示例字典

若有这样一个dict,假设K1、K2、K3、K4计算出来的hash值分别为0、5、2、7,使用sizemask计算出来的idx分别为0、1、2、3

 1 /*
 2                                                       +----+
 3                                                    +->|K1|V|->NULL
 4 +------------+    /+-----------+  +->+----------+ /   +----+
 5 |dictType*   |   / |dictEntry**|--+  |dictEntry*|/    +----+
 6 +------------+  /  +-----------+     +----------+ +-->|K2|V|->NULL
 7 |privdata    | /   |size=4     |     |dictEntry*|/    +----+
 8 +------------+/    +-----------+     +----------+
 9 |ht[2]       |     |sizemask=3 |     |dictEntry*|\    +----+
10 +------------+\    +-----------+     +----------+ +-->|K3|V|->NULL
11 |rehashidx=-1| \   |used=4     |     |dictEntry*|\    +----+
12 +------------+  \  +-----------+     +----------+ \   +----+
13 |iterators=0 |   \                                 +->|K4|V|->NULL
14 +------------+    \+-----------+                      +----+
15                    |dictEntry**|-->NULL
16                    +-----------+
17                    |size=0     |
18                    +-----------+
19                    |sizemask=0 |
20                    +-----------+
21                    |used=0     |
22                    +-----------+
23 */

3.2 是否rehashing判断

判断是否需要对哈希表大小进行扩容

 1 /* Expand the hash table if needed 如果必要的话就扩大这个哈希表*/
 2 static int _dictExpandIfNeeded(dict *d)
 3 {
 4     /**
 5      * Incremental rehashing already in progress. Return.
 6      * 如果已经处于rehashing过程中
 7      * #define dictIsRehashing(d) ((d)->rehashidx != -1)
 8      */
 9     if (dictIsRehashing(d))
10         return DICT_OK;
11 
12     /**
13      * If the hash table is empty expand it to the initial size.
14      * 如果哈希表是空的,就是直接将ht[0]扩容为哈希表的初始值4
15      */
16     if (d->ht[0].size == 0)
17         return dictExpand(d, DICT_HT_INITIAL_SIZE);
18 
19     /* If we reached the 1:1 ratio, and we are allowed to resize the hash
20      * table (global setting) or we should avoid it but the ratio between
21      * elements/buckets is over the "safe" threshold, we resize doubling
22      * the number of buckets.
23      *
24      * static unsigned int dict_force_resize_ratio = 5;
25      * static int dict_can_resize = 1;
26      *
27      * 当used >= size并且(dict_can_resize == TRUE或ht[0]哈希表中存在的元素个数超过哈希表桶数的五倍)的时候
28      * 需要调用dictExpand进入rehashing状态。dict_can_resize默认为1
29      *
30      * 假设哈希表的已用节点数 >= 哈希表的大小。
31      * 而且下面条件任一个为真:
32      *      1) dict_can_resize 为真
33      *      2) 已用节点数除以哈希表大小之比大于
34      *          dict_force_resize_ratio
35      * 那么调用 dictExpand 对哈希表进行扩展
36      * 扩展的体积至少为已使用节点数的两倍
37      *
38      * DICT便会进行收缩。让total / bk_num 接近 1:1。
39      */
40     if (d->ht[0].used >= d->ht[0].size &&
41         (dict_can_resize ||
42          d->ht[0].used/d->ht[0].size > dict_force_resize_ratio))
43     {
44         //需要的size为当前used * 2,即为8。调用dictExpand之后的结构:
45         return dictExpand(d, d->ht[0].used*2);
46     }
47     return DICT_OK;
48 }

通过函数_dictExpandIfNeeded,可知当used >= size,并且dict_can_resize == TRUE或used/size超过dict_force_resize_ratio时,需要调用dictExpand进入rehashing状态。dict_can_resize默认为1,dict_force_resize_ratio默认为5:

1 static int dict_can_resize = 1;
2 static unsigned int dict_force_resize_ratio = 5;

3.3 开始rehashing

需要的size为当前used * 2,即为8。调用dictExpand之后的结构:

 1 /*
 2                                                        +----+
 3                                                     +->|K1|V|->NULL
 4                                    +->+----------+ /   +----+
 5                                    |  |dictEntry*|/    +----+
 6                                    |  +----------+ +-->|K2|V|->NULL
 7                                    |  |dictEntry*|/    +----+
 8  +------------+    /+-----------+  |  +----------+
 9  |dictType*   |   / |dictEntry**|--+  |dictEntry*|\    +----+
10  +------------+  /  +-----------+     +----------+ +-->|K3|V|->NULL
11  |privdata    | /   |size=4     |     |dictEntry*|\    +----+
12  +------------+/    +-----------+     +----------+ \   +----+
13  |ht[2]       |     |sizemask=3 |                   +->|K4|V|->NULL
14  +------------+\    +-----------+                      +----+
15  |rehashidx=0 | \   |used=4     |
16  +------------+  \  +-----------+
17  |iterators=0 |   \
18  +------------+    \+-----------+  +->+----------+
19                     |dictEntry**|--+  |dictEntry*|->NULL
20                     +-----------+     +----------+
21                     |size=8     |     |dictEntry*|->NULL
22                     +-----------+     +----------+
23                     |sizemask=7 |     |dictEntry*|->NULL
24                     +-----------+     +----------+
25                     |used=0     |     |dictEntry*|->NULL
26                     +-----------+     +----------+
27                                       |dictEntry*|->NULL
28                                       +----------+
29                                       |dictEntry*|->NULL
30                                       +----------+
31                                       |dictEntry*|->NULL
32                                       +----------+
33                                       |dictEntry*|->NULL
34                                       +----------+
35 */

  经过_dictExpandIfNeeded可以知道是否需要进行rehash操作,如果需要的话,再通过dictExpand函数,就可以得到合适大小的哈希表,并且该函数还会将rehashidx设置为0,这样dictRehash函数就可以根据rehashidx进行rehashing操作

 1 /**
 2  * Performs N steps of incremental rehashing. Returns 1 if there are still
 3  * keys to move from the old to the new hash table, otherwise 0 is returned.
 4  *
 5  * Note that a rehashing step consists in moving a bucket (that may have more
 6  * than one key as we use chaining) from the old to the new hash table, however
 7  * since part of the hash table may be composed of empty spaces, it is not
 8  * guaranteed that this function will rehash even a single bucket, since it
 9  * will visit at max N*10 empty buckets in total, otherwise the amount of
10  * work it does would be unbound and the function may block for a long time.
11  *
12  * 执行N步渐进式rehash。如果还有key需要从旧哈希表迁移到新哈希表,返回1,否则返回0
13  *
14  * 需要注意的是,一步rehash是指把一个bucket(由于使用链地址法,其中可能有多个key)从旧哈希表移到新哈希表。
15  * 但是哈希表中可能存在空bucket,因此不能保证本函数一定会rehash哪怕一个bucket:它总共最多访问N*10个空bucket,
16  * 否则函数的工作量将没有上界,可能会长时间阻塞
17  */
18 int dictRehash(dict *d, int n) {
19     int empty_visits = n*10; /* Max number of empty buckets to visit. */
20     if (!dictIsRehashing(d)) return 0;
21 
22     while(n-- && d->ht[0].used != 0) {
23         dictEntry *de, *nextde;
24 
25         /* Note that rehashidx can't overflow as we are sure there are more
26          * elements because ht[0].used != 0 */
27         assert(d->ht[0].size > (unsigned long)d->rehashidx);
28 
29         /* 找到非空的哈希表下标 */
30         while(d->ht[0].table[d->rehashidx] == NULL) {
31             d->rehashidx++;
32             /**
33              * rehashing时允许最多跳过10n的空bucket,就要退出流程
34              */
35             if (--empty_visits == 0)
36                 return 1;
37         }
38         de = d->ht[0].table[d->rehashidx];
39         /**
40          * Move all the keys in this bucket from the old to the new hash HT
41          * 实现将bucket从老的哈希表移到新的哈希表
42          */
43         while(de) {
44             uint64_t h;
45 
46             nextde = de->next;
47             /* Get the index in the new hash table 获取在新哈希表中的下标*/
48             h = dictHashKey(d, de->key) & d->ht[1].sizemask;
49             de->next = d->ht[1].table[h];
50             d->ht[1].table[h] = de;
51             d->ht[0].used--;
52             d->ht[1].used++;
53             de = nextde;
54         }
55         d->ht[0].table[d->rehashidx] = NULL;
56         d->rehashidx++;
57     }
58 
59     /**
60      * Check if we already rehashed the whole table...
61      * 当ht[0]->used为0时,认为ht[0]的所有dictEntry已经移至ht[1],此时return 0,
62      * 否则 return 1,告诉调用者,还需要继续进行rehashing操作.
63      */
64     if (d->ht[0].used == 0) {
65         /**
66          * 此时ht[0]->used为0,释放原ht[0]的hash table,把ht[1]赋值给ht[0],并设置ht[1] = NULL,
67          * 最后重置rehashidx=-1,rehashing操作结束
68          */
69         zfree(d->ht[0].table);
70         d->ht[0] = d->ht[1];
71         _dictReset(&d->ht[1]);
72         d->rehashidx = -1;
73         return 0;
74     }
75 
76     /* More to rehash... */
77     return 1;
78 }

  rehashing操作将会把ht[0]里,rehashidx的值对应的bucket下的所有dictEntry,移至ht[1],之后对rehashidx进行自增处理。当ht[0]->used为0时,认为ht[0]的所有dictEntry已经移至ht[1],此时return 0,否则 return 1,告诉调用者,还需要继续进行rehashing操作。同时,rehashing时最多允许跳过10n个空bucket,超过则退出流程,返回1。假设传入的n=1,即只进行一步rehashing操作,完成之后的结构:

 1 /*
 2 
 3                                                     +->NULL
 4                                    +->+----------+ /
 5                                    |  |dictEntry*|/    +----+
 6                                    |  +----------+ +-->|K2|V|->NULL
 7                                    |  |dictEntry*|/    +----+
 8  +------------+    /+-----------+  |  +----------+
 9  |dictType*   |   / |dictEntry**|--+  |dictEntry*|\    +----+
10  +------------+  /  +-----------+     +----------+ +-->|K3|V|->NULL
11  |privdata    | /   |size=4     |     |dictEntry*|\    +----+
12  +------------+/    +-----------+     +----------+ \   +----+
13  |ht[2]       |     |sizemask=3 |                   +->|K4|V|->NULL
14  +------------+\    +-----------+                      +----+
15  |rehashidx=1 | \   |used=3     |
16  +------------+  \  +-----------+
17  |iterators=0 |   \
18  +------------+    \+-----------+  +->+----------+   +----+
19                     |dictEntry**|--+  |dictEntry*|-->|K1|V|->NULL
20                     +-----------+     +----------+   +----+
21                     |size=8     |     |dictEntry*|->NULL
22                     +-----------+     +----------+
23                     |sizemask=7 |     |dictEntry*|->NULL
24                     +-----------+     +----------+
25                     |used=1     |     |dictEntry*|->NULL
26                     +-----------+     +----------+
27                                       |dictEntry*|->NULL
28                                       +----------+
29                                       |dictEntry*|->NULL
30                                       +----------+
31                                       |dictEntry*|->NULL
32                                       +----------+
33                                       |dictEntry*|->NULL
34                                       +----------+
35 */

所有节点移完时

 1 /*
 2 
 3 
 4                                    +->+----------+
 5                                    |  |dictEntry*|->NULL
 6                                    |  +----------+
 7                                    |  |dictEntry*|->NULL
 8  +------------+    /+-----------+  |  +----------+
 9  |dictType*   |   / |dictEntry**|--+  |dictEntry*|->NULL
10  +------------+  /  +-----------+     +----------+
11  |privdata    | /   |size=4     |     |dictEntry*|->NULL
12  +------------+/    +-----------+     +----------+
13  |ht[2]       |     |sizemask=3 |
14  +------------+\    +-----------+
15  |rehashidx=4 | \   |used=0     |
16  +------------+  \  +-----------+
17  |iterators=0 |   \
18  +------------+    \+-----------+  +->+----------+   +----+
19                     |dictEntry**|--+  |dictEntry*|-->|K1|V|->NULL
20                     +-----------+     +----------+   +----+
21                     |size=8     |     |dictEntry*|->NULL
22                     +-----------+     +----------+   +----+
23                     |sizemask=7 |     |dictEntry*|-->|K3|V|->NULL
24                     +-----------+     +----------+   +----+
25                     |used=4     |     |dictEntry*|->NULL
26                     +-----------+     +----------+
27                                       |dictEntry*|->NULL
28                                       +----------+   +----+
29                                       |dictEntry*|-->|K2|V|->NULL
30                                       +----------+   +----+
31                                       |dictEntry*|->NULL
32                                       +----------+   +----+
33                                       |dictEntry*|-->|K4|V|->NULL
34                                       +----------+   +----+
35 */

此时ht[0]->used为0,释放原ht[0]的hash table,把ht[1]赋值给ht[0],并设置ht[1] = NULL,最后重置rehashidx=-1,rehashing操作结束

 1 /*
 2  +------------+    /+-----------+   +-->+----------+   +----+
 3  |dictType*   |   / |dictEntry**|---+   |dictEntry*|-->|K1|V|->NULL
 4  +------------+  /  +-----------+       +----------+   +----+
 5  |privdata    | /   |size=8     |       |dictEntry*|->NULL
 6  +------------+/    +-----------+       +----------+   +----+
 7  |ht[2]       |     |sizemask=7 |       |dictEntry*|-->|K3|V|->NULL
 8  +------------+\    +-----------+       +----------+   +----+
 9  |rehashidx=-1| \   |used=4     |       |dictEntry*|->NULL
10  +------------+  \  +-----------+       +----------+
11  |iterators=0 |   \                     |dictEntry*|->NULL
12  +------------+    \+-----------+       +----------+   +----+
13                     |dictEntry**|->NULL |dictEntry*|-->|K2|V|->NULL
14                     +-----------+       +----------+   +----+
15                     |size=0     |       |dictEntry*|->NULL
16                     +-----------+       +----------+   +----+
17                     |sizemask=0 |       |dictEntry*|-->|K4|V|->NULL
18                     +-----------+       +----------+   +----+
19                     |used=0     |
20                     +-----------+
21 */

3.4 rehashing操作的触发共有两种方式

3.4.1 定时操作

 1 /**
 2  * 返回当前时间,单位:毫秒
 3  */
 4 long long timeInMilliseconds(void) {
 5     struct timeval tv;
 6 
 7     gettimeofday(&tv,NULL);
 8     return (((long long)tv.tv_sec)*1000)+(tv.tv_usec/1000);
 9 }
10 
11 /**
12  * Rehash for an amount of time between ms milliseconds and ms+1 milliseconds 
13  * 在ms毫秒到ms+1毫秒之间的一段时间内执行rehash
14  */
15 int dictRehashMilliseconds(dict *d, int ms) {
16     //获取当前的时间,单位是毫秒
17     long long start = timeInMilliseconds();
18     int rehashes = 0;
19 
20     while(dictRehash(d,100)) {
21         rehashes += 100;
22         if (timeInMilliseconds()-start > ms) break;
23     }
24     return rehashes;
25 }

外部传入一个毫秒时间(实际上就是1ms),在这段时间内循环执行rehashing,每次执行100步(即dictRehash(d,100))。

3.4.2 操作时触发

 1 /* This function performs just a step of rehashing, and only if there are
 2  * no safe iterators bound to our hash table. When we have iterators in the
 3  * middle of a rehashing we can't mess with the two hash tables otherwise
 4  * some element can be missed or duplicated.
 5  *
 6  * This function is called by common lookup or update operations in the
 7  * dictionary so that the hash table automatically migrates from H1 to H2
 8  * while it is actively used.
 9  * 在插入、删除、查找等操作时,顺带执行一次rehashing操作。
10  * 值得注意的是,如果存在安全的迭代器,即d->iterators != 0,则不会进行rehashing操作
11  * */
12 static void _dictRehashStep(dict *d) {
13     if (d->iterators == 0) dictRehash(d,1);
14 }

四、插入

4.1 获取插入位置

获取可插入新节点的bucket idx的方法

 1 /* Returns the index of a free slot that can be populated with
 2  * a hash entry for the given 'key'.
 3  * If the key already exists, -1 is returned
 4  * and the optional output parameter may be filled.
 5  *
 6  * Note that if we are in the process of rehashing the hash table, the
 7  * index is always returned in the context of the second (new) hash table.
 8  * 获取可插入新节点的bucket idx
 9  *
10  * 此方法在进行查找idx之前,先进行一次判断,是否需要rehashing操作。而后进行查找。
11  * idx的值就是通过hash函数计算出来的hash_value与sizemask做位运算的结果,然后遍历此idx对应的bucket,
12  * 若已存在相同的key,则认为不可插入,并把对应的dictEntry用传入的二级指针的方式传出,供调用者使用。
13  * 若不存在,则需要判断是否正在进行rehashing操作。若在,则会对ht[1]做一次相同的操作。最终可以得到一个idx值,
14  * 或传出一个dictEntry。
15  *
16  * 由于rehashing期间,将会把ht[0]的所有dictEntry依次转移至ht[1],
17  * 为了防止新插入的dictEntry落到ht[0]已完成rehashing操作的bucket上,在rehashing期间,
18  * 返回的可插入的idx一定是属于ht[1]的。
19  */
20 static long _dictKeyIndex(dict *d, const void *key, uint64_t hash, dictEntry **existing)
21 {
22     unsigned long idx, table;
23     dictEntry *he;
24     if (existing) *existing = NULL;
25 
26     /* Expand the hash table if needed 如果有需要,对字典进行扩展*/
27     if (_dictExpandIfNeeded(d) == DICT_ERR)
28         return -1;
29     // 在两个哈希表(ht[0]、ht[1])中进行查找给定 key
30     for (table = 0; table <= 1; table++) {
31         /**
32          * 依据哈希值和哈希表的 sizemask
33          * 计算出 key 可能出现在 table 数组中的哪个索引
34          */
35         idx = hash & d->ht[table].sizemask;
36         /* Search if this slot does not already contain the given key */
37         he = d->ht[table].table[idx];
38 
39         /**
40          * 在节点链表里查找给定 key
41          * 由于链表的元素数量通常为 1 或者是一个非常小的比率
42          * 所以能够将这个操作看作 O(1) 来处理
43          */
44         while(he) {
45             // key 已经存在
46             if (key==he->key || dictCompareKeys(d, key, he->key)) {
47                 if (existing) *existing = he;
48                 return -1;
49             }
50             he = he->next;
51         }
52         /**
53          * 第一次执行到这里时,说明已经查找完 d->ht[0] 了
54          * 这时如果哈希表不在 rehash 中,就没有必要查找 d->ht[1]
55          */
56         if (!dictIsRehashing(d)) break;
57     }
58     return idx;
59 }

4.2 插入函数

 1 /**
 2  * Low level add or find:底层的添加和查找函数
 3  * This function adds the entry but instead of setting a value returns the
 4  * dictEntry structure to the user, that will make sure to fill the value
 5  * field as he wishes.
 6  *
 7  * This function is also directly exposed to the user API to be called
 8  * mainly in order to store non-pointers inside the hash value, example:
 9  *
10  * entry = dictAddRaw(dict,mykey,NULL);
11  * if (entry != NULL) dictSetSignedIntegerVal(entry,1000);
12  *
13  * Return values:
14  *
15  * If key already exists NULL is returned, and "*existing" is populated
16  * with the existing entry if existing is not NULL.
17  *
18  * If key was added, the hash entry is returned to be manipulated by the caller.
19  *
20  * 本函数只添加entry而不设置value,而是把dictEntry结构返回给调用者,由调用者按需填充value字段
21  * 本函数也直接作为API暴露给用户调用,主要用于在哈希表的value中保存非指针类型的值,比如:
22  * entry = dictAddRaw(dict,mykey,NULL);
23  * if (entry != NULL) dictSetSignedIntegerVal(entry,1000);
24  *
25  * 若不存在相同key,则插入,否则,传出dictEntry的指针。插入时,由于没有记录每个dictEntry链表的尾指针,
26  * 所以使用头插法,可以节约插入时的时间消耗。
27  */
28 dictEntry *dictAddRaw(dict *d, void *key, dictEntry **existing)
29 {
30     long index;
31     dictEntry *entry;
32     dictht *ht;
33 
34     // 如果正在rehash,顺带执行rehash操作
35     if (dictIsRehashing(d))
36         _dictRehashStep(d);
37 
38     /**
39      * Get the index of the new element, or -1 if
40      * the element already exists.
41      * 获取新元素的下标,如果已经存在,返回-1
42      */
43     if ((index = _dictKeyIndex(d, key, dictHashKey(d,key), existing)) == -1)
44         return NULL;
45 
46     /* Allocate the memory and store the new entry.
47      * Insert the element in top, with the assumption that in a database
48      * system it is more likely that recently added entries are accessed
49      * more frequently.
50      * 如果正在进行rehash操作,返回ht[1],否则返回ht[0]
51      * */
52     ht = dictIsRehashing(d) ? &d->ht[1] : &d->ht[0];
53     entry = zmalloc(sizeof(*entry));
54     //插入头部
55     entry->next = ht->table[index];
56     //更改头部节点
57     ht->table[index] = entry;
58     //节点增加
59     ht->used++;
60 
61     /* Set the hash entry fields. 设置entry中的key*/
62     dictSetKey(d, entry, key);
63     //返回插入元素对应的哈希节点
64     return entry;
65 }

若不存在相同key,则插入,否则,传出dictEntry的指针。插入时,由于没有记录每个dictEntry链表的尾指针,所以使用头插法,可以节约插入时的时间消耗

4.3 底层插入函数

dictAddRaw作为最终插入的方法,被多个方法所调用:

 1 /**
 2  * Add an element to the target hash table
 3  * 向目标哈希表中给添加一个元素
 4  * 若不存在,则插入,否则,报错
 5  */
 6 int dictAdd(dict *d, void *key, void *val)
 7 {
 8     dictEntry *entry = dictAddRaw(d,key,NULL);
 9 
10     if (!entry) return DICT_ERR;
11     dictSetVal(d, entry, val);
12     return DICT_OK;
13 }
14 
15 /**
16  * Add or Overwrite:
17  * Add an element, discarding the old value if the key already exists.
18  * Return 1 if the key was added from scratch, 0 if there was already an
19  * element with such key and dictReplace() just performed a value update
20  * operation.
21  * 若存在,则替换value,否则插入
22  */
23 int dictReplace(dict *d, void *key, void *val)
24 {
25     dictEntry *entry, *existing, auxentry;
26 
27     /**
28      * Try to add the element. If the key
29      * does not exists dictAdd will succeed.
30      * 如果添加成功,dictAddRaw返回非空值
31      */
32     entry = dictAddRaw(d,key,&existing);
33     if (entry) {
34         dictSetVal(d, entry, val);
35         return 1;
36     }
37 
38     /**
39      * Set the new value and free the old one. Note that it is important
40      * to do that in this order, as the value may just be exactly the same
41      * as the previous one. In this context, think to reference counting,
42      * you want to increment (set), and then decrement (free), and not the
43      * reverse.
44      * 设置新值,释放旧值,这个顺序很重要,因为值可能是与原来一样的
45      * 在这个上下文里,考虑引用计数,我们希望的是先加再减,而不是反过来
46      */
47     auxentry = *existing;
48     dictSetVal(d, existing, val);
49     dictFreeVal(d, &auxentry);
50     return 0;
51 }
52 
53 /* Add or Find:
54  * dictAddOrFind() is simply a version of dictAddRaw() that always
55  * returns the hash entry of the specified key, even if the key already
56  * exists and can't be added (in that case the entry of the already
57  * existing key is returned.)
58  *
59  * See dictAddRaw() for more information.
60  * 若存在,则返回对应dictEntry,否则插入后返回新的dictEntry
61  */
62 dictEntry *dictAddOrFind(dict *d, void *key) {
63     dictEntry *entry, *existing;
64     entry = dictAddRaw(d,key,&existing);
65     return entry ? entry : existing;
66 }

4.4 插入过程

对于一个刚刚create的dict:

 1 /*
 2 
 3 +------------+    /+-----------+
 4 |dictType*   |   / |dictEntry**|-->NULL
 5 +------------+  /  +-----------+
 6 |privdata    | /   |size=0     |
 7 +------------+/    +-----------+
 8 |ht[2]       |     |sizemask=0 |
 9 +------------+\    +-----------+
10 |rehashidx=-1| \   |used=0     |
11 +------------+  \  +-----------+
12 |iterators=0 |   \
13 +------------+    \+-----------+
14                    |dictEntry**|-->NULL
15                    +-----------+
16                    |size=0     |
17                    +-----------+
18                    |sizemask=0 |
19                    +-----------+
20                    |used=0     |
21                    +-----------+
22 */

假设K1、K2、K3、K4计算出来的hash值分别为0、5、2、7,使用sizemask计算出来的idx分别为0、1、2、3

4.4.1 插入K1

现调用dictAdd方法进行插入

执行完dictAddRaw中的_dictKeyIndex里的_dictExpandIfNeeded:

 1 /*
 2 
 3                                                    +-->NULL
 4 +------------+    /+-----------+  +->+----------+ /
 5 |dictType*   |   / |dictEntry**|--+  |dictEntry*|/
 6 +------------+  /  +-----------+     +----------+ +--->NULL
 7 |privdata    | /   |size=4     |     |dictEntry*|/
 8 +------------+/    +-----------+     +----------+
 9 |ht[2]       |     |sizemask=3 |     |dictEntry*|\
10 +------------+\    +-----------+     +----------+ +--->NULL
11 |rehashidx=-1| \   |used=0     |     |dictEntry*|\
12 +------------+  \  +-----------+     +----------+ \
13 |iterators=0 |   \                                 +-->NULL
14 +------------+    \+-----------+
15                    |dictEntry**|-->NULL
16                    +-----------+
17                    |size=0     |
18                    +-----------+
19                    |sizemask=0 |
20                    +-----------+
21                    |used=0     |
22                    +-----------+
23 */

同时得到其在ht[0]的idx = 0,且不在rehashing操作中,于是直接插入

 1 /*
 2                                                       +----+
 3                                                    +->|K1|V|->NULL
 4 +------------+    /+-----------+  +->+----------+ /   +----+
 5 |dictType*   |   / |dictEntry**|--+  |dictEntry*|/
 6 +------------+  /  +-----------+     +----------+ +--->NULL
 7 |privdata    | /   |size=4     |     |dictEntry*|/
 8 +------------+/    +-----------+     +----------+
 9 |ht[2]       |     |sizemask=3 |     |dictEntry*|\
10 +------------+\    +-----------+     +----------+ +--->NULL
11 |rehashidx=-1| \   |used=1     |     |dictEntry*|\
12 +------------+  \  +-----------+     +----------+ \
13 |iterators=0 |   \                                 +-->NULL
14 +------------+    \+-----------+
15                    |dictEntry**|-->NULL
16                    +-----------+
17                    |size=0     |
18                    +-----------+
19                    |sizemask=0 |
20                    +-----------+
21                    |used=0     |
22                    +-----------+
23 */

4.4.2 依次插入K2、K3、K4后

 1 /*
 2                                                       +----+
 3                                                    +->|K1|V|->NULL
 4 +------------+    /+-----------+  +->+----------+ /   +----+
 5 |dictType*   |   / |dictEntry**|--+  |dictEntry*|/    +----+
 6 +------------+  /  +-----------+     +----------+ +-->|K2|V|->NULL
 7 |privdata    | /   |size=4     |     |dictEntry*|/    +----+
 8 +------------+/    +-----------+     +----------+
 9 |ht[2]       |     |sizemask=3 |     |dictEntry*|\    +----+
10 +------------+\    +-----------+     +----------+ +-->|K3|V|->NULL
11 |rehashidx=-1| \   |used=4     |     |dictEntry*|\    +----+
12 +------------+  \  +-----------+     +----------+ \   +----+
13 |iterators=0 |   \                                 +->|K4|V|->NULL
14 +------------+    \+-----------+                      +----+
15                    |dictEntry**|-->NULL
16                    +-----------+
17                    |size=0     |
18                    +-----------+
19                    |sizemask=0 |
20                    +-----------+
21                    |used=0     |
22                    +-----------+
23 */

4.4.3 此时若有一个K5

计算出来的hash值为8,则:

i.因此刻不在rehashing操作,所以不用做处理

ii.执行完dictAddRaw中的_dictKeyIndex里的_dictExpandIfNeeded:

 1 /*
 2                                                        +----+
 3                                                     +->|K1|V|->NULL
 4                                    +->+----------+ /   +----+
 5                                    |  |dictEntry*|/    +----+
 6                                    |  +----------+ +-->|K2|V|->NULL
 7                                    |  |dictEntry*|/    +----+
 8  +------------+    /+-----------+  |  +----------+
 9  |dictType*   |   / |dictEntry**|--+  |dictEntry*|\    +----+
10  +------------+  /  +-----------+     +----------+ +-->|K3|V|->NULL
11  |privdata    | /   |size=4     |     |dictEntry*|\    +----+
12  +------------+/    +-----------+     +----------+ \   +----+
13  |ht[2]       |     |sizemask=3 |                   +->|K4|V|->NULL
14  +------------+\    +-----------+                      +----+
15  |rehashidx=0 | \   |used=4     |
16  +------------+  \  +-----------+
17  |iterators=0 |   \
18  +------------+    \+-----------+  +->+----------+
19                     |dictEntry**|--+  |dictEntry*|->NULL
20                     +-----------+     +----------+
21                     |size=8     |     |dictEntry*|->NULL
22                     +-----------+     +----------+
23                     |sizemask=7 |     |dictEntry*|->NULL
24                     +-----------+     +----------+
25                     |used=0     |     |dictEntry*|->NULL
26                     +-----------+     +----------+
27                                       |dictEntry*|->NULL
28                                       +----------+
29                                       |dictEntry*|->NULL
30                                       +----------+
31                                       |dictEntry*|->NULL
32                                       +----------+
33                                       |dictEntry*|->NULL
34                                       +----------+
35 */

同时得到其在ht[1]的idx=0

iii.插入,因为此时已经满足了扩容的条件,字典正处于rehashing过程中,所以将元素插入ht[1]对应的哈希表。通常情况下,如果不是处于rehashing过程中,就会将元素插入ht[0]对应的哈希表

 1 /*
 2                                                        +----+
 3                                                     +->|K1|V|->NULL
 4                                    +->+----------+ /   +----+
 5                                    |  |dictEntry*|/    +----+
 6                                    |  +----------+ +-->|K2|V|->NULL
 7                                    |  |dictEntry*|/    +----+
 8  +------------+    /+-----------+  |  +----------+
 9  |dictType*   |   / |dictEntry**|--+  |dictEntry*|\    +----+
10  +------------+  /  +-----------+     +----------+ +-->|K3|V|->NULL
11  |privdata    | /   |size=4     |     |dictEntry*|\    +----+
12  +------------+/    +-----------+     +----------+ \   +----+
13  |ht[2]       |     |sizemask=3 |                   +->|K4|V|->NULL
14  +------------+\    +-----------+                      +----+
15  |rehashidx=0 | \   |used=4     |
16  +------------+  \  +-----------+
17  |iterators=0 |   \
18  +------------+    \+-----------+  +->+----------+   +----+
19                     |dictEntry**|--+  |dictEntry*|-->|K5|V|->NULL
20                     +-----------+     +----------+   +----+
21                     |size=8     |     |dictEntry*|->NULL
22                     +-----------+     +----------+
23                     |sizemask=7 |     |dictEntry*|->NULL
24                     +-----------+     +----------+
25                     |used=1     |     |dictEntry*|->NULL
26                     +-----------+     +----------+
27                                       |dictEntry*|->NULL
28                                       +----------+
29                                       |dictEntry*|->NULL
30                                       +----------+
31                                       |dictEntry*|->NULL
32                                       +----------+
33                                       |dictEntry*|->NULL
34                                       +----------+
35 */

4.4.4 此时若有一个K6

计算出来的hash值为16,则:

i.此时已处于rehashing操作中,先执行一步rehash,将ht[0]中的K1|V移入ht[1]中:

 1 /*
 2 
 3                                                     +-->NULL
 4                                    +->+----------+ /
 5                                    |  |dictEntry*|/    +----+
 6                                    |  +----------+ +-->|K2|V|->NULL
 7                                    |  |dictEntry*|/    +----+
 8  +------------+    /+-----------+  |  +----------+
 9  |dictType*   |   / |dictEntry**|--+  |dictEntry*|\    +----+
10  +------------+  /  +-----------+     +----------+ +-->|K3|V|->NULL
11  |privdata    | /   |size=4     |     |dictEntry*|\    +----+
12  +------------+/    +-----------+     +----------+ \   +----+
13  |ht[2]       |     |sizemask=3 |                   +->|K4|V|->NULL
14  +------------+\    +-----------+                      +----+
15  |rehashidx=1 | \   |used=3     |
16  +------------+  \  +-----------+
17  |iterators=0 |   \
18  +------------+    \+-----------+  +->+----------+   +----+  +----+
19                     |dictEntry**|--+  |dictEntry*|-->|K1|V|->|K5|V|->NULL
20                     +-----------+     +----------+   +----+  +----+
21                     |size=8     |     |dictEntry*|->NULL
22                     +-----------+     +----------+
23                     |sizemask=7 |     |dictEntry*|->NULL
24                     +-----------+     +----------+
25                     |used=2     |     |dictEntry*|->NULL
26                     +-----------+     +----------+
27                                       |dictEntry*|->NULL
28                                       +----------+
29                                       |dictEntry*|->NULL
30                                       +----------+
31                                       |dictEntry*|->NULL
32                                       +----------+
33                                       |dictEntry*|->NULL
34                                       +----------+
35 */

ii.执行完dictAddRaw中的_dictKeyIndex里的_dictExpandIfNeeded,因已在进行rehashing,所以不做任何处理,只返回其在ht[1]的idx 0

iii.头插法将K6插入

 1 /*
 2 
 3                                                     +-->NULL
 4                                    +->+----------+ /
 5                                    |  |dictEntry*|/    +----+
 6                                    |  +----------+ +-->|K2|V|->NULL
 7                                    |  |dictEntry*|/    +----+
 8  +------------+    /+-----------+  |  +----------+
 9  |dictType*   |   / |dictEntry**|--+  |dictEntry*|\    +----+
10  +------------+  /  +-----------+     +----------+ +-->|K3|V|->NULL
11  |privdata    | /   |size=4     |     |dictEntry*|\    +----+
12  +------------+/    +-----------+     +----------+ \   +----+
13  |ht[2]       |     |sizemask=3 |                   +->|K4|V|->NULL
14  +------------+\    +-----------+                      +----+
15  |rehashidx=1 | \   |used=3     |
16  +------------+  \  +-----------+
17  |iterators=0 |   \
18  +------------+    \+-----------+  +->+----------+   +----+  +----+  +----+
19                     |dictEntry**|--+  |dictEntry*|-->|K6|V|->|K1|V|->|K5|V|->NULL
20                     +-----------+     +----------+   +----+  +----+  +----+
21                     |size=8     |     |dictEntry*|->NULL
22                     +-----------+     +----------+
23                     |sizemask=7 |     |dictEntry*|->NULL
24                     +-----------+     +----------+
25                     |used=3     |     |dictEntry*|->NULL
26                     +-----------+     +----------+
27                                       |dictEntry*|->NULL
28                                       +----------+
29                                       |dictEntry*|->NULL
30                                       +----------+
31                                       |dictEntry*|->NULL
32                                       +----------+
33                                       |dictEntry*|->NULL
34                                       +----------+
35 */

以上为正常插入时的情况,key已存在,或是调用另外两个方法的情况与之大同小异,有时间的时候再补充。

五、查找 

5.1 dictFind函数

返回找到的dictEntry

 1 /**
 2  * 若在rehashing期间,则先执行一步rehash。首先在ht[0]里查找,计算出hash值对应ht[0]的idx,取得其bucket,
 3  * 然后遍历之,找到与指定key相同的dictEntry。若ht[0]中找不到指定的key,且正在进行rehashing操作,
 4  * 则去ht[1]以相同方式也查找一次。
 5  */
 6 dictEntry *dictFind(dict *d, const void *key)
 7 {
 8     dictEntry *he;
 9     uint64_t h, idx, table;
10 
11     if (d->ht[0].used + d->ht[1].used == 0) return NULL; /* dict is empty 如果哈希表为空直接返回NULL*/
12     if (dictIsRehashing(d)) _dictRehashStep(d); //是否处于rehashing操作中
13     h = dictHashKey(d, key);
14     for (table = 0; table <= 1; table++) {
15         idx = h & d->ht[table].sizemask;
16         he = d->ht[table].table[idx];
17         while(he) {
18             if (key==he->key || dictCompareKeys(d, key, he->key))
19                 return he;
20             he = he->next;
21         }
22         /**
23          * 如果不是处于rehashing操作中,就不需要遍历ht[1],因为rehashing结束后,会将整个ht[1]赋值给ht[0],
24          * 然后将ht[1]重置(table置为NULL),也就是ht[1]不再存在任何元素
25          */
26         if (!dictIsRehashing(d)) return NULL;
27     }
28     return NULL;
29 }

5.2 dictFetchValue函数

redis额外提供一个,根据key只获取其value的方法,返回找到的value值。

1 /**
2  * redis额外提供一个,根据key只获取其value的方法:key不存在时返回NULL,实际上调用的还是dictFind
3  */
4 void *dictFetchValue(dict *d, const void *key) {
5     dictEntry *he;
6 
7     he = dictFind(d,key);
8     return he ? dictGetVal(he) : NULL;
9 }

六、删除

6.1 底层删除函数

 1 /**
 2  * Search and remove an element. This is an helper function for
 3  * dictDelete() and dictUnlink(), please check the top comment
 4  * of those functions.
 5  * 查找方式与dictFind相同。找到之后,由调用者指定是否要销毁此dictEntry,
 6  * 若不销毁,则要把对应指针传出来,给外部使用。
 7  */
 8 static dictEntry *dictGenericDelete(dict *d, const void *key, int nofree) {
 9     uint64_t h, idx;
10     dictEntry *he, *prevHe;
11     int table;
12 
13     if (d->ht[0].used == 0 && d->ht[1].used == 0) return NULL;
14 
15     if (dictIsRehashing(d)) _dictRehashStep(d);
16     h = dictHashKey(d, key);
17 
18     for (table = 0; table <= 1; table++) {
19         idx = h & d->ht[table].sizemask;
20         he = d->ht[table].table[idx];
21         prevHe = NULL;
22         while(he) {
23             if (key==he->key || dictCompareKeys(d, key, he->key)) {
24                 /* Unlink the element from the list */
25                 if (prevHe)
26                     prevHe->next = he->next;
27                 else
28                     d->ht[table].table[idx] = he->next;
29                 if (!nofree) {
30                     //需要销毁
31                     dictFreeKey(d, he);
32                     dictFreeVal(d, he);
33                     zfree(he);
34                 }
35                 /**
36                  * 为什么无论是否释放该元素都需要将元素个数减少1,因为查找到的元素是一定会被删除的元素,
37                  * 即使在这里不进行释放,后续也会调用dictFreeUnlinkedEntry函数将其释放掉,之所以有这样的一个操作,
38                  * 是因为某些情况下,在释放该元素之前,可能还会利用该元素进行一些必要的操作,
39                  * 所以通过这样的一种特殊删除方式来满足这种需求。
40                  */
41                 d->ht[table].used--;
42                 return he;
43             }
44             prevHe = he;
45             he = he->next;
46         }
47         if (!dictIsRehashing(d)) break;
48     }
49     return NULL; /* not found */
50 }

查找方式与dictFind相同。找到之后,由调用者指定是否要销毁此dictEntry,若不销毁,则要把对应指针传出来,给外部使用

6.2 上层删除函数

6.1中的方法被两个接口所调用:

 1 /**
 2  * Remove an element, returning DICT_OK on success or DICT_ERR if the
 3  * element was not found.
 4  * 删除一个元素,如果成功删除返回DICT_OK,如果不存在,返回DICT_ERR
 5  */
 6 int dictDelete(dict *ht, const void *key) {
 7     return dictGenericDelete(ht,key,0) ? DICT_OK : DICT_ERR;
 8 }
 9 
10 /* Remove an element from the table, but without actually releasing
11  * the key, value and dictionary entry. The dictionary entry is returned
12  * if the element was found (and unlinked from the table), and the user
13  * should later call `dictFreeUnlinkedEntry()` with it in order to release it.
14  * Otherwise if the key is not found, NULL is returned.
15  *
16  * This function is useful when we want to remove something from the hash
17  * table but want to use its value before actually deleting the entry.
18  * Without this function the pattern would require two lookups:
19  *
20  *  entry = dictFind(...);
21  *  // Do something with entry
22  *  dictDelete(dictionary,entry);
23  *
24  * Thanks to this function it is possible to avoid this, and use
25  * instead:
26  *
27  * entry = dictUnlink(dictionary,entry);
28  * // Do something with entry
29  * dictFreeUnlinkedEntry(entry); // <- This does not need to lookup again.
30  *
31  * dictDelete就不用多说了,直接删除对应dictEntry。关于为什么需要dictUnlink,源码的注释上写道,
32  * 如果有某种操作,需要先查找指定key对应的dictEntry,然后对其做点操作,接着就直接删除,在没有dictUnlink的时候,
33  * 需要这样:
34  *      1 entry = dictFind(...);
35  *      2 // Do something with entry
36  *      3 dictDelete(dictionary,entry);
37  * 实际需要查找两次。而在有dictUnlink的情况下:
38  *      1 entry = dictUnlink(dictionary,entry);
39  *      2 // Do something with entry
40  *      3 dictFreeUnlinkedEntry(entry);
41  * 只需要一次查找,配合专门的删除操作,即可。
42  */
43 dictEntry *dictUnlink(dict *ht, const void *key) {
44     return dictGenericDelete(ht,key,1);
45 }
46 
47 /**
48  * You need to call this function to really free the entry after a call
49  * to dictUnlink(). It's safe to call this function with 'he' = NULL.
50  * 配合dictUnlink函数,对dictUnlink的返回结果进行释放
51  */
52 void dictFreeUnlinkedEntry(dict *d, dictEntry *he) {
53     if (he == NULL) return;
54     dictFreeKey(d, he);
55     dictFreeVal(d, he);
56     zfree(he);
57 }

七、销毁

清空一个hash table的方法

 1 /**
 2  * Destroy an entire dictionary
 3  * 销毁整个哈希表
 4  * 两层循环,分别遍历所有bucket与单bucket里所有dictEntry进行释放。
 5  * 关于这里的 (i&65535) == 0的判断,_dictClear方法仅在相同文件的方法dictEmpty与dictRelease调用
 6  */
 7 int _dictClear(dict *d, dictht *ht, void(callback)(void *)) {
 8     unsigned long i;
 9 
10     /* Free all the elements */
11     for (i = 0; i < ht->size && ht->used > 0; i++) {
12         dictEntry *he, *nextHe;
13 
14         if (callback && (i & 65535) == 0) callback(d->privdata);
15 
16         if ((he = ht->table[i]) == NULL) continue;
17         while(he) {
18             nextHe = he->next;
19             dictFreeKey(d, he);
20             dictFreeVal(d, he);
21             zfree(he);
22             ht->used--;
23             he = nextHe;
24         }
25     }
26     /* Free the table and the allocated cache structure */
27     zfree(ht->table);
28     /* Re-initialize the table */
29     _dictReset(ht);
30     return DICT_OK; /* never fails */
31 }

用户使用的API函数dictEmpty与dictRelease

 1 /**
 2  * Clear & Release the hash table
 3  * 清空并释放整个字典:先清空两个哈希表(callback传入NULL),再释放dict结构体本身
 4  */
 5 void dictRelease(dict *d)
 6 {
 7     _dictClear(d,&d->ht[0],NULL);
 8     _dictClear(d,&d->ht[1],NULL);
 9     zfree(d);
10 }
11 
12 /**
13  * 释放所有元素,将表恢复为初始化状态,也就是刚刚创建的状态
14  * 第二个参数传入的NULL
15  */
16 void dictEmpty(dict *d, void(callback)(void*)) {
17     _dictClear(d,&d->ht[0],callback);
18     _dictClear(d,&d->ht[1],callback);
19     d->rehashidx = -1;
20     d->iterators = 0;
21 }

dictRelease不用多说,传入的callback为NULL。而dictEmpty,搜索redis源码所有文件的调用,

 1 匹配到二进制文件 src/redis-check-aof
 2 src/replication.c:    dictEmpty(server.repl_scriptcache_dict,NULL);
 3 src/dict.h:void dictEmpty(dict *d, void(callback)(void*));
 4 匹配到二进制文件 src/redis-cli
 5 匹配到二进制文件 src/dict.o
 6 src/dict.c:void dictEmpty(dict *d, void(callback)(void*)) {
 7 匹配到二进制文件 src/sentinel.o
 8 src/db.c:            dictEmpty(server.db[j].dict,callback);
 9 src/db.c:            dictEmpty(server.db[j].expires,callback);
10 匹配到二进制文件 src/redis-sentinel
11 匹配到二进制文件 src/redis-check-rdb
12 匹配到二进制文件 src/replication.o
13 src/sentinel.c:    dictEmpty(server.commands,NULL);
14 匹配到二进制文件 src/db.o
15 匹配到二进制文件 src/blocked.o
16 src/blocked.c:    dictEmpty(c->bpop.keys,NULL);
17 匹配到二进制文件 src/redis-server

仅db.c里传了callback进来,对应的方法为

1 long long emptyDb(int dbnum, int flags, void(callback)(void*));

继续搜索emptyDb

 1 cluster.c:        emptyDb(-1,EMPTYDB_NO_FLAGS,NULL);
 2 匹配到二进制文件 cluster.o
 3 db.c:long long emptyDb(int dbnum, int flags, void(callback)(void*)) {
 4 db.c:            emptyDbAsync(&server.db[j]);
 5 db.c:/* Return the set of flags to use for the emptyDb() call for FLUSHALL
 6 db.c:    server.dirty += emptyDb(c->db->id,flags,NULL);
 7 db.c:    server.dirty += emptyDb(-1,flags,NULL);
 8 匹配到二进制文件 db.o
 9 debug.c:        emptyDb(-1,EMPTYDB_NO_FLAGS,NULL);
10 debug.c:        emptyDb(-1,EMPTYDB_NO_FLAGS,NULL);
11 匹配到二进制文件 debug.o
12 lazyfree.c:void emptyDbAsync(redisDb *db) {
13 匹配到二进制文件 lazyfree.o
14 匹配到二进制文件 redis-check-aof
15 匹配到二进制文件 redis-check-rdb
16 匹配到二进制文件 redis-sentinel
17 匹配到二进制文件 redis-server
18 replication.c: * data with emptyDb(), and while we load the new data received as an
19 replication.c:/* Callback used by emptyDb() while flushing away old data to load
20 replication.c:        emptyDb(
21 匹配到二进制文件 replication.o
22 server.h:long long emptyDb(int dbnum, int flags, void(callback)(void*));
23 server.h:void emptyDbAsync(redisDb *db);

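大多数调用的地方传入的是NULL;但replication.c中的那次调用(上面搜索结果的第20行,因参数换行而没有被完整显示)传入了回调replicationEmptyDbCallback。从节点在加载主节点发来的新数据之前需要清空旧数据,清空过程可能很耗时,_dictClear每处理65536个bucket就调用一次该回调,向主节点发送一个换行符,以免主节点误判从节点超时。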

八、迭代器

8.1  数据结构

 1 /**
 2  * If safe is set to 1 this is a safe iterator, that means, you can call
 3  * dictAdd, dictFind, and other functions against the dictionary even while
 4  * iterating. Otherwise it is a non safe iterator, and only dictNext()
 5  * should be called while iterating.
 6  * 如果是个安全的迭代器,即safe == 1,则在迭代中可以调用dictAdd、dictFind等方法,否则只能调用dictNext。
 7  * index表示,ht[table]对应的bucket的idx。
 8  */
 9 typedef struct dictIterator {
10     dict *d;
11     long index;
12     int table, safe;
13     dictEntry *entry, *nextEntry;
14     /* unsafe iterator fingerprint for misuse detection. */
15     long long fingerprint;
16 } dictIterator;

8.2 获取迭代器

 1 //获取普通类型迭代器,刚获取的迭代器并不指向具体哪个dictEntry
 2 dictIterator *dictGetIterator(dict *d)
 3 {
 4     dictIterator *iter = zmalloc(sizeof(*iter));
 5 
 6     iter->d = d;
 7     iter->table = 0;
 8     iter->index = -1;
 9     iter->safe = 0;
10     iter->entry = NULL;
11     iter->nextEntry = NULL;
12     return iter;
13 }
14 
15 //获取安全类型的迭代器
16 dictIterator *dictGetSafeIterator(dict *d) {
17     dictIterator *i = dictGetIterator(d);
18 
19     i->safe = 1;
20     return i;
21 }

8.3 迭代器的next操作

 1 /**
 2  * 对于一个新的迭代器,首次调用时,会根据是否安全,做不同操作。安全的迭代器会给dict里的计数器+1,
 3  * 不安全的将会记录本字典的指纹。之后会遍历ht[0],取到第一个非NULL的dictEntry。
 4  * 当ht[0]遍历完且取不到非NULL的dictEntry时,如果正在进行rehashing操作,则会去ht[1]里找。
 5  */
 6 dictEntry *dictNext(dictIterator *iter)
 7 {
 8     while (1) {
 9         if (iter->entry == NULL) {
10             dictht *ht = &iter->d->ht[iter->table];
11             if (iter->index == -1 && iter->table == 0) {
12                 if (iter->safe)
13                     iter->d->iterators++;
14                 else
15                     iter->fingerprint = dictFingerprint(iter->d);
16             }
17             iter->index++;
18             if (iter->index >= (long) ht->size) {
19                 if (dictIsRehashing(iter->d) && iter->table == 0) {
20                     iter->table++;
21                     iter->index = 0;
22                     ht = &iter->d->ht[1];
23                 } else {
24                     break;
25                 }
26             }
27             iter->entry = ht->table[iter->index];
28         } else {
29             iter->entry = iter->nextEntry;
30         }
31         if (iter->entry) {
32             /* We need to save the 'next' here, the iterator user
33              * may delete the entry we are returning. */
34             iter->nextEntry = iter->entry->next;
35             return iter->entry;
36         }
37     }
38     return NULL;
39 }

8.4 遍历过程

 1 /*
 2 
 3      +-------------------------+
 4 +----|dict *                   |
 5 |    +-------------------------+
 6 |    |long index               |
 7 |    +-------------------------+
 8 |    |int table                |
 9 |    +-------------------------+
10 |    |int safe                 |
11 |    +-------------------------+
12 |    |dictEntry *entry         |->NULL
13 |    +-------------------------+
14 |    |dictEntry *nextEntry     |->NULL
15 |    +-------------------------+
16 |    |long long fingerprint    |
17 |    +-------------------------+
18 |
19 |
20 |
21 |                                                       +-->NULL
22 |                                      +->+----------+ /
23 |                                      |  |dictEntry*|/    +----+
24 |                                      |  +----------+ +-->|K2|V|->NULL
25 |                                      |  |dictEntry*|/    +----+
26 +--->+------------+    /+-----------+  |  +----------+
27      |dictType*   |   / |dictEntry**|--+  |dictEntry*|\    +----+
28      +------------+  /  +-----------+     +----------+ +-->|K3|V|->NULL
29      |privdata    | /   |size=4     |     |dictEntry*|\    +----+
30      +------------+/    +-----------+     +----------+ \   +----+
31      |ht[2]       |     |sizemask=3 |                   +->|K4|V|->NULL
32      +------------+\    +-----------+                      +----+
33      |rehashidx=1 | \   |used=3     |
34      +------------+  \  +-----------+
35      |iterators=0 |   \
36      +------------+    \+-----------+  +->+----------+   +----+  +----+
37                         |dictEntry**|--+  |dictEntry*|-->|K1|V|->|K5|V|->NULL
38                         +-----------+     +----------+   +----+  +----+
39                         |size=8     |     |dictEntry*|->NULL
40                         +-----------+     +----------+
41                         |sizemask=7 |     |dictEntry*|->NULL
42                         +-----------+     +----------+
43                         |used=2     |     |dictEntry*|->NULL
44                         +-----------+     +----------+
45                                           |dictEntry*|->NULL
46                                           +----------+
47                                           |dictEntry*|->NULL
48                                           +----------+
49                                           |dictEntry*|->NULL
50                                           +----------+
51                                           |dictEntry*|->NULL
52                                           +----------+
53 */

遍历顺序为,K2,K3,K4,K1,K5。

8.5 迭代器销毁

 1 /**
 2  * 迭代器销毁
 3  * 与首次执行next操作相对应,若为safe的迭代器,要给dict的计数减1,否则要校验期间dict的指纹是否发生了变化。
 4  */
 5 void dictReleaseIterator(dictIterator *iter)
 6 {
 7     if (!(iter->index == -1 && iter->table == 0)) {
 8         if (iter->safe)
 9             iter->d->iterators--;
10         else
11             assert(iter->fingerprint == dictFingerprint(iter->d));
12     }
13     zfree(iter);
14 }

8.6 普通迭代器的指纹

 1 /**
 2  * A fingerprint is a 64 bit number that represents the state of the dictionary
 3  * at a given time, it's just a few dict properties xored together.
 4  *
 5  * 指纹是一个 64 位的数字,代表字典在给定时间的状态,它只是几个字典属性异或在一起。
 6  *
 7  * When an unsafe iterator is initialized, we get the dict fingerprint, and check
 8  * the fingerprint again when the iterator is released.
 9  *
10  * 当一个不安全的迭代器被初始化时,我们得到dict指纹,当迭代器被释放时再次检查指纹。
11  *
12  * If the two fingerprints are different it means that the user of the iterator
13  * performed forbidden operations against the dictionary while iterating.
14  *
15  * 如果两个指纹不同,则表示迭代器的用户在迭代时对字典执行了禁止操作。
16  *
17  * 指纹的计算
18  *
19  * 对于不安全的迭代器,在迭代过程中,不允许执行任何修改dict的操作,是只读的,不会发生迭代器失效的问题。
20  * 对于安全的迭代器,在进行操作本节点的时候,redis中记录了当前迭代的bucket idx,以及当前dictEntry的next节点。
21  * 如果只是add操作,即使是用了头插法把新dictEntry插在本节点之前,对迭代器本身并没有影响。
22  * 如果是delete了本节点,迭代器中还记录了next节点的位置,调用next时直接取就好。
23  * 如果next为空,则可以认为当前bucket遍历完了,取下一个bucket就行了。
24  * 当然,如果在add/delete等操作的时候,进行了rehashing操作,那么当前迭代器里记录的next,在rehashing之后,
25  * 可能就不是当前节点新位置的next了。所以在使用安全迭代器的时候,禁止了rehashing操作。
26  */
27 long long dictFingerprint(dict *d) {
28     long long integers[6], hash = 0;
29     int j;
30 
31     integers[0] = (long) d->ht[0].table;
32     integers[1] = d->ht[0].size;
33     integers[2] = d->ht[0].used;
34     integers[3] = (long) d->ht[1].table;
35     integers[4] = d->ht[1].size;
36     integers[5] = d->ht[1].used;
37 
38     /* We hash N integers by summing every successive integer with the integer
39      * hashing of the previous sum. Basically:
40      *
41      * Result = hash(hash(hash(int1)+int2)+int3) ...
42      *
43      * This way the same set of integers in a different order will (likely) hash
44      * to a different number. */
45     for (j = 0; j < 6; j++) {
46         hash += integers[j];
47         /* For the hashing step we use Tomas Wang's 64 bit integer hash. */
48         hash = (~hash) + (hash << 21); // hash = (hash << 21) - hash - 1;
49         hash = hash ^ (hash >> 24);
50         hash = (hash + (hash << 3)) + (hash << 8); // hash * 265
51         hash = hash ^ (hash >> 14);
52         hash = (hash + (hash << 2)) + (hash << 4); // hash * 21
53         hash = hash ^ (hash >> 28);
54         hash = hash + (hash << 31);
55     }
56     return hash;
57 }

九、其它操作

dict还支持其它的一些操作。

9.1 随机获取一个key

  dictGetRandomKey

9.2 随机获取n个key

  dictGetSomeKeys

9.3 scan操作

  关于scan操作,redis采用了一个很巧妙的方法,保证了在开始scan时未删除的元素一定能遍历到,又能保证尽量少地重复遍历。采用了reverse binary iteration方法,也就是每次是向cursor的最高位加1,并向低位方向进位

 这就是该算法的精妙所在,使用该算法,可以做到下面两点:

 

         a:开始遍历那一刻的所有元素,只要不被删除,肯定能被遍历到,不管字典扩展还是缩小

 

         b:该算法可能会返回重复元素,但是已经把返回重复元素的可能性降到了最低;

 

参考网址:

  https://blog.csdn.net/gqtcgq/article/details/50533336

  https://github.com/redis/redis/pull/579

9.3.1 函数原型

  1 /* dictScan() is used to iterate over the elements of a dictionary.
  2  *
  3  * Iterating works the following way:
  4  * 迭代器的工作方式如下
  5  *
  6  * 1) Initially you call the function using a cursor (v) value of 0.
  7  * 初始访问位置为0
  8  * 2) The function performs one step of the iteration, and returns the
  9  *    new cursor value you must use in the next call.
 10  * 访问当前位置的元素,并返回下一次访问的cursor的位置(实际上就是二进制高位+1,然后如果必要的话向低位进位)
 11  * 3) When the returned cursor is 0, the iteration is complete.
 12  * 当返回的下一个访问位置是0,表示整个哈希表遍历结束
 13  *
 14  * The function guarantees all elements present in the
 15  * dictionary get returned between the start and end of the iteration.
 16  * However it is possible some elements get returned multiple times.
 17  * 这个函数保证了从开始遍历那一刻开始哈希表中的所有元素,只要不被删除,肯定能被遍历到,不管字典扩展还是缩小
 18  * 但是有的元素可能会被遍历多次,但是已经把返回重复元素的可能性降到了最低;
 19  *
 20  * For every element returned, the callback argument 'fn' is
 21  * called with 'privdata' as first argument and the dictionary entry
 22  * 'de' as second argument.
 23  * 对于每一个遍历到的元素,都是使用fn函数进行处理,fn的第一个参数是privdata,第二个参数是遍历到的元素de
 24  *
 25  * HOW IT WORKS.
 26  *
 27  * The iteration algorithm was designed by Pieter Noordhuis.
 28  * The main idea is to increment a cursor starting from the higher order
 29  * bits. That is, instead of incrementing the cursor normally, the bits
 30  * of the cursor are reversed, then the cursor is incremented, and finally
 31  * the bits are reversed again.
 32  *
 33  * This strategy is needed because the hash table may be resized between
 34  * iteration calls.
 35  *
 36  * dict.c hash tables are always power of two in size, and they
 37  * use chaining, so the position of an element in a given table is given
 38  * by computing the bitwise AND between Hash(key) and SIZE-1
 39  * (where SIZE-1 is always the mask that is equivalent to taking the rest
 40  *  of the division between the Hash of the key and SIZE).
 41  *
 42  * For example if the current hash table size is 16, the mask is
 43  * (in binary) 1111. The position of a key in the hash table will always be
 44  * the last four bits of the hash output, and so forth.
 45  *
 46  * WHAT HAPPENS IF THE TABLE CHANGES IN SIZE?
 47  *
 48  * If the hash table grows, elements can go anywhere in one multiple of
 49  * the old bucket: for example let's say we already iterated with
 50  * a 4 bit cursor 1100 (the mask is 1111 because hash table size = 16).
 51  *
 52  * If the hash table will be resized to 64 elements, then the new mask will
 53  * be 111111. The new buckets you obtain by substituting in ??1100
 54  * with either 0 or 1 can be targeted only by keys we already visited
 55  * when scanning the bucket 1100 in the smaller hash table.
 56  *
 57  * By iterating the higher bits first, because of the inverted counter, the
 58  * cursor does not need to restart if the table size gets bigger. It will
 59  * continue iterating using cursors without '1100' at the end, and also
 60  * without any other combination of the final 4 bits already explored.
 61  *
 62  * Similarly when the table size shrinks over time, for example going from
 63  * 16 to 8, if a combination of the lower three bits (the mask for size 8
 64  * is 111) were already completely explored, it would not be visited again
 65  * because we are sure we tried, for example, both 0111 and 1111 (all the
 66  * variations of the higher bit) so we don't need to test it again.
 67  *
 68  * WAIT... YOU HAVE *TWO* TABLES DURING REHASHING!
 69  *
 70  * Yes, this is true, but we always iterate the smaller table first, then
 71  * we test all the expansions of the current cursor into the larger
 72  * table. For example if the current cursor is 101 and we also have a
 73  * larger table of size 16, we also test (0)101 and (1)101 inside the larger
 74  * table. This reduces the problem back to having only one table, where
 75  * the larger one, if it exists, is just an expansion of the smaller one.
 76  *
 77  * LIMITATIONS
 78  *
 79  * This iterator is completely stateless, and this is a huge advantage,
 80  * including no additional memory used.
 81  *
 82  * The disadvantages resulting from this design are:
 83  *
 84  * 1) It is possible we return elements more than once. However this is usually
 85  *    easy to deal with in the application level.
 86  * 2) The iterator must return multiple elements per call, as it needs to always
 87  *    return all the keys chained in a given bucket, and all the expansions, so
 88  *    we are sure we don't miss keys moving during rehashing.
 89  * 3) The reverse cursor is somewhat hard to understand at first, but this
 90  *    comment is supposed to help.
 91  */
 92 unsigned long dictScan(dict *d,
 93                        unsigned long v,
 94                        dictScanFunction *fn,
 95                        dictScanBucketFunction* bucketfn,
 96                        void *privdata)
 97 {
 98     dictht *t0, *t1;
 99     const dictEntry *de, *next;
100     unsigned long m0, m1;
101 
102     if (dictSize(d) == 0) return 0;
103 
104     if (!dictIsRehashing(d)) {
105         /**
106          * 如果不是处于rehash过程中,只需要遍历哈希表ht[0],然后使用用户定义的函数fn对其进行处理
107          */
108         t0 = &(d->ht[0]);
109         m0 = t0->sizemask;
110 
111         /* Emit entries at cursor */
112         if (bucketfn) bucketfn(privdata, &t0->table[v & m0]);
113         de = t0->table[v & m0];
114         while (de) {
115             next = de->next;
116             fn(privdata, de);
117             de = next;
118         }
119 
120         /* Set unmasked bits so incrementing the reversed cursor
121          * operates on the masked bits */
122         v |= ~m0;
123 
124         /* Increment the reverse cursor */
125         v = rev(v);
126         v++;
127         v = rev(v);
128 
129     } else {
130         /**
131          * 如果处于rehash过程中,先遍历较小的哈希表,再遍历较大的哈希表,同时使用用户定义的函数fn对其进行处理
132          */
133         t0 = &d->ht[0];
134         t1 = &d->ht[1];
135 
136         /* Make sure t0 is the smaller and t1 is the bigger table 根据哈希表的大小调整遍历顺序*/
137         if (t0->size > t1->size) {
138             t0 = &d->ht[1];
139             t1 = &d->ht[0];
140         }
141 
142         //获取两张哈希表的掩码
143         m0 = t0->sizemask;
144         m1 = t1->sizemask;
145 
146         /* Emit entries at cursor */
147         if (bucketfn) bucketfn(privdata, &t0->table[v & m0]);
148         //根据v&m0,找到t0中需要迭代的bucket,然后迭代其中的每个节点即可。
149         de = t0->table[v & m0];
150         while (de) {
151             next = de->next;
152             fn(privdata, de);
153             de = next;
154         }
155 
156         /**
157          * Iterate over indices in larger table that are the expansion
158          * of the index pointed to by the cursor in the smaller table
159          * 接下来的代码稍显复杂,但是,本质上,就是对于t0中索引为v&m0的bucket中的所有节点,
160          * 在其扩展到t1中后,遍历它们所有可能落入的bucket中的节点。语言不好描述,
161          * 举个例子就明白了:若t0长度为8,则m0为111,v&m0就是保留v的低三位,假设为abc。
162          * 若t1长度为32,则m1为11111,该过程就是:遍历完t0中索引为abc的bucket之后,接着遍历t1中,
163          * 索引为00abc、01abc、10abc、11abc的bucket中的节点。
164          */
165         do {
166             /* Emit entries at cursor */
167             if (bucketfn) bucketfn(privdata, &t1->table[v & m1]);
168             de = t1->table[v & m1];
169             while (de) {
170                 next = de->next;
171                 fn(privdata, de);
172                 de = next;
173             }
174 
175             /* Increment the reverse cursor not covered by the smaller mask.*/
176             v |= ~m1;
177             v = rev(v);
178             v++;
179             v = rev(v);
180 
181             /* Continue while bits covered by mask difference is non-zero */
182         } while (v & (m0 ^ m1));
183     }
184 
185     //返回下一个需要遍历的位置
186     return v;
187 }

9.3.2 核心算法测试

下面是抽取核心代码的逻辑而写的测试代码

 1 #include <iostream>
 2 #include <vector>
 3 
 4 using namespace std;
 5 
 6 static unsigned long rev(unsigned long v) {
 7     unsigned long s = 8 * sizeof(v); // bit size; must be power of 2
 8     unsigned long mask = ~0;
 9     while ((s >>= 1) > 0) {
10         mask ^= (mask << s);
11         v = ((v >> s) & mask) | ((v << s) & ~mask);
12     }
13     return v;
14 }
15 
16 void printbits(int n, int x)
17 {
18     vector<int> reg;
19     int bit = 1;
20     while (x)
21     {
22         reg.push_back((n & bit) ? 1 : 0);
23         bit *= 2;
24         x--;
25     }
26     for (int i = reg.size() - 1; i >= 0; i--)
27         cout << reg[i];
28 }
29 
30 void test_dictScan_iter(int smalltablesize, int largetablesize)
31 {
32     unsigned long v;
33     unsigned long m0, m1;
34 
35     v = 0;
36     m0 = smalltablesize - 1;
37     m1 = largetablesize - 1;
38 
39     do
40     {
41         printf("\nsmall v is: ");
42         printbits(v & m0, (int)log2(smalltablesize));
43         printf("\n");
44         unsigned long vt = v;
45 
46         do
47         {
48             printf("large v is: ");
49             printbits(vt & m1, (int)log2(largetablesize));
50             printf("\n");
51 
52             vt |= ~m1;
53             vt = rev(vt);
54             vt++;
55             vt = rev(vt);
56 
57             //v = (((v | m0) + 1) & ~m0) | (v & m0);
58         } while (vt & (m0 ^ m1));
59 
60         v |= ~m0;
61         v = rev(v);
62         v++;
63         v = rev(v);
64     } while (v != 0);
65 }
66 
67 int main()
68 {
69     test_dictScan_iter(8, 32);
70     return 0;
71 }

测试结果

 1 small v is: 000
 2 large v is: 00000
 3 large v is: 10000
 4 large v is: 01000
 5 large v is: 11000
 6 
 7 small v is: 100
 8 large v is: 00100
 9 large v is: 10100
10 large v is: 01100
11 large v is: 11100
12 
13 small v is: 010
14 large v is: 00010
15 large v is: 10010
16 large v is: 01010
17 large v is: 11010
18 
19 small v is: 110
20 large v is: 00110
21 large v is: 10110
22 large v is: 01110
23 large v is: 11110
24 
25 small v is: 001
26 large v is: 00001
27 large v is: 10001
28 large v is: 01001
29 large v is: 11001
30 
31 small v is: 101
32 large v is: 00101
33 large v is: 10101
34 large v is: 01101
35 large v is: 11101
36 
37 small v is: 011
38 large v is: 00011
39 large v is: 10011
40 large v is: 01011
41 large v is: 11011
42 
43 small v is: 111
44 large v is: 00111
45 large v is: 10111
46 large v is: 01111
47 large v is: 11111

可见,无论v取何值,只要字典处于rehash过程中,遍历完小表中的某个bucket之后,都会接着遍历大表中与之对应的所有bucket。具体的核心逻辑代码如下:

 1 do {
 2     de = t1->table[v & m1];
 3 
 4     ...
 5 
 6     /* Increment the reverse cursor not covered by the smaller mask.*/
 7     v |= ~m1;
 8     v = rev(v);
 9     v++;
10     v = rev(v);
11 
12     /* Continue while bits covered by mask difference is non-zero */
13 } while (v & (m0 ^ m1));

首先迭代t1中索引为v&m1的bucket,然后对v中被m1覆盖、但未被m0覆盖的那几位(即m0 ^ m1对应的位)做一次反向二进制加1(先翻转,加1,再翻转回来),在这几位发生溢出之前,v中被m0覆盖的低位保持不变。循环条件v & (m0 ^ m1)表示一直循环到这几位全部回到0为止;此时进位已经传递到m0覆盖的低位上,v也就指向了小表中下一个要遍历的bucket。

参考文章

https://www.cnblogs.com/chinxi/p/12235526.html

posted @ 2021-07-05 09:44  Mr-xxx  阅读(54)  评论(0编辑  收藏  举报