redis 5.0.2 源码阅读——字典dict
redis中字典相关的文件为:dict.h与dict.c
与其说是一个字典,倒不如说是一个哈希表。
一、数据结构
1.1 dictEntry结构体
1 /**
2 * dictEntry是一个kv对的单向链表,其中v是一个联合体,支持数字,或者是指向一块内存的指针。
3 */
4 typedef struct dictEntry {
5 void *key;
6 union {
7 void *val;
8 uint64_t u64;
9 int64_t s64;
10 double d;
11 } v;
12 struct dictEntry *next;
13 } dictEntry;
具体结构形如
1 /*
2 +---------------+
3 |void *key |
4 +---------------+
5 |union{...} v |
6 +---------------+
7 |dictEntry *next|---+
8 +---------------+ |
9 |
10 +---------------+ <-+
11 |void *key |
12 +---------------+
13 |union{...} v |
14 +---------------+
15 |dictEntry *next|
16 +---------------+
17 */
为了节约篇幅,后续用以下结构表示
1 /*
2 +---+ +---+
3 |K|V|->|K|V|->NULL
4 +---+ +---+
5 */
1.2 dictht结构体
1 /**
2 * This is our hash table structure. Every dictionary has two of this as we
3 * implement incremental rehashing, for the old to the new table.
4 * 这是我们的哈希表结构。 每个字典都有两个这样的,因为我们实现了增量重新散列,从旧表到新表。
5 * 使用开链法解决冲突问题
6 *
7 * 其中,table指向大小为sizeof(dictEntry*) * size的一片内存空间,每个dictEntry*可以视为一个bucket,
8 * 每个bucket下挂着一个dictEntry单向链表。
9 * size的值始终为2的幂,而sizemask的值始终为size-1,其作用是决定kv对要挂在哪个bucket上。
10 * 举个例子,size=4时,sizemask=3,其二进制为 0011,若通过hash函数计算出来key对应的hash值hash_value为5,
11 * 二进制为0101,则通过位运算 sizemask & hash_value = 0011 & 0101 = 0001,十进制为1,则将会挂在idx = 1的bucket上。
12 */
13 typedef struct dictht {
14 //dictEntry*类型的数组
15 dictEntry **table;
16 //dictEntry*数组的长度
17 unsigned long size;
18 /**
19 * 这样写可能更容易理解
20 * const unsigned long size = 4;
21 * dictEntry *table[size];
22 */
23
24 //sizemask,始终为size-1
25 unsigned long sizemask;
26
27 //当前总dictEntry数量
28 unsigned long used;
29 } dictht;
dictht是一个hash table,整体结构大致为
1 /*
2 +----------------------+ +---> +-----------------+ +---+
3 |dictEntry **table |---+ |dictEntry *bucket|->|K|V|->NULL
4 +----------------------+ +-----------------+ +---+
5 |unsigned long size = 4| |dictEntry *bucket|->NULL
6 +----------------------+ +-----------------+
7 |unsigned long sizemask| |dictEntry *bucket|->NULL
8 +----------------------+ +-----------------+
9 |unsigned long used | |dictEntry *bucket|->NULL
10 +----------------------+ +-----------------+
11 */
1.3 dictType结构体
1 /**
2 * dictType用于自定义一些操作的方法,如hash函数、拷贝key、拷贝value、比较key、销毁key、销毁value。
3 */
4 typedef struct dictType {
5 uint64_t (*hashFunction)(const void *key);
6 void *(*keyDup)(void *privdata, const void *key);
7 void *(*valDup)(void *privdata, const void *obj);
8 int (*keyCompare)(void *privdata, const void *key1, const void *key2);
9 void (*keyDestructor)(void *privdata, void *key);
10 void (*valDestructor)(void *privdata, void *obj);
11 } dictType;
1.4 dict结构体
1 typedef struct dict {
2 dictType *type;
3 //type中函数的传入参数
4 void *privdata;
5 dictht ht[2];
6 /**
7 * rehashidx,是与ht[2]配合实现渐进式rehash操作的。若使用一步到位的方式,
8 * 当key的数量非常大的时候,rehashing期间,是会卡死所有操作的。
9 */
10 long rehashidx; /* rehashing not in progress if rehashidx == -1 */
11 /**
12 * iterators,是用于记录当前使用的迭代器数量,与rehashing操作有关。
13 */
14 unsigned long iterators; /* number of iterators currently running */
15 } dict;
之前提到的dictType与dictht都是dict的成员变量。除此之外,还有privdata,是在创建dict的时候调用者传入,用于特定操作时回传给函数的。如
1 /**
2 * 利用宏定义实现函数的调用
3 * 依次是设置有符号整型值、设置无符号整型值、设置double类型的值、
4 * 释放key、设置key、判断key是否相等
5 */
6 #define dictSetSignedIntegerVal(entry, _val_) \
7 do { (entry)->v.s64 = _val_; } while(0)
8
9 #define dictSetUnsignedIntegerVal(entry, _val_) \
10 do { (entry)->v.u64 = _val_; } while(0)
11
12 #define dictSetDoubleVal(entry, _val_) \
13 do { (entry)->v.d = _val_; } while(0)
14
15 #define dictFreeKey(d, entry) \
16 if ((d)->type->keyDestructor) \
17 (d)->type->keyDestructor((d)->privdata, (entry)->key)
18
19 #define dictSetKey(d, entry, _key_) do { \
20 if ((d)->type->keyDup) \
21 (entry)->key = (d)->type->keyDup((d)->privdata, _key_); \
22 else \
23 (entry)->key = (_key_); \
24 } while(0)
25
26 #define dictCompareKeys(d, key1, key2) \
27 (((d)->type->keyCompare) ? \
28 (d)->type->keyCompare((d)->privdata, key1, key2) : \
29 (key1) == (key2))
1.5 迭代器
iterators,是用于记录当前使用的安全迭代器数量,与rehashing操作有关。
1 /**
2 * If safe is set to 1 this is a safe iterator, that means, you can call
3 * dictAdd, dictFind, and other functions against the dictionary even while
4 * iterating. Otherwise it is a non safe iterator, and only dictNext()
5 * should be called while iterating.
6 * 如果是个安全的迭代器,即safe == 1,则在迭代中可以调用dictAdd、dictFind等方法,否则只能调用dictNext。
7 * index表示,ht[table]对应的bucket的idx。
8 */
9 typedef struct dictIterator {
10 dict *d;
11 long index;
12 int table, safe;
13 dictEntry *entry, *nextEntry;
14 /* unsafe iterator fingerprint for misuse detection. */
15 long long fingerprint;
16 } dictIterator;
整体结构如下:
1 /*
2 +---------+ /+-----------+ +-->+----------+ +---+
3 |dictType*| / |dictEntry**|---+ |dictEntry*|->|K|V|->NULL
4 +---------+ / +-----------+ +----------+ +---+
5 |privdata | / |size | |dictEntry*|->NULL
6 +---------+/ +-----------+ +----------+
7 |ht[2] | |sizemask | |dictEntry*|->NULL
8 +---------+\ +-----------+ +----------+
9 |rehashidx| \ |used | |dictEntry*|->NULL
10 +---------+ \ +-----------+ +----------+
11 |iterators| \
12 +---------+ \+-----------+
13 |dictEntry**|-->NULL
14 +-----------+
15 |size |
16 +-----------+
17 |sizemask |
18 +-----------+
19 |used |
20 +-----------+
21 */
二、创建
2.1 创建和复位函数
1 /**
2 * Reset a hash table already initialized with ht_init().
3 * NOTE: This function should only be called by ht_destroy().
4 * 重置一个使用ht_init函数初始化的哈希表
5 * 只能通过ht_destroy函数调用
6 */
7
8 static void _dictReset(dictht *ht)
9 {
10 ht->table = NULL;
11 ht->size = 0;
12 ht->sizemask = 0;
13 ht->used = 0;
14 }
15
16 /* Create a new hash table 创建一个新的哈希表*/
17 dict *dictCreate(dictType *type,
18 void *privDataPtr)
19 {
20 //分配内存空间
21 dict *d = zmalloc(sizeof(*d));
22
23 //对哈希表进行初始化
24 _dictInit(d,type,privDataPtr);
25 return d;
26 }
27
28 /* Initialize the hash table 哈希表的初始化*/
29 int _dictInit(dict *d, dictType *type,
30 void *privDataPtr)
31 {
32 _dictReset(&d->ht[0]);
33 _dictReset(&d->ht[1]);
34 d->type = type;
35 d->privdata = privDataPtr;
36 d->rehashidx = -1;
37 d->iterators = 0;
38 return DICT_OK;
39 }
可以调用dictCreate创建一个空的dict,它会分配好dict的空间,并初始化所有成员变量。在这里把privdata传入并保存。搜了一下整个redis源码的dictCreate调用,看到传入的值全为NULL。目前的理解暂时不清楚这个变量是什么时候赋值的。
2.2 结构图
初始化后的dict结构如下:
1 /*
2 +------------+ /+-----------+
3 |dictType* | / |dictEntry**|-->NULL
4 +------------+ / +-----------+
5 |privdata | / |size=0 |
6 +------------+/ +-----------+
7 |ht[2] | |sizemask=0 |
8 +------------+\ +-----------+
9 |rehashidx=-1| \ |used=0 |
10 +------------+ \ +-----------+
11 |iterators=0 | \
12 +------------+ \+-----------+
13 |dictEntry**|-->NULL
14 +-----------+
15 |size=0 |
16 +-----------+
17 |sizemask=0 |
18 +-----------+
19 |used=0 |
20 +-----------+
21 */
刚创建好的dict是存不了任何数据的,其两个hash table的size都为0。
2.3 resize函数
1 /**
2 * Resize the table to the minimal size that contains all the elements,
3 * but with the invariant of a USED/BUCKETS ratio near to <= 1
4 * 重新设置哈希表的大小,重新设置后的大小能保存所有的元素
5 * 保持used/buckets的比例<=1不变
6 */
7 int dictResize(dict *d)
8 {
9 int minimal;
10
11 /**
12 * #define dictIsRehashing(d) ((d)->rehashidx != -1)
13 * 当dict_can_resize为0或(d)->rehashidx不为-1时,直接返回1,也就是失败
14 */
15 if (!dict_can_resize || dictIsRehashing(d))
16 return DICT_ERR;
17 //得到当前元素的个数
18 minimal = d->ht[0].used;
19 if (minimal < DICT_HT_INITIAL_SIZE) //DICT_HT_INITIAL_SIZE:4
20 minimal = DICT_HT_INITIAL_SIZE;
21 return dictExpand(d, minimal);
22 }
23
24 /**
25 * Expand or create the hash table
26 * 扩容或者创建哈希表
27 * d:原来的封装哈希表
28 * size:期望的哈希表桶数
29 *
30 * 这个函数主要是产生一个新的哈希表(dictht),并将dict.rehashidx置为0,表示开始进行rehash动作(若ht[0]为空,则只是初始化ht[0],不进入rehash状态)
31 */
32 int dictExpand(dict *d, unsigned long size)
33 {
34 /* the size is invalid if it is smaller than the number of
35 * elements already inside the hash table */
36 if (dictIsRehashing(d) || d->ht[0].used > size)
37 return DICT_ERR;
38
39 dictht n; /* the new hash table */
40 //得到合适大小的哈希表的桶数
41 unsigned long realsize = _dictNextPower(size);
42
43 /* Rehashing to the same table size is not useful. 如果容量大小没有发生变化,返回DICT_ERR,也就是1*/
44 if (realsize == d->ht[0].size) return DICT_ERR;
45
46 /**
47 * Allocate the new hash table and initialize all pointers to NULL
48 * 初始化新的哈希表的size和sizemask,为table分配内存空间
49 */
50 n.size = realsize;
51 n.sizemask = realsize-1;
52 n.table = zcalloc(realsize*sizeof(dictEntry*));
53 n.used = 0;
54
55 /**
56 * Is this the first initialization? If so it's not really a rehashing
57 * we just set the first hash table so that it can accept keys.
58 * 判断是否是第一次初始化,如果是,那就不是rehashing操作,我们只需要设置ht的第一个哈希表(ht[0])
59 * 然后返回DICT_OK,也就是0,成功。也就是说如果ht[0].table == NULL,说明是第一次初始化,
60 * 那不是真正的重新哈希,相当于创建哈希表的操作,只需要设置第一个哈希表即可
61 */
62 if (d->ht[0].table == NULL) {
63 d->ht[0] = n;
64 return DICT_OK;
65 }
66
67 /**
68 * Prepare a second hash table for incremental rehashing
69 * 假设 ht[0] 不为空。那么这就是一次扩展字典的行为
70 * 将新哈希表设置为 ht[1] ,并打开 rehash 标识
71 */
72 d->ht[1] = n;
73 d->rehashidx = 0;
74 return DICT_OK;
75 }
76
77 /* Our hash table capability is a power of two 哈希表的容量是2的幂*/
78 static unsigned long _dictNextPower(unsigned long size)
79 {
80 //#define DICT_HT_INITIAL_SIZE 4
81 unsigned long i = DICT_HT_INITIAL_SIZE;
82
83 //如果size大于等于LONG_MAX,设置为LONG_MAX + 1LU
84 if (size >= LONG_MAX)
85 return LONG_MAX + 1LU;
86 while(1) {
87 if (i >= size)
88 return i;
89 //以两倍的速度扩大
90 i *= 2;
91 }
92 }
_dictNextPower用于获取当前要分配给hash table的size,得到的值一定是2的幂,且最小为4(从初始值4开始不断翻倍)。
dictExpand,从源码注释上看,它是为了扩容hash table,或者创建一个。它不允许与rehashing操作同时进行,也不能强制缩容。在使用_dictNextPower得到需要的size之后,它先是使用一个临时变量n去分配空间,然后进行判断,若ht[0].table的值为NULL,则认为是刚create出来的dict,直接把n赋值给ht[0],否则给ht[1],并开始rehashing操作。
三、rehashing操作
3.1 示例字典
若有这样一个dict,假设K1、K2、K3、K4计算出来的hash值分别为0、5、2、7,使用sizemask计算出来的idx分别为0、1、2、3
1 /*
2 +----+
3 +->|K1|V|->NULL
4 +------------+ /+-----------+ +->+----------+ / +----+
5 |dictType* | / |dictEntry**|--+ |dictEntry*|/ +----+
6 +------------+ / +-----------+ +----------+ +-->|K2|V|->NULL
7 |privdata | / |size=4 | |dictEntry*|/ +----+
8 +------------+/ +-----------+ +----------+
9 |ht[2] | |sizemask=3 | |dictEntry*|\ +----+
10 +------------+\ +-----------+ +----------+ +-->|K3|V|->NULL
11 |rehashidx=-1| \ |used=4 | |dictEntry*|\ +----+
12 +------------+ \ +-----------+ +----------+ \ +----+
13 |iterators=0 | \ +->|K4|V|->NULL
14 +------------+ \+-----------+ +----+
15 |dictEntry**|-->NULL
16 +-----------+
17 |size=0 |
18 +-----------+
19 |sizemask=0 |
20 +-----------+
21 |used=0 |
22 +-----------+
23 */
3.2 是否rehashing判断
判断是否需要对哈希表大小进行扩容
1 /* Expand the hash table if needed 如果必要的话就扩大这个哈希表*/
2 static int _dictExpandIfNeeded(dict *d)
3 {
4 /**
5 * Incremental rehashing already in progress. Return.
6 * 如果已经处于rehashing过程中
7 * #define dictIsRehashing(d) ((d)->rehashidx != -1)
8 */
9 if (dictIsRehashing(d))
10 return DICT_OK;
11
12 /**
13 * If the hash table is empty expand it to the initial size.
14 * 如果哈希表是空的,就是直接将ht[0]扩容为哈希表的初始值4
15 */
16 if (d->ht[0].size == 0)
17 return dictExpand(d, DICT_HT_INITIAL_SIZE);
18
19 /* If we reached the 1:1 ratio, and we are allowed to resize the hash
20 * table (global setting) or we should avoid it but the ratio between
21 * elements/buckets is over the "safe" threshold, we resize doubling
22 * the number of buckets.
23 *
24 * static unsigned int dict_force_resize_ratio = 5;
25 * static int dict_can_resize = 1;
26 *
27 * 当used >= size并且(dict_can_resize == TRUE或ht[0]哈希表中存在的元素个数超过哈希表桶数的五倍)的时候
28 * 需要调用dictExpand进入rehashing状态。dict_can_resize默认为1
29 *
30 * 假设哈希表的已用节点数 >= 哈希表的大小。
31 * 而且下面条件任一个为真:
32 * 1) dict_can_resize 为真
33 * 2) 已用节点数除以哈希表大小之比大于
34 * dict_force_resize_ratio
35 * 那么调用 dictExpand 对哈希表进行扩展
36 * 扩展的体积至少为已使用节点数的两倍
37 *
38 * 注意:本函数只负责扩容;收缩由dictResize完成,它会让used/size的比例接近1:1。
39 */
40 if (d->ht[0].used >= d->ht[0].size &&
41 (dict_can_resize ||
42 d->ht[0].used/d->ht[0].size > dict_force_resize_ratio))
43 {
44 //需要的size为当前used * 2,即为8。调用dictExpand之后的结构:
45 return dictExpand(d, d->ht[0].used*2);
46 }
47 return DICT_OK;
48 }
通过函数_dictExpandIfNeeded,可知当used >= size且dict_can_resize == TRUE的时候,需要调用dictExpand进入rehashing状态。dict_can_resize默认为1。
1 static int dict_can_resize = 1;
2 static unsigned int dict_force_resize_ratio = 5;
3.3 开始rehashing
需要的size为当前used * 2,即为8。调用dictExpand之后的结构:
1 /*
2 +----+
3 +->|K1|V|->NULL
4 +->+----------+ / +----+
5 | |dictEntry*|/ +----+
6 | +----------+ +-->|K2|V|->NULL
7 | |dictEntry*|/ +----+
8 +------------+ /+-----------+ | +----------+
9 |dictType* | / |dictEntry**|--+ |dictEntry*|\ +----+
10 +------------+ / +-----------+ +----------+ +-->|K3|V|->NULL
11 |privdata | / |size=4 | |dictEntry*|\ +----+
12 +------------+/ +-----------+ +----------+ \ +----+
13 |ht[2] | |sizemask=3 | +->|K4|V|->NULL
14 +------------+\ +-----------+ +----+
15 |rehashidx=0 | \ |used=4 |
16 +------------+ \ +-----------+
17 |iterators=0 | \
18 +------------+ \+-----------+ +->+----------+
19 |dictEntry**|--+ |dictEntry*|->NULL
20 +-----------+ +----------+
21 |size=8 | |dictEntry*|->NULL
22 +-----------+ +----------+
23 |sizemask=7 | |dictEntry*|->NULL
24 +-----------+ +----------+
25 |used=0 | |dictEntry*|->NULL
26 +-----------+ +----------+
27 |dictEntry*|->NULL
28 +----------+
29 |dictEntry*|->NULL
30 +----------+
31 |dictEntry*|->NULL
32 +----------+
33 |dictEntry*|->NULL
34 +----------+
35 */
经过_dictExpandIfNeeded可以知道是否需要进行rehash操作,如果需要的话,再通过dictExpand函数,就可以得到合适大小的哈希表,并且该函数还会将rehashidx设置为0,这样dictRehash函数就可以根据rehashidx逐步进行rehashing操作。
1 /**
2 * Performs N steps of incremental rehashing. Returns 1 if there are still
3 * keys to move from the old to the new hash table, otherwise 0 is returned.
4 *
5 * Note that a rehashing step consists in moving a bucket (that may have more
6 * than one key as we use chaining) from the old to the new hash table, however
7 * since part of the hash table may be composed of empty spaces, it is not
8 * guaranteed that this function will rehash even a single bucket, since it
9 * will visit at max N*10 empty buckets in total, otherwise the amount of
10 * work it does would be unbound and the function may block for a long time.
11 *
12 * 实现持续的重新哈希,如果还有需要重新哈希的key,返回1,否则返回0
13 *
14 * 需要注意的是,一步rehash指的是将一个bucket(由于使用链地址法,其下可能挂有多个key)从老的哈希表移到新的哈希表。
15 * 但是,因为哈希表中可能存在空的bucket,所以函数并不保证一定能rehash哪怕一个bucket:函数总共最多只会访问N*10个空bucket,
16 * 否则函数的工作量将没有上限,可能会阻塞很长一段时间
17 */
18 int dictRehash(dict *d, int n) {
19 int empty_visits = n*10; /* Max number of empty buckets to visit. */
20 if (!dictIsRehashing(d)) return 0;
21
22 while(n-- && d->ht[0].used != 0) {
23 dictEntry *de, *nextde;
24
25 /* Note that rehashidx can't overflow as we are sure there are more
26 * elements because ht[0].used != 0 */
27 assert(d->ht[0].size > (unsigned long)d->rehashidx);
28
29 /* 找到非空的哈希表下标 */
30 while(d->ht[0].table[d->rehashidx] == NULL) {
31 d->rehashidx++;
32 /**
33 * rehashing时最多允许访问10*n个空bucket,达到上限就要退出流程
34 */
35 if (--empty_visits == 0)
36 return 1;
37 }
38 de = d->ht[0].table[d->rehashidx];
39 /**
40 * Move all the keys in this bucket from the old to the new hash HT
41 * 实现将bucket从老的哈希表移到新的哈希表
42 */
43 while(de) {
44 uint64_t h;
45
46 nextde = de->next;
47 /* Get the index in the new hash table 获取在新哈希表中的下标*/
48 h = dictHashKey(d, de->key) & d->ht[1].sizemask;
49 de->next = d->ht[1].table[h];
50 d->ht[1].table[h] = de;
51 d->ht[0].used--;
52 d->ht[1].used++;
53 de = nextde;
54 }
55 d->ht[0].table[d->rehashidx] = NULL;
56 d->rehashidx++;
57 }
58
59 /**
60 * Check if we already rehashed the whole table...
61 * 当ht[0]->used为0时,认为ht[0]的所有dictEntry已经移至ht[1],此时return 0,
62 * 否则 return 1,告诉调用者,还需要继续进行rehashing操作.
63 */
64 if (d->ht[0].used == 0) {
65 /**
66 * 此时ht[0]->used为0,释放原ht[0]的hash table,把ht[1]赋值给ht[0],并设置ht[1] = NULL,
67 * 最后重置rehashidx=-1,rehashing操作结束
68 */
69 zfree(d->ht[0].table);
70 d->ht[0] = d->ht[1];
71 _dictReset(&d->ht[1]);
72 d->rehashidx = -1;
73 return 0;
74 }
75
76 /* More to rehash... */
77 return 1;
78 }
rehashing操作将会把ht[0]里,rehashidx的值对应的bucket下的所有dictEntry,移至ht[1],之后对rehashidx进行自增处理。当ht[0]->used为0时,认为ht[0]的所有dictEntry已经移至ht[1],此时return 0,否则 return 1,告诉调用者,还需要继续进行rehashing操作。同时,rehashing时允许最多跳过10n的空bucket,否则,就要退出流程,返回1。假设传入的n=1,即只进行一次rehashing操作,执行完成之后的结构:
1 /*
2
3 +->NULL
4 +->+----------+ /
5 | |dictEntry*|/ +----+
6 | +----------+ +-->|K2|V|->NULL
7 | |dictEntry*|/ +----+
8 +------------+ /+-----------+ | +----------+
9 |dictType* | / |dictEntry**|--+ |dictEntry*|\ +----+
10 +------------+ / +-----------+ +----------+ +-->|K3|V|->NULL
11 |privdata | / |size=4 | |dictEntry*|\ +----+
12 +------------+/ +-----------+ +----------+ \ +----+
13 |ht[2] | |sizemask=3 | +->|K4|V|->NULL
14 +------------+\ +-----------+ +----+
15 |rehashidx=1 | \ |used=3 |
16 +------------+ \ +-----------+
17 |iterators=0 | \
18 +------------+ \+-----------+ +->+----------+ +----+
19 |dictEntry**|--+ |dictEntry*|-->|K1|V|->NULL
20 +-----------+ +----------+ +----+
21 |size=8 | |dictEntry*|->NULL
22 +-----------+ +----------+
23 |sizemask=7 | |dictEntry*|->NULL
24 +-----------+ +----------+
25 |used=1 | |dictEntry*|->NULL
26 +-----------+ +----------+
27 |dictEntry*|->NULL
28 +----------+
29 |dictEntry*|->NULL
30 +----------+
31 |dictEntry*|->NULL
32 +----------+
33 |dictEntry*|->NULL
34 +----------+
35 */
所有节点移完时
1 /*
2
3
4 +->+----------+
5 | |dictEntry*|->NULL
6 | +----------+
7 | |dictEntry*|->NULL
8 +------------+ /+-----------+ | +----------+
9 |dictType* | / |dictEntry**|--+ |dictEntry*|->NULL
10 +------------+ / +-----------+ +----------+
11 |privdata | / |size=4 | |dictEntry*|->NULL
12 +------------+/ +-----------+ +----------+
13 |ht[2] | |sizemask=3 |
14 +------------+\ +-----------+
15 |rehashidx=4 | \ |used=0 |
16 +------------+ \ +-----------+
17 |iterators=0 | \
18 +------------+ \+-----------+ +->+----------+ +----+
19 |dictEntry**|--+ |dictEntry*|-->|K1|V|->NULL
20 +-----------+ +----------+ +----+
21 |size=8 | |dictEntry*|->NULL
22 +-----------+ +----------+ +----+
23 |sizemask=7 | |dictEntry*|-->|K3|V|->NULL
24 +-----------+ +----------+ +----+
25 |used=4 | |dictEntry*|->NULL
26 +-----------+ +----------+
27 |dictEntry*|->NULL
28 +----------+ +----+
29 |dictEntry*|-->|K2|V|->NULL
30 +----------+ +----+
31 |dictEntry*|->NULL
32 +----------+ +----+
33 |dictEntry*|-->|K4|V|->NULL
34 +----------+ +----+
35 */
此时ht[0]->used为0,释放原ht[0]的hash table,把ht[1]赋值给ht[0],并重置ht[1](table=NULL,size、sizemask、used均为0),最后重置rehashidx=-1,rehashing操作结束。
1 /*
2 +------------+ /+-----------+ +-->+----------+ +----+
3 |dictType* | / |dictEntry**|---+ |dictEntry*|-->|K1|V|->NULL
4 +------------+ / +-----------+ +----------+ +----+
5 |privdata | / |size=8 | |dictEntry*|->NULL
6 +------------+/ +-----------+ +----------+ +----+
7 |ht[2] | |sizemask=7 | |dictEntry*|-->|K3|V|->NULL
8 +------------+\ +-----------+ +----------+ +----+
9 |rehashidx=-1| \ |used=4 | |dictEntry*|->NULL
10 +------------+ \ +-----------+ +----------+
11 |iterators=0 | \ |dictEntry*|->NULL
12 +------------+ \+-----------+ +----------+ +----+
13 |dictEntry**|->NULL |dictEntry*|-->|K2|V|->NULL
14 +-----------+ +----------+ +----+
15 |size=0 | |dictEntry*|->NULL
16 +-----------+ +----------+ +----+
17 |sizemask=0 | |dictEntry*|-->|K4|V|->NULL
18 +-----------+ +----------+ +----+
19 |used=0 |
20 +-----------+
21 */
3.4 rehashing操作的触发共有两种方式
3.4.1 定时操作
1 /**
2 * 返回当前时间,单位:毫秒
3 */
4 long long timeInMilliseconds(void) {
5 struct timeval tv;
6
7 gettimeofday(&tv,NULL);
8 return (((long long)tv.tv_sec)*1000)+(tv.tv_usec/1000);
9 }
10
11 /**
12 * Rehash for an amount of time between ms milliseconds and ms+1 milliseconds
13 * 在ms毫秒到ms+1毫秒的时间内持续执行rehash
14 */
15 int dictRehashMilliseconds(dict *d, int ms) {
16 //获取当前的时间,单位是毫秒
17 long long start = timeInMilliseconds();
18 int rehashes = 0;
19
20 while(dictRehash(d,100)) {
21 rehashes += 100;
22 if (timeInMilliseconds()-start > ms) break;
23 }
24 return rehashes;
25 }
外部传入一个毫秒时间(实际上就是1ms),在这时间内循环执行rehashing,每次调用dictRehash执行100步。
3.4.2 操作时触发
1 /* This function performs just a step of rehashing, and only if there are
2 * no safe iterators bound to our hash table. When we have iterators in the
3 * middle of a rehashing we can't mess with the two hash tables otherwise
4 * some element can be missed or duplicated.
5 *
6 * This function is called by common lookup or update operations in the
7 * dictionary so that the hash table automatically migrates from H1 to H2
8 * while it is actively used.
9 * 在插入、删除、查找等操作时,顺带执行一次rehashing操作。
10 * 值得注意的是,如果存在安全的迭代器,即d->iterators != 0,则不会进行rehashing操作
11 * */
12 static void _dictRehashStep(dict *d) {
13 if (d->iterators == 0) dictRehash(d,1);
14 }
四、插入
4.1 获取插入位置
获取可插入新节点的bucket idx的方法
1 /* Returns the index of a free slot that can be populated with
2 * a hash entry for the given 'key'.
3 * If the key already exists, -1 is returned
4 * and the optional output parameter may be filled.
5 *
6 * Note that if we are in the process of rehashing the hash table, the
7 * index is always returned in the context of the second (new) hash table.
8 * 获取可插入新节点的bucket idx
9 *
10 * 此方法在进行查找idx之前,先进行一次判断,是否需要rehashing操作。而后进行查找。
11 * idx的值就是通过hash函数计算出来的hash_value与sizemask做位运算的结果,然后遍历此idx对应的bucket,
12 * 若已存在相同的key,则认为不可插入,并把对应的dictEntry用传入的二级指针的方式传出,供调用者使用。
13 * 若不存在,则需要判断是否正在进行rehashing操作。若在,则会对ht[1]做一次相同的操作。最终可以得到一个idx值,
14 * 或传出一个dictEntry。
15 *
16 * 由于rehashing期间,将会把ht[0]的所有dictEntry依次转移至ht[1],
17 * 为了防止新插入的dictEntry落到ht[0]已完成rehashing操作的bucket上,在rehashing期间,
18 * 返回的可插入的idx一定是属于ht[1]的。
19 */
20 static long _dictKeyIndex(dict *d, const void *key, uint64_t hash, dictEntry **existing)
21 {
22 unsigned long idx, table;
23 dictEntry *he;
24 if (existing) *existing = NULL;
25
26 /* Expand the hash table if needed 如果有需要,对字典进行扩展*/
27 if (_dictExpandIfNeeded(d) == DICT_ERR)
28 return -1;
29 // 在两个哈希表(ht[0]、ht[1])中进行查找给定 key
30 for (table = 0; table <= 1; table++) {
31 /**
32 * 依据哈希值和哈希表的 sizemask
33 * 计算出 key 可能出现在 table 数组中的哪个索引
34 */
35 idx = hash & d->ht[table].sizemask;
36 /* Search if this slot does not already contain the given key */
37 he = d->ht[table].table[idx];
38
39 /**
40 * 在节点链表里查找给定 key
41 * 由于链表的元素数量通常为 1 或者是一个非常小的比率
42 * 所以能够将这个操作看作 O(1) 来处理
43 */
44 while(he) {
45 // key 已经存在
46 if (key==he->key || dictCompareKeys(d, key, he->key)) {
47 if (existing) *existing = he;
48 return -1;
49 }
50 he = he->next;
51 }
52 /**
53 * 第一次执行到这里时,说明已经查找完 d->ht[0] 了
54 * 这时如果哈希表不在 rehash 中,就没有必要查找 d->ht[1]
55 */
56 if (!dictIsRehashing(d)) break;
57 }
58 return idx;
59 }
4.2 插入函数
1 /**
2 * Low level add or find:底层的添加和查找函数
3 * This function adds the entry but instead of setting a value returns the
4 * dictEntry structure to the user, that will make sure to fill the value
5 * field as he wishes.
6 *
7 * This function is also directly exposed to the user API to be called
8 * mainly in order to store non-pointers inside the hash value, example:
9 *
10 * entry = dictAddRaw(dict,mykey,NULL);
11 * if (entry != NULL) dictSetSignedIntegerVal(entry,1000);
12 *
13 * Return values:
14 *
15 * If key already exists NULL is returned, and "*existing" is populated
16 * with the existing entry if existing is not NULL.
17 *
18 * If key was added, the hash entry is returned to be manipulated by the caller.
19 *
20 * 该函数添加一个entry,但并不设置value,而是把dictEntry结构返回给调用者,由调用者按自己的需要填充value字段
21 * 该函数也直接暴露给用户API调用,主要用于在哈希表的value中保存非指针类型的数据,比如:
22 * entry = dictAddRaw(dict,mykey,NULL);
23 * if (entry != NULL) dictSetSignedIntegerVal(entry,1000);
24 *
25 * 若不存在相同key,则插入,否则,传出dictEntry的指针。插入时,由于没有记录每个dictEntry链表的尾指针,
26 * 所以使用头插法,可以节约插入时的时间消耗。
27 */
28 dictEntry *dictAddRaw(dict *d, void *key, dictEntry **existing)
29 {
30 long index;
31 dictEntry *entry;
32 dictht *ht;
33
34 // 如果正在rehash,顺带执行rehash操作
35 if (dictIsRehashing(d))
36 _dictRehashStep(d);
37
38 /**
39 * Get the index of the new element, or -1 if
40 * the element already exists.
41 * 获取新元素的下标,如果已经存在,返回-1
42 */
43 if ((index = _dictKeyIndex(d, key, dictHashKey(d,key), existing)) == -1)
44 return NULL;
45
46 /* Allocate the memory and store the new entry.
47 * Insert the element in top, with the assumption that in a database
48 * system it is more likely that recently added entries are accessed
49 * more frequently.
50 * 如果正在进行rehash操作,返回ht[1],否则返回ht[0]
51 * */
52 ht = dictIsRehashing(d) ? &d->ht[1] : &d->ht[0];
53 entry = zmalloc(sizeof(*entry));
54 //插入头部
55 entry->next = ht->table[index];
56 //更改头部节点
57 ht->table[index] = entry;
58 //节点增加
59 ht->used++;
60
61 /* Set the hash entry fields. 设置entry中的key*/
62 dictSetKey(d, entry, key);
63 //返回插入元素对应的哈希节点
64 return entry;
65 }
若不存在相同key,则插入,否则,传出dictEntry的指针。插入时,由于没有记录每个dictEntry链表的尾指针,所以使用头插法,可以节约插入时的时间消耗。
4.3 底层插入函数
dictAddRaw作为最终插入的方法,被多个方法所调用:
1 /**
2 * Add an element to the target hash table
3 * 向目标哈希表中给添加一个元素
4 * 若不存在,则插入,否则,报错
5 */
6 int dictAdd(dict *d, void *key, void *val)
7 {
8 dictEntry *entry = dictAddRaw(d,key,NULL);
9
10 if (!entry) return DICT_ERR;
11 dictSetVal(d, entry, val);
12 return DICT_OK;
13 }
14
15 /**
16 * Add or Overwrite:
17 * Add an element, discarding the old value if the key already exists.
18 * Return 1 if the key was added from scratch, 0 if there was already an
19 * element with such key and dictReplace() just performed a value update
20 * operation.
21 * 若存在,则替换value,否则插入
22 */
23 int dictReplace(dict *d, void *key, void *val)
24 {
25 dictEntry *entry, *existing, auxentry;
26
27 /**
28 * Try to add the element. If the key
29 * does not exists dictAdd will succeed.
30 * 如果添加成功,dictAddRaw返回非空值
31 */
32 entry = dictAddRaw(d,key,&existing);
33 if (entry) {
34 dictSetVal(d, entry, val);
35 return 1;
36 }
37
38 /**
39 * Set the new value and free the old one. Note that it is important
40 * to do that in this order, as the value may just be exactly the same
41 * as the previous one. In this context, think to reference counting,
42 * you want to increment (set), and then decrement (free), and not the
43 * reverse.
44 * 设置新值,释放旧值,这个顺序很重要,因为值可能是与原来一样的
45 * 在这个上下文里,考虑引用计数,我们希望的是先加再减,而不是反过来
46 */
47 auxentry = *existing;
48 dictSetVal(d, existing, val);
49 dictFreeVal(d, &auxentry);
50 return 0;
51 }
52
53 /* Add or Find:
54 * dictAddOrFind() is simply a version of dictAddRaw() that always
55 * returns the hash entry of the specified key, even if the key already
56 * exists and can't be added (in that case the entry of the already
57 * existing key is returned.)
58 *
59 * See dictAddRaw() for more information.
60 * 若存在,则返回对应dictEntry,否则插入后返回新的dictEntry
61 */
62 dictEntry *dictAddOrFind(dict *d, void *key) {
63 dictEntry *entry, *existing;
64 entry = dictAddRaw(d,key,&existing);
65 return entry ? entry : existing;
66 }
4.4 插入过程
对于一个刚刚create的dict:
1 /*
2
3 +------------+ /+-----------+
4 |dictType* | / |dictEntry**|-->NULL
5 +------------+ / +-----------+
6 |privdata | / |size=0 |
7 +------------+/ +-----------+
8 |ht[2] | |sizemask=0 |
9 +------------+\ +-----------+
10 |rehashidx=-1| \ |used=0 |
11 +------------+ \ +-----------+
12 |iterators=0 | \
13 +------------+ \+-----------+
14 |dictEntry**|-->NULL
15 +-----------+
16 |size=0 |
17 +-----------+
18 |sizemask=0 |
19 +-----------+
20 |used=0 |
21 +-----------+
22 */
假设K1、K2、K3、K4计算出来的hash值分别为0、5、2、7,使用sizemask计算出来的idx分别为0、1、2、3
4.4.1 插入K1
现调用dictAdd方法进行插入
执行完dictAddRaw中的_dictKeyIndex里的_dictExpandIfNeeded:
1 /*
2
3 +-->NULL
4 +------------+ /+-----------+ +->+----------+ /
5 |dictType* | / |dictEntry**|--+ |dictEntry*|/
6 +------------+ / +-----------+ +----------+ +--->NULL
7 |privdata | / |size=4 | |dictEntry*|/
8 +------------+/ +-----------+ +----------+
9 |ht[2] | |sizemask=3 | |dictEntry*|\
10 +------------+\ +-----------+ +----------+ +--->NULL
11 |rehashidx=-1| \ |used=0 | |dictEntry*|\
12 +------------+ \ +-----------+ +----------+ \
13 |iterators=0 | \ +-->NULL
14 +------------+ \+-----------+
15 |dictEntry**|-->NULL
16 +-----------+
17 |size=0 |
18 +-----------+
19 |sizemask=0 |
20 +-----------+
21 |used=0 |
22 +-----------+
23 */
同时得到其在ht[0]的idx = 0,且不在rehashing操作中,于是直接插入
1 /*
2 +----+
3 +->|K1|V|->NULL
4 +------------+ /+-----------+ +->+----------+ / +----+
5 |dictType* | / |dictEntry**|--+ |dictEntry*|/
6 +------------+ / +-----------+ +----------+ +--->NULL
7 |privdata | / |size=4 | |dictEntry*|/
8 +------------+/ +-----------+ +----------+
9 |ht[2] | |sizemask=3 | |dictEntry*|\
10 +------------+\ +-----------+ +----------+ +--->NULL
11 |rehashidx=-1| \ |used=1 | |dictEntry*|\
12 +------------+ \ +-----------+ +----------+ \
13 |iterators=0 | \ +-->NULL
14 +------------+ \+-----------+
15 |dictEntry**|-->NULL
16 +-----------+
17 |size=0 |
18 +-----------+
19 |sizemask=0 |
20 +-----------+
21 |used=0 |
22 +-----------+
23 */
4.4.2 依次插入K2、K3、K4后
1 /*
2 +----+
3 +->|K1|V|->NULL
4 +------------+ /+-----------+ +->+----------+ / +----+
5 |dictType* | / |dictEntry**|--+ |dictEntry*|/ +----+
6 +------------+ / +-----------+ +----------+ +-->|K2|V|->NULL
7 |privdata | / |size=4 | |dictEntry*|/ +----+
8 +------------+/ +-----------+ +----------+
9 |ht[2] | |sizemask=3 | |dictEntry*|\ +----+
10 +------------+\ +-----------+ +----------+ +-->|K3|V|->NULL
11 |rehashidx=-1| \ |used=4 | |dictEntry*|\ +----+
12 +------------+ \ +-----------+ +----------+ \ +----+
13 |iterators=0 | \ +->|K4|V|->NULL
14 +------------+ \+-----------+ +----+
15 |dictEntry**|-->NULL
16 +-----------+
17 |size=0 |
18 +-----------+
19 |sizemask=0 |
20 +-----------+
21 |used=0 |
22 +-----------+
23 */
4.4.3 此时若有一个K5
计算出来的hash值为8,则:
i.因此刻不在rehashing操作,所以不用做处理
ii.执行完dictAddRaw中的_dictKeyIndex里的_dictExpandIfNeeded:
1 /*
2 +----+
3 +->|K1|V|->NULL
4 +->+----------+ / +----+
5 | |dictEntry*|/ +----+
6 | +----------+ +-->|K2|V|->NULL
7 | |dictEntry*|/ +----+
8 +------------+ /+-----------+ | +----------+
9 |dictType* | / |dictEntry**|--+ |dictEntry*|\ +----+
10 +------------+ / +-----------+ +----------+ +-->|K3|V|->NULL
11 |privdata | / |size=4 | |dictEntry*|\ +----+
12 +------------+/ +-----------+ +----------+ \ +----+
13 |ht[2] | |sizemask=3 | +->|K4|V|->NULL
14 +------------+\ +-----------+ +----+
15 |rehashidx=0 | \ |used=4 |
16 +------------+ \ +-----------+
17 |iterators=0 | \
18 +------------+ \+-----------+ +->+----------+
19 |dictEntry**|--+ |dictEntry*|->NULL
20 +-----------+ +----------+
21 |size=8 | |dictEntry*|->NULL
22 +-----------+ +----------+
23 |sizemask=7 | |dictEntry*|->NULL
24 +-----------+ +----------+
25 |used=0 | |dictEntry*|->NULL
26 +-----------+ +----------+
27 |dictEntry*|->NULL
28 +----------+
29 |dictEntry*|->NULL
30 +----------+
31 |dictEntry*|->NULL
32 +----------+
33 |dictEntry*|->NULL
34 +----------+
35 */
同时得到其在ht[1]的idx=0
iii.插入,因为此时已经满足了扩容的条件,所以正处于rehashing过程中,因此将元素插入ht[1]对应的哈希表。通常情况下,如果不是处于rehashing过程中,就会将元素插入ht[0]对应的哈希表
1 /*
2 +----+
3 +->|K1|V|->NULL
4 +->+----------+ / +----+
5 | |dictEntry*|/ +----+
6 | +----------+ +-->|K2|V|->NULL
7 | |dictEntry*|/ +----+
8 +------------+ /+-----------+ | +----------+
9 |dictType* | / |dictEntry**|--+ |dictEntry*|\ +----+
10 +------------+ / +-----------+ +----------+ +-->|K3|V|->NULL
11 |privdata | / |size=4 | |dictEntry*|\ +----+
12 +------------+/ +-----------+ +----------+ \ +----+
13 |ht[2] | |sizemask=3 | +->|K4|V|->NULL
14 +------------+\ +-----------+ +----+
15 |rehashidx=0 | \ |used=4 |
16 +------------+ \ +-----------+
17 |iterators=0 | \
18 +------------+ \+-----------+ +->+----------+ +----+
19 |dictEntry**|--+ |dictEntry*|-->|K5|V|->NULL
20 +-----------+ +----------+ +----+
21 |size=8 | |dictEntry*|->NULL
22 +-----------+ +----------+
23 |sizemask=7 | |dictEntry*|->NULL
24 +-----------+ +----------+
25 |used=1 | |dictEntry*|->NULL
26 +-----------+ +----------+
27 |dictEntry*|->NULL
28 +----------+
29 |dictEntry*|->NULL
30 +----------+
31 |dictEntry*|->NULL
32 +----------+
33 |dictEntry*|->NULL
34 +----------+
35 */
4.4.4 此时若有一个K6
计算出来的hash值为16,则:
i.此时已处于rehashing操作中,执行一步,将ht[0]中的K1|V移入ht[1]中:
1 /*
2
3 +-->NULL
4 +->+----------+ /
5 | |dictEntry*|/ +----+
6 | +----------+ +-->|K2|V|->NULL
7 | |dictEntry*|/ +----+
8 +------------+ /+-----------+ | +----------+
9 |dictType* | / |dictEntry**|--+ |dictEntry*|\ +----+
10 +------------+ / +-----------+ +----------+ +-->|K3|V|->NULL
11 |privdata | / |size=4 | |dictEntry*|\ +----+
12 +------------+/ +-----------+ +----------+ \ +----+
13 |ht[2] | |sizemask=3 | +->|K4|V|->NULL
14 +------------+\ +-----------+ +----+
15 |rehashidx=1 | \ |used=3 |
16 +------------+ \ +-----------+
17 |iterators=0 | \
18 +------------+ \+-----------+ +->+----------+ +----+ +----+
19 |dictEntry**|--+ |dictEntry*|-->|K1|V|->|K5|V|->NULL
20 +-----------+ +----------+ +----+ +----+
21 |size=8 | |dictEntry*|->NULL
22 +-----------+ +----------+
23 |sizemask=7 | |dictEntry*|->NULL
24 +-----------+ +----------+
25 |used=2 | |dictEntry*|->NULL
26 +-----------+ +----------+
27 |dictEntry*|->NULL
28 +----------+
29 |dictEntry*|->NULL
30 +----------+
31 |dictEntry*|->NULL
32 +----------+
33 |dictEntry*|->NULL
34 +----------+
35 */
ii.执行完dictAddRaw中的_dictKeyIndex里的_dictExpandIfNeeded,因已在进行rehashing,所以不做任何处理,只返回其在ht[1]的idx 0
iii.头插法将K6插入
1 /*
2
3 +-->NULL
4 +->+----------+ /
5 | |dictEntry*|/ +----+
6 | +----------+ +-->|K2|V|->NULL
7 | |dictEntry*|/ +----+
8 +------------+ /+-----------+ | +----------+
9 |dictType* | / |dictEntry**|--+ |dictEntry*|\ +----+
10 +------------+ / +-----------+ +----------+ +-->|K3|V|->NULL
11 |privdata | / |size=4 | |dictEntry*|\ +----+
12 +------------+/ +-----------+ +----------+ \ +----+
13 |ht[2] | |sizemask=3 | +->|K4|V|->NULL
14 +------------+\ +-----------+ +----+
15 |rehashidx=1 | \ |used=3 |
16 +------------+ \ +-----------+
17 |iterators=0 | \
18 +------------+ \+-----------+ +->+----------+ +----+ +----+ +----+
19 |dictEntry**|--+ |dictEntry*|-->|K6|V|->|K1|V|->|K5|V|->NULL
20 +-----------+ +----------+ +----+ +----+ +----+
21 |size=8 | |dictEntry*|->NULL
22 +-----------+ +----------+
23 |sizemask=7 | |dictEntry*|->NULL
24 +-----------+ +----------+
25 |used=3 | |dictEntry*|->NULL
26 +-----------+ +----------+
27 |dictEntry*|->NULL
28 +----------+
29 |dictEntry*|->NULL
30 +----------+
31 |dictEntry*|->NULL
32 +----------+
33 |dictEntry*|->NULL
34 +----------+
35 */
以上为正常插入时的情况,key已存在,或是调用另外两个方法的情况与之大同小异,有时间的时候再补充。
五、查找
5.1 dictFind函数
返回找到的dictEntry
1 /**
2 * 若在rehashing期间,则先执行一步rehash。首先在ht[0]里查找,计算出hash值对应ht[0]的idx,取得其bucket,
3 * 然后遍历之,找到与指定key相同的dictEntry。若ht[0]中找不到指定的key,且正在进行rehashing操作,
4 * 则去ht[1]以相同方式也查找一次。
5 */
6 dictEntry *dictFind(dict *d, const void *key)
7 {
8 dictEntry *he;
9 uint64_t h, idx, table;
10
11 if (d->ht[0].used + d->ht[1].used == 0) return NULL; /* dict is empty 如果哈希表为空直接返回NULL*/
12 if (dictIsRehashing(d)) _dictRehashStep(d); //是否处于rehashing操作中
13 h = dictHashKey(d, key);
14 for (table = 0; table <= 1; table++) {
15 idx = h & d->ht[table].sizemask;
16 he = d->ht[table].table[idx];
17 while(he) {
18 if (key==he->key || dictCompareKeys(d, key, he->key))
19 return he;
20 he = he->next;
21 }
22 /**
23 * 如果不是处于rehashing操作中,就不需要遍历ht[1],因为rehashing结束后,会将整个ht[1]赋值给ht[0],
24 * 然后将ht[1]置为NULL,也就是ht[1]不再存在任何元素
25 */
26 if (!dictIsRehashing(d)) return NULL;
27 }
28 return NULL;
29 }
5.2 dictFetchValue函数
redis额外提供一个根据key只获取其value的方法,返回找到的value值。
1 /**
2 * redis额外提供一个,根据key只获取其value的方法:key不存在时返回NULL,实际上调用的还是dictFind
3 */
4 void *dictFetchValue(dict *d, const void *key) {
5 dictEntry *he;
6
7 he = dictFind(d,key);
8 return he ? dictGetVal(he) : NULL;
9 }
六、删除
6.1 底层删除函数
1 /**
2 * Search and remove an element. This is an helper function for
3 * dictDelete() and dictUnlink(), please check the top comment
4 * of those functions.
5 * 查找方式与dictFind相同。找到之后,由调用者指定是否要销毁此dictEntry,
6 * 若不销毁,则要把对应指针传出来,给外部使用。
7 */
8 static dictEntry *dictGenericDelete(dict *d, const void *key, int nofree) {
9 uint64_t h, idx;
10 dictEntry *he, *prevHe;
11 int table;
12
13 if (d->ht[0].used == 0 && d->ht[1].used == 0) return NULL;
14
15 if (dictIsRehashing(d)) _dictRehashStep(d);
16 h = dictHashKey(d, key);
17
18 for (table = 0; table <= 1; table++) {
19 idx = h & d->ht[table].sizemask;
20 he = d->ht[table].table[idx];
21 prevHe = NULL;
22 while(he) {
23 if (key==he->key || dictCompareKeys(d, key, he->key)) {
24 /* Unlink the element from the list */
25 if (prevHe)
26 prevHe->next = he->next;
27 else
28 d->ht[table].table[idx] = he->next;
29 if (!nofree) {
30 //需要销毁
31 dictFreeKey(d, he);
32 dictFreeVal(d, he);
33 zfree(he);
34 }
35 /**
36 * 为什么无论是否释放该元素都需要将元素个数减少1,因为查找到的元素是一定会被删除的元素,
37 * 即使在这里不进行释放,后续也会调用dictFreeUnlinkedEntry函数将其释放掉,之所以有这样的一个操作,
38 * 是因为某些情况下,在释放该元素之前,可能还会利用该元素进行一些必要的操作,
39 * 所以通过这样的一种特殊删除方式来满足这种需求。
40 */
41 d->ht[table].used--;
42 return he;
43 }
44 prevHe = he;
45 he = he->next;
46 }
47 if (!dictIsRehashing(d)) break;
48 }
49 return NULL; /* not found */
50 }
查找方式与dictFind相同。找到之后,由调用者指定是否要销毁此dictEntry,若不销毁,则要把对应指针传出来,给外部使用。
6.2 上层删除函数
6.1中的方法被两个接口所调用:
1 /**
2 * Remove an element, returning DICT_OK on success or DICT_ERR if the
3 * element was not found.
4 * 删除一个元素,如果成功删除返回DICT_OK,如果不存在,返回DICT_ERR
5 */
6 int dictDelete(dict *ht, const void *key) {
7 return dictGenericDelete(ht,key,0) ? DICT_OK : DICT_ERR;
8 }
9
10 /* Remove an element from the table, but without actually releasing
11 * the key, value and dictionary entry. The dictionary entry is returned
12 * if the element was found (and unlinked from the table), and the user
13 * should later call `dictFreeUnlinkedEntry()` with it in order to release it.
14 * Otherwise if the key is not found, NULL is returned.
15 *
16 * This function is useful when we want to remove something from the hash
17 * table but want to use its value before actually deleting the entry.
18 * Without this function the pattern would require two lookups:
19 *
20 * entry = dictFind(...);
21 * // Do something with entry
22 * dictDelete(dictionary,entry);
23 *
24 * Thanks to this function it is possible to avoid this, and use
25 * instead:
26 *
27 * entry = dictUnlink(dictionary,entry);
28 * // Do something with entry
29 * dictFreeUnlinkedEntry(entry); // <- This does not need to lookup again.
30 *
31 * dictDelete就不用多说了,直接删除对应dictEntry。关于为什么需要dictUnlink,源码的注释上写道,
32 * 如果有某种操作,需要先查找指定key对应的dictEntry,然后对其做点操作,接着就直接删除,在没有dictUnlink的时候,
33 * 需要这样:
34 * 1 entry = dictFind(...);
35 * 2 // Do something with entry
36 * 3 dictDelete(dictionary,entry);
37 * 实际需要查找两次。而在有dictUnlink的情况下:
38 * 1 entry = dictUnlink(dictionary,entry);
39 * 2 // Do something with entry
40 * 3 dictFreeUnlinkedEntry(entry);
41 * 只需要一次查找,配合专门的删除操作,即可。
42 */
43 dictEntry *dictUnlink(dict *ht, const void *key) {
44 return dictGenericDelete(ht,key,1);
45 }
46
47 /**
48 * You need to call this function to really free the entry after a call
49 * to dictUnlink(). It's safe to call this function with 'he' = NULL.
50 * 配合dictUnlink函数,对dictUnlink的返回结果进行释放
51 */
52 void dictFreeUnlinkedEntry(dict *d, dictEntry *he) {
53 if (he == NULL) return;
54 dictFreeKey(d, he);
55 dictFreeVal(d, he);
56 zfree(he);
57 }
七、销毁
清空一个hash table的方法
1 /**
2 * Destroy an entire dictionary
3 * 销毁整个哈希表
4 * 两层循环,分别遍历所有bucket与单bucket里所有dictEntry进行释放。
5 * 关于这里的 (i&65535) == 0的判断:若传入了callback,则每遍历65536个bucket(从第0个开始)调用一次callback(d->privdata)。_dictClear方法仅在相同文件的方法dictEmpty与dictRelease中被调用
6 */
7 int _dictClear(dict *d, dictht *ht, void(callback)(void *)) {
8 unsigned long i;
9
10 /* Free all the elements */
11 for (i = 0; i < ht->size && ht->used > 0; i++) {
12 dictEntry *he, *nextHe;
13
14 if (callback && (i & 65535) == 0) callback(d->privdata);
15
16 if ((he = ht->table[i]) == NULL) continue;
17 while(he) {
18 nextHe = he->next;
19 dictFreeKey(d, he);
20 dictFreeVal(d, he);
21 zfree(he);
22 ht->used--;
23 he = nextHe;
24 }
25 }
26 /* Free the table and the allocated cache structure */
27 zfree(ht->table);
28 /* Re-initialize the table */
29 _dictReset(ht);
30 return DICT_OK; /* never fails */
31 }
用户使用的API函数为dictEmpty与dictRelease(注意:下面的代码清单贴出的是dictReleaseIterator与dictEmpty,dictRelease的代码未贴出)
1 /**
2 * 迭代器销毁
3 * 与首次执行next操作相对应,若为safe的迭代器,要给dict的iterators计数减1,否则要校验期间dict的指纹是否发生了变化。
4 */
5 void dictReleaseIterator(dictIterator *iter)
6 {
7 if (!(iter->index == -1 && iter->table == 0)) {
8 if (iter->safe)
9 iter->d->iterators--;
10 else
11 assert(iter->fingerprint == dictFingerprint(iter->d));
12 }
13 zfree(iter);
14 }
15
16 /**
17 * 释放所有元素,将表恢复为初始化状态,也就是刚刚创建的状态
18 * 第二个参数传入的NULL
19 */
20 void dictEmpty(dict *d, void(callback)(void*)) {
21 _dictClear(d,&d->ht[0],callback);
22 _dictClear(d,&d->ht[1],callback);
23 d->rehashidx = -1;
24 d->iterators = 0;
25 }
dictRelease不用多说,传入的callback为NULL。而dictEmpty,搜索redis源码所有文件的调用,
1 匹配到二进制文件 src/redis-check-aof
2 src/replication.c: dictEmpty(server.repl_scriptcache_dict,NULL);
3 src/dict.h:void dictEmpty(dict *d, void(callback)(void*));
4 匹配到二进制文件 src/redis-cli
5 匹配到二进制文件 src/dict.o
6 src/dict.c:void dictEmpty(dict *d, void(callback)(void*)) {
7 匹配到二进制文件 src/sentinel.o
8 src/db.c: dictEmpty(server.db[j].dict,callback);
9 src/db.c: dictEmpty(server.db[j].expires,callback);
10 匹配到二进制文件 src/redis-sentinel
11 匹配到二进制文件 src/redis-check-rdb
12 匹配到二进制文件 src/replication.o
13 src/sentinel.c: dictEmpty(server.commands,NULL);
14 匹配到二进制文件 src/db.o
15 匹配到二进制文件 src/blocked.o
16 src/blocked.c: dictEmpty(c->bpop.keys,NULL);
17 匹配到二进制文件 src/redis-server
仅db.c里传了callback进来,对应的方法为
1 long long emptyDb(int dbnum, int flags, void(callback)(void*));
继续搜索emptyDb
1 cluster.c: emptyDb(-1,EMPTYDB_NO_FLAGS,NULL);
2 匹配到二进制文件 cluster.o
3 db.c:long long emptyDb(int dbnum, int flags, void(callback)(void*)) {
4 db.c: emptyDbAsync(&server.db[j]);
5 db.c:/* Return the set of flags to use for the emptyDb() call for FLUSHALL
6 db.c: server.dirty += emptyDb(c->db->id,flags,NULL);
7 db.c: server.dirty += emptyDb(-1,flags,NULL);
8 匹配到二进制文件 db.o
9 debug.c: emptyDb(-1,EMPTYDB_NO_FLAGS,NULL);
10 debug.c: emptyDb(-1,EMPTYDB_NO_FLAGS,NULL);
11 匹配到二进制文件 debug.o
12 lazyfree.c:void emptyDbAsync(redisDb *db) {
13 匹配到二进制文件 lazyfree.o
14 匹配到二进制文件 redis-check-aof
15 匹配到二进制文件 redis-check-rdb
16 匹配到二进制文件 redis-sentinel
17 匹配到二进制文件 redis-server
18 replication.c: * data with emptyDb(), and while we load the new data received as an
19 replication.c:/* Callback used by emptyDb() while flushing away old data to load
20 replication.c: emptyDb(
21 匹配到二进制文件 replication.o
22 server.h:long long emptyDb(int dbnum, int flags, void(callback)(void*));
23 server.h:void emptyDbAsync(redisDb *db);
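大部分调用的地方传入的都是NULL。不过从上面的搜索结果可以看到,replication.c中有一个专门给emptyDb()使用的回调(注释为"Callback used by emptyDb() while flushing away old data to load ..."),而replication.c里的emptyDb(调用跨了多行,grep没有显示出它的参数,非NULL的callback应当就是在这里传入的。结合_dictClear中 (i & 65535) == 0 的判断可知,在清空一个很大的字典时,每处理65536个bucket就会调用一次该回调。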
八、迭代器
8.1 数据结构
1 /**
2 * If safe is set to 1 this is a safe iterator, that means, you can call
3 * dictAdd, dictFind, and other functions against the dictionary even while
4 * iterating. Otherwise it is a non safe iterator, and only dictNext()
5 * should be called while iterating.
6 * 如果是个安全的迭代器,即safe == 1,则在迭代中可以调用dictAdd、dictFind等方法,否则只能调用dictNext。
7 * index表示,ht[table]对应的bucket的idx。
8 */
9 typedef struct dictIterator {
10 dict *d;
11 long index;
12 int table, safe;
13 dictEntry *entry, *nextEntry;
14 /* unsafe iterator fingerprint for misuse detection. */
15 long long fingerprint;
16 } dictIterator;
8.2 获取迭代器
1 //获取普通类型迭代器,刚获取的迭代器并不指向具体哪个dictEntry
2 dictIterator *dictGetIterator(dict *d)
3 {
4 dictIterator *iter = zmalloc(sizeof(*iter));
5
6 iter->d = d;
7 iter->table = 0;
8 iter->index = -1;
9 iter->safe = 0;
10 iter->entry = NULL;
11 iter->nextEntry = NULL;
12 return iter;
13 }
14
15 //获取安全类型的迭代器
16 dictIterator *dictGetSafeIterator(dict *d) {
17 dictIterator *i = dictGetIterator(d);
18
19 i->safe = 1;
20 return i;
21 }
8.3 迭代器的next操作
1 /**
2 * 对于一个新的迭代器,首次调用时,会根据是否安全,做不同操作。安全的迭代器会给dict里的计数器+1,
3 * 不安全的将会记录本字典的指纹。之后会遍历ht[0],取到第一个非NULL的dictEntry。
4 * 当ht[0]遍历完且取不到非NULL的dictEntry时,如果正在进行rehashing操作,则会去ht[1]里找。
5 */
6 dictEntry *dictNext(dictIterator *iter)
7 {
8 while (1) {
9 if (iter->entry == NULL) {
10 dictht *ht = &iter->d->ht[iter->table];
11 if (iter->index == -1 && iter->table == 0) {
12 if (iter->safe)
13 iter->d->iterators++;
14 else
15 iter->fingerprint = dictFingerprint(iter->d);
16 }
17 iter->index++;
18 if (iter->index >= (long) ht->size) {
19 if (dictIsRehashing(iter->d) && iter->table == 0) {
20 iter->table++;
21 iter->index = 0;
22 ht = &iter->d->ht[1];
23 } else {
24 break;
25 }
26 }
27 iter->entry = ht->table[iter->index];
28 } else {
29 iter->entry = iter->nextEntry;
30 }
31 if (iter->entry) {
32 /* We need to save the 'next' here, the iterator user
33 * may delete the entry we are returning. */
34 iter->nextEntry = iter->entry->next;
35 return iter->entry;
36 }
37 }
38 return NULL;
39 }
8.4 遍历过程
1 /*
2
3 +-------------------------+
4 +----|dict * |
5 | +-------------------------+
6 | |long index |
7 | +-------------------------+
8 | |int table |
9 | +-------------------------+
10 | |int safe |
11 | +-------------------------+
12 | |dictEntry *entry |->NULL
13 | +-------------------------+
14 | |dictEntry *nextEntry |->NULL
15 | +-------------------------+
16 | |long long fingerprint |
17 | +-------------------------+
18 |
19 |
20 |
21 | +-->NULL
22 | +->+----------+ /
23 | | |dictEntry*|/ +----+
24 | | +----------+ +-->|K2|V|->NULL
25 | | |dictEntry*|/ +----+
26 +--->+------------+ /+-----------+ | +----------+
27 |dictType* | / |dictEntry**|--+ |dictEntry*|\ +----+
28 +------------+ / +-----------+ +----------+ +-->|K3|V|->NULL
29 |privdata | / |size=4 | |dictEntry*|\ +----+
30 +------------+/ +-----------+ +----------+ \ +----+
31 |ht[2] | |sizemask=3 | +->|K4|V|->NULL
32 +------------+\ +-----------+ +----+
33 |rehashidx=1 | \ |used=3 |
34 +------------+ \ +-----------+
35 |iterators=0 | \
36 +------------+ \+-----------+ +->+----------+ +----+ +----+
37 |dictEntry**|--+ |dictEntry*|-->|K1|V|->|K5|V|->NULL
38 +-----------+ +----------+ +----+ +----+
39 |size=8 | |dictEntry*|->NULL
40 +-----------+ +----------+
41 |sizemask=7 | |dictEntry*|->NULL
42 +-----------+ +----------+
43 |used=2 | |dictEntry*|->NULL
44 +-----------+ +----------+
45 |dictEntry*|->NULL
46 +----------+
47 |dictEntry*|->NULL
48 +----------+
49 |dictEntry*|->NULL
50 +----------+
51 |dictEntry*|->NULL
52 +----------+
53 */
遍历顺序为,K2,K3,K4,K1,K5。
8.5 迭代器销毁
1 /**
2 * 迭代器销毁
3 * 与首次执行next操作相对应,若为safe的迭代器,要给dict的iterators计数减1,否则要校验期间dict的指纹是否发生了变化。
4 */
5 void dictReleaseIterator(dictIterator *iter)
6 {
7 if (!(iter->index == -1 && iter->table == 0)) {
8 if (iter->safe)
9 iter->d->iterators--;
10 else
11 assert(iter->fingerprint == dictFingerprint(iter->d));
12 }
13 zfree(iter);
14 }
8.6 普通迭代器的指纹
1 /**
2 * A fingerprint is a 64 bit number that represents the state of the dictionary
3 * at a given time, it's just a few dict properties xored together.
4 *
5 * 指纹是一个 64 位的数字,代表字典在给定时间的状态,它只是几个字典属性异或在一起。
6 *
7 * When an unsafe iterator is initialized, we get the dict fingerprint, and check
8 * the fingerprint again when the iterator is released.
9 *
10 * 当一个不安全的迭代器被初始化时,我们得到dict指纹,当迭代器被释放时再次检查指纹。
11 *
12 * If the two fingerprints are different it means that the user of the iterator
13 * performed forbidden operations against the dictionary while iterating.
14 *
15 * 如果两个指纹不同,则表示迭代器的用户在迭代时对字典执行了禁止操作。
16 *
17 * 指纹的计算
18 *
19 * 对于不安全的迭代器,在迭代过程中,不允许执行任何修改dict的操作,是只读的,不会发生迭代器失效的问题。
20 * 对于安全的迭代器,在进行操作本节点的时候,redis中记录了当前迭代的bucket idx,以及当前dictEntry的next节点。
21 * 如果只是add操作,即使是用了头插法把新dictEntry插在本节点之前,对迭代器本身并没有影响。
22 * 如果是delete了本节点,迭代器中还记录了next节点的位置,调用next时直接取就好。
23 * 如果next为空,则可以认为当前bucket遍历完了,取下一个bucket就行了。
24 * 当然,如果在add/delete等操作的时候,进行了rehashing操作,那么当前迭代器里记录的next,在rehashing之后,
25 * 可能就不是当前节点新位置的next了。所以在使用安全迭代器的时候,禁止了rehashing操作。
26 */
27 long long dictFingerprint(dict *d) {
28 long long integers[6], hash = 0;
29 int j;
30
31 integers[0] = (long) d->ht[0].table;
32 integers[1] = d->ht[0].size;
33 integers[2] = d->ht[0].used;
34 integers[3] = (long) d->ht[1].table;
35 integers[4] = d->ht[1].size;
36 integers[5] = d->ht[1].used;
37
38 /* We hash N integers by summing every successive integer with the integer
39 * hashing of the previous sum. Basically:
40 *
41 * Result = hash(hash(hash(int1)+int2)+int3) ...
42 *
43 * This way the same set of integers in a different order will (likely) hash
44 * to a different number. */
45 for (j = 0; j < 6; j++) {
46 hash += integers[j];
47 /* For the hashing step we use Tomas Wang's 64 bit integer hash. */
48 hash = (~hash) + (hash << 21); // hash = (hash << 21) - hash - 1;
49 hash = hash ^ (hash >> 24);
50 hash = (hash + (hash << 3)) + (hash << 8); // hash * 265
51 hash = hash ^ (hash >> 14);
52 hash = (hash + (hash << 2)) + (hash << 4); // hash * 21
53 hash = hash ^ (hash >> 28);
54 hash = hash + (hash << 31);
55 }
56 return hash;
57 }
九、其它操作
dict还支持其它的一些操作。
9.1、随机获取一个key
dictGetRandomKey
9.2 随机获取n个key
dictGetSomeKeys
9.3 scan操作
关于scan操作,redis采用了一个很巧妙的方法,保证了在开始scan时未删除的元素一定能遍历到,又能保证尽量少地重复遍历。采用了reverse binary iteration方法,也就是每次是向cursor的最高位加1,并向低位方向进位。
这就是该算法的精妙所在,使用该算法,可以做到下面两点:
a:开始遍历那一刻的所有元素,只要不被删除,肯定能被遍历到,不管字典扩展还是缩小;
b:该算法可能会返回重复元素,但是已经把返回重复元素的可能性降到了最低;
参考网址:
https://blog.csdn.net/gqtcgq/article/details/50533336
https://github.com/redis/redis/pull/579
9.3.1 函数原型
1 /* dictScan() is used to iterate over the elements of a dictionary.
2 *
3 * Iterating works the following way:
4 * 迭代器的工作方式如下
5 *
6 * 1) Initially you call the function using a cursor (v) value of 0.
7 * 初始访问位置为0
8 * 2) The function performs one step of the iteration, and returns the
9 * new cursor value you must use in the next call.
10 * 访问当前位置的元素,并返回下一次访问的cursor的位置(实际上就是二进制高位+1,然后如果必要的话向低位进位)
11 * 3) When the returned cursor is 0, the iteration is complete.
12 * 当返回的下一个访问位置是0,表示整个哈希表遍历结束
13 *
14 * The function guarantees all elements present in the
15 * dictionary get returned between the start and end of the iteration.
16 * However it is possible some elements get returned multiple times.
17 * 这个函数保证了从开始遍历那一刻开始哈希表中的所有元素,只要不被删除,肯定能被遍历到,不管字典扩展还是缩小
18 * 但是有的元素可能会被遍历多次,但是已经把返回重复元素的可能性降到了最低;
19 *
20 * For every element returned, the callback argument 'fn' is
21 * called with 'privdata' as first argument and the dictionary entry
22 * 'de' as second argument.
23 * 对于每一个遍历到的元素,都是使用fn函数进行处理,fn的第一个参数是privdata,第二个参数是遍历到的元素de
24 *
25 * HOW IT WORKS.
26 *
27 * The iteration algorithm was designed by Pieter Noordhuis.
28 * The main idea is to increment a cursor starting from the higher order
29 * bits. That is, instead of incrementing the cursor normally, the bits
30 * of the cursor are reversed, then the cursor is incremented, and finally
31 * the bits are reversed again.
32 *
33 * This strategy is needed because the hash table may be resized between
34 * iteration calls.
35 *
36 * dict.c hash tables are always power of two in size, and they
37 * use chaining, so the position of an element in a given table is given
38 * by computing the bitwise AND between Hash(key) and SIZE-1
39 * (where SIZE-1 is always the mask that is equivalent to taking the rest
40 * of the division between the Hash of the key and SIZE).
41 *
42 * For example if the current hash table size is 16, the mask is
43 * (in binary) 1111. The position of a key in the hash table will always be
44 * the last four bits of the hash output, and so forth.
45 *
46 * WHAT HAPPENS IF THE TABLE CHANGES IN SIZE?
47 *
48 * If the hash table grows, elements can go anywhere in one multiple of
49 * the old bucket: for example let's say we already iterated with
50 * a 4 bit cursor 1100 (the mask is 1111 because hash table size = 16).
51 *
52 * If the hash table will be resized to 64 elements, then the new mask will
53 * be 111111. The new buckets you obtain by substituting in ??1100
54 * with either 0 or 1 can be targeted only by keys we already visited
55 * when scanning the bucket 1100 in the smaller hash table.
56 *
57 * By iterating the higher bits first, because of the inverted counter, the
58 * cursor does not need to restart if the table size gets bigger. It will
59 * continue iterating using cursors without '1100' at the end, and also
60 * without any other combination of the final 4 bits already explored.
61 *
62 * Similarly when the table size shrinks over time, for example going from
63 * 16 to 8, if a combination of the lower three bits (the mask for size 8
64 * is 111) were already completely explored, it would not be visited again
65 * because we are sure we tried, for example, both 0111 and 1111 (all the
66 * variations of the higher bit) so we don't need to test it again.
67 *
68 * WAIT... YOU HAVE *TWO* TABLES DURING REHASHING!
69 *
70 * Yes, this is true, but we always iterate the smaller table first, then
71 * we test all the expansions of the current cursor into the larger
72 * table. For example if the current cursor is 101 and we also have a
73 * larger table of size 16, we also test (0)101 and (1)101 inside the larger
74 * table. This reduces the problem back to having only one table, where
75 * the larger one, if it exists, is just an expansion of the smaller one.
76 *
77 * LIMITATIONS
78 *
79 * This iterator is completely stateless, and this is a huge advantage,
80 * including no additional memory used.
81 *
82 * The disadvantages resulting from this design are:
83 *
84 * 1) It is possible we return elements more than once. However this is usually
85 * easy to deal with in the application level.
86 * 2) The iterator must return multiple elements per call, as it needs to always
87 * return all the keys chained in a given bucket, and all the expansions, so
88 * we are sure we don't miss keys moving during rehashing.
89 * 3) The reverse cursor is somewhat hard to understand at first, but this
90 * comment is supposed to help.
91 */
92 unsigned long dictScan(dict *d,
93 unsigned long v,
94 dictScanFunction *fn,
95 dictScanBucketFunction* bucketfn,
96 void *privdata)
97 {
98 dictht *t0, *t1;
99 const dictEntry *de, *next;
100 unsigned long m0, m1;
101
102 if (dictSize(d) == 0) return 0;
103
104 if (!dictIsRehashing(d)) {
105 /**
106 * 如果不是处于rehash过程中,只需要遍历哈希表ht[0],然后使用用户定义的函数fn对其进行处理
107 */
108 t0 = &(d->ht[0]);
109 m0 = t0->sizemask;
110
111 /* Emit entries at cursor */
112 if (bucketfn) bucketfn(privdata, &t0->table[v & m0]);
113 de = t0->table[v & m0];
114 while (de) {
115 next = de->next;
116 fn(privdata, de);
117 de = next;
118 }
119
120 /* Set unmasked bits so incrementing the reversed cursor
121 * operates on the masked bits */
122 v |= ~m0;
123
124 /* Increment the reverse cursor */
125 v = rev(v);
126 v++;
127 v = rev(v);
128
129 } else {
130 /**
131 * 如果处于rehash过程中,先遍历较小的哈希表,再遍历较大的哈希表,同时使用用户定义的函数fn对其进行处理
132 */
133 t0 = &d->ht[0];
134 t1 = &d->ht[1];
135
136 /* Make sure t0 is the smaller and t1 is the bigger table 根据哈希表的大小调整遍历顺序*/
137 if (t0->size > t1->size) {
138 t0 = &d->ht[1];
139 t1 = &d->ht[0];
140 }
141
142 //获取两张哈希表的掩码
143 m0 = t0->sizemask;
144 m1 = t1->sizemask;
145
146 /* Emit entries at cursor */
147 if (bucketfn) bucketfn(privdata, &t0->table[v & m0]);
148 //根据v&m0,找到t0中需要迭代的bucket,然后迭代其中的每个节点即可。
149 de = t0->table[v & m0];
150 while (de) {
151 next = de->next;
152 fn(privdata, de);
153 de = next;
154 }
155
156 /**
157 * Iterate over indices in larger table that are the expansion
158 * of the index pointed to by the cursor in the smaller table
159 * 接下来的代码稍显复杂,但是,本质上,就是t0中,索引为v&m0的bucket中的所有节点,
160 * 在其扩展到t1中后,遍历其所有可能的bucket中的节点。语言不好描述,
161 * 举个例子就明白了:若t0长度为8,则m0为111,v&m0就是保留v的低三位,假设为abc。
162 * 若t1长度为32,则m1为11111,该过程就是:遍历完t0中索引为abc的bucket之后,接着遍历t1中,
163 * 索引为00abc、01abc、10abc、11abc的bucket中的节点。
164 */
165 do {
166 /* Emit entries at cursor */
167 if (bucketfn) bucketfn(privdata, &t1->table[v & m1]);
168 de = t1->table[v & m1];
169 while (de) {
170 next = de->next;
171 fn(privdata, de);
172 de = next;
173 }
174
175 /* Increment the reverse cursor not covered by the smaller mask.*/
176 v |= ~m1;
177 v = rev(v);
178 v++;
179 v = rev(v);
180
181 /* Continue while bits covered by mask difference is non-zero */
182 } while (v & (m0 ^ m1));
183 }
184
185 //返回下一个需要遍历的位置
186 return v;
187 }
9.3.2 核心算法测试
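下面是抽取核心代码的逻辑而写的测试代码(注意:代码中用int vt保存大表游标,只有在unsigned long为32位的平台(如Windows)上才能得到后面的测试结果;在unsigned long为64位的平台(如64位Linux)上,rev()的结果赋给int时会被截断,内层循环只会执行一次,需要把vt声明为unsigned long。另外代码用到了log2与printf,应补充包含<cmath>与<cstdio>)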
1 #include <iostream>
2 #include <vector>
3
4 using namespace std;
5
6 static unsigned long rev(unsigned long v) {
7 unsigned long s = 8 * sizeof(v); // bit size; must be power of 2
8 unsigned long mask = ~0;
9 while ((s >>= 1) > 0) {
10 mask ^= (mask << s);
11 v = ((v >> s) & mask) | ((v << s) & ~mask);
12 }
13 return v;
14 }
15
16 void printbits(int n, int x)
17 {
18 vector<int> reg;
19 int bit = 1;
20 while (x)
21 {
22 reg.push_back((n & bit) ? 1 : 0);
23 bit *= 2;
24 x--;
25 }
26 for (int i = reg.size() - 1; i >= 0; i--)
27 cout << reg[i];
28 }
29
30 void test_dictScan_iter(int smalltablesize, int largetablesize)
31 {
32 unsigned long v;
33 unsigned long m0, m1;
34
35 v = 0;
36 m0 = smalltablesize - 1;
37 m1 = largetablesize - 1;
38
39 do
40 {
41 printf("\nsmall v is: ");
42 printbits(v & m0, (int)log2(smalltablesize));
43 printf("\n");
44 int vt = v;
45
46 do
47 {
48 printf("large v is: ");
49 printbits(vt & m1, (int)log2(largetablesize));
50 printf("\n");
51
52 vt |= ~m1;
53 vt = rev(vt);
54 vt++;
55 vt = rev(vt);
56
57 //v = (((v | m0) + 1) & ~m0) | (v & m0);
58 } while (vt & (m0 ^ m1));
59
60 v |= ~m0;
61 v = rev(v);
62 v++;
63 v = rev(v);
64 } while (v != 0);
65 }
66
67 int main()
68 {
69 test_dictScan_iter(8, 32);
70 return 0;
71 }
测试结果
1 small v is: 000
2 large v is: 00000
3 large v is: 01000
4 large v is: 10000
5 large v is: 11000
6
7 small v is: 100
8 large v is: 00100
9 large v is: 01100
10 large v is: 10100
11 large v is: 11100
12
13 small v is: 010
14 large v is: 00010
15 large v is: 01010
16 large v is: 10010
17 large v is: 11010
18
19 small v is: 110
20 large v is: 00110
21 large v is: 01110
22 large v is: 10110
23 large v is: 11110
24
25 small v is: 001
26 large v is: 00001
27 large v is: 01001
28 large v is: 10001
29 large v is: 11001
30
31 small v is: 101
32 large v is: 00101
33 large v is: 01101
34 large v is: 10101
35 large v is: 11101
36
37 small v is: 011
38 large v is: 00011
39 large v is: 01011
40 large v is: 10011
41 large v is: 11011
42
43 small v is: 111
44 large v is: 00111
45 large v is: 01111
46 large v is: 10111
47 large v is: 11111
可见,无论v取何值,只要字典开始扩展了,都会遍历大表中,相应于小表的所有节点。具体的核心逻辑代码如下:
1 do {
2 de = t1->table[v & m1];
3
4 ...
5
6 /* Increment the reverse cursor not covered by the smaller mask.*/
7 v |= ~m1;
8 v = rev(v);
9 v++;
10 v = rev(v);
11
12 /* Continue while bits covered by mask difference is non-zero */
13 } while (v & (m0 ^ m1));
首先迭代t1中索引为v&m1的bucket,然后对v中被m1覆盖、但不被m0覆盖的那几位(即m0 ^ m1对应的位)按反向二进制的方式加1,此时v中被m0覆盖的低位保持不变。循环条件v & (m0 ^ m1)表示:一直循环到这几位全部回到0为止;这时进位才会传到m0覆盖的位上,相当于小表的游标前进了一步。
参考文章
https://www.cnblogs.com/chinxi/p/12235526.html
本文来自博客园,作者:Mr-xxx,转载请注明原文链接:https://www.cnblogs.com/MrLiuZF/p/14970967.html