redis 5.0.7 源码阅读——跳跃表skiplist
redis中并没有专门给跳跃表两个文件。在5.0.7的版本中,结构体的声明与定义、接口的声明在server.h中,接口的定义在t_zset.c中,所有开头为zsl的函数。
一、数据结构
单个节点:
typedef struct zskiplistNode { //key,唯一 sds ele; //分值,可重复 double score; //后退指针 struct zskiplistNode *backward; //层 struct zskiplistLevel { //前进指针 struct zskiplistNode *forward; //到本层下一节点的跨度,用于计算rank unsigned long span; } level[]; } zskiplistNode;
zskiplistNode定义了跳跃表中每个节点的数据结构,它是一个变长结构体。
1 /* 2 +------------------------+ 3 |sds ele | /+-----------------------------+ 4 +------------------------+ / |struct zskiplistNode *forward| 5 |double score | / +-----------------------------+ 6 +------------------------+ / |unsigned long span | 7 |zskiplistNode * backward| / +-----------------------------+ 8 +------------------------+/ . . 9 |zskiplistLevel level[] | . . 10 +------------------------+\ . . 11 \ +-----------------------------+ 12 \ |struct zskiplistNode *forward| 13 \ +-----------------------------+ 14 \ |unsigned long span | 15 \+-----------------------------+ 16 */
将用以下结构表示:
1 /* 2 +--------+ 3 |level[1]| 4 |1(span) | 5 +--------+ 6 |level[0]| 7 |1(span) | 8 +--------+ 9 |backward| 10 +--------+ 11 |score | 12 +--------+ 13 |ele | 14 +--------+ 15 */
如:
1 /* 2 +--------+ +--------+ +--------+ 3 |level[1]|--------------->|level[1]|--------------->|level[1]| 4 |2 | |2 | |0 | 5 +--------+ +--------+ +--------+ +--------+ +--------+ 6 |level[0]|-->|level[0]|-->|level[0]|-->|level[0]|-->|level[0]| 7 |1 | |1 | |1 | |1 | |0 | 8 +--------+ +--------+ +--------+ +--------+ +--------+ 9 |backward|<--|backward|<--|backward|<--|backward|<--|backward| 10 +--------+ +--------+ +--------+ +--------+ +--------+ 11 |1 | |2 | |3 | |4 | |5 | 12 +--------+ +--------+ +--------+ +--------+ +--------+ 13 |a | |b | |c | |d | |e | 14 +--------+ +--------+ +--------+ +--------+ +--------+ 15 */
跳表:
1 typedef struct zskiplist { 2 //头/尾节点 3 struct zskiplistNode *header, *tail; 4 //总长度 5 unsigned long length; 6 //总层数 7 int level; 8 } zskiplist;
因其头节点固定为空节点,固整体结构:
1 /* 2 +--------+ +--------+ +--------+ 3 |level[1]|--------------->|level[1]|--------------->|level[1]| 4 |2 | |2 | |0 | 5 +--------+ +--------+ +--------+ +--------+ +--------+ 6 |level[0]|-->|level[0]|-->|level[0]|-->|level[0]|-->|level[0]| 7 |1 | |1 | |1 | |1 | |0 | 8 +--------+ +--------+ +--------+ +--------+ +--------+ 9 |backward|<--|backward|<--|backward|<--|backward|<--|backward| 10 +--------+ +--------+ +--------+ +--------+ +--------+ 11 |0 | |2 | |3 | |4 | |5 | 12 +--------+ +--------+ +--------+ +--------+ +--------+ 13 |NULL | |b | |c | |d | |e | 14 +-->+--------+ +--------+ +--------+ +--------+ +--------+<--+ 15 | | 16 | +--------+ | 17 +---|header | | 18 +--------+ | 19 |tail |-------------------------------------------------------+ 20 +--------+ 21 |length=4| 22 +--------+ 23 |level=2 | 24 +--------+ 25 */
每个level层都是一条单身链表,其中level[0]中包含所有元素。
二、创建
根据指定的level,创建一个跳表节点:
1 zskiplistNode *zslCreateNode(int level, double score, sds ele) { 2 zskiplistNode *zn = 3 zmalloc(sizeof(*zn)+level*sizeof(struct zskiplistLevel)); 4 zn->score = score; 5 zn->ele = ele; 6 return zn; 7 }
创建一个跳表:
1 #define ZSKIPLIST_MAXLEVEL 64 /* Should be enough for 2^64 elements */ 2 3 zskiplist *zslCreate(void) { 4 int j; 5 zskiplist *zsl; 6 7 zsl = zmalloc(sizeof(*zsl)); 8 zsl->level = 1; 9 zsl->length = 0; 10 zsl->header = zslCreateNode(ZSKIPLIST_MAXLEVEL,0,NULL); 11 for (j = 0; j < ZSKIPLIST_MAXLEVEL; j++) { 12 zsl->header->level[j].forward = NULL; 13 zsl->header->level[j].span = 0; 14 } 15 zsl->header->backward = NULL; 16 zsl->tail = NULL; 17 return zsl; 18 }
redis中定义的最大层数为64层。且在刚创建时,会生成一个空的头节点,这样就可以不用再考虑节点数从0至1或者从1至0时要处理的各种特殊情况。
刚创完的跳表结构(结构中以4做为最大层数,后同):
1 /* 2 +--------+ 3 |level[3]|-->NULL 4 |0 | 5 +--------+ 6 |level[2]|-->NULL 7 |0 | 8 +--------+ 9 |level[1]|-->NULL 10 |0 | 11 +--------+ 12 |level[0]|-->NULL 13 |0 | 14 +--------+ 15 NULL<-|backward| 16 +--------+ 17 |0 | 18 +--------+ 19 |NULL | 20 +-->+--------+ 21 | 22 | +--------+ 23 +---|header | 24 +--------+ 25 |tail |-->NULL 26 +--------+ 27 |length=0| 28 +--------+ 29 |level=1 | 30 +--------+ 31 */
三、插入节点
1 #define ZSKIPLIST_P 0.25 /* Skiplist P = 1/4 */ 2 3 int zslRandomLevel(void) { 4 int level = 1; 5 while ((random()&0xFFFF) < (ZSKIPLIST_P * 0xFFFF)) 6 level += 1; 7 return (level<ZSKIPLIST_MAXLEVEL) ? level : ZSKIPLIST_MAXLEVEL; 8 }
redis中使用的决定新插入节点层数据的方法是抛硬币法,且“硬币”只有25%的几率是正面。
插入方法:
1 zskiplistNode *zslInsert(zskiplist *zsl, double score, sds ele) { 2 //update数组,用于存储查找路径 3 zskiplistNode *update[ZSKIPLIST_MAXLEVEL], *x; 4 5 //rank数组,用于存储每层路径节点的排名 6 unsigned int rank[ZSKIPLIST_MAXLEVEL]; 7 int i, level; 8 9 serverAssert(!isnan(score)); 10 x = zsl->header; 11 12 //先查找插入位置 13 for (i = zsl->level-1; i >= 0; i--) { 14 /* store rank that is crossed to reach the insert position */ 15 rank[i] = i == (zsl->level-1) ? 0 : rank[i+1]; 16 while (x->level[i].forward && 17 (x->level[i].forward->score < score || 18 (x->level[i].forward->score == score && 19 sdscmp(x->level[i].forward->ele,ele) < 0))) 20 { 21 rank[i] += x->level[i].span; 22 x = x->level[i].forward; 23 } 24 update[i] = x; 25 } 26 27 //随机一个level 28 level = zslRandomLevel(); 29 30 //若当前最大level不够,则补齐update与rank数组 31 if (level > zsl->level) { 32 for (i = zsl->level; i < level; i++) { 33 rank[i] = 0; 34 update[i] = zsl->header; 35 update[i]->level[i].span = zsl->length; 36 } 37 zsl->level = level; 38 } 39 40 //创建一个节点,并插入 41 x = zslCreateNode(level,score,ele); 42 for (i = 0; i < level; i++) { 43 x->level[i].forward = update[i]->level[i].forward; 44 update[i]->level[i].forward = x; 45 46 x->level[i].span = update[i]->level[i].span - (rank[0] - rank[i]); 47 update[i]->level[i].span = (rank[0] - rank[i]) + 1; 48 } 49 50 //update数组中,比插入节点level更高的各成员的跨度增加 51 for (i = level; i < zsl->level; i++) { 52 update[i]->level[i].span++; 53 } 54 55 x->backward = (update[0] == zsl->header) ? NULL : update[0]; 56 if (x->level[0].forward) 57 x->level[0].forward->backward = x; 58 else 59 zsl->tail = x; 60 zsl->length++; 61 return x; 62 }
从注释可知,redis的跳表允许同score的情况发生,但是不允许同ele,且是由调用者在外部保证。若插入顺序为e,b,c,d,则插入e时:
step1、定义update数组与rank数组。
1 /* 2 update rank 3 +--------+ +--------+ 4 |NULL | |0 | 5 +--------+ +--------+ 6 |NULL | |0 | 7 +--------+ +--------+ 8 |NULL | |0 | 9 +--------+ +--------+ 10 |NULL | |0 | 11 +--------+ +--------+ 12 */
实际在linux环境运行时,不会默认初始化,应该是一堆脏数据,此处是为了方便处理结构
step2、查找位置后
1 /* 2 update rank 3 +--------+ +--------+ 4 |NULL | |0 | 5 +--------+ +--------+ 6 |NULL | |0 | 7 +--------+ +--------+ 8 |NULL | |0 | 9 +--------+ +--------+ 10 |header | |0 | 11 +--------+ +--------+ 12 */
step3、e的level为2,比跳表的大,故要补齐update与rank数组
1 /* 2 update rank 3 +--------+ +--------+ 4 |NULL | |0 | 5 +--------+ +--------+ 6 |NULL | |0 | 7 +--------+ +--------+ 8 |header | |0 | 9 +--------+ +--------+ 10 |header | |0 | 11 +--------+ +--------+ 12 */
step4、插入节点,与单身链表插入相同,将新节点e各层,插入到update数组中记录的各层节点之后,并使用rank数组,计算跨度
1 /* 2 +--------+ 3 |level[3]|-->NULL 4 |0 | 5 +--------+ 6 |level[2]|-->NULL 7 |0 | 8 +--------+ +--------+ 9 |level[1]|-->|level[1]|-->NULL 10 |1 | |0 | 11 +--------+ +--------+ 12 |level[0]|-->|level[0]|-->NULL 13 |1 | |0 | 14 +--------+ +--------+ 15 NULL<-|backward| |backward| 16 +--------+ +--------+ 17 |0 | |5 | 18 +--------+ +--------+ 19 |NULL | |e | 20 +-->+--------+ +--------+ 21 | 22 | +--------+ 23 +---|header | 24 +--------+ 25 |tail | 26 +--------+ 27 |length=0| 28 +--------+ 29 |level=1 | 30 +--------+ 31 */
step5、处理新插入节点的backward指针,与跳表的tail指针:
1 /* 2 +--------+ 3 |level[3]|-->NULL 4 |0 | 5 +--------+ 6 |level[2]|-->NULL 7 |0 | 8 +--------+ +--------+ 9 |level[1]|-->|level[1]|-->NULL 10 |1 | |0 | 11 +--------+ +--------+ 12 |level[0]|-->|level[0]|-->NULL 13 |1 | |0 | 14 +--------+ +--------+ 15 NULL<-|backward| |backward| 16 +--------+ +--------+ 17 |0 | |5 | 18 +--------+ +--------+ 19 |NULL | |e | 20 +-->+--------+ +--------+<--+ 21 | | 22 | +--------+ | 23 +---|header | | 24 +--------+ | 25 |tail |----------------+ 26 +--------+ 27 |length=1| 28 +--------+ 29 |level=2 | 30 +--------+ 31 32 */
此时插入b:
找到位置后的update与rank数组:
1 /* 2 update rank 3 +--------+ +--------+ 4 |NULL | |0 | 5 +--------+ +--------+ 6 |NULL | |0 | 7 +--------+ +--------+ 8 |header | |0 | 9 +--------+ +--------+ 10 |header | |0 | 11 +--------+ +--------+ 12 */
插入b节点后:
1 /* 2 +--------+ 3 |level[3]|-->NULL 4 |0 | 5 +--------+ 6 |level[2]|-->NULL 7 |0 | 8 +--------+ +--------+ 9 |level[1]|--------------->|level[1]|-->NULL 10 |2 | |0 | 11 +--------+ +--------+ +--------+ 12 |level[0]|-->|level[0]|-->|level[0]|-->NULL 13 |1 | |1 | |0 | 14 +--------+ +--------+ +--------+ 15 NULL<-|backward| |backward|<--|backward| 16 +--------+ +--------+ +--------+ 17 |0 | |2 | |5 | 18 +--------+ +--------+ +--------+ 19 |NULL | |b | |e | 20 +-->+--------+ +--------+ +--------+<--+ 21 | | 22 | +--------+ | 23 +---|header | | 24 +--------+ | 25 |tail |-----------------------------+ 26 +--------+ 27 |length=2| 28 +--------+ 29 |level=2 | 30 +--------+ 31 */
需要注意的是,update数组idx = 1的节点并没有新的插入操作,span要自增,表示本层跨度增加了1。
插入c时的update与rank数组:
1 /* 2 update rank 3 +--------+ +--------+ 4 |NULL | |0 | 5 +--------+ +--------+ 6 |NULL | |0 | 7 +--------+ +--------+ 8 |header | |0 | 9 +--------+ +--------+ 10 |b | |1 | 11 +--------+ +--------+ 12 */
插入c后:
1 /* 2 +--------+ 3 |level[3]|-->NULL 4 |0 | 5 +--------+ 6 |level[2]|-->NULL 7 |0 | 8 +--------+ +--------+ +--------+ 9 |level[1]|--------------->|level[1]|-->|level[1]|-->NULL 10 |2 | |1 | |0 | 11 +--------+ +--------+ +--------+ +--------+ 12 |level[0]|-->|level[0]|-->|level[0]|-->|level[0]|-->NULL 13 |1 | |1 | |1 | |0 | 14 +--------+ +--------+ +--------+ +--------+ 15 NULL<-|backward| |backward|<--|backward|<--|backward| 16 +--------+ +--------+ +--------+ +--------+ 17 |0 | |2 | |3 | |5 | 18 +--------+ +--------+ +--------+ +--------+ 19 |NULL | |b | |c | |e | 20 +-->+--------+ +--------+ +--------+ +--------+<--+ 21 | | 22 | +--------+ | 23 +---|header | | 24 +--------+ | 25 |tail |------------------------------------------+ 26 +--------+ 27 |length=3| 28 +--------+ 29 |level=2 | 30 +--------+ 31 /*
最后插入d:
update与rank数组:
1 /* 2 update rank 3 +--------+ +--------+ 4 |NULL | |0 | 5 +--------+ +--------+ 6 |NULL | |0 | 7 +--------+ +--------+ 8 |c | |2 | 9 +--------+ +--------+ 10 |c | |2 | 11 +--------+ +--------+ 12 */
插入d:
1 /* 2 +--------+ 3 |level[3]|-->NULL 4 |0 | 5 +--------+ 6 |level[2]|-->NULL 7 |0 | 8 +--------+ +--------+ +--------+ 9 |level[1]|--------------->|level[1]|--------------->|level[1]|-->NULL 10 |2 | |2 | |0 | 11 +--------+ +--------+ +--------+ +--------+ +--------+ 12 |level[0]|-->|level[0]|-->|level[0]|-->|level[0]|-->|level[0]|-->NULL 13 |1 | |1 | |1 | |1 | |0 | 14 +--------+ +--------+ +--------+ +--------+ +--------+ 15 NULL<-|backward| |backward|<--|backward|<--|backward|<--|backward| 16 +--------+ +--------+ +--------+ +--------+ +--------+ 17 |0 | |2 | |3 | |4 | |5 | 18 +--------+ +--------+ +--------+ +--------+ +--------+ 19 |NULL | |b | |c | |d | |e | 20 +-->+--------+ +--------+ +--------+ +--------+ +--------+<--+ 21 | | 22 | +--------+ | 23 +---|header | | 24 +--------+ | 25 |tail |-------------------------------------------------------+ 26 +--------+ 27 |length=4| 28 +--------+ 29 |level=2 | 30 +--------+ 31 /*
如果此时要新插入节点a,score为4.5,则update与rank数组分别为:
1 /* 2 update rank 3 +--------+ +--------+ 4 |NULL | |0 | 5 +--------+ +--------+ 6 |NULL | |0 | 7 +--------+ +--------+ 8 |c | |2 | 9 +--------+ +--------+ 10 |d | |3 | 11 +--------+ +--------+ 12 */
四、删除节点
在已经查找到位置,与已知update数组时的删除方法:
1 void zslDeleteNode(zskiplist *zsl, zskiplistNode *x, zskiplistNode **update) { 2 int i; 3 for (i = 0; i < zsl->level; i++) { 4 if (update[i]->level[i].forward == x) { 5 update[i]->level[i].span += x->level[i].span - 1; 6 update[i]->level[i].forward = x->level[i].forward; 7 } else { 8 update[i]->level[i].span -= 1; 9 } 10 } 11 if (x->level[0].forward) { 12 x->level[0].forward->backward = x->backward; 13 } else { 14 zsl->tail = x->backward; 15 } 16 while(zsl->level > 1 && zsl->header->level[zsl->level-1].forward == NULL) 17 zsl->level--; 18 zsl->length--; 19 }
删除本节点之后,对应路径相应得做处理。
从跳表中删除指定节点的操作:
1 int zslDelete(zskiplist *zsl, double score, sds ele, zskiplistNode **node) { 2 zskiplistNode *update[ZSKIPLIST_MAXLEVEL], *x; 3 int i; 4 5 //先用score与ele查找,生成update数组 6 x = zsl->header; 7 for (i = zsl->level-1; i >= 0; i--) { 8 while (x->level[i].forward && 9 (x->level[i].forward->score < score || 10 (x->level[i].forward->score == score && 11 sdscmp(x->level[i].forward->ele,ele) < 0))) 12 { 13 x = x->level[i].forward; 14 } 15 update[i] = x; 16 } 17 18 //跳表允许同score,防止误删,做一下ele校验 19 if (x && score == x->score && sdscmp(x->ele,ele) == 0) { 20 zslDeleteNode(zsl, x, update); 21 if (!node) 22 zslFreeNode(x); 23 else 24 *node = x; 25 return 1; 26 } 27 return 0; 28 }
如以下跳表:
1 /* 2 +--------+ 3 |level[3]|-->NULL 4 |0 | 5 +--------+ 6 |level[2]|-->NULL 7 |0 | 8 +--------+ +--------+ +--------+ 9 |level[1]|--------------->|level[1]|--------------->|level[1]|-->NULL 10 |2 | |2 | |0 | 11 +--------+ +--------+ +--------+ +--------+ +--------+ 12 |level[0]|-->|level[0]|-->|level[0]|-->|level[0]|-->|level[0]|-->NULL 13 |1 | |1 | |1 | |1 | |0 | 14 +--------+ +--------+ +--------+ +--------+ +--------+ 15 NULL<-|backward| |backward|<--|backward|<--|backward|<--|backward| 16 +--------+ +--------+ +--------+ +--------+ +--------+ 17 |0 | |2 | |3 | |4 | |5 | 18 +--------+ +--------+ +--------+ +--------+ +--------+ 19 |NULL | |b | |c | |d | |e | 20 +-->+--------+ +--------+ +--------+ +--------+ +--------+<--+ 21 | | 22 | +--------+ | 23 +---|header | | 24 +--------+ | 25 |tail |-------------------------------------------------------+ 26 +--------+ 27 |length=4| 28 +--------+ 29 |level=2 | 30 +--------+ 31 /*
要删除节点d,生成的update数组为:
1 /* 2 update 3 +--------+ 4 |NULL | 5 +--------+ 6 |NULL | 7 +--------+ 8 |c | 9 +--------+ 10 |c | 11 +--------+ 12 */
由于d的level为1,故在level[0]层,使用从单向链表中删除节点的操作,把d移出,再给高于level[0]的update数组中所有成员的span自减,节点少了,跨度要跟着降低。
删除d之后的跳表:
1 /* 2 +--------+ 3 |level[3]|-->NULL 4 |0 | 5 +--------+ 6 |level[2]|-->NULL 7 |0 | 8 +--------+ +--------+ +--------+ 9 |level[1]|--------------->|level[1]|-->|level[1]|-->NULL 10 |2 | |1 | |0 | 11 +--------+ +--------+ +--------+ +--------+ 12 |level[0]|-->|level[0]|-->|level[0]|-->|level[0]|-->NULL 13 |1 | |1 | |1 | |0 | 14 +--------+ +--------+ +--------+ +--------+ 15 NULL<-|backward| |backward|<--|backward|<--|backward| 16 +--------+ +--------+ +--------+ +--------+ 17 |0 | |2 | |3 | |5 | 18 +--------+ +--------+ +--------+ +--------+ 19 |NULL | |b | |c | |e | 20 +-->+--------+ +--------+ +--------+ +--------+<--+ 21 | | 22 | +--------+ | 23 +---|header | | 24 +--------+ | 25 |tail |------------------------------------------+ 26 +--------+ 27 |length=3| 28 +--------+ 29 |level=2 | 30 +--------+ 31 /*
五、销毁
1 void zslFreeNode(zskiplistNode *node) { 2 sdsfree(node->ele); 3 zfree(node); 4 } 5 6 void zslFree(zskiplist *zsl) { 7 zskiplistNode *node = zsl->header->level[0].forward, *next; 8 9 zfree(zsl->header); 10 while(node) { 11 next = node->level[0].forward; 12 zslFreeNode(node); 13 node = next; 14 } 15 zfree(zsl); 16 }
销毁操作本身只是在level[0]层遍历所有节点,依次销毁。
redis 5.0.7 下载链接
http://download.redis.io/releases/redis-5.0.7.tar.gz
源码阅读顺序参考:
https://github.com/huangz1990/blog/blob/master/diary/2014/how-to-read-redis-source-code.rst