redis 5.0.2 源码阅读——整数集合intset

redis中整数集合intset相关的文件为：intset.h与intset.c

intset的所有操作与操作一个已排序的整型数组 int a[N] 类似，只是会根据元素的取值范围选择 int16_t / int32_t / int64_t 三种编码之一，以节省内存。

一、数据结构

1 /**
2  * intset: a set of integers kept in sorted order, stored as a small header followed by a flexible array.
3  */
4 typedef struct intset {
5     uint32_t encoding; /* width in bytes of one element: INTSET_ENC_INT16, _INT32 or _INT64 */
6     uint32_t length; /* number of elements currently stored */
7     int8_t contents[]; /* raw element storage; accessed as int16_t/int32_t/int64_t according to encoding */
8 } intset;

intset的数据结构比较简单，使用了一个变长结构体，成员length记录当前成员数量，成员encoding记录当前的int类型，共有以下三种

1 /**
2  * Note that these encodings are ordered, so:
3  * INTSET_ENC_INT16 < INTSET_ENC_INT32 < INTSET_ENC_INT64.
4  * Possible values of the encoding field of struct intset; each one is the element width in bytes.
5  */
6 #define INTSET_ENC_INT16 (sizeof(int16_t))
7 #define INTSET_ENC_INT32 (sizeof(int32_t))
8 #define INTSET_ENC_INT64 (sizeof(int64_t))

并使用以下方法进行判断类型

 1 /**
 2  * Return the required encoding for the provided value.
 3  * The result is the narrowest of the three INTSET_ENC_ constants whose integer range contains v.
 4  */
 5 static uint8_t _intsetValueEncoding(int64_t v) {
 6     if (v < INT32_MIN || v > INT32_MAX) /* does not fit in int32_t */
 7         return INTSET_ENC_INT64;
 8     else if (v < INT16_MIN || v > INT16_MAX) /* fits in int32_t but not in int16_t */
 9         return INTSET_ENC_INT32;
10     else
11         return INTSET_ENC_INT16;
12 }

intset是已排序好的整数集合，其大致结构如下

1 /*
2 +--------+--------+--------...--------------+
3 |encoding|length  |contents(encoding*length)|
4 +--------+--------+--------...--------------+
5 */

intset无论机器本身的字节序如何，都严格按照小端字节序存储。因此在大端机器上，读写头部字段和元素时都要先做一次字节序转换。endianconv.h中有如下定义：

 1 /**
 2  * variants of the function doing the actual conversion only if the target
 3  * host is big endian
 4  * On a little-endian host the memrevNNifbe forms expand to a no-op and the intrevNNifbe forms to their argument.
 5  */
 6 #if (BYTE_ORDER == LITTLE_ENDIAN)
 7 #define memrev16ifbe(p) ((void)(0))
 8 #define memrev32ifbe(p) ((void)(0))
 9 #define memrev64ifbe(p) ((void)(0))
10 #define intrev16ifbe(v) (v)
11 #define intrev32ifbe(v) (v)
12 #define intrev64ifbe(v) (v)
13 #else /* not little-endian: forward to memrevNN / intrevNN (defined outside this excerpt; presumably byte-reversal routines) */
14 #define memrev16ifbe(p) memrev16(p)
15 #define memrev32ifbe(p) memrev32(p)
16 #define memrev64ifbe(p) memrev64(p)
17 #define intrev16ifbe(v) intrev16(v)
18 #define intrev32ifbe(v) intrev32(v)
19 #define intrev64ifbe(v) intrev64(v)
20 #endif

二、创建

 1 /**
 2  * Create an empty intset.
 3  * Only the header is allocated; length starts at 0 and no element storage is requested.
 4  */
 5 intset *intsetNew(void) {
 6     intset *is = zmalloc(sizeof(intset));
 7     // Start with the smallest encoding (2-byte elements); intrev32ifbe keeps the stored field little-endian
 8     is->encoding = intrev32ifbe(INTSET_ENC_INT16);
 9     is->length = 0;
10     return is;
11 }

刚创建好的intset是空的，默认使用最小的类型。其结构如下（图中encoding一栏写的16表示INTSET_ENC_INT16，实际存储的值是sizeof(int16_t)，即2）：

1 /*此处用一根“-”表示一字节，后同
2 +----+----+
3 |  16|   0|
4 +----+----+
5 */

三、操作

3.1 内存重新分配

1 /**
2  * Resize the intset
3  * Reallocate the set so that it has room for len elements of the current encoding; callers must use the returned pointer.
4  */
5 static intset *intsetResize(intset *is, uint32_t len) {
6     uint32_t size = len*intrev32ifbe(is->encoding); /* bytes needed for contents: element count times element width */
7     is = zrealloc(is,sizeof(intset)+size); /* header plus element storage */
8     return is;
9 }

3.2 查找元素

查找元素是否存在于intset中或应该插入的位置，因intset是已排序好的，所以使用了二分查找。

 1 /**
 2  * Search for the position of "value". Return 1 when the value was found and
 3  * sets "pos" to the position of the value within the intset. Return 0 when
 4  * the value is not present in the intset and sets "pos" to the position
 5  * where "value" can be inserted.
 6  * "pos" may be NULL, in which case no position is reported.
 7  * An empty set and values beyond the last or before the first element are answered without searching;
 8  * everything else goes through a binary search over the sorted contents.
 9  */
10 static uint8_t intsetSearch(intset *is, int64_t value, uint32_t *pos) {
11     int min = 0, max = intrev32ifbe(is->length)-1, mid = -1;
12     int64_t cur = -1;
13 
14     /**
15      * The value can never be found when the set is empty
16      * The reported insert position is 0 in that case.
17      */
18     if (intrev32ifbe(is->length) == 0) {
19         if (pos) *pos = 0;
20         return 0;
21     } else {
22         /**
23          * Check for the case where we know we cannot find the value,
24          * but do know the insert position.
25          * That is: value is greater than the last element or smaller than the first one.
26          */
27         if (value > _intsetGet(is,max)) {// greater than the last element: insert position is length
28             if (pos) *pos = intrev32ifbe(is->length);
29             return 0;
30         } else if (value < _intsetGet(is,0)) {// smaller than the first element: insert position is 0
31             if (pos) *pos = 0;
32             return 0;
33         }
34     }
35 
36     // Binary search over the closed index range [min, max]
37     while(max >= min) {
38         mid = ((unsigned int)min + (unsigned int)max) >> 1; // midpoint; the sum is computed in unsigned arithmetic
39         cur = _intsetGet(is,mid);
40         if (value > cur) {
41             min = mid+1;
42         } else if (value < cur) {
43             max = mid-1;
44         } else {
45             break;
46         }
47     }
48 
49     if (value == cur) {
50         if (pos) *pos = mid;// found: mid is the index of value
51         return 1;
52     } else {
53         if (pos) *pos = min;// not found: min is the index where value would be inserted
54         return 0;
55     }
56 }

3.3 元素位置调整

实现intset中元素的移动

 1 // Move every element from index `from` through the end of the set so that the moved run begins at index `to`
 2 static void intsetMoveTail(intset *is, uint32_t from, uint32_t to) {
 3     void *src, *dst;
 4     uint32_t bytes = intrev32ifbe(is->length)-from; // element count for now; scaled to a byte count below
 5     uint32_t encoding = intrev32ifbe(is->encoding);
 6 
 7     if (encoding == INTSET_ENC_INT64) {
 8         src = (int64_t*)is->contents+from;
 9         dst = (int64_t*)is->contents+to;
10         bytes *= sizeof(int64_t);
11     } else if (encoding == INTSET_ENC_INT32) {
12         src = (int32_t*)is->contents+from;
13         dst = (int32_t*)is->contents+to;
14         bytes *= sizeof(int32_t);
15     } else {
16         src = (int16_t*)is->contents+from;
17         dst = (int16_t*)is->contents+to;
18         bytes *= sizeof(int16_t);
19     }
20     memmove(dst,src,bytes); // source and destination both lie inside contents and may overlap
21 }

3.4 插入元素

若有以下intset

1 /*
2 +----+----+--+--+--+--+--+--+--+
3 |  16|   7| 1| 2| 3| 4| 5| 7| 8|
4 +----+----+--+--+--+--+--+--+--+
5           |contents
6 
7 */

3.4.1 插入函数

现在插入一个数字6，需要调用以下方法

 1 /**
 2  * Insert an integer in the intset
 3  * Returns the set (possibly reallocated). When success is non-NULL it is set to 1 if value was added, 0 if it was already present.
 4  */
 5 intset *intsetAdd(intset *is, int64_t value, uint8_t *success) {
 6     // Encoding required to store value
 7     uint8_t valenc = _intsetValueEncoding(value);
 8     uint32_t pos;
 9     // success is optional; only written when non-NULL
10     if (success)
11         *success = 1;// assume the insert succeeds until a duplicate is found
12 
13     /**
14      * Upgrade encoding if necessary. If we need to upgrade, we know that
15      * this value should be either appended (if > 0) or prepended (if < 0),
16      * because it lies outside the range of existing values.
17      * That path is handled entirely by intsetUpgradeAndAdd.
18      */
19     if (valenc > intrev32ifbe(is->encoding)) {
20         /**
21          * This always succeeds, so we don't need to curry *success.
22          * A value that needs a wider encoding than the set uses cannot already be stored in it.
23          */
24         return intsetUpgradeAndAdd(is,value);
25     } else {
26         /**
27          * Abort if the value is already present in the set.
28          * This call will populate "pos" with the right position to insert
29          * the value when it cannot be found.
30          * When the value is found the set is returned unchanged.
31          */
32         if (intsetSearch(is,value,&pos)) {
33             // value is already a member
34             if (success)
35                 *success = 0;// report through the out-parameter that nothing was inserted
36             return is;
37         }
38 
39         // Grow the allocation by one element
40         is = intsetResize(is,intrev32ifbe(is->length)+1);
41         if (pos < intrev32ifbe(is->length))// inserting before the current end (pos in 0 .. length-1)
42             intsetMoveTail(is,pos,pos+1);// shift the elements from pos onward one slot to the right
43     }
44 
45     // Write the new element into the freed slot
46     _intsetSet(is,pos,value);
47     // Increment the stored element count
48     is->length = intrev32ifbe(intrev32ifbe(is->length)+1);
49     return is;
50 }

3.4.2 插入位置查找

因intset是已排序好的，所以使用了二分查找。过程如下

 1 /*
 2 find 6
 3         +----+----+--+--+--+--+--+--+--+
 4         |  16|   7| 1| 2| 3| 4| 5| 7| 8|
 5         +----+----+--+--+--+--+--+--+--+
 6 pos               | 0| 1| 2| 3| 4| 5| 6|
 7 step1             |min=0
 8                                     |max=6
 9                            |mid=(0+6)>>1=3
10                            |mid_val=4
11 
12 pos               | 0| 1| 2| 3| 4| 5| 6|
13 step2                         |min=4
14                                     |max=6
15                                  |mid=(4+6)>>1=5
16                                  |mid_val=7
17 
18 pos               | 0| 1| 2| 3| 4| 5| 6|
19 step3                         |min=4
20                               |max=4
21                               |mid=(4+4)>>1=4
22                               |mid_val=5
23 
24 pos               | 0| 1| 2| 3| 4| 5| 6|
25 step4                            |min=5
26                               |max=4
27 min>max  break
28 */

6在intset中不存在，查找得到它应该插入到pos=5的位置。此时首先要扩展intset的contents，扩展后的结构为

1 /*        
2 +----+----+--+--+--+--+--+--+--+--+
3 |  16|   7| 1| 2| 3| 4| 5| 7| 8|  |
4 +----+----+--+--+--+--+--+--+--+--+
5 pos       | 0| 1| 2| 3| 4| 5| 6| 7|
6 */

然后把原来在pos=5及之后的所有的元素向后移一格，移动后结构

1 /*        
2 +----+----+--+--+--+--+--+--+--+--+
3 |  16|   7| 1| 2| 3| 4| 5| 7| 7| 8|
4 +----+----+--+--+--+--+--+--+--+--+
5 pos       | 0| 1| 2| 3| 4| 5| 6| 7|
6 */

移动使用的是memmove，它并不会修改未被覆盖到的内存，所以此时pos=5的值还是7，最后再修改pos=5的值

 1 /* Set the value at pos, using the configured encoding. The slot is written in the host's byte order and then passed through memrevNNifbe. */
 2 static void _intsetSet(intset *is, int pos, int64_t value) {
 3     uint32_t encoding = intrev32ifbe(is->encoding);
 4 
 5     if (encoding == INTSET_ENC_INT64) {
 6         ((int64_t*)is->contents)[pos] = value;
 7         memrev64ifbe(((int64_t*)is->contents)+pos);
 8     } else if (encoding == INTSET_ENC_INT32) {
 9         ((int32_t*)is->contents)[pos] = value; /* value is converted to int32_t by the assignment */
10         memrev32ifbe(((int32_t*)is->contents)+pos);
11     } else {
12         ((int16_t*)is->contents)[pos] = value; /* value is converted to int16_t by the assignment */
13         memrev16ifbe(((int16_t*)is->contents)+pos);
14     }
15 }

修改后并增加了length

1 /*        
2 +----+----+--+--+--+--+--+--+--+--+
3 |  16|   8| 1| 2| 3| 4| 5| 6| 7| 8|
4 +----+----+--+--+--+--+--+--+--+--+
5 pos       | 0| 1| 2| 3| 4| 5| 6| 7|
6 */

3.4.3 类型扩大

如果此时要插入的数字是65536，超出了int16_t所能表示的范围，要先进行扩展int类型操作

 1 static intset *intsetUpgradeAndAdd(intset *is, int64_t value) { /* switch the set to the encoding value needs, then add value at the front or the back */
 2     uint8_t curenc = intrev32ifbe(is->encoding); /* encoding the existing elements are stored in */
 3     uint8_t newenc = _intsetValueEncoding(value); /* encoding required by value */
 4     int length = intrev32ifbe(is->length);
 5     int prepend = value < 0 ? 1 : 0; /* negative value goes to index 0, otherwise to the end */
 6 
 7     /* First set new encoding and resize */
 8     is->encoding = intrev32ifbe(newenc);
 9     is = intsetResize(is,intrev32ifbe(is->length)+1);
10 
11     /* Upgrade back-to-front so we don't overwrite values.
12      * Note that the "prepend" variable is used to make sure we have an empty
13      * space at either the beginning or the end of the intset. */
14     while(length--)
15         _intsetSet(is,length+prepend,_intsetGetEncoded(is,length,curenc)); /* read with the old encoding, write with the new one */
16 
17     /* Set the value at the beginning or the end. */
18     if (prepend)
19         _intsetSet(is,0,value);
20     else
21         _intsetSet(is,intrev32ifbe(is->length),value); /* is->length still holds the old count, i.e. the first free index */
22     is->length = intrev32ifbe(intrev32ifbe(is->length)+1);
23     return is;
24 }

因为新值超出了原来的int类型所能表示的范围，所以若为正数，它一定比现有元素都大，应该插入在intset的最后；若为负数，则一定比现有元素都小，应该插入在最前面。扩容之后，从后往前把原来的数字以新的int类型逐个写到新的位置上，这样可以保证尚未处理的数字不会被覆盖。

3.5 删除操作

 1 /**
 2  * Delete integer from intset
 3  * Returns the set (possibly reallocated). When success is non-NULL it is set to 1 if value was removed, 0 if it was not present.
 4  */
 5 intset *intsetRemove(intset *is, int64_t value, int *success) {
 6     // Encoding required to store value
 7     uint8_t valenc = _intsetValueEncoding(value);
 8     uint32_t pos;
 9     if (success)
10         *success = 0;// out-parameter: assume nothing is removed until the value is found
11 
12     /**
13      * Only proceed when value fits in the set's current encoding (valenc <= encoding)
14      * and intsetSearch finds it; pos then holds its index.
15      */
16     if (valenc <= intrev32ifbe(is->encoding) && intsetSearch(is,value,&pos)) {
17         uint32_t len = intrev32ifbe(is->length);
18 
19         /**
20          * We know we can delete
21          * The value is present, so the removal below always happens.
22          */
23         if (success)
24             *success = 1;// report the removal
25 
26         /* Overwrite value with tail and update length */
27         if (pos < (len-1))
28             intsetMoveTail(is,pos+1,pos);// shift the elements after pos one slot to the left
29         // Shrink the allocation by one element
30         is = intsetResize(is,len-1);
31         // Store the decremented element count
32         is->length = intrev32ifbe(len-1);
33     }
34     return is;
35 }

找到指定元素之后，直接把后面的内存移至前面，然后resize。

参考文章

　　https://www.cnblogs.com/chinxi/p/12262901.html

posted @ 2021-07-06 17:36 Mr-xxx 阅读(59) 评论(0) 编辑收藏举报

刷新页面返回顶部