Redis源码 数据结构学习笔记(1)
Redis源码 数据结构学习笔记(1)
本次笔记主要记录学习redis字符串sds的收获。
优势分析
利用结构体里的内容在内存空间里顺序分布的特点,能从一个字符串的地址寻到它的相关信息,这是对原生 char *
做的一个巨大的突破。
显而易见的优势:
- 获取字符串长度:不再需要
O(n)
复杂度,因为关于它的信息储存在sdshdr
里面,我们只需要从字符串的首地址往前找。 - 二进制安全:原生的C并没有字符串的概念,只是用
\0
结尾的一串字符,这导致了很多问题。但如果显示的存储一个字符串的容量和长度,这种问题就很容易得到解决。
代码复用
这里举一例来简单说明:
sds sdsnewlen(const void *init, size_t initlen) {
struct sdshdr *sh;
// 根据是否有初始化内容,选择适当的内存分配方式
// T = O(N)
if (init) {
// zmalloc 不初始化所分配的内存
sh = zmalloc(sizeof(struct sdshdr)+initlen+1);
} else {
// zcalloc 将分配的内存全部初始化为 0
sh = zcalloc(sizeof(struct sdshdr)+initlen+1);
}
// 内存分配失败,返回
if (sh == NULL) return NULL;
// 设置初始化长度
sh->len = initlen;
// 新 sds 不预留任何空间
sh->free = 0;
// 如果有指定初始化内容,将它们复制到 sdshdr 的 buf 中
// T = O(N)
if (initlen && init)
memcpy(sh->buf, init, initlen);
// 以 \0 结尾
sh->buf[initlen] = '\0';
// 返回 buf 部分,而不是整个 sdshdr
return (char*)sh->buf;
}
可以看到这里是创建一个sds的最普遍的情况,即将一个已经存在的原始C字符串转化为sds字符串。甚至传入了len决定将前多少位进行转化。但我们经常使用的操作可能需要的参数更少,例如:
//创建空sds
sds sdsempty(void) {
return sdsnewlen("",0);
}
//通过原始C字符串创建sds
sds sdsnew(const char *init) {
size_t initlen = (init == NULL) ? 0 : strlen(init);
return sdsnewlen(init, initlen);
}
//copy一个sds字符串
sds sdsdup(const sds s) {
return sdsnewlen(s, sdslen(s));
}
效率
同样举例说明:
void sdsclear(sds s) {
// 取出 sdshdr
struct sdshdr *sh = (void*) (s-(sizeof(struct sdshdr)));
// 重新计算属性
sh->free += sh->len;
sh->len = 0;
// 将结束符放到最前面(相当于惰性地删除 buf 中的内容)
sh->buf[0] = '\0';
}
这是一个清空sds字符串的函数(仅清空字符串,不释放空间),这里没有用memset
将字符串的全部内容置为0,而是通过修改参数和结束符的位置来表达了同样的含义。兼容原生Cstring.h
库里的函数,时间复杂度仅为O(1)
printf组函数的实现
原生C里printf组也可以对字符串进行灵活的处理,redis库也提供了对sds进行类似处理的函数。
这里以没用用原生printf组函数的实现为例:
/* This function is similar to sdscatprintf, but much faster as it does
* not rely on sprintf() family functions implemented by the libc that
* are often very slow. Moreover directly handling the sds string as
* new data is concatenated provides a performance improvement.
*
* However this function only handles an incompatible subset of printf-alike
* format specifiers:
*
* %s - C String
* %S - SDS string
* %i - signed int
* %I - 64 bit signed integer (long long, int64_t)
* %u - unsigned int
* %U - 64 bit unsigned integer (unsigned long long, uint64_t)
* %% - Verbatim "%" character.
*/
sds sdscatfmt(sds s, char const *fmt, ...) {
struct sdshdr *sh = (void*) (s-(sizeof(struct sdshdr)));
size_t initlen = sdslen(s);
const char *f = fmt;
int i;
va_list ap;
va_start(ap,fmt);
f = fmt; /* Next format specifier byte to process. */
i = initlen; /* Position of the next byte to write to dest str. */
while(*f) {
char next, *str;
size_t l;
long long num;
unsigned long long unum;
/* Make sure there is always space for at least 1 char. */
if (sh->free == 0) {
s = sdsMakeRoomFor(s,1);
sh = (void*) (s-(sizeof(struct sdshdr)));
}
switch(*f) {
case '%':
next = *(f+1);
f++;
switch(next) {
case 's':
case 'S':
str = va_arg(ap,char*);
l = (next == 's') ? strlen(str) : sdslen(str);
if (sh->free < l) {
s = sdsMakeRoomFor(s,l);
sh = (void*) (s-(sizeof(struct sdshdr)));
}
memcpy(s+i,str,l);
sh->len += l;
sh->free -= l;
i += l;
break;
case 'i':
case 'I':
if (next == 'i')
num = va_arg(ap,int);
else
num = va_arg(ap,long long);
{
char buf[SDS_LLSTR_SIZE];
l = sdsll2str(buf,num);
if (sh->free < l) {
s = sdsMakeRoomFor(s,l);
sh = (void*) (s-(sizeof(struct sdshdr)));
}
memcpy(s+i,buf,l);
sh->len += l;
sh->free -= l;
i += l;
}
break;
case 'u':
case 'U':
if (next == 'u')
unum = va_arg(ap,unsigned int);
else
unum = va_arg(ap,unsigned long long);
{
char buf[SDS_LLSTR_SIZE];
l = sdsull2str(buf,unum);
if (sh->free < l) {
s = sdsMakeRoomFor(s,l);
sh = (void*) (s-(sizeof(struct sdshdr)));
}
memcpy(s+i,buf,l);
sh->len += l;
sh->free -= l;
i += l;
}
break;
default: /* Handle %% and generally %<unknown>. */
s[i++] = next;
sh->len += 1;
sh->free -= 1;
break;
}
break;
default:
s[i++] = *f;
sh->len += 1;
sh->free -= 1;
break;
}
f++;
}
va_end(ap);
/* Add null-term */
s[i] = '\0';
return s;
}
实现了sprintf
的部分内容,用法完全一致,而且因为没有用原生printf组
函数 ,效率较高,实现上也并不困难,只涉及到简单的解析和对可变参数的使用。
Test
实现了一个库首先要对正确性进行测试,sds的测试直接写到了sds.c
里,这是一种很方便的做法。
#ifdef SDS_TEST_MAIN
#include <stdio.h>
#include "testhelp.h"
#include "limits.h"
int main(void) {
{
struct sdshdr *sh;
sds x = sdsnew("foo"), y;
test_cond("Create a string and obtain the length",
sdslen(x) == 3 && memcmp(x,"foo\0",4) == 0)
sdsfree(x);
x = sdsnewlen("foo",2);
test_cond("Create a string with specified length",
sdslen(x) == 2 && memcmp(x,"fo\0",3) == 0)
x = sdscat(x,"bar");
test_cond("Strings concatenation",
sdslen(x) == 5 && memcmp(x,"fobar\0",6) == 0);
x = sdscpy(x,"a");
test_cond("sdscpy() against an originally longer string",
sdslen(x) == 1 && memcmp(x,"a\0",2) == 0)
x = sdscpy(x,"xyzxxxxxxxxxxyyyyyyyyyykkkkkkkkkk");
test_cond("sdscpy() against an originally shorter string",
sdslen(x) == 33 &&
memcmp(x,"xyzxxxxxxxxxxyyyyyyyyyykkkkkkkkkk\0",33) == 0)
sdsfree(x);
x = sdscatprintf(sdsempty(),"%d",123);
test_cond("sdscatprintf() seems working in the base case",
sdslen(x) == 3 && memcmp(x,"123\0",4) == 0)
sdsfree(x);
x = sdsnew("--");
x = sdscatfmt(x, "Hello %s World %I,%I--", "Hi!", LLONG_MIN,LLONG_MAX);
test_cond("sdscatfmt() seems working in the base case",
sdslen(x) == 60 &&
memcmp(x,"--Hello Hi! World -9223372036854775808,"
"9223372036854775807--",60) == 0)
sdsfree(x);
x = sdsnew("--");
x = sdscatfmt(x, "%u,%U--", UINT_MAX, ULLONG_MAX);
test_cond("sdscatfmt() seems working with unsigned numbers",
sdslen(x) == 35 &&
memcmp(x,"--4294967295,18446744073709551615--",35) == 0)
sdsfree(x);
x = sdsnew("xxciaoyyy");
sdstrim(x,"xy");
test_cond("sdstrim() correctly trims characters",
sdslen(x) == 4 && memcmp(x,"ciao\0",5) == 0)
y = sdsdup(x);
sdsrange(y,1,1);
test_cond("sdsrange(...,1,1)",
sdslen(y) == 1 && memcmp(y,"i\0",2) == 0)
sdsfree(y);
y = sdsdup(x);
sdsrange(y,1,-1);
test_cond("sdsrange(...,1,-1)",
sdslen(y) == 3 && memcmp(y,"iao\0",4) == 0)
sdsfree(y);
y = sdsdup(x);
sdsrange(y,-2,-1);
test_cond("sdsrange(...,-2,-1)",
sdslen(y) == 2 && memcmp(y,"ao\0",3) == 0)
sdsfree(y);
y = sdsdup(x);
sdsrange(y,2,1);
test_cond("sdsrange(...,2,1)",
sdslen(y) == 0 && memcmp(y,"\0",1) == 0)
sdsfree(y);
y = sdsdup(x);
sdsrange(y,1,100);
test_cond("sdsrange(...,1,100)",
sdslen(y) == 3 && memcmp(y,"iao\0",4) == 0)
sdsfree(y);
y = sdsdup(x);
sdsrange(y,100,100);
test_cond("sdsrange(...,100,100)",
sdslen(y) == 0 && memcmp(y,"\0",1) == 0)
sdsfree(y);
sdsfree(x);
x = sdsnew("foo");
y = sdsnew("foa");
test_cond("sdscmp(foo,foa)", sdscmp(x,y) > 0)
sdsfree(y);
sdsfree(x);
x = sdsnew("bar");
y = sdsnew("bar");
test_cond("sdscmp(bar,bar)", sdscmp(x,y) == 0)
sdsfree(y);
sdsfree(x);
x = sdsnew("aar");
y = sdsnew("bar");
test_cond("sdscmp(bar,bar)", sdscmp(x,y) < 0)
sdsfree(y);
sdsfree(x);
x = sdsnewlen("\a\n\0foo\r",7);
y = sdscatrepr(sdsempty(),x,sdslen(x));
test_cond("sdscatrepr(...data...)",
memcmp(y,"\"\\a\\n\\x00foo\\r\"",15) == 0)
{
int oldfree;
sdsfree(x);
x = sdsnew("0");
sh = (void*) (x-(sizeof(struct sdshdr)));
test_cond("sdsnew() free/len buffers", sh->len == 1 && sh->free == 0);
x = sdsMakeRoomFor(x,1);
sh = (void*) (x-(sizeof(struct sdshdr)));
test_cond("sdsMakeRoomFor()", sh->len == 1 && sh->free > 0);
oldfree = sh->free;
x[1] = '1';
sdsIncrLen(x,1);
test_cond("sdsIncrLen() -- content", x[0] == '0' && x[1] == '1');
test_cond("sdsIncrLen() -- len", sh->len == 2);
test_cond("sdsIncrLen() -- free", sh->free == oldfree-1);
}
}
test_report()
return 0;
}
#endif
我们只需要在Makefile中略做修改就可以开启测试,值得学习。
Another
sds.c
作为一个相对完备的库,还有很多提供的对sds字符串进行操作的函数,限于篇幅本笔记不能一一讲解。实现难度不高但是要对C语言的内存分配和指针有较透彻的理解。
参考资料
- Redis-3.0-sds.c(含注释) (访问日期:2024年7月2日)
联系方式
邮箱:xiongzile99@gmail.com