看高手是如何抠存储和执行效率的
今天在分析leveldb(google的bigtable的核心组件之一)代码时看到了如下的一段代码:
leveldb\util\coding.cc
char* EncodeVarint32(char* dst, uint32_t v) {
// Operate on characters as unsigneds
unsigned char* ptr = reinterpret_cast<unsigned char*>(dst);
static const int B = 128;
if (v < (1<<7)) {
*(ptr++) = v;
} else if (v < (1<<14)) {
*(ptr++) = v | B;
*(ptr++) = v>>7;
} else if (v < (1<<21)) {
*(ptr++) = v | B;
*(ptr++) = (v>>7) | B;
*(ptr++) = v>>14;
} else if (v < (1<<28)) {
*(ptr++) = v | B;
*(ptr++) = (v>>7) | B;
*(ptr++) = (v>>14) | B;
*(ptr++) = v>>21;
} else {
*(ptr++) = v | B;
*(ptr++) = (v>>7) | B;
*(ptr++) = (v>>14) | B;
*(ptr++) = (v>>21) | B;
*(ptr++) = v>>28;
}
return reinterpret_cast<char*>(ptr);
}
// Operate on characters as unsigneds
unsigned char* ptr = reinterpret_cast<unsigned char*>(dst);
static const int B = 128;
if (v < (1<<7)) {
*(ptr++) = v;
} else if (v < (1<<14)) {
*(ptr++) = v | B;
*(ptr++) = v>>7;
} else if (v < (1<<21)) {
*(ptr++) = v | B;
*(ptr++) = (v>>7) | B;
*(ptr++) = v>>14;
} else if (v < (1<<28)) {
*(ptr++) = v | B;
*(ptr++) = (v>>7) | B;
*(ptr++) = (v>>14) | B;
*(ptr++) = v>>21;
} else {
*(ptr++) = v | B;
*(ptr++) = (v>>7) | B;
*(ptr++) = (v>>14) | B;
*(ptr++) = (v>>21) | B;
*(ptr++) = v>>28;
}
return reinterpret_cast<char*>(ptr);
}
目标很明确,就是减少int数的内存存储长度,也就是当int小于128时,以一个字节存储,大于128时按数字的大小逐步向上加一个字节,这样最长的整数用二进制表示时最大只有5个字节长度表示就基本上够用了。这还有个名词叫varint,即可变长度整数。无独有偶,在之前我看过云风的开源库pbc时,也看过类似的代码,如下:
https://github.com/cloudwu/pbc/blob/master/src/varint.c
_pbcV_encode32(uint32_t number, uint8_t buffer[10])
{
if (number < 0x80) {
buffer[0] = (uint8_t) number ;
return 1;
}
buffer[0] = (uint8_t) (number | 0x80 );
if (number < 0x4000) {
buffer[1] = (uint8_t) (number >> 7 );
return 2;
}
buffer[1] = (uint8_t) ((number >> 7) | 0x80 );
if (number < 0x200000) {
buffer[2] = (uint8_t) (number >> 14);
return 3;
}
buffer[2] = (uint8_t) ((number >> 14) | 0x80 );
if (number < 0x10000000) {
buffer[3] = (uint8_t) (number >> 21);
return 4;
}
buffer[3] = (uint8_t) ((number >> 21) | 0x80 );
buffer[4] = (uint8_t) (number >> 28);
return 5;
}
{
if (number < 0x80) {
buffer[0] = (uint8_t) number ;
return 1;
}
buffer[0] = (uint8_t) (number | 0x80 );
if (number < 0x4000) {
buffer[1] = (uint8_t) (number >> 7 );
return 2;
}
buffer[1] = (uint8_t) ((number >> 7) | 0x80 );
if (number < 0x200000) {
buffer[2] = (uint8_t) (number >> 14);
return 3;
}
buffer[2] = (uint8_t) ((number >> 14) | 0x80 );
if (number < 0x10000000) {
buffer[3] = (uint8_t) (number >> 21);
return 4;
}
buffer[3] = (uint8_t) ((number >> 21) | 0x80 );
buffer[4] = (uint8_t) (number >> 28);
return 5;
}