看高手是如何抠存储和执行效率的
今天在分析leveldb(google的bigtable的核心组件之一)代码时看到了如下的一段代码:
leveldb\util\coding.cc
char* EncodeVarint32(char* dst, uint32_t v) {
// Operate on characters as unsigneds
unsigned char* ptr = reinterpret_cast<unsigned char*>(dst);
static const int B = 128;
if (v < (1<<7)) {
*(ptr++) = v;
} else if (v < (1<<14)) {
*(ptr++) = v | B;
*(ptr++) = v>>7;
} else if (v < (1<<21)) {
*(ptr++) = v | B;
*(ptr++) = (v>>7) | B;
*(ptr++) = v>>14;
} else if (v < (1<<28)) {
*(ptr++) = v | B;
*(ptr++) = (v>>7) | B;
*(ptr++) = (v>>14) | B;
*(ptr++) = v>>21;
} else {
*(ptr++) = v | B;
*(ptr++) = (v>>7) | B;
*(ptr++) = (v>>14) | B;
*(ptr++) = (v>>21) | B;
*(ptr++) = v>>28;
}
return reinterpret_cast<char*>(ptr);
}
// Operate on characters as unsigneds
unsigned char* ptr = reinterpret_cast<unsigned char*>(dst);
static const int B = 128;
if (v < (1<<7)) {
*(ptr++) = v;
} else if (v < (1<<14)) {
*(ptr++) = v | B;
*(ptr++) = v>>7;
} else if (v < (1<<21)) {
*(ptr++) = v | B;
*(ptr++) = (v>>7) | B;
*(ptr++) = v>>14;
} else if (v < (1<<28)) {
*(ptr++) = v | B;
*(ptr++) = (v>>7) | B;
*(ptr++) = (v>>14) | B;
*(ptr++) = v>>21;
} else {
*(ptr++) = v | B;
*(ptr++) = (v>>7) | B;
*(ptr++) = (v>>14) | B;
*(ptr++) = (v>>21) | B;
*(ptr++) = v>>28;
}
return reinterpret_cast<char*>(ptr);
}
目标很明确,就是减少int数的内存存储长度,也就是当int小于128时,以一个字节存储,大于128时按数字的大小逐步向上加一个字节,这样最长的整数用二进制表示时最大只有5个字节长度表示就基本上够用了。这还有个名词叫varint,即可变长度整数。无独有偶,在之前我看过云风的开源库pbc时,也看过类似的代码,如下:
https://github.com/cloudwu/pbc/blob/master/src/varint.c
_pbcV_encode32(uint32_t number, uint8_t buffer[10])
{
if (number < 0x80) {
buffer[0] = (uint8_t) number ;
return 1;
}
buffer[0] = (uint8_t) (number | 0x80 );
if (number < 0x4000) {
buffer[1] = (uint8_t) (number >> 7 );
return 2;
}
buffer[1] = (uint8_t) ((number >> 7) | 0x80 );
if (number < 0x200000) {
buffer[2] = (uint8_t) (number >> 14);
return 3;
}
buffer[2] = (uint8_t) ((number >> 14) | 0x80 );
if (number < 0x10000000) {
buffer[3] = (uint8_t) (number >> 21);
return 4;
}
buffer[3] = (uint8_t) ((number >> 21) | 0x80 );
buffer[4] = (uint8_t) (number >> 28);
return 5;
}
{
if (number < 0x80) {
buffer[0] = (uint8_t) number ;
return 1;
}
buffer[0] = (uint8_t) (number | 0x80 );
if (number < 0x4000) {
buffer[1] = (uint8_t) (number >> 7 );
return 2;
}
buffer[1] = (uint8_t) ((number >> 7) | 0x80 );
if (number < 0x200000) {
buffer[2] = (uint8_t) (number >> 14);
return 3;
}
buffer[2] = (uint8_t) ((number >> 14) | 0x80 );
if (number < 0x10000000) {
buffer[3] = (uint8_t) (number >> 21);
return 4;
}
buffer[3] = (uint8_t) ((number >> 21) | 0x80 );
buffer[4] = (uint8_t) (number >> 28);
return 5;
}
【推荐】国内首个AI IDE,深度理解中文开发场景,立即下载体验Trae
【推荐】编程新体验,更懂你的AI,立即体验豆包MarsCode编程助手
【推荐】抖音旗下AI助手豆包,你的智能百科全书,全免费不限次数
【推荐】轻量又高性能的 SSH 工具 IShell:AI 加持,快人一步
· 开发者必知的日志记录最佳实践
· SQL Server 2025 AI相关能力初探
· Linux系列:如何用 C#调用 C方法造成内存泄露
· AI与.NET技术实操系列(二):开始使用ML.NET
· 记一次.NET内存居高不下排查解决与启示
· Manus重磅发布:全球首款通用AI代理技术深度解析与实战指南
· 被坑几百块钱后,我竟然真的恢复了删除的微信聊天记录!
· 没有Manus邀请码?试试免邀请码的MGX或者开源的OpenManus吧
· 园子的第一款AI主题卫衣上架——"HELLO! HOW CAN I ASSIST YOU TODAY
· 【自荐】一款简洁、开源的在线白板工具 Drawnix
2008-04-28 推荐两个不错的基于SilverLight 2 Beta 应用
2008-04-28 一本晚看了两年半的书--蓝海战略