leveldb 学习记录(三) MemTable 与 Immutable Memtable
前文:
存储格式:
leveldb数据在内存中以 Memtable存储(核心结构是skiplist 已介绍),当达到一定容量则转换为Immutable Memtable,由后台线程存储进磁盘中.同时另开一个新 Memtable,记录数据.
Memtable记录修改新kv对,可读可写.Immutable Memtable不可更改.
Memtable使用的就是skiplist记录key value
class MemTable { public: // MemTables are reference counted. The initial reference count // is zero and the caller must call Ref() at least once. explicit MemTable(const InternalKeyComparator& comparator); //简配版应用计数 初始化时候需要引用ref将计数+1 // Increase reference count. void Ref() { ++refs_; } // Drop reference count. Delete if no more references exist. //unref调用减少应用计数.计数为0 则删除自己 void Unref() { --refs_; assert(refs_ >= 0); if (refs_ <= 0) { delete this; } }
//内存使用相关,暂时不关注 size_t ApproximateMemoryUsage(); //迭代器 类似MEMTABLE 中元素的指针 Iterator* NewIterator(); //KEY是按次序排序,所以结构体内有比较key的定义 struct KeyComparator { const InternalKeyComparator comparator; explicit KeyComparator(const InternalKeyComparator& c) : comparator(c) { } int operator()(const char* a, const char* b) const; }; //私有类中包含构造复制函数,达到禁止复制的目的 private: // No copying allowed MemTable(const MemTable&); void operator=(const MemTable&);
}
Add Get 添加与读取函数, 删除和修改也是添加完成.
修改删除的优化:
实际上的kv删除或者修改,均未删除之前相同的Key记录,只是新增一个修改后的kv对或者带有删除标记的kv对.
因为系统查找kv对时是按由新至旧的次序查找,所以一定会先查到最新的修改值或删除标记.
冗余的旧kv对要到后面的compaction操作中才会被真正删除(后续介绍)
1 // Add an entry into memtable that maps key to value at the 2 // specified sequence number and with the specified type. 3 // Typically value will be empty if type==kTypeDeletion. 4 void Add(SequenceNumber seq, ValueType type, 5 const Slice& key, 6 const Slice& value); 7 8 // If memtable contains a value for key, store it in *value and return true. 9 // If memtable contains a deletion for key, store a NotFound() error 10 // in *status and return true. 11 // Else, return false. 12 bool Get(const LookupKey& key, std::string* value, Status* s);
Add 函数添加 kTypeDeletion类的kv对,表示删除, value内容为空
// Encodes (key, sequence number, type, value) into one contiguous buffer
// obtained from arena_ and inserts that buffer into the skiplist table_.
//
//   s     : sequence number packed into the upper bits of the 64-bit tag
//   type  : value type packed into the low 8 bits of the tag
//   key   : user key bytes
//   value : value bytes
void MemTable::Add(SequenceNumber s, ValueType type,
                   const Slice& key,
                   const Slice& value) {
  // Format of an entry is concatenation of:
  //  key_size     : varint32 of internal_key.size()
  //  key bytes    : char[internal_key.size()]
  //  value_size   : varint32 of value.size()
  //  value bytes  : char[value.size()]
  //
  //  |----------|---------------------------|------------|--------------------|
  //  | key_size | char[internal_key.size()] | value_size | char[value.size()] |
  //  |----------|---------------------------|------------|--------------------|
  size_t key_size = key.size();
  size_t val_size = value.size();
  // The internal key is the user key followed by an 8-byte tag.
  size_t internal_key_size = key_size + 8;
  // Total length of the buffer that will be inserted into the skiplist.
  const size_t encoded_len =
      VarintLength(internal_key_size) + internal_key_size +
      VarintLength(val_size) + val_size;
  char* buf = arena_.Allocate(encoded_len);
  // Length prefix of the internal key.
  char* p = EncodeVarint32(buf, internal_key_size);
  // User key bytes.
  memcpy(p, key.data(), key_size);
  p += key_size;
  // 64-bit tag: the sequence number shifted left by 8 bits, with the low
  // 8 bits holding the value type.
  EncodeFixed64(p, (s << 8) | type);
  p += 8;
  // Length prefix of the value ...
  p = EncodeVarint32(p, val_size);
  // ... followed by the value bytes themselves.
  memcpy(p, value.data(), val_size);
  // Compare pointers rather than a signed pointer difference against an
  // unsigned size, which avoids a signed/unsigned comparison.
  assert(p + val_size == buf + encoded_len);
  table_.Insert(buf);
}
Get函数在MemTable中查找key: 找到且type为kTypeValue时, 将值存入*value并返回true; 找到但type为kTypeDeletion时, 返回true并将status置为NotFound()错误
其他情况返回false
查找有个细节: skiplist 的 Seek 返回的是第一个大于或等于(GreaterOrEqual)查找键的节点, 所以只要 user key 相同即可, 不要求序列号 sequence
完全一样(Seek 已经跳过了序列号过大的记录, 找到的就是该 key 在查询序列号之内最新的那条记录)
然后代码里再次判断
comparator_.comparator.user_comparator()->Compare(
Slice(key_ptr, key_length - 8),
key.user_key()) == 0)
抛开sequence 仅仅比较key是否相等
bool MemTable::Get(const LookupKey& key, std::string* value, Status* s) { Slice memkey = key.memtable_key(); Table::Iterator iter(&table_); iter.Seek(memkey.data()); if (iter.Valid()) { // entry format is: // klength varint32 // userkey char[klength] // tag uint64 // vlength varint32 // value char[vlength] // Check that it belongs to same user key. We do not check the // sequence number since the Seek() call above should have skipped // all entries with overly large sequence numbers. const char* entry = iter.key(); uint32_t key_length; const char* key_ptr = GetVarint32Ptr(entry, entry+5, &key_length); if (comparator_.comparator.user_comparator()->Compare( Slice(key_ptr, key_length - 8), key.user_key()) == 0) { // Correct user key const uint64_t tag = DecodeFixed64(key_ptr + key_length - 8); switch (static_cast<ValueType>(tag & 0xff)) { case kTypeValue: { Slice v = GetLengthPrefixedSlice(key_ptr + key_length); value->assign(v.data(), v.size()); return true; } case kTypeDeletion: *s = Status::NotFound(Slice()); return true; } } } return false; }
memtable 使用的InternalKey 代码如下
一个字符串的封装和 比较器InternalKeyComparator代码
// Modules in this directory should keep internal keys wrapped inside // the following class instead of plain strings so that we do not // incorrectly use string comparisons instead of an InternalKeyComparator. class InternalKey { private: std::string rep_; public: InternalKey() { } // Leave rep_ as empty to indicate it is invalid InternalKey(const Slice& user_key, SequenceNumber s, ValueType t) { AppendInternalKey(&rep_, ParsedInternalKey(user_key, s, t)); } void DecodeFrom(const Slice& s) { rep_.assign(s.data(), s.size()); } Slice Encode() const { assert(!rep_.empty()); return rep_; } Slice user_key() const { return ExtractUserKey(rep_); } void SetFrom(const ParsedInternalKey& p) { rep_.clear(); AppendInternalKey(&rep_, p); } void Clear() { rep_.clear(); } std::string DebugString() const; }; inline int InternalKeyComparator::Compare( const InternalKey& a, const InternalKey& b) const { return Compare(a.Encode(), b.Encode()); }
// Splits an encoded internal key into user key, sequence number and type.
//
// The last 8 bytes of `internal_key` are a fixed 64-bit tag: the low byte is
// the value type and the remaining upper bits are the sequence number.
// Returns false when the input is shorter than 8 bytes or when the type byte
// is greater than kTypeValue; otherwise returns true.
inline bool ParseInternalKey(const Slice& internal_key,
                             ParsedInternalKey* result) {
  const size_t total = internal_key.size();
  if (total < 8) {
    return false;
  }
  const size_t user_len = total - 8;
  const uint64_t tag = DecodeFixed64(internal_key.data() + user_len);
  // Low byte of the tag: the value type.
  const unsigned char type_byte = tag & 0xff;
  // Everything before the 8-byte tag is the user key.
  result->user_key = Slice(internal_key.data(), user_len);
  // Shift right by 8 bits to drop the type byte and obtain the sequence number.
  result->sequence = tag >> 8;
  result->type = static_cast<ValueType>(type_byte);
  return (type_byte <= static_cast<unsigned char>(kTypeValue));
}
class LookupKey //DBImpl::Get()查询使用的辅助类
使用三个指针(start_、kstart_、end_)划分同一块缓冲区, 根据不同需求提供不同的 Slice 视图
可提供下列三种 Slice
Slice memtable_key()
Slice internal_key()
Slice user_key()
数据都存储在 char space_[200]; // Avoid allocation for short keys
但是如果存储数据过长 则需要重新分配内存
// Builds the lookup key for `user_key` at sequence number `s`.
//
// The encoded layout written to the buffer is:
//    varint32 (user_key.size() + 8)   <-- start_
//    user key bytes                   <-- kstart_
//    8-byte tag (sequence and kValueTypeForSeek)
//                                     <-- end_
LookupKey::LookupKey(const Slice& user_key, SequenceNumber s) {
  size_t usize = user_key.size();
  // A conservative estimate: the 8-byte tag plus room for the varint32
  // length prefix (presumably at most 5 bytes).
  size_t needed = usize + 13;
  char* dst;
  if (needed <= sizeof(space_)) {
    // Short key: fits in the inline buffer, so no allocation is needed.
    dst = space_;
  } else {
    // More space is required: allocate it here; the destructor frees it.
    dst = new char[needed];
  }
  start_ = dst;
  dst = EncodeVarint32(dst, usize + 8);
  kstart_ = dst;
  memcpy(dst, user_key.data(), usize);
  dst += usize;
  EncodeFixed64(dst, PackSequenceAndType(s, kValueTypeForSeek));
  dst += 8;
  end_ = dst;
}

// Frees the buffer only when the constructor had to allocate one, i.e. when
// start_ does not point at the inline space_ array.
inline LookupKey::~LookupKey() {
  if (start_ != space_) delete[] start_;
}
整个类代码如下
1 // A helper class useful for DBImpl::Get() 2 class LookupKey { //DBImpl::Get()查询使用的辅助类 3 public: 4 // Initialize *this for looking up user_key at a snapshot with 5 // the specified sequence number. 6 LookupKey(const Slice& user_key, SequenceNumber sequence); 7 8 ~LookupKey(); 9 10 // Return a key suitable for lookup in a MemTable. 11 Slice memtable_key() const { return Slice(start_, end_ - start_); } 12 13 // Return an internal key (suitable for passing to an internal iterator) 14 Slice internal_key() const { return Slice(kstart_, end_ - kstart_); } 15 16 // Return the user key 17 Slice user_key() const { return Slice(kstart_, end_ - kstart_ - 8); } 18 19 private: 20 // We construct a char array of the form: 21 // klength varint32 <-- start_ 22 // userkey char[klength] <-- kstart_ 23 // tag uint64 24 // <-- end_ 25 // The array is a suitable MemTable key. 26 // The suffix starting with "userkey" can be used as an InternalKey. 27 const char* start_; 28 const char* kstart_; 29 const char* end_; 30 char space_[200]; // Avoid allocation for short keys 31 32 // No copying allowed 33 LookupKey(const LookupKey&); 34 void operator=(const LookupKey&); 35 }; 36 37 inline LookupKey::~LookupKey() { 38 if (start_ != space_) delete[] start_; 39 } 40 41 }
参考
https://blog.csdn.net/tankles/article/details/7663635
https://blog.csdn.net/sparkliang/article/details/8604424
http://www.cnblogs.com/haippy/archive/2011/12/04/2276064.html
欢迎转帖 请保持文本完整并注明出处
技术博客 http://www.cnblogs.com/itdef/
B站算法视频题解
https://space.bilibili.com/18508846
qq 151435887
gitee https://gitee.com/def/
欢迎c c++ 算法爱好者 windows驱动爱好者 服务器程序员沟通交流
如果觉得不错,欢迎点赞,你的鼓励就是我的动力