leveldb 学习记录(七) SSTable构造

使用TableBuilder构造一个Table

 1 struct TableBuilder::Rep {                       // Internal state of a TableBuilder: tracks the progress of the table being built
 2         Options options;
 3         Options index_block_options;
 4         WritableFile* file;                             // The .sst file being written
 5         uint64_t offset;
 6         Status status;
 7         BlockBuilder data_block;                        // Data block currently being filled
 8         BlockBuilder index_block;                       // Index block
 9         std::string last_key;                           // Most recently added key; used to check key ordering and, when a data
10                                                         //+ block is written, as the key of its index entry (last_key+offset+size)
11         int64_t num_entries;                            // Number of key/value pairs added to the .sst file so far
12         bool closed;                                 // Either Finish() or Abandon() has been called.
13  
14        // The index entry (max_key+offset+size) of a block is only written once the
15        // first key/value of the next block is added: Add() uses that key with
16        // FindShortestSeparator to find a shorter key separating the two blocks, which
17        // saves space. Only Finish() builds it from the last key of the last block (FindShortSuccessor).
18         // We do not emit the index entry for a block until we have seen the
19         // first key for the next data block.  This allows us to use shorter
20         // keys in the index block.  For example, consider a block boundary
21         // between the keys "the quick brown fox" and "the who".  We can use
22         // "the r" as the key for the index block entry since it is >= all
23         // entries in the first block and < all entries in subsequent
24         // blocks.
25         //
26         // Invariant: r->pending_index_entry is true only if data_block is empty.
27         bool pending_index_entry;                       // Set right after a data block has been written; signals that an
28                                                         //+ index entry (last_key+offset+size) still has to be added to the index block
29         BlockHandle pending_handle;  // Handle to add to index block
30  
31         std::string compressed_output;                  // Buffer holding compressed block data
32  
33         Rep(const Options& opt, WritableFile* f)        // Constructor
34             : options(opt),
35             index_block_options(opt),
36             file(f),
37             offset(0),
38             data_block(&options),
39             index_block(&index_block_options),
40             num_entries(0),
41             closed(false),
42             pending_index_entry(false) 
43         {
44             index_block_options.block_restart_interval = 1; // One record per restart group in the index block, which makes lookups easy
45         }
46     };// struct TableBuilder::Rep ;

 

TableBuilder头文件

 1 class TableBuilder {
 2  public:
 3   // Create a builder that will store the contents of the table it is
 4   // building in *file.  Does not close the file.  It is up to the
 5   // caller to close the file after calling Finish().
 6 // (Translated note) The builder stores the table in *file; the file must stay open while the builder is in use, and the caller closes it after Finish().
 7   TableBuilder(const Options& options, WritableFile* file);
 8 
 9   // REQUIRES: Either Finish() or Abandon() has been called.
10   ~TableBuilder();
11 
12   // Change the options used by this builder.  Note: only some of the
13   // option fields can be changed after construction.  If a field is
14   // not allowed to change dynamically and its value in the structure
15   // passed to the constructor is different from its value in the
16   // structure passed to this method, this method will return an error
17   // without changing any fields.
18   Status ChangeOptions(const Options& options);
19 
20   // Add key,value to the table being constructed.
21   // REQUIRES: key is after any previously added key according to comparator.
22   // REQUIRES: Finish(), Abandon() have not been called
23   // (Translated note) Adds a key/value pair; the implementation is walked through further below.
24   void Add(const Slice& key, const Slice& value);
25 
26   // Advanced operation: flush any buffered key/value pairs to file.
27   // Can be used to ensure that two adjacent entries never live in
28   // the same data block.  Most clients should not need to use this method.
29   // REQUIRES: Finish(), Abandon() have not been called
30   void Flush();
31 
32   // Return non-ok iff some error has been detected.
33   Status status() const;
34 
35   // Finish building the table.  Stops using the file passed to the
36   // constructor after this function returns.
37   // REQUIRES: Finish(), Abandon() have not been called
38 
39   Status Finish();
40 
41   // Indicate that the contents of this builder should be abandoned.  Stops
42   // using the file passed to the constructor after this function returns.
43   // If the caller is not going to call Finish(), it must call Abandon()
44   // before destroying this builder.
45   // REQUIRES: Finish(), Abandon() have not been called
46   void Abandon();
47 
48   // Number of calls to Add() so far.
49   uint64_t NumEntries() const;
50 
51   // Size of the file generated so far.  If invoked after a successful
52   // Finish() call, returns the size of the final generated file.
53   uint64_t FileSize() const;
54 
55  private:
56   bool ok() const { return status().ok(); }
57   void WriteBlock(BlockBuilder* block, BlockHandle* handle);
58 
59   struct Rep;
60   Rep* rep_;
61 
62   // No copying allowed
63   TableBuilder(const TableBuilder&);
64   void operator=(const TableBuilder&);
65 };

 

实现部分主要是按照 SSTable 的文件格式依次填充各个部分，这里只做了简单的注释：

// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file. See the AUTHORS file for names of contributors.

#include "leveldb/table_builder.h"

#include <assert.h>
#include <stdio.h>
#include "leveldb/comparator.h"
#include "leveldb/env.h"
#include "table/block_builder.h"
#include "table/format.h"
#include "util/coding.h"
#include "util/crc32c.h"
#include "util/logging.h"

namespace leveldb {

// Private implementation state of a TableBuilder: the options in effect, the
// destination file, the blocks under construction and the bookkeeping needed
// to emit index entries lazily.
struct TableBuilder::Rep {
  Options options;              // Options handed to data_block (by pointer).
  Options index_block_options;  // Copy of options with block_restart_interval
                                // forced to 1; handed to index_block.
  WritableFile* file;           // Destination file; Append()/Flush() are
                                // called on it, it is never deleted here.
  uint64_t offset;              // Bytes successfully appended to file so far.
  Status status;                // Result of the most recent file operation.
  BlockBuilder data_block;      // Data block currently being filled.
  BlockBuilder index_block;     // Index block, written out by Finish().
  std::string last_key;         // Most recently added key; also serves as the
                                // basis for the next index entry's key.
  int64_t num_entries;          // Incremented once per Add() call.
  bool closed;          // Either Finish() or Abandon() has been called.

  // We do not emit the index entry for a block until we have seen the
  // first key for the next data block.  This allows us to use shorter
  // keys in the index block.  For example, consider a block boundary
  // between the keys "the quick brown fox" and "the who".  We can use
  // "the r" as the key for the index block entry since it is >= all
  // entries in the first block and < all entries in subsequent
  // blocks.
  //
  // Invariant: r->pending_index_entry is true only if data_block is empty.
  bool pending_index_entry;
  BlockHandle pending_handle;  // Handle to add to index block

  // Receives the Snappy output in WriteBlock(); cleared after every block.
  std::string compressed_output;

  // Both BlockBuilders receive pointers to the Options members of this very
  // struct, which is why ChangeOptions() can update them in place.
  Rep(const Options& opt, WritableFile* f)
      : options(opt),
        index_block_options(opt),
        file(f),
        offset(0),
        data_block(&options),
        index_block(&index_block_options),
        num_entries(0),
        closed(false),
        pending_index_entry(false) {
    // One entry per restart group in the index block.
    index_block_options.block_restart_interval = 1;
  }
};

// Allocates the builder's private state. `file` is stored as a raw pointer and
// is not closed or deleted by the code shown in this file.
TableBuilder::TableBuilder(const Options& options, WritableFile* file)
    : rep_(new Rep(options, file)) {
}

// Releases the private state. In debug builds, destroying a builder on which
// neither Finish() nor Abandon() was called trips the assertion.
TableBuilder::~TableBuilder() {
  assert(rep_->closed);  // Catch errors where caller forgot to call Finish()
  delete rep_;
}

// Replaces the options used for the remainder of the build.
//
// The comparator must stay the same: keys already added were ordered with it.
// Returns InvalidArgument (and changes nothing) if it differs, OK otherwise.
// NOTE: when new fields are added to Options, revisit this function so that
// fields which must not change mid-build are rejected here as well.
Status TableBuilder::ChangeOptions(const Options& options) {
  Rep* const rep = rep_;

  const bool comparator_changed =
      (options.comparator != rep->options.comparator);
  if (comparator_changed) {
    return Status::InvalidArgument("changing comparator while building table");
  }

  // The live BlockBuilders hold pointers to these Options objects, so
  // assigning in place is enough for them to see the new values.
  rep->options = options;
  rep->index_block_options = options;
  // The index block always keeps a restart interval of 1, whatever the
  // caller passed in.
  rep->index_block_options.block_restart_interval = 1;
  return Status::OK();
}

void TableBuilder::Add(const Slice& key, const Slice& value) {
  Rep* r = rep_;
  assert(!r->closed);
  if (!ok()) return;    //确保Rep没有关闭 并且状态正常

  //如果不是添加的table本身的属性  添加的key 必然是有序的的 否则报错
  if (r->num_entries > 0) {
    assert(r->options.comparator->Compare(key, Slice(r->last_key)) > 0);
  }

  //pending_index_entry标记是否是新创建的一个block 
  //当新创建一个block时 才可能确认上一个block和新block之间的key的一个分割字符串 记录在lastkey和index_block 方便以后查找key 定位

  if (r->pending_index_entry) {
    assert(r->data_block.empty());
    //comparator 中有 FindShortestSeparator() / FindShortSuccessor()两个接口,
    //FindShortestSeparator(start, limit)是获得大于 start 但小于 limit 的最小值。
    //FindShortSuccessor(start)是获得比 start 大的最小值。比较都基于 user - commparator,二者会被
    //用来确定 sstable 中 block 的 end - key。
    r->options.comparator->FindShortestSeparator(&r->last_key, key);
    std::string handle_encoding;
    r->pending_handle.EncodeTo(&handle_encoding);
    r->index_block.Add(r->last_key, Slice(handle_encoding));
    r->pending_index_entry = false;
  }
  //更新lastkey 跟新记录计数 添加data block
  r->last_key.assign(key.data(), key.size());
  r->num_entries++;
  r->data_block.Add(key, value);

  //data block 大于指定size 进行flush操作
  const size_t estimated_block_size = r->data_block.CurrentSizeEstimate();
  if (estimated_block_size >= r->options.block_size) {
    Flush();
  }
}

//block flush落盘
void TableBuilder::Flush() {
  Rep* r = rep_;
  assert(!r->closed);
  if (!ok()) return;
  if (r->data_block.empty()) return;
  assert(!r->pending_index_entry);
  WriteBlock(&r->data_block, &r->pending_handle);
  if (ok()) {
    r->pending_index_entry = true;
    r->status = r->file->Flush();
  }
}

// Serializes `block`, appends it to the file and stores its location in
// `*handle`. `block` is reset afterwards so it can be reused.
//
// On-disk layout of every block:
//    block_data: uint8[n]
//    type: uint8      (compression type)
//    crc: uint32      (masked crc32c over block_data followed by type)
//
// Requires ok(). Any file error is recorded in rep_->status; the file offset
// only advances when both appends succeed.
void TableBuilder::WriteBlock(BlockBuilder* block, BlockHandle* handle) {
  assert(ok());
  Rep* const rep = rep_;
  const Slice raw = block->Finish();

  Slice payload;
  CompressionType type = rep->options.compression;
  // TODO(postrelease): Support more compression options: zlib?
  switch (type) {
    case kNoCompression:
      payload = raw;
      break;

    case kSnappyCompression: {
      std::string* const scratch = &rep->compressed_output;
      const bool compressed =
          port::Snappy_Compress(raw.data(), raw.size(), scratch);
      // Keep the compressed form only if it saves more than 12.5%.
      const bool worthwhile =
          compressed && scratch->size() < raw.size() - (raw.size() / 8u);
      if (worthwhile) {
        payload = *scratch;
      } else {
        // Snappy unavailable or the gain is too small: store the raw bytes
        // and record that in the type byte.
        payload = raw;
        type = kNoCompression;
      }
      break;
    }
  }

  handle->set_offset(rep->offset);
  handle->set_size(payload.size());

  rep->status = rep->file->Append(payload);
  if (rep->status.ok()) {
    char trailer[kBlockTrailerSize];
    trailer[0] = type;
    const uint32_t payload_crc = crc32c::Value(payload.data(), payload.size());
    // Extend the checksum over the type byte as well.
    const uint32_t block_crc = crc32c::Extend(payload_crc, trailer, 1);
    EncodeFixed32(trailer + 1, crc32c::Mask(block_crc));

    rep->status = rep->file->Append(Slice(trailer, kBlockTrailerSize));
    if (rep->status.ok()) {
      rep->offset += payload.size() + kBlockTrailerSize;
    }
  }

  rep->compressed_output.clear();
  block->Reset();
}

// Returns a copy of the status recorded by the most recent file operation.
Status TableBuilder::status() const {
  return rep_->status;
}

// Completes the table: flushes the last data block, then writes the
// metaindex block, the index block and the footer, in that order.
//
// Marks the builder closed. Stops at the first failing step and returns the
// recorded status; returns OK when everything was written.
Status TableBuilder::Finish() {
  Rep* const rep = rep_;
  Flush();
  assert(!rep->closed);
  rep->closed = true;

  BlockHandle metaindex_handle;
  BlockHandle index_handle;

  if (!ok()) {
    return rep->status;
  }
  {
    // The metaindex block is currently written without any entries.
    // TODO(postrelease): Add stats and other meta blocks
    BlockBuilder metaindex_block(&rep->options);
    WriteBlock(&metaindex_block, &metaindex_handle);
  }

  if (!ok()) {
    return rep->status;
  }
  if (rep->pending_index_entry) {
    // There is no following key to separate from, so the comparator is asked
    // for a short successor of the last key instead.
    rep->options.comparator->FindShortSuccessor(&rep->last_key);
    std::string encoded_handle;
    rep->pending_handle.EncodeTo(&encoded_handle);
    rep->index_block.Add(rep->last_key, Slice(encoded_handle));
    rep->pending_index_entry = false;
  }
  WriteBlock(&rep->index_block, &index_handle);

  if (!ok()) {
    return rep->status;
  }
  // The footer records where the metaindex and index blocks live.
  Footer footer;
  footer.set_metaindex_handle(metaindex_handle);
  footer.set_index_handle(index_handle);
  std::string encoded_footer;
  footer.EncodeTo(&encoded_footer);
  rep->status = rep->file->Append(encoded_footer);
  if (rep->status.ok()) {
    rep->offset += encoded_footer.size();
  }
  return rep->status;
}

// Gives up on the table being built: marks the builder closed and writes
// nothing further. Must not be called on an already closed builder.
void TableBuilder::Abandon() {
  assert(!rep_->closed);
  rep_->closed = true;
}

// Returns how many entries have been added through Add() so far.
uint64_t TableBuilder::NumEntries() const {
  // The counter is stored as int64_t; convert explicitly to the return type.
  return static_cast<uint64_t>(rep_->num_entries);
}

// Returns the number of bytes successfully appended to the file so far
// (blocks with their trailers, plus the footer once Finish() has written it).
uint64_t TableBuilder::FileSize() const {
  return rep_->offset;
}

}

 

 

 

 

 

 

 

 

 

 

 

 

 

 

 

 

参考 

https://blog.csdn.net/tankles/article/details/7663918

《leveldb实现解析》淘宝 那岩

posted on 2018-10-18 09:45  itdef  阅读(278)  评论(0)  编辑  收藏  举报

导航