LevelDB场景分析1--整体结构分析
基本用法
数据结构
class DBImpl : public DB
{
private:struct CompactionState;struct Writer;// Information kept for every waiting writer
Env* const env_; // 文件,目录,日志,Schedule线程const InternalKeyComparator internal_comparator_;const InternalFilterPolicy internal_filter_policy_; // 提高随机读的性能const Options options_; // Options to control the behavior of a database (passed to DB::Open)bool owns_info_log_;bool owns_cache_;const std::string dbname_;// table_cache_ provides its own synchronizationTableCache* table_cache_;// Lock over the persistent DB state. Non-NULL iff successfully acquired.FileLock* db_lock_;// State below is protected by mutex_port::Mutex mutex_;port::AtomicPointer shutting_down_;port::CondVar bg_cv_; // Signalled when background work finishesMemTable* mem_;MemTable* imm_; // Memtable being compactedport::AtomicPointer has_imm_; // So bg thread can detect non-NULL imm_WritableFile* logfile_;uint64_t logfile_number_;log::Writer* log_;uint32_t seed_; // For sampling.// Queue of writers.std::deque<Writer*> writers_;WriteBatch* tmp_batch_;SnapshotList snapshots_;// Set of table files to protect from deletion because they are// part of ongoing compactions.std::set<uint64_t> pending_outputs_;// Has a background compaction been scheduled or is running?bool bg_compaction_scheduled_;
ManualCompaction* manual_compaction_;VersionSet* versions_;// Have we encountered a background error in paranoid mode?Status bg_error_;CompactionStats stats_[config::kNumLevels];
};
红色部分成员是需要重点关注的。
主要场景
public:Status DB::Open(const Options& options, const std::string& dbname, DB** dbptr)// Implementations of the DB interface
virtual Status Put(const WriteOptions&, const Slice& key, const Slice& value);virtual Status Delete(const WriteOptions&, const Slice& key);virtual Status Write(const WriteOptions& options, WriteBatch* updates);virtual Status Get(const ReadOptions& options,const Slice& key,std::string* value);virtual Iterator* NewIterator(const ReadOptions&);virtual const Snapshot* GetSnapshot();virtual void ReleaseSnapshot(const Snapshot* snapshot);virtual bool GetProperty(const Slice& property, std::string* value);virtual void GetApproximateSizes(const Range* range, int n, uint64_t* sizes);virtual void CompactRange(const Slice* begin, const Slice* end);// Extra methods (for testing) that are not in the public DB interface// Record a sample of bytes read at the specified internal key.
// Samples are taken approximately once every config::kReadBytesPeriod// bytes.void RecordReadSample(Slice key);
private:
Iterator* NewInternalIterator(const ReadOptions&,SequenceNumber* latest_snapshot,uint32_t* seed);Status NewDB();// Recover the descriptor from persistent storage. May do a significant// amount of work to recover recently logged updates. Any changes to// be made to the descriptor are added to *edit.Status Recover(VersionEdit* edit) EXCLUSIVE_LOCKS_REQUIRED(mutex_);void MaybeIgnoreError(Status* s) const;// Delete any unneeded files and stale in-memory entries.void DeleteObsoleteFiles();// Compact the in-memory write buffer to disk. Switches to a new// log-file/memtable and writes a new descriptor iff successful.// Errors are recorded in bg_error_.void CompactMemTable() EXCLUSIVE_LOCKS_REQUIRED(mutex_);Status RecoverLogFile(uint64_t log_number,VersionEdit* edit,SequenceNumber* max_sequence)EXCLUSIVE_LOCKS_REQUIRED(mutex_);Status WriteLevel0Table(MemTable* mem, VersionEdit* edit, Version* base)EXCLUSIVE_LOCKS_REQUIRED(mutex_);Status MakeRoomForWrite(bool force /* compact even if there is room? */)EXCLUSIVE_LOCKS_REQUIRED(mutex_);WriteBatch* BuildBatchGroup(Writer** last_writer);void RecordBackgroundError(const Status& s);void MaybeScheduleCompaction() EXCLUSIVE_LOCKS_REQUIRED(mutex_);static void BGWork(void* db);void BackgroundCall();void BackgroundCompaction() EXCLUSIVE_LOCKS_REQUIRED(mutex_);void CleanupCompaction(CompactionState* compact)EXCLUSIVE_LOCKS_REQUIRED(mutex_);Status DoCompactionWork(CompactionState* compact)EXCLUSIVE_LOCKS_REQUIRED(mutex_);Status OpenCompactionOutputFile(CompactionState* compact);Status FinishCompactionOutputFile(CompactionState* compact, Iterator* input);Status InstallCompactionResults(CompactionState* compact)EXCLUSIVE_LOCKS_REQUIRED(mutex_);
综合介绍
LevelDB主要有以下几个部分的内存开销:memtable,immutable table,table cache,block cache
memtable和immutable table的大小由 options_.write_buffer_size决定。
table cache 的容量(可同时缓存的已打开 table 文件数量)由 options_.max_open_files 决定。
block cache 的容量在创建时以 size_t 类型的参数指定,因此最大可以设置到 size_t 所能表示的上限。
由于使用了 mmap,应尽量保证可用内存大于数据规模;否则数据无法全部驻留在内存中,随机读会频繁触发缺页和磁盘 I/O,读取速度可能急剧下降。