Buffer Pool Implementation

Use Cases

Cache data and index data: load data that lives on disk or on another machine into the buffer pool, so that not every access has to perform disk I/O or network communication, which speeds up access. Writes can use a write-back policy: write into the buffer pool, mark the page dirty, and return immediately; the dirty data is flushed later by a background task or when the page is evicted.
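
A minimal sketch of this write-back path, expressed with the FetchPage / ReleasePage / FlushAllPages interface of the BufferPoolManager defined later in this post (the helper name, bpm, buf and len are made up for illustration):

#include <cstring>

// Write-back: modify only the cached copy, mark it dirty, and return immediately.
void WriteBack(BufferPoolManager *bpm, page_id_t page_id, const char *buf, size_t len)
{
    LRUEntry *ent = bpm->FetchPage(page_id);   // loads the page into the pool on a miss
    Page *pg = (Page *)ent->value;
    std::memcpy(pg->GetData(), buf, len);      // write touches memory only; assumes len <= page size
    bpm->ReleasePage(ent, /*is_dirty=*/true);  // mark dirty; no disk I/O on this path
}
// The dirty page is flushed later: by a background FlushAllPages(), or by
// DeletePageCallBack when the LRU cache evicts the page.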

What Is Read-Ahead

Disk reads and writes do not happen on demand, byte by byte; they happen page by page, at least one page at a time (typically 4 KB). If data that will be needed later is already in that page, the subsequent disk I/O can be skipped, improving efficiency. Data access usually follows a "clustered read/write" pattern: after using some data, nearby data is very likely to be used as well. This is the so-called principle of locality, and it means loading data ahead of time is effective and genuinely reduces disk I/O.
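
The buffer pool below does not implement read-ahead itself, but here is a sketch of how it could be layered on top of FetchPage, assuming consecutive page ids hold neighboring data (the wrapper name and window size are invented for illustration):

// Fetch the requested page and opportunistically warm the next few pages,
// betting on the locality principle described above.
LRUEntry *FetchWithReadAhead(BufferPoolManager *bpm, page_id_t page_id, size_t window = 4)
{
    LRUEntry *ent = bpm->FetchPage(page_id);            // the page the caller actually needs
    for (size_t i = 1; i <= window; ++i) {
        LRUEntry *ahead = bpm->FetchPage(page_id + i);   // prefetch the neighbours
        bpm->ReleasePage(ahead, /*is_dirty=*/false);     // release right away; they stay cached
    }
    return ent;                                          // caller still releases this one
}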

Implementation

  1. Cache: keeps hot Pages under an LRU policy; on eviction it invokes the delete callback (DeletePageCallBack below) to flush dirty data to disk
  2. Disk Manager: responsible for flushing Pages, i.e. writing them to disk
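
The code below relies on several types (Page, DiskManager, ShardedLRUCache, LRUEntry, Slice, Arena) whose definitions are not part of this post. The following declarations are only what can be inferred from how they are used here, not the real interfaces:

#include <cstddef>
#include <cstdint>
#include <functional>

using page_id_t = uint32_t;                      // FetchPage builds a 4-byte Slice key from it

class Page {
public:
    page_id_t GetPageId() const { return page_id_; }
    char *GetData() { return data_; }
    bool IsDirty() const { return is_dirty_; }
    void ResetMemory();                          // zero out data_

    page_id_t page_id_{0};
    bool is_dirty_{false};
    char data_[4096]{};                          // one 4 KB page (size assumed)
};

class DiskManager {
public:
    void ReadPage(page_id_t page_id, char *dst);         // read one page from disk
    void WritePage(page_id_t page_id, const char *src);  // write one page to disk
};

struct Slice { Slice(const char *data, size_t size); };  // raw-byte cache key

struct LRUEntry { void *value; };                // value points at the cached Page

class ShardedLRUCache {                          // LRU cache split into shards; the
public:                                          // pool_size remark below suggests 16 shards
    ShardedLRUCache(size_t capacity, std::function<void(void *)> deleter);
    LRUEntry *Lookup(const Slice &key);          // nullptr on miss; pins the entry on hit
    LRUEntry *Insert(const Slice &key, void *value);
    void Release(LRUEntry *ent);                 // unpin an entry returned by Lookup/Insert
    void Erase(const Slice &key);                // drop an entry, invoking the deleter
    size_t TotalElem();
};

class Arena {
public:
    char *AllocateAligned(size_t bytes);         // bump allocator used when the pool overflows
};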

class BufferPoolManager
{
public:
    //pool_size is only honored exactly when it is a multiple of 16
    BufferPoolManager(size_t pool_size, DiskManager *disk_manager);

    /**
     * Destroys an existing BufferPoolManager.
     */
    ~BufferPoolManager();

    LRUEntry *FetchPage(page_id_t page_id);
    bool ReleasePage(LRUEntry *ent, bool is_dirty);
    bool FlushPage(Page* pg);
    bool DeletePage(page_id_t page_id);
    void FlushAllPages();
    inline Page *GetPages() { return pages_; }
    inline size_t TotalElem() { return cache_->TotalElem(); }
private:
    // Eviction callback registered with the cache: flush the page if it is dirty,
    // then recycle its slot into free_list.
    void DeletePageCallBack(void *val)
    {
        Page* pg = reinterpret_cast<Page*>(val);
        if(pg->IsDirty()) {
            disk_manager_->WritePage(pg->GetPageId(), pg->GetData());
        }
        pg->ResetMemory();
        latch_.lock();
        free_list_.emplace_back(pg);
        latch_.unlock();
        LOG("page %u move to free list", pg->page_id_);
    }
    /** Number of pages in the buffer pool. */
    size_t pool_size_;
    /** Array of buffer pool pages. */
    Page *pages_;
    /** Pointer to the disk manager. */
    DiskManager *disk_manager_;
    /** Replacer to find unpinned pages for replacement. */
    ShardedLRUCache *cache_;
    /** List of free pages. */
    std::list<Page *> free_list_;
    Arena arena_;
    /**
     * latch protects:
     * - free_list_
     */
    std::mutex latch_;
};

BufferPoolManager::BufferPoolManager(size_t pool_size, DiskManager *disk_manager)
    : pool_size_(pool_size), disk_manager_(disk_manager)
{
    // We allocate a consecutive memory space for the buffer pool.
    pages_ = new Page[pool_size_];
    cache_ = new ShardedLRUCache(pool_size, std::bind(&BufferPoolManager::DeletePageCallBack,this,std::placeholders::_1));

    // Initially, every page is in the free list.
    for (size_t i = 0; i < pool_size_; ++i)
    {
        free_list_.emplace_back(&pages_[i]);
    }
}

BufferPoolManager::~BufferPoolManager()
{
    delete[] pages_;
    delete cache_;
}

LRUEntry *BufferPoolManager::FetchPage(page_id_t page_id)
{
    Slice key((char*)&page_id, 4);
    auto ent = cache_->Lookup(key);
    if(ent != nullptr) {
        LOG("cache hit");
        return ent; //caller must ReleasePage when done
    }
    //1. Not in the cache: find a free Page slot
    Page* pg = nullptr;
    latch_.lock();
    //1.1 The pool still has a free slot
    if(free_list_.size()) {
        pg = free_list_.front();
        free_list_.pop_front();
    }
    latch_.unlock();
    //1.2 The pool has no free slot: allocate a new Page and insert it into the cache,
    //which makes the pool grow and use more memory. If the cache happens to be full
    //at insert time, it evicts one Page, and that Page is placed into free_list;
    //the net effect is that free_list keeps growing.
    //TODO: when the pool is exhausted, evict one LRUEntry through the cache instead,
    //so its Page returns to free_list and is reused rather than allocating new memory
    if(pg == nullptr) {
        pg = (Page*)arena_.AllocateAligned(sizeof(Page));
    }
    pg->page_id_ = page_id;
    //2. Read the page contents from the disk manager
    disk_manager_->ReadPage(page_id, pg->GetData());
    auto ret = cache_->Insert(key, (void*)pg);
    LOG("read from disk");
    return ret; //caller must ReleasePage when done
}
//Must be called after every FetchPage
bool BufferPoolManager::ReleasePage(LRUEntry *ent, bool is_dirty)
{
    Page *pg = (Page*)ent->value;
    pg->is_dirty_ = is_dirty;
    cache_->Release(ent);
    return true;
}

//Flushes the Page contents to disk; does not remove the in-memory Page
bool BufferPoolManager::FlushPage(Page* pg)
{
    if(pg->IsDirty()) {
        disk_manager_->WritePage(pg->GetPageId(), pg->GetData());
        pg->is_dirty_ = false;
        return true;
    }
    return false;
}
//Removes the in-memory Page; does not affect the on-disk page
bool BufferPoolManager::DeletePage(page_id_t page_id)
{
    cache_->Erase(Slice((char*)&page_id, 4));
    return true;
}
//Flushes every Page's contents to disk; does not remove any in-memory Page
void BufferPoolManager::FlushAllPages()
{
    for(size_t i=0; i<pool_size_; ++i) {
        FlushPage(&pages_[i]);
    }
}
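
A minimal usage sketch, assuming DiskManager can be default-constructed (its real constructor is not shown here); the pool size and page id are arbitrary:

int main()
{
    DiskManager dm;                              // assumed: backed by some database file
    BufferPoolManager bpm(64, &dm);              // a multiple of 16, so pool_size is exact

    page_id_t pid = 7;
    LRUEntry *ent = bpm.FetchPage(pid);          // first call reads from disk, later calls hit the cache
    Page *pg = (Page *)ent->value;
    pg->GetData()[0] = 'x';                      // mutate the cached copy only
    bpm.ReleasePage(ent, /*is_dirty=*/true);     // write-back: mark dirty, no disk I/O yet

    bpm.FlushAllPages();                         // or rely on eviction via DeletePageCallBack
    bpm.DeletePage(pid);                         // drop the in-memory copy; the disk page is unaffected
    return 0;
}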
