Implementing a Buffer Pool (BufferPoolManager)
Use Cases
Caching data and index pages: data that lives on disk (or on another machine) is loaded into the buffer pool so that repeated accesses are served from memory instead of incurring disk I/O or network round trips. Writes can use a write-back policy: the data is written into the buffer pool, the page is marked dirty, and the call returns immediately; dirty pages are flushed later by a background task or when they are evicted.
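A minimal sketch of this write-back pattern, using the FetchPage/ReleasePage API implemented later in this post (the record offset and length are made up for illustration):

#include <cstring>

// Write-back update: modify the cached copy, mark it dirty, and return.
// No disk I/O happens on this path; the flush is deferred to eviction
// or to an explicit FlushPage/FlushAllPages call.
void UpdateRecord(BufferPoolManager *bpm, page_id_t page_id,
                  size_t offset, const char *buf, size_t len)
{
    LRUEntry *ent = bpm->FetchPage(page_id);          // load into the pool (or cache hit)
    Page *pg = reinterpret_cast<Page *>(ent->value);
    std::memcpy(pg->GetData() + offset, buf, len);    // modify the in-memory copy only
    bpm->ReleasePage(ent, /*is_dirty=*/true);         // mark dirty, return immediately
}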
What Is Read-Ahead?
Disk reads are not performed on demand byte by byte but page by page: each read pulls in at least one page of data (typically 4 KB). If data that will be needed later is already in that page, the subsequent disk I/O is avoided and efficiency improves. Data access also tends to be clustered: when some data is used, nearby data is very likely to be used soon. This is the principle of locality, and it is why loading data ahead of time is effective and genuinely reduces disk I/O.
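As a sketch of how read-ahead could sit on top of the buffer pool below (this helper is hypothetical and not part of the implementation):

// Hypothetical read-ahead helper: after page_id is requested, also pull the
// next `window` pages into the pool, betting on the locality described above.
void FetchWithReadAhead(BufferPoolManager *bpm, page_id_t page_id, size_t window)
{
    for (size_t i = 1; i <= window; ++i) {
        LRUEntry *ent = bpm->FetchPage(page_id + i); // neighbor now sits in the pool
        bpm->ReleasePage(ent, /*is_dirty=*/false);   // release right away; it stays cached
    }
}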
Design
- Cache: a ShardedLRUCache that keeps hot Pages under an LRU policy; when an entry is evicted, its delete callback (DeletePageCallBack) flushes the page if it is dirty (see the interface sketch after this list).
- Disk Manager: performs the actual disk I/O, reading Pages from disk and writing flushed Pages back to disk.
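The BufferPoolManager below leans on these two components through roughly the following interfaces. This is a sketch reconstructed from the call sites in the code, not the actual class definitions:

#include <functional>

// Reconstructed from how the code below uses them; real signatures may differ.
struct LRUEntry {
    void *value;   // points at the cached Page owned by the buffer pool
    // ... key, reference count, hash/list links, etc.
};

class ShardedLRUCache {
public:
    using Deleter = std::function<void(void *)>;
    ShardedLRUCache(size_t capacity, Deleter deleter);  // deleter runs when an entry is evicted/erased
    LRUEntry *Lookup(const Slice &key);                 // nullptr on miss; pins the entry on hit
    LRUEntry *Insert(const Slice &key, void *value);    // may evict an old entry, invoking the deleter
    void Release(LRUEntry *entry);                      // unpin an entry returned by Lookup/Insert
    void Erase(const Slice &key);                       // remove an entry, invoking the deleter
    size_t TotalElem();                                 // number of cached entries
};

class DiskManager {
public:
    void ReadPage(page_id_t page_id, char *page_data);        // disk -> memory
    void WritePage(page_id_t page_id, const char *page_data); // memory -> disk
};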
#include <cstddef>
#include <functional>
#include <list>
#include <mutex>

class BufferPoolManager
{
public:
    // pool_size is honored exactly only when it is a multiple of 16
    BufferPoolManager(size_t pool_size, DiskManager *disk_manager);

    /**
     * Destroys an existing BufferPoolManager.
     */
    ~BufferPoolManager();

    LRUEntry *FetchPage(page_id_t page_id);
    bool ReleasePage(LRUEntry *ent, bool is_dirty);
    bool FlushPage(Page *pg);
    bool DeletePage(page_id_t page_id);
    void FlushAllPages();

    inline Page *GetPages() { return pages_; }
    inline size_t TotalElem() { return cache_->TotalElem(); }

private:
    // Eviction callback: flush the page if it is dirty, then recycle it
    // into the free list so it can back a future FetchPage.
    void DeletePageCallBack(void *val)
    {
        Page *pg = reinterpret_cast<Page *>(val);
        if (pg->IsDirty()) {
            disk_manager_->WritePage(pg->GetPageId(), pg->GetData());
            pg->is_dirty_ = false; // clear the flag so a recycled page is not re-flushed
        }
        pg->ResetMemory();
        latch_.lock();
        free_list_.emplace_back(pg);
        latch_.unlock();
        LOG("page %u move to free list", pg->page_id_);
    }

    /** Number of pages in the buffer pool. */
    size_t pool_size_;
    /** Array of buffer pool pages. */
    Page *pages_;
    /** Pointer to the disk manager. */
    DiskManager *disk_manager_;
    /** Replacer to find unpinned pages for replacement. */
    ShardedLRUCache *cache_;
    /** List of free pages. */
    std::list<Page *> free_list_;
    /** Arena for extra Pages allocated when the pool has no free slot. */
    Arena arena_;
    /**
     * latch protects:
     * - free_list_
     */
    std::mutex latch_;
};
BufferPoolManager::BufferPoolManager(size_t pool_size, DiskManager *disk_manager)
    : pool_size_(pool_size), disk_manager_(disk_manager)
{
    // We allocate a consecutive memory space for the buffer pool.
    pages_ = new Page[pool_size_];
    cache_ = new ShardedLRUCache(pool_size, std::bind(&BufferPoolManager::DeletePageCallBack, this, std::placeholders::_1));
    // Initially, every page is in the free list.
    for (size_t i = 0; i < pool_size_; ++i)
    {
        free_list_.emplace_back(&pages_[i]);
    }
}
BufferPoolManager::~BufferPoolManager()
{
    delete[] pages_;
    delete cache_;
}
LRUEntry *BufferPoolManager::FetchPage(page_id_t page_id)
{
    // The cache key is the raw bytes of the page id.
    Slice key((char *)&page_id, sizeof(page_id_t));
    auto ent = cache_->Lookup(key);
    if (ent != nullptr) {
        LOG("cache hit");
        return ent; // caller must call ReleasePage() when done
    }
    // 1. Not in the pool.
    Page *pg = nullptr;
    latch_.lock();
    // 1.1 The pool still has a free slot.
    if (free_list_.size()) {
        pg = free_list_.front();
        free_list_.pop_front();
    }
    latch_.unlock();
    // 1.2 The pool has no free slot: allocate a new Page and insert it into the
    // cache. This makes the pool grow and use more memory. If the cache happens
    // to be full, the insert evicts a Page, and that Page goes back to free_list;
    // the net effect is that free_list keeps growing.
    // TODO: when the pool has no free slot, call the eviction interface to evict
    // one LRUEntry (putting its Page back on free_list) and reuse that Page
    // instead of allocating a new one.
    if (pg == nullptr) {
        pg = (Page *)arena_.AllocateAligned(sizeof(Page));
    }
    pg->page_id_ = page_id;
    // 2. Read the page contents up from the disk manager.
    disk_manager_->ReadPage(page_id, pg->GetData());
    auto ret = cache_->Insert(key, (void *)pg);
    LOG("read from disk");
    return ret; // caller must call ReleasePage() when done
}
// Must be called after every FetchPage; records the dirty flag and unpins the cache entry.
bool BufferPoolManager::ReleasePage(LRUEntry *ent, bool is_dirty)
{
    Page *pg = (Page *)ent->value;
    // Only set the flag, never clear it: another user may already have dirtied the page.
    if (is_dirty) {
        pg->is_dirty_ = true;
    }
    cache_->Release(ent);
    return true;
}
// Flushes the page contents to disk; does not remove the in-memory Page.
bool BufferPoolManager::FlushPage(Page *pg)
{
    if (pg->IsDirty()) {
        disk_manager_->WritePage(pg->GetPageId(), pg->GetData());
        pg->is_dirty_ = false;
        return true;
    }
    return false;
}
// Removes the in-memory Page (erases it from the cache); the corresponding on-disk page is not deleted.
bool BufferPoolManager::DeletePage(page_id_t page_id)
{
    cache_->Erase(Slice((char *)&page_id, sizeof(page_id_t)));
    return true;
}
// Flushes dirty page contents to disk; does not remove the in-memory Pages.
void BufferPoolManager::FlushAllPages()
{
    for (size_t i = 0; i < pool_size_; ++i) {
        FlushPage(&pages_[i]);
    }
}
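To tie the pieces together, here is a usage sketch. The DiskManager constructor taking a file path and the use of page 0 are assumptions for illustration:

#include <cstring>

int main()
{
    DiskManager disk_manager("test.db");        // assumed constructor: backing database file
    BufferPoolManager bpm(16, &disk_manager);   // 16-page pool (a multiple of 16)

    LRUEntry *ent = bpm.FetchPage(0);           // first access: read page 0 from disk
    Page *pg = reinterpret_cast<Page *>(ent->value);
    std::memcpy(pg->GetData(), "hello", 5);     // modify the cached copy
    bpm.ReleasePage(ent, /*is_dirty=*/true);    // write-back: mark dirty and return

    bpm.FlushAllPages();                        // force remaining dirty pages to disk
    return 0;
}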