代码改变世界

Memcached源码拆分:Slabs

2011-11-14 15:03  Aga.J  阅读(639)  评论(0编辑  收藏  举报

个人笔记,不做详细介绍

    Memcached的Slab内存分配策略其实很简单,通过首次使用预分配内存空间或者系统启动时预分配内存空间,避免多次malloc和free等函数调用。

    除此以外,Slab对于内存碎片的处理策略是通过为每个slab制定固定的chunk大小,相邻(可以理解为逻辑上,也可以理解为物理上,源码里面实现的物理上)slab之间chunk大小为1+factor倍,这样每个slab可以适配适合chunk大小的内存空间请求。

    而slab中的chunk的管理则是使用了很多延迟管理策略,它不会经常检查chunk是否超时,直到使用时才检查,它会把回收的chunk放到slot中,而不是free掉,需要使用时直接在slot中使用。

    从XGuru的Memcached源码剖析笔记的pfd中借用下面这张图来简单表示slab结构。

                                     

    我将Memcached的Slab相关的内容拿出来,整成一个类,使用单例模式,系统中只维护一个Slabs实例,用来做slabs的参数初始化 ,通过多级Slab为不同大小的对象分配内存空间,每个slab的chunk大小以factor倍数的大小递增 ,chunk作为对象的容器,当chunk数不足时上层api可以采取LRU的策略剔除chunk。而Slab的chunk内存分配主要经过下面几个阶段:

    ->slabs_alloc(考虑到多线程内存空间使用的竞争)

    -> do_slabs_alloc(计算申请的大小对应的slab的id,使用自治的slab的状态信息,返回地址,首次使用会调用do_slabs_newslab)

    -> do_slabs_newslab(new,可能malloc也可能利用prealloc,然后new后要加入队列或者修正一些辅助参数)

    -> grow_slab_list(new时可能要拓展)

    -> memory_allocate(实际的内存分配)

直接上源码(里面有注释,可能有些注释不正确望指针,原谅我又偷懒...)

slabs.cpp
#include<stdio.h>
#include<string.h>
#include<pthread.h>
#include<stdlib.h>

//#ifndef SLABS
//#define SLABS

bool prealloc=false; //不打开prealloc

struct Settings
{
int chunk_size;
int item_size_max;
double oldest_live;
size_t maxbytes;
}settings;

struct Item
{
Item *prev;
Item *h_next;
Item *next;

int refcount;
double time;
double exptime;
int slabs_clsid;

int nkey;
int nbytes;
int nsuffix;
}item;


#define POWER_SMALLEST 1
#define POWER_LARGEST 200
#define MAX_NUMBER_OF_SLAB_CLASSES (POWER_LARGEST+1)

#define CHUNK_ALIGN_BYTES 8

static pthread_mutex_t slabs_lock=PTHREAD_MUTEX_INITIALIZER; //做slab的alloc时需要加锁互斥

typedef struct
{
//当前slab中每个chunk的大小
unsigned int size;
//每个slab有多少个items
unsigned int perslab;

void **slots;

unsigned int sl_total;
unsigned int sl_curr;

void *end_page_ptr;
unsigned int end_page_free;

unsigned int slabs;

void* *slab_list;
unsigned int list_size;

unsigned int killing;
size_t requested;

}slabclass_t;

//Slabs数据结构的定义,使用单例模式,系统中只维护一个Slabs实例,用来做slabs的参数初始化
//通过多级Slab为不同大小的对象分配内存空间,每个slab的chunk大小以factor倍数的大小递增
//chunk作为对象的容器,当chunk数不足时上层api可以采取LRU的策略剔除chunk。
class Slabs
{
public:
//最多有POWER_LARGEST+1个slab,每个slab额定大小由factor决定
slabclass_t slabclass[MAX_NUMBER_OF_SLAB_CLASSES];

//预分配内存时使用
size_t mem_limit;
size_t mem_malloced;
void *mem_base;
void *mem_current;
size_t mem_avail;

//最后成功分配最大的power值
int power_largest;

public:
//获取实例
static Slabs *getSlabsInstance();

//主要是确定每个slab的chunk大小和chunk个数,做一些参数初始化而已
//limit on number of bytes to allocate
//each slab use a chunk size = previous slab's chunk size * factor
//allocate all memory up front or allocate memory in chunks as it is needs;
void slabs_init(const size_t limit, const double factor,const bool prealloc);

//通过object的size返回所在的slabclass
unsigned int slabs_clsid(const size_t size);

//给定size分配object
void *slabs_alloc(const size_t size,unsigned int id);

//free previously allocated object
void slabs_free(void *ptr,size_t size,unsigned int id);

//adjust the stats for memory requested
// void slabs_adjust_mem_requested(unsigned int id,size_t old,size_t ntotal);

//return a datum for stats in binary protocol
// bool get_stats(const char *stat_type,int nkey,ADD_STAT add_stats,void *c);

//fill buffer with stats
// void slabs_stats(ADD_STAT add_stats,void *c);

private:
static Slabs *slabs;

private:
Slabs()
{
mem_limit=0;
mem_malloced=0;

mem_base=NULL;
mem_current=NULL;

mem_avail=0;

memset(slabclass,0,sizeof(slabclass));
}
void* do_slabs_alloc(const size_t size,unsigned int id);
void do_slabs_free(void *ptr,const size_t size,unsigned int id);
int do_slabs_newslab(const unsigned int id);
int grow_slab_list (const unsigned int id);
void* memory_allocate(size_t size);
};

Slabs* Slabs::slabs=NULL;
Slabs* Slabs::getSlabsInstance() //单例模式
{
if(slabs==NULL)
slabs=new Slabs();
return slabs;
}

//主要是确定每个slab的每个chunk大小和chunk个数( slabclass.size + slabclass.perslab )
void Slabs::slabs_init(const size_t limit, const double factor, const bool prealloc)
{
int i = POWER_SMALLEST-1;

//item + ( key+value+flags )
unsigned int size=sizeof(item)+settings.chunk_size;

//预分配内存最大值
//limit=settings.maxbytes;
//如果使用prealloc策略,那么要记录base和current和avail的信息
mem_limit=limit;
if(prealloc)
{
mem_base=malloc(mem_limit); //mem_base起始地址,分配mem_limit大小的内存空间
if(mem_base!=NULL)
{
mem_current=mem_base; //current
mem_avail=mem_limit; //available
}
}

//数组边界 或 倒数第二个slab(size*factor <= item_size_max,不能让size*factor大于item_size_max,会越界)
while( ++i<POWER_LARGEST && size<=settings.item_size_max/factor)
{
//size对齐,这一步很重要
if (size% CHUNK_ALIGN_BYTES)
size += CHUNK_ALIGN_BYTES - (size % CHUNK_ALIGN_BYTES);

//slab中每个chunk的size
slabclass[i].size=size;
//item_size_max就是 upper end for slab,预定义为1MB
//也就是说,每个slab的可控空间的实际大小一样,但是其中的chunk大小有factor的倍数关系
slabclass[i].perslab=settings.item_size_max/slabclass[i].size;

//slab中每个chunk的size以factor倍因子递增
size *= factor;
}
//剩下的全给这个chunk
power_largest=i;
slabclass[power_largest].size=settings.item_size_max;
slabclass[power_largest].perslab=1;

#ifndef DONT_PREALLOC_SLABS
//slabs_preallocate
//防出错,这里省略
#endif

/*
for(int j=0;j<power_largest;j++)
printf("%d\n",slabclass[j].size);
*/
}

//size大小的object在哪个slabclass内
unsigned int Slabs::slabs_clsid(const size_t size)
{
int res=POWER_SMALLEST;
if(size==0)
return 0;
while( size>slabclass[res].size)
if(res++ == power_largest)
return 0;
return res;
}

//slabs_alloc所依赖的函数

//实际的内存分配
void* Slabs::memory_allocate(size_t size)
{
void *ret;

if (mem_base == NULL) {
/* We are not using a preallocated large memory chunk */
//直接使用malloc分配整个chunk*size大小的slab
ret = malloc(size);
} else {
ret = mem_current;

if (size > mem_avail) {
return NULL;
}

/* mem_current pointer _must_ be aligned!!! */
//size对齐
if (size % CHUNK_ALIGN_BYTES) {
size += CHUNK_ALIGN_BYTES - (size % CHUNK_ALIGN_BYTES);
}

//递增mem_current地址
mem_current = ((char*)mem_current) + size;

//修正mem_avail
if (size < mem_avail) {
mem_avail -= size;
} else {
mem_avail = 0;
}
}

return ret;
}
int Slabs::grow_slab_list (const unsigned int id)
{
slabclass_t *p = &slabclass[id];
if (p->slabs == p->list_size) {
size_t new_size = (p->list_size != 0) ? p->list_size * 2 : 16;
void *new_list = realloc(p->slab_list, new_size * sizeof(void *));
if (new_list == 0) return 0;
p->list_size = new_size;
//这里也可能出现问题!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
p->slab_list = &new_list;
}
return 1;
}

int Slabs::do_slabs_newslab(const unsigned int id)
{
slabclass_t *p = &slabclass[id];
//注意 这里的len指的是整个slab所有chunk的长度!!
int len = p->size * p->perslab;
char *ptr;

if ( (mem_limit && mem_malloced + len > mem_limit && p->slabs > 0) || // 如果已经prealloc,那就判断是否太大了
(grow_slab_list(id) == 0) || //已经超过了listsize,调用grow_slab_list
((ptr = (char *)memory_allocate((size_t)len)) == 0)) { //调用memory_allocate

//MEMCACHED_SLABS_SLABCLASS_ALLOCATE_FAILED(id);
return 0;
}

memset(ptr, 0, (size_t)len);

//修正
p->end_page_ptr = ptr;
p->end_page_free = p->perslab;

//加入到list中??
p->slab_list[p->slabs++] = ptr;

mem_malloced += len; //好像是为了辅助prealloc
//MEMCACHED_SLABS_SLABCLASS_ALLOCATE(id);

return 1;
}

//static
void* Slabs::do_slabs_alloc(const size_t size,unsigned int id)
{
slabclass_t *p;
void *ret=NULL;

if(id<POWER_SMALLEST || id>power_largest)
{
return NULL;
//MEMCACHED_SLABS_ALLOCATE_FAILED(size,0);
}
//根据ID号获取slabs
p=&slabclass[id];
//assert(p->sl_curr==0|| ((item*)p->slots[p->sl_curr-1])->slabs_clsid=0);

#ifdef USE_SYSTEM_MALLOC
if(mem_limit && mem_malloced+size>mem_limit)
{
return 0;
//MEMCACHED_SLABS_ALLOCATE_FAILED(size,id);
}
mem_malloced+=size;
ret=malloc(size);
//MEMCACHED_SLABS_ALLOCATR(size,id,0,ret)
return ret;
#endif

//如果不是,那么由slab自治,slab中会记录当前分配信息等,根据这些信息进行内存分配或返回
if( !(p->end_page_ptr!=0||p->sl_curr!=0||do_slabs_newslab(id)!=0) ) //调用do_slabs_newslab,详见该函数,做内存分配操作
//if( p->end_page_ptr==0 && p->sl_curr==0 && do_slabs_newslab(id)==0 )
ret=NULL;
else if( p->sl_curr!=0) //如果end!=0则直接到这步(如果sl!=0则行,如果刚好sl=0,那就是下面那个else满足的条件),如果end==0但是sl!=0也到这步,
//又因为如果首次分配会使用do_slabs_newslab,而该函数又会做整个slab所有chunk的预分配,所以后面会依赖p->end_page_ptr和p->sl_curr来指示以获取新slot
//如果当前的slab的free slot有 就直接拿来用
ret=p->slots[--p->sl_curr];
else //如果end==0,sl==0,do_new!=0到这一步
{
//assert(p->end_page_ptr!=NULL);
//printf("\nnew\n");
//如果一开始还没有分配东西(包括没有prealloc),那么do_slabs_newslab分配完空间后
//要修正p->end_page_ptr
ret=p->end_page_ptr;
if( --p->end_page_free!=0)
p->end_page_ptr= ( (caddr_t)p->end_page_ptr)+p->size;
else
p->end_page_ptr=0;
}

//修正p->requested
if(ret)
{
p->requested += size;
//MEMCACHED_SLABS_ALLOCATE(size,id,p->size,ret);
}
else
{
//MEMCACHED_SLABS_ALLOCATE_FAILED(size,id);
}

return ret;
}
void* Slabs::slabs_alloc(size_t size,unsigned int id)
{
void *ret;
pthread_mutex_lock(&slabs_lock); //互斥访问
ret=do_slabs_alloc(size,id);
pthread_mutex_unlock(&slabs_lock);
return ret;
}

//slabs_alloc(考虑到多线程内存空间使用的竞争)
//-> do_slabs_alloc(计算申请的大小对应的slab的id,使用自治的slab的状态信息,返回地址,首次使用会调用do_slabs_newslab)
//-> do_slabs_newslab(new,可能malloc也可能利用prealloc,然后new后要加入队列或者修正一些辅助参数)
//-> grow_slab_list(new时可能要拓展)
//-> memory_allocate(实际的内存分配)

//static
//typedef void* pVoid;
//free是把释放的东西放在slot上!
void Slabs::do_slabs_free(void *ptr,const size_t size,unsigned int id)
{
slabclass_t *p;

//assert( ((item*)ptr)->slabs_clcid==0 );
//assert( id>=POWER_SMALLEST && id<=power_largest );
if(id<POWER_SMALLEST || id>power_largest)
return;

//MEMCACHED_SLABS_FREE(size,id,ptr); //这里应该做了什么
p=&slabclass[id];

#ifdef USE_SYSTEM_MALLOC
mem_malloced -= size;
free(ptr); //直接使用free
return;
#endif

//(pVoid) (*new_slots)=0;
void** new_slots=0;

if( p->sl_curr==p->sl_total)
{
int new_size=(p->sl_total!=0)?p->sl_total*2:16;

//这样不知道有没有问题!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
*new_slots = ((void*)realloc(p->slots,new_size*sizeof(void*)));
if(new_slots==0)
return;

p->slots=new_slots;
p->sl_total=new_size;
}
p->slots[p->sl_curr++]=ptr;
p->requested -= size;
return;
}
void Slabs::slabs_free(void *ptr,size_t size, unsigned int id)
{
pthread_mutex_lock(&slabs_lock);

do_slabs_free(ptr,size,id);

pthread_mutex_unlock(&slabs_lock);
}

//#endif

//usage of slabs alloc
//LRU的策略在这里!方便策略更换!
#define LARGEST_ID POWER_LARGEST
#include<stdint.h>
Item *item_alloc(char *key, const size_t nkey, const int flags, const double exptime, const int nbytes) {
//rel_time_t -> double
Slabs *s=Slabs::getSlabsInstance();

static Item *tails[LARGEST_ID];
double current_time=0;


//The length of the suffix.
uint8_t nsuffix;
//item
Item *it = NULL;
//suffix
char suffix[40];

//size_t ntotal = item_make_header(nkey + 1, flags, nbytes, suffix, &nsuffix);
nsuffix = (uint8_t) snprintf(suffix, 40, " %d %d\r\n", flags, nbytes - 2);
size_t ntotal=sizeof(item) + nkey+1 + nsuffix + nbytes;

//找到合适大小的slab的id号,ID号用来传入slab相关函数中,避免不必要的计算
unsigned int id = s->slabs_clsid(ntotal);
if (id == 0)
return 0;

//看看是否有过时的item
int tries = 50;
Item *search;
//最长生存时间
double oldest_live = settings.oldest_live;

//根据slabclass的id号,利用tails表获得tail表id下的item,然后搜寻prev
for (search = tails[id]; tries > 0 && search != NULL; tries--, search=search->prev) {
if (search->refcount == 0 && //如果引用数为0 并且 ( 生存时间小于oldest 或者 exp时间小于当前时间 )
((search->time < oldest_live) || // dead by flush
(search->exptime != 0 && search->exptime < current_time))) {

it = search;

it->refcount = 1;
// slabs_adjust_mem_requested(it->slabs_clsid, ITEM_ntotal(it), ntotal);
// do_item_unlink(it);

/* Initialize the item block: */
it->slabs_clsid = 0;
it->refcount = 0;
break;
}
}


//如果找不到expired item 并且slabs分配失败,那么试着在当前item内利用LRU剔除一个
if (it == NULL && (it = (Item*)s->slabs_alloc(ntotal, id)) == NULL) {
/*
** Could not find an expired item at the tail, and memory allocation
** failed. Try to evict some items!
*/
tries = 50;

/*
* try to get one off the right LRU
* don't necessariuly unlink the tail because it may be locked: refcount>0
* search up from tail an item with refcount==0 and unlink it; give up after 50
* tries
*/
//跟上面一样的for
for (search = tails[id]; tries > 0 && search != NULL; tries--, search=search->prev) {
//don't necessariuly unlink the tail because it may be locked: refcount>0
if (search->refcount == 0)
{
if (search->exptime == 0 || search->exptime > current_time)
{
/*
itemstats[id].evicted++;
itemstats[id].evicted_time = current_time - search->time;
if (search->exptime != 0)
itemstats[id].evicted_nonzero++;

if ((search->it_flags & ITEM_FETCHED) == 0) {
STATS_LOCK();
stats.evicted_unfetched++;
STATS_UNLOCK();
itemstats[id].evicted_unfetched++;
}

STATS_LOCK();
stats.evictions++;
STATS_UNLOCK();
*/
}
else
{
/*
itemstats[id].reclaimed++;
STATS_LOCK();
stats.reclaimed++;
STATS_UNLOCK();
if ((search->it_flags & ITEM_FETCHED) == 0) {
STATS_LOCK();
stats.expired_unfetched++;
STATS_UNLOCK();
itemstats[id].expired_unfetched++;
}
*/
}
// do_item_unlink(search);

break;
}

}


//unlink(search)那个item后,就可以再次尝试alloc了
it = (Item*)s->slabs_alloc(ntotal, id);
}

//slab分配成功后就做一些初始化工作

it->slabs_clsid = id;
//assert(it != heads[it->slabs_clsid]);

it->next = it->prev = it->h_next = 0;
it->refcount = 1; /* the caller will have a reference */
// DEBUG_REFCNT(it, '*');

it->nkey = nkey;
it->nbytes = nbytes;
// memcpy(ITEM_key(it), key, nkey);
it->exptime = exptime;
// memcpy(ITEM_suffix(it), suffix, (size_t)nsuffix);
it->nsuffix = nsuffix;

return it;
}

//usage of slabs alloc
int main()
{
//settings
settings.item_size_max=1024*1024;
settings.chunk_size=48;
settings.maxbytes=1024*1024*64;


Slabs *s=Slabs::getSlabsInstance();
if (s==NULL)
printf("null");
//else printf("%d",s->mem_limit);

s->slabs_init(0,1.25,false); //仅仅指定了每个slabclass的每个chunk大小和chunk的数量

size_t ntotal=512; //(480,600)
unsigned int id = s->slabs_clsid(ntotal);
void *it =s->slabs_alloc(ntotal, id);

// s->slabs_free(it,ntotal,id);

/*
conn* c;
protocol_binary_request_set* req = binary_get_request(c);
char* key = binary_get_key(c);
int nkey = c->binary_header.request.keylen;
vlen = c->binary_header.request.bodylen - (nkey + c->binary_header.request.extlen);
Item *it = item_alloc(key, nkey, req->message.body.flags,
realtime(req->message.body.expiration), vlen+2);
*/
}