各种内存分配器的对比测试

最近两天测试了下tcmalloc,性能的确牛B.

所以修改了下固定对象分配器,模仿tcmalloc利用tls做thread cache.

下面是在我的机器上,对自己写的几个内存分配器与tcmalloc做的对比测试。每行末尾的数字是该轮测试的耗时(由GetSystemMs()计时):

fix_obj_pool finish:326
fix_obj_pool finish:165
fix_obj_pool finish:168
fix_obj_pool finish:164
fix_obj_pool finish:174
fix_obj_pool finish:164
fix_obj_pool finish:174
fix_obj_pool finish:185
fix_obj_pool finish:173
fix_obj_pool finish:168
gen_allocator finish:567
gen_allocator finish:264
gen_allocator finish:261
gen_allocator finish:260
gen_allocator finish:260
gen_allocator finish:261
gen_allocator finish:260
gen_allocator finish:261
gen_allocator finish:260
gen_allocator finish:263
block_obj_allocator finish:342
block_obj_allocator finish:257
block_obj_allocator finish:258
block_obj_allocator finish:257
block_obj_allocator finish:258
block_obj_allocator finish:257
block_obj_allocator finish:258
block_obj_allocator finish:259
block_obj_allocator finish:263
block_obj_allocator finish:262
tcmalloc finish:279
tcmalloc finish:266
tcmalloc finish:265
tcmalloc finish:267
tcmalloc finish:266
tcmalloc finish:266
tcmalloc finish:265
tcmalloc finish:264
tcmalloc finish:266
tcmalloc finish:267
test1 finish------------
fix_obj_pool finish:606
fix_obj_pool finish:471
fix_obj_pool finish:469
fix_obj_pool finish:473
fix_obj_pool finish:468
fix_obj_pool finish:468
fix_obj_pool finish:470
fix_obj_pool finish:474
fix_obj_pool finish:475
fix_obj_pool finish:467
gen_allocator finish:928
gen_allocator finish:647
gen_allocator finish:677
gen_allocator finish:643
gen_allocator finish:645
gen_allocator finish:644
gen_allocator finish:643
gen_allocator finish:644
gen_allocator finish:643
gen_allocator finish:644
block_obj_allocator finish:586
block_obj_allocator finish:500
block_obj_allocator finish:502
block_obj_allocator finish:500
block_obj_allocator finish:502
block_obj_allocator finish:501
block_obj_allocator finish:501
block_obj_allocator finish:501
block_obj_allocator finish:501
block_obj_allocator finish:501
tcmalloc finish:551
tcmalloc finish:549
tcmalloc finish:549
tcmalloc finish:549
tcmalloc finish:551
tcmalloc finish:549
tcmalloc finish:548
tcmalloc finish:551
tcmalloc finish:549
tcmalloc finish:550
test2 finish------------
fix_obj_pool finish:464
fix_obj_pool finish:466
fix_obj_pool finish:464
fix_obj_pool finish:465
fix_obj_pool finish:465
fix_obj_pool finish:466
fix_obj_pool finish:465
fix_obj_pool finish:464
fix_obj_pool finish:467
fix_obj_pool finish:465
gen_allocator finish:674
gen_allocator finish:661
gen_allocator finish:667
gen_allocator finish:656
gen_allocator finish:657
gen_allocator finish:658
gen_allocator finish:658
gen_allocator finish:660
gen_allocator finish:657
gen_allocator finish:660
block_obj_allocator finish:479
block_obj_allocator finish:479
block_obj_allocator finish:477
block_obj_allocator finish:477
block_obj_allocator finish:478
block_obj_allocator finish:480
block_obj_allocator finish:478
block_obj_allocator finish:481
block_obj_allocator finish:477
block_obj_allocator finish:478
tcmalloc finish:562
tcmalloc finish:565
tcmalloc finish:563
tcmalloc finish:562
tcmalloc finish:562
tcmalloc finish:563
tcmalloc finish:566
tcmalloc finish:565
tcmalloc finish:562
tcmalloc finish:562
test3 finish------------

三个测试分别是

1)分配1000万个16字节的对象(只统计分配的耗时,释放不计入)

2)分配1000万个,再释放这1000万个(分配和释放都计入耗时)

3)每分配10万个就把这10万个释放掉,如此重复 1000万/10万 = 100 次

从输出可以看出fix_obj_pool 的第一个测试是最快的,因为它的分配处理最简单,但如果把释放也计入统计,优势就几乎没有了。

还有一个手段可以优化fix_obj_pool,就是释放时不将对象放回到可用列表中,只是增加一个计数,当整个内存块中的对象都被释放时,

才将内存放回到可用列表中去。而gen_allocator效果是最差的,可以直接丢弃了。

从测试结果可以看出,tcmalloc已经可以满足大多数的需求,基本没有必要自己写通用内存分配器。当然对象池还是可以考虑的。

代码如下:

block_obj_allocator.h

#ifndef BLOCK_OBJ_ALLOCATOR_H
#define BLOCK_OBJ_ALLOCATOR_H

/*
 * Public interface of the block object allocator.
 *
 * The guard macro deliberately avoids a leading underscore followed by an
 * upper-case letter: such identifiers are reserved for the implementation.
 */

/* Opaque handle; the structure itself is defined in block_obj_allocator.c. */
typedef struct block_obj_allocator *block_obj_allocator_t;

/*
 * Creates a new allocator instance and returns its handle.
 * The implementation installs Alloc/DeAlloc/Destroy callbacks in the object,
 * which is how callers allocate from it and eventually tear it down.
 */
block_obj_allocator_t create_block_obj_allocator(void);

/*
 * Prints (with printf) the calling thread's cache statistics for the size
 * class that serves requests of `size` bytes.
 */
void print_info(block_obj_allocator_t ba,int size);

#endif

block_obj_allocator.c

#include "block_obj_allocator.h"
#include <assert.h>
#include <pthread.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include "link_list.h"
#include "spinlock.h"
#include "clib/include/allocator.h"

/*
 * One block of backing memory carved into equally sized objects, together
 * with the singly linked chain of the objects in it that are currently free.
 * Blocks are themselves kept on link_list containers, which is why the
 * linkage node is the first member (the code casts free_list* <-> list_node*).
 */
struct free_list{
    /* Linkage used when this block sits on a link_list; must stay first. */
    list_node next;
    /* Number of free objects currently chained between head and tail. */
    uint32_t  size;
    /* Number of objects the block was carved into when it was created. */
    uint32_t  init_size;
    /* First free object, or NULL when the block has no free objects. */
    list_node *head;
    /* Last free object, or NULL when the block has no free objects. */
    list_node *tail;
    /* Start of the backing allocation; released with free(). */
    void *mem;
};

/*
 * Per-thread allocator for a single size class (objects of 1<<array_idx
 * bytes). It hands out objects from blocks it owns and only goes to the
 * central allocator when it has no free object left.
 */
struct thread_allocator
{
    /* List linkage; not referenced by the code visible in this file. */
    list_node next;
    /* Central allocator that blocks are fetched from and returned to. */
    block_obj_allocator_t central_allocator;
    /* Blocks that still have free objects; allocation takes from the head block. */
    struct link_list *_free_list;
    /* Blocks that were emptied by allocation; frees refill the head block first. */
    struct link_list *_recover;
    /* Total number of free objects currently held across this allocator's blocks. */
    uint32_t   free_size;
    /* Size class index; the object size for this allocator is 1<<array_idx. */
    uint16_t   array_idx;
    /* When a block becomes completely free and free_size has reached this
     * value, the block is returned to the central allocator instead of kept. */
    uint32_t   collect_factor;
};

/*
 * The complete cache owned by one thread: one thread_allocator per size
 * class (indices 0..16, i.e. object sizes 1<<0 .. 1<<16).
 */
struct thread_cache
{
    /* Linkage on the central allocator's thread_caches list; must stay first
     * because the destroy path casts list nodes back to thread_cache*. */
    list_node next;
    /* Indexed by size class. */
    struct thread_allocator _allocator[17];
};

/*
 * Central allocator shared by all threads. Each thread reaches its private
 * thread_cache through the thread-specific key; blocks that are not owned by
 * any thread are parked on the per-size-class central lists.
 */
struct block_obj_allocator
{
    /* Project macro; presumably embeds the base `struct allocator` as the
     * `super_class` member used by create_block_obj_allocator — TODO confirm
     * against clib/include/allocator.h. */
    IMPLEMEMT(allocator);
    /* Thread-specific-data key holding the calling thread's thread_cache. */
    pthread_key_t t_key;
    /* One lock per size class, guarding the matching _free_list entry. */
    spinlock_t _free_list_mtx[17];
    /* Central lists of blocks, one list per size class. */
    struct link_list *_free_list[17];
    /* Guards thread_caches. */
    spinlock_t mtx;
    /* Every thread_cache ever created, so destruction can release them all. */
    struct link_list *thread_caches;
};


static void *free_list_get(struct free_list *f)
{
    void *ptr = (void*)f->head;
    f->head = f->head->next;
    if(!f->head)
        f->tail = NULL;
    --f->size;    
    return ptr;
}

/*
 * Appends the object at ptr to the tail of block f's free chain and bumps
 * the free-object counter. The first bytes of the object are reused as the
 * chain link.
 */
static void free_list_put(struct free_list *f,void *ptr)
{
    list_node *node = (list_node*)ptr;
    node->next = NULL;
    if(f->tail == NULL)
        f->head = node;          /* chain was empty: node is also the head */
    else
        f->tail->next = node;    /* link behind the current last object */
    f->tail = node;
    f->size += 1;
}

/*
 * Size in bytes of every backing block requested from the system (1 MiB).
 * The body is parenthesized so the macro expands correctly inside larger
 * expressions, e.g. as the right-hand side of a division.
 */
#define DEFAULT_BLOCK_SIZE (1024*1024)

/*
 * Allocates a fresh DEFAULT_BLOCK_SIZE block and chains it into objects of
 * `size` bytes each.
 *
 * Returns the new block with size == init_size == DEFAULT_BLOCK_SIZE/size,
 * or NULL when:
 *   - size is too small to hold the list_node link written into every free
 *     object (this also rejects 0), or larger than the block itself;
 *   - either allocation fails (nothing is leaked in that case).
 * Callers assert on the result, so failures are reported through NULL rather
 * than through an assert that disappears under NDEBUG.
 */
static struct free_list *creat_new_freelist(uint32_t size)
{
    struct free_list *f;
    uint32_t init_size;
    uint32_t i;

    if(size < sizeof(list_node) || size > DEFAULT_BLOCK_SIZE)
        return NULL;

    f = calloc(1,sizeof(*f));
    if(!f)
        return NULL;
    f->mem = calloc(1,DEFAULT_BLOCK_SIZE);
    if(!f->mem)
    {
        free(f);
        return NULL;
    }

    init_size = DEFAULT_BLOCK_SIZE/size;
    f->init_size = init_size;
    /* f->size starts at 0 (calloc) and free_list_put counts it up to init_size. */
    for(i = 0; i < init_size; ++i)
        free_list_put(f,((uint8_t*)f->mem)+(i*size));
    return f;
}

/*
 * Hands out one block for size class array_idx: a block parked on the
 * central list if there is one, otherwise a newly created block. Only the
 * list access happens under the class's spinlock; creating a block does not.
 */
static struct free_list *central_get_freelist(block_obj_allocator_t central,uint16_t array_idx)
{
    struct free_list *parked;

    spin_lock(central->_free_list_mtx[array_idx],4000);
    parked = (struct free_list*)link_list_pop(central->_free_list[array_idx]);
    spin_unlock(central->_free_list_mtx[array_idx]);

    if(parked)
        return parked;
    /* Nothing parked for this class: carve a new block of 1<<array_idx byte objects. */
    return creat_new_freelist(1<<array_idx);
}

/*
 * Parks block f on the central list of size class array_idx, holding that
 * class's spinlock for the duration of the list update.
 *
 * Once this returns, f can be popped by any thread through
 * central_get_freelist(), so the caller should not read or write f afterwards.
 * NOTE(review): the meaning of spin_lock's second argument (4000) is defined
 * in spinlock.h, which is not visible here.
 */
static void give_back_to_central(block_obj_allocator_t central,uint16_t array_idx,struct free_list *f)
{
    //printf("give_back_to_central\n");
    spin_lock(central->_free_list_mtx[array_idx],4000);
    LINK_LIST_PUSH_BACK(central->_free_list[array_idx],f);
    spin_unlock(central->_free_list_mtx[array_idx]);
}


/*
 * Takes one object out of this thread's allocator for its size class.
 *
 * Source block selection:
 *   - free objects are cached: use the head block of _free_list, or, when
 *     that list is empty, move the head block of _recover over and use it;
 *   - nothing cached: fetch a block from the central allocator.
 * A block emptied by this call is moved from _free_list to _recover.
 */
void *thread_allocator_alloc(struct thread_allocator *a)
{
    struct free_list *src;
    void *obj;

    if(a->free_size != 0)
    {
        src = (struct free_list*)link_list_head(a->_free_list);
        if(src == NULL)
        {
            /* All cached objects live in the block currently being refilled. */
            src = (struct free_list*)link_list_pop(a->_recover);
            LINK_LIST_PUSH_BACK(a->_free_list,src);
        }
    }
    else
    {
        /* The thread cache is out of memory: get a block from central. */
        src = central_get_freelist(a->central_allocator,a->array_idx);
        assert(src);
        LINK_LIST_PUSH_BACK(a->_free_list,src);
        a->free_size += src->size;
    }

    obj = free_list_get(src);
    assert(obj);
    a->free_size -= 1;

    if(src->size == 0)
    {
        /* Block exhausted: park it where frees will refill it. */
        link_list_pop(a->_free_list);
        link_list_push_back(a->_recover,(list_node*)src);
    }
    return obj;
}

/*
 * Returns the object at ptr to this thread's allocator for its size class.
 *
 * The object goes into the head block of _recover when there is one,
 * otherwise into the head block of _free_list. When a _recover block becomes
 * completely free it is either handed back to the central allocator (if
 * free_size has reached collect_factor) or moved to _free_list.
 */
void thread_allocator_dealloc(struct thread_allocator *a,void *ptr)
{
    struct free_list *f = (struct free_list*)link_list_head(a->_recover);
    if(!f)
    {
        /* No block is waiting to be refilled: use the current allocation block. */
        f = (struct free_list*)link_list_head(a->_free_list);
        assert(f);
        free_list_put(f,ptr);
        ++a->free_size;
        return;
    }

    free_list_put(f,ptr);
    ++a->free_size;
    if(f->size != f->init_size)
        return;

    /* The block has been refilled completely: keep it locally or return it to central. */
    link_list_pop(a->_recover);
    if(a->free_size >= a->collect_factor)
    {
        /* Capture the count BEFORE publishing the block: once it is on the
         * central list another thread may pop it and start changing f->size. */
        uint32_t returned = f->size;
        give_back_to_central(a->central_allocator,a->array_idx,f);
        a->free_size -= returned;
    }
    else
        link_list_push_back(a->_free_list,(list_node*)f);
}

/*
 * Debug helper: prints the allocator's total free-object count, then the
 * free count of every block on _free_list and on _recover.
 *
 * The counters are uint32_t, so they are printed with %u (the original %d
 * did not match the argument type).
 */
void thread_allocator_info(struct thread_allocator *a)
{
    struct free_list *f;

    printf("free_size:%u\n",(unsigned)a->free_size);

    f = (struct free_list*)link_list_head(a->_free_list);
    while(f)
    {
        printf("f size%u\n",(unsigned)f->size);
        /* Blocks are chained through their leading list_node. */
        f = (struct free_list*)((list_node*)f)->next;
    }

    f = (struct free_list*)link_list_head(a->_recover);
    while(f)
    {
        printf("f recover size%u\n",(unsigned)f->size);
        f = (struct free_list*)((list_node*)f)->next;
    }
}


extern uint8_t GetK(uint32_t size);

/*
 * Builds the thread cache for the calling thread: one thread_allocator per
 * size class, each with empty _free_list/_recover lists, and registers the
 * cache on ba->thread_caches (under ba->mtx) so the allocator's destroy path
 * can release it.
 *
 * Returns NULL if the cache itself cannot be allocated.
 * NOTE(review): the results of LINK_LIST_CREATE() are not checked because
 * its failure behaviour is defined in link_list.h, not visible here.
 */
static struct thread_cache* thread_cache_create(block_obj_allocator_t ba)
{
    struct thread_cache *tc = calloc(1,sizeof(*tc));
    int32_t i;

    if(!tc)
        return NULL;

    for(i = 0; i < 17; ++i)
    {
        tc->_allocator[i].central_allocator = ba;
        tc->_allocator[i]._free_list = LINK_LIST_CREATE();
        tc->_allocator[i]._recover = LINK_LIST_CREATE();
        tc->_allocator[i].array_idx = i;
        /* Keep at most two blocks' worth of free objects per size class. */
        tc->_allocator[i].collect_factor = ((DEFAULT_BLOCK_SIZE)*2)/(1<<i);
    }
    spin_lock(ba->mtx,4000);
    LINK_LIST_PUSH_BACK(ba->thread_caches,tc);
    spin_unlock(ba->mtx);
    return tc;
}

/*
 * Frees every block chained on flist: first the block's backing memory, then
 * the block descriptor. The link_list container itself is left for the
 * caller to destroy.
 */
static void release_freelist(struct link_list *flist)
{
    list_node *cur;
    list_node *following;

    for(cur = link_list_head(flist); cur != NULL; cur = following)
    {
        struct free_list *blk = (struct free_list*)cur;
        /* Fetch the successor before the descriptor holding it is freed. */
        following = cur->next;
        free(blk->mem);
        free(blk);
    }
}

static void destroy_thread_cache(struct thread_cache *tc)
{
    int32_t i = 0;
    for(; i < 17; ++i)
    {
        release_freelist(tc->_allocator[i]._free_list);
        release_freelist(tc->_allocator[i]._recover);
        LINK_LIST_DESTROY(&(tc->_allocator[i]._free_list));
        LINK_LIST_DESTROY(&(tc->_allocator[i]._recover));
    }
    free(tc);
}

/*
 * Allocates `size` bytes from the thread cache.
 *
 * Every object is prefixed with an int32_t header that records its size
 * class, so thread_cache_dealloc can find the right allocator again; the
 * returned pointer addresses the bytes just after that header.
 *
 * Returns NULL when size plus the header overflows, when the size class
 * reported by GetK has no entry in the allocator table (the original indexed
 * the table out of bounds in that case), or when no object could be obtained.
 * NOTE(review): GetK is defined elsewhere; it is assumed to return the
 * exponent of the power-of-two class that fits `size` — confirm.
 */
static void* thread_cache_alloc(struct thread_cache *tc,uint32_t size)
{
    uint8_t k;
    int32_t *slot;

    if(size > UINT32_MAX - sizeof(int32_t))
        return NULL;
    size += sizeof(int32_t);

    k = GetK(size);
    if(k >= sizeof(tc->_allocator)/sizeof(tc->_allocator[0]))
        return NULL;

    slot = (int32_t*)thread_allocator_alloc(&(tc->_allocator[k]));
    if(!slot)
        return NULL;
    *slot = k;
    return (void*)(slot + 1);
}

/*
 * Gives an object obtained from thread_cache_alloc back to the thread cache.
 *
 * The size class is read from the int32_t header stored immediately before
 * ptr, and the whole slot (header included) is returned to that class's
 * allocator. A NULL ptr is ignored, mirroring free(NULL). The header value
 * is asserted to be a valid class index before it is used to index the
 * allocator table.
 */
static void  thread_cache_dealloc(struct thread_cache *tc,void *ptr)
{
    int32_t *slot;
    uint8_t k;

    if(!ptr)
        return;
    slot = ((int32_t*)ptr)-1;
    k = *slot;
    assert(k < sizeof(tc->_allocator)/sizeof(tc->_allocator[0]));
    thread_allocator_dealloc(&(tc->_allocator[k]),slot);
}

/*
 * Debug helper: prints the statistics of the allocator that would serve a
 * request of `size` bytes (the int32_t header is added before the size class
 * is looked up, exactly as the allocation path does).
 *
 * Does nothing when the resulting size class has no entry in the allocator
 * table, instead of indexing past its end.
 */
static void thread_cache_info(struct thread_cache *tc,uint32_t size)
{
    uint8_t k;

    size += sizeof(int32_t);
    k = GetK(size);
    if(k >= sizeof(tc->_allocator)/sizeof(tc->_allocator[0]))
        return;
    thread_allocator_info(&(tc->_allocator[k]));
}

/*
 * Alloc callback of the allocator: allocates `size` bytes through the
 * calling thread's cache, creating and registering that cache on the
 * thread's first allocation.
 *
 * Returns NULL for a negative size, when the thread cache cannot be created,
 * or when it cannot be stored in the thread-specific slot. In the last case
 * the cache stays on ba->thread_caches and is released when the allocator is
 * destroyed.
 */
static void* block_obj_al_alloc(struct allocator *a, int32_t size)
{
    block_obj_allocator_t ba = (block_obj_allocator_t)a;
    struct thread_cache *tc;

    if(size < 0)
        return NULL;

    tc = (struct thread_cache*)pthread_getspecific(ba->t_key);
    if(!tc)
    {
        tc = thread_cache_create(ba);
        if(!tc)
            return NULL;
        /* Without the TLS binding a later dealloc could not find the cache. */
        if(pthread_setspecific(ba->t_key,(void*)tc) != 0)
            return NULL;
    }
    return thread_cache_alloc(tc,(uint32_t)size);
}

static void  block_obj_al_dealloc(struct allocator*a, void *ptr)
{
    block_obj_allocator_t ba = (block_obj_allocator_t)a;
    struct thread_cache *tc = (struct thread_cache*)pthread_getspecific(ba->t_key);
    assert(tc);
    thread_cache_dealloc(tc,ptr);
}

/*
 * Destroy callback of the allocator. Releases, in this order: every
 * registered thread cache, the central block lists, the per-class locks, the
 * cache-list lock, the thread-specific key and the allocator object; then
 * clears the caller's handle.
 */
static void destroy_block_obj_al(struct allocator **a)
{
    block_obj_allocator_t ba = (block_obj_allocator_t)*a;
    list_node *node;
    int32_t cls;

    /* Destroy all thread caches. */
    node = link_list_head(ba->thread_caches);
    while(node != NULL)
    {
        /* Remember the successor first: destroying the cache frees the node. */
        list_node *following = node->next;
        destroy_thread_cache((struct thread_cache *)node);
        node = following;
    }
    LINK_LIST_DESTROY(&ba->thread_caches);

    /* Destroy all central free lists. */
    for(cls = 0; cls < 17; ++cls)
    {
        release_freelist(ba->_free_list[cls]);
        LINK_LIST_DESTROY(&ba->_free_list[cls]);
    }

    for(cls = 0; cls < 17; ++cls)
        spin_destroy(&(ba->_free_list_mtx[cls]));

    spin_destroy(&(ba->mtx));
    pthread_key_delete(ba->t_key);
    free(ba);
    *a = NULL;
}

/*
 * Creates a block object allocator: the thread-specific key used to find
 * each thread's cache, the list of thread caches with its lock, and one
 * central block list plus lock per size class. The Alloc/DeAlloc/Destroy
 * callbacks of the embedded base allocator are pointed at this
 * implementation.
 *
 * Returns NULL if the allocator object cannot be allocated or the
 * thread-specific key cannot be created; nothing is leaked in either case.
 * The key is created without a destructor: thread caches are released by the
 * allocator's Destroy callback, not at thread exit.
 */
block_obj_allocator_t create_block_obj_allocator(void)
{
    block_obj_allocator_t ba = (block_obj_allocator_t)calloc(1,sizeof(*ba));
    int32_t i;

    if(!ba)
        return NULL;
    /* Create the key first so a failure needs no further cleanup than free(). */
    if(pthread_key_create(&ba->t_key,0) != 0)
    {
        free(ba);
        return NULL;
    }

    ba->mtx = spin_create();
    ba->thread_caches = LINK_LIST_CREATE();
    for(i = 0; i < 17; ++i)
    {
        ba->_free_list[i] = LINK_LIST_CREATE();
        ba->_free_list_mtx[i] = spin_create();
    }
    ba->super_class.Alloc = block_obj_al_alloc;
    ba->super_class.DeAlloc = block_obj_al_dealloc;
    ba->super_class.Destroy = destroy_block_obj_al;
    return ba;
}

/*
 * Prints the calling thread's cache statistics for the size class that
 * serves requests of `size` bytes.
 *
 * Does nothing when size is negative or when the calling thread has no
 * cache yet (it has not allocated from ba); the original dereferenced a NULL
 * cache in the latter case.
 */
void print_info(block_obj_allocator_t ba,int size)
{
    struct thread_cache *tc;

    if(size < 0)
        return;
    tc = (struct thread_cache*)pthread_getspecific(ba->t_key);
    if(!tc)
        return;
    thread_cache_info(tc,(uint32_t)size);
}

test.c

#include "log.h"
#include <stdio.h>
#include "atomic.h"
#include "wpacket.h"
#include "packet_allocator.h"
#include <stdlib.h>
#include "SysTime.h"
#include <string.h>
#include "block_obj_allocator.h"
#include "clib/include/mem_allocator.h"
#include "clib/include/fix_obj_pool.h"
uint32_t GetSize_of_pow2(uint32_t size);
uint8_t GetK(uint32_t size);





/*
 * Benchmark 1: time ten rounds of 10,000,000 allocations of 16 bytes for
 * each allocator (fix_obj_pool, gen_allocator, block_obj_allocator, and the
 * process-wide malloc). Only the allocation loop is timed; the objects are
 * freed afterwards, outside the measured interval.
 *
 * tmp must have room for 10,000,000 pointers.
 * The elapsed time is converted to int so it matches the %d conversion.
 */
void test1(char **tmp)
{
    {
        allocator_t a =  create_pool(16,65536*10,1);
        int j = 0;
        for(;j<10;++j)
        {
            uint32_t tick = GetSystemMs();
            int i = 0;
            for( ; i < 10000000; ++i)
            {
                tmp[i] = ALLOC(a,16);
            }
            printf("fix_obj_pool finish:%d\n",(int)(GetSystemMs()-tick));
            i = 0;
            for(; i < 10000000;++i)
                FREE(a,tmp[i]);

        }
        DESTROY(&a);
    }
    {
        allocator_t a =  gen_allocator_create(65536);
        int j = 0;
        for(;j<10;++j)
        {
            uint32_t tick = GetSystemMs();
            int i = 0;
            for( ; i < 10000000; ++i)
            {
                tmp[i] = ALLOC(a,16);
            }
            printf("gen_allocator finish:%d\n",(int)(GetSystemMs()-tick));
            i = 0;
            for(; i < 10000000;++i)
                FREE(a,tmp[i]);
        }
        DESTROY(&a);
    }
    {
        allocator_t a = (allocator_t)create_block_obj_allocator();
        int j = 0;
        for(;j<10;++j)
        {
            uint32_t tick = GetSystemMs();
            int i = 0;
            for( ; i < 10000000; ++i)
            {
                tmp[i] = ALLOC(a,16);
            }
            printf("block_obj_allocator finish:%d\n",(int)(GetSystemMs()-tick));
            i = 0;
            for(; i < 10000000;++i)
                FREE(a,tmp[i]);
        }
        DESTROY(&a);
    }
    {
        /* Plain malloc/free; reported as tcmalloc, which assumes the binary
         * is linked against it. */
        int j = 0;
        for(;j<10;++j)
        {
            uint32_t tick = GetSystemMs();
            int i = 0;
            for( ; i < 10000000; ++i)
            {
                tmp[i] = malloc(16);
            }
            printf("tcmalloc finish:%d\n",(int)(GetSystemMs()-tick));
            i = 0;
            for(; i < 10000000;++i)
                free(tmp[i]);

        }
    }
}

/*
 * Benchmark 2: for each allocator, time ten rounds of 10,000,000
 * allocations of 16 bytes followed by freeing all of them. Unlike test1,
 * both the allocation and the release loop are inside the measured interval.
 *
 * tmp must have room for 10,000,000 pointers.
 * The elapsed time is converted to int so it matches the %d conversion.
 */
void test2(char **tmp)
{
    {
        allocator_t a =  create_pool(16,65536*10,1);
        int j = 0;
        for(;j<10;++j)
        {
            uint32_t tick = GetSystemMs();
            int i = 0;
            for( ; i < 10000000; ++i)
            {
                tmp[i] = ALLOC(a,16);
            }

            i = 0;
            for(; i < 10000000;++i)
                FREE(a,tmp[i]);
            printf("fix_obj_pool finish:%d\n",(int)(GetSystemMs()-tick));

        }
        DESTROY(&a);
    }
    {
        allocator_t a =  gen_allocator_create(65536);
        int j = 0;
        for(;j<10;++j)
        {
            uint32_t tick = GetSystemMs();
            int i = 0;
            for( ; i < 10000000; ++i)
            {
                tmp[i] = ALLOC(a,16);
            }

            i = 0;
            for(; i < 10000000;++i)
                FREE(a,tmp[i]);
            printf("gen_allocator finish:%d\n",(int)(GetSystemMs()-tick));
        }
        DESTROY(&a);
    }
    {
        allocator_t a = (allocator_t)create_block_obj_allocator();
        int j = 0;
        for(;j<10;++j)
        {
            uint32_t tick = GetSystemMs();
            int i = 0;
            for( ; i < 10000000; ++i)
            {
                tmp[i] = ALLOC(a,16);
            }
            i = 0;
            for(; i < 10000000;++i)
                FREE(a,tmp[i]);
            printf("block_obj_allocator finish:%d\n",(int)(GetSystemMs()-tick));
        }
        DESTROY(&a);
    }
    {
        /* Plain malloc/free; reported as tcmalloc, which assumes the binary
         * is linked against it. */
        int j = 0;
        for(;j<10;++j)
        {
            uint32_t tick = GetSystemMs();
            int i = 0;
            for( ; i < 10000000; ++i)
            {
                tmp[i] = malloc(16);
            }

            i = 0;
            for(; i < 10000000;++i)
                free(tmp[i]);
            printf("tcmalloc finish:%d\n",(int)(GetSystemMs()-tick));
        }
    }
}

/*
 * Benchmark 3: for each allocator, time ten rounds of 10,000,000
 * allocations of 16 bytes in which every completed batch of 100,000 objects
 * is freed immediately (100 allocate/free batches per round). This measures
 * the steady-state reuse path rather than growth.
 *
 * tmp must have room for 10,000,000 pointers.
 * The elapsed time is converted to int so it matches the %d conversion.
 */
void test3(char **tmp)
{
    {
        allocator_t a =  create_pool(16,65536*10,1);
        int j = 0;
        for(;j<10;++j)
        {
            uint32_t tick = GetSystemMs();
            int i = 0;
            for( ; i < 10000000; ++i)
            {
                tmp[i] = ALLOC(a,16);
                if((i+1)%100000 == 0)
                {
                    /* Batch complete: release the last 100,000 objects. */
                    int k = (i+1)-100000;
                    for(; k < i+1;++k)
                    {
                        FREE(a,tmp[k]);
                    }
                }
            }
            printf("fix_obj_pool finish:%d\n",(int)(GetSystemMs()-tick));

        }
        DESTROY(&a);
    }
    {
        allocator_t a =  gen_allocator_create(65536);
        int j = 0;
        for(;j<10;++j)
        {
            uint32_t tick = GetSystemMs();
            int i = 0;
            for( ; i < 10000000; ++i)
            {
                tmp[i] = ALLOC(a,16);
                if((i+1)%100000 == 0)
                {
                    int k = (i+1)-100000;
                    for(; k < i+1;++k)
                    {
                        FREE(a,tmp[k]);
                    }
                }
            }
            printf("gen_allocator finish:%d\n",(int)(GetSystemMs()-tick));
        }
        DESTROY(&a);
    }
    {
        allocator_t a = (allocator_t)create_block_obj_allocator();
        int j = 0;
        for(;j<10;++j)
        {
            uint32_t tick = GetSystemMs();
            int i = 0;
            for( ; i < 10000000; ++i)
            {
                tmp[i] = ALLOC(a,16);
                if((i+1)%100000 == 0)
                {
                    int k = (i+1)-100000;
                    for(; k < i+1;++k)
                        FREE(a,tmp[k]);
                }
            }
            printf("block_obj_allocator finish:%d\n",(int)(GetSystemMs()-tick));
        }
        DESTROY(&a);
    }
    {
        /* Plain malloc/free; reported as tcmalloc, which assumes the binary
         * is linked against it. */
        int j = 0;
        for(;j<10;++j)
        {
            uint32_t tick = GetSystemMs();
            int i = 0;
            for( ; i < 10000000; ++i)
            {
                tmp[i] = malloc(16);
                if((i+1)%100000 == 0)
                {
                    int k = (i+1)-100000;
                    for(; k < i+1;++k)
                        free(tmp[k]);
                }
            }
            printf("tcmalloc finish:%d\n",(int)(GetSystemMs()-tick));
        }
    }
}






/*
 * Runs the three allocator benchmarks in sequence, sharing one table of
 * 10,000,000 pointers between them.
 *
 * Returns 0 on success, 1 if the pointer table cannot be allocated (the
 * benchmarks would otherwise write through a NULL table).
 */
int main(void)
{
    char **tmp = calloc(10000000,sizeof(*tmp));
    if(!tmp)
    {
        fprintf(stderr,"failed to allocate the pointer table\n");
        return 1;
    }
    test1(tmp);
    printf("test1 finish------------\n");
    test2(tmp);
    printf("test2 finish------------\n");
    test3(tmp);
    printf("test3 finish------------\n");
    free(tmp);
    return 0;
}

 

 

项目地址:

https://github.com/sniperHW/kendylib

posted @ 2012-07-24 11:33  sniperHW  阅读(2221)  评论(9)  编辑  收藏  举报