各种内存分配器的对比测试
最近两天测试了下tcmalloc,性能的确牛B.
所以修改了下固定对象分配器,模仿tcmalloc利用tls做thread cache.
下面是在我机器上对自己写的各个内存分配器与tcmalloc的对比测试结果:
fix_obj_pool finish:326
fix_obj_pool finish:165
fix_obj_pool finish:168
fix_obj_pool finish:164
fix_obj_pool finish:174
fix_obj_pool finish:164
fix_obj_pool finish:174
fix_obj_pool finish:185
fix_obj_pool finish:173
fix_obj_pool finish:168
gen_allocator finish:567
gen_allocator finish:264
gen_allocator finish:261
gen_allocator finish:260
gen_allocator finish:260
gen_allocator finish:261
gen_allocator finish:260
gen_allocator finish:261
gen_allocator finish:260
gen_allocator finish:263
block_obj_allocator finish:342
block_obj_allocator finish:257
block_obj_allocator finish:258
block_obj_allocator finish:257
block_obj_allocator finish:258
block_obj_allocator finish:257
block_obj_allocator finish:258
block_obj_allocator finish:259
block_obj_allocator finish:263
block_obj_allocator finish:262
tcmalloc finish:279
tcmalloc finish:266
tcmalloc finish:265
tcmalloc finish:267
tcmalloc finish:266
tcmalloc finish:266
tcmalloc finish:265
tcmalloc finish:264
tcmalloc finish:266
tcmalloc finish:267
test1 finish------------
fix_obj_pool finish:606
fix_obj_pool finish:471
fix_obj_pool finish:469
fix_obj_pool finish:473
fix_obj_pool finish:468
fix_obj_pool finish:468
fix_obj_pool finish:470
fix_obj_pool finish:474
fix_obj_pool finish:475
fix_obj_pool finish:467
gen_allocator finish:928
gen_allocator finish:647
gen_allocator finish:677
gen_allocator finish:643
gen_allocator finish:645
gen_allocator finish:644
gen_allocator finish:643
gen_allocator finish:644
gen_allocator finish:643
gen_allocator finish:644
block_obj_allocator finish:586
block_obj_allocator finish:500
block_obj_allocator finish:502
block_obj_allocator finish:500
block_obj_allocator finish:502
block_obj_allocator finish:501
block_obj_allocator finish:501
block_obj_allocator finish:501
block_obj_allocator finish:501
block_obj_allocator finish:501
tcmalloc finish:551
tcmalloc finish:549
tcmalloc finish:549
tcmalloc finish:549
tcmalloc finish:551
tcmalloc finish:549
tcmalloc finish:548
tcmalloc finish:551
tcmalloc finish:549
tcmalloc finish:550
test2 finish------------
fix_obj_pool finish:464
fix_obj_pool finish:466
fix_obj_pool finish:464
fix_obj_pool finish:465
fix_obj_pool finish:465
fix_obj_pool finish:466
fix_obj_pool finish:465
fix_obj_pool finish:464
fix_obj_pool finish:467
fix_obj_pool finish:465
gen_allocator finish:674
gen_allocator finish:661
gen_allocator finish:667
gen_allocator finish:656
gen_allocator finish:657
gen_allocator finish:658
gen_allocator finish:658
gen_allocator finish:660
gen_allocator finish:657
gen_allocator finish:660
block_obj_allocator finish:479
block_obj_allocator finish:479
block_obj_allocator finish:477
block_obj_allocator finish:477
block_obj_allocator finish:478
block_obj_allocator finish:480
block_obj_allocator finish:478
block_obj_allocator finish:481
block_obj_allocator finish:477
block_obj_allocator finish:478
tcmalloc finish:562
tcmalloc finish:565
tcmalloc finish:563
tcmalloc finish:562
tcmalloc finish:562
tcmalloc finish:563
tcmalloc finish:566
tcmalloc finish:565
tcmalloc finish:562
tcmalloc finish:562
test3 finish------------
三个测试分别是
1)分配 1000万个16字节的对象
2)分配1000万,再释放1000万
3)分配10万,释放10万,执行1000万/10万次
从输出可以看出fix_obj_pool 的第一个测试是最快的,因为它的分配处理最简单,但如果把释放也计入统计,优势就几乎没有了。
还有一个手段可以优化fix_obj_pool,就是释放时不将对象放回到可用列表中,只是增加一个计数,当整个内存块中的对象都被释放时
才将内存放回到可用列表中去。而gen_allocator效果是最差的,可以直接丢弃了。
从测试结果可以看出,tcmalloc已经可以满足大多数的需求,基本无必要自己写通用内存分配器。当然对象池还是可以考虑的。
代码如下:
block_obj_allocator.h
#ifndef BLOCK_OBJ_ALLOCATOR_H
#define BLOCK_OBJ_ALLOCATOR_H

/*
 * Public interface of the block object allocator.
 *
 * The guard macro deliberately avoids a leading underscore followed by an
 * upper-case letter: such identifiers are reserved for the implementation.
 */

/* Opaque handle to a block object allocator instance. */
typedef struct block_obj_allocator *block_obj_allocator_t;

/* Creates a new allocator instance and returns its handle. */
block_obj_allocator_t create_block_obj_allocator(void);

/*
 * Prints free-list statistics of the calling thread for the size class that
 * serves requests of `size` bytes on the given allocator.
 */
void print_info(block_obj_allocator_t,int);

#endif
block_obj_allocator.c
#include "block_obj_allocator.h"
#include <pthread.h>
#include "link_list.h"
#include <stdint.h>
#include <stdio.h>
#include <assert.h>
#include "spinlock.h"
#include <stdlib.h>
#include "clib/include/allocator.h"

/*
 * Block object allocator with a per-thread cache.
 *
 * Requests are rounded up to a size class k (objects of 1<<k bytes). Every
 * thread owns one thread_allocator per size class, reached through a pthread
 * TLS key; whole free lists are exchanged with the central allocator under a
 * per-class spinlock.
 *
 * Each returned object is preceded by an int32_t header holding its size
 * class, which dealloc reads back to find the owning thread_allocator.
 */

/* Number of size classes; class k serves objects of 1<<k bytes. */
enum { SIZE_CLASS_COUNT = 17 };

/* Bytes of backing memory carved up by one free_list. */
#define DEFAULT_BLOCK_SIZE (1024*1024)

/* A chunk of backing memory plus the singly linked list of its free objects. */
struct free_list{
	list_node next;
	uint32_t  size;       /* objects currently on the list */
	uint32_t  init_size;  /* objects the list held when it was created */
	list_node *head;
	list_node *tail;
	void *mem;            /* backing memory, released in release_freelist */
};

/* Per-thread, per-size-class allocator state. */
struct thread_allocator
{
	list_node next;
	block_obj_allocator_t central_allocator;
	struct link_list *_free_list;  /* lists that still have objects to hand out */
	struct link_list *_recover;    /* drained lists being refilled by dealloc */
	uint32_t   free_size;          /* free objects across both link lists */
	uint16_t   array_idx;          /* size class served by this allocator */
	uint32_t   collect_factor;     /* free_size at which a refilled list goes back to central */
};

/* All size-class allocators owned by one thread. */
struct thread_cache
{
	list_node next;
	struct thread_allocator _allocator[SIZE_CLASS_COUNT];
};

struct block_obj_allocator
{
	IMPLEMEMT(allocator);
	pthread_key_t t_key;                               /* TLS slot holding the thread_cache */
	spinlock_t _free_list_mtx[SIZE_CLASS_COUNT];       /* guards _free_list[k] */
	struct link_list *_free_list[SIZE_CLASS_COUNT];    /* central free lists per class */
	spinlock_t mtx;                                    /* guards thread_caches */
	struct link_list *thread_caches;                   /* every thread_cache ever created */
};

/* Pops the first object of f. The caller guarantees f is not empty. */
static void *free_list_get(struct free_list *f)
{
	void *ptr = (void*)f->head;
	f->head = f->head->next;
	if(!f->head)
		f->tail = NULL;
	--f->size;
	return ptr;
}

/* Appends ptr to f; the object's own storage is reused as the list node. */
static void free_list_put(struct free_list *f,void *ptr)
{
	list_node *l = (list_node*)ptr;
	l->next = NULL;
	if(f->tail)
	{
		f->tail->next = l;
		f->tail = l;
	}
	else
		f->head = f->tail = l;
	++f->size;
}

/*
 * Allocates one backing block and threads it into a free list of
 * DEFAULT_BLOCK_SIZE/size objects of `size` bytes each.
 * Returns NULL when memory cannot be obtained.
 */
static struct free_list *creat_new_freelist(uint32_t size)
{
	uint32_t init_size = DEFAULT_BLOCK_SIZE/size;
	uint32_t i = 0;
	struct free_list *f = calloc(1,sizeof(*f));
	if(!f)
		return NULL;
	f->mem = calloc(1,DEFAULT_BLOCK_SIZE);
	if(!f->mem)
	{
		free(f);
		return NULL;
	}
	f->init_size = init_size;
	/* f->size starts at 0 (calloc) and is counted up by free_list_put. */
	for( ; i < init_size; ++i)
		free_list_put(f,((uint8_t*)f->mem)+(i*size));
	return f;
}

/*
 * Takes a free list for the given size class from the central allocator,
 * creating a fresh one when the central list is empty.
 * Returns NULL only when a fresh list cannot be allocated.
 */
static struct free_list *central_get_freelist(block_obj_allocator_t central,uint16_t array_idx)
{
	struct free_list *f;
	spin_lock(central->_free_list_mtx[array_idx],4000);
	f = (struct free_list*)link_list_pop(central->_free_list[array_idx]);
	spin_unlock(central->_free_list_mtx[array_idx]);
	if(!f)
		f = creat_new_freelist(1<<array_idx);
	return f;
}

/* Hands a free list back to the central allocator's list for its size class. */
static void give_back_to_central(block_obj_allocator_t central,uint16_t array_idx,struct free_list *f)
{
	spin_lock(central->_free_list_mtx[array_idx],4000);
	LINK_LIST_PUSH_BACK(central->_free_list[array_idx],f);
	spin_unlock(central->_free_list_mtx[array_idx]);
}

/*
 * Returns one object from the thread allocator, refilling from the central
 * allocator when the thread holds no free objects.
 * Returns NULL when a refill is needed but fails.
 */
void *thread_allocator_alloc(struct thread_allocator *a)
{
	void *ptr;
	struct free_list *f;
	if(!a->free_size)
	{
		/* The thread cache is out of memory: fetch a list from central. */
		f = central_get_freelist(a->central_allocator,a->array_idx);
		if(!f)
			return NULL;
		LINK_LIST_PUSH_BACK(a->_free_list,f);
		a->free_size += f->size;
	}
	else
	{
		f = (struct free_list*)link_list_head(a->_free_list);
		if(!f)
		{
			/* Free objects exist but only on a list that is being refilled. */
			f = (struct free_list*)link_list_pop(a->_recover);
			assert(f);
			LINK_LIST_PUSH_BACK(a->_free_list,f);
		}
	}
	ptr = free_list_get(f);
	assert(ptr);
	--a->free_size;
	if(!f->size)
	{
		/* The list is drained: park it on _recover so dealloc can refill it. */
		link_list_pop(a->_free_list);
		link_list_push_back(a->_recover,(list_node*)f);
	}
	return ptr;
}

/* Returns one object to the thread allocator. */
void thread_allocator_dealloc(struct thread_allocator *a,void *ptr)
{
	struct free_list *f = (struct free_list*)link_list_head(a->_recover);
	if(f)
	{
		free_list_put(f,ptr);
		++a->free_size;
		if(f->size == f->init_size)
		{
			link_list_pop(a->_recover);
			/* A list has been completely refilled: either keep it for
			 * allocation or give it back to the central allocator. */
			if(a->free_size >= a->collect_factor)
			{
				give_back_to_central(a->central_allocator,a->array_idx,f);
				a->free_size -= f->size;
			}
			else
				link_list_push_back(a->_free_list,(list_node*)f);
		}
	}
	else
	{
		f = (struct free_list*)link_list_head(a->_free_list);
		assert(f);
		free_list_put(f,ptr);
		++a->free_size;
	}
}

/* Prints the free object count and the size of every list of the allocator. */
void thread_allocator_info(struct thread_allocator *a)
{
	struct free_list *f;
	printf("free_size:%u\n",(unsigned)a->free_size);
	f = (struct free_list*)link_list_head(a->_free_list);
	while(f)
	{
		printf("f size%u\n",(unsigned)f->size);
		f = (struct free_list*)((list_node*)f)->next;
	}
	f = (struct free_list*)link_list_head(a->_recover);
	while(f)
	{
		printf("f recover size%u\n",(unsigned)f->size);
		f = (struct free_list*)((list_node*)f)->next;
	}
}

/* Defined elsewhere. Used here as: size class index for a request of `size`
 * bytes (presumably the smallest k with 1<<k >= size - confirm). */
extern uint8_t GetK(uint32_t size);

/*
 * Creates the calling thread's cache and registers it with the central
 * allocator so it can be released on destroy. Returns NULL on allocation
 * failure.
 */
static struct thread_cache* thread_cache_create(block_obj_allocator_t ba)
{
	struct thread_cache *tc = calloc(1,sizeof(*tc));
	int32_t i = 0;
	if(!tc)
		return NULL;
	for( ; i < SIZE_CLASS_COUNT; ++i)
	{
		tc->_allocator[i].central_allocator = ba;
		tc->_allocator[i]._free_list = LINK_LIST_CREATE();
		tc->_allocator[i]._recover = LINK_LIST_CREATE();
		tc->_allocator[i].array_idx = i;
		tc->_allocator[i].collect_factor = ((DEFAULT_BLOCK_SIZE)*2)/(1<<i);
	}
	spin_lock(ba->mtx,4000);
	LINK_LIST_PUSH_BACK(ba->thread_caches,tc);
	spin_unlock(ba->mtx);
	return tc;
}

/* Frees every free_list (and its backing memory) linked on flist. */
static void release_freelist(struct link_list *flist)
{
	list_node *l = link_list_head(flist);
	while(l)
	{
		struct free_list *f = (struct free_list*)l;
		l = l->next;  /* read the link before f is freed */
		free(f->mem);
		free(f);
	}
}

/* Releases all lists owned by a thread cache, then the cache itself. */
static void destroy_thread_cache(struct thread_cache *tc)
{
	int32_t i = 0;
	for(; i < SIZE_CLASS_COUNT; ++i)
	{
		release_freelist(tc->_allocator[i]._free_list);
		release_freelist(tc->_allocator[i]._recover);
		LINK_LIST_DESTROY(&(tc->_allocator[i]._free_list));
		LINK_LIST_DESTROY(&(tc->_allocator[i]._recover));
	}
	free(tc);
}

/*
 * Allocates `size` usable bytes from the thread cache.
 * Returns NULL when the request (plus header) does not fit any size class
 * or when memory is exhausted.
 */
static void* thread_cache_alloc(struct thread_cache *tc,uint32_t size)
{
	uint8_t k;
	int32_t *ptr;
	if(size > UINT32_MAX - sizeof(int32_t))
		return NULL;  /* adding the header would wrap around */
	size += sizeof(int32_t);
	k = GetK(size);
	if(k >= SIZE_CLASS_COUNT)
		return NULL;  /* larger than the biggest size class */
	ptr = (int32_t*)thread_allocator_alloc(&(tc->_allocator[k]));
	if(!ptr)
		return NULL;
	*ptr = k;  /* header: remembered so dealloc can find the size class */
	ptr++;
	return (void*)ptr;
}

/* Returns an object obtained from thread_cache_alloc to the thread cache. */
static void thread_cache_dealloc(struct thread_cache *tc,void *ptr)
{
	int32_t *_ptr = ((int32_t*)ptr)-1;
	uint8_t k = *_ptr;
	assert(k < SIZE_CLASS_COUNT);
	thread_allocator_dealloc(&(tc->_allocator[k]),_ptr);
}

/* Prints statistics of the size class serving `size`-byte requests, if any. */
static void thread_cache_info(struct thread_cache *tc,uint32_t size)
{
	uint8_t k;
	size += sizeof(int32_t);
	k = GetK(size);
	if(k >= SIZE_CLASS_COUNT)
		return;
	thread_allocator_info(&(tc->_allocator[k]));
}

/*
 * allocator::Alloc implementation. Lazily creates the calling thread's cache.
 * Returns NULL for a negative size, an oversized request, or on failure.
 */
static void* block_obj_al_alloc(struct allocator *a, int32_t size)
{
	block_obj_allocator_t ba = (block_obj_allocator_t)a;
	struct thread_cache *tc;
	if(size < 0)
		return NULL;
	tc = (struct thread_cache*)pthread_getspecific(ba->t_key);
	if(!tc)
	{
		tc = thread_cache_create(ba);
		if(!tc)
			return NULL;
		/* On failure tc stays registered in ba->thread_caches and is
		 * released by destroy_block_obj_al. */
		if(pthread_setspecific(ba->t_key,(void*)tc) != 0)
			return NULL;
	}
	return thread_cache_alloc(tc,size);
}

/* allocator::DeAlloc implementation. A NULL ptr is ignored. */
static void block_obj_al_dealloc(struct allocator*a, void *ptr)
{
	block_obj_allocator_t ba = (block_obj_allocator_t)a;
	struct thread_cache *tc;
	if(!ptr)
		return;
	tc = (struct thread_cache*)pthread_getspecific(ba->t_key);
	assert(tc);
	thread_cache_dealloc(tc,ptr);
}

/* allocator::Destroy implementation. Releases everything and clears *a. */
static void destroy_block_obj_al(struct allocator **a)
{
	block_obj_allocator_t ba;
	list_node *l;
	int32_t i;
	if(!a || !*a)
		return;
	ba = (block_obj_allocator_t)*a;

	/* Destroy every thread cache. */
	l = link_list_head(ba->thread_caches);
	while(l)
	{
		struct thread_cache *tc = (struct thread_cache *)l;
		l = l->next;
		destroy_thread_cache(tc);
	}
	LINK_LIST_DESTROY(&ba->thread_caches);

	/* Destroy every central free list. */
	for(i = 0; i < SIZE_CLASS_COUNT; ++i)
	{
		release_freelist(ba->_free_list[i]);
		LINK_LIST_DESTROY(&ba->_free_list[i]);
	}
	for(i = 0; i < SIZE_CLASS_COUNT; ++i)
		spin_destroy(&(ba->_free_list_mtx[i]));

	spin_destroy(&(ba->mtx));
	pthread_key_delete(ba->t_key);
	free(ba);
	*a = NULL;
}

/*
 * Creates an allocator instance and wires up its allocator vtable.
 * Returns NULL when the instance or its TLS key cannot be created.
 */
block_obj_allocator_t create_block_obj_allocator(void)
{
	int32_t i = 0;
	block_obj_allocator_t ba = calloc(1,sizeof(*ba));
	if(!ba)
		return NULL;
	/* Create the key first so a failure needs no further cleanup. */
	if(pthread_key_create(&ba->t_key,NULL) != 0)
	{
		free(ba);
		return NULL;
	}
	ba->mtx = spin_create();
	ba->thread_caches = LINK_LIST_CREATE();
	for( ; i < SIZE_CLASS_COUNT; ++i)
	{
		ba->_free_list[i] = LINK_LIST_CREATE();
		ba->_free_list_mtx[i] = spin_create();
	}
	ba->super_class.Alloc = block_obj_al_alloc;
	ba->super_class.DeAlloc = block_obj_al_dealloc;
	ba->super_class.Destroy = destroy_block_obj_al;
	return ba;
}

/*
 * Prints the calling thread's statistics for the size class serving
 * `size`-byte requests. Prints nothing when the calling thread has no
 * thread cache yet or when size is negative.
 */
void print_info(block_obj_allocator_t ba,int size)
{
	struct thread_cache *tc = (struct thread_cache*)pthread_getspecific(ba->t_key);
	if(!tc || size < 0)
		return;
	thread_cache_info(tc,size);
}
test.c
#include "log.h"
#include <stdio.h>
#include "atomic.h"
#include "wpacket.h"
#include "packet_allocator.h"
#include <stdlib.h>
#include <stdint.h>
#include "SysTime.h"
#include <string.h>
#include "block_obj_allocator.h"
#include "clib/include/mem_allocator.h"
#include "clib/include/fix_obj_pool.h"

/*
 * Benchmark comparing three project allocators with the process-wide malloc.
 * The malloc rows are labelled "tcmalloc"; presumably the binary is linked
 * against tcmalloc so that it replaces malloc/free - confirm in the build.
 */

uint32_t GetSize_of_pow2(uint32_t size);
uint8_t GetK(uint32_t size);

enum {
	OBJ_COUNT  = 10000000, /* objects allocated per round */
	OBJ_SIZE   = 16,       /* bytes per object */
	ROUNDS     = 10,       /* timed repetitions per allocator */
	BATCH_SIZE = 100000    /* test3: objects allocated before a batch is freed */
};

/* Signature shared by the allocator_t based benchmark loops. */
typedef void (*pool_bench_fn)(allocator_t a,const char *name,char **tmp);

/* Prints "<name> finish:<elapsed>" using GetSystemMs() since start_ms. */
static void report(const char *name,uint32_t start_ms)
{
	uint32_t elapsed = GetSystemMs() - start_ms;
	printf("%s finish:%u\n",name,(unsigned)elapsed);
}

/* Test 1 loop: times OBJ_COUNT allocations; the frees are not timed. */
static void pool_alloc_only(allocator_t a,const char *name,char **tmp)
{
	int round = 0;
	for( ; round < ROUNDS; ++round)
	{
		uint32_t tick = GetSystemMs();
		int i = 0;
		for( ; i < OBJ_COUNT; ++i)
		{
			tmp[i] = ALLOC(a,OBJ_SIZE);
		}
		report(name,tick);
		for(i = 0; i < OBJ_COUNT; ++i)
		{
			FREE(a,tmp[i]);
		}
	}
}

/* Test 1 loop for malloc/free. */
static void malloc_alloc_only(const char *name,char **tmp)
{
	int round = 0;
	for( ; round < ROUNDS; ++round)
	{
		uint32_t tick = GetSystemMs();
		int i = 0;
		for( ; i < OBJ_COUNT; ++i)
		{
			tmp[i] = malloc(OBJ_SIZE);
		}
		report(name,tick);
		for(i = 0; i < OBJ_COUNT; ++i)
		{
			free(tmp[i]);
		}
	}
}

/* Test 2 loop: times OBJ_COUNT allocations followed by OBJ_COUNT frees. */
static void pool_alloc_then_free(allocator_t a,const char *name,char **tmp)
{
	int round = 0;
	for( ; round < ROUNDS; ++round)
	{
		uint32_t tick = GetSystemMs();
		int i = 0;
		for( ; i < OBJ_COUNT; ++i)
		{
			tmp[i] = ALLOC(a,OBJ_SIZE);
		}
		for(i = 0; i < OBJ_COUNT; ++i)
		{
			FREE(a,tmp[i]);
		}
		report(name,tick);
	}
}

/* Test 2 loop for malloc/free. */
static void malloc_alloc_then_free(const char *name,char **tmp)
{
	int round = 0;
	for( ; round < ROUNDS; ++round)
	{
		uint32_t tick = GetSystemMs();
		int i = 0;
		for( ; i < OBJ_COUNT; ++i)
		{
			tmp[i] = malloc(OBJ_SIZE);
		}
		for(i = 0; i < OBJ_COUNT; ++i)
		{
			free(tmp[i]);
		}
		report(name,tick);
	}
}

/* Test 3 loop: every BATCH_SIZE allocations the whole batch is freed. */
static void pool_batched(allocator_t a,const char *name,char **tmp)
{
	int round = 0;
	for( ; round < ROUNDS; ++round)
	{
		uint32_t tick = GetSystemMs();
		int i = 0;
		for( ; i < OBJ_COUNT; ++i)
		{
			tmp[i] = ALLOC(a,OBJ_SIZE);
			if((i+1)%BATCH_SIZE == 0)
			{
				int k = (i+1)-BATCH_SIZE;
				for( ; k < i+1; ++k)
				{
					FREE(a,tmp[k]);
				}
			}
		}
		report(name,tick);
	}
}

/* Test 3 loop for malloc/free. */
static void malloc_batched(const char *name,char **tmp)
{
	int round = 0;
	for( ; round < ROUNDS; ++round)
	{
		uint32_t tick = GetSystemMs();
		int i = 0;
		for( ; i < OBJ_COUNT; ++i)
		{
			tmp[i] = malloc(OBJ_SIZE);
			if((i+1)%BATCH_SIZE == 0)
			{
				int k = (i+1)-BATCH_SIZE;
				for( ; k < i+1; ++k)
				{
					free(tmp[k]);
				}
			}
		}
		report(name,tick);
	}
}

/*
 * Runs one benchmark loop against each project allocator in turn. Every
 * allocator is created fresh and destroyed once its rounds are finished.
 */
static void run_pool_benchmarks(pool_bench_fn bench,char **tmp)
{
	allocator_t a;

	a = create_pool(OBJ_SIZE,65536*10,1);
	bench(a,"fix_obj_pool",tmp);
	DESTROY(&a);

	a = gen_allocator_create(65536);
	bench(a,"gen_allocator",tmp);
	DESTROY(&a);

	a = (allocator_t)create_block_obj_allocator();
	bench(a,"block_obj_allocator",tmp);
	DESTROY(&a);
}

/* Test 1: allocation time only. tmp must hold OBJ_COUNT pointers. */
void test1(char **tmp)
{
	run_pool_benchmarks(pool_alloc_only,tmp);
	malloc_alloc_only("tcmalloc",tmp);
}

/* Test 2: allocation plus release time. tmp must hold OBJ_COUNT pointers. */
void test2(char **tmp)
{
	run_pool_benchmarks(pool_alloc_then_free,tmp);
	malloc_alloc_then_free("tcmalloc",tmp);
}

/* Test 3: interleaved batches of allocations and frees. */
void test3(char **tmp)
{
	run_pool_benchmarks(pool_batched,tmp);
	malloc_batched("tcmalloc",tmp);
}

int main()
{
	/* Scratch table that keeps every live pointer of the current round. */
	char **tmp = calloc(OBJ_COUNT,sizeof(*tmp));
	if(!tmp)
	{
		fprintf(stderr,"failed to allocate the pointer table\n");
		return 1;
	}
	test1(tmp);
	printf("test1 finish------------\n");
	test2(tmp);
	printf("test2 finish------------\n");
	test3(tmp);
	printf("test3 finish------------\n");
	free(tmp);
	return 0;
}
项目地址: