内存分配器对比
ptmalloc
ptmalloc是glibc默认的内存管理器
tcmalloc
tcmalloc是Google开源的一个内存管理库,作为glibc malloc的替代品。目前已经在Chrome、Safari等知名软件中使用
对于小内存的分配,有非常明显性能优势
也减少了多线程程序中的锁竞争情况
jemalloc
jemalloc最早是Jason Evans为FreeBSD编写的libc malloc实现,后来由Facebook大力推广并持续维护。目前在Firefox、Facebook服务器各种组件中大量使用
多核下性能较tcmalloc更好
mimalloc
mimalloc是微软最近开源的一个malloc实现
号称比jemalloc、tcmalloc等实现大约快了10%
bmalloc
Apple在WebKit最新代码里提供了新的分配器
号称远远超过tcmalloc
hoard
一个专为多线程优化的分配器, 作者是大学教授
Linux对比测试
#include <stdlib.h>
#include <stdio.h>
#include <unistd.h>
#include <time.h>
#include <sys/time.h>
#include <thread>
using namespace std;
/* Number of mymalloc() calls timed for each block size. The expansion is
 * parenthesized so the macro behaves as a single value inside any larger
 * expression (e.g. x / MAX_COUNT). */
#define MAX_COUNT (1024 * 1024)
/*
 * Restrict the calling thread to a single CPU.
 *
 * cpu: zero-based index of the CPU to run on.
 *
 * sched_setaffinity() is called with pid 0, which targets the calling thread.
 * Pinning is best-effort: if the call fails (for example because the
 * requested CPU does not exist or is not permitted) a warning is written to
 * stderr and execution continues with the previous affinity.
 */
void cpu_bind(int cpu)
{
    cpu_set_t mask;

    CPU_ZERO(&mask);
    CPU_SET(cpu, &mask);
    if (sched_setaffinity(0, sizeof(mask), &mask) != 0)
    {
        /* Do not abort the benchmark; just make the failure visible. */
        perror("sched_setaffinity");
    }
}
/*
 * Perform one allocate/release round trip of `size` bytes using new[] and
 * delete[]. The buffer is never read or written in between.
 */
void mymalloc(int size)
{
    char *const block = new char[size];

    delete[] block;
}
/*
 * Single-threaded benchmark driver.
 *
 * Calls cpu_bind(1), then runs 20 rounds. Each round times MAX_COUNT
 * mymalloc() calls of the current block size and prints the elapsed time in
 * microseconds. The block size starts at 1 byte and doubles after every
 * round (1 .. 524288). A separator line is printed at the end.
 */
int main()
{
    const int rounds = 20;
    int step = 1;

    cpu_bind(1);
    for (int j = 0; j < rounds; j++)
    {
        struct timespec tstart, tend;

        /* CLOCK_MONOTONIC is not affected by discontinuous jumps of the
         * system time, which could distort an interval measured with the
         * wall-clock gettimeofday(). */
        clock_gettime(CLOCK_MONOTONIC, &tstart);
        for (int i = 0; i < MAX_COUNT; i++)
        {
            mymalloc(step);
        }
        clock_gettime(CLOCK_MONOTONIC, &tend);

        /* Seconds and nanoseconds folded into whole microseconds. */
        int timeuse = (int)(1000000L * (tend.tv_sec - tstart.tv_sec)
                            + (tend.tv_nsec - tstart.tv_nsec) / 1000);
        printf("step %d used time: %d\n", step, timeuse);
        step *= 2;
    }
    puts("------------------------------");
    return 0;
}
# Build the benchmark source test.c five times with g++: once with no extra
# allocator library (test) and once each linked against tcmalloc (tctest),
# jemalloc (jetest), mimalloc (mitest) and hoard (htest).
# NOTE(review): the `clean` prerequisite is not defined in this excerpt, and
# no -O flag is passed, so all binaries use the compiler's default
# optimization level.
test: clean
g++ test.c -o test -pthread
g++ test.c -o tctest -pthread -ltcmalloc
g++ test.c -o jetest -pthread -ljemalloc
g++ test.c -o mitest -pthread -lmimalloc
g++ test.c -o htest -pthread -lhoard
# ./test && ./tctest && ./jetest && ./mitest && ./htest
step 1 used time: 16519
step 2 used time: 15843
step 4 used time: 16365
step 8 used time: 16139
step 16 used time: 17871
step 32 used time: 18170
step 64 used time: 18069
step 128 used time: 18074
step 256 used time: 18123
step 512 used time: 18398
step 1024 used time: 18446
step 2048 used time: 21232
step 4096 used time: 21028
step 8192 used time: 22479
step 16384 used time: 21496
step 32768 used time: 21562
step 65536 used time: 24523
step 131072 used time: 24428
step 262144 used time: 22674
step 524288 used time: 22735
------------------------------ tctest
step 1 used time: 16330
step 2 used time: 16588
step 4 used time: 16179
step 8 used time: 16139
step 16 used time: 16426
step 32 used time: 16501
step 64 used time: 16463
step 128 used time: 16501
step 256 used time: 16476
step 512 used time: 16449
step 1024 used time: 16429
step 2048 used time: 16377
step 4096 used time: 16363
step 8192 used time: 16382
step 16384 used time: 16359
step 32768 used time: 16380
step 65536 used time: 16438
step 131072 used time: 16476
step 262144 used time: 16701
step 524288 used time: 46855
------------------------------ jetest
step 1 used time: 16057
step 2 used time: 16130
step 4 used time: 16087
step 8 used time: 16127
step 16 used time: 16168
step 32 used time: 16219
step 64 used time: 16248
step 128 used time: 16255
step 256 used time: 16428
step 512 used time: 16693
step 1024 used time: 17374
step 2048 used time: 18538
step 4096 used time: 20768
step 8192 used time: 29029
step 16384 used time: 38466
step 32768 used time: 51371
step 65536 used time: 360735
step 131072 used time: 358943
step 262144 used time: 349103
step 524288 used time: 332748
------------------------------ mitest
step 1 used time: 12493
step 2 used time: 12447
step 4 used time: 12436
step 8 used time: 12494
step 16 used time: 12412
step 32 used time: 18487
step 64 used time: 27421
step 128 used time: 13800
step 256 used time: 14440
step 512 used time: 15716
step 1024 used time: 26533
step 2048 used time: 32315
step 4096 used time: 24270
step 8192 used time: 23697
step 16384 used time: 23433
step 32768 used time: 23683
step 65536 used time: 25473
step 131072 used time: 23841
step 262144 used time: 836834
step 524288 used time: 836637
------------------------------ htest
step 1 used time: 16652
step 2 used time: 16809
step 4 used time: 16786
step 8 used time: 16788
step 16 used time: 16976
step 32 used time: 16856
step 64 used time: 16848
step 128 used time: 16849
step 256 used time: 16909
step 512 used time: 17100
step 1024 used time: 15154
step 2048 used time: 95212
step 4096 used time: 96964
step 8192 used time: 98308
step 16384 used time: 95372
step 32768 used time: 95534
step 65536 used time: 88633
step 131072 used time: 84243
step 262144 used time: 83833
step 524288 used time: 84071
多线程对比测试
/* Number of mymalloc() calls timed for each block size. The expansion is
 * parenthesized so the macro behaves as a single value inside any larger
 * expression (e.g. x / MAX_COUNT). */
#define MAX_COUNT (1024 * 1024)
/*
 * Restrict the calling thread to a single CPU.
 *
 * cpu: zero-based index of the CPU to run on.
 *
 * sched_setaffinity() is called with pid 0, which targets the calling thread,
 * so each thread of the benchmark can pin itself independently. Pinning is
 * best-effort: if the call fails (for example because the requested CPU does
 * not exist or is not permitted) a warning is written to stderr and
 * execution continues with the previous affinity.
 */
void cpu_bind(int cpu)
{
    cpu_set_t mask;

    CPU_ZERO(&mask);
    CPU_SET(cpu, &mask);
    if (sched_setaffinity(0, sizeof(mask), &mask) != 0)
    {
        /* Do not abort the benchmark; just make the failure visible. */
        perror("sched_setaffinity");
    }
}
/*
 * Perform one allocate/release round trip of `size` bytes using new[] and
 * delete[]. The buffer is never read or written in between.
 */
void mymalloc(int size)
{
    char *const block = new char[size];

    delete[] block;
}
/*
 * Worker-thread body of the two-thread benchmark.
 *
 * Calls cpu_bind(2), then runs 20 rounds. Each round times MAX_COUNT
 * mymalloc() calls of the current block size and prints one
 * "thread step ..." line with the elapsed microseconds. The block size
 * starts at 1 byte and doubles after every round.
 */
void malloc_thread(void)
{
    const int rounds = 20;
    int step = 1;

    cpu_bind(2);
    for (int j = 0; j < rounds; j++)
    {
        struct timespec tstart, tend;

        /* CLOCK_MONOTONIC is not affected by discontinuous jumps of the
         * system time, which could distort an interval measured with the
         * wall-clock gettimeofday(). */
        clock_gettime(CLOCK_MONOTONIC, &tstart);
        for (int i = 0; i < MAX_COUNT; i++)
        {
            mymalloc(step);
        }
        clock_gettime(CLOCK_MONOTONIC, &tend);

        /* Seconds and nanoseconds folded into whole microseconds. */
        int timeuse = (int)(1000000L * (tend.tv_sec - tstart.tv_sec)
                            + (tend.tv_nsec - tstart.tv_nsec) / 1000);
        printf("thread step %d used time: %d\n", step, timeuse);
        step *= 2;
    }
}
/*
 * Two-thread benchmark driver.
 *
 * Calls cpu_bind(1), starts a second thread running malloc_thread(), and
 * then runs the same 20-round loop itself: each round times MAX_COUNT
 * mymalloc() calls of the current block size and prints the elapsed
 * microseconds, with the block size doubling from 1 byte. It joins the
 * worker before printing the closing separator line.
 */
int main()
{
    const int rounds = 20;
    int step = 1;

    cpu_bind(1);
    thread t(malloc_thread);
    for (int j = 0; j < rounds; j++)
    {
        struct timespec tstart, tend;

        /* CLOCK_MONOTONIC is not affected by discontinuous jumps of the
         * system time, which could distort an interval measured with the
         * wall-clock gettimeofday(). */
        clock_gettime(CLOCK_MONOTONIC, &tstart);
        for (int i = 0; i < MAX_COUNT; i++)
        {
            mymalloc(step);
        }
        clock_gettime(CLOCK_MONOTONIC, &tend);

        /* Seconds and nanoseconds folded into whole microseconds. */
        int timeuse = (int)(1000000L * (tend.tv_sec - tstart.tv_sec)
                            + (tend.tv_nsec - tstart.tv_nsec) / 1000);
        printf("step %d used time: %d\n", step, timeuse);
        step *= 2;
    }
    t.join();
    puts("---------------------------------------");
    return 0;
}
step 1 used time: 16048
thread step 1 used time: 18455
step 2 used time: 16014
step 4 used time: 16105
thread step 2 used time: 18350
step 8 used time: 16083
thread step 4 used time: 18007
step 16 used time: 18151
thread step 8 used time: 17896
step 32 used time: 19476
thread step 16 used time: 19686
step 64 used time: 19110
thread step 32 used time: 19969
step 128 used time: 18149
thread step 64 used time: 20021
step 256 used time: 18229
thread step 128 used time: 20024
step 512 used time: 18277
thread step 256 used time: 19887
step 1024 used time: 18060
thread step 512 used time: 20529
thread step 1024 used time: 19877
step 2048 used time: 41164
thread step 2048 used time: 41802
step 4096 used time: 41993
thread step 4096 used time: 41495
step 8192 used time: 41831
thread step 8192 used time: 41410
step 16384 used time: 40962
thread step 16384 used time: 42044
step 32768 used time: 42544
thread step 32768 used time: 42635
step 65536 used time: 51692
thread step 65536 used time: 43304
step 131072 used time: 52293
thread step 131072 used time: 42753
step 262144 used time: 42920
thread step 262144 used time: 42080
step 524288 used time: 42607
thread step 524288 used time: 42151
--------------------------------------- tctest_thread
step 1 used time: 21038
thread step 1 used time: 19193
thread step 2 used time: 20282
step 2 used time: 35598
thread step 4 used time: 20611
step 4 used time: 31300
thread step 8 used time: 23624
step 8 used time: 18101
thread step 16 used time: 17824
step 16 used time: 17400
thread step 32 used time: 16899
step 32 used time: 16621
thread step 64 used time: 16533
step 64 used time: 16405
thread step 128 used time: 16599
step 128 used time: 16633
thread step 256 used time: 16536
step 256 used time: 16560
thread step 512 used time: 16582
step 512 used time: 16600
thread step 1024 used time: 16605
step 1024 used time: 16623
thread step 2048 used time: 16437
step 2048 used time: 16475
thread step 4096 used time: 16493
step 4096 used time: 16472
thread step 8192 used time: 16433
step 8192 used time: 16473
thread step 16384 used time: 16735
step 16384 used time: 16469
thread step 32768 used time: 16508
step 32768 used time: 16512
thread step 65536 used time: 17245
step 65536 used time: 16515
thread step 131072 used time: 16732
step 131072 used time: 16862
thread step 262144 used time: 16673
step 262144 used time: 16943
thread step 524288 used time: 650578
step 524288 used time: 657209
--------------------------------------- jetest_thread
step 1 used time: 16043
thread step 1 used time: 16469
step 2 used time: 16128
thread step 2 used time: 16134
step 4 used time: 16098
thread step 4 used time: 16115
step 8 used time: 16090
thread step 8 used time: 16103
step 16 used time: 16196
thread step 16 used time: 16170
step 32 used time: 16187
thread step 32 used time: 16261
step 64 used time: 16270
thread step 64 used time: 16319
step 128 used time: 16534
thread step 128 used time: 16374
step 256 used time: 16479
step 512 used time: 16732
thread step 256 used time: 18556
step 1024 used time: 17293
thread step 512 used time: 17323
thread step 1024 used time: 17259
step 2048 used time: 18446
thread step 2048 used time: 18402
step 4096 used time: 20707
thread step 4096 used time: 20780
step 8192 used time: 29158
thread step 8192 used time: 31846
step 16384 used time: 38374
thread step 16384 used time: 38356
step 32768 used time: 51634
thread step 32768 used time: 51486
thread step 65536 used time: 289356
step 65536 used time: 421243
thread step 131072 used time: 364896
step 131072 used time: 376191
thread step 262144 used time: 355574
step 262144 used time: 377443
thread step 524288 used time: 355181
step 524288 used time: 377075
--------------------------------------- mitest_thread
step 1 used time: 12328
step 2 used time: 12388
thread step 1 used time: 12661
step 4 used time: 12490
thread step 2 used time: 12403
step 8 used time: 12437
thread step 4 used time: 12418
step 16 used time: 11793
thread step 8 used time: 12433
step 32 used time: 12459
thread step 16 used time: 12388
step 64 used time: 12554
thread step 32 used time: 12439
step 128 used time: 13978
thread step 64 used time: 12677
step 256 used time: 14623
thread step 128 used time: 14015
step 512 used time: 15843
thread step 256 used time: 14813
step 1024 used time: 17588
thread step 512 used time: 16042
thread step 1024 used time: 16224
step 2048 used time: 24292
thread step 2048 used time: 23822
step 4096 used time: 23860
thread step 4096 used time: 24775
step 8192 used time: 25971
thread step 8192 used time: 25829
step 16384 used time: 24181
thread step 16384 used time: 24485
step 32768 used time: 24440
thread step 32768 used time: 23818
step 65536 used time: 24092
thread step 65536 used time: 23794
step 131072 used time: 24547
thread step 131072 used time: 23961
thread step 262144 used time: 706187
step 262144 used time: 865379
thread step 524288 used time: 731916
step 524288 used time: 890115
--------------------------------------- htest_thread
step 1 used time: 14592
step 2 used time: 22448
thread step 1 used time: 26341
step 4 used time: 22551
thread step 2 used time: 27129
step 8 used time: 21595
thread step 4 used time: 26202
step 16 used time: 26728
thread step 8 used time: 18380
step 32 used time: 14793
thread step 16 used time: 17274
step 64 used time: 18173
thread step 32 used time: 18245
step 128 used time: 17473
thread step 64 used time: 14741
step 256 used time: 14723
thread step 128 used time: 14762
step 512 used time: 14734
thread step 256 used time: 14774
step 1024 used time: 14714
thread step 512 used time: 15092
thread step 1024 used time: 14754
step 2048 used time: 415679
thread step 2048 used time: 458307
step 4096 used time: 624716
thread step 4096 used time: 608261
step 8192 used time: 447928
thread step 8192 used time: 488360
step 16384 used time: 512709
thread step 16384 used time: 510815
step 32768 used time: 656702
thread step 32768 used time: 591620
step 65536 used time: 98693
thread step 65536 used time: 93030
step 131072 used time: 84362
thread step 131072 used time: 83680
step 262144 used time: 88070
thread step 262144 used time: 89721
step 524288 used time: 83627
thread step 524288 used time: 84722
Windows对比测试
#include <iostream>
#include <windows.h>
#include <mimalloc.h>
/* Number of mymalloc() calls timed for each block size. The expansion is
 * parenthesized so the macro behaves as a single value inside any larger
 * expression (e.g. x / MAX_COUNT). */
#define MAX_COUNT (1024 * 1024)
/*
 * Perform one allocate/release round trip of `size` bytes through
 * mi_malloc() and mi_free(). The block is never touched in between.
 */
void mymalloc(int size)
{
    void *block = mi_malloc(size);

    mi_free(block);
}
int main()
{
LARGE_INTEGER nFreq, tstart, tend;
double timeuse;
int step = 1;
QueryPerformanceFrequency(&nFreq);
for (int j = 0; j < 20; j++)
{
QueryPerformanceCounter(&tstart);
for (int i = 0; i < MAX_COUNT; i++)
{
mymalloc(step);
}
QueryPerformanceCounter(&tend);
timeuse = 1000000 * (tend.QuadPart - tstart.QuadPart) / (double)nFreq.QuadPart;
printf("step %d used time: %f\n", step, timeuse);
step *= 2;
}
return 0;
}
step 1 used time: 43849.800000
step 2 used time: 43590.900000
step 4 used time: 44083.800000
step 8 used time: 45312.700000
step 16 used time: 43359.300000
step 32 used time: 44040.300000
step 64 used time: 43809.100000
step 128 used time: 44005.400000
step 256 used time: 46109.200000
step 512 used time: 49124.800000
step 1024 used time: 44350.900000
step 2048 used time: 53124.800000
step 4096 used time: 54409.600000
step 8192 used time: 44956.300000
step 16384 used time: 43690.100000
step 32768 used time: 138411.200000
step 65536 used time: 133109.500000
step 131072 used time: 126346.800000
step 262144 used time: 132872.600000
step 524288 used time: 150357.500000
// mimalloc
step 1 used time: 4595.600000
step 2 used time: 4537.400000
step 4 used time: 4697.500000
step 8 used time: 5734.200000
step 16 used time: 4646.500000
step 32 used time: 4700.500000
step 64 used time: 4858.400000
step 128 used time: 11683.200000
step 256 used time: 5911.000000
step 512 used time: 6510.400000
step 1024 used time: 13842.000000
step 2048 used time: 13953.700000
step 4096 used time: 14006.400000
step 8192 used time: 23049.000000
step 16384 used time: 19441.600000
step 32768 used time: 18976.800000
step 65536 used time: 18985.900000
step 131072 used time: 22530.500000
step 262144 used time: 302135.700000
step 524288 used time: 300475.000000