malloc 13us耗时分析定位
【问题描述】
10个生产者malloc(50KB),1个消费者(模拟业务,1ms后free)
调用malloc接口大概13us,分析耗时
【调用栈】
【编译调试】
root@jch:~/glibc_test/glibc-2.31/build# make install -sj;gcc -g -L /root/glibc-2.31_install/64/lib -Wl,--rpath=/root/glibc-2.31_install/64/libs -Wl,-I /root/glibc-2.31_install/64/lib/ld-2.31.so ../hello.c -o hello -lpthread -ljemalloc -mcmodel=medium;./hello
【内部耗时】
1) 打点结果
[TIME=1685875827][140712515462912]user_call_malloc ,count: 18000,avg: 13(us), min:0,max:9155, total: 242970(us)
[TIME1=1685875828][140712515462912]grow_heap ,count: 17961,avg: 7(us), min:0,max:9148, total: 137963(us)
[TIME1=1685875828][140712515462912]sysmalloc_2414_2775_sethead2 ,count: 17974,avg: 4(us), min:0,max:6546, total: 85628(us)
2)grow_heap里的__m_protect耗时7us
3)sysmalloc里的set_head接口
【疑问】
第一个为什么这么耗时
第二个改内存值可能触发系统调用
【测试代码】
#include<stdio.h> #include<unistd.h> #include<stdlib.h> #include<string.h> #include "sys/time.h" #include <pthread.h> #define TOTAL_COUNT (1000 * 1000 * 10 ) #define NUM_Threads 10 #define COUNT (TOTAL_COUNT/NUM_Threads) #define PRODUCE_COUNT 18000 #define AVERAGE_PRODUCE_COUNT (PRODUCE_COUNT/NUM_Threads) #define je_malloc malloc #define je_free free //#include <jemalloc/jemalloc.h> static int producer_index=0; static int consumer_index=0; typedef unsigned long long uint64; #define MIN(A, B) ((B) < (A) ? (B) : (A)) #define MAX(A, B) ((B) > (A) ? (B) : (A)) uint64 us_clock_now(){ struct timeval st; gettimeofday(&st, NULL); return st.tv_sec * 1000 * 1000 + st.tv_usec; } #define TIME_BEGIN(name) uint64 __##name##begin = us_clock_now(); #define TIME_END(name, print_interval_us) \ do { \ static __thread uint64 __##name##total = 0; \ static __thread uint64 __##name##count = 0; \ static __thread uint64 __##name##min = 1000000000000; \ static __thread uint64 __##name##max = 0; \ static __thread uint64 __##name##pre_total = 0; \ static __thread uint64 __##name##pre_count = 0; \ static __thread uint64 __##name##log_time = 0; \ uint64 current = (us_clock_now() - __##name##begin);\ __##name##min = MIN(current, __##name##min); \ __##name##max = MAX(current, __##name##max); \ __##name##total += current; \ __##name##count++; \ if (us_clock_now() - __##name##log_time > print_interval_us) \ { \ __##name##log_time = us_clock_now(); \ uint64 __##name##this_total = __##name##total - __##name##pre_total; \ uint64 __##name##this_count = __##name##count - __##name##pre_count; \ __##name##pre_total = __##name##total; \ __##name##pre_count = __##name##count; \ if (__##name##this_count > 1) { \ printf("[TIME=%llu][%ld]%-30s,count:%10llu,avg:%10llu(us), min:%llu,max:%llu, total:%10llu(us)\n", \ us_clock_now()/1000000,pthread_self(), \ #name, __##name##this_count, \ __##name##this_count == 0 ? 0 :__##name##this_total / __##name##this_count, __##name##min, __##name##max, __##name##this_total); \ } \ __##name##min=1000000000000; \ __##name##max = 0;\ } \ } while (0); static char* threads[NUM_Threads][COUNT]; static uint64 malloc_time[NUM_Threads][COUNT]; static uint64 global_malloc_time[NUM_Threads * COUNT]; static uint64 global_producer_index = 0; static uint64 global_consumer_index = 0; pthread_mutex_t gMutex = PTHREAD_MUTEX_INITIALIZER; void *PrintHello(void *arg) { int id = (int) arg; static struct timeval st; static struct timeval ed; static long time_total=0; long time_current; uint64 last_produce_time = us_clock_now(); for(int i=0;i<COUNT;i++){ producer_index=i; // gettimeofday(&st, NULL); TIME_BEGIN(user_call_malloc); char * s1=(char *)je_malloc(50 * 1000); *s1 = 'a'; TIME_END(user_call_malloc, 10 * 1000 *1000); // gettimeofday(&ed, NULL); threads[id][i] = s1; pthread_mutex_lock(&gMutex); global_malloc_time[global_producer_index]= us_clock_now(); global_producer_index++; // malloc_time[id][i]=us_clock_now(); pthread_mutex_unlock(&gMutex); // time_current = (ed.tv_sec - st.tv_sec) * 1000000 + (ed.tv_usec - st.tv_usec) ; // time_total += time_current; if(i%AVERAGE_PRODUCE_COUNT==0){ while(us_clock_now() - last_produce_time < 1000*1000){ usleep(1); } last_produce_time = us_clock_now(); } } printf("[time_total]:%ld, threadId=%d\n", time_total,id); return 0; } void *Consumer(void *arg) { int id = (int) arg; static struct timeval st; static struct timeval ed; static long time_total=0; long time_current; for(id=0;id<NUM_Threads;id++){ for (int i=0;i<COUNT;i++){ while(global_malloc_time[global_consumer_index]==0 || us_clock_now() - global_malloc_time[global_consumer_index] <= 1000) { usleep(1); } global_consumer_index++; TIME_BEGIN(free); je_free(threads[id][i]); consumer_index=i; if(global_consumer_index % 100000==0){ printf("gap=%llu,consumer_index=%llu, producer_index=%llu\n",global_producer_index-global_consumer_index, global_consumer_index,global_producer_index); } TIME_END(free, 10000 * 1000); } }; return 0; } int main(){ printf("AVERAGE_PRODUCE_COUNT=%d\n", AVERAGE_PRODUCE_COUNT); memset(threads, 0, sizeof(threads)); memset(global_malloc_time, 0, sizeof(global_malloc_time)); pthread_t tids[NUM_Threads]; int i; int ret; for (i=0; i<NUM_Threads; i++) { ret = pthread_create(&tids[i], NULL, PrintHello, (void *)i); if (ret != 0) { printf("pthread_create error: error_code = \n"); } } usleep(10 * 1000); // sleep(2); pthread_create(&tids[i], NULL, Consumer, NULL); void *a1; for (i=0; i<NUM_Threads; i++){ pthread_join(tids[i],&a1); } return 0; } `