源码来源:Ubuntu18.04执行sudo apt source glibc
/* Header laid over every heap chunk.  The two size words are always
   present; the four link pointers overlay the user data area and are
   only meaningful while the chunk is free (see the per-field notes). */
struct malloc_chunk {
  INTERNAL_SIZE_T mchunk_prev_size; /* Size of previous chunk (if free). */
  INTERNAL_SIZE_T mchunk_size;      /* Size in bytes, including overhead. */
  struct malloc_chunk *fd;          /* double links -- used only if free. */
  struct malloc_chunk *bk;
  /* Only used for large blocks: pointer to next larger size. */
  struct malloc_chunk *fd_nextsize; /* double links -- used only if free. */
  struct malloc_chunk *bk_nextsize;
};
- 被free的chunk的大小分别被存储在chunk头和尾,这使得碎片化的chunk合并为大的chunk非常快,size区域还包括一个比特用来表示上一个堆块是否被使用,并且malloc返回的指针是指向堆块内容的,而fd和bk指针则是指向chunk头的
- free的chunk被存储在循环双向链表中,fd指向下一个chunk,bk指向前一个chunk
- 三个例外:
- top chunk不包含trailing size也就是下一个chunk的prev size,因为其被分配在堆区域最高的地址,堆是由低到高增长的,因此,topchunk不含有nextchunk
- M位被用来标识是否使用的是mmap进行分配
- fastbin中的chunk始终保持"使用中"标记(即其下一个chunk的P位始终为1),以防止被合并;fastbin中的chunk只会在malloc_consolidate函数中被批量(in bulk)合并
[!NOTE] 注:
struct malloc_state
/* Serialize access. */
__libc_lock_define (, mutex);
/* Flags (formerly in max_fast). */
int flags;
/* Set if the fastbin chunks contain recently inserted free blocks. */
/* Note this is a bool but not all targets support atomics on booleans. */
int have_fastchunks;
/* Fastbins */
mfastbinptr fastbinsY[NFASTBINS];
/* Base of the topmost chunk -- not otherwise kept in a bin */
mchunkptr top;
/* The remainder from the most recent split of a small request */
mchunkptr last_remainder;
/* Normal bins packed as described above */
mchunkptr bins[NBINS * 2 - 2];
/* Bitmap of bins */
unsigned int binmap[BINMAPSIZE];
/* Linked list */
struct malloc_state *next;
/* Linked list for free arenas. Access to this field is serialized
by free_list_lock in arena.c. */
struct malloc_state *next_free;
/* Number of threads attached to this arena. 0 if the arena is on
the free list. Access to this field is serialized by
free_list_lock in arena.c. */
INTERNAL_SIZE_T attached_threads;
/* Memory allocated from the system in this arena. */
INTERNAL_SIZE_T system_mem;
INTERNAL_SIZE_T max_system_mem;
glibc malloc底层通过brk和mmap系统调用来实现内存分配
对于main_arena,其堆由brk/sbrk创建,其malloc_state结构体(即main_arena本身)存储在进程所链接的libc.so的数据段中;而它所管理的堆内存位于进程的heap区域,大小可以扩展。
- fastbinsY[NFASTBINS],用来存储fastbin单链表的头指针的数组
- bins[NBINS * 2 - 2],用来存储unsortedbin,largebin,smallbin的链表表头的数组
- top,指向topchunk的指针
- last_remainder,指向一个chunk被切割后剩余的部分的指针
typedef struct _heap_info
mstate ar_ptr; /* Arena for this heap. */
struct _heap_info *prev; /* Previous heap. */
size_t size; /* Current size in bytes. */
size_t mprotect_size; /* Size in bytes that has been mprotected
/* Make sure the following data is properly aligned, particularly
that sizeof (heap_info) + 2 * SIZE_SZ is a multiple of
char pad[-6 * SIZE_SZ & MALLOC_ALIGN_MASK];
} heap_info;
[!NOTE] Tips:
![[未命名绘图.drawio (2).png]]
/* Number of fastbin lists: the index of the largest supported fast
   chunk size, plus one. */
#define NFASTBINS (fastbin_index (request2size (MAX_FAST_SIZE)) + 1)

/* offset 2 to use otherwise unindexable first 2 bins */
#define fastbin_index(sz) \
  ((((unsigned int) (sz)) >> (SIZE_SZ == 8 ? 4 : 3)) - 2)

/* Pad a request of req bytes into a usable chunk size: add the SIZE_SZ
   header word, round up to the alignment, and never go below MINSIZE. */
#define request2size(req) \
  (((req) + SIZE_SZ + MALLOC_ALIGN_MASK < MINSIZE) ? \
   MINSIZE : \
   ((req) + SIZE_SZ + MALLOC_ALIGN_MASK) & ~MALLOC_ALIGN_MASK)

/* The maximum fastbin request size we support */
#define MAX_FAST_SIZE (80 * SIZE_SZ / 4)
[!NOTE] 注:
#define DEFAULT_MXFAST (64 * SIZE_SZ / 4)
if (av == &main_arena)
set_max_fast (DEFAULT_MXFAST);
[!NOTE] 注:
/* Number of tcache bins; sizes both arrays in tcache_perthread_struct. */
# define TCACHE_MAX_BINS 64

/* Link node overlaid on the user data of a chunk held in a tcache bin. */
typedef struct tcache_entry
{
  struct tcache_entry *next;
  /* This field exists to detect double frees. */
  struct tcache_perthread_struct *key;
} tcache_entry;

/* Per-bin bookkeeping for the tcache: counts[i] is the counter for bin i
   and entries[i] is the head of bin i's singly linked list. */
typedef struct tcache_perthread_struct
{
  char counts[TCACHE_MAX_BINS];
  tcache_entry *entries[TCACHE_MAX_BINS];
} tcache_perthread_struct;
tcache_entry由两个成员变量组成:
- next指针,用来存放指向tcachebin中下一个chunk的地址
- key,用来标记chunk已经被free,避免double free发生
tcache_perthread_struct结构体用来管理tcache bins,每个线程都有一个。它由两个成员变量组成:counts[TCACHE_MAX_BINS](记录每个tcache bin中chunk的数量)和entries[TCACHE_MAX_BINS](存放每个tcache bin的链表头指针)。
[!NOTE] Compare tcache to fastbin
![[b.xm.drawio (1).png]]
可以看到,一个tcache bin chunk至少0x20字节
bins[NBINS * 2 - 2]
首先,顾名思义,bins[NBINS * 2 - 2]是一个存储着所有unsortedbin、smallbin以及largebin链表表头(头结点)的数组。其中每个arena(而不是每个线程)有1个unsortedbin、62个smallbin和63个largebin。
Bin 1 – Unsorted bin
Bin 2 to Bin 63 – Small bin
Bin 64 to Bin 126 – Large bin
![[Pasted image 20240221182218.png]]
#define NBINS 128
为了解释为啥数组参数是NBINS * 2 - 2,即254。我们先来思考一下,我们该怎么去存储这126个malloc_chunk结构体,首先在64位下,malloc_chunk结构体需要6*8字节的大小,也就是6个mchunkptr大小的。所以我们一共有126个chunk(126个链表头结点)需要存储,那我们需要多少个mchunkptr呢,答案是126*6 = 756个。但实际上我们真的需要这么多的空间来存储头结点吗,显然不是,我们知道头结点它并不存储真正的数据,它只有fd和bk指针是有效的,那我们是不是就可以将除了fd和bk的其他字段重复利用,实现"空间复用"的效果。如图,实现了复用之后,我们就可以只用126*2 =252个mchunkptr来存储头结点。这里注意我们只是进行了空间复用,并不能将前面bin[0]和bin[1]中浪费的两个字段空间给删除,因此,最终我们所需要的bins数组大小为128*2-2 。
#define NSMALLBINS 64
/* True when sz is below MIN_LARGE_SIZE. */
#define in_smallbin_range(sz) \
  ((unsigned long) (sz) < (unsigned long) MIN_LARGE_SIZE)

/* Small-bin index for sz: sz divided by the bin width (16 or 8 bytes,
   chosen by SMALLBIN_WIDTH), shifted by SMALLBIN_CORRECTION. */
#define smallbin_index(sz) \
  ((SMALLBIN_WIDTH == 16 ? (((unsigned) (sz)) >> 4) : (((unsigned) (sz)) >> 3))\
   + SMALLBIN_CORRECTION)
其中SMALLBIN_CORRECTION是在MALLOC_ALIGNMENT = 16、SIZE_SZ = 4时用来修正下标的。但我……(此处原文缺失)
[!NOTE] 注:
Bins for sizes < 512 bytes contain chunks of all the same size, spaced
8 bytes apart. Larger bins are approximately logarithmically spaced:
64 bins of size 8
32 bins of size 64
16 bins of size 512
8 bins of size 4096
4 bins of size 32768
2 bins of size 262144
1 bin of size what's left
There is actually a little bit of slop in the numbers in bin_index
for the sake of speed. This makes no difference elsewhere.
The bins top out around 1MB because we expect to service large
requests via mmap.
Bin 0 does not exist. Bin 1 is the unordered list; if that would be
a valid chunk size the small bins are bumped up one.
/* Large-bin index for chunk size sz.  Each arm tests sz at a coarser
   granularity (64, 512, 4096, 32768, 262144 bytes); anything that
   passes none of the tests lands in the last bin, 126. */
#define largebin_index_32(sz) \
  (((((unsigned long) (sz)) >> 6) <= 38) ? 56 + (((unsigned long) (sz)) >> 6) :\
   ((((unsigned long) (sz)) >> 9) <= 20) ? 91 + (((unsigned long) (sz)) >> 9) :\
   ((((unsigned long) (sz)) >> 12) <= 10) ? 110 + (((unsigned long) (sz)) >> 12) :\
   ((((unsigned long) (sz)) >> 15) <= 4) ? 119 + (((unsigned long) (sz)) >> 15) :\
   ((((unsigned long) (sz)) >> 18) <= 2) ? 124 + (((unsigned long) (sz)) >> 18) :\
   126)
/* Variant of the large-bin index whose first (64-byte) arm uses the
   offset 49 and limit 45; the coarser arms and the final catch-all
   bin 126 follow the same ladder of shifts. */
#define largebin_index_32_big(sz) \
  (((((unsigned long) (sz)) >> 6) <= 45) ? 49 + (((unsigned long) (sz)) >> 6) :\
   ((((unsigned long) (sz)) >> 9) <= 20) ? 91 + (((unsigned long) (sz)) >> 9) :\
   ((((unsigned long) (sz)) >> 12) <= 10) ? 110 + (((unsigned long) (sz)) >> 12) :\
   ((((unsigned long) (sz)) >> 15) <= 4) ? 119 + (((unsigned long) (sz)) >> 15) :\
   ((((unsigned long) (sz)) >> 18) <= 2) ? 124 + (((unsigned long) (sz)) >> 18) :\
   126)
// XXX It remains to be seen whether it is good to keep the widths of
// XXX the buckets the same or whether it should be scaled by a factor
// XXX of two as well.
/* Large-bin index used when SIZE_SZ == 8: first arm uses offset 48 and
   limit 48 on the 64-byte granularity; sizes matching no arm fall into
   the last bin, 126. */
#define largebin_index_64(sz) \
  (((((unsigned long) (sz)) >> 6) <= 48) ? 48 + (((unsigned long) (sz)) >> 6) :\
   ((((unsigned long) (sz)) >> 9) <= 20) ? 91 + (((unsigned long) (sz)) >> 9) :\
   ((((unsigned long) (sz)) >> 12) <= 10) ? 110 + (((unsigned long) (sz)) >> 12) :\
   ((((unsigned long) (sz)) >> 15) <= 4) ? 119 + (((unsigned long) (sz)) >> 15) :\
   ((((unsigned long) (sz)) >> 18) <= 2) ? 124 + (((unsigned long) (sz)) >> 18) :\
   126)
/* Pick the large-bin index formula for this build: the 64 variant when
   SIZE_SZ == 8, otherwise the 32_big variant when MALLOC_ALIGNMENT == 16,
   otherwise the plain 32 variant. */
#define largebin_index(sz) \
(SIZE_SZ == 8 ? largebin_index_64 (sz) \
: MALLOC_ALIGNMENT == 16 ? largebin_index_32_big (sz) \
: largebin_index_32 (sz))
/* Bin index for any chunk size: small-bin index when sz is in the
   small-bin range, large-bin index otherwise. */
#define bin_index(sz) \
((in_smallbin_range (sz)) ? smallbin_index (sz) : largebin_index (sz))
组 | 数量 | 步长 |
0 | 64 | 8 |
1 | 32 | 64 |
2 | 16 | 512 |
3 | 8 | 4096 |
4 | 4 | 32768 |
5 | 2 | 262144 |
6 | 1 | 不限制 |
(其中第0组属于smallbin,smallbin实际有62个) |
/*
 * Function form of the largebin_index_32 macro, written out as an
 * if/else ladder for readability.
 *
 * sz: chunk size in bytes.
 * Returns the large-bin index; sizes past the last range map to 126.
 *
 * The shifts are parenthesized on purpose: `+` binds tighter than `>>`,
 * so `56 + sz >> 6` would compute `(56 + sz) >> 6`.
 */
unsigned long largebin_index_32(SIZE_T sz) {
    if ((sz >> 6) <= 38) {          /* sz <= 2495 */
        return 56 + (sz >> 6);
    } else if ((sz >> 9) <= 20) {   /* sz <= 10751 */
        return 91 + (sz >> 9);
    } else if ((sz >> 12) <= 10) {  /* sz <= 45055 */
        return 110 + (sz >> 12);
    } else if ((sz >> 15) <= 4) {   /* sz <= 163839 */
        return 119 + (sz >> 15);
    } else if ((sz >> 18) <= 2) {   /* sz <= 786431 */
        return 124 + (sz >> 18);
    } else {
        return 126;
    }
}
/*
 * Function form of the largebin_index_32_big macro, written out as an
 * if/else ladder for readability.
 *
 * sz: chunk size in bytes.
 * Returns the large-bin index; sizes past the last range map to 126.
 *
 * The shifts are parenthesized on purpose: `+` binds tighter than `>>`,
 * so `49 + sz >> 6` would compute `(49 + sz) >> 6`.
 */
unsigned long largebin_index_32_big(SIZE_T sz) {
    if ((sz >> 6) <= 45) {          /* sz <= 2943 */
        return 49 + (sz >> 6);
    } else if ((sz >> 9) <= 20) {   /* sz <= 10751 */
        return 91 + (sz >> 9);
    } else if ((sz >> 12) <= 10) {  /* sz <= 45055 */
        return 110 + (sz >> 12);
    } else if ((sz >> 15) <= 4) {   /* sz <= 163839 */
        return 119 + (sz >> 15);
    } else if ((sz >> 18) <= 2) {   /* sz <= 786431 */
        return 124 + (sz >> 18);
    } else {
        return 126;
    }
}
/*
 * Function form of the largebin_index_64 macro, written out as an
 * if/else ladder for readability.
 *
 * sz: chunk size in bytes.
 * Returns the large-bin index; sizes past the last range map to 126.
 *
 * The shifts are parenthesized on purpose: `+` binds tighter than `>>`,
 * so `48 + sz >> 6` would compute `(48 + sz) >> 6`.
 */
unsigned long largebin_index_64(SIZE_T sz) {
    if ((sz >> 6) <= 48) {          /* sz <= 3135 */
        return 48 + (sz >> 6);
    } else if ((sz >> 9) <= 20) {   /* sz <= 10751 */
        return 91 + (sz >> 9);
    } else if ((sz >> 12) <= 10) {  /* sz <= 45055 */
        return 110 + (sz >> 12);
    } else if ((sz >> 15) <= 4) {   /* sz <= 163839 */
        return 119 + (sz >> 15);
    } else if ((sz >> 18) <= 2) {   /* sz <= 786431 */
        return 124 + (sz >> 18);
    } else {
        return 126;
    }
}
Start | End | index |
0 | 7 | 不存在 |
8 | 15 | 不存在 |
16 | 23 | 2 |
24 | 31 | 3 |
32 | 39 | 4 |
40 | 47 | 5 |
48 | 55 | 6 |
56 | 63 | 7 |
64 | 71 | 8 |
72 | 79 | 9 |
80 | 87 | 10 |
88 | 95 | 11 |
96 | 103 | 12 |
104 | 111 | 13 |
112 | 119 | 14 |
120 | 127 | 15 |
128 | 135 | 16 |
136 | 143 | 17 |
144 | 151 | 18 |
152 | 159 | 19 |
160 | 167 | 20 |
168 | 175 | 21 |
176 | 183 | 22 |
184 | 191 | 23 |
192 | 199 | 24 |
200 | 207 | 25 |
208 | 215 | 26 |
216 | 223 | 27 |
224 | 231 | 28 |
232 | 239 | 29 |
240 | 247 | 30 |
248 | 255 | 31 |
256 | 263 | 32 |
264 | 271 | 33 |
272 | 279 | 34 |
280 | 287 | 35 |
288 | 295 | 36 |
296 | 303 | 37 |
304 | 311 | 38 |
312 | 319 | 39 |
320 | 327 | 40 |
328 | 335 | 41 |
336 | 343 | 42 |
344 | 351 | 43 |
352 | 359 | 44 |
360 | 367 | 45 |
368 | 375 | 46 |
376 | 383 | 47 |
384 | 391 | 48 |
392 | 399 | 49 |
400 | 407 | 50 |
408 | 415 | 51 |
416 | 423 | 52 |
424 | 431 | 53 |
432 | 439 | 54 |
440 | 447 | 55 |
448 | 455 | 56 |
456 | 463 | 57 |
464 | 471 | 58 |
472 | 479 | 59 |
480 | 487 | 60 |
488 | 495 | 61 |
496 | 503 | 62 |
504 | 511 | 63 |
512 | 575 | 64 |
576 | 639 | 65 |
640 | 703 | 66 |
704 | 767 | 67 |
768 | 831 | 68 |
832 | 895 | 69 |
896 | 959 | 70 |
960 | 1023 | 71 |
1024 | 1087 | 72 |
1088 | 1151 | 73 |
1152 | 1215 | 74 |
1216 | 1279 | 75 |
1280 | 1343 | 76 |
1344 | 1407 | 77 |
1408 | 1471 | 78 |
1472 | 1535 | 79 |
1536 | 1599 | 80 |
1600 | 1663 | 81 |
1664 | 1727 | 82 |
1728 | 1791 | 83 |
1792 | 1855 | 84 |
1856 | 1919 | 85 |
1920 | 1983 | 86 |
1984 | 2047 | 87 |
2048 | 2111 | 88 |
2112 | 2175 | 89 |
2176 | 2239 | 90 |
2240 | 2303 | 91 |
2304 | 2367 | 92 |
2368 | 2431 | 93 |
2432 | 2495 | 94 |
2496 | 2559 | 95 |
2560 | 3071 | 96 |
3072 | 3583 | 97 |
3584 | 4095 | 98 |
4096 | 4607 | 99 |
4608 | 5119 | 100 |
5120 | 5631 | 101 |
5632 | 6143 | 102 |
6144 | 6655 | 103 |
6656 | 7167 | 104 |
7168 | 7679 | 105 |
7680 | 8191 | 106 |
8192 | 8703 | 107 |
8704 | 9215 | 108 |
9216 | 9727 | 109 |
9728 | 10239 | 110 |
10240 | 10751 | 111 |
10752 | 14847 | 112 |
14848 | 18943 | 113 |
18944 | 23039 | 114 |
23040 | 27135 | 115 |
27136 | 31231 | 116 |
31232 | 35327 | 117 |
35328 | 39423 | 118 |
39424 | 43519 | 119 |
43520 | 76287 | 120 |
76288 | 109055 | 121 |
109056 | 141823 | 122 |
141824 | 174591 | 123 |
174592 | 436735 | 124 |
436736 | 698879 | 125 |
698880 | 2^32或 2^64 | 126 |
Unsorted chunks
All remainders from chunk splits, as well as all returned chunks,
are first placed in the "unsorted" bin. They are then placed
in regular bins after malloc gives them ONE chance to be used before
binning. So, basically, the unsorted_chunks list acts as a queue,
with chunks being placed on it in free (and malloc_consolidate),
and taken off (to be either used or placed in bins) in malloc.
The NON_MAIN_ARENA flag is never set for unsorted chunks, so it
does not have to be taken into account in size comparisons.
/* The otherwise unindexable 1-bin is used to hold unsorted chunks. */
#define unsorted_chunks(M) (bin_at (M, 1))
top chunk
The top-most available chunk (i.e., the one bordering the end of
available memory) is treated specially. It is never included in
any bin, is used only if no other chunk is available, and is
released back to the system if it is very large (see
M_TRIM_THRESHOLD). Because top initially
points to its own bin with initial zero size, thus forcing
extension on the first malloc request, we avoid having any special
code in malloc to check whether it even exists yet. But we still
need to do so when getting memory from system, so we make
initial_top treat the bin as a legal but unusable chunk during the
interval between initialization and the first call to
sysmalloc. (This is somewhat delicate, since it relies on
the 2 preceding words to be zero during this interval as well.)
/* Conveniently, the unsorted bin can be used as dummy top on first call */
#define initial_top(M) (unsorted_chunks (M))
注释:top chunk是一个特殊的chunk,它不属于任何bin,只有在没有其他chunk可用时才会被使用;如果它非常大(超过M_TRIM_THRESHOLD),则会被释放归还给系统。因为top初始时指向它自己的bin(即unsorted bin)且大小为0,第一次malloc请求必然会触发扩展,这样malloc中就不需要专门的代码去检查top chunk是否存在;但在向系统申请内存时仍然需要这种检查,因此在初始化到第一次调用sysmalloc之间的这段时间内,使用initial_top把该bin视为一个合法但不可用的chunk。
void *
__libc_malloc (size_t bytes)
mstate ar_ptr; //定义了一个malloc_state结构体指针变量
void *victim;
void *(*hook) (size_t, const void *)
= atomic_forced_read (__malloc_hook);
if (__builtin_expect (hook != NULL, 0))
return (*hook)(bytes, RETURN_ADDRESS (0));
/* int_free also calls request2size, be careful to not pad twice. */
size_t tbytes;//tbytes用来存储修改后的字节大小
checked_request2size (bytes, tbytes);
size_t tc_idx = csize2tidx (tbytes);//使用修改后的字节大小返回对应的下标
MAYBE_INIT_TCACHE ();//如果tcache不存在则使用tcache_init()函数初始化tcache,关于tcache_init()函数如何初始化的,详见后文
# define MAYBE_INIT_TCACHE() \
if (__glibc_unlikely (tcache == NULL)) \
static struct malloc_par mp_ =
.top_pad = DEFAULT_TOP_PAD,
.n_mmaps_max = DEFAULT_MMAP_MAX,
.mmap_threshold = DEFAULT_MMAP_THRESHOLD,
.trim_threshold = DEFAULT_TRIM_THRESHOLD,
#define NARENAS_FROM_NCORES(n) ((n) * (sizeof (long) == 4 ? 2 : 8))
.arena_test = NARENAS_FROM_NCORES (1)
.tcache_count = TCACHE_FILL_COUNT,
.tcache_bins = TCACHE_MAX_BINS,
.tcache_max_bytes = tidx2usize (TCACHE_MAX_BINS-1),
.tcache_unsorted_limit = 0 // No limit.
if (tc_idx < mp_.tcache_bins
/*&& tc_idx < TCACHE_MAX_BINS*/ /* to appease gcc */
&& tcache
&& tcache->entries[tc_idx] != NULL)
return tcache_get (tc_idx);
victim = _int_malloc (&main_arena, bytes);
assert (!victim || chunk_is_mmapped (mem2chunk (victim)) ||
&main_arena == arena_for_chunk (mem2chunk (victim)));
return victim;
arena_get (ar_ptr, bytes);
victim = _int_malloc (ar_ptr, bytes);
/* Retry with another arena only if we were able to find a usable arena
before. */
if (!victim && ar_ptr != NULL)
LIBC_PROBE (memory_malloc_retry, 1, bytes);
ar_ptr = arena_get_retry (ar_ptr, bytes);
victim = _int_malloc (ar_ptr, bytes);
if (ar_ptr != NULL)
__libc_lock_unlock (ar_ptr->mutex);
assert (!victim || chunk_is_mmapped (mem2chunk (victim)) ||
ar_ptr == arena_for_chunk (mem2chunk (victim)));
return victim;
static void
mstate ar_ptr;//同样定义一个malloc_state类型的指针
void *victim = 0;
typedef struct tcache_perthread_struct
char counts[TCACHE_MAX_BINS];
tcache_entry *entries[TCACHE_MAX_BINS];
} tcache_perthread_struct;
const size_t bytes = sizeof (tcache_perthread_struct);
if (tcache_shutting_down)
arena_get (ar_ptr, bytes);
victim = _int_malloc (ar_ptr, bytes);
if (!victim && ar_ptr != NULL)
ar_ptr = arena_get_retry (ar_ptr, bytes);
victim = _int_malloc (ar_ptr, bytes);
if (ar_ptr != NULL)
__libc_lock_unlock (ar_ptr->mutex);
/* In a low memory situation, we may not be able to allocate memory
- in which case, we just keep trying later. However, we
typically do this very early, so either there is sufficient
memory, or there isn't enough memory to do non-trivial
allocations anyway. */
if (victim)
tcache = (tcache_perthread_struct *) victim;
memset (tcache, 0, sizeof (tcache_perthread_struct));
/* Caller must ensure that we know tc_idx is valid and there's
available chunks to remove. */
static __always_inline void *
tcache_get (size_t tc_idx)
tcache_entry *e = tcache->entries[tc_idx];
assert (tc_idx < TCACHE_MAX_BINS);
assert (tcache->entries[tc_idx] > 0);
tcache->entries[tc_idx] = e->next;
e->key = NULL;
return (void *) e;
/* Push CHUNK onto the head of tcache bin TC_IDX.
   Caller must ensure that we know tc_idx is valid and there's room
   for more chunks.  */
static __always_inline void
tcache_put (mchunkptr chunk, size_t tc_idx)
{
  tcache_entry *e = (tcache_entry *) chunk2mem (chunk);
  assert (tc_idx < TCACHE_MAX_BINS);

  /* Mark this chunk as "in the tcache" so the test in _int_free will
     detect a double free.  */
  e->key = tcache;

  /* Link in at the head and keep the per-bin counter in step.  */
  e->next = tcache->entries[tc_idx];
  tcache->entries[tc_idx] = e;
  ++(tcache->counts[tc_idx]);
}
static void *
_int_malloc (mstate av, size_t bytes)
INTERNAL_SIZE_T nb; /* normalized request size */
unsigned int idx; /* associated bin index */
mbinptr bin; /* associated bin */
mchunkptr victim; /* inspected/selected chunk */
INTERNAL_SIZE_T size; /* its size */
int victim_index; /* its bin index */
mchunkptr remainder; /* remainder from a split */
unsigned long remainder_size; /* its size */
unsigned int block; /* bit map traverser */
unsigned int bit; /* bit map traverser */
unsigned int map; /* current word of binmap */
mchunkptr fwd; /* misc temp for linking */
mchunkptr bck; /* misc temp for linking */
size_t tcache_unsorted_count; /* count of unsorted chunks processed */
Convert request size to internal form by adding SIZE_SZ bytes
overhead plus possibly more to obtain necessary alignment and/or
to obtain a size of at least MINSIZE, the smallest allocatable
size. Also, checked_request2size traps (returning 0) request sizes
that are so large that they wrap around zero when padded and
checked_request2size (bytes, nb);
/* There are no usable arenas. Fall back to sysmalloc to get a chunk from
mmap. */
if (__glibc_unlikely (av == NULL))
void *p = sysmalloc (nb, av);
if (p != NULL)
alloc_perturb (p, bytes);
return p;
static void *
_int_malloc (mstate av, size_t bytes)
If the size qualifies as a fastbin, first check corresponding bin.
This code is safe to execute even if av is not yet initialized, so we
can try it without checking, which saves some time on this fast path.
#define REMOVE_FB(fb, victim, pp) \
do \
{ \
victim = pp; \
if (victim == NULL) \
break; \
} \
while ((pp = catomic_compare_and_exchange_val_acq (fb, victim->fd, victim)) \
!= victim);
if ((unsigned long) (nb) <= (unsigned long) (get_max_fast ()))
idx = fastbin_index (nb);
mfastbinptr *fb = &fastbin (av, idx);//获取对应大小的链表头指针
mchunkptr pp;
victim = *fb;//将第一个chunk从链表头中取出,这也对应着fastbin后入先出的存储顺序
if (victim != NULL)
*fb = victim->fd;//将victim从链表中取出
REMOVE_FB (fb, pp, victim);
if (__glibc_likely (victim != NULL))
size_t victim_idx = fastbin_index (chunksize (victim));
if (__builtin_expect (victim_idx != idx, 0))
malloc_printerr ("malloc(): memory corruption (fast)");
check_remalloced_chunk (av, victim, nb);
/* While we're here, if we see other chunks of the same size,
stash them in the tcache. */
size_t tc_idx = csize2tidx (nb);
if (tcache && tc_idx < mp_.tcache_bins)
mchunkptr tc_victim;
/* While bin not empty and tcache not full, copy chunks. */
while (tcache->counts[tc_idx] < mp_.tcache_count
&& (tc_victim = *fb) != NULL)
*fb = tc_victim->fd;
REMOVE_FB (fb, pp, tc_victim);
if (__glibc_unlikely (tc_victim == NULL))
tcache_put (tc_victim, tc_idx);
void *p = chunk2mem (victim);
alloc_perturb (p, bytes);
return p;
static void *
_int_malloc (mstate av, size_t bytes)
If a small request, check regular bin. Since these "smallbins"
hold one size each, no searching within bins is necessary.
(For a large request, we need to wait until unsorted chunks are
processed to find best fit. But for small ones, fits are exact
anyway, so we can check now, which is faster.)
if (in_smallbin_range (nb))
idx = smallbin_index (nb);
bin = bin_at (av, idx);
if ((victim = last (bin)) != bin)
bck = victim->bk;
if (__glibc_unlikely (bck->fd != victim))
malloc_printerr ("malloc(): smallbin double linked list corrupted");
set_inuse_bit_at_offset (victim, nb);
bin->bk = bck;
bck->fd = bin;
if (av != &main_arena)
set_non_main_arena (victim);
check_malloced_chunk (av, victim, nb);
/* While we're here, if we see other chunks of the same size,
stash them in the tcache. */
size_t tc_idx = csize2tidx (nb);
if (tcache && tc_idx < mp_.tcache_bins)
mchunkptr tc_victim;
/* While bin not empty and tcache not full, copy chunks over. */
while (tcache->counts[tc_idx] < mp_.tcache_count
&& (tc_victim = last (bin)) != bin)
if (tc_victim != 0)
bck = tc_victim->bk;
set_inuse_bit_at_offset (tc_victim, nb);
if (av != &main_arena)
set_non_main_arena (tc_victim);
bin->bk = bck;
bck->fd = bin;
tcache_put (tc_victim, tc_idx);
void *p = chunk2mem (victim);
alloc_perturb (p, bytes);
return p;
- 计算出对应的largebin的索引
- 调用malloc_consolidate函数合并fastbin,为了减少堆中的碎片
- 这里的
- 这里的
static void *
_int_malloc (mstate av, size_t bytes)
If this is a large request, consolidate fastbins before continuing.
While it might look excessive to kill all fastbins before
even seeing if there is space available, this avoids
fragmentation problems normally associated with fastbins.
Also, in practice, programs tend to have runs of either small or
large requests, but less often mixtures, so consolidation is not
invoked all that often in most programs. And the programs that
it is called frequently in otherwise tend to fragment.
idx = largebin_index (nb);
if (atomic_load_relaxed (&av->have_fastchunks))
malloc_consolidate (av);
static void *
_int_malloc (mstate av, size_t bytes)
Process recently freed or remaindered chunks, taking one only if
it is exact fit, or, if this a small request, the chunk is remainder from
the most recent non-exact fit. Place other traversed chunks in
bins. Note that this step is the only place in any routine where
chunks are placed in bins.
The outer loop here is needed because we might not realize until
near the end of malloc that we should have consolidated, so must
do so and retry. This happens at most once, and only when we would
otherwise need to expand memory to service a "small" request.
INTERNAL_SIZE_T tcache_nb = 0;
size_t tc_idx = csize2tidx (nb);//nb大小对应的tcachebin的下标
if (tcache && tc_idx < mp_.tcache_bins)
tcache_nb = nb;
int return_cached = 0;//用来标记是否由合适大小的chunk在下面的循环中放入tcachebin
tcache_unsorted_count = 0;//下面大循环处理的unsortedbin中的chunk的数量
//这个大循环,也是上面注释里说的outer loop
for (;; )
int iters = 0;
while ((victim = unsorted_chunks (av)->bk) != unsorted_chunks (av))
bck = victim->bk;
if (__builtin_expect (chunksize_nomask (victim) <= 2 * SIZE_SZ, 0)
|| __builtin_expect (chunksize_nomask (victim)
> av->system_mem, 0))
malloc_printerr ("malloc(): memory corruption");
size = chunksize (victim);
If a small request, try to use last remainder if it is the
only chunk in unsorted bin. This helps promote locality for
runs of consecutive small requests. This is the only
exception to best-fit, and applies only when there is
no exact fit for a small chunk.
如果是小块请求,并且last remainder是unsorted bin中唯一的chunk,则尝试使用它。这有助于提升连续小块请求的局部性(locality)。这是best-fit(最佳匹配)策略的唯一例外,并且只在小块请求没有精确匹配的chunk时适用。
//如果在smallbin的范围内并且unsortedbin中只有一个chunk,并且这个chunk是last remainder,并且在分割nb字节后仍然可以成为一个堆块。
if (in_smallbin_range (nb) &&
bck == unsorted_chunks (av) &&
victim == av->last_remainder &&
(unsigned long) (size) > (unsigned long) (nb + MINSIZE))
/* split and reattach remainder */
remainder_size = size - nb;//剩余部分的大小
remainder = chunk_at_offset (victim, nb);//剩余部分的起始地址
unsorted_chunks (av)->bk = unsorted_chunks (av)->fd = remainder;
av->last_remainder = remainder;
remainder->bk = remainder->fd = unsorted_chunks (av);
if (!in_smallbin_range (remainder_size))
remainder->fd_nextsize = NULL;
remainder->bk_nextsize = NULL;
set_head (victim, nb | PREV_INUSE |
(av != &main_arena ? NON_MAIN_ARENA : 0));
set_head (remainder, remainder_size | PREV_INUSE);
set_foot (remainder, remainder_size);
check_malloced_chunk (av, victim, nb);
void *p = chunk2mem (victim);
alloc_perturb (p, bytes);
return p;
/* remove from unsorted list */
unsorted_chunks (av)->bk = bck;
bck->fd = unsorted_chunks (av);
/* Take now instead of binning if exact fit */
if (size == nb)
set_inuse_bit_at_offset (victim, size);
if (av != &main_arena)
set_non_main_arena (victim);
/* Fill cache first, return to user only if cache fills.
We may return one of these chunks later. */
if (tcache_nb
&& tcache->counts[tc_idx] < mp_.tcache_count)
tcache_put (victim, tc_idx);
return_cached = 1;
#endif //返回victim
check_malloced_chunk (av, victim, nb);
void *p = chunk2mem (victim);
alloc_perturb (p, bytes);
return p;
/* place chunk in bin */
if (in_smallbin_range (size))
victim_index = smallbin_index (size);
bck = bin_at (av, victim_index);
fwd = bck->fd;
victim_index = largebin_index (size);
bck = bin_at (av, victim_index);//bck用于存储bins链表头
fwd = bck->fd;//fwd用于存储链表中第一个chunk
/* maintain large bins in sorted order */
if (fwd != bck)
/* Or with inuse bit to speed comparisons */
size |= PREV_INUSE;
/* if smaller than smallest, bypass loop below */
assert (chunk_main_arena (bck->bk));
if ((unsigned long) (size)
< (unsigned long) chunksize_nomask (bck->bk))
fwd = bck; //头结点
bck = bck->bk; //指向最后一个bin的指针
victim->fd_nextsize = fwd->fd; //将第一个bin的地址赋值给victim的fd_nextsize字段,因为largebin是从大到小排列的,所以,第一个bin是最大的bin,fd_nextsize是指向比自己小的bin,但这个链表是双向循环链表,所以,最后一个bin指向了第一个bin,也就是最大的bin
victim->bk_nextsize = fwd->fd->bk_nextsize;//第一个bin的bk_nextsize指向比自己大的bin,但同上,这个bin是最小的bin
fwd->fd->bk_nextsize = victim->bk_nextsize->fd_nextsize = victim;//将别的bin的对应字段设置为victim
assert (chunk_main_arena (fwd));
while ((unsigned long) size < chunksize_nomask (fwd))
fwd = fwd->fd_nextsize;
assert (chunk_main_arena (fwd));
if ((unsigned long) size
== (unsigned long) chunksize_nomask (fwd))
/* Always insert in the second position. */
fwd = fwd->fd;
victim->fd_nextsize = fwd;
victim->bk_nextsize = fwd->bk_nextsize;
fwd->bk_nextsize = victim;
victim->bk_nextsize->fd_nextsize = victim;
bck = fwd->bk;//bck用来存储比victim大的bin,fwd用来存储比victim小的bin
victim->fd_nextsize = victim->bk_nextsize = victim;
mark_bin (av, victim_index);
victim->bk = bck;
victim->fd = fwd;
fwd->bk = victim;
bck->fd = victim;
/* If we've processed as many chunks as we're allowed while
filling the cache, return one of the cached ones. */
if (return_cached
&& mp_.tcache_unsorted_limit > 0
&& tcache_unsorted_count > mp_.tcache_unsorted_limit)
return tcache_get (tc_idx);
#define MAX_ITERS 10000
if (++iters >= MAX_ITERS)
/* If all the small chunks we found ended up cached, return one now. */
if (return_cached)
return tcache_get (tc_idx);
If a large request, scan through the chunks of current bin in
sorted order to find smallest that fits. Use the skip list for this.
if (!in_smallbin_range (nb))
bin = bin_at (av, idx);
/* skip scan if empty or largest chunk is too small */
if ((victim = first (bin)) != bin
&& (unsigned long) chunksize_nomask (victim)
>= (unsigned long) (nb))
victim = victim->bk_nextsize;//此时victim指向最小的chunk
while (((unsigned long) (size = chunksize (victim)) <
(unsigned long) (nb)))
victim = victim->bk_nextsize;//反向循环,从小到大查找,找到不小于nb的chunk
/* Avoid removing the first entry for a size so that the skip
list does not have to be rerouted. */
if (victim != last (bin)
&& chunksize_nomask (victim)
== chunksize_nomask (victim->fd))
victim = victim->fd;
remainder_size = size - nb;
unlink (av, victim, bck, fwd);
/* Exhaust */
if (remainder_size < MINSIZE)
set_inuse_bit_at_offset (victim, size);
if (av != &main_arena)
set_non_main_arena (victim);
/* Split */
remainder = chunk_at_offset (victim, nb);
/* We cannot assume the unsorted list is empty and therefore
have to perform a complete insert here. */
bck = unsorted_chunks (av);
fwd = bck->fd;
if (__glibc_unlikely (fwd->bk != bck))
malloc_printerr ("malloc(): corrupted unsorted chunks");
remainder->bk = bck;
remainder->fd = fwd;
bck->fd = remainder;
fwd->bk = remainder;
if (!in_smallbin_range (remainder_size))
remainder->fd_nextsize = NULL;
remainder->bk_nextsize = NULL;
set_head (victim, nb | PREV_INUSE |
(av != &main_arena ? NON_MAIN_ARENA : 0));
set_head (remainder, remainder_size | PREV_INUSE);
set_foot (remainder, remainder_size);
check_malloced_chunk (av, victim, nb);
void *p = chunk2mem (victim);
alloc_perturb (p, bytes);
return p;
/* Take a chunk off a bin list.

   P is the chunk to remove; FD and BK are caller-supplied temporaries
   that receive P->fd and P->bk.  AV is not referenced in this body.

   Before touching any link the macro checks that P's size agrees with
   the prev_size stored in the next chunk and that both neighbours point
   back at P; any mismatch is reported through malloc_printerr.  For
   chunks outside the small-bin range whose fd_nextsize is set, the
   nextsize list is checked and repaired the same way: if FD is not yet
   on that list it takes over P's place, otherwise P is simply bypassed.

   __builtin_expect (cond, 0) is a compiler hint that cond is expected
   to be false, so the error paths can be laid out off the hot path.  */
#define unlink(AV, P, BK, FD) { \
    if (__builtin_expect (chunksize(P) != prev_size (next_chunk(P)), 0)) \
      malloc_printerr ("corrupted size vs. prev_size"); \
    FD = P->fd; \
    BK = P->bk; \
    if (__builtin_expect (FD->bk != P || BK->fd != P, 0)) \
      malloc_printerr ("corrupted double-linked list"); \
    else { \
        FD->bk = BK; \
        BK->fd = FD; \
        if (!in_smallbin_range (chunksize_nomask (P)) \
            && __builtin_expect (P->fd_nextsize != NULL, 0)) { \
            if (__builtin_expect (P->fd_nextsize->bk_nextsize != P, 0) \
                || __builtin_expect (P->bk_nextsize->fd_nextsize != P, 0)) \
              malloc_printerr ("corrupted double-linked list (not small)"); \
            if (FD->fd_nextsize == NULL) { \
                if (P->fd_nextsize == P) \
                  FD->fd_nextsize = FD->bk_nextsize = FD; \
                else { \
                    FD->fd_nextsize = P->fd_nextsize; \
                    FD->bk_nextsize = P->bk_nextsize; \
                    P->fd_nextsize->bk_nextsize = FD; \
                    P->bk_nextsize->fd_nextsize = FD; \
                  } \
              } else { \
                P->fd_nextsize->bk_nextsize = P->bk_nextsize; \
                P->bk_nextsize->fd_nextsize = P->fd_nextsize; \
              } \
          } \
      } \
}
Search for a chunk by scanning bins, starting with next largest
bin. This search is strictly by best-fit; i.e., the smallest
(with ties going to approximately the least recently used) chunk
that fits is selected.
The bitmap avoids needing to check that most blocks are nonempty.
The particular case of skipping all bins during warm-up phases
when no chunks have been returned yet is faster than it might look.
bin = bin_at (av, idx);
block = idx2block (idx);
map = av->binmap[block];
bit = idx2bit (idx);
for (;; )
/* Skip rest of block if there are no more set bits in this block. */
if (bit > map || bit == 0)
if (++block >= BINMAPSIZE) /* out of bins */
goto use_top;
while ((map = av->binmap[block]) == 0);
bin = bin_at (av, (block << BINMAPSHIFT));
bit = 1;
/* Advance to bin with set bit. There must be one. */
while ((bit & map) == 0)
bin = next_bin (bin);
bit <<= 1;
assert (bit != 0);
/* Inspect the bin. It is likely to be non-empty */
victim = last (bin);
/* If a false alarm (empty bin), clear the bit. */
if (victim == bin)
av->binmap[block] = map &= ~bit; /* Write through */
bin = next_bin (bin);
bit <<= 1;
size = chunksize (victim);
/* We know the first chunk in this bin is big enough to use. */
assert ((unsigned long) (size) >= (unsigned long) (nb));
remainder_size = size - nb;
/* unlink */
unlink (av, victim, bck, fwd);
/* Exhaust */
if (remainder_size < MINSIZE)
set_inuse_bit_at_offset (victim, size);
if (av != &main_arena)
set_non_main_arena (victim);
/* Split */
remainder = chunk_at_offset (victim, nb);
/* We cannot assume the unsorted list is empty and therefore
have to perform a complete insert here. */
bck = unsorted_chunks (av);
fwd = bck->fd;
if (__glibc_unlikely (fwd->bk != bck))
malloc_printerr ("malloc(): corrupted unsorted chunks 2");
remainder->bk = bck;
remainder->fd = fwd;
bck->fd = remainder;
fwd->bk = remainder;
/* advertise as last remainder */
if (in_smallbin_range (nb))
av->last_remainder = remainder;
if (!in_smallbin_range (remainder_size))
remainder->fd_nextsize = NULL;
remainder->bk_nextsize = NULL;
set_head (victim, nb | PREV_INUSE |
(av != &main_arena ? NON_MAIN_ARENA : 0));
set_head (remainder, remainder_size | PREV_INUSE);
set_foot (remainder, remainder_size);
check_malloced_chunk (av, victim, nb);
void *p = chunk2mem (victim);
alloc_perturb (p, bytes);
return p;
top chunk:
If large enough, split off the chunk bordering the end of memory
(held in av->top). Note that this is in accord with the best-fit
search rule. In effect, av->top is treated as larger (and thus
less well fitting) than any other available chunk since it can
be extended to be as large as necessary (up to system
We require that av->top always exists (i.e., has size >=
MINSIZE) after initialization, so if it would otherwise be
exhausted by current request, it is replenished. (The main
reason for ensuring it exists is that we may need MINSIZE space
to put in fenceposts in sysmalloc.)
victim = av->top;
size = chunksize (victim);
if ((unsigned long) (size) >= (unsigned long) (nb + MINSIZE))
remainder_size = size - nb;
remainder = chunk_at_offset (victim, nb);
av->top = remainder;
set_head (victim, nb | PREV_INUSE |
(av != &main_arena ? NON_MAIN_ARENA : 0));
set_head (remainder, remainder_size | PREV_INUSE);
check_malloced_chunk (av, victim, nb);
void *p = chunk2mem (victim);
alloc_perturb (p, bytes);
return p;
/* When we are using atomic ops to free fast chunks we can get
here for all block sizes. */
else if (atomic_load_relaxed (&av->have_fastchunks))
malloc_consolidate (av);
/* restore original bin index */
if (in_smallbin_range (nb))
idx = smallbin_index (nb);
idx = largebin_index (nb);
Otherwise, relay to handle system-dependent cases
void *p = sysmalloc (nb, av);
if (p != NULL)
alloc_perturb (p, bytes);
return p;
【推荐】凌霞软件回馈社区,博客园 & 1Panel & Halo 联合会员上线
【推荐】轻量又高性能的 SSH 工具 IShell:AI 加持,快人一步
· DeepSeek “源神”启动!「GitHub 热点速览」
· 微软正式发布.NET 10 Preview 1:开启下一代开发框架新篇章
· C# 集成 DeepSeek 模型实现 AI 私有化(本地部署与 API 调用教程)
· DeepSeek R1 简明指南:架构、训练、本地部署及硬件要求
· 2 本地部署DeepSeek模型构建本地知识库+联网搜索详细步骤