glibc-malloc-_int_malloc
_int_malloc 的参数:
mstate:
typedef struct malloc_state *mstate;
av 是指向记录当前堆状态的结构体的指针
bytes 就是要申请的 chunk 的大小(并不是用户 malloc 的大小)
下面提到的 nb 变量是一个 size_t ,也就是 一个 unsigned int 类型的变量,代表 malloc 的 chunk 的大小(全称应该是 n bytes,应该是这样理解,我猜的)
static void *
_int_malloc (mstate av, size_t bytes)
{
INTERNAL_SIZE_T nb; /* normalized request size */
unsigned int idx; /* associated bin index */
mbinptr bin; /* associated bin */
mchunkptr victim; /* inspected/selected chunk */
INTERNAL_SIZE_T size; /* its size */
int victim_index; /* its bin index */
mchunkptr remainder; /* remainder from a split */
unsigned long remainder_size; /* its size */
unsigned int block; /* bit map traverser */
unsigned int bit; /* bit map traverser */
unsigned int map; /* current word of binmap */
mchunkptr fwd; /* misc temp for linking */
mchunkptr bck; /* misc temp for linking */
const char *errstr = NULL;
/*
Convert request size to internal form by adding SIZE_SZ bytes
overhead plus possibly more to obtain necessary alignment and/or
to obtain a size of at least MINSIZE, the smallest allocatable
size. Also, checked_request2size traps (returning 0) request sizes
that are so large that they wrap around zero when padded and
aligned.
*/
/*检查 malloc 的大小是不是越界*/
/* checked_request2size 宏
#define checked_request2size(req, sz) \
if (REQUEST_OUT_OF_RANGE (req)) { \
__set_errno (ENOMEM); \
return 0; \
} \
(sz) = request2size (req);
REQUEST_OUT_OF_RANGE 宏
#define REQUEST_OUT_OF_RANGE(req) \
((unsigned long) (req) >= \
(unsigned long) (INTERNAL_SIZE_T) (-2 * MINSIZE))
*/
checked_request2size (bytes, nb);
/* There are no usable arenas. Fall back to sysmalloc to get a chunk from
mmap. */
/* av 就是主分配区,
* av == NULL 代表主分配区不存在,直接调用 sysmalloc
* mmap 分配
* 具体的运作我还没有看,现在主要是研究 _int_malloc
*/
if (__glibc_unlikely (av == NULL))
{
void *p = sysmalloc (nb, av);
if (p != NULL)
alloc_perturb (p, bytes);
return p;
}
/*
If the size qualifies as a fastbin, first check corresponding bin.
This code is safe to execute even if av is not yet initialized, so we
can try it without checking, which saves some time on this fast path.
*/
/* 先检查一下请求分配的内存大小是不是在 fastbin 的范围
* 是的话就从 fastbin 里面拿
* get_max_fast 能获得 fastbin 的最大大小
*/
if ((unsigned long) (nb) <= (unsigned long) (get_max_fast ()))
{
/* 通过 fastbin_index 宏拿到 chunk 位于哪一条 fastbin 中的 index*/
idx = fastbin_index (nb);
/* 通过 index 从堆(av)的 fastbin 里面到一个指向符合条件的 chunk 的指针*/
mfastbinptr *fb = &fastbin (av, idx);
mchunkptr pp = *fb;
//验证是不是真正拿到 chunk
do
{
victim = pp;
if (victim == NULL)
break;
}
while ((pp = catomic_compare_and_exchange_val_acq (fb, victim->fd, victim))
!= victim);
// 验证失败
if (victim != 0)
{
if (__builtin_expect (fastbin_index (chunksize (victim)) != idx, 0))
{
errstr = "malloc(): memory corruption (fast)";
errout:
malloc_printerr (check_action, errstr, chunk2mem (victim), av);
// malloc 返回 NULL
return NULL;
}
check_remalloced_chunk (av, victim, nb);
/* victim 是一个指向 chunk 的指针
* 使用 chunk2mem 宏把其转成指向 chunk 的 data 区段
*/
void *p = chunk2mem (victim);
// 把 chunk 的 data 区段 全部置 0
alloc_perturb (p, bytes);
/*
static void
alloc_perturb (char *p, size_t n)
{
if (__glibc_unlikely (perturb_byte))
memset (p, perturb_byte ^ 0xff, n);
}
*/
// malloc 结束返回一个 指向 chunk 的 data 区段的指针
return p;
}
}
/*
If a small request, check regular bin. Since these "smallbins"
hold one size each, no searching within bins is necessary.
(For a large request, we need to wait until unsorted chunks are
processed to find best fit. But for small ones, fits are exact
anyway, so we can check now, which is faster.)
*/
// 如果请求分配的大小超过了 fastbin 的最大大小则从 smallbin 中取 chunk
if (in_smallbin_range (nb))
{
// 通过 size 获得对应的 chunk 位于的 bin 的 index
idx = smallbin_index (nb);
// 通过 index 获得 smallbin 的表头
bin = bin_at (av, idx);
// victim 是 bin 中的最后的一个 chunk
// 因为是 LIFO(Last In First Out),这个最后一个 chunk 指的是最近一次 free 加到 bin 中的 chunk
// 如果 victim 和 bin 是一样的话,说明这条 bin 为空
if ((victim = last (bin)) != bin)
{
if (victim == 0) /* initialization check */
malloc_consolidate (av);
else
{
// 获取 victim 的 下一个 chunk 存到 bck,因为一下要把 victim 从 bin 中移除
bck = victim->bk;
// 检查 chunk 是不是被损坏
if (__glibc_unlikely (bck->fd != victim))
{
errstr = "malloc(): smallbin double linked list corrupted";
goto errout;
}
// 把 chunk 的标记为 正在使用
set_inuse_bit_at_offset (victim, nb);
// 把 chunk 从 bin 中拿下来,这个操作跟 unlink 一样。
bin->bk = bck;
bck->fd = bin;
// 判断当前是不是处于 主分配区
if (av != &main_arena)
// 要是不是 位于主分配区则设置 main_arena bit 为 0
victim->size |= NON_MAIN_ARENA;
// 检查 chunk 是不是真的已经被 malloc
check_malloced_chunk (av, victim, nb);
void *p = chunk2mem (victim);
alloc_perturb (p, bytes);
return p;
}
}
}
/*
If this is a large request, consolidate fastbins before continuing.
While it might look excessive to kill all fastbins before
even seeing if there is space available, this avoids
fragmentation problems normally associated with fastbins.
Also, in practice, programs tend to have runs of either small or
large requests, but less often mixtures, so consolidation is not
invoked all that often in most programs. And the programs that
it is called frequently in otherwise tend to fragment.
*/
else
{
idx = largebin_index (nb);
if (have_fastchunks (av))
malloc_consolidate (av);
}
/*
Process recently freed or remaindered chunks, taking one only if
it is exact fit, or, if this a small request, the chunk is remainder from
the most recent non-exact fit. Place other traversed chunks in
bins. Note that this step is the only place in any routine where
chunks are placed in bins.
The outer loop here is needed because we might not realize until
near the end of malloc that we should have consolidated, so must
do so and retry. This happens at most once, and only when we would
otherwise need to expand memory to service a "small" request.
*/
for (;; )
{
int iters = 0;
while ((victim = unsorted_chunks (av)->bk) != unsorted_chunks (av))
{
bck = victim->bk;
if (__builtin_expect (victim->size <= 2 * SIZE_SZ, 0)
|| __builtin_expect (victim->size > av->system_mem, 0))
malloc_printerr (check_action, "malloc(): memory corruption",
chunk2mem (victim), av);
size = chunksize (victim);
/*
If a small request, try to use last remainder if it is the
only chunk in unsorted bin. This helps promote locality for
runs of consecutive small requests. This is the only
exception to best-fit, and applies only when there is
no exact fit for a small chunk.
*/
if (in_smallbin_range (nb) &&
bck == unsorted_chunks (av) &&
victim == av->last_remainder &&
(unsigned long) (size) > (unsigned long) (nb + MINSIZE))
{
/* split and reattach remainder */
remainder_size = size - nb;
remainder = chunk_at_offset (victim, nb);
unsorted_chunks (av)->bk = unsorted_chunks (av)->fd = remainder;
av->last_remainder = remainder;
remainder->bk = remainder->fd = unsorted_chunks (av);
if (!in_smallbin_range (remainder_size))
{
remainder->fd_nextsize = NULL;
remainder->bk_nextsize = NULL;
}
set_head (victim, nb | PREV_INUSE |
(av != &main_arena ? NON_MAIN_ARENA : 0));
set_head (remainder, remainder_size | PREV_INUSE);
set_foot (remainder, remainder_size);
check_malloced_chunk (av, victim, nb);
void *p = chunk2mem (victim);
alloc_perturb (p, bytes);
return p;
}
/* remove from unsorted list */
unsorted_chunks (av)->bk = bck;
bck->fd = unsorted_chunks (av);
/* Take now instead of binning if exact fit */
if (size == nb)
{
set_inuse_bit_at_offset (victim, size);
if (av != &main_arena)
victim->size |= NON_MAIN_ARENA;
check_malloced_chunk (av, victim, nb);
void *p = chunk2mem (victim);
alloc_perturb (p, bytes);
return p;
}
/* place chunk in bin */
if (in_smallbin_range (size))
{
victim_index = smallbin_index (size);
bck = bin_at (av, victim_index);
fwd = bck->fd;
}
else
{
victim_index = largebin_index (size);
bck = bin_at (av, victim_index);
fwd = bck->fd;
/* maintain large bins in sorted order */
if (fwd != bck)
{
/* Or with inuse bit to speed comparisons */
size |= PREV_INUSE;
/* if smaller than smallest, bypass loop below */
assert ((bck->bk->size & NON_MAIN_ARENA) == 0);
if ((unsigned long) (size) < (unsigned long) (bck->bk->size))
{
fwd = bck;
bck = bck->bk;
victim->fd_nextsize = fwd->fd;
victim->bk_nextsize = fwd->fd->bk_nextsize;
fwd->fd->bk_nextsize = victim->bk_nextsize->fd_nextsize = victim;
}
else
{
assert ((fwd->size & NON_MAIN_ARENA) == 0);
while ((unsigned long) size < fwd->size)
{
fwd = fwd->fd_nextsize;
assert ((fwd->size & NON_MAIN_ARENA) == 0);
}
if ((unsigned long) size == (unsigned long) fwd->size)
/* Always insert in the second position. */
fwd = fwd->fd;
else
{
victim->fd_nextsize = fwd;
victim->bk_nextsize = fwd->bk_nextsize;
fwd->bk_nextsize = victim;
victim->bk_nextsize->fd_nextsize = victim;
}
bck = fwd->bk;
}
}
else
victim->fd_nextsize = victim->bk_nextsize = victim;
}
mark_bin (av, victim_index);
victim->bk = bck;
victim->fd = fwd;
fwd->bk = victim;
bck->fd = victim;
#define MAX_ITERS 10000
if (++iters >= MAX_ITERS)
break;
}
/*
If a large request, scan through the chunks of current bin in
sorted order to find smallest that fits. Use the skip list for this.
*/
if (!in_smallbin_range (nb))
{
bin = bin_at (av, idx);
/* skip scan if empty or largest chunk is too small */
if ((victim = first (bin)) != bin &&
(unsigned long) (victim->size) >= (unsigned long) (nb))
{
victim = victim->bk_nextsize;
while (((unsigned long) (size = chunksize (victim)) <
(unsigned long) (nb)))
victim = victim->bk_nextsize;
/* Avoid removing the first entry for a size so that the skip
list does not have to be rerouted. */
if (victim != last (bin) && victim->size == victim->fd->size)
victim = victim->fd;
remainder_size = size - nb;
unlink (av, victim, bck, fwd);
/* Exhaust */
if (remainder_size < MINSIZE)
{
set_inuse_bit_at_offset (victim, size);
if (av != &main_arena)
victim->size |= NON_MAIN_ARENA;
}
/* Split */
else
{
remainder = chunk_at_offset (victim, nb);
/* We cannot assume the unsorted list is empty and therefore
have to perform a complete insert here. */
bck = unsorted_chunks (av);
fwd = bck->fd;
if (__glibc_unlikely (fwd->bk != bck))
{
errstr = "malloc(): corrupted unsorted chunks";
goto errout;
}
remainder->bk = bck;
remainder->fd = fwd;
bck->fd = remainder;
fwd->bk = remainder;
if (!in_smallbin_range (remainder_size))
{
remainder->fd_nextsize = NULL;
remainder->bk_nextsize = NULL;
}
set_head (victim, nb | PREV_INUSE |
(av != &main_arena ? NON_MAIN_ARENA : 0));
set_head (remainder, remainder_size | PREV_INUSE);
set_foot (remainder, remainder_size);
}
check_malloced_chunk (av, victim, nb);
void *p = chunk2mem (victim);
alloc_perturb (p, bytes);
return p;
}
}
/*
Search for a chunk by scanning bins, starting with next largest
bin. This search is strictly by best-fit; i.e., the smallest
(with ties going to approximately the least recently used) chunk
that fits is selected.
The bitmap avoids needing to check that most blocks are nonempty.
The particular case of skipping all bins during warm-up phases
when no chunks have been returned yet is faster than it might look.
*/
++idx;
bin = bin_at (av, idx);
block = idx2block (idx);
map = av->binmap[block];
bit = idx2bit (idx);
for (;; )
{
/* Skip rest of block if there are no more set bits in this block. */
if (bit > map || bit == 0)
{
do
{
if (++block >= BINMAPSIZE) /* out of bins */
goto use_top;
}
while ((map = av->binmap[block]) == 0);
bin = bin_at (av, (block << BINMAPSHIFT));
bit = 1;
}
/* Advance to bin with set bit. There must be one. */
while ((bit & map) == 0)
{
bin = next_bin (bin);
bit <<= 1;
assert (bit != 0);
}
/* Inspect the bin. It is likely to be non-empty */
victim = last (bin);
/* If a false alarm (empty bin), clear the bit. */
if (victim == bin)
{
av->binmap[block] = map &= ~bit; /* Write through */
bin = next_bin (bin);
bit <<= 1;
}
else
{
size = chunksize (victim);
/* We know the first chunk in this bin is big enough to use. */
assert ((unsigned long) (size) >= (unsigned long) (nb));
remainder_size = size - nb;
/* unlink */
unlink (av, victim, bck, fwd);
/* Exhaust */
if (remainder_size < MINSIZE)
{
set_inuse_bit_at_offset (victim, size);
if (av != &main_arena)
victim->size |= NON_MAIN_ARENA;
}
/* Split */
else
{
remainder = chunk_at_offset (victim, nb);
/* We cannot assume the unsorted list is empty and therefore
have to perform a complete insert here. */
bck = unsorted_chunks (av);
fwd = bck->fd;
if (__glibc_unlikely (fwd->bk != bck))
{
errstr = "malloc(): corrupted unsorted chunks 2";
goto errout;
}
remainder->bk = bck;
remainder->fd = fwd;
bck->fd = remainder;
fwd->bk = remainder;
/* advertise as last remainder */
if (in_smallbin_range (nb))
av->last_remainder = remainder;
if (!in_smallbin_range (remainder_size))
{
remainder->fd_nextsize = NULL;
remainder->bk_nextsize = NULL;
}
set_head (victim, nb | PREV_INUSE |
(av != &main_arena ? NON_MAIN_ARENA : 0));
set_head (remainder, remainder_size | PREV_INUSE);
set_foot (remainder, remainder_size);
}
check_malloced_chunk (av, victim, nb);
void *p = chunk2mem (victim);
alloc_perturb (p, bytes);
return p;
}
}
use_top:
/*
If large enough, split off the chunk bordering the end of memory
(held in av->top). Note that this is in accord with the best-fit
search rule. In effect, av->top is treated as larger (and thus
less well fitting) than any other available chunk since it can
be extended to be as large as necessary (up to system
limitations).
We require that av->top always exists (i.e., has size >=
MINSIZE) after initialization, so if it would otherwise be
exhausted by current request, it is replenished. (The main
reason for ensuring it exists is that we may need MINSIZE space
to put in fenceposts in sysmalloc.)
*/
victim = av->top;
size = chunksize (victim);
if ((unsigned long) (size) >= (unsigned long) (nb + MINSIZE))
{
remainder_size = size - nb;
remainder = chunk_at_offset (victim, nb);
av->top = remainder;
set_head (victim, nb | PREV_INUSE |
(av != &main_arena ? NON_MAIN_ARENA : 0));
set_head (remainder, remainder_size | PREV_INUSE);
check_malloced_chunk (av, victim, nb);
void *p = chunk2mem (victim);
alloc_perturb (p, bytes);
return p;
}
/* When we are using atomic ops to free fast chunks we can get
here for all block sizes. */
else if (have_fastchunks (av))
{
malloc_consolidate (av);
/* restore original bin index */
if (in_smallbin_range (nb))
idx = smallbin_index (nb);
else
idx = largebin_index (nb);
}
/*
Otherwise, relay to handle system-dependent cases
*/
else
{
void *p = sysmalloc (nb, av);
if (p != NULL)
alloc_perturb (p, bytes);
return p;
}
}
}
关于上文提到的重要的宏
get_max_fast() 宏:
这个宏是用来获取 fastbin 的上限大小,主要还是这个 global_max_fast,这个变量是 通过调用 set_max_fast 宏来获得具体的值的。
#define set_max_fast(s) \
global_max_fast = (((s) == 0) \
? SMALLBIN_WIDTH : ((s + SIZE_SZ) & ~MALLOC_ALIGN_MASK))
#define get_max_fast() global_max_fast
fastbin_index(sz)宏:
传入的参数是一个 chunk 的 大小,然后通过这个大小确定 这个大小对应的 bin 的 index,其实 fastbin 都是经过对齐的,在 32 bit 系统下面除以 8(>> 3),在 64 bit 系统下面除以 16(>> 4),就能拿到一个不是 index 的 index,因为有没有 0 号和 1 号 bin,所以前面得到的数还要 -2 才能得到真正的 index。
#define fastbin_index(sz) \
((((unsigned int) (sz)) >> (SIZE_SZ == 8 ? 4 : 3)) - 2)
所以 index 和 size 的关系是:
size = (index + 2) << (SIZE_SZ == 8 ? 4 : 3)
MAX_FAST_SIZE 宏:
这个宏定义了最大 fastbin 的 size ,在 32 bit 下面是 64 B,在 64 bit 下面是 128 B
#define MAX_FAST_SIZE (80 * SIZE_SZ / 4)
in_smallbin_range 宏:
参数是一个 sz 代表大小,检查这个 size 是不是落在 smallbin 里面。
#define in_smallbin_range(sz) \
((unsigned long) (sz) < (unsigned long) MIN_LARGE_SIZE)
smallbin_index 宏:
参数是一个 sz 代表大小,用来获取 这个 size 的 chunk 位于那条 bin 中。
#define smallbin_index(sz) \
((SMALLBIN_WIDTH == 16 ? (((unsigned) (sz)) >> 4) : (((unsigned) (sz)) >> 3))\
+ SMALLBIN_CORRECTION)
因为在 smallbin 里面 size 和 index 的关系是:
size=2 * SIZE_SZ * index
要通过 size 获得 index 直接除以 SIZE_SZ (32bit : 4,64bit :8 )再除以 2 就能得到 index。这个宏用的不是除法而是进行位运算,其实很少用除法,可能是效率的问题,我写了个 demo 逆向看了一下汇编,位移的指令数要比除法少,当然这个可能还是得看 是不是大数运算(我队友说的)。
bin_at 宏:
/* addressing -- note that bin_at(0) does not exist */
#define bin_at(m, i) \
(mbinptr) (((char *) &((m)->bins[((i) - 1) * 2])) \
- offsetof (struct malloc_chunk, fd))
next_bin 宏:
用于获得下一条 bin 的地址,只需要将当前 bin 的地址向后移动两个指针的长度就得到下一个 bin 的链表头地址。
/* analog of ++bin */
#define next_bin(b) ((mbinptr) ((char *) (b) + (sizeof (mchunkptr) << 1)))
first, last宏:
获得当前 chunk 的上一个(first)或者下一个(last) chunk。
/* Reminders about list directionality within bins */
#define first(b) ((b)->fd)
#define last(b) ((b)->bk)
set_inuse_bit_at_offset 宏:
这个宏用来设置物理位置上面的上一个 chunk 的 inuse bit,表示 p 正在使用
#define set_inuse_bit_at_offset(p, s) \
(((mchunkptr) (((char *) (p)) + (s)))->size |= PREV_INUSE)
p 是要标记为 正在使用 的chunk
s 是 p 的 size
其实这个的计算方式很简单,不过可能会混淆,可能会有人会以为加上 size 不对啊,不是还有 fd 和 bk 吗?其实 正在使用 的 chunk 的 fd 和 bk是没有意义的,所以可以在上面写入用户数据,所以只用加上 size 就能得到一个指向下一个 malloc_chunk 的指针,然后在它的 size 上面设置 inuse bit, 说明上一个 chunk 正在使用。
check_malloced_chunk 宏:
其实这个宏最终会调用 do_check_malloced_chunk 和 do_check_remalloced_chunk函数
static void
do_check_remalloced_chunk (mstate av, mchunkptr p, INTERNAL_SIZE_T s)
{
// 清除 chunk 的标志位,取得 chunk 的 size
INTERNAL_SIZE_T sz = p->size & ~(PREV_INUSE | NON_MAIN_ARENA);
if (!chunk_is_mmapped (p))
{
assert (av == arena_for_chunk (p));
if (chunk_non_main_arena (p))
assert (av != &main_arena);
else
assert (av == &main_arena);
}
do_check_inuse_chunk (av, p);
/* Legal size ... */
assert ((sz & MALLOC_ALIGN_MASK) == 0);
assert ((unsigned long) (sz) >= MINSIZE);
/* ... and alignment */
assert (aligned_OK (chunk2mem (p)));
/* chunk is less than MINSIZE more than request */
assert ((long) (sz) - (long) (s) >= 0);
assert ((long) (sz) - (long) (s + MINSIZE) < 0);
}
static void
do_check_malloced_chunk (mstate av, mchunkptr p, INTERNAL_SIZE_T s)
{
/* same as recycled case ... */
do_check_remalloced_chunk (av, p, s);
/*
... plus, must obey implementation invariant that prev_inuse is
always true of any allocated chunk; i.e., that each allocated
chunk borders either a previously allocated and still in-use
chunk, or the base of its memory arena. This is ensured
by making all allocations from the `lowest' part of any found
chunk. This does not necessarily hold however for chunks
recycled via fastbins.
*/
assert (prev_inuse (p));
}
注:这篇文章参考了 华庭的 ptmalloc 分析