gcc为函数生成的指令中为什么会有多处ret(返回指令)
函数返回
如果一个函数的返回点比较多,而且函数比较长,想通过调试器知道函数是从哪个位置退出的就会比较麻烦。有些资料说,编译器一般会让所有return最终经过同一条ret(机器指令)返回,所以只要在该指令的位置打断点即可。这种说法对于未开启优化时生成的代码可能是正确的,但在开启优化后生成的二进制中,经常可以看到一个函数内有多处ret,所以这种说法并不准确。
return语义
在对return语句的处理过程中,所有的return生成的指令的确是跳转到相同位置(return_label)。
/* Output a return with no value.

No return instruction is emitted here: after the two pending-stack-adjust
calls, the function emits a jump to return_label. */
static void
expand_null_return_1 (void)
{
/* NOTE(review): judging by the names, these discard/flush any stack
adjustment that is still pending -- their definitions are not shown. */
clear_pending_stack_adjust ();
do_pending_stack_adjust ();
/* The return is represented as a jump to return_label. */
emit_jump (return_label);
}
jump redirect
try_optimize_cfg函数中,如果满足
/* Try to change a branch to a return to just that return. */
rtx_insn *ret, *use;
if (single_succ_p (b)
&& onlyjump_p (BB_END (b))
&& bb_is_just_return (single_succ (b), &ret, &use))
则尝试把该跳转的目标改为PATTERN (ret),即后继基本块中那条返回指令的模式(而不是一个普通的label),这样跳转指令本身就变成了返回指令。
if (redirect_jump (as_a <rtx_jump_insn *> (BB_END (b)),
PATTERN (ret), 0))
也就是
/* Create some permanent unique rtl objects shared between all functions.

Only the creation of simple_return_rtx is shown in this excerpt; the
parts marked with ///... are elided. */
void
init_emit_once (void)
{
///...
/* Build a SIMPLE_RETURN rtx in VOIDmode and store it in
simple_return_rtx. */
simple_return_rtx = gen_rtx_fmt_ (SIMPLE_RETURN, VOIDmode);
///...
}
中创建的SIMPLE_RETURN特殊类型表达式。如果跳转的目标是这个表达式(而不是普通的label),生成的汇编代码(对386系统来说)就是一条ret机器指令。
/* Return true if BB contains just a return and possibly a USE of the
return value. Fill in *RET and *USE with the return and use insns
if any found, otherwise NULL.

Insns for which NONDEBUG_INSN_P is false are ignored. The USE is only
accepted before the return and at most once; any other nondebug insn
makes the function return false. */
static bool
bb_is_just_return (basic_block bb, rtx_insn **ret, rtx_insn **use)
{
/* Both outputs start out as NULL, including on the early-exit paths. */
*ret = *use = NULL;
rtx_insn *insn;
/* The function's exit block itself never qualifies. */
if (bb == EXIT_BLOCK_PTR_FOR_FN (cfun))
return false;
FOR_BB_INSNS (bb, insn)
if (NONDEBUG_INSN_P (insn))
{
/* First insn whose pattern satisfies ANY_RETURN_P: remember it. */
if (!*ret && ANY_RETURN_P (PATTERN (insn)))
*ret = insn;
/* A USE of a register marked REG_FUNCTION_VALUE_P, seen before any
return and not preceded by another such USE: remember it. */
else if (!*ret && !*use && GET_CODE (PATTERN (insn)) == USE
&& REG_P (XEXP (PATTERN (insn), 0))
&& REG_FUNCTION_VALUE_P (XEXP (PATTERN (insn), 0)))
*use = insn;
/* Anything else -- including any nondebug insn after the return or a
second USE -- means the block is not "just a return". */
else
return false;
}
/* True only if a return insn was actually found. */
return !!*ret;
}
/* Do simple CFG optimizations - basic block merging, simplifying of jump
instructions etc. Return nonzero if changes were made.

Only the "branch to a return" step is shown in this excerpt; the parts
marked with ///... are elided. */
static bool
try_optimize_cfg (int mode)
{
///...
/* Visit blocks starting after the entry block and stopping at the exit
block. The for statement has no increment clause; b is presumably
advanced in the elided code. */
for (b = ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb; b
!= EXIT_BLOCK_PTR_FOR_FN (cfun);)
{
///...
/* Try to change a branch to a return to just that return. */
rtx_insn *ret, *use;
/* Candidate: b has a single successor, its last insn satisfies
onlyjump_p, and that successor holds just a return (plus an optional
USE of the return value). */
if (single_succ_p (b)
&& onlyjump_p (BB_END (b))
&& bb_is_just_return (single_succ (b), &ret, &use))
{
/* Retarget the jump at the end of b to the pattern of the return insn
found in the successor. */
if (redirect_jump (as_a <rtx_jump_insn *> (BB_END (b)),
PATTERN (ret), 0))
{
/* If the successor had a USE of the return value, place a copy of it
before the last insn of b. */
if (use)
emit_insn_before (copy_insn (PATTERN (use)),
BB_END (b));
if (dump_file)
fprintf (dump_file, "Changed jump %d->%d to return.\n",
b->index, single_succ (b)->index);
/* b's single outgoing edge now leads to the exit block, and its
EDGE_CROSSING flag is cleared. */
redirect_edge_succ (single_succ_edge (b),
EXIT_BLOCK_PTR_FOR_FN (cfun));
single_succ_edge (b)->flags &= ~EDGE_CROSSING;
changed_here = true;
}
}
///...
}
///...
}
epilogue
在函数主体逻辑生成之后,会在函数的结尾生成函数的epilogue,这部分逻辑可以由不同的后端自行定制。
/* Return a sequence to be used as the epilogue for the current function,
or NULL.

NULL is returned when targetm.have_epilogue () is false. Otherwise the
sequence consists of a NOTE_INSN_EPILOGUE_BEG note followed by whatever
targetm.gen_epilogue () produced. */
static rtx_insn *
make_epilogue_seq (void)
{
if (!targetm.have_epilogue ())
return NULL;
/* Everything emitted until end_sequence () below is collected into a
separate insn sequence. */
start_sequence ();
emit_note (NOTE_INSN_EPILOGUE_BEG);
/* The epilogue body itself comes from the target hook. */
rtx_insn *seq = targetm.gen_epilogue ();
if (seq)
emit_jump_insn (seq);
/* Retain a map of the epilogue insns. */
/* NOTE(review): only the emit above is guarded by `if (seq)'; the two
calls below receive seq even when it is NULL -- confirm they accept
that. */
record_insns (seq, NULL, &epilogue_insn_hash);
set_insn_locations (seq, epilogue_location);
/* From here on seq is the whole collected sequence (note included), and
returnjump is its last insn. */
seq = get_insns ();
rtx_insn *returnjump = get_last_insn ();
end_sequence ();
/* Only when the last insn is a jump is set_return_jump_label called on
it. */
if (JUMP_P (returnjump))
set_return_jump_label (returnjump);
return seq;
}
复杂epilogue
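如果一些epilogue比较复杂,可能涉及到栈帧的调整,这个时候basic_block_reorder流程就有可能把这段代码重新复制一份。从下面的copy_bb_p可以看到:当code_may_grow为真且optimize_bb_for_speed_p (bb)成立时,大小上限max_size会从uncond_jump_length放大为uncond_jump_length * PARAM_MAX_GROW_COPY_BB_INSNS(在386系统下这个上限为16);只要bb内各指令的最小长度之和不超过该上限,这个bb就有可能被拷贝一份。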
///@file:bb-reorder.c
/* Return true when BB can and should be copied. CODE_MAY_GROW is true
when code size is allowed to grow by duplication.

The decision is a series of rejections followed by a size test: the sum
of get_attr_min_length over the block's insns must not exceed a limit
derived from uncond_jump_length. */
static bool
copy_bb_p (const_basic_block bb, int code_may_grow)
{
int size = 0;
/* Baseline limit: the length of an unconditional jump. */
int max_size = uncond_jump_length;
rtx_insn *insn;
/* Blocks with zero frequency are never copied. */
if (!bb->frequency)
return false;
/* Fewer than two predecessors: not copied. */
if (EDGE_COUNT (bb->preds) < 2)
return false;
if (!can_duplicate_block_p (bb))
return false;
/* Avoid duplicating blocks which have many successors (PR/13430). */
if (EDGE_COUNT (bb->succs) > 8)
return false;
/* When growth is allowed and the block is optimized for speed, scale the
limit by the PARAM_MAX_GROW_COPY_BB_INSNS parameter. */
if (code_may_grow && optimize_bb_for_speed_p (bb))
max_size *= PARAM_VALUE (PARAM_MAX_GROW_COPY_BB_INSNS);
/* Add up the minimum length of every insn for which INSN_P holds. */
FOR_BB_INSNS (bb, insn)
{
if (INSN_P (insn))
size += get_attr_min_length (insn);
}
/* The limit is inclusive. */
if (size <= max_size)
return true;
if (dump_file)
{
fprintf (dump_file,
"Block %d can't be copied because its size = %d.\n",
bb->index, size);
}
return false;
}
机器描述
在i386.md中描述了对应的机器指令。
;; Used by x86_machine_dependent_reorg to avoid penalty on single byte RET
;; instruction Athlon and K8 have.
;;
;; The pattern is a simple_return paired with an UNSPEC_REP unspec and is
;; only valid once reload_completed is true.  The assembly text comes from
;; ix86_output_function_return (true).  The attributes declare a length
;; of 2 with prefix_rep set to 1 and no immediate or modrm byte.
(define_insn "simple_return_internal_long"
[(simple_return)
(unspec [(const_int 0)] UNSPEC_REP)]
"reload_completed"
"* return ix86_output_function_return (true);"
[(set_attr "length" "2")
(set_attr "atom_unit" "jeu")
(set_attr "length_immediate" "0")
(set_attr "prefix_rep" "1")
(set_attr "modrm" "0")])
;; A simple_return that additionally uses operand 0, an SImode
;; const_int_operand.  It is only valid once reload_completed is true and
;; its output template is "%!ret\t%0", i.e. a ret taking operand 0 as its
;; immediate (presumably the number of bytes to pop, going by the pattern
;; name -- confirm against the i386 backend).
;;
;; The insn is split, via ix86_split_simple_return_pop_internal, when
;; cfun->machine->function_return_type is not indirect_branch_keep.
;; The attributes declare a length of 3, of which 2 are the immediate.
(define_insn_and_split "simple_return_pop_internal"
[(simple_return)
(use (match_operand:SI 0 "const_int_operand"))]
"reload_completed"
"%!ret\t%0"
"&& cfun->machine->function_return_type != indirect_branch_keep"
[(const_int 0)]
"ix86_split_simple_return_pop_internal (operands[0]); DONE;"
[(set_attr "length" "3")
(set_attr "atom_unit" "jeu")
(set_attr "length_immediate" "2")
(set_attr "modrm" "0")
(set_attr "maybe_prefix_bnd" "1")])
栗子
在开启优化的版本中有多处ret指令。
tsecer@harry: cat multi.return.cpp
// Returns y multiplied by x for x in 1..4; each case has its own return
// statement.
//
// NOTE: there is no default case and no return after the switch, so for
// any other x control flows off the end of a non-void function, which is
// undefined behavior in C++.
int foo(int x, int y)
{
switch (x)
{
case 1: return y * 1;
case 2: return y * 2;
case 3: return y * 3;
case 4: return y * 4;
}
}
tsecer@harry: gcc -O3 -S multi.return.cpp
tsecer@harry: cat multi.return.s
# -O3 output for foo(int x, int y): x arrives in %edi, y in %esi, and the
# result is returned in %eax.  Each of the four switch cases ends in its
# own ret instruction.
.file "multi.return.cpp"
.text
.p2align 4,,15
.globl _Z3fooii
.type _Z3fooii, @function
_Z3fooii:
.LFB0:
.cfi_startproc
# Dispatch on x: ==2 -> .L3, <2 -> .L12, ==3 -> .L6, otherwise fall through.
cmpl $2, %edi
je .L3
jle .L12
cmpl $3, %edi
je .L6
# Fall-through path: return y * 4.  x is not compared against 4 here.
leal 0(,%rsi,4), %eax
ret
.p2align 4,,10
.p2align 3
.L6:
# x == 3: return y * 3.
leal (%rsi,%rsi,2), %eax
ret
.p2align 4,,10
.p2align 3
.L12:
# x < 2: return y (the "case 1" result); x is not compared against 1 here.
movl %esi, %eax
ret
.p2align 4,,10
.p2align 3
.L3:
# x == 2: return y * 2.
leal (%rsi,%rsi), %eax
ret
.cfi_endproc
.LFE0:
.size _Z3fooii, .-_Z3fooii
.section .note.GNU-stack,"",@progbits
tsecer@harry:
bb拷贝栗子
tsecer@harry: cat bb.copy.cpp
// Passes a local 20-element int array to the external function bar, then
// returns y multiplied by x for x in 1..4 and x * 5 for every other x.
// The local array gives the function a stack frame, so every return path
// has to go through a non-trivial epilogue.
int foo(int x, int y)
{
int a[20];
// bar is only declared here; its definition is not part of this file.
extern int bar(int *);
bar(a);
switch (x)
{
case 1: return y * 1;
case 2: return y * 2;
case 3 : return y *3;
case 4: return y *4;
}
// Reached for any x outside 1..4.
return x * 5;
}
tsecer@harry: gcc -O3 -S bb.copy.cpp
tsecer@harry: cat bb.copy.s
# -O3 output for foo(int x, int y) from bb.copy.cpp.  The epilogue
# (addq $88, %rsp / popq %rbx / popq %rbp / ret) appears four times, once
# per exit path, instead of being shared.
.file "bb.copy.cpp"
.text
.p2align 4,,15
.globl _Z3fooii
.type _Z3fooii, @function
_Z3fooii:
.LFB0:
.cfi_startproc
# Prologue: save %rbp and %rbx, keep x in %ebp and y in %ebx across the
# call, and reserve 88 bytes of stack.
pushq %rbp
.cfi_def_cfa_offset 16
.cfi_offset 6, -16
pushq %rbx
.cfi_def_cfa_offset 24
.cfi_offset 3, -24
movl %edi, %ebp
movl %esi, %ebx
subq $88, %rsp
.cfi_def_cfa_offset 112
# bar(a): the array starts at the current stack pointer.
movq %rsp, %rdi
call _Z3barPi
# Dispatch on x: ==2 -> .L3, <2 -> .L13, ==3 -> .L6.
cmpl $2, %ebp
je .L3
jle .L13
cmpl $3, %ebp
je .L6
# Compute y * 4 speculatively; if x != 4 go to the x * 5 path instead.
cmpl $4, %ebp
leal 0(,%rbx,4), %eax
jne .L2
.L1:
# Epilogue copy 1: the result is already in %eax (y * 4, or y when
# reached from .L13).
addq $88, %rsp
.cfi_remember_state
.cfi_def_cfa_offset 24
popq %rbx
.cfi_def_cfa_offset 16
popq %rbp
.cfi_def_cfa_offset 8
ret
.p2align 4,,10
.p2align 3
.L6:
# Epilogue copy 2, with the y * 3 computation interleaved.
.cfi_restore_state
addq $88, %rsp
.cfi_remember_state
.cfi_def_cfa_offset 24
leal (%rbx,%rbx,2), %eax
popq %rbx
.cfi_def_cfa_offset 16
popq %rbp
.cfi_def_cfa_offset 8
ret
.p2align 4,,10
.p2align 3
.L13:
# x < 2: load y as the result and reuse .L1 when x == 1; otherwise fall
# through to the x * 5 path.
.cfi_restore_state
cmpl $1, %ebp
movl %ebx, %eax
je .L1
.L2:
# Epilogue copy 3, with the x * 5 computation interleaved.
addq $88, %rsp
.cfi_remember_state
.cfi_def_cfa_offset 24
leal 0(%rbp,%rbp,4), %eax
popq %rbx
.cfi_def_cfa_offset 16
popq %rbp
.cfi_def_cfa_offset 8
ret
.p2align 4,,10
.p2align 3
.L3:
# Epilogue copy 4, with the y * 2 computation interleaved.
.cfi_restore_state
addq $88, %rsp
.cfi_def_cfa_offset 24
leal (%rbx,%rbx), %eax
popq %rbx
.cfi_def_cfa_offset 16
popq %rbp
.cfi_def_cfa_offset 8
ret
.cfi_endproc
.LFE0:
.size _Z3fooii, .-_Z3fooii
.section .note.GNU-stack,"",@progbits
tsecer@harry: