在C代码中将结构体变量作为参数传递效率忒低

在C语言编程中,我们很少看见有人将一个结构体变量直接作为参数进行传递,因为这样做效率较低。本文尝试从反汇编的角度给出其中的缘由。

对于C语言来说,所有的参数传递都是值传递。如果一个变量为指针,那么传递的就是指针变量的值(即某个内存地址)。

那么,如果一个参数是结构体变量(包含多个成员),它是怎么从caller传递到callee的呢?

先看下面的代码片段:

o foo1.c

 1 #define FALSE 0
 2 #define TRUE  (!0)
 3 
 4 typedef struct point_s {
 5     int x;
 6     int y;
 7     int z;
 8 } point_t;
 9 
10 static int cmp(point_t a, point_t b)
11 {
12     if (a.x != b.x)
13         return FALSE;
14     if (a.y != b.y)
15         return FALSE;
16     if (a.z != b.z)
17         return FALSE;
18     return TRUE;
19 }
20 
21 int main(int argc, char *argv[])
22 {
23     point_t a = { .x = 0x1, .y = 0x2, .z = +0x3 };
24     point_t b = { .x = 0x1, .y = 0x2, .z = -0x3 };
25     return !cmp(a, b);
26 }

o 对foo1.c进行编译后反汇编

 1 $ gcc -g -Wall -m32 -std=gnu99 -o foo1 foo1.c
 2 $ gdb foo1
 3 (gdb) set disassembly-flavor intel
 4 (gdb) disas /m main
 5 Dump of assembler code for function main:
 6 22    {
 7    0x0804842a <+0>:    push   ebp
 8    0x0804842b <+1>:    mov    ebp,esp
 9    0x0804842d <+3>:    sub    esp,0x38
10 
11 23        point_t a = { .x = 0x1, .y = 0x2, .z = +0x3 };
12    0x08048430 <+6>:    mov    DWORD PTR [ebp-0x18],0x1
13    0x08048437 <+13>:    mov    DWORD PTR [ebp-0x14],0x2
14    0x0804843e <+20>:    mov    DWORD PTR [ebp-0x10],0x3
15 
16 24        point_t b = { .x = 0x1, .y = 0x2, .z = -0x3 };
17    0x08048445 <+27>:    mov    DWORD PTR [ebp-0xc],0x1
18    0x0804844c <+34>:    mov    DWORD PTR [ebp-0x8],0x2
19    0x08048453 <+41>:    mov    DWORD PTR [ebp-0x4],0xfffffffd
20 
21 25        return !cmp(a, b);
22    0x0804845a <+48>:    mov    eax,DWORD PTR [ebp-0xc]
23    0x0804845d <+51>:    mov    DWORD PTR [esp+0xc],eax
24    0x08048461 <+55>:    mov    eax,DWORD PTR [ebp-0x8]
25    0x08048464 <+58>:    mov    DWORD PTR [esp+0x10],eax
26    0x08048468 <+62>:    mov    eax,DWORD PTR [ebp-0x4]
27    0x0804846b <+65>:    mov    DWORD PTR [esp+0x14],eax
28    0x0804846f <+69>:    mov    eax,DWORD PTR [ebp-0x18]
29    0x08048472 <+72>:    mov    DWORD PTR [esp],eax
30    0x08048475 <+75>:    mov    eax,DWORD PTR [ebp-0x14]
31    0x08048478 <+78>:    mov    DWORD PTR [esp+0x4],eax
32    0x0804847c <+82>:    mov    eax,DWORD PTR [ebp-0x10]
33    0x0804847f <+85>:    mov    DWORD PTR [esp+0x8],eax
34    0x08048483 <+89>:    call   0x80483ed <cmp>
35    0x08048488 <+94>:    test   eax,eax
36    0x0804848a <+96>:    sete   al
37    0x0804848d <+99>:    movzx  eax,al
38 
39 26    }
40    0x08048490 <+102>:    leave  
41    0x08048491 <+103>:    ret    
42 
43 End of assembler dump.
44 (gdb) disas /m cmp
45 Dump of assembler code for function cmp:
46 11    {
47    0x080483ed <+0>:    push   ebp
48    0x080483ee <+1>:    mov    ebp,esp
49 
50 12        if (a.x != b.x)
51    0x080483f0 <+3>:    mov    edx,DWORD PTR [ebp+0x8]
52    0x080483f3 <+6>:    mov    eax,DWORD PTR [ebp+0x14]
53    0x080483f6 <+9>:    cmp    edx,eax
54    0x080483f8 <+11>:    je     0x8048401 <cmp+20>
55 
56 13            return FALSE;
57    0x080483fa <+13>:    mov    eax,0x0
58    0x080483ff <+18>:    jmp    0x8048428 <cmp+59>
59 
60 14        if (a.y != b.y)
61    0x08048401 <+20>:    mov    edx,DWORD PTR [ebp+0xc]
62    0x08048404 <+23>:    mov    eax,DWORD PTR [ebp+0x18]
63    0x08048407 <+26>:    cmp    edx,eax
64    0x08048409 <+28>:    je     0x8048412 <cmp+37>
65 
66 15            return FALSE;
67    0x0804840b <+30>:    mov    eax,0x0
68    0x08048410 <+35>:    jmp    0x8048428 <cmp+59>
69 
70 16        if (a.z != b.z)
71    0x08048412 <+37>:    mov    edx,DWORD PTR [ebp+0x10]
72    0x08048415 <+40>:    mov    eax,DWORD PTR [ebp+0x1c]
73    0x08048418 <+43>:    cmp    edx,eax
74    0x0804841a <+45>:    je     0x8048423 <cmp+54>
75 
76 17            return FALSE;
77    0x0804841c <+47>:    mov    eax,0x0
78    0x08048421 <+52>:    jmp    0x8048428 <cmp+59>
79 
80 18        return TRUE;
81    0x08048423 <+54>:    mov    eax,0x1
82 
83 19    }
84    0x08048428 <+59>:    pop    ebp
85    0x08048429 <+60>:    ret    
86 
87 End of assembler dump.
88 (gdb)

o caller: point_t b的所有成员x, y, z和point_t a的所有成员x, y, z被依次存入到stack上

23              point_t a = { .x = 0x1, .y = 0x2, .z = +0x3 };
   0x08048430 <+6>:     mov    DWORD PTR [ebp-0x18],0x1         ; a.x = 0x1
   0x08048437 <+13>:    mov    DWORD PTR [ebp-0x14],0x2         ; a.y = 0x2
   0x0804843e <+20>:    mov    DWORD PTR [ebp-0x10],0x3         ; a.z = +0x3

24              point_t b = { .x = 0x1, .y = 0x2, .z = -0x3 };
   0x08048445 <+27>:    mov    DWORD PTR [ebp-0xc],0x1          ; b.x = 0x1
   0x0804844c <+34>:    mov    DWORD PTR [ebp-0x8],0x2          ; b.y = 0x2
   0x08048453 <+41>:    mov    DWORD PTR [ebp-0x4],0xfffffffd   ; b.z = -0x3

25              return !cmp(a, b);
   0x0804845a <+48>:    mov    eax,DWORD PTR [ebp-0xc]          ;
   0x0804845d <+51>:    mov    DWORD PTR [esp+0xc],eax          ; save b.x to stack
   0x08048461 <+55>:    mov    eax,DWORD PTR [ebp-0x8]          ;
   0x08048464 <+58>:    mov    DWORD PTR [esp+0x10],eax         ; save b.y to stack
   0x08048468 <+62>:    mov    eax,DWORD PTR [ebp-0x4]          ;
   0x0804846b <+65>:    mov    DWORD PTR [esp+0x14],eax         ; save b.z to stack
   0x0804846f <+69>:    mov    eax,DWORD PTR [ebp-0x18]         ;
   0x08048472 <+72>:    mov    DWORD PTR [esp],eax              ; save a.x to stack
   0x08048475 <+75>:    mov    eax,DWORD PTR [ebp-0x14]         ;
   0x08048478 <+78>:    mov    DWORD PTR [esp+0x4],eax          ; save a.y to stack
   0x0804847c <+82>:    mov    eax,DWORD PTR [ebp-0x10]         ;
   0x0804847f <+85>:    mov    DWORD PTR [esp+0x8],eax          ; save a.z to stack
   0x08048483 <+89>:    call   0x80483ed <cmp>                  ;

也就是说,在caller中调用cmp(a, b)表面上只传递了两个实参,其实是把两个结构体的全部成员逐一拷贝到了栈上,一共写入了6个值。而对于callee cmp()来说,需要去栈里把对应的6个值取出来使用。

作为对比, 下面的程序片段在cmp()中使用结构体变量指针。

o foo2.c

 1 #define FALSE 0
 2 #define TRUE  (!0)
 3 
 4 typedef struct point_s {
 5     int x;
 6     int y;
 7     int z;
 8 } point_t;
 9 
10 static int cmp(point_t *a, point_t *b)
11 {
12     if (a->x != b->x)
13         return FALSE;
14     if (a->y != b->y)
15         return FALSE;
16     if (a->z != b->z)
17         return FALSE;
18     return TRUE;
19 }
20 
21 int main(int argc, char *argv[])
22 {
23     point_t a = { .x = 0x1, .y = 0x2, .z = +0x3 };
24     point_t b = { .x = 0x1, .y = 0x2, .z = -0x3 };
25     return !cmp(&a, &b);
26 }

o foo1.c vs. foo2.c: cmp()的形参由point_t改为point_t *,成员访问由a.x改为a->x,调用处由cmp(a, b)改为cmp(&a, &b)

o 对foo2.c进行编译后反汇编

 1 $ gcc -g -Wall -m32 -std=gnu99 -o foo2 foo2.c
 2 $ gdb foo2
 3 (gdb) set disassembly-flavor intel
 4 (gdb) disas /m main
 5 Dump of assembler code for function main:
 6 22      {
 7    0x0804843a <+0>:     push   ebp
 8    0x0804843b <+1>:     mov    ebp,esp
 9    0x0804843d <+3>:     sub    esp,0x28
10 
11 23              point_t a = { .x = 0x1, .y = 0x2, .z = +0x3 };
12    0x08048440 <+6>:     mov    DWORD PTR [ebp-0x18],0x1
13    0x08048447 <+13>:    mov    DWORD PTR [ebp-0x14],0x2
14    0x0804844e <+20>:    mov    DWORD PTR [ebp-0x10],0x3
15 
16 24              point_t b = { .x = 0x1, .y = 0x2, .z = -0x3 };
17    0x08048455 <+27>:    mov    DWORD PTR [ebp-0xc],0x1
18    0x0804845c <+34>:    mov    DWORD PTR [ebp-0x8],0x2
19    0x08048463 <+41>:    mov    DWORD PTR [ebp-0x4],0xfffffffd
20 
21 25              return !cmp(&a, &b);
22    0x0804846a <+48>:    lea    eax,[ebp-0xc]
23    0x0804846d <+51>:    mov    DWORD PTR [esp+0x4],eax
24    0x08048471 <+55>:    lea    eax,[ebp-0x18]
25    0x08048474 <+58>:    mov    DWORD PTR [esp],eax
26    0x08048477 <+61>:    call   0x80483ed <cmp>
27    0x0804847c <+66>:    test   eax,eax
28    0x0804847e <+68>:    sete   al
29    0x08048481 <+71>:    movzx  eax,al
30 
31 26      }
32    0x08048484 <+74>:    leave
33    0x08048485 <+75>:    ret
34 
35 End of assembler dump.
36 (gdb) disas /m cmp
37 Dump of assembler code for function cmp:
38 11      {
39    0x080483ed <+0>:     push   ebp
40    0x080483ee <+1>:     mov    ebp,esp
41 
42 12              if (a->x != b->x)
43    0x080483f0 <+3>:     mov    eax,DWORD PTR [ebp+0x8]
44    0x080483f3 <+6>:     mov    edx,DWORD PTR [eax]
45    0x080483f5 <+8>:     mov    eax,DWORD PTR [ebp+0xc]
46    0x080483f8 <+11>:    mov    eax,DWORD PTR [eax]
47    0x080483fa <+13>:    cmp    edx,eax
48    0x080483fc <+15>:    je     0x8048405 <cmp+24>
49 
50 13                      return FALSE;
51    0x080483fe <+17>:    mov    eax,0x0
52    0x08048403 <+22>:    jmp    0x8048438 <cmp+75>
53 
54 14              if (a->y != b->y)
55    0x08048405 <+24>:    mov    eax,DWORD PTR [ebp+0x8]
56    0x08048408 <+27>:    mov    edx,DWORD PTR [eax+0x4]
57    0x0804840b <+30>:    mov    eax,DWORD PTR [ebp+0xc]
58    0x0804840e <+33>:    mov    eax,DWORD PTR [eax+0x4]
59    0x08048411 <+36>:    cmp    edx,eax
60    0x08048413 <+38>:    je     0x804841c <cmp+47>
61 
62 15                      return FALSE;
63    0x08048415 <+40>:    mov    eax,0x0
64    0x0804841a <+45>:    jmp    0x8048438 <cmp+75>
65 
66 16              if (a->z != b->z)
67    0x0804841c <+47>:    mov    eax,DWORD PTR [ebp+0x8]
68    0x0804841f <+50>:    mov    edx,DWORD PTR [eax+0x8]
69    0x08048422 <+53>:    mov    eax,DWORD PTR [ebp+0xc]
70    0x08048425 <+56>:    mov    eax,DWORD PTR [eax+0x8]
71    0x08048428 <+59>:    cmp    edx,eax
72    0x0804842a <+61>:    je     0x8048433 <cmp+70>
73 
74 17                      return FALSE;
75    0x0804842c <+63>:    mov    eax,0x0
76    0x08048431 <+68>:    jmp    0x8048438 <cmp+75>
77 
78 18              return TRUE;
79    0x08048433 <+70>:    mov    eax,0x1
80 
81 19      }
82    0x08048438 <+75>:    pop    ebp
83    0x08048439 <+76>:    ret
84 
85 End of assembler dump.
86 (gdb)

o caller: point_t b的地址&b和point_t a的地址&a被依次存入到stack上

23              point_t a = { .x = 0x1, .y = 0x2, .z = +0x3 };
   0x08048440 <+6>:     mov    DWORD PTR [ebp-0x18],0x1         ; a.x = 0x1
   0x08048447 <+13>:    mov    DWORD PTR [ebp-0x14],0x2         ; a.y = 0x2
   0x0804844e <+20>:    mov    DWORD PTR [ebp-0x10],0x3         ; a.z = +0x3

24              point_t b = { .x = 0x1, .y = 0x2, .z = -0x3 };
   0x08048455 <+27>:    mov    DWORD PTR [ebp-0xc],0x1          ; b.x = 0x1
   0x0804845c <+34>:    mov    DWORD PTR [ebp-0x8],0x2          ; b.y = 0x2
   0x08048463 <+41>:    mov    DWORD PTR [ebp-0x4],0xfffffffd   ; b.z = -0x3

25              return !cmp(&a, &b);
   0x0804846a <+48>:    lea    eax,[ebp-0xc]                    ; get &b (addr of struct b)
   0x0804846d <+51>:    mov    DWORD PTR [esp+0x4],eax          ; save &b to stack
   0x08048471 <+55>:    lea    eax,[ebp-0x18]                   ; get &a (addr of struct a)
   0x08048474 <+58>:    mov    DWORD PTR [esp],eax              ; save &a to stack
   0x08048477 <+61>:    call   0x80483ed <cmp>                  ;

显然,在caller中使用cmp(&a, &b)只需要往栈里存入两个值(即两个结构体的地址);相比之下,cmp(a, b)往栈里存入了6个值。结构体越大,按值传递需要拷贝的数据就越多,而传指针的开销始终只是一个地址(代价是callee中每次访问成员都要先取出指针再解引用),因此cmp(&a, &b)的传参效率确实更高。

另外,在64位的程序中(以x86-64 System V ABI为例),前6个整型或指针参数默认通过寄存器传递,超出的部分才使用栈传递(具体请参见对应的ABI)。如果按值传递结构体,较小的结构体会一次占用多个参数寄存器,较大的结构体则仍然要整体拷贝到栈上,对寄存器和栈空间都是浪费,不如只传递一个指针。

结论:

  • 不要在函数参数中使用结构体变量;
  • 也不要在函数中定义太多的参数,<=6最好;
  • 如果不可避免地要使用较多的参数,设计函数的时候请最大化利用结构体,然后使用结构体指针作为参数。
posted @ 2017-01-19 18:51  veli  阅读(1053)  评论(0)  编辑  收藏  举报