高级C代码的汇编分析
在windows上,常用的函数调用方式有:
Pascal方式,WINAPI(_stdcall)方式 和C方式(_cdecl)
_cdecl调用规则:
1,参数从右到左入堆栈
2,在函数返回后,调用者要负责清除堆栈
因为每个调用点之后都要由调用者生成清理堆栈的代码(如 add esp,N),所以这种调用常会生成较大的可执行文件。
_stdcall又称为WINAPI调用方式,规则:
1,参数从右向左入堆栈
2,被调用的函数在返回前自行清理堆栈
所以这种调用会生成比cdecl小的代码
Pascal调用方式,主要用在WIN16函数库中,现在基本不用
规则:
1,参数从左向右入堆栈
2,被调用函数在返回前自行清理堆栈
此外,在Windows内核中还常见的有快速调用方式(_fastcall)
在C++编译的代码中有thiscall方式(_thiscall)
在32位Windows中,不管哪种调用方式,整数和指针类型的返回值都写在eax中(64位整数使用edx:eax,浮点数使用st(0)),调用者从中获取返回值
_cdecl方式步骤
1,保存ebp
2,保存esp到ebp
3,在堆栈中腾出一个区域来保存局部变量
4,保存ebx,esi,edi到堆栈中,函数调用完后恢复
5,把局部变量区域初始化为0CCCCCCCCh(即0xCCCCCCCC),其中每个字节0xCC实际上是int 3指令的机器码,这是一个断点软中断
6,做函数里应该做的事情
7,恢复ebx,esi,edi,esp,ebp,最后返回
2: int func(int a,int b) 3: { 00401010 push ebp 00401011 mov ebp,esp 00401013 sub esp,44h 00401016 push ebx 00401017 push esi 00401018 push edi 00401019 lea edi,[ebp-44h] 0040101C mov ecx,11h 00401021 mov eax,0CCCCCCCCh 00401026 rep stos dword ptr [edi] 4: int c = a + b; 00401028 mov eax,dword ptr [ebp+8] 0040102B add eax,dword ptr [ebp+0Ch] 0040102E mov dword ptr [ebp-4],eax 5: return c; 00401031 mov eax,dword ptr [ebp-4] 6: } 00401034 pop edi 00401035 pop esi 00401036 pop ebx 00401037 mov esp,ebp 00401039 pop ebp 0040103A ret
for循环的汇编代码分析:
6: int i; 7: for(i = 0 ;i < 50 ; i ++) 0040B501 mov dword ptr [ebp-8],0 0040B508 jmp func+33h (0040b513) 0040B50A mov ecx,dword ptr [ebp-8] 0040B50D add ecx,1 0040B510 mov dword ptr [ebp-8],ecx 0040B513 cmp dword ptr [ebp-8],32h 0040B517 jge func+44h (0040b524) 8: c = c + i; 0040B519 mov edx,dword ptr [ebp-4] 0040B51C add edx,dword ptr [ebp-8] 0040B51F mov dword ptr [ebp-4],edx 0040B522 jmp func+2Ah (0040b50a) 9: 10: return c; 0040B524 mov eax,dword ptr [ebp-4] 11: }
从上面的汇编代码可以分析出,for循环就是cmp指令+跳转指令(jmp和jge等条件跳转)
根据cmp的比较结果,由条件跳转指令决定跳转到哪个位置执行代码
do...while循环分析
5: 6: int i = 0; 0040B501 mov dword ptr [ebp-8],0 7: 8: do { 9: c = c +i; 0040B508 mov ecx,dword ptr [ebp-4] 0040B50B add ecx,dword ptr [ebp-8] 0040B50E mov dword ptr [ebp-4],ecx 10: }while(c < 50); 0040B511 cmp dword ptr [ebp-4],32h 0040B515 jl func+28h (0040b508) 11: 12: return c; 0040B517 mov eax,dword ptr [ebp-4] 13: } 0040B51A pop edi 0040B51B pop esi 0040B51C pop ebx 0040B51D mov esp,ebp 0040B51F pop ebp 0040B520 ret
从上面代码可以看出
本质上do...while循环和for差不多,区别只是先执行一次循环体,再用cmp比较,条件成立时跳回循环体开头
while循环:
6: int i = 0; 0040B501 mov dword ptr [ebp-8],0 7: 8: while(i < 50) 0040B508 cmp dword ptr [ebp-8],32h 0040B50C jge func+39h (0040b519) 9: { 10: c = c +i; 0040B50E mov ecx,dword ptr [ebp-4] 0040B511 add ecx,dword ptr [ebp-8] 0040B514 mov dword ptr [ebp-4],ecx 11: }; 0040B517 jmp func+28h (0040b508) 12: 13: return c; 0040B519 mov eax,dword ptr [ebp-4] 14: } 0040B51C pop edi 0040B51D pop esi 0040B51E pop ebx 0040B51F mov esp,ebp 0040B521 pop ebp 0040B522 ret
if...else if...else语句分析
: 6: int i = 0; 0040B501 mov dword ptr [ebp-8],0 7: 8: if(c>0 && c < 10) 0040B508 cmp dword ptr [ebp-4],0 0040B50C jle func+43h (0040b523) 0040B50E cmp dword ptr [ebp-4],0Ah 0040B512 jge func+43h (0040b523) 9: { 10: printf("c > 0"); 0040B514 push offset string "c > 0" (0041ff5c) 0040B519 call printf (0040b780) 0040B51E add esp,4 11: } 12: else if(c>10 && c<00) 0040B521 jmp func+6Bh (0040b54b) 0040B523 cmp dword ptr [ebp-4],0Ah 0040B527 jle func+5Eh (0040b53e) 0040B529 cmp dword ptr [ebp-4],0 0040B52D jge func+5Eh (0040b53e) 13: { 14: printf("c>10 && c<100"); 0040B52F push offset string "c>10 && c<100" (0041ff4c) 0040B534 call printf (0040b780) 0040B539 add esp,4 15: } 16: else 0040B53C jmp func+6Bh (0040b54b) 17: { 18: printf("c>10 && c < 100"); 0040B53E push offset string "c>10 && c < 100" (0041ff3c) 0040B543 call printf (0040b780) 0040B548 add esp,4 19: } 20: 21: return c; 0040B54B mov eax,dword ptr [ebp-4] 22: } 0040B54E pop edi 0040B54F pop esi 0040B550 pop ebx 0040B551 add esp,48h 0040B554 cmp ebp,esp 0040B556 call __chkesp (0040b4a0) 0040B55B mov esp,ebp 0040B55D pop ebp 0040B55E ret
switch...case 代码分析
4: int c = a + b; 0040B4F8 mov eax,dword ptr [ebp+8] 0040B4FB add eax,dword ptr [ebp+0Ch] 0040B4FE mov dword ptr [ebp-4],eax 5: 6: switch(c) 7: { 0040B501 mov ecx,dword ptr [ebp-4] 0040B504 mov dword ptr [ebp-8],ecx 0040B507 cmp dword ptr [ebp-8],0 0040B50B je func+35h (0040b515) 0040B50D cmp dword ptr [ebp-8],1 0040B511 je func+42h (0040b522) 0040B513 jmp func+51h (0040b531) 8: case 0: 9: printf("c>0"); 0040B515 push offset string "c>0" (0041ff4c) 0040B51A call printf (0040b780) 0040B51F add esp,4 10: case 1: 11: printf("c>10 && c<100"); 0040B522 push offset string "c>10 && c<100" (0041ff3c) 0040B527 call printf (0040b780) 0040B52C add esp,4 12: break; 0040B52F jmp func+5Eh (0040b53e) 13: default: 14: printf("c>10 && c<100"); 0040B531 push offset string "c>10 && c<100" (0041ff3c) 0040B536 call printf (0040b780) 0040B53B add esp,4 15: } 16: 17: return c; 0040B53E mov eax,dword ptr [ebp-4] 18: } 0040B541 pop edi 0040B542 pop esi 0040B543 pop ebx 0040B544 add esp,48h 0040B547 cmp ebp,esp 0040B549 call __chkesp (0040b4a0) 0040B54E mov esp,ebp 0040B550 pop ebp 0040B551 ret
结构体分析
1: 2: typedef struct { 3: int a; 4: int b; 5: int c; 6: }mystruct; 7: 8: int func(int a,int b) 9: { 0040B800 push ebp 0040B801 mov ebp,esp 0040B803 sub esp,1D8h 0040B809 push ebx 0040B80A push esi 0040B80B push edi 0040B80C lea edi,[ebp-1D8h] 0040B812 mov ecx,76h 0040B817 mov eax,0CCCCCCCCh 0040B81C rep stos dword ptr [edi] 10: 11: unsigned char *buf[100]; 12: mystruct *strs = (mystruct *)buf; 0040B81E lea eax,[ebp-190h] 0040B824 mov dword ptr [ebp-194h],eax 13: int i; 14: for(i=0; i<5; i++) 0040B82A mov dword ptr [ebp-198h],0 0040B834 jmp func+45h (0040b845) 0040B836 mov ecx,dword ptr [ebp-198h] 0040B83C add ecx,1 0040B83F mov dword ptr [ebp-198h],ecx 0040B845 cmp dword ptr [ebp-198h],5 0040B84C jge func+94h (0040b894) 15: { 16: strs[i].a=0; 0040B84E mov edx,dword ptr [ebp-198h] 0040B854 imul edx,edx,0Ch 0040B857 mov eax,dword ptr [ebp-194h] 0040B85D mov dword ptr [eax+edx],0 17: strs[i].b=1; 0040B864 mov ecx,dword ptr [ebp-198h] 0040B86A imul ecx,ecx,0Ch 0040B86D mov edx,dword ptr [ebp-194h] 0040B873 mov dword ptr [edx+ecx+4],1 18: strs[i].c=2; 0040B87B mov eax,dword ptr [ebp-198h] 0040B881 imul eax,eax,0Ch 0040B884 mov ecx,dword ptr [ebp-194h] 0040B88A mov dword ptr [ecx+eax+8],2 19: } 0040B892 jmp func+36h (0040b836) 20: 21: return 0; 0040B894 xor eax,eax 22: } 0040B896 pop edi 0040B897 pop esi 0040B898 pop ebx 0040B899 mov esp,ebp 0040B89B pop ebp 0040B89C ret
从上面不难看出,访问结构体数组时,先把数组的基址存放到一个局部变量([ebp-194h])中
然后用下标乘以结构体大小(0Ch)计算出每个结构体的偏移量,再加上各成员在结构体内的固定偏移(0,4,8)进行赋值
枚举,联合,结构结合分析:
1: typedef enum { 2: ENUM_1 = 1, 3: ENUM_2 = 2, 4: ENUM_3, 5: ENUM_4 6: }myenum; 7: 8: typedef struct { 9: int a; 10: int b; 11: int c; 12: }mystruct; 13: 14: typedef union { 15: mystruct s; 16: myenum e[3]; 17: }myunion; 18: 19: int func(int a,int b) 20: { 00401020 push ebp 00401021 mov ebp,esp 00401023 sub esp,0ACh 00401029 push ebx 0040102A push esi 0040102B push edi 0040102C lea edi,[ebp-0ACh] 00401032 mov ecx,2Bh 00401037 mov eax,0CCCCCCCCh 0040103C rep stos dword ptr [edi] 21: unsigned char buf[100] = {0}; 0040103E mov byte ptr [ebp-64h],0 00401042 mov ecx,18h 00401047 xor eax,eax 00401049 lea edi,[ebp-63h] 0040104C rep stos dword ptr [edi] 0040104E stos word ptr [edi] 00401050 stos byte ptr [edi] 22: myunion *uns = (myunion *)buf; 00401051 lea eax,[ebp-64h] 00401054 mov dword ptr [ebp-68h],eax 23: 24: int i; 25: 26: for(i = 0; i < 5; i++) 00401057 mov dword ptr [ebp-6Ch],0 0040105E jmp func+49h (00401069) 00401060 mov ecx,dword ptr [ebp-6Ch] 00401063 add ecx,1 00401066 mov dword ptr [ebp-6Ch],ecx 00401069 cmp dword ptr [ebp-6Ch],5 0040106D jge func+83h (004010a3) 27: { 28: uns[i].s.a=0; 0040106F mov edx,dword ptr [ebp-6Ch] 00401072 imul edx,edx,0Ch 00401075 mov eax,dword ptr [ebp-68h] 00401078 mov dword ptr [eax+edx],0 29: uns[i].s.b = 1; 0040107F mov ecx,dword ptr [ebp-6Ch] 00401082 imul ecx,ecx,0Ch 00401085 mov edx,dword ptr [ebp-68h] 00401088 mov dword ptr [edx+ecx+4],1 30: uns[i].e[2] = ENUM_4; 00401090 mov eax,dword ptr [ebp-6Ch] 00401093 imul eax,eax,0Ch 00401096 mov ecx,dword ptr [ebp-68h] 00401099 mov dword ptr [ecx+eax+8],4 31: } 004010A1 jmp func+40h (00401060) 32: 33: return 0; 004010A3 xor eax,eax 34: } 004010A5 pop edi 004010A6 pop esi 004010A7 pop ebx 004010A8 mov esp,ebp 004010AA pop ebp 004010AB ret
我们发现这段代码和上面结构体例子的汇编代码基本一样,因此我们知道,汇编中对共用体和枚举类型没有特别的处理
并不会引入新的代码,因为共用体和枚举都是方便给程序员用的,本质没什么改变
其实上面这些控制语句,对反汇编来说很容易分析,逆向工程中最令人蛋疼的是算法
一个3*3矩阵乘法算法的逆向分析
main函数
int main() 13: { 0040B640 push ebp 0040B641 mov ebp,esp 0040B643 sub esp,0ACh 0040B649 push ebx 0040B64A push esi 0040B64B push edi 0040B64C lea edi,[ebp-0ACh] 0040B652 mov ecx,2Bh 0040B657 mov eax,0CCCCCCCCh 0040B65C rep stos dword ptr [edi] 14: int a[3][3] = {{1,2,3},{2,3,4},{3,4,5}}; 0040B65E mov dword ptr [ebp-24h],1 0040B665 mov dword ptr [ebp-20h],2 0040B66C mov dword ptr [ebp-1Ch],3 0040B673 mov dword ptr [ebp-18h],2 0040B67A mov dword ptr [ebp-14h],3 0040B681 mov dword ptr [ebp-10h],4 0040B688 mov dword ptr [ebp-0Ch],3 0040B68F mov dword ptr [ebp-8],4 0040B696 mov dword ptr [ebp-4],5 15: int b[3][3] = {{2,3,4},{2,4,1},{6,2,1}}; 0040B69D mov dword ptr [ebp-48h],2 0040B6A4 mov dword ptr [ebp-44h],3 0040B6AB mov dword ptr [ebp-40h],4 0040B6B2 mov dword ptr [ebp-3Ch],2 0040B6B9 mov dword ptr [ebp-38h],4 0040B6C0 mov dword ptr [ebp-34h],1 0040B6C7 mov dword ptr [ebp-30h],6 0040B6CE mov dword ptr [ebp-2Ch],2 0040B6D5 mov dword ptr [ebp-28h],1 16: int c[3][3]; 17: 18: func(a,b,c); 0040B6DC lea eax,[ebp-6Ch] 0040B6DF push eax 0040B6E0 lea ecx,[ebp-48h] 0040B6E3 push ecx 0040B6E4 lea edx,[ebp-24h] 0040B6E7 push edx 0040B6E8 call @ILT+5(_func) (0040100a) 0040B6ED add esp,0Ch 19: 20: return 0; 0040B6F0 xor eax,eax 21: } 0040B6F2 pop edi 0040B6F3 pop esi 0040B6F4 pop ebx 0040B6F5 add esp,0ACh 0040B6FB cmp ebp,esp 0040B6FD call __chkesp (00401130) 0040B702 mov esp,ebp 0040B704 pop ebp 0040B705 ret
算法函数:
1: int func(int a[3][3],int b[3][3],int c[3][3]) 2: { 0040B580 push ebp 0040B581 mov ebp,esp 0040B583 sub esp,48h 0040B586 push ebx 0040B587 push esi 0040B588 push edi 0040B589 lea edi,[ebp-48h] 0040B58C mov ecx,12h 0040B591 mov eax,0CCCCCCCCh 0040B596 rep stos dword ptr [edi] 3: int i,j; 4: for(i = 0 ; i < 3; i++) 0040B598 mov dword ptr [ebp-4],0 0040B59F jmp func+2Ah (0040b5aa) 0040B5A1 mov eax,dword ptr [ebp-4] 0040B5A4 add eax,1 0040B5A7 mov dword ptr [ebp-4],eax 0040B5AA cmp dword ptr [ebp-4],3 0040B5AE jge func+0AAh (0040b62a) 5: { 6: for(j = 0 ; j < 3; j ++) 0040B5B0 mov dword ptr [ebp-8],0 0040B5B7 jmp func+42h (0040b5c2) 0040B5B9 mov ecx,dword ptr [ebp-8] 0040B5BC add ecx,1 0040B5BF mov dword ptr [ebp-8],ecx 0040B5C2 cmp dword ptr [ebp-8],3 0040B5C6 jge func+0A5h (0040b625) 7: c[i][j] = a[i][0]*b[0][j]+a[i][1]*b[1][j]+a[i][2]*b[2][j]; 0040B5C8 mov edx,dword ptr [ebp-4] 0040B5CB imul edx,edx,0Ch 0040B5CE mov eax,dword ptr [ebp+8] 0040B5D1 mov ecx,dword ptr [ebp-8] 0040B5D4 mov esi,dword ptr [ebp+0Ch] 0040B5D7 mov edx,dword ptr [eax+edx] 0040B5DA imul edx,dword ptr [esi+ecx*4] 0040B5DE mov eax,dword ptr [ebp-4] 0040B5E1 imul eax,eax,0Ch 0040B5E4 mov ecx,dword ptr [ebp+8] 0040B5E7 mov esi,dword ptr [ebp-8] 0040B5EA mov edi,dword ptr [ebp+0Ch] 0040B5ED mov eax,dword ptr [ecx+eax+4] 0040B5F1 imul eax,dword ptr [edi+esi*4+0Ch] 0040B5F6 add edx,eax 0040B5F8 mov ecx,dword ptr [ebp-4] 0040B5FB imul ecx,ecx,0Ch 0040B5FE mov eax,dword ptr [ebp+8] 0040B601 mov esi,dword ptr [ebp-8] 0040B604 mov edi,dword ptr [ebp+0Ch] 0040B607 mov ecx,dword ptr [eax+ecx+8] 0040B60B imul ecx,dword ptr [edi+esi*4+18h] 0040B610 add edx,ecx 0040B612 mov eax,dword ptr [ebp-4] 0040B615 imul eax,eax,0Ch 0040B618 mov ecx,dword ptr [ebp+10h] 0040B61B add ecx,eax 0040B61D mov eax,dword ptr [ebp-8] 0040B620 mov dword ptr [ecx+eax*4],edx 0040B623 jmp func+39h (0040b5b9) 8: } 0040B625 jmp func+21h (0040b5a1) 9: return 0; 0040B62A xor eax,eax 10: } 0040B62C pop edi 0040B62D pop esi 0040B62E pop ebx 
0040B62F mov esp,ebp 0040B631 pop ebp 0040B632 ret
从上面的代码我们可以看出,汇编对Debug模式的二维数组操作方式如下:
mov eax,<数组元素下标> imul eax,eax,<结构体(或数组一行)的大小> mov ecx,<结构体开始地址> mov eax,dword ptr [ecx+eax] 访问内部变量的时候,还要加上该变量在结构体内的偏移量,例如 mov eax,dword ptr [ecx+eax+0CH]