Mixing x86 with x64 code (混合编写x86和x64代码)
几个月前我小小的研究了在WOW64下的32位进程中运行native x64代码。
第二个设想是在64位进程下运行x86代码。它们都是可以的,如我google的一样,
已经有人在使用这两种方法了:
- http://vx.netlux.org/lib/vrg02.html
- http://www.corsix.org/content/dll-injection-and-wow64
- http://int0h.wordpress.com/2009/12/24/the-power-of-wow64/
- http://int0h.wordpress.com/2011/02/22/anti-anti-debugging-via-wow64/
x86 <-> x64 Transition(x86和x64之间的转换)
要了解x86到x64的转换是如何完成的,最简单的方法是对比32位Windows和64位Windows中32位版本ntdll.dll里的任意一个syscall:
32-bits ntdll from Win7 x86:
    mov     eax, X
    mov     edx, 7FFE0300h
    call    dword ptr [edx]        ;ntdll.KiFastSystemCall
    retn    Z

32-bits ntdll from Win7 x64:
    mov     eax, X
    mov     ecx, Y
    lea     edx, [esp+4]
    call    dword ptr fs:[0C0h]    ;wow64cpu!X86SwitchTo64BitMode
    add     esp, 4
    ret     Z
wow64cpu!X86SwitchTo64BitMode:
    748c2320 jmp 0033:748C271E     ;wow64cpu!CpupReturnFromSimulatedCode
这就是64位Windows系统上x86与x64之间转换背后的魔术。
此外他也能在非WoW64进程中运行(标准的native 64位应用程序),所以32位代码也能运行在64位应用程序中。
总结一下,运行在64位Windows中的每个进程(x86和x64),都分配了两个代码段:
- cs = 0x23 -> x86 mode
- cs = 0x33 -> x64 mode
Running x64 code inside 32-bits process(在32位进程中运行x64代码)
首先,我准备了一些宏,将用它来标记64位代码的开始和结尾:
// Emits a single raw byte into the instruction stream through the MSVC
// inline assembler. All macros below are built from it so that the 32-bit
// compiler can produce byte sequences that are decoded in x64 mode.
#define EM(a) __asm __emit (a)

// Leaves x86 mode by far-returning into the code segment selector _cs.
// The sequence pushes _cs, pushes the address of the following instruction
// (call $+5), advances that address by 5 (the 4-byte add plus the 1-byte
// retf) and finally executes retf, which consumes both values.
#define X64_Start_with_CS(_cs) \
{ \
    EM(0x6A) EM(_cs)                     /*  push   _cs                   */ \
    EM(0xE8) EM(0) EM(0) EM(0) EM(0)     /*  call   $+5                   */ \
    EM(0x83) EM(4) EM(0x24) EM(5)        /*  add    dword [esp], 5        */ \
    EM(0xCB)                             /*  retf                         */ \
}

// Counterpart of X64_Start_with_CS, executed while in x64 mode. call $+5
// pushes the return address, the mov stores _cs at [rsp + 4], the add moves
// the return address forward by 0xD (8-byte mov + 4-byte add + 1-byte retf)
// and retf switches to the segment selected by _cs.
#define X64_End_with_CS(_cs) \
{ \
    EM(0xE8) EM(0) EM(0) EM(0) EM(0)     /*  call   $+5                   */ \
    EM(0xC7) EM(0x44) EM(0x24) EM(4)     /*  mov    dword [rsp + 4], _cs  */ \
    EM(_cs) EM(0) EM(0) EM(0)            /*  (32-bit immediate of the mov) */ \
    EM(0x83) EM(4) EM(0x24) EM(0xD)      /*  add    dword [rsp], 0xD      */ \
    EM(0xCB)                             /*  retf                         */ \
}

// Enter x64 mode (code segment 0x33) / return to x86 mode (code segment 0x23).
#define X64_Start() X64_Start_with_CS(0x33)
#define X64_End() X64_End_with_CS(0x23)
执行完X64_Start()宏后,CPU直接转换到x64模式,执行完X64_End()宏后立即回到x86模式。
由于远返回的opcode,以上宏都是位置独立的。
能够调用x64版本的APIs是非常有用的。我尝试加载过x64版本的kernel32.dll,他不是一个微不足道的任务,
并且我失败了,所以我需要坚持使用Native API。x64版本的kernel32.dll的主要问题是在已经加载x86版本的
kernel32.dll的情况下,x64 kernel32.dll 有一些额外的检查来阻止正常的加载。我相信通过一些猥琐的hook
来拦截kernel32!BaseDllInitialize能达到目的,但是这是非常复杂的任务。当我开始研究的时候,我是在Windows
Vista上,并且我能加载(用一些hacks)64位版本的kernel32和user32库,但是他们没有完整的功能,同时我又
转换到Windows7,使用在Vista上的方法不能够正常工作了。
让我们回到主题上,为了使用Native APIs,我需要定位内存中x64版本的ntdll.dll。为了完成这个任务,
我需要解析_PEB_LDR_DATA结构中的InLoadOrderModuleList。64位的_PEB可以从64位的_TEB中获得,而64位_TEB
的获取方式与x86平台类似(在x64上我们需要使用gs段代替fs):
    mov eax, gs:[0x30]
它甚至可以更简单,因为 wow64cpu!CpuSimulate(负责转换CPU到x86模式的函数)会将gs:[0x30]的值移动到r12寄存器中,
所以我们的getTEB64()版本看起来像这样:
//to fool M$ inline asm compiler I'm using 2 DWORDs instead of DWORD64 //use of DWORD64 will generate wrong 'pop word ptr[]' and it will break stack union reg64 { DWORD dw[2]; DWORD64 v; }; //macro that simplifies pushing x64 registers #define X64_Push(r) EM(0x48 | ((r) >> 3)) EM(0x50 | ((r) & 7)) WOW64::TEB64* getTEB64() { reg64 reg; reg.v = 0; X64_Start(); //R12 register should always contain pointer to TEB64 in WoW64 processes X64_Push(_R12); //below pop will pop QWORD from stack, as we're in x64 mode now __asm pop reg.dw[0] X64_End(); //upper 32 bits should be always 0 in WoW64 processes if (reg.dw[1] != 0) return 0; return (WOW64::TEB64*)reg.dw[0]; }
WOW64名字空间定义在"os_structs.h"文件中,随后将会和其他示例代码添加到文章尾部。
负责定位64位ntdll.dll函数定义如下:
//Locates the 64-bit ntdll.dll mapped into the current WoW64 process by
//walking InLoadOrderModuleList of the 64-bit PEB loader data.
//
//The result is cached in a function-local static, so the list is walked
//again only while no match has been found yet. Diagnostic output (TEB, PEB,
//LDR addresses and every module name) is printed to stdout during the walk.
//
//Returns the module base as a 32-bit value, or 0 when the 64-bit TEB is not
//available or no module named exactly "ntdll.dll" is in the list.
DWORD getNTDLL64()
{
    static DWORD ntdll64 = 0;
    if (ntdll64 != 0)
        return ntdll64;

    WOW64::TEB64* teb64 = getTEB64();
    //getTEB64() reports failure with 0; dereferencing it would crash
    if (teb64 == 0)
        return 0;

    WOW64::PEB64* peb64 = teb64->ProcessEnvironmentBlock;
    WOW64::PEB_LDR_DATA64* ldr = peb64->Ldr;

    printf("TEB: %08X\n", (DWORD)teb64);
    printf("PEB: %08X\n", (DWORD)peb64);
    printf("LDR: %08X\n", (DWORD)ldr);

    //BaseDllName.Length is a byte count without the terminator, so compare
    //against the literal's size minus its terminating L'\0'
    static const wchar_t ntdllName[] = L"ntdll.dll";
    const size_t ntdllNameBytes = sizeof(ntdllName) - sizeof(wchar_t);

    printf("Loaded modules:\n");
    WOW64::LDR_DATA_TABLE_ENTRY64* head = \
        (WOW64::LDR_DATA_TABLE_ENTRY64*)ldr->InLoadOrderModuleList.Flink;
    do
    {
        printf(" %ws\n", head->BaseDllName.Buffer);
        //the length must match first: bounding memcmp by the module's own
        //length alone would accept an empty name or any prefix of
        //"ntdll.dll", and would read past the literal for longer names
        if (head->BaseDllName.Length == ntdllNameBytes &&
            memcmp(head->BaseDllName.Buffer, ntdllName, ntdllNameBytes) == 0)
        {
            ntdll64 = (DWORD)head->DllBase;
        }
        head = (WOW64::LDR_DATA_TABLE_ENTRY64*)head->InLoadOrderLinks.Flink;
    }
    while (head != (WOW64::LDR_DATA_TABLE_ENTRY64*)&ldr->InLoadOrderModuleList);

    printf("NTDLL x64: %08X\n", ntdll64);
    return ntdll64;
}
为了完整支持x64 Native API调用,我们还需要等价于GetProcAddress的函数,通过ntdll!LdrGetProcedureAddress更容易
的交流。下面代码负责获取LdrGetProcedureAddress的地址:
//Finds ntdll!LdrGetProcedureAddress inside the 64-bit ntdll.dll by reading
//the export directory of the mapped image by hand.
//
//Returns the address of the function as a 32-bit value, or 0 when the
//64-bit ntdll.dll could not be located, the image has no export directory,
//or the name is not exported.
DWORD getLdrGetProcedureAddress()
{
    BYTE* modBase = (BYTE*)getNTDLL64();
    //getNTDLL64() returns 0 when the module was not found
    if (modBase == 0)
        return 0;

    IMAGE_NT_HEADERS64* inh = \
        (IMAGE_NT_HEADERS64*)(modBase + ((IMAGE_DOS_HEADER*)modBase)->e_lfanew);

    IMAGE_DATA_DIRECTORY& idd = \
        inh->OptionalHeader.DataDirectory[IMAGE_DIRECTORY_ENTRY_EXPORT];
    if (idd.VirtualAddress == 0)
        return 0;

    IMAGE_EXPORT_DIRECTORY* ied = \
        (IMAGE_EXPORT_DIRECTORY*)(modBase + idd.VirtualAddress);

    DWORD* rvaTable = (DWORD*)(modBase + ied->AddressOfFunctions);
    WORD* ordTable = (WORD*)(modBase + ied->AddressOfNameOrdinals);
    DWORD* nameTable = (DWORD*)(modBase + ied->AddressOfNames);

    //lazy search, there is no need to use binsearch for just one function
    //the name and ordinal tables are parallel arrays with NumberOfNames
    //entries; NumberOfFunctions only bounds the address table, so using it
    //here would index past the end of nameTable/ordTable
    for (DWORD i = 0; i < ied->NumberOfNames; i++)
    {
        if (strcmp((char*)modBase + nameTable[i], "LdrGetProcedureAddress") == 0)
            return (DWORD)(modBase + rvaTable[ordTable[i]]);
    }
    return 0;
}
为了锦上添花,我将介绍有用的函数,能让我在x86的C/C++代码中直接的调用x64 Native APIs:
DWORD64 X64Call(DWORD func, int argC, ...) { va_list args; va_start(args, argC); DWORD64 _rcx = (argC > 0) ? argC--, va_arg(args, DWORD64) : 0; DWORD64 _rdx = (argC > 0) ? argC--, va_arg(args, DWORD64) : 0; DWORD64 _r8 = (argC > 0) ? argC--, va_arg(args, DWORD64) : 0; DWORD64 _r9 = (argC > 0) ? argC--, va_arg(args, DWORD64) : 0; reg64 _rax; _rax.v = 0; DWORD64 restArgs = (DWORD64)&va_arg(args, DWORD64); //conversion to QWORD for easier use in inline assembly DWORD64 _argC = argC; DWORD64 _func = func; DWORD back_esp = 0; __asm { ;//keep original esp in back_esp variable mov back_esp, esp ;//align esp to 8, without aligned stack some syscalls ;//may return errors ! and esp, 0xFFFFFFF8 X64_Start(); ;//fill first four arguments push _rcx X64_Pop(_RCX); push _rdx X64_Pop(_RDX); push _r8 X64_Pop(_R8); push _r9 X64_Pop(_R9); push edi push restArgs X64_Pop(_RDI); push _argC X64_Pop(_RAX); ;//put rest of arguments on the stack test eax, eax jz _ls_e lea edi, dword ptr [edi + 8*eax - 8] _ls: test eax, eax jz _ls_e push dword ptr [edi] sub edi, 8 sub eax, 1 jmp _ls _ls_e: ;//create stack space for spilling registers sub esp, 0x20 call _func ;//cleanup stack push _argC X64_Pop(_RCX); lea esp, dword ptr [esp + 8*ecx + 0x20] pop edi ;//set return value X64_Push(_RAX); pop _rax.dw[0] X64_End(); mov esp, back_esp } return _rax.v; }函数有一点长,但是有注释,并且整个想法也是非常简单的。第一个参数是我们想调用的x64函数地址,第二个参数是指定函数
需要的参数个数,其他的参数依赖于被调用的函数,所有的参数都应该转换成DWORD64。调用X64Call的一个小例子:
//GetProcAddress equivalent for 64-bit modules, implemented by calling the
//64-bit ntdll!LdrGetProcedureAddress through X64Call.
//
//module   - base address of the 64-bit module
//funcName - name of the export to resolve
//
//The address of LdrGetProcedureAddress is resolved on first use and cached
//in a function-local static. The resolved addresses are printed to stdout.
//
//Returns the 64-bit address of the export, or 0 when funcName is null,
//LdrGetProcedureAddress could not be located, or the call left the output
//value untouched.
DWORD64 GetProcAddress64(DWORD module, char* funcName)
{
    //strlen() below must not be handed a null pointer
    if (funcName == 0)
        return 0;

    static DWORD _LdrGetProcedureAddress = 0;
    if (_LdrGetProcedureAddress == 0)
    {
        _LdrGetProcedureAddress = getLdrGetProcedureAddress();
        printf("LdrGetProcedureAddress: %08X\n", _LdrGetProcedureAddress);
        if (_LdrGetProcedureAddress == 0)
            return 0;
    }

    WOW64::ANSI_STRING64 fName = { 0 };
    fName.Buffer = funcName;
    fName.Length = strlen(funcName);
    fName.MaximumLength = fName.Length + 1;

    DWORD64 funcRet = 0;
    //pointers go through DWORD first: MSVC sign-extends a 32-bit pointer that
    //is cast straight to a 64-bit integer, which would corrupt any address
    //at or above 0x80000000
    X64Call(_LdrGetProcedureAddress, 4,
            (DWORD64)module, (DWORD64)(DWORD)&fName,
            (DWORD64)0, (DWORD64)(DWORD)&funcRet);

    printf("%s: %08X\n", funcName, (DWORD)funcRet);
    return funcRet;
}
Running x86 code inside 64-bits process(在64位进程中运行x86代码)
; Switches from x64 mode to x86 mode (code segment 23h).
; call $+5 pushes the address of label xx; the mov stores the selector 23h
; at [rsp + 4]; the add moves the pushed address forward to label rt; retf
; then continues at rt in the new segment.
X86_Start MACRO
    LOCAL  xx, rt
    call   $+5
    xx     equ $
    mov    dword ptr [rsp + 4], 23h
    add    dword ptr [rsp], rt - xx
    retf
  rt:
ENDM

; Switches back from x86 mode to x64 mode (code segment 33h).
; Written as raw bytes because the instructions are executed in x86 mode
; while the surrounding source is assembled as 64-bit code.
X86_End MACRO
    db 6Ah, 33h            ; push  33h
    db 0E8h, 0, 0, 0, 0    ; call  $+5
    db 83h, 4, 24h, 5      ; add   dword ptr [esp], 5
    db 0CBh                ; retf
ENDM
Ending notes
文章中使用到的源码链接:http://download.csdn.net/detail/u014249041/7074553
http://download.csdn.net/detail/u014249041/7074555
原文地址: