APC 篇—— APC 执行
写在前面
此系列是本人一个字一个字码出来的,包括示例和实验截图。由于系统内核的复杂性,故可能有错误或者不全面的地方,如有错误,欢迎批评指正,本教程将会长期更新。 如有好的建议,欢迎反馈。码字不易,如果本篇文章有帮助你的,如有闲钱,可以打赏支持我的创作。如想转载,请把我的转载信息附在文章后面,并声明我的个人信息和本人博客地址即可,但必须事先通知我。
你如果是从中间插过来看的,请仔细阅读 羽夏看Win系统内核——简述 ,方便学习本教程。
看此教程之前,问几个问题,基础知识储备好了吗?保护模式篇学会了吗?练习做完了吗?没有的话就不要继续了。
🔒 华丽的分割线 🔒
QueueUserAPC 逆向分析
我们逆向该函数的目的是把它是如何调用进入内核,如何初始化APC
,如何插入APC
。为了节约篇幅,增加可读性,故使用经过重命名好的伪C代码进行讲解,你的命名可能和我的不太一样,如果不知道类型和变量含义,可以参考WRK
进行。
QueueUserAPC
这个函数在kernel32
这个Dll
中,我们来看看它的伪代码:
DWORD __stdcall QueueUserAPC(PAPCFUNC pfnAPC, HANDLE hThread, ULONG_PTR dwData)
{
NTSTATUS status; // eax
int ContextInfo; // eax
DWORD result; // eax
int pvBuffer; // [esp+4h] [ebp-8h] BYREF
int v7; // [esp+8h] [ebp-4h]
pvBuffer = 0;
v7 = 0;
status = RtlQueryInformationActivationContext(
RTL_QUERY_ACTIVATION_CONTEXT_FLAG_USE_ACTIVE_ACTIVATION_CONTEXT,
0,
0,
ActivationContextBasicInformation,
&pvBuffer,
8u,
0);
if ( status < 0 )
{
DbgPrint(
"SXS: %s failing because RtlQueryInformationActivationContext() returned status %08lx\n",
"QueueUserAPC",
status);
result = 0;
}
else
{
ContextInfo = pvBuffer;
if ( (v7 & 1) != 0 )
ContextInfo = -1;
result = NtQueueApcThread(hThread, BaseDispatchAPC, pfnAPC, dwData, ContextInfo) >= 0;
}
return result;
}
RtlQueryInformationActivationContext
这个名字贼长的函数不知道是干啥的,经过查阅是查询RTL
以获取有关当前激活上下文的信息,激活上下文简单就是manifest
里面描述信息封装成的结构体,对于该函数也不知道有啥用,就先略过,把重心放到NtQueueApcThread
上面。
BaseDispatchAPC
是个函数,通过函数名是派发APC
执行的函数,点击去看看其伪代码:
void __fastcall BaseDispatchAPC(struct _RTL_CALLER_ALLOCATED_ACTIVATION_CONTEXT_STACK_FRAME_EXTENDED *a1, void *a2, PVOID NormalContext, PVOID SystemArgument1, PVOID SystemArgument2)
{
PRTL_CALLER_ALLOCATED_ACTIVATION_CONTEXT_STACK_FRAME_EXTENDED v5; // ecx
int v6; // [esp+0h] [ebp-3Ch]
int v7[5]; // [esp+Ch] [ebp-30h] BYREF
HANDLE handle; // [esp+20h] [ebp-1Ch]
CPPEH_RECORD ms_exc; // [esp+24h] [ebp-18h]
v7[0] = 20;
v7[1] = 1;
v7[2] = 0;
v7[3] = 0;
v7[4] = 0;
handle = SystemArgument2;
if ( SystemArgument2 == -1 )
{
(NormalContext)(a1, a2, SystemArgument1);
}
else
{
RtlActivateActivationContextUnsafeFast(a1, a2);
ms_exc.registration.TryLevel = 0;
(NormalContext)(SystemArgument1, v7, SystemArgument2, v6);
ms_exc.registration.TryLevel = -1;
RtlDeactivateActivationContextUnsafeFast(v5);
RtlReleaseActivationContext(handle);
}
}
经过分析,该函数会把我们想要执行的函数地址是NormalContext
参数,传入的参数就是参数1,激活上下文变成参数2。我们把注意力放到该函数:
NTSTATUS __stdcall NtQueueApcThread(HANDLE ThreadHandle, PKNORMAL_ROUTINE ApcRoutine, PVOID NormalContext, PVOID SystemArgument1, PVOID SystemArgument2)
{
PETHREAD Thread; // ebx
struct _KAPC *apc; // eax
struct _KAPC *apc_1; // edi
HANDLE v9; // [esp+14h] [ebp+8h] FORCED
Thread = ObReferenceObjectByHandle(
ThreadHandle,
0x10u,
PsThreadType,
KeGetCurrentThread()->PreviousMode,
&ThreadHandle,
0);
if ( Thread >= 0 )
{
Thread = 0;
if ( (*(ThreadHandle + 584) & 0x10) != 0 ) // PETHREAD->CrossThreadFlags
{
Thread = STATUS_INVALID_HANDLE;
}
else
{
apc = ExAllocatePoolWithQuotaTag(8, 0x30u, 'pasP');
apc_1 = apc;
if ( apc )
{
KeInitializeApc(apc, v9, OriginalApcEnvironment, IopDeallocateApc, 0, ApcRoutine, 1, NormalContext);
if ( !KeInsertQueueApc(apc_1, SystemArgument1, SystemArgument2, 0) )
{
ExFreePoolWithTag(apc_1, 0);
Thread = STATUS_UNSUCCESSFUL;
}
}
else
{
Thread = STATUS_NO_MEMORY;
}
}
ObfDereferenceObject(v9);
}
return Thread;
}
可以看出,如果线程句柄被正常的转化为线程结构体,就会初始化APC
,然后插入APC
,全部的流程就这些。但这个思考题并没有完成,我们还得弄懂是如何初始化的,如何插入的。先看初始化:
PKAPC __stdcall KeInitializeApc(PKAPC Apc, PKTHREAD Thread, _KAPC_ENVIRONMENT Environment, int KernelRoutine, int RundownRoutine, int NormalRoutine, char ApcMode, PVOID NormalContext)
{
PKAPC result; // eax
char ApcStateIndex; // dl
result = Apc;
ApcStateIndex = Environment;
Apc->Type = 0x12;
Apc->Size = 0x30;
if ( Environment == CurrentApcEnvironment )
ApcStateIndex = Thread->ApcStateIndex;
Apc->Thread = Thread;
Apc->KernelRoutine = KernelRoutine;
Apc->ApcStateIndex = ApcStateIndex;
Apc->RundownRoutine = RundownRoutine;
Apc->NormalRoutine = NormalRoutine;
if ( NormalRoutine )
{
Apc->ApcMode = ApcMode;
Apc->NormalContext = NormalContext;
}
else
{
Apc->ApcMode = 0;
Apc->NormalContext = 0;
}
Apc->Inserted = 0;
return result;
}
初始化代码函数十分简单,我就不赘述了。再来看看如何插入:
char __stdcall KeInsertQueueApc(PKAPC apc, PVOID SystemArgument1, PVOID SystemArgument2, KPRIORITY Increment)
{
_KTHREAD *thread; // esi
char res; // bl
struct _KLOCK_QUEUE_HANDLE LockHandle; // [esp+Ch] [ebp-Ch] BYREF
thread = apc->Thread;
KeAcquireInStackQueuedSpinLockRaiseToSynch(&thread->ApcQueueLock, &LockHandle);
res = 0;
if ( thread->ApcQueueable )
{
apc->SystemArgument1 = SystemArgument1;
apc->SystemArgument2 = SystemArgument2;
res = KiInsertQueueApc(apc, Increment);
}
KeReleaseInStackQueuedSpinLock(&LockHandle);
return res;
}
这个插入函数也是挺简单的,加了一个APC
队列自旋锁,然后调用KiInsertQueueApc
实现,然后再释放。我们看看这个函数是咋实现的:
char __fastcall KiInsertQueueApc(PKAPC apc, KPRIORITY Increment)
{
// [COLLAPSED LOCAL DECLARATIONS. PRESS KEYPAD CTRL-"+" TO EXPAND]
Inserted = apc->Inserted == 0;
thread = apc->Thread;
if ( !Inserted )
return 0;
if ( apc->ApcStateIndex == 3 )
apc->ApcStateIndex = thread->ApcStateIndex;
ApcState = thread->ApcStatePointer[apc->ApcStateIndex];
ApcMode = apc->ApcMode;
if ( apc->NormalRoutine )
{
if ( ApcMode && apc->KernelRoutine == PsExitSpecialApc )
{
thread->ApcState.UserApcPending = 1;
v8 = &ApcState->ApcListHead[ApcMode];
v9 = v8->Flink;
apc->ApcListEntry.Flink = v8->Flink;
apc->ApcListEntry.Blink = v8;
v9->Blink = &apc->ApcListEntry;
v8->Flink = &apc->ApcListEntry;
}
else
{
v10 = &ApcState->ApcListHead[ApcMode];
v11 = v10->Blink;
apc->ApcListEntry.Flink = v10;
apc->ApcListEntry.Blink = v11;
v11->Flink = &apc->ApcListEntry;
v10->Blink = &apc->ApcListEntry;
}
}
else
{
v12 = &ApcState->ApcListHead[ApcMode];
for ( i = v12->Blink; i != v12 && i[2].Flink; i = i->Blink )
;
v14 = i->Flink;
apc->ApcListEntry.Flink = i->Flink;
apc->ApcListEntry.Blink = i;
v14->Blink = &apc->ApcListEntry;
i->Flink = &apc->ApcListEntry;
}
ApcStateIndex = apc->ApcStateIndex;
apc->Inserted = 1;
if ( ApcStateIndex == thread->ApcStateIndex )
{
if ( ApcMode )
{
if ( thread->State == Waiting
&& thread->WaitMode == UserMode
&& (thread->Alertable || thread->ApcState.UserApcPending) )
{
thread->ApcState.UserApcPending = 1;
KiUnwaitThread(thread, STATUS_USER_APC, Increment, 0);
}
}
else
{
State = thread->State;
thread->ApcState.KernelApcPending = 1;
if ( State == 2 )
{
LOBYTE(thread) = 1;
HalRequestSoftwareInterrupt(thread);
}
else if ( State == 5
&& !thread->WaitIrql
&& (!apc->NormalRoutine || !thread->KernelApcDisable && !thread->ApcState.KernelApcInProgress) )
{
KiUnwaitThread(thread, STATUS_KERNEL_APC, Increment, 0);
}
}
}
return 1;
}
如下部分就是插入用户APC
的部分:
if ( ApcMode && apc->KernelRoutine == PsExitSpecialApc )
{
thread->ApcState.UserApcPending = 1;
v8 = &ApcState->ApcListHead[ApcMode];
v9 = v8->Flink;
apc->ApcListEntry.Flink = v8->Flink;
apc->ApcListEntry.Blink = v8;
v9->Blink = &apc->ApcListEntry;
v8->Flink = &apc->ApcListEntry;
}
else
{
v10 = &ApcState->ApcListHead[ApcMode];
v11 = v10->Blink;
apc->ApcListEntry.Flink = v10;
apc->ApcListEntry.Blink = v11;
v11->Flink = &apc->ApcListEntry;
v10->Blink = &apc->ApcListEntry;
}
本思考题最终结束,当然里面也有很多细节并没有介绍,将在总结与提升介绍。
无法被唤醒的线程 APC 分析
这个实验应该也挺好做的,结论是执行两个,代码验证如下:
#include "stdafx.h"
#include <stdlib.h>
#define _WIN32_WINNT 0x400
#include <windows.h>
VOID WINAPI APCProc(ULONG Param)
{
printf("APC…… 0x%X \n",Param);
}
DWORD WINAPI ThreadProc(VOID* Param)
{
for (int i =0 ;i<8;i++)
{
Sleep(1000);
printf("Running\n");
}
SleepEx(1,TRUE);
return 0;
}
int main(int argc, char* argv[])
{
HANDLE hTread=CreateThread(NULL,NULL,ThreadProc,NULL,NULL,NULL);
Sleep(3000);
QueueUserAPC(APCProc,hTread,1);
QueueUserAPC(APCProc,hTread,2);
system("pause");
CloseHandle(hTread);
return 0;
}
效果如下:
APC 的执行流程
APC
的执行流程还是挺复杂的,需要大量的逆向。如果发现自己不懂的地方,请参考WRK
,里面的参数类型和变量类型都有详细的参考。里面有比较多未知的东西,不会的话,强烈推荐参考WRK
。下面我们分如下三部分讲解:合适执行APC
、内核APC
执行流程、用户APC
的执行流程。注意,如果真想学会,请一定要在看完何时执行APC
之后的内容就不要再看了,自己完整的把KiDeliverApc
这个处理APC
函数完整的逆一遍,不会或者完成了后,回来再看。逆向可能需要花费3个小时左右,自己要有心理准备。
何时执行 APC
我们都知道,APC
函数的执行与插入并不是同一个线程,具体点说在A线程中向B线程插入一个APC
,插入的动作是在A线程中完成的,但什么时候执行则由B线程决定,所以叫“异步过程调用”。内核APC
函数与用户APC
函数的执行时间和执行方式也有区别,下面我们来看看线程是何时处理APC
的:
SwapContext
这个函数我们都很熟悉,在学习线程切换的时候重点研究的,咱把APC
相关的东西拿出来看看:
cmp [esi+_KTHREAD.ApcState.KernelApcPending], 0
jnz short loc_46A9BD
popf
xor eax, eax
retn
; ---------------------------------------------------------------------------
loc_46A9BD: ; CODE XREF: SwapContext+D7↑j
popf
jnz short loc_46A9C3
mov al, 1
retn
之前说过KernelApcPending
是表示有没有内核APC
处理的标志,如果是1,也就是有的话,就会使返回值置1;反之置0。也就是说,这个函数的返回值是有意义的。我们再往上一级函数看看:
; __fastcall KiSwapContext(x)
@KiSwapContext@4 proc near ; CODE XREF: KiSwapThread()+41↑p
var_200FE4 = dword ptr -200FE4h
var_10 = dword ptr -10h
var_C = dword ptr -0Ch
var_8 = dword ptr -8
var_4 = dword ptr -4
sub esp, 10h
mov [esp+10h+var_4], ebx
mov [esp+10h+var_8], esi
mov [esp+10h+var_C], edi
mov [esp+10h+var_10], ebp
mov ebx, ds:0FFDFF01Ch ; ebx = &_KPCR
mov esi, ecx ; esi = ecx = NextReadyThread
mov edi, [ebx+_KPCR.PrcbData.CurrentThread] ; oldThread
mov [ebx+_KPCR.PrcbData.CurrentThread], esi
mov cl, [edi+_KTHREAD.WaitIrql]
call SwapContext
mov ebp, [esp+10h+var_10]
mov edi, [esp+10h+var_C]
mov esi, [esp+10h+var_8]
mov ebx, [esp+10h+var_4]
add esp, 10h
retn
@KiSwapContext@4 endp
这一级函数并没有用到返回值,也没有操作eax
,说明就没有用到吗?我们再往上一级看看:
loc_429CCF: ; CODE XREF: KiSwapThread()+1A↑j
mov ecx, eax ; 从这里判断出这个参数为一个线程结构体 ecx 传参
call @KiSwapContext@4 ; KiSwapContext(x)
test al, al
mov cl, [edi+58h]
mov edi, [edi+54h]
mov esi, ds:__imp_@KfLowerIrql@4 ; KfLowerIrql(x)
jz short loc_429CF6
mov cl, 1 ; NewIrql
call esi ; KfLowerIrql(x) ; KfLowerIrql(x)
xor eax, eax
push eax ; trapframe
push eax ; unknown
push eax ; CanUserMode
call _KiDeliverApc@12 ; KiDeliverApc(x,x,x)
xor cl, cl ; NewIrql
loc_429CF6: ; CODE XREF: KiSwapThread()+54↑j
call esi ; KfLowerIrql(x) ; KfLowerIrql(x)
mov eax, edi
pop edi
pop esi
retn
看到test al, al
这条汇编了吗?如果为1的话,也就是有内核APC
执行,就会继续往下走,调用KiDeliverApc
这个函数,也就是我们前面提到的处理APC
的函数,传递的三个参数都是0。
KiServiceExit
如果你比较仔细的话,这个函数你也是眼熟的,就是我们在学习系统调用时看到的函数。不过当时由于知识的限制没有要求。这个函数是系统调用、中断异常必经之处,我们来看看与APC
相关的汇编:
cli
test dword ptr [ebp+70h], 20000h
jnz short loc_466659
test byte ptr [ebp+6Ch], 1
jz short loc_4666B0
loc_466659: ; CODE XREF: _KiServiceExit+8↑j
; _KiServiceExit+63↓j
mov ebx, ds:0FFDFF124h
mov byte ptr [ebx+2Eh], 0
cmp byte ptr [ebx+4Ah], 0
jz short loc_4666B0
mov ebx, ebp
mov [ebx+44h], eax
mov dword ptr [ebx+50h], 3Bh ; ';'
mov dword ptr [ebx+38h], 23h ; '#'
mov dword ptr [ebx+34h], 23h ; '#'
mov dword ptr [ebx+30h], 0
mov ecx, 1 ; NewIrql
call ds:__imp_@KfRaiseIrql@4 ; KfRaiseIrql(x)
push eax
sti
push ebx ; trapframe
push 0 ; unknown
push 1 ; CanUserAPC
call _KiDeliverApc@12 ; KiDeliverApc(x,x,x)
pop ecx ; NewIrql
call ds:__imp_@KfLowerIrql@4 ; KfLowerIrql(x)
mov eax, [ebx+44h]
cli
jmp short loc_466659
可以看出传递参数的不同,第一个参数是1,第二个参数是0,第三个就是所谓的TrapFrame
。
内核 APC 执行流程
内核APC
执行算是简单的。不过为了讲解方便,我们需要把KiDeliverApc
的伪代码放上:
_KTRAP_FRAME *__stdcall KiDeliverApc(BOOLEAN CanUserAPC, int unknown, _KTRAP_FRAME *trapframe)
{
// [COLLAPSED LOCAL DECLARATIONS. PRESS KEYPAD CTRL-"+" TO EXPAND]
if ( trapframe )
{
_eip = trapframe->Eip;
if ( _eip >= ExpInterlockedPopEntrySListResume && _eip <= ExpInterlockedPopEntrySListEnd )
trapframe->Eip = ExpInterlockedPopEntrySListResume;
}
currentThread = KeGetCurrentThread();
tmpTrapFrame = currentThread->Tcb.TrapFrame;
v5 = currentThread->Tcb.ApcState.Process;
currentThread->Tcb.TrapFrame = trapframe;
APCDeliverProcess = v5;
KeAcquireInStackQueuedSpinLock(¤tThread->Tcb.ApcQueueLock, &LockHandle);
currentThread->Tcb.ApcState.KernelApcPending = 0;
ApcListEntry = (currentThread + offsetof(_KTHREAD, ApcState));
while ( !IsListEmpty(ApcListEntry) )
{
plist = ApcListEntry->Flink;
kpac = CONTAINING_RECORD(ApcListEntry->Flink, _KAPC, ApcListEntry);
KernelRoutine = kpac->KernelRoutine;
NormalRoutine = kpac->NormalRoutine;
WaitStatus = kpac->NormalContext;
SystemArgument1 = kpac->SystemArgument1;
SystemArgument2 = kpac->SystemArgument2;
if ( NormalRoutine )
{
if ( currentThread->Tcb.ApcState.KernelApcInProgress || currentThread->Tcb.KernelApcDisable )
goto NoUserAPC;
RemoveEntryList(plist);
kpac->Inserted = 0;
KeReleaseInStackQueuedSpinLock(&LockHandle);
KernelRoutine(kpac, &NormalRoutine, &WaitStatus, &SystemArgument1, &SystemArgument2);
if ( NormalRoutine )
{
currentThread->Tcb.ApcState.KernelApcInProgress = 1;
KfLowerIrql(0);
(NormalRoutine)(WaitStatus, SystemArgument1, SystemArgument2);
LockHandle.OldIrql = KfRaiseIrql(1u);
}
KeAcquireInStackQueuedSpinLock(¤tThread->Tcb.ApcQueueLock, &LockHandle);
currentThread->Tcb.ApcState.KernelApcInProgress = 0;
}
else
{
RemoveEntryList(plist);
kpac->Inserted = 0;
KeReleaseInStackQueuedSpinLock(&LockHandle);
KernelRoutine(kpac, &NormalRoutine, &WaitStatus, &SystemArgument1, &SystemArgument2);
KeAcquireInStackQueuedSpinLock(¤tThread->Tcb.ApcQueueLock, &LockHandle);
}
}
P = currentThread->Tcb.ApcState.ApcListHead[UserMode].Flink;
if ( P == ¤tThread->Tcb.ApcState.ApcListHead[UserMode]
|| CanUserAPC != UserMode
|| !currentThread->Tcb.ApcState.UserApcPending )
{
NoUserAPC:
KeReleaseInStackQueuedSpinLock(&LockHandle);
goto LABEL_21;
}
currentThread->Tcb.ApcState.UserApcPending = 0;
kpacSTATE = &P[-2u].Blink; // ====如下代码错误请参考汇编===
KernelRoutine_1 = P[1].Flink;
NormalRoutine = P[2].Flink;
WaitStatus = P[2].Blink;
SystemArgument1 = P[3].Flink;
SystemArgument2 = P[3].Blink;
v12 = P->Flink;
v13 = P->Blink;
v13->Flink = v12;
v12->Blink = v13;
kpacSTATE[1].UserApcPending = 0; // =======错误代码结束=======
KeReleaseInStackQueuedSpinLock(&LockHandle);
(KernelRoutine_1)(kpacSTATE, &NormalRoutine, &WaitStatus, &SystemArgument1, &SystemArgument2);
if ( NormalRoutine )
KiInitializeUserApc(unknown, trapframe, NormalRoutine, WaitStatus, SystemArgument1, SystemArgument2);
else
KeTestAlertThread(1);
LABEL_21:
if ( currentThread->Tcb.ApcState.Process != APCDeliverProcess )
KeBugCheckEx(
5u,
APCDeliverProcess,
currentThread->Tcb.ApcState.Process,
currentThread->Tcb.ApcStateIndex,
KeGetPcr()->PrcbData.DpcRoutineActive);
result = tmpTrapFrame;
currentThread->Tcb.TrapFrame = tmpTrapFrame;
return result;
}
当然,上面的伪代码有错误,我已经用注释标注错误的区域,下面我们再把汇编放上:
; _KTRAP_FRAME *__stdcall KiDeliverApc(BOOLEAN CanUserAPC, int unknown, _KTRAP_FRAME *trapframe)
public _KiDeliverApc@12
_KiDeliverApc@12 proc near ; CODE XREF: KiSwapThread()+5F↓p
; _KiServiceExit+53↓p ...
LockHandle = _KLOCK_QUEUE_HANDLE ptr -28h
tmpTrapFrame = dword ptr -1Ch
APCDeliverProcess= dword ptr -18h
KernelRoutine = dword ptr -14h
NormalContext = dword ptr -10h
SystemArgument1 = dword ptr -0Ch
SystemArgument2 = dword ptr -8
NormalRoutine = dword ptr -4
CanUserAPC = byte ptr 8
a2 = dword ptr 0Ch
trapframe = dword ptr 10h
mov edi, edi
push ebp
mov ebp, esp
sub esp, 28h
mov ecx, [ebp+trapframe]
test ecx, ecx
jz short loc_426A54
mov edx, [ecx+_KTRAP_FRAME._Eip]
mov eax, offset _ExpInterlockedPopEntrySListResume@0 ; ExpInterlockedPopEntrySListResume()
cmp edx, eax
jb short loc_426A54
cmp edx, offset _ExpInterlockedPopEntrySListEnd@0 ; ExpInterlockedPopEntrySListEnd()
ja short loc_426A54
mov [ecx+_KTRAP_FRAME._Eip], eax
loc_426A54: ; CODE XREF: KiDeliverApc(x,x,x)+D↑j
; KiDeliverApc(x,x,x)+19↑j ...
push ebx
push esi
push edi
mov eax, large fs:_KPCR.PrcbData.CurrentThread
mov esi, eax
mov eax, [esi+_KTHREAD.TrapFrame]
mov [ebp+tmpTrapFrame], eax
mov eax, [esi+_KTHREAD.ApcState.Process]
mov [esi+_KTHREAD.TrapFrame], ecx
lea ecx, [esi+_ETHREAD.Tcb.ApcQueueLock] ; SpinLock
lea edx, [ebp+LockHandle] ; LockHandle
mov [ebp+APCDeliverProcess], eax
call ds:__imp_@KeAcquireInStackQueuedSpinLock@8 ; KeAcquireInStackQueuedSpinLock(x,x)
mov [esi+_KTHREAD.ApcState.KernelApcPending], 0
lea ebx, [esi+_KTHREAD.ApcState]
jmp loc_426B70
; ---------------------------------------------------------------------------
ListEmptyLoop: ; CODE XREF: KiDeliverApc(x,x,x)+144↓j
mov eax, [ebx]
lea edi, [eax-0Ch]
mov ecx, [edi+_KAPC.KernelRoutine]
mov [ebp+KernelRoutine], ecx
mov ecx, [edi+_KAPC.NormalRoutine]
test ecx, ecx
mov [ebp+NormalRoutine], ecx
mov edx, [edi+_KAPC.NormalContext]
mov [ebp+NormalContext], edx
mov edx, [edi+_KAPC.SystemArgument1]
mov [ebp+SystemArgument1], edx
mov edx, [edi+_KAPC.SystemArgument2]
mov [ebp+SystemArgument2], edx
jnz short NormalRoutineNotNull
mov ecx, [eax] ; 开始摘掉 APC
mov eax, [eax+4]
mov [eax], ecx
mov [ecx+4], eax
lea ecx, [ebp+LockHandle] ; LockHandle
mov [edi+_KAPC.Inserted], 0 ; WaitListEntry
call ds:__imp_@KeReleaseInStackQueuedSpinLock@4 ; KeReleaseInStackQueuedSpinLock(x)
lea eax, [ebp+SystemArgument2]
push eax
lea eax, [ebp+SystemArgument1]
push eax
lea eax, [ebp+NormalContext]
push eax
lea eax, [ebp+NormalRoutine]
push eax
push edi
call [ebp+KernelRoutine]
lea edx, [ebp+LockHandle] ; LockHandle
lea ecx, [esi+_KTHREAD.ApcQueueLock] ; SpinLock
call ds:__imp_@KeAcquireInStackQueuedSpinLock@8 ; KeAcquireInStackQueuedSpinLock(x,x)
jmp short loc_426B70
; ---------------------------------------------------------------------------
NormalRoutineNotNull: ; CODE XREF: KiDeliverApc(x,x,x)+86↑j
cmp [esi+_KTHREAD.ApcState.KernelApcInProgress], 0
jnz NoUserAPC
cmp [esi+_KTHREAD.KernelApcDisable], 0
jnz NoUserAPC
mov ecx, [eax]
mov eax, [eax+_LIST_ENTRY.Blink]
mov [eax], ecx
mov [ecx+_LIST_ENTRY.Blink], eax
lea ecx, [ebp+LockHandle] ; LockHandle
mov [edi+_KAPC.Inserted], 0
call ds:__imp_@KeReleaseInStackQueuedSpinLock@4 ; KeReleaseInStackQueuedSpinLock(x)
lea eax, [ebp+SystemArgument2]
push eax
lea eax, [ebp+SystemArgument1]
push eax
lea eax, [ebp+NormalContext]
push eax
lea eax, [ebp+NormalRoutine]
push eax
push edi
call [ebp+KernelRoutine]
cmp [ebp+NormalRoutine], 0
jz short NormalRoutineNULL
xor cl, cl ; NewIrql
mov [esi+_KTHREAD.ApcState.KernelApcInProgress], 1
call ds:__imp_@KfLowerIrql@4 ; KfLowerIrql(x)
push [ebp+SystemArgument2]
push [ebp+SystemArgument1]
push [ebp+NormalContext]
call [ebp+NormalRoutine]
mov cl, 1 ; NewIrql
call ds:__imp_@KfRaiseIrql@4 ; KfRaiseIrql(x)
mov [ebp+LockHandle.OldIrql], al
NormalRoutineNULL: ; CODE XREF: KiDeliverApc(x,x,x)+10A↑j
lea edx, [ebp+LockHandle] ; LockHandle
lea ecx, [esi+_ETHREAD.Tcb.ApcQueueLock] ; SpinLock
call ds:__imp_@KeAcquireInStackQueuedSpinLock@8 ; KeAcquireInStackQueuedSpinLock(x,x)
mov [esi+_KTHREAD.ApcState.KernelApcInProgress], 0
loc_426B70: ; CODE XREF: KiDeliverApc(x,x,x)+5C↑j
; KiDeliverApc(x,x,x)+C2↑j
cmp [ebx+_KAPC_STATE.ApcListHead.Flink], ebx
jnz ListEmptyLoop
lea ecx, [esi+(_ETHREAD.Tcb.ApcState.ApcListHead.Flink+8)] ; ApcListHead[UserMode]
mov eax, [ecx+_LIST_ENTRY.Flink]
cmp eax, ecx
jz NoUserAPC
cmp [ebp+CanUserAPC], 1
jnz short NoUserAPC
cmp [esi+_KTHREAD.ApcState.UserApcPending], 0
jz short NoUserAPC
mov [esi+_KTHREAD.ApcState.UserApcPending], 0
lea edi, [eax-0Ch]
mov ecx, [edi+_KAPC.NormalRoutine]
mov ebx, [edi+_KAPC.KernelRoutine]
mov [ebp+NormalRoutine], ecx
mov ecx, [edi+_KAPC.NormalContext]
mov [ebp+NormalContext], ecx
mov ecx, [edi+_KAPC.SystemArgument1]
mov [ebp+SystemArgument1], ecx
mov ecx, [edi+_KAPC.SystemArgument2]
mov [ebp+SystemArgument2], ecx
mov ecx, [eax+_LIST_ENTRY.Flink]
mov eax, [eax+_LIST_ENTRY.Blink]
mov [eax+_LIST_ENTRY.Flink], ecx
mov [ecx+_LIST_ENTRY.Blink], eax
lea ecx, [ebp+LockHandle] ; LockHandle
mov [edi+_KAPC.Inserted], 0
call ds:__imp_@KeReleaseInStackQueuedSpinLock@4 ; KeReleaseInStackQueuedSpinLock(x)
lea eax, [ebp+SystemArgument2]
push eax
lea eax, [ebp+SystemArgument1]
push eax
lea eax, [ebp+NormalContext]
push eax
lea eax, [ebp+NormalRoutine]
push eax
push edi
call ebx
cmp [ebp+NormalRoutine], 0
jnz short loc_426BEC
push 1
call _KeTestAlertThread@4 ; KeTestAlertThread(x)
jmp short loc_426C0E
; ---------------------------------------------------------------------------
loc_426BEC: ; CODE XREF: KiDeliverApc(x,x,x)+1B3↑j
push [ebp+SystemArgument2] ; SystemArgument2
push [ebp+SystemArgument1] ; SystemArgument1
push [ebp+NormalContext] ; NormalContext
push [ebp+NormalRoutine] ; NormalRoutine
push [ebp+trapframe] ; TrapFrame
push [ebp+a2] ; ExceptionFrame
call _KiInitializeUserApc@24 ; KiInitializeUserApc(x,x,x,x,x,x)
jmp short loc_426C0E
; ---------------------------------------------------------------------------
NoUserAPC: ; CODE XREF: KiDeliverApc(x,x,x)+C8↑j
; KiDeliverApc(x,x,x)+D5↑j ...
lea ecx, [ebp+LockHandle] ; LockHandle
call ds:__imp_@KeReleaseInStackQueuedSpinLock@4 ; KeReleaseInStackQueuedSpinLock(x)
loc_426C0E: ; CODE XREF: KiDeliverApc(x,x,x)+1BC↑j
; KiDeliverApc(x,x,x)+1D5↑j
mov ecx, [ebp+APCDeliverProcess]
cmp [esi+_KTHREAD.ApcState.Process], ecx
jz short loc_426C30
mov eax, large fs:_KPCR.PrcbData.DpcRoutineActive
push eax ; BugCheckParameter4
movzx eax, [esi+_KTHREAD.ApcStateIndex]
push eax ; BugCheckParameter3
push [esi+_KTHREAD.ApcState.Process] ; BugCheckParameter2
push ecx ; BugCheckParameter1
push 5 ; BugCheckCode
call _KeBugCheckEx@20 ; KeBugCheckEx(x,x,x,x,x)
; ---------------------------------------------------------------------------
loc_426C30: ; CODE XREF: KiDeliverApc(x,x,x)+1E6↑j
mov eax, [ebp+tmpTrapFrame]
pop edi
mov [esi+_KTHREAD.TrapFrame], eax
pop esi
pop ebx
leave
retn 0Ch
_KiDeliverApc@12 endp
通过代码分析,上面的两处传的参数都会处理内核APC
,线程切换处理内核APC
,另一个处理用户和内核APC
。由于此部分讲解内核APC
的处理,我们只看相关部分。
由于此部分不难,我们只简单介绍一下流程,其他细节自行查看代码:
- 判断第一个链表是否为空(链表地址和链表的内容十分相等,相等则为空,微软就是这么设计的)
- 判断
KTHREAD.ApcState.KernelApcInProgress
是否为1 - 判断是否禁用内核
APC
(KTHREAD.KernelApcDisable
是否为1) - 将当前
KAPC
结构体从链表中摘除 - 执行
KAPC.KernelRoutine
指定的函数,释放KAPC
结构体占用的空间 - 将
KTHREAD.ApcState.KernelApcInProgress
设置为1,标识正在执行内核APC
- 执行真正的内核
APC
函数(KAPC.NormalRoutine
) - 执行完毕,将
KernelApcInProgress
改为0 - 循环上述过程直至装内核
APC
的链表全部完成
我们可以做出如下总结:
- 内核
APC
在线程切换的时候就会执行,这也就意味着,只要插入内核APC
很快就会执行。 - 在执行用户
APC
之前会先执行内核APC
。 - 内核
APC
在内核空间执行,不需要换栈,一个循环全部执行完毕。
有些细节我还没有强调,不过自己应该也能够发现,这些东西我都会在下一篇总结与提升进行讲解。下面我们来分析用户APC
的执行流程。
用户 APC 执行流程
上述伪代码错误的区域都是赋值操作,找到KAPC
结构,并摘除准备执行的操作,请参考汇编,挺简单的我就不赘述了。我们把重点放到KiInitializeUserApc
这个函数上,先看其反汇编:
int __stdcall KiInitializeUserApc(_KTRAP_FRAME *ExceptionFrame, _KTRAP_FRAME *TrapFrame, PKNORMAL_ROUTINE NormalRoutine, PVOID NormalContext, PVOID SystemArgument1, PVOID SystemArgument2)
{
PKNORMAL_ROUTINE *_esp; // esi
unsigned int newEflag; // eax
int result; // eax
_CONTEXT ContextRecord; // [esp+70h] [ebp-2E8h] BYREF
void *v10; // [esp+33Ch] [ebp-1Ch]
CPPEH_RECORD ms_exc; // [esp+340h] [ebp-18h]
if ( (TrapFrame->EFlags & 0x20000) == offsetof(_KTRAP_FRAME, DbgEbp) )
{
ContextRecord.ContextFlags = 0x10017;
KeContextFromKframes(TrapFrame, ExceptionFrame, &ContextRecord);
ms_exc.registration.TryLevel = 0;
_esp = ((ContextRecord.Esp & 0xFFFFFFFC) - 0x2DC);
ProbeForWrite(_esp, 0x2DCu, 4u);
qmemcpy(_esp + 4, &ContextRecord, 0x2CCu);
TrapFrame->SegCs = 0x1B;
TrapFrame->HardwareSegSs = 0x23;
TrapFrame->SegDs = 0x23;
TrapFrame->SegEs = 0x23;
TrapFrame->SegFs = 0x3B;
TrapFrame->SegGs = 0;
if ( (ContextRecord.EFlags & 0x20000) != 0 && KeI386VdmIoplAllowed )
newEflag = KeI386EFlagsOrMaskV86 | ContextRecord.EFlags & KeI386EFlagsAndMaskV86;
else
newEflag = ContextRecord.EFlags & 0x3E0DD7 | 0x200;
TrapFrame->EFlags = newEflag;
if ( KeGetCurrentThread()->Iopl )
BYTE1(TrapFrame->EFlags) |= 0x30u;
TrapFrame->HardwareEsp = _esp;
TrapFrame->Eip = KeUserApcDispatcher;
TrapFrame->ErrCode = 0;
*_esp = NormalRoutine;
_esp[1] = NormalContext;
_esp[2] = SystemArgument1;
_esp[3] = SystemArgument2;
ms_exc.registration.TryLevel = -1;
}
xHalReferenceHandler(v10);
return result;
}
处理用户APC
要比内核APC
复杂的多,因为,用户APC
函数要在用户空间执行的,这里涉及到大量换栈的操作:当线程从用户层进入内核层时,要保留原来的运行环境,比如各种寄存器,栈的位置等等(_Trap_Frame
),然后切换成内核的堆栈,如果正常返回,恢复堆栈环境即可。但如果有用户APC
要执行的话,就意味着线程要提前返回到用户空间去执行,而且返回的位置不是线程进入内核时的位置,而是返回到其他的位置,每处理一个用户APC
都会涉及到:内核到用户空间再回到内核空间。
线程进0环时,原来的运行环境(寄存器栈顶等)保存到_Trap_Frame
结构体中,如果要提前返回3环去处理用户APC
,就必须要修改_Trap_Frame
结构体。比如:进0环时的位置存储在EIP
中,现在要提前返回,而且返回的并不是原来的位置,那就意味着必须要修改EIP
为新的返回位置。还有堆栈ESP
,也要修改为处理APC
需要的堆栈。那原来的值怎么办呢?处理完APC
后该如何返回原来的位置呢?
KiInitializeUserApc
要做的第一件事就是备份,将原来_Trap_Frame
的值备份到一个新的结构体中(CONTEXT
),这个功能由其子函数KeContextFromKframes
来完成。我们可以点进去看看:
void __stdcall KeContextFromKframes(_KTRAP_FRAME *TrapFrame, _KTRAP_FRAME *ExceptionFrame, _CONTEXT *ContextRecord)
{
// [COLLAPSED LOCAL DECLARATIONS. PRESS KEYPAD CTRL-"+" TO EXPAND]
v3 = TrapFrame;
NewIrql = KeGetCurrentIrql();
if ( !NewIrql )
NewIrql = KfRaiseIrql(1u);
v4 = ContextRecord->ContextFlags;
v5 = 0xFFFF;
if ( (ContextRecord->ContextFlags & CONTEXT_CONTROL) == CONTEXT_CONTROL )
{
ContextRecord->Ebp = TrapFrame->Ebp;
ContextRecord->Eip = TrapFrame->Eip;
v6 = TrapFrame->SegCs;
if ( (v6 & 0xFFF8) == 0 && (TrapFrame->EFlags & 0x20000) == 0 )
v6 = TrapFrame->TempSegCs;
ContextRecord->SegCs = v6;
ContextRecord->EFlags = TrapFrame->EFlags;
ContextRecord->SegSs = KiSegSsFromTrapFrame(TrapFrame);
ContextRecord->Esp = KiEspFromTrapFrame(TrapFrame);
}
if ( (v4 & CONTEXT_SEGMENTS) == CONTEXT_SEGMENTS )
{
if ( (TrapFrame->EFlags & 0x20000) != 0 )
{
ContextRecord->SegGs = v5 & TrapFrame->V86Gs;
ContextRecord->SegFs = v5 & TrapFrame->V86Fs;
ContextRecord->SegEs = v5 & TrapFrame->V86Es;
v7 = TrapFrame->V86Ds;
}
else
{
if ( TrapFrame->SegCs == 8 )
{
TrapFrame->SegGs = 0;
TrapFrame->SegFs = 0x30;
TrapFrame->SegEs = 0x23;
TrapFrame->SegDs = 0x23;
}
ContextRecord->SegGs = v5 & TrapFrame->SegGs;
ContextRecord->SegFs = v5 & TrapFrame->SegFs;
ContextRecord->SegEs = v5 & TrapFrame->SegEs;
v7 = TrapFrame->SegDs;
}
ContextRecord->SegDs = v5 & v7;
}
v8 = ContextRecord->ContextFlags;
if ( (ContextRecord->ContextFlags & 0x10002) == 65538 )
{
ContextRecord->Edi = TrapFrame->Edi;
ContextRecord->Esi = TrapFrame->Esi;
ContextRecord->Ebx = TrapFrame->Ebx;
ContextRecord->Ecx = TrapFrame->Ecx;
ContextRecord->Edx = TrapFrame->Edx;
ContextRecord->Eax = TrapFrame->Eax;
}
if ( (v8 & CONTEXT_EXTENDED_REGISTERS) == CONTEXT_EXTENDED_REGISTERS
&& (TrapFrame->SegCs & 1) != 0
&& KeI386NpxPresent )
{
KiFlushNPXState(0);
qmemcpy(ContextRecord->ExtendedRegisters, &TrapFrame[1], sizeof(ContextRecord->ExtendedRegisters));
v3 = TrapFrame;
}
if ( (ContextRecord->ContextFlags & 0x10008) == 65544 && (v3->SegCs & 1) != 0 )
{
v9 = &v3[1].DbgEbp;
if ( KeI386NpxPresent )
{
if ( KeI386FxsrPresent == 1 )
{
v9 = v16;
KiFlushNPXState(v16);
}
else
{
KiFlushNPXState(0);
}
ContextRecord->FloatSave.ControlWord = *v9;
ContextRecord->FloatSave.StatusWord = v9[1];
ContextRecord->FloatSave.TagWord = v9[2];
ContextRecord->FloatSave.ErrorOffset = v9[3];
ContextRecord->FloatSave.ErrorSelector = v9[4];
ContextRecord->FloatSave.DataOffset = v9[5];
ContextRecord->FloatSave.DataSelector = v9[6];
ContextRecord->FloatSave.Cr0NpxState = v3[4].Eip;
for ( i = 0; i < 0x50; ++i )
ContextRecord->FloatSave.RegisterArea[i] = *(v9 + i + 28);
}
else if ( KiEm87StateToNpxFrame(&ContextRecord->FloatSave) )
{
ContextRecord->FloatSave.Cr0NpxState = v3[4].Eip;
}
else
{
ContextRecord->ContextFlags &= 0xFFFFFFF7;
}
}
if ( (ContextRecord->ContextFlags & CONTEXT_DEBUG_REGISTERS) == CONTEXT_DEBUG_REGISTERS )
{
v11 = (v3->SegCs & 1) == 0;
v12 = KeGetCurrentThread()->DebugActive;
v15[0] = v12;
if ( (!v11 || (v3->EFlags & 0x20000) != 0) && v12 )
{
ContextRecord->Dr0 = v3->Dr0;
ContextRecord->Dr1 = v3->Dr1;
ContextRecord->Dr2 = v3->Dr2;
ContextRecord->Dr3 = v3->Dr3;
ContextRecord->Dr6 = v3->Dr6;
v13 = KiUpdateDr7(v3->Dr7, v15);
}
else
{
v13 = 0;
ContextRecord->Dr0 = 0;
ContextRecord->Dr1 = 0;
ContextRecord->Dr2 = 0;
ContextRecord->Dr3 = 0;
ContextRecord->Dr6 = 0;
}
ContextRecord->Dr7 = v13;
}
if ( !NewIrql )
KfLowerIrql(0);
xHalReferenceHandler(v16[30]);
}
备份完毕后,我就可以随心所欲改了。_esp = ((ContextRecord.Esp & 0xFFFFFFFC) - 0x2DC)
这个就是所谓的提栈操作,为构造的新Context
提供空间,ContextRecord.Esp & 0xFFFFFFFC
就是让内存实现4字节对齐。将内部的段寄存器相关改为3环的环境,我们重点看下面的代码:
TrapFrame->HardwareEsp = _esp;
TrapFrame->Eip = KeUserApcDispatcher;
TrapFrame->ErrCode = 0;
*_esp = NormalRoutine;
_esp[1] = NormalContext;
_esp[2] = SystemArgument1;
_esp[3] = SystemArgument2;
可以看出,这几个操作就是修改EIP
并压栈的操作,KeUserApcDispatcher
是一个全局变量,这个是3环的函数,在系统启动完毕后就会赋值,我们在WinDbg
看看是何方神圣:
kd> !process 0 0
**** NT ACTIVE PROCESS DUMP ****
……
Failed to get VadRoot
PROCESS 89b51b28 SessionId: 0 Cid: 05f0 Peb: 7ffde000 ParentCid: 05d4
DirBase: 12fc01c0 ObjectTable: e17efb00 HandleCount: 353.
Image: explorer.exe
……
kd> .process 89b51b28
ReadVirtual: 89b51b40 not properly sign extended
Implicit process is now 89b51b28
WARNING: .cache forcedecodeuser is not enabled
kd> u 7c92e430
ntdll!KiUserApcDispatcher:
7c92e430 8d7c2410 lea edi,[esp+10h]
7c92e434 58 pop eax
7c92e435 ffd0 call eax
7c92e437 6a01 push 1
7c92e439 57 push edi
7c92e43a e801ecffff call ntdll!NtContinue (7c92d040)
7c92e43f 90 nop
ntdll!KiUserCallbackDispatcher:
7c92e440 83c404 add esp,4
可以看出这个函数就是KiUserApcDispatcher
,在ntdll.dll
当中,我们用IDA
打开看看:
; __stdcall KiUserApcDispatcher(x, x, x, x, x)
public _KiUserApcDispatcher@20
_KiUserApcDispatcher@20 proc near ; DATA XREF: .text:off_7C923428↑o
arg_C = byte ptr 10h
lea edi, [esp+arg_C]
pop eax
call eax
push 1
push edi
call _ZwContinue@8 ; ZwContinue(x,x)
nop
_KiUserApcDispatcher@20 endp ; sp-analysis failed
esp + 10h
就是构造出来的_Trap_Frame
结构体的地址。我们画个堆栈图就能明白了:
当用户在3环调用QueueUserAPC函数来插入APC
时,不需要提供NormalRoutine
,这个参数是在QueueUserAPC
内部指定的,是BaseDispatchAPC
这个函数,可以自行翻阅我分析的过程。
我们下面再看看ZwContinue
函数,它的作用如下:
- 返回内核,如果还有用户
APC
,重复上面的执行过程。 - 如果没有需要执行的用户
APC
,会将CONTEXT
赋值给Trap_Frame
结构体。就像从来没有修改过一样。ZwContinue
后面的代码不会执行,线程从哪里进0环仍然会从哪里回去。
我们继续做出如下总结:
- 内核APC在线程切换时执行,不需要换栈,比较简单,一个循环执行完毕。
- 用户APC在系统调用、中断或异常返回3环前会进行判断,如果有要执行的用户APC,再执行。
- 用户APC执行前会先执行内核APC。
到此,与APC
执行相关的知识,就到此结束了。
下一篇
APC 篇——总结与提升
本文来自博客园,作者:寂静的羽夏 ,一个热爱计算机技术的菜鸟
转载请注明原文链接:https://www.cnblogs.com/wingsummer/p/15865885.html