UE4之Game、Render、RHI多线程架构
游戏线程(GameThread)
GameThread是引擎运行的心脏,承载游戏逻辑、运行流程的工作,也是其它线程的数据发起者。在FEngineLoop::Tick函数执行每帧逻辑的更新。
在引擎启动时会把GameThread的线程id存储到全局变量GGameThreadId中,且稍后会设置到TaskGraph系统中。
int32 FEngineLoop::PreInitPreStartupScreen(const TCHAR* CmdLine) { // ... ... // 创建线程自己的TLS数据FPerThreadFreeBlockLists 注:Binned2、Binned3内存分配器需要 FMemory::SetupTLSCachesOnCurrentThread(); // remember thread id of the main thread GGameThreadId = FPlatformTLS::GetCurrentThreadId();// 游戏线程id GIsGameThreadIdInitialized = true; // 游戏线程id是否被初始化 FPlatformProcess::SetThreadAffinityMask(FPlatformAffinity::GetMainGameMask()); // 设置当前线程的cpu核的相关性 注:防止在多个cpu核心上跳来跳去,引发性能问题 FPlatformProcess::SetupGameThread(); // 设置游戏线程数据(但很多平台都是空的实现体) // ... ... FTaskGraphInterface::Startup(FPlatformMisc::NumberOfCores()); // TaskGraph初始化,并根据当前机器cpu的核数来创建工作线程 FTaskGraphInterface::Get().AttachToThread(ENamedThreads::GameThread); // 附加到TaskGraph的GameThread命名插槽中. 这样游戏线程便和TaskGraph联动了起来. if (GUseThreadedRendering) // 如果使用渲染线程 { if (GRHISupportsRHIThread) // 当前平台如果支持RHI线程 { const bool DefaultUseRHIThread = true; GUseRHIThread_InternalUseOnly = DefaultUseRHIThread; if (FParse::Param(FCommandLine::Get(), TEXT("rhithread"))) { GUseRHIThread_InternalUseOnly = true; // 创建独立的RHIThread,并加入到TaskGraph中,RHI会跑在TaskGraph的RHIThread上 } else if (FParse::Param(FCommandLine::Get(), TEXT("norhithread"))) { GUseRHIThread_InternalUseOnly = false; // RHI跑在TaskGraph的AnyThread上 } } SCOPED_BOOT_TIMING("StartRenderingThread"); StartRenderingThread(); // 创建并启动渲染线程 } // ... ... }
游戏线程和TaskGraph系统的ENamedThreads::GameThread其实是一回事,都是同一个线程!
经过上面的初始化和设置后,其它地方就可以通过TaskGraph系统并行地处理任务了,也可以访问全局变量,以便判断游戏线程是否初始化完,当前线程是否游戏线程:
// Reports whether the calling thread is the game thread.
// Evaluates to false for every caller until GIsGameThreadIdInitialized has been set.
bool IsInGameThread()
{
    if (!GIsGameThreadIdInitialized)
    {
        return false;
    }
    return FPlatformTLS::GetCurrentThreadId() == GGameThreadId;
}
渲染线程(RenderThread)
RenderThread在TaskGraph系统中有一个任务队列,其他线程(主要是GameThread)通过宏ENQUEUE_RENDER_COMMAND(Type)向该队列中填充任务 注:Type字符串要保持唯一,否则ENQUEUE_RENDER_COMMAND(Type)会失效
RenderThread则不断从这个队列中取出任务来执行,从而生成与平台无关的Command List(渲染指令列表)。注:整个过程是异步的
RenderThread是其他线程(主要是GameThread)的奴隶,只是简单地作为工作线程不断执行它们赋予的工作。
RenderingThread.h声明了全部对外的接口,部分如下:
// Engine\Source\Runtime\RenderCore\Public\RenderingThread.h // 是否启用了独立的渲染线程, 如果为false, 则所有渲染命令会被立即执行, 而不是放入渲染命令队列. extern RENDERCORE_API bool GIsThreadedRendering; // 渲染线程是否应该被创建. 通常被命令行参数或ToggleRenderingThread控制台参数设置. extern RENDERCORE_API bool GUseThreadedRendering; // 是否开启RHI线程 extern RENDERCORE_API void SetRHIThreadEnabled(bool bEnableDedicatedThread, bool bEnableRHIOnTaskThreads); (......) // 开启渲染线程. extern RENDERCORE_API void StartRenderingThread(); // 停止渲染线程. extern RENDERCORE_API void StopRenderingThread(); // 检查渲染线程是否健康(是否Crash), 如果crash, 则会用UE_Log输出日志. extern RENDERCORE_API void CheckRenderingThreadHealth(); // 检查渲染线程是否健康(是否Crash) extern RENDERCORE_API bool IsRenderingThreadHealthy(); // 增加一个必须在下一个场景绘制前或flush渲染命令前完成的任务. extern RENDERCORE_API void AddFrameRenderPrerequisite(const FGraphEventRef& TaskToAdd); // 手机帧渲染前序任务, 保证所有渲染命令被入队. extern RENDERCORE_API void AdvanceFrameRenderPrerequisite(); // 等待所有渲染线程的渲染命令被执行完毕. 会卡住游戏线程, 只能被游戏线程调用. extern RENDERCORE_API void FlushRenderingCommands(bool bFlushDeferredDeletes = false); extern RENDERCORE_API void FlushPendingDeleteRHIResources_GameThread(); extern RENDERCORE_API void FlushPendingDeleteRHIResources_RenderThread(); extern RENDERCORE_API void TickRenderingTickables(); extern RENDERCORE_API void StartRenderCommandFenceBundler(); extern RENDERCORE_API void StopRenderCommandFenceBundler(); (......)
RenderingThread.h还有一个非常重要的宏ENQUEUE_RENDER_COMMAND,它的作用是向渲染线程入队渲染指令。下面是它的声明和实现:
// Enqueues a render command for the rendering thread; Type names the render operation.
// The macro expands to a tag struct named Type##Name (exposing the name through
// CStr()/TStr()) followed by the callee EnqueueUniqueRenderCommand<Type##Name>, so the
// macro invocation must be followed directly by the call arguments: (lambda).
#define ENQUEUE_RENDER_COMMAND(Type) \
    struct Type##Name \
    { \
        static const char* CStr() { return #Type; } \
        static const TCHAR* TStr() { return TEXT(#Type); } \
    }; \
    EnqueueUniqueRenderCommand<Type##Name>
上面最后一句使用了EnqueueUniqueRenderCommand函数,继续追踪之:
/* UnrealEngine\Engine\Source\Runtime\RenderCore\Public\RenderingThread.h */ /** The parent class of commands stored in the rendering command queue. */ class RENDERCORE_API FRenderCommand { public: // All render commands run on the render thread static ENamedThreads::Type GetDesiredThread() // 所有渲染指令都必须在渲染线程执行. { check(!GIsThreadedRendering || ENamedThreads::GetRenderThread() != ENamedThreads::GameThread); return ENamedThreads::GetRenderThread(); // 开启渲染多线程时,返回渲染线程。不开启渲染多线程时,返回GameThread } static ESubsequentsMode::Type GetSubsequentsMode() { // Don't support tasks having dependencies on us, reduces task graph overhead tracking and dealing with subsequents return ESubsequentsMode::FireAndForget; } }; template<typename TSTR, typename LAMBDA> class TEnqueueUniqueRenderCommandType : public FRenderCommand { public: TEnqueueUniqueRenderCommandType(LAMBDA&& InLambda) : Lambda(Forward<LAMBDA>(InLambda)) {} void DoTask(ENamedThreads::Type CurrentThread, const FGraphEventRef& MyCompletionGraphEvent) { TRACE_CPUPROFILER_EVENT_SCOPE_ON_CHANNEL_STR(TSTR::TStr(), RenderCommandsChannel); FRHICommandListImmediate& RHICmdList = GetImmediateCommandList_ForRenderCommand(); Lambda(RHICmdList); } FORCEINLINE_DEBUGGABLE TStatId GetStatId() const { #if STATS static struct FThreadSafeStaticStat<FStat_EnqueueUniqueRenderCommandType> StatPtr_EnqueueUniqueRenderCommandType; return StatPtr_EnqueueUniqueRenderCommandType.GetStatId(); #else return TStatId(); #endif } private: LAMBDA Lambda; // 缓存渲染回调函数. 
}; /*************************************************************************************************************/ template<typename TSTR, typename LAMBDA> // 传入的TSTR为结构体类型,里面包含CStr和TStr的静态方法,为渲染命令名字。 LAMBDA是回调函数 FORCEINLINE_DEBUGGABLE void EnqueueUniqueRenderCommand(LAMBDA&& Lambda) { QUICK_SCOPE_CYCLE_COUNTER(STAT_EnqueueUniqueRenderCommand); typedef TEnqueueUniqueRenderCommandType<TSTR, LAMBDA> EURCType; // EURCType类型即为TEnqueueUniqueRenderCommandType<TSTR, LAMBDA>类型 #if 0 // UE_SERVER && UE_BUILD_DEBUG UE_LOG(LogRHI, Warning, TEXT("Render command '%s' is being executed on a dedicated server."), TSTR::TStr()) #endif if (IsInRenderingThread()) // 如果在渲染线程内直接执行回调而不入队渲染命令. { FRHICommandListImmediate& RHICmdList = GetImmediateCommandList_ForRenderCommand(); // --》FRHICommandListExecutor::GetImmediateCommandList() --》GRHICommandList.CommandListImmediate Lambda(RHICmdList); // 在渲染线程中,直接执行传入的lamda匿名函数 } else { if (ShouldExecuteOnRenderThread()) // if ((GIsThreadedRendering || !IsInGameThread())) // 使用渲染线程 or 当前不为GameThread // 需要在独立的渲染线程执行 { CheckNotBlockedOnRenderThread(); TGraphTask<EURCType>::CreateTask().ConstructAndDispatchWhenReady(Forward<LAMBDA>(Lambda)); // 向渲染线程的TaskGraph队列里面投递类型名为EURCType类型的任务,并将lamda匿名函数作为参数传入该任务的构造函数 } else { // 不在独立的渲染线程执行,则构建EURCType类型的对象,然后直接执行 EURCType TempCommand(Forward<LAMBDA>(Lambda)); FScopeCycleCounter EURCMacro_Scope(TempCommand.GetStatId()); TempCommand.DoTask(ENamedThreads::GameThread, FGraphEventRef()); } } }
EnqueueUniqueRenderCommand函数在需要时向渲染线程的TaskGraph队列里面投递任务;若调用方已在渲染线程中,或无需在独立的渲染线程执行(ShouldExecuteOnRenderThread()为false),则直接执行回调。该函数自身的耗时统计到Quick的EnqueueUniqueRenderCommand分类中
注:执行EnqueueUniqueRenderCommand函数的线程可能为:GameThread、RenderThread、RTHeartBeat、TaskGraph的AnyThread、PoolThread的FQueuedThread
ENQUEUE_RENDER_COMMAND(xxx),在RenderThread上执行的xxx的耗时统计在RenderThreadCommands分类中
为了更好理解入队渲染命令操作,举2个具体的例子:
例1:在GameThread执行LoadMap切地图,在卸载掉Old World之后,会在TrimMemory()函数中使用宏ENQUEUE_RENDER_COMMAND向RenderThread的TaskGraph队列中投递一个FlushCommand任务
ENQUEUE_RENDER_COMMAND(FlushCommand)(
    /* ------------------ After expansion of the ENQUEUE_RENDER_COMMAND macro ------------------
    struct FlushCommandName
    {
        static const char* CStr() { return "FlushCommand"; }
        static const TCHAR* TStr() { return L"FlushCommand"; }
    };
    EnqueueUniqueRenderCommand<FlushCommandName>(
    */
    [](FRHICommandList& RHICmdList)
    {
        // Flush the immediate command list, flush RHI resources, then flush the command list once more.
        GRHICommandList.GetImmediateCommandList().ImmediateFlush(EImmediateFlushType::FlushRHIThreadFlushResources);
        RHIFlushResources();
        GRHICommandList.GetImmediateCommandList().ImmediateFlush(EImmediateFlushType::FlushRHIThreadFlushResources);
    });
例2:在GameThread中执行控制台变量命令,会使用宏ENQUEUE_RENDER_COMMAND向RenderThread的TaskGraph队列中投递一个OnCVarChange1任务,以便将新的数值传递到RenderThread的逻辑中使用
// Applies a console-variable change through the render command path: the assignment to
// Dest is performed by the enqueued OnCVarChange1 command, not directly at the call site.
virtual void OnCVarChange(int32& Dest, int32 NewValue)
{
    int32* Target = &Dest;
    ENQUEUE_RENDER_COMMAND(OnCVarChange1)(
        /* ------------------ After expansion of the ENQUEUE_RENDER_COMMAND macro ------------------
        struct OnCVarChange1Name
        {
            static const char* CStr() { return "OnCVarChange1"; }
            static const TCHAR* TStr() { return L"OnCVarChange1"; }
        };
        EnqueueUniqueRenderCommand<OnCVarChange1Name>(
        */
        [Target, NewValue](FRHICommandListImmediate& RHICmdList)
        {
            *Target = NewValue;
        });
}
FRenderingThread承载了渲染线程的主要工作,它的部分接口和实现代码如下:
// Engine\Source\Runtime\RenderCore\Private\RenderingThread.cpp class FRenderingThread : public FRunnable { private: bool bAcquiredThreadOwnership; // 当没有独立的RHI线程时, 渲染线程将被其它线程捕获. public: FEvent* TaskGraphBoundSyncEvent; // TaskGraph同步事件, 以便在主线程使用渲染线程之前就将渲染线程绑定到TaskGraph体系中. FRenderingThread() { bAcquiredThreadOwnership = false; // 获取同步事件. TaskGraphBoundSyncEvent = FPlatformProcess::GetSynchEventFromPool(true); RHIFlushResources(); } // FRunnable interface. virtual bool Init(void) override { // 获取当前线程ID到全局变量GRenderThreadId, 以便其它地方引用. GRenderThreadId = FPlatformTLS::GetCurrentThreadId(); // 处理线程捕获关系. if (!IsRunningRHIInSeparateThread()) { bAcquiredThreadOwnership = true; RHIAcquireThreadOwnership(); } return true; } (......) virtual uint32 Run(void) override { // 设置TLS. FMemory::SetupTLSCachesOnCurrentThread(); // 设置渲染线程平台相关的数据. FPlatformProcess::SetupRenderThread(); (......) { // 进入渲染线程主循环. RenderingThreadMain( TaskGraphBoundSyncEvent ); } FMemory::ClearAndDisableTLSCachesOnCurrentThread(); return 0; } };
可见它在运行之后会进入渲染线程逻辑,这里再进入RenderingThreadMain代码一探究竟:
void RenderingThreadMain( FEvent* TaskGraphBoundSyncEvent ) { LLM_SCOPE(ELLMTag::RenderingThreadMemory); // 将渲染线程和局部线程线程插槽设置成ActualRenderingThread和ActualRenderingThread_Local. ENamedThreads::Type RenderThread = ENamedThreads::Type(ENamedThreads::ActualRenderingThread); ENamedThreads::SetRenderThread(RenderThread); ENamedThreads::SetRenderThread_Local(ENamedThreads::Type(ENamedThreads::ActualRenderingThread_Local)); // 将当前线程附加到TaskGraph的RenderThread插槽中. FTaskGraphInterface::Get().AttachToThread(RenderThread); FPlatformMisc::MemoryBarrier(); // 触发同步事件, 通知主线程渲染线程已经附加到TaskGraph, 已经准备好接收任务. if( TaskGraphBoundSyncEvent != NULL ) { TaskGraphBoundSyncEvent->Trigger(); } (......) // 渲染线程不同阶段的处理. FCoreDelegates::PostRenderingThreadCreated.Broadcast(); check(GIsThreadedRendering); FTaskGraphInterface::Get().ProcessThreadUntilRequestReturn(RenderThread); FPlatformMisc::MemoryBarrier(); check(!GIsThreadedRendering); FCoreDelegates::PreRenderingThreadDestroyed.Broadcast(); (......) // 恢复线程线程到游戏线程. ENamedThreads::SetRenderThread(ENamedThreads::GameThread); ENamedThreads::SetRenderThread_Local(ENamedThreads::GameThread_Local); FPlatformMisc::MemoryBarrier(); }
不过这里还留有一个很大的疑问,那就是FRenderingThread只是获取当前线程作为渲染线程并附加到TaskGraph中,并没有创建线程。
那么是哪里创建的渲染线程呢?继续追踪,结果发现是在StartRenderingThread()接口中创建了FRenderingThread实例,它的实现代码如下(节选):
// Engine\Source\Runtime\RenderCore\Private\RenderingThread.cpp void StartRenderingThread() { (......) // Turn on the threaded rendering flag. GIsThreadedRendering = true; // 创建FRenderingThread实例. GRenderingThreadRunnable = new FRenderingThread(); // 创建渲染线程!! GRenderingThread = FRunnableThread::Create(GRenderingThreadRunnable, *BuildRenderingThreadName(ThreadCount), 0, FPlatformAffinity::GetRenderingThreadPriority(), FPlatformAffinity::GetRenderingThreadMask(), FPlatformAffinity::GetRenderingThreadFlags()); (......) // 开启渲染命令的栅栏. FRenderCommandFence Fence; Fence.BeginFence(); Fence.Wait(); (......) }
如果继续追踪,会发现StartRenderingThread()是在FEngineLoop::PreInitPreStartupScreen中调用的(见前文该函数的代码节选)。
至此,渲染线程的创建、初始化以及主要接口的实现都剖析完了。
RHI线程(RHIThread)
RenderThread作为前端(frontend)产生的Command List是平台无关的,是抽象的图形API调用;
而RHIThread作为后端(backend)会执行和转换渲染线程的Command List成为指定图形API的调用(称为Graphical Command),并提交到GPU执行。
RHI线程的工作是转换渲染指令到指定图形API,创建、上传渲染资源到GPU。实现代码如下:
// Engine\Source\Runtime\RenderCore\Private\RenderingThread.cpp class FRHIThread : public FRunnable { public: FRunnableThread* Thread; // 所在的RHI线程. FRHIThread() : Thread(nullptr) { check(IsInGameThread()); } void Start() { // 开始时创建RHI线程. Thread = FRunnableThread::Create(this, TEXT("RHIThread"), 512 * 1024, FPlatformAffinity::GetRHIThreadPriority(), FPlatformAffinity::GetRHIThreadMask(), FPlatformAffinity::GetRHIThreadFlags() ); check(Thread); } virtual uint32 Run() override { LLM_SCOPE(ELLMTag::RHIMisc); // 初始化TLS FMemory::SetupTLSCachesOnCurrentThread(); // 将FRHIThread所在的RHI线程附加到askGraph体系中,并指定到ENamedThreads::RHIThread。 FTaskGraphInterface::Get().AttachToThread(ENamedThreads::RHIThread); // 启动RHI线程,直到线程返回。 FTaskGraphInterface::Get().ProcessThreadUntilRequestReturn(ENamedThreads::RHIThread); // 清理TLS. FMemory::ClearAndDisableTLSCachesOnCurrentThread(); return 0; } // 单例接口。 static FRHIThread& Get() { static FRHIThread Singleton; // 使用了局部静态变量,可以保证线程安全。 return Singleton; } };
可见RHI线程不同于渲染线程,是直接在FRHIThread对象内创建实际的线程。而FRHIThread的创建也是在StartRenderingThread()中:
void StartRenderingThread() { (......) if (GUseRHIThread_InternalUseOnly) { FRHICommandListExecutor::GetImmediateCommandList().ImmediateFlush(EImmediateFlushType::DispatchToRHIThread); if (!FTaskGraphInterface::Get().IsThreadProcessingTasks(ENamedThreads::RHIThread)) { // 创建FRHIThread实例并启动它. FRHIThread::Get().Start(); } DECLARE_CYCLE_STAT(TEXT("Wait For RHIThread"), STAT_WaitForRHIThread, STATGROUP_TaskGraphTasks); // 创建RHI线程拥有者捕获任务, 让游戏线程等待. FGraphEventRef CompletionEvent = TGraphTask<FOwnershipOfRHIThreadTask>::CreateTask(NULL, ENamedThreads::GameThread).ConstructAndDispatchWhenReady(true, GET_STATID(STAT_WaitForRHIThread)); QUICK_SCOPE_CYCLE_COUNTER(STAT_StartRenderingThread); // 让游戏线程或局部线程等待RHI线程处理(捕获了线程拥有者, 大多数图形API为空)完毕. FTaskGraphInterface::Get().WaitUntilTaskCompletes(CompletionEvent, ENamedThreads::GameThread_Local); // 存储RHI线程id. GRHIThread_InternalUseOnly = FRHIThread::Get().Thread; check(GRHIThread_InternalUseOnly); GIsRunningRHIInDedicatedThread_InternalUseOnly = true; GIsRunningRHIInSeparateThread_InternalUseOnly = true; GRHIThreadId = GRHIThread_InternalUseOnly->GetThreadID(); GRHICommandList.LatchBypass(); } (......) }
以Fortnite(堡垒之夜)移动端为例,在开启RHI线程之前,渲染线程急剧地上下波动,而加了RHI线程之后,波动平缓许多,和游戏线程基本保持一致,帧率也提升不少:
GameThread、RenderThread、RHIThread之间的同步机制
这3个线程处理的数据通常是不同帧的,譬如GameThread处理N帧数据,RenderThread和RHIThread处理N-1帧数据。
但也存在例外,比如RenderThread和RHIThread运行很快,几乎不存在延迟,这种情况下,GameThread处理N帧,而RenderThread可能处理N或N-1帧,RHIThread也可能在转换N或N-1帧。
但是,RenderThread不能落后游戏线程超过一帧,否则GameThread会卡住,直到RenderThread处理完所有指令。
游戏线程和渲染线程的同步
游戏线程不可能领先于渲染线程超过一帧(最多快一帧),否则游戏线程会等待渲染线程处理完。它们的同步机制涉及两个关键的概念:
// Engine\Source\Runtime\RenderCore\Public\RenderCommandFence.h // 渲染命令栅栏 class RENDERCORE_API FRenderCommandFence { public: // 向渲染命令队列增加一个栅栏. bSyncToRHIAndGPU是否同步RHI和GPU交换Buffer, 否则只等待渲染线程. void BeginFence(bool bSyncToRHIAndGPU = false); // 等待栅栏被执行. bProcessGameThreadTasks没有作用. void Wait(bool bProcessGameThreadTasks = false) const; // 是否完成了栅栏. bool IsFenceComplete() const; private: mutable FGraphEventRef CompletionEvent; // 处理完成同步的事件 ENamedThreads::Type TriggerThreadIndex; // 处理完之后需要触发的线程类型. }; // Engine\Source\Runtime\Engine\Public\UnrealEngine.h class FFrameEndSync { FRenderCommandFence Fence[2]; // 渲染栅栏对. int32 EventIndex; // 当前事件索引 public: // 同步游戏线程和渲染线程. bAllowOneFrameThreadLag是否允许渲染线程一帧的延迟. void Sync( bool bAllowOneFrameThreadLag ) { Fence[EventIndex].BeginFence(true); // 开启栅栏, 强制同步RHI和GPU交换链的. bool bEmptyGameThreadTasks = !FTaskGraphInterface::Get().IsThreadProcessingTasks(ENamedThreads::GameThread); // 保证游戏线程至少跑过一次任务. if (bEmptyGameThreadTasks) { FTaskGraphInterface::Get().ProcessThreadUntilIdle(ENamedThreads::GameThread); } // 如果允许延迟, 交换事件索引. if( bAllowOneFrameThreadLag ) { EventIndex = (EventIndex + 1) % 2; } (......) // 开启栅栏等待. Fence[EventIndex].Wait(bEmptyGameThreadTasks); } };
在FRenderCommandFence的BeginFence函数中
当GameThread与RHI线程及GPU同步时,GameThread会使用宏ENQUEUE_RENDER_COMMAND向RenderThread的TaskGraph队列中投递一个FSyncFrameCommand任务,以便将Command List同步投递到RHI线程
当GameThread与RenderThread同步时,GameThread会创建一个FNullGraphTask空任务,放到RenderThread的TaskGraph队列中让其执行
在FRenderCommandFence的Wait函数中,会检查投递给RenderThread的CompletionEvent是否被执行,如果没有执行则调用GameThreadWaitForTask函数来阻塞等待(通过Event实现)
void FRenderCommandFence::BeginFence(bool bSyncToRHIAndGPU) { if (!GIsThreadedRendering) { return; } else { // Render thread is a default trigger for the CompletionEvent TriggerThreadIndex = ENamedThreads::ActualRenderingThread; if (BundledCompletionEvent.GetReference() && IsInGameThread()) { CompletionEvent = BundledCompletionEvent; return; } int32 GTSyncType = CVarGTSyncType.GetValueOnAnyThread(); if (bSyncToRHIAndGPU) { // Don't sync to the RHI and GPU if GtSyncType is disabled, or we're not vsyncing //@TODO: do this logic in the caller? static auto CVarVsync = IConsoleManager::Get().FindConsoleVariable(TEXT("r.VSync")); // 是否开了VSync check(CVarVsync != nullptr); if ( GTSyncType == 0 || CVarVsync->GetInt() == 0 ) // r.GTSyncType为0或r.VSync为0时,GameThread不与RHI线程及GPU同步 { bSyncToRHIAndGPU = false; } } if (bSyncToRHIAndGPU) // GameThread与RHI线程及GPU同步时 { if (IsRHIThreadRunning()) { // Change trigger thread to RHI TriggerThreadIndex = ENamedThreads::RHIThread; } // Create a task graph event which we can pass to the render or RHI threads. 
CompletionEvent = FGraphEvent::CreateGraphEvent(); FGraphEventRef InCompletionEvent = CompletionEvent; /* ---------------------------------------------- ENQUEUE_RENDER_COMMAND宏展开后 ------------------------------------------------ struct FSyncFrameCommandName { static const char* CStr() { return "FSyncFrameCommand"; } static const TCHAR* TStr() { return L"FSyncFrameCommand"; } }; EnqueueUniqueRenderCommand<FSyncFrameCommandName>( */ ENQUEUE_RENDER_COMMAND(FSyncFrameCommand)( [InCompletionEvent, GTSyncType](FRHICommandListImmediate& RHICmdList) { if (IsRHIThreadRunning()) // 如果开启了RHI线程 { ALLOC_COMMAND_CL(RHICmdList, FRHISyncFrameCommand)(InCompletionEvent, GTSyncType); // 将创建的CompletionEvent投递到RHI线程的TaskGraph的任务队列中 RHICmdList.ImmediateFlush(EImmediateFlushType::DispatchToRHIThread); } else // 渲染线程直接执行 { FRHISyncFrameCommand Command(InCompletionEvent, GTSyncType); Command.Execute(RHICmdList); } }); } else // GameThead与RenderThread同步 { // Sync Game Thread with Render Thread only DECLARE_CYCLE_STAT(TEXT("FNullGraphTask.FenceRenderCommand"), STAT_FNullGraphTask_FenceRenderCommand, STATGROUP_TaskGraphTasks); CompletionEvent = TGraphTask<FNullGraphTask>::CreateTask(NULL, ENamedThreads::GameThread).ConstructAndDispatchWhenReady( GET_STATID(STAT_FNullGraphTask_FenceRenderCommand), ENamedThreads::GetRenderThread()); } } } /** * Waits for pending fence commands to retire. */ void FRenderCommandFence::Wait(bool bProcessGameThreadTasks) const { if (!IsFenceComplete()) { StopRenderCommandFenceBundler(); GameThreadWaitForTask(CompletionEvent, TriggerThreadIndex, bProcessGameThreadTasks); } } bool FRenderCommandFence::IsFenceComplete() const { if (!GIsThreadedRendering) { return true; } check(IsInGameThread() || IsInAsyncLoadingThread()); CheckRenderingThreadHealth(); if (!CompletionEvent.GetReference() || CompletionEvent->IsComplete()) { CompletionEvent = NULL; // this frees the handle for other uses, the NULL state is considered completed return true; } return false; }
而FFrameEndSync的使用是在FEngineLoop::Tick中:
// Engine\Source\Runtime\Launch\Private\LaunchEngineLoop.cpp void FEngineLoop::Tick() { (......) // 在引擎循环的帧末尾添加游戏线程和渲染线程的同步事件. { static FFrameEndSync FrameEndSync; // 局部静态变量, 线程安全. static auto CVarAllowOneFrameThreadLag = IConsoleManager::Get().FindTConsoleVariableDataInt(TEXT("r.OneFrameThreadLag")); // 同步游戏和渲染线程, 是否允许一帧的延迟可由控制台命令控制. 默认是开启的. FrameEndSync.Sync( CVarAllowOneFrameThreadLag->GetValueOnGameThread() != 0 ); } (......) }
FlushRenderingCommands
在游戏线程中调用,会阻塞游戏线程,强行等待所有的渲染线程pending render command以及RHI线程中的指令执行完,相当于一次完整地对渲染线程的同步
/ * Waits for the rendering thread to finish executing all pending rendering commands. Should only be used from the game thread. */ void FlushRenderingCommands(bool bFlushDeferredDeletes) { if (!GIsRHIInitialized) { return; } FSuspendRenderingTickables SuspendRenderingTickables; // Need to flush GT because render commands from threads other than GT are sent to // the main queue of GT when RT is disabled if (!GIsThreadedRendering && !FTaskGraphInterface::Get().IsThreadProcessingTasks(ENamedThreads::GameThread) && !FTaskGraphInterface::Get().IsThreadProcessingTasks(ENamedThreads::GameThread_Local)) { FTaskGraphInterface::Get().ProcessThreadUntilIdle(ENamedThreads::GameThread); FTaskGraphInterface::Get().ProcessThreadUntilIdle(ENamedThreads::GameThread_Local); } ENQUEUE_RENDER_COMMAND(FlushPendingDeleteRHIResourcesCmd)( [bFlushDeferredDeletes](FRHICommandListImmediate& RHICmdList) { RHICmdList.ImmediateFlush( bFlushDeferredDeletes ? EImmediateFlushType::FlushRHIThreadFlushResourcesFlushDeferredDeletes : EImmediateFlushType::FlushRHIThreadFlushResources); }); // Find the objects which may be cleaned up once the rendering thread command queue has been flushed. FPendingCleanupObjects* PendingCleanupObjects = GetPendingCleanupObjects(); // Issue a fence command to the rendering thread and wait for it to complete. FRenderCommandFence Fence; Fence.BeginFence(); // 创建一个FNullGraphTask空任务,放到RenderThread的TaskGraph队列中让其执行 Fence.Wait(); // 检查投递给RenderThread的CompletionEvent是否被执行,如果没有执行就会阻塞等待 // Delete the objects which were enqueued for deferred cleanup before the command queue flush. delete PendingCleanupObjects; }
渲染线程和RHI线程的同步
RenderThread每次在调用RenderViewFamily_RenderThread的起始处,会阻塞等待所有RHI指令处理完成,然后才开始当前帧的渲染逻辑。
FMobileSceneRender渲染管线下,RenderThread每一帧都会执行ImmediateFlush,阻塞等待RHI处理完FGraphEventRef RHIThreadTask任务,代码如下:
FDeferredShadingSceneRender渲染管线下,RenderThread每一帧都会执行ImmediateFlush,阻塞等待RHI处理完FGraphEventRef RHIThreadTask任务,代码如下:
RHI线程与GPU的同步
UE4Editor-D3D11RHI-Win64-Debug.dll!FD3D11Viewport::PresentChecked(int SyncInterval=0) Line 374 C++ UE4Editor-D3D11RHI-Win64-Debug.dll!FD3D11Viewport::Present(bool bLockToVsync=false) Line 622 C++ UE4Editor-D3D11RHI-Win64-Debug.dll!FD3D11DynamicRHI::RHIEndDrawingViewport(FRHIViewport * ViewportRHI=0x00000273786c9180, bool bPresent=true, bool bLockToVsync=false) Line 770 C++ UE4Editor-RHI-Win64-Debug.dll!FRHICommandEndDrawingViewport::Execute(FRHICommandListBase & CmdList={...}) Line 704 C++ UE4Editor-RHI-Win64-Debug.dll!FRHICommand<FRHICommandEndDrawingViewport,FRHICommandEndDrawingViewportString1847>::ExecuteAndDestruct(FRHICommandListBase & CmdList={...}, FRHICommandListDebugContext & Context={...}) Line 763 C++ UE4Editor-RHI-Win64-Debug.dll!FRHICommandListExecutor::ExecuteInner_DoExecute(FRHICommandListBase & CmdList={...}) Line 374 C++ UE4Editor-RHI-Win64-Debug.dll!FExecuteRHIThreadTask::DoTask(ENamedThreads::Type CurrentThread=RHIThread, const TRefCountPtr<FGraphEvent> & MyCompletionGraphEvent={...}) Line 429 C++ UE4Editor-RHI-Win64-Debug.dll!TGraphTask<FExecuteRHIThreadTask>::ExecuteTask(TArray<FBaseGraphTask *,TSizedDefaultAllocator<32>> & NewTasks={...}, ENamedThreads::Type CurrentThread=RHIThread) Line 888 C++ UE4Editor-Core-Win64-Debug.dll!FNamedTaskThread::ProcessTasksNamedThread(int QueueIndex=0, bool bAllowStall=true) Line 709 C++ UE4Editor-Core-Win64-Debug.dll!FNamedTaskThread::ProcessTasksUntilQuit(int QueueIndex=0) Line 601 C++ UE4Editor-Core-Win64-Debug.dll!FTaskGraphImplementation::ProcessThreadUntilRequestReturn(ENamedThreads::Type CurrentThread=RHIThread) Line 1474 C++ UE4Editor-RenderCore-Win64-Debug.dll!FRHIThread::Run() Line 320 C++ UE4Editor-Core-Win64-Debug.dll!FRunnableThreadWin::Run() Line 84 C++ UE4Editor-Core-Win64-Debug.dll!FRunnableThreadWin::GuardedRun() Line 27 C++ UE4Editor-Core-Win64-Debug.dll!FRunnableThreadWin::_ThreadProc(void * pThis=0x000002730781fc90) Line 38 C++ kernel32.dll!BaseThreadInitThunk() Unknown 
ntdll.dll!RtlUserThreadStart() Unknown
阻塞时的stats栈(移动端)
RHI等待eglSwapBuffers(Android移动端)
高通下调用堆栈为:
1 /system/vendor/lib64/egl/libGLESv2_adreno.so pc 0000000000128c48 (EsxContext::WriteBinScissor(EsxRenderMode, EsxBinData*, EsxRenderBucket*, int, int)+232) [arm64-v8a] 2 /system/vendor/lib64/egl/libGLESv2_adreno.so pc 0000000000131020 (EsxContext::ProcessAndSubmitRendering(EsxFlushReason)+3168) [arm64-v8a] 3 /system/vendor/lib64/egl/libGLESv2_adreno.so pc 0000000000253814 (EsxCmdMgr::Flush(EsxFlushReason)+596) [arm64-v8a] 4 /system/vendor/lib64/egl/libGLESv2_adreno.so pc 000000000012d6fc (EsxContext::SwapBuffers(EsxRect const*, unsigned int)+124) [arm64-v8a] 5 /system/vendor/lib64/egl/libGLESv2_adreno.so pc 00000000001b4d9c (EglContext::SwapBuffers(EsxRect const*, unsigned int)+92) [arm64-v8a] 6 /system/vendor/lib64/egl/libGLESv2_adreno.so pc 00000000001c4578 (EglWindowSurface::PerformContextSwapOperation(EglContext*, EsxRect const*, unsigned int, EglResourceAccessInfo*)+56) [arm64-v8a] 7 /system/vendor/lib64/egl/libGLESv2_adreno.so pc 00000000001c4678 (EglWindowSurface::SwapBuffers(EglContext*, EsxRect const*, unsigned int)+56) [arm64-v8a] 8 /system/vendor/lib64/egl/libGLESv2_adreno.so pc 00000000001b029c (EglApi::SwapBuffers(void*, void*)+156) [arm64-v8a] 9 /system/vendor/lib64/egl/libEGL_adreno.so pc 0000000000009348 (eglSwapBuffers+40) [arm64-v8a] 10 /system/lib64/libEGL.so pc 0000000000027af8 (eglSwapBuffers+168) [arm64-v8a] 11 libUE4.so pc 00000000057c01dc SwapBuffers (D:/Code\UnrealEngine\Engine\Source\Runtime\OpenGLDrv\Private\Android/AndroidOpenGLFramePacer.cpp:408) [arm64-v8a] 12 libUE4.so pc 00000000057ba094 PlatformBlitToViewport (D:/Code\UnrealEngine\Engine\Source\Runtime\OpenGLDrv\Private\Android/AndroidOpenGL.cpp:184 [Inline: SwapBuffers]) [arm64-v8a] 13 libUE4.so pc 000000000583a310 RHIEndDrawingViewport (D:/Code\UnrealEngine\Engine\Source\Runtime\OpenGLDrv\Private/OpenGLViewport.cpp:143) [arm64-v8a] 14 libUE4.so pc 0000000003246198 ExecuteInner_DoExecute (D:/Code\UnrealEngine\Engine\Source\Runtime\RHI\Private/RHICommandList.cpp:400) [arm64-v8a] 15 
libUE4.so pc 000000000326934c DoTask (D:/Code\UnrealEngine\Engine\Source\Runtime\RHI\Private/RHICommandList.cpp:461) [arm64-v8a] 16 libUE4.so pc 0000000003269114 ExecuteTask (D:/Code\UnrealEngine\Engine\Source\Runtime\Core\Public\Async/TaskGraphInterfaces.h:886) [arm64-v8a] 17 libUE4.so pc 00000000029ebf2c ProcessTasksNamedThread (D:/Code\UnrealEngine\Engine\Source\Runtime\Core\Public\Async/TaskGraphInterfaces.h:524) [arm64-v8a] 18 libUE4.so pc 00000000029ebac0 ProcessTasksUntilQuit (D:/Code\UnrealEngine\Engine\Source\Runtime\Core\Private\Async/TaskGraph.cpp:600) [arm64-v8a] 19 libUE4.so pc 00000000032a7754 Run (D:/Code\UnrealEngine\Engine\Source\Runtime\RenderCore\Private/RenderingThread.cpp:319) [arm64-v8a] 20 libUE4.so pc 0000000002a575f8 Run (D:/Code\UnrealEngine\Engine\Source\Runtime\Core\Private\HAL/PThreadRunnableThread.cpp:25) [arm64-v8a] 21 libUE4.so pc 00000000029e9eb4 _ThreadProc (D:/Code\UnrealEngine\Engine\Source\Runtime\Core\Private\HAL/PThreadRunnableThread.h:185) [arm64-v8a] 22 /system/lib64/libc.so pc 0000000000018ff0 (__pthread_start(void*)+52) [arm64-v8a] 23 /system/lib64/libc.so pc 0000000000015170 (__start_thread+16) [arm64-v8a]
mali下调用堆栈为:
1 #00 pc 00000000000706f8 /apex/com.android.runtime/lib64/bionic/libc.so (0x0000007C3D7166F8 libc.so(0x00000000000AD6F8)![Unknown]() []) 2 #01 pc 000000000002404c /apex/com.android.runtime/lib64/bionic/libc.so (0x0000007C3D6CA04C libc.so(0x000000000006104C)!poll() []) 3 #02 pc 000000000000006c /system/lib64/libsync.so (0x0000007C45A9606C libsync.so(0x000000000000106C)!sync_wait() []) 4 #03 pc 00000000000027ac /system/lib64/libui.so (0x0000007C44A377AC libui.so(0x000000000002F7AC)!android::Fence::waitForever(char const*) []) 5 #04 pc 0000000000056f34 /system/lib64/libgui.so (0x0000007C4FB48F34 libgui.so(0x00000000000E1F34)!android::BufferQueueProducer::queueBuffer(int, android::IGraphicBufferProducer::QueueBufferInput const&, android::IGraphicBufferProducer::QueueBufferOutput*) []) 6 #05 pc 000000000009bcb8 /system/lib64/libgui.so (0x0000007C4FB8DCB8 libgui.so(0x0000000000126CB8)!android::Surface::queueBuffer(ANativeWindowBuffer*, int) []) 7 #06 pc 0000000000001c3c /system/lib64/libnativewindow.so (0x0000007C4E87BC3C libnativewindow.so(0x0000000000005C3C)!ANativeWindow_queueBuffer() []) 8 #07 pc 00000000000f12e8 /vendor/lib64/egl/libGLES_mali.so (0x000000797C0DF2E8 libGLES_mali.so(0x000000000083C2E8)![Unknown]() []) 9 #08 pc 0000000000109b24 /vendor/lib64/egl/libGLES_mali.so (0x000000797C0F7B24 libGLES_mali.so(0x0000000000854B24)![Unknown]() []) 10 #09 pc 0000000000108c30 /vendor/lib64/egl/libGLES_mali.so (0x000000797C0F6C30 libGLES_mali.so(0x0000000000853C30)![Unknown]() []) 11 #10 pc 000000000000c4e4 /system/lib64/libEGL.so (0x0000007C644E54E4 libEGL.so(0x00000000000204E4)![Unknown]() []) 12 #11 pc 0000000000008c44 /system/lib64/libEGL.so (0x0000007C644E1C44 libEGL.so(0x000000000001CC44)!eglSwapBuffers() []) 13 #12 pc 000000000ecf3c3c /data/app/~~LnYA_TCpurC7EQEE1aBIoA==/com.tencent.mf.mytest-k7EFP2nWw8f9fcvqfcRAdA==/lib/arm64/libUE4.so (0x0000007903290C3C libUE4.so(0x000000001DB6FC3C)!FAndroidOpenGLFramePacer::SwapBuffers(bool) []) 14 #13 pc 000000000ece5688 
/data/app/~~LnYA_TCpurC7EQEE1aBIoA==/com.tencent.mf.mytest-k7EFP2nWw8f9fcvqfcRAdA==/lib/arm64/libUE4.so (0x0000007903282688 libUE4.so(0x000000001DB61688)!PlatformBlitToViewport(FPlatformOpenGLDevice*, FOpenGLViewport const&, unsigned int, unsigned int, bool, bool) []) 15 #14 pc 000000000ee2c868 /data/app/~~LnYA_TCpurC7EQEE1aBIoA==/com.tencent.mf.mytest-k7EFP2nWw8f9fcvqfcRAdA==/lib/arm64/libUE4.so (0x00000079033C9868 libUE4.so(0x000000001DCA8868)!FOpenGLDynamicRHI::RHIEndDrawingViewport(FRHIViewport*, bool, bool) []) 16 #15 pc 0000000009c82574 /data/app/~~LnYA_TCpurC7EQEE1aBIoA==/com.tencent.mf.mytest-k7EFP2nWw8f9fcvqfcRAdA==/lib/arm64/libUE4.so (0x00000078FE21F574 libUE4.so(0x0000000018AFE574)!FRHICommandEndDrawingViewport::Execute(FRHICommandListBase&) []) 17 #16 pc 0000000009c8fd64 /data/app/~~LnYA_TCpurC7EQEE1aBIoA==/com.tencent.mf.mytest-k7EFP2nWw8f9fcvqfcRAdA==/lib/arm64/libUE4.so (0x00000078FE22CD64 libUE4.so(0x0000000018B0BD64)!FRHICommandListExecutor::ExecuteInner_DoExecute(FRHICommandListBase&) []) 18 #17 pc 0000000009d189ac /data/app/~~LnYA_TCpurC7EQEE1aBIoA==/com.tencent.mf.mytest-k7EFP2nWw8f9fcvqfcRAdA==/lib/arm64/libUE4.so (0x00000078FE2B59AC libUE4.so(0x0000000018B949AC)!FExecuteRHIThreadTask::DoTask(ENamedThreads::Type, TRefCountPtr<FGraphEvent> const&) []) 19 #18 pc 0000000009d17720 /data/app/~~LnYA_TCpurC7EQEE1aBIoA==/com.tencent.mf.mytest-k7EFP2nWw8f9fcvqfcRAdA==/lib/arm64/libUE4.so (0x00000078FE2B4720 libUE4.so(0x0000000018B93720)!TGraphTask<FExecuteRHIThreadTask>::ExecuteTask(TArray<FBaseGraphTask*, TSizedDefaultAllocator<32> >&, ENamedThreads::Type) []) 20 #19 pc 00000000084d2e04 /data/app/~~LnYA_TCpurC7EQEE1aBIoA==/com.tencent.mf.mytest-k7EFP2nWw8f9fcvqfcRAdA==/lib/arm64/libUE4.so (0x00000078FCA6FE04 libUE4.so(0x000000001734EE04)!FNamedTaskThread::ProcessTasksNamedThread(int, bool) []) 21 #20 pc 00000000084cfd1c /data/app/~~LnYA_TCpurC7EQEE1aBIoA==/com.tencent.mf.mytest-k7EFP2nWw8f9fcvqfcRAdA==/lib/arm64/libUE4.so (0x00000078FCA6CD1C 
libUE4.so(0x000000001734BD1C)!FNamedTaskThread::ProcessTasksUntilQuit(int) []) 22 #21 pc 0000000009dc36bc /data/app/~~LnYA_TCpurC7EQEE1aBIoA==/com.tencent.mf.mytest-k7EFP2nWw8f9fcvqfcRAdA==/lib/arm64/libUE4.so (0x00000078FE3606BC libUE4.so(0x0000000018C3F6BC)!FRHIThread::Run() []) 23 #22 pc 000000000860d190 /data/app/~~LnYA_TCpurC7EQEE1aBIoA==/com.tencent.mf.mytest-k7EFP2nWw8f9fcvqfcRAdA==/lib/arm64/libUE4.so (0x00000078FCBAA190 libUE4.so(0x0000000017489190)!FRunnableThreadPThread::Run() []) 24 #23 pc 00000000084c9860 /data/app/~~LnYA_TCpurC7EQEE1aBIoA==/com.tencent.mf.mytest-k7EFP2nWw8f9fcvqfcRAdA==/lib/arm64/libUE4.so (0x00000078FCA66860 libUE4.so(0x0000000017345860)!FRunnableThreadPThread::_ThreadProc(void*) []) 25 #24 pc 000000000008526c /apex/com.android.runtime/lib64/bionic/libc.so (0x0000007C3D72B26C libc.so(0x00000000000C226C)![Unknown]() []) 26 #25 pc 0000000000017a30 /apex/com.android.runtime/lib64/bionic/libc.so (0x0000007C3D6BDA30 libc.so(0x0000000000054A30)![Unknown]() [])
注:eglSwapBuffers耗时高,一般是在等待GPU完成
iOS下RenderThread等待Present 注:iOS默认不开启RHI线程
MyGame 0x0000000105ea8fd0 FMetalViewport::Present(FMetalCommandQueue&, bool) (Runtime/Apple/MetalRHI/Private/MetalViewport.cpp:573) MyGame 0x0000000105e39424 FMetalDeviceContext::EndDrawingViewport(FMetalViewport*, bool, bool) (Runtime/Apple/MetalRHI/Private/MetalContext.cpp:724) MyGame 0x0000000105ea9b6c FMetalRHIImmediateCommandContext::RHIEndDrawingViewport(FRHIViewport*, bool, bool) (Runtime/Apple/MetalRHI/Private/MetalViewport.cpp:668) MyGame 0x00000001066e0be8 FRHICommandList::EndDrawingViewport(FRHIViewport*, bool, bool) (Runtime/RHI/Public/RHICommandList.h:102) MyGame 0x00000001074e92bc FSlateRHIRenderer::DrawWindow_RenderThread(FRHICommandListImmediate&, FViewportInfo&, FSlateWindowElementList&, FSlateDrawWindowCommandParams const&) (Runtime/Core/Public/Apple/ApplePlatformTime.h:41) MyGame 0x0000000107502bdc _ZN10TGraphTaskI31TEnqueueUniqueRenderCommandTypeIZN17FSlateRHIRenderer19DrawWindows_PrivateER16FSlateDrawBufferE27SlateDrawWindowsCommandNameZNS1_19DrawWindows_PrivateES3_E4$_15EE11ExecuteTaskER6TArrayIP14FBaseGraphTask22TSizedDefaultAllocatorILi32EEEN13ENamedThreads4TypeE (Runtime/Core/Public/Async/TaskGraphInterfaces.h:891) MyGame 0x0000000105ece890 FNamedTaskThread::ProcessTasksNamedThread(int, bool) (Runtime/Core/Private/Async/TaskGraph.cpp:710) MyGame 0x0000000105ece404 FNamedTaskThread::ProcessTasksUntilQuit(int) (Runtime/Core/Private/Async/TaskGraph.cpp:601) MyGame 0x000000010671e610 RenderingThreadMain(FEvent*) (Runtime/Core/Public/Delegates/MulticastDelegateBase.h:248) MyGame 0x0000000106729d90 FRenderingThread::Run() (Runtime/RenderCore/Private/RenderingThread.cpp:532) MyGame 0x0000000105f32bec FRunnableThreadPThread::Run() (Runtime/Core/Private/HAL/PThreadRunnableThread.cpp:25) MyGame 0x0000000105f03a80 FRunnableThreadPThread::_ThreadProc(void*) (Runtime/Core/Private/HAL/PThreadRunnableThread.h:186) libsystem_pthread.dylib 0x00000001d6ca4bfc _pthread_start + 320
【推荐】国内首个AI IDE,深度理解中文开发场景,立即下载体验Trae
【推荐】编程新体验,更懂你的AI,立即体验豆包MarsCode编程助手
【推荐】抖音旗下AI助手豆包,你的智能百科全书,全免费不限次数
【推荐】轻量又高性能的 SSH 工具 IShell:AI 加持,快人一步
· AI与.NET技术实操系列(二):开始使用ML.NET
· 记一次.NET内存居高不下排查解决与启示
· 探究高空视频全景AR技术的实现原理
· 理解Rust引用及其生命周期标识(上)
· 浏览器原生「磁吸」效果!Anchor Positioning 锚点定位神器解析
· 全程不用写代码,我用AI程序员写了一个飞机大战
· DeepSeek 开源周回顾「GitHub 热点速览」
· 记一次.NET内存居高不下排查解决与启示
· MongoDB 8.0这个新功能碉堡了,比商业数据库还牛
· .NET10 - 预览版1新功能体验(一)