可可西

UE4之RHI命令执行

多线程架构下(GameThread --  RenderThread -- RHIThread)渲染时,不会直接调用图形API的上下文的接口

而是创建一个个FRHICommand对象(构成一个链表),并赋值给FExecuteRHIThreadTask对象的FRHICommandListBase* RHICmdList

接着FExecuteRHIThreadTask对象会被压入到TaskGraph[ENamedThreads::RHIThread]的Queue队列中

RHIThread依次处理TaskGraph[ENamedThreads::RHIThread]的Queue队列中任务,执行FExecuteRHIThreadTask时

将该对象中FRHICommandListBase* RHICmdList链表中的FRHICommand转换成图形API的上下文的接口并调用执行

 

FRHICommand

FRHICommand是RHI模块的渲染指令基类,这些指令通常由渲染线程通过命令队列Push到RHI线程,在合适的时机由RHI线程执行。

FRHICommand继承自FRHICommandBase,它们的定义如下:

// Engine\Source\Runtime\RHI\Public\RHICommandList.h

// RHI命令基类.
struct FRHICommandBase
{
    // 下一个命令. (命令链表的节点)
    FRHICommandBase* Next = nullptr;
    
    // 执行命令后销毁.
    virtual void ExecuteAndDestruct(FRHICommandListBase& CmdList, FRHICommandListDebugContext& DebugContext) = 0;
};

template<typename TCmd, typename NameType = FUnnamedRhiCommand>
struct FRHICommand : public FRHICommandBase
{
    // 执行命令后销毁.
    void ExecuteAndDestruct(FRHICommandListBase& CmdList, FRHICommandListDebugContext& Context) override final
    {
        TCmd *ThisCmd = static_cast<TCmd*>(this);
        ThisCmd->Execute(CmdList);
        ThisCmd->~TCmd();
    }
};

注:FRHICommandBase有指向下一个节点的Next变量,意味着FRHICommandBase是命令链表的节点。

 

FRHICommand拥有数量众多的子类,是通过特殊的宏来快速声明:

// 定义RHI命令子类的宏,会从FRHICommand上派生
#define FRHICOMMAND_MACRO(CommandName)                                \
struct PREPROCESSOR_JOIN(CommandName##String, __LINE__)                \
{                                                                    \
    static const TCHAR* TStr() { return TEXT(#CommandName); }        \
};                                                                    \
struct CommandName final : public FRHICommand<CommandName, PREPROCESSOR_JOIN(CommandName##String, __LINE__)>

// PREPROCESSOR_JOIN宏定义如下 详见: UnrealEngine\Engine\Source\Runtime\Core\Public\HAL\PreprocessorHelpers.h // Concatenates two preprocessor tokens, performing macro expansion on them first #define PREPROCESSOR_JOIN(x, y) PREPROCESSOR_JOIN_INNER(x, y) #define PREPROCESSOR_JOIN_INNER(x, y) x##y

 

有了以上的宏,就可以快速定义FRHICommand的子类(亦即具体的RHI命令),以FRHICommandSetStencilRef为例:

FRHICOMMAND_MACRO(FRHICommandSetStencilRef)
{
    uint32 StencilRef;
    FORCEINLINE_DEBUGGABLE FRHICommandSetStencilRef(uint32 InStencilRef)
        : StencilRef(InStencilRef)
    {
    }
    RHI_API void Execute(FRHICommandListBase& CmdList);
};

展开宏定义之后,代码如下:

struct FRHICommandSetStencilRefString853
{
    static const TCHAR* TStr() { return TEXT("FRHICommandSetStencilRef"); }
};

// FRHICommandSetStencilRef继承了FRHICommand.
struct FRHICommandSetStencilRef final : public FRHICommand<FRHICommandSetStencilRef, FRHICommandSetStencilRefString853>
{
    uint32 StencilRef;
    FRHICommandSetStencilRef(uint32 InStencilRef)
        : StencilRef(InStencilRef)
    {
    }
    RHI_API void Execute(FRHICommandListBase& CmdList);
};

利用FRHICOMMAND_MACRO声明的RHI命令数量众多,下面列举其中一部分:

FRHICOMMAND_MACRO(FRHISyncFrameCommand)
FRHICOMMAND_MACRO(FRHICommandStat)
FRHICOMMAND_MACRO(FRHICommandRHIThreadFence)
FRHICOMMAND_MACRO(FRHIAsyncComputeSubmitList)
FRHICOMMAND_MACRO(FRHICommandSubmitSubList)

FRHICOMMAND_MACRO(FRHICommandWaitForAndSubmitSubListParallel)
FRHICOMMAND_MACRO(FRHICommandWaitForAndSubmitSubList)
FRHICOMMAND_MACRO(FRHICommandWaitForAndSubmitRTSubList)
FRHICOMMAND_MACRO(FRHICommandWaitForTemporalEffect)
FRHICOMMAND_MACRO(FRHICommandBroadcastTemporalEffect)
    
FRHICOMMAND_MACRO(FRHICommandBeginUpdateMultiFrameResource)
FRHICOMMAND_MACRO(FRHICommandEndUpdateMultiFrameResource)
FRHICOMMAND_MACRO(FRHICommandBeginUpdateMultiFrameUAV)
FRHICOMMAND_MACRO(FRHICommandEndUpdateMultiFrameUAV)
FRHICOMMAND_MACRO(FRHICommandSetGPUMask)

FRHICOMMAND_MACRO(FRHICommandSetStencilRef)
FRHICOMMAND_MACRO(FRHICommandSetBlendFactor)
FRHICOMMAND_MACRO(FRHICommandSetStreamSource)
FRHICOMMAND_MACRO(FRHICommandSetStreamSource)
FRHICOMMAND_MACRO(FRHICommandSetViewport)
FRHICOMMAND_MACRO(FRHICommandSetScissorRect)
    
FRHICOMMAND_MACRO(FRHICommandBeginRenderPass)
FRHICOMMAND_MACRO(FRHICommandEndRenderPass)
FRHICOMMAND_MACRO(FRHICommandNextSubpass)
FRHICOMMAND_MACRO(FRHICommandBeginParallelRenderPass)
FRHICOMMAND_MACRO(FRHICommandEndParallelRenderPass)
FRHICOMMAND_MACRO(FRHICommandBeginRenderSubPass)
FRHICOMMAND_MACRO(FRHICommandEndRenderSubPass)
    
FRHICOMMAND_MACRO(FRHICommandDrawPrimitive)
FRHICOMMAND_MACRO(FRHICommandDrawIndexedPrimitive)
FRHICOMMAND_MACRO(FRHICommandDrawPrimitiveIndirect)
FRHICOMMAND_MACRO(FRHICommandDrawIndexedIndirect)
FRHICOMMAND_MACRO(FRHICommandDrawIndexedPrimitiveIndirect)
    
FRHICOMMAND_MACRO(FRHICommandSetGraphicsPipelineState)
FRHICOMMAND_MACRO(FRHICommandBeginUAVOverlap)
FRHICOMMAND_MACRO(FRHICommandEndUAVOverlap)

FRHICOMMAND_MACRO(FRHICommandSetDepthBounds)
FRHICOMMAND_MACRO(FRHICommandSetShadingRate)
FRHICOMMAND_MACRO(FRHICommandSetShadingRateImage)
FRHICOMMAND_MACRO(FRHICommandClearUAVFloat)
FRHICOMMAND_MACRO(FRHICommandCopyToResolveTarget)
FRHICOMMAND_MACRO(FRHICommandCopyTexture)
FRHICOMMAND_MACRO(FRHICommandBeginTransitions)
FRHICOMMAND_MACRO(FRHICommandEndTransitions)
FRHICOMMAND_MACRO(FRHICommandResourceTransition)
FRHICOMMAND_MACRO(FRHICommandClearColorTexture)
FRHICOMMAND_MACRO(FRHICommandClearDepthStencilTexture)
FRHICOMMAND_MACRO(FRHICommandClearColorTextures)

FRHICOMMAND_MACRO(FRHICommandSetGlobalUniformBuffers)
FRHICOMMAND_MACRO(FRHICommandBuildLocalUniformBuffer)

FRHICOMMAND_MACRO(FRHICommandBeginRenderQuery)
FRHICOMMAND_MACRO(FRHICommandEndRenderQuery)
FRHICOMMAND_MACRO(FRHICommandPollOcclusionQueries)

FRHICOMMAND_MACRO(FRHICommandBeginScene)
FRHICOMMAND_MACRO(FRHICommandEndScene)
FRHICOMMAND_MACRO(FRHICommandBeginFrame)
FRHICOMMAND_MACRO(FRHICommandEndFrame)
FRHICOMMAND_MACRO(FRHICommandBeginDrawingViewport)
FRHICOMMAND_MACRO(FRHICommandEndDrawingViewport)

FRHICOMMAND_MACRO(FRHICommandInvalidateCachedState)
FRHICOMMAND_MACRO(FRHICommandDiscardRenderTargets)

FRHICOMMAND_MACRO(FRHICommandUpdateTextureReference)
FRHICOMMAND_MACRO(FRHICommandUpdateRHIResources)
FRHICOMMAND_MACRO(FRHICommandBackBufferWaitTrackingBeginFrame)
FRHICOMMAND_MACRO(FRHICommandFlushTextureCacheBOP)
FRHICOMMAND_MACRO(FRHICommandCopyBufferRegion)
FRHICOMMAND_MACRO(FRHICommandCopyBufferRegions)

FRHICOMMAND_MACRO(FClearCachedRenderingDataCommand)
FRHICOMMAND_MACRO(FClearCachedElementDataCommand)

FRHICOMMAND_MACRO(FRHICommandRayTraceOcclusion)
FRHICOMMAND_MACRO(FRHICommandRayTraceIntersection)
FRHICOMMAND_MACRO(FRHICommandRayTraceDispatch)
FRHICOMMAND_MACRO(FRHICommandSetRayTracingBindings)
FRHICOMMAND_MACRO(FRHICommandClearRayTracingBindings)

这些RHI命令的void Execute(FRHICommandListBase& CmdList)函数大多实现在UnrealEngine\Engine\Source\Runtime\RHI\Public\RHICommandListCommandExecutes.inl文件中

void FRHICommandDrawPrimitive::Execute(FRHICommandListBase& CmdList)的实现体如下:

void FRHICommandDrawPrimitive::Execute(FRHICommandListBase& CmdList)
{
    RHISTAT(DrawPrimitive);  // 需开启RHI_STATS宏,才能统计
    INTERNAL_DECORATOR(RHIDrawPrimitive)(BaseVertexIndex, NumPrimitives, NumInstances); //宏展开后为:CmdList.GetContext().RHIDrawPrimitive(BaseVertexIndex, NumPrimitives, NumInstances);
}

 

FRHICommand的子类除了以上用FRHICOMMAND_MACRO声明的,还拥有以下直接派生的:

FRHICommandSetShaderParameter
FRHICommandSetShaderUniformBuffer
FRHICommandSetShaderTexture
FRHICommandSetShaderResourceViewParameter
FRHICommandSetUAVParameter
FRHICommandSetShaderSampler
FRHICommandSetComputeShader
FRHICommandSetComputePipelineState
FRHICommandDispatchComputeShader
FRHICommandDispatchIndirectComputeShader
FRHICommandSetAsyncComputeBudget
FRHICommandCopyToStagingBuffer
FRHICommandWriteGPUFence
FRHICommandSetLocalUniformBuffer
FRHICommandSubmitCommandsHint
FRHICommandPushEvent
FRHICommandPopEvent
FRHICommandBuildAccelerationStructure
FRHICommandBuildAccelerationStructures
......

无论是直接派生还是用FRHICOMMAND_MACRO宏,都是FRHICommand的子类,都可以提供给渲染线程操作的RHI层渲染命令。只是用FRHICOMMAND_MACRO会更简便,少写一些重复的代码罢了。

因此可知,RHI命令种类繁多,主要包含以下几大类:

  • 数据和资源的设置、更新、清理、转换、拷贝、回读。
  • 图元绘制。
  • Pass、SubPass、场景、ViewPort等的开始和结束事件。
  • 栅栏、等待、广播接口。
  • 光线追踪。
  • Slate、调试相关的命令。

下面绘制出FRHICommand的核心继承体系:

 

FRHICommandList

FRHICommandList是RHI的指令队列,用来管理、执行一组FRHICommand的对象。它和父类的定义如下:

// Engine\Source\Runtime\RHI\Public\RHICommandList.h

// RHI命令列表基类.
class FRHICommandListBase : public FNoncopyable  // 不允许拷贝构造
{
public:
    ~FRHICommandListBase();

    // 附带了循环利用的自定义new/delete操作.
    void* operator new(size_t Size);
    void operator delete(void *RawMemory);

    // 刷新命令队列.
    inline void Flush();
    // 是否立即模式.
    inline bool IsImmediate();
    // 是否立即的异步计算.
    inline bool IsImmediateAsyncCompute();

    // 获取已占用的内存.
    const int32 GetUsedMemory() const;
    
    // 入队异步命令队列的提交.
    void QueueAsyncCommandListSubmit(FGraphEventRef& AnyThreadCompletionEvent, class FRHICommandList* CmdList);
    // 入队并行的异步命令队列的提交.
    void QueueParallelAsyncCommandListSubmit(FGraphEventRef* AnyThreadCompletionEvents, bool bIsPrepass, class FRHICommandList** CmdLists, int32* NumDrawsIfKnown, int32 Num, int32 MinDrawsPerTranslate, bool bSpewMerge);
    // 入队渲染线程命令队列的提交.
    void QueueRenderThreadCommandListSubmit(FGraphEventRef& RenderThreadCompletionEvent, class FRHICommandList* CmdList);
    // 入队命令队列的提交.
    void QueueCommandListSubmit(class FRHICommandList* CmdList);
    // 增加派发前序任务.
    void AddDispatchPrerequisite(const FGraphEventRef& Prereq);
    
    // 等待接口.
    void WaitForTasks(bool bKnownToBeComplete = false);
    void WaitForDispatch();
    void WaitForRHIThreadTasks();
    void HandleRTThreadTaskCompletion(const FGraphEventRef& MyCompletionGraphEvent);

    // 分配接口.
    void* Alloc(int32 AllocSize, int32 Alignment);
    template <typename T>
    void* Alloc();
    template <typename T>
    const TArrayView<T> AllocArray(const TArrayView<T> InArray);
    TCHAR* AllocString(const TCHAR* Name);
    // 分配指令.
    void* AllocCommand(int32 AllocSize, int32 Alignment);
    template <typename TCmd>
    void* AllocCommand();

    bool HasCommands() const;
    bool IsExecuting() const;
    bool IsBottomOfPipe() const;
    bool IsTopOfPipe() const;
    bool IsGraphics() const;
    bool IsAsyncCompute() const;
    // RHI管线, ERHIPipeline::Graphics或ERHIPipeline::AsyncCompute.
    ERHIPipeline GetPipeline() const;

    // 是否忽略RHI线程而直接当同步执行.
    bool Bypass() const;

    // 交换命令队列.
    void ExchangeCmdList(FRHICommandListBase& Other);
    // 设置Context.
    void SetContext(IRHICommandContext* InContext);
    IRHICommandContext& GetContext();
    void SetComputeContext(IRHIComputeContext* InComputeContext);
    IRHIComputeContext& GetComputeContext();
    void CopyContext(FRHICommandListBase& ParentCommandList);
    
    void MaybeDispatchToRHIThread();
    void MaybeDispatchToRHIThreadInner();
    
    (......)

private:
    // 命令链表的头.
    FRHICommandBase* Root;
    // 指向Root的指针.
    FRHICommandBase** CommandLink;
    
    bool bExecuting;
    uint32 NumCommands;
    uint32 UID;
    
    // 设备上下文.
    IRHICommandContext* Context;
    // 计算上下文.
    IRHIComputeContext* ComputeContext;
    
    FMemStackBase MemManager; 
    FGraphEventArray RTTasks;

    // 重置.
    void Reset();

public:
    enum class ERenderThreadContext
    {
        SceneRenderTargets,
        Num
    };
    
    // 渲染线程上下文.
    void *RenderThreadContexts[(int32)ERenderThreadContext::Num];

protected:
    //the values of this struct must be copied when the commandlist is split 
    struct FPSOContext
    {
        uint32 CachedNumSimultanousRenderTargets = 0;
        TStaticArray<FRHIRenderTargetView, MaxSimultaneousRenderTargets> CachedRenderTargets;
        FRHIDepthRenderTargetView CachedDepthStencilTarget;
        
        ESubpassHint SubpassHint = ESubpassHint::None;
        uint8 SubpassIndex = 0;
        uint8 MultiViewCount = 0;
        bool HasFragmentDensityAttachment = false;
    } PSOContext;

    // 绑定的着色器输入.
    FBoundShaderStateInput BoundShaderInput;
    // 绑定的计算着色器RHI资源.
    FRHIComputeShader* BoundComputeShaderRHI;

    // 使绑定的着色器生效.
    void ValidateBoundShader(FRHIVertexShader* ShaderRHI);
    void ValidateBoundShader(FRHIPixelShader* ShaderRHI);
    (......)

    void CacheActiveRenderTargets(...);
    void CacheActiveRenderTargets(const FRHIRenderPassInfo& Info);
    void IncrementSubpass();
    void ResetSubpass(ESubpassHint SubpassHint);
    
public:
    void CopyRenderThreadContexts(const FRHICommandListBase& ParentCommandList);
    void SetRenderThreadContext(void* InContext, ERenderThreadContext Slot);
    void* GetRenderThreadContext(ERenderThreadContext Slot);

    // 通用数据.
    struct FCommonData
    {
        class FRHICommandListBase* Parent = nullptr;

        enum class ECmdListType
        {
            Immediate = 1,
            Regular,
        };
        ECmdListType Type = ECmdListType::Regular;
        bool bInsideRenderPass = false;
        bool bInsideComputePass = false;
    };

    bool DoValidation() const;
    inline bool IsOutsideRenderPass() const;
    inline bool IsInsideRenderPass() const;
    inline bool IsInsideComputePass() const;

    FCommonData Data;
};

// 计算命令队列.
class FRHIComputeCommandList : public FRHICommandListBase
{
public:
    FRHIComputeCommandList(FRHIGPUMask GPUMask) : FRHICommandListBase(GPUMask) {}
    
    void* operator new(size_t Size);
    void operator delete(void *RawMemory);

    // 着色器参数设置和获取.
    inline FRHIComputeShader* GetBoundComputeShader() const;
    void SetGlobalUniformBuffers(const FUniformBufferStaticBindings& UniformBuffers);
    void SetShaderUniformBuffer(FRHIComputeShader* Shader, uint32 BaseIndex, FRHIUniformBuffer* UniformBuffer);
    void SetShaderUniformBuffer(const FComputeShaderRHIRef& Shader, uint32 BaseIndex, FRHIUniformBuffer* UniformBuffer);
    void SetShaderParameter(FRHIComputeShader* Shader, uint32 BufferIndex, uint32 BaseIndex, uint32 NumBytes, const void* NewValue);
    void SetShaderParameter(FComputeShaderRHIRef& Shader, uint32 BufferIndex, uint32 BaseIndex, uint32 NumBytes, const void* NewValue);
    void SetShaderTexture(FRHIComputeShader* Shader, uint32 TextureIndex, FRHITexture* Texture);
    void SetShaderResourceViewParameter(FRHIComputeShader* Shader, uint32 SamplerIndex, FRHIShaderResourceView* SRV);
    void SetShaderSampler(FRHIComputeShader* Shader, uint32 SamplerIndex, FRHISamplerState* State);
    void SetUAVParameter(FRHIComputeShader* Shader, uint32 UAVIndex, FRHIUnorderedAccessView* UAV);
    void SetUAVParameter(FRHIComputeShader* Shader, uint32 UAVIndex, FRHIUnorderedAccessView* UAV, uint32 InitialCount);
    void SetComputeShader(FRHIComputeShader* ComputeShader);
    void SetComputePipelineState(FComputePipelineState* ComputePipelineState, FRHIComputeShader* ComputeShader);

    void SetAsyncComputeBudget(EAsyncComputeBudget Budget);
    // 派发计算着色器.
    void DispatchComputeShader(uint32 ThreadGroupCountX, uint32 ThreadGroupCountY, uint32 ThreadGroupCountZ);
    void DispatchIndirectComputeShader(FRHIVertexBuffer* ArgumentBuffer, uint32 ArgumentOffset);

    // 清理.
    void ClearUAVFloat(FRHIUnorderedAccessView* UnorderedAccessViewRHI, const FVector4& Values);
    void ClearUAVUint(FRHIUnorderedAccessView* UnorderedAccessViewRHI, const FUintVector4& Values);
    
    // 资源转换.
    void BeginTransitions(TArrayView<const FRHITransition*> Transitions);
    void EndTransitions(TArrayView<const FRHITransition*> Transitions);
    inline void Transition(TArrayView<const FRHITransitionInfo> Infos);
    void BeginTransition(const FRHITransition* Transition);
    void EndTransition(const FRHITransition* Transition);
    void Transition(const FRHITransitionInfo& Info)

    // ---- 旧有的API ----

    void TransitionResource(ERHIAccess TransitionType, const FTextureRHIRef& InTexture);
    void TransitionResource(ERHIAccess TransitionType, FRHITexture* InTexture);
    inline void TransitionResources(ERHIAccess TransitionType, FRHITexture* const* InTextures, int32 NumTextures);
    void TransitionResourceArrayNoCopy(ERHIAccess TransitionType, TArray<FRHITexture*>& InTextures);
    inline void TransitionResources(ERHIAccess TransitionType, EResourceTransitionPipeline /* ignored TransitionPipeline */, FRHIUnorderedAccessView* const* InUAVs, int32 NumUAVs, FRHIComputeFence* WriteFence);
    void TransitionResource(ERHIAccess TransitionType, EResourceTransitionPipeline TransitionPipeline, FRHIUnorderedAccessView* InUAV, FRHIComputeFence* WriteFence);
    void TransitionResource(ERHIAccess TransitionType, EResourceTransitionPipeline TransitionPipeline, FRHIUnorderedAccessView* InUAV);
    void TransitionResources(ERHIAccess TransitionType, EResourceTransitionPipeline TransitionPipeline, FRHIUnorderedAccessView* const* InUAVs, int32 NumUAVs);
    void WaitComputeFence(FRHIComputeFence* WaitFence);

    void BeginUAVOverlap();
    void EndUAVOverlap();
    void BeginUAVOverlap(FRHIUnorderedAccessView* UAV);
    void EndUAVOverlap(FRHIUnorderedAccessView* UAV);
    void BeginUAVOverlap(TArrayView<FRHIUnorderedAccessView* const> UAVs);
    void EndUAVOverlap(TArrayView<FRHIUnorderedAccessView* const> UAVs);

    void PushEvent(const TCHAR* Name, FColor Color);
    void PopEvent();
    void BreakPoint();

    void SubmitCommandsHint();
    void CopyToStagingBuffer(FRHIVertexBuffer* SourceBuffer, FRHIStagingBuffer* DestinationStagingBuffer, uint32 Offset, uint32 NumBytes);

    void WriteGPUFence(FRHIGPUFence* Fence);
    void SetGPUMask(FRHIGPUMask InGPUMask);

    (......)
};

// RHI命令队列.
class FRHICommandList : public FRHIComputeCommandList
{
public:
    FRHICommandList(FRHIGPUMask GPUMask) : FRHIComputeCommandList(GPUMask) {}

    bool AsyncPSOCompileAllowed() const;

    void* operator new(size_t Size);
    void operator delete(void *RawMemory);
    
    // 获取绑定的着色器.
    inline FRHIVertexShader* GetBoundVertexShader() const;
    inline FRHIHullShader* GetBoundHullShader() const;
    inline FRHIDomainShader* GetBoundDomainShader() const;
    inline FRHIPixelShader* GetBoundPixelShader() const;
    inline FRHIGeometryShader* GetBoundGeometryShader() const;

    // 更新多帧资源.
    void BeginUpdateMultiFrameResource(FRHITexture* Texture);
    void EndUpdateMultiFrameResource(FRHITexture* Texture);
    void BeginUpdateMultiFrameResource(FRHIUnorderedAccessView* UAV);
    void EndUpdateMultiFrameResource(FRHIUnorderedAccessView* UAV);

    // Uniform Buffer接口.
    FLocalUniformBuffer BuildLocalUniformBuffer(const void* Contents, uint32 ContentsSize, const FRHIUniformBufferLayout& Layout);
    template <typename TRHIShader>
    void SetLocalShaderUniformBuffer(TRHIShader* Shader, uint32 BaseIndex, const FLocalUniformBuffer& UniformBuffer);
    template <typename TShaderRHI>
    void SetLocalShaderUniformBuffer(const TRefCountPtr<TShaderRHI>& Shader, uint32 BaseIndex, const FLocalUniformBuffer& UniformBuffer);
    void SetShaderUniformBuffer(FRHIGraphicsShader* Shader, uint32 BaseIndex, FRHIUniformBuffer* UniformBuffer);
    template <typename TShaderRHI>
    FORCEINLINE void SetShaderUniformBuffer(const TRefCountPtr<TShaderRHI>& Shader, uint32 BaseIndex, FRHIUniformBuffer* UniformBuffer);
    
    // 着色器参数.
    void SetShaderParameter(FRHIGraphicsShader* Shader, uint32 BufferIndex, uint32 BaseIndex, uint32 NumBytes, const void* NewValue);
    template <typename TShaderRHI>
    void SetShaderParameter(const TRefCountPtr<TShaderRHI>& Shader, uint32 BufferIndex, uint32 BaseIndex, uint32 NumBytes, const void* NewValue);
    void SetShaderTexture(FRHIGraphicsShader* Shader, uint32 TextureIndex, FRHITexture* Texture);
    template <typename TShaderRHI>
    void SetShaderTexture(const TRefCountPtr<TShaderRHI>& Shader, uint32 TextureIndex, FRHITexture* Texture);
    void SetShaderResourceViewParameter(FRHIGraphicsShader* Shader, uint32 SamplerIndex, FRHIShaderResourceView* SRV);
    template <typename TShaderRHI>
    void SetShaderResourceViewParameter(const TRefCountPtr<TShaderRHI>& Shader, uint32 SamplerIndex, FRHIShaderResourceView* SRV);
    void SetShaderSampler(FRHIGraphicsShader* Shader, uint32 SamplerIndex, FRHISamplerState* State);
    template <typename TShaderRHI>
    void SetShaderSampler(const TRefCountPtr<TShaderRHI>& Shader, uint32 SamplerIndex, FRHISamplerState* State);
    void SetUAVParameter(FRHIPixelShader* Shader, uint32 UAVIndex, FRHIUnorderedAccessView* UAV);
    void SetUAVParameter(const TRefCountPtr<FRHIPixelShader>& Shader, uint32 UAVIndex, FRHIUnorderedAccessView* UAV);
    void SetBlendFactor(const FLinearColor& BlendFactor = FLinearColor::White);
    
    // 图元绘制.
    void DrawPrimitive(uint32 BaseVertexIndex, uint32 NumPrimitives, uint32 NumInstances);
    void DrawIndexedPrimitive(FRHIIndexBuffer* IndexBuffer, int32 BaseVertexIndex, uint32 FirstInstance, uint32 NumVertices, uint32 StartIndex, uint32 NumPrimitives, uint32 NumInstances);
    void DrawPrimitiveIndirect(FRHIVertexBuffer* ArgumentBuffer, uint32 ArgumentOffset);
    void DrawIndexedIndirect(FRHIIndexBuffer* IndexBufferRHI, FRHIStructuredBuffer* ArgumentsBufferRHI, uint32 DrawArgumentsIndex, uint32 NumInstances);
    void DrawIndexedPrimitiveIndirect(FRHIIndexBuffer* IndexBuffer, FRHIVertexBuffer* ArgumentsBuffer, uint32 ArgumentOffset);
    
    // 设置数据.
    void SetStreamSource(uint32 StreamIndex, FRHIVertexBuffer* VertexBuffer, uint32 Offset);
    void SetStencilRef(uint32 StencilRef);
    void SetViewport(float MinX, float MinY, float MinZ, float MaxX, float MaxY, float MaxZ);
    void SetStereoViewport(float LeftMinX, float RightMinX, float LeftMinY, float RightMinY, float MinZ, float LeftMaxX, float RightMaxX, float LeftMaxY, float RightMaxY, float MaxZ);
    void SetScissorRect(bool bEnable, uint32 MinX, uint32 MinY, uint32 MaxX, uint32 MaxY);
    void ApplyCachedRenderTargets(FGraphicsPipelineStateInitializer& GraphicsPSOInit);
    void SetGraphicsPipelineState(class FGraphicsPipelineState* GraphicsPipelineState, const FBoundShaderStateInput& ShaderInput, bool bApplyAdditionalState);
    void SetDepthBounds(float MinDepth, float MaxDepth);
    void SetShadingRate(EVRSShadingRate ShadingRate, EVRSRateCombiner Combiner);
    void SetShadingRateImage(FRHITexture* RateImageTexture, EVRSRateCombiner Combiner);
    
    // 拷贝纹理.
    void CopyToResolveTarget(FRHITexture* SourceTextureRHI, FRHITexture* DestTextureRHI, const FResolveParams& ResolveParams);
    void CopyTexture(FRHITexture* SourceTextureRHI, FRHITexture* DestTextureRHI, const FRHICopyTextureInfo& CopyInfo);
    
    void ResummarizeHTile(FRHITexture2D* DepthTexture);
    
    // 渲染查询.
    void BeginRenderQuery(FRHIRenderQuery* RenderQuery)
    void EndRenderQuery(FRHIRenderQuery* RenderQuery)
    void CalibrateTimers(FRHITimestampCalibrationQuery* CalibrationQuery);
    void PollOcclusionQueries()

    /* LEGACY API */
    void TransitionResource(FExclusiveDepthStencil DepthStencilMode, FRHITexture* DepthTexture);
    void BeginRenderPass(const FRHIRenderPassInfo& InInfo, const TCHAR* Name);
    void EndRenderPass();
    void NextSubpass();

    // 下面接口需要在立即模式的命令队列调用.
    void BeginScene();
    void EndScene();
    void BeginDrawingViewport(FRHIViewport* Viewport, FRHITexture* RenderTargetRHI);
    void EndDrawingViewport(FRHIViewport* Viewport, bool bPresent, bool bLockToVsync);
    void BeginFrame();
    void EndFrame();

    void RHIInvalidateCachedState();
    void DiscardRenderTargets(bool Depth, bool Stencil, uint32 ColorBitMask);
    
    void CopyBufferRegion(FRHIVertexBuffer* DestBuffer, uint64 DstOffset, FRHIVertexBuffer* SourceBuffer, uint64 SrcOffset, uint64 NumBytes);

    (......)
};

 

FRHICommandListBase定义了命令队列所需的基本数据(命令列表、设备上下文)和接口(命令的刷新、等待、入队、派发等,内存分配)。

FRHIComputeCommandList定义了计算着色器相关的接口、GPU资源状态转换和着色器部分参数的设置。

FRHICommandList定义了普通渲染管线的接口,与IRHICommandContext的接口高度相似且重叠。包含VS、PS、GS的绑定,图元绘制,更多着色器参数的设置和资源状态转换,资源创建、更新和等待等等。

 

FRHICommandList还有数个子类,定义如下:

// 立即模式的命令队列.
class FRHICommandListImmediate : public FRHICommandList
{
    // 命令匿名函数.
    template <typename LAMBDA>
    struct TRHILambdaCommand final : public FRHICommandBase
    {
        LAMBDA Lambda;

        void ExecuteAndDestruct(FRHICommandListBase& CmdList, FRHICommandListDebugContext&) override final;
    };

    FRHICommandListImmediate();
    ~FRHICommandListImmediate();
    
public:
    // 立即刷新命令.
    void ImmediateFlush(EImmediateFlushType::Type FlushType);
    // 阻塞RHI线程.
    bool StallRHIThread();
    // 取消阻塞RHI线程.
    void UnStallRHIThread();
    // 是否阻塞中.
    static bool IsStalled();

    void SetCurrentStat(TStatId Stat);

    static FGraphEventRef RenderThreadTaskFence();
    static FGraphEventArray& GetRenderThreadTaskArray();
    static void WaitOnRenderThreadTaskFence(FGraphEventRef& Fence);
    static bool AnyRenderThreadTasksOutstanding();
    FGraphEventRef RHIThreadFence(bool bSetLockFence = false);

    // 将给定的异步计算命令列表按当前立即命令列表的顺序排列.
    void QueueAsyncCompute(FRHIComputeCommandList& RHIComputeCmdList);

    bool IsBottomOfPipe();
    bool IsTopOfPipe();
    template <typename LAMBDA>
    void EnqueueLambda(LAMBDA&& Lambda);

    // 资源创建.
    FSamplerStateRHIRef CreateSamplerState(const FSamplerStateInitializerRHI& Initializer)
    FRasterizerStateRHIRef CreateRasterizerState(const FRasterizerStateInitializerRHI& Initializer)
    FDepthStencilStateRHIRef CreateDepthStencilState(const FDepthStencilStateInitializerRHI& Initializer)
    FBlendStateRHIRef CreateBlendState(const FBlendStateInitializerRHI& Initializer)
    FPixelShaderRHIRef CreatePixelShader(TArrayView<const uint8> Code, const FSHAHash& Hash)
    FVertexShaderRHIRef CreateVertexShader(TArrayView<const uint8> Code, const FSHAHash& Hash)
    FHullShaderRHIRef CreateHullShader(TArrayView<const uint8> Code, const FSHAHash& Hash)
    FDomainShaderRHIRef CreateDomainShader(TArrayView<const uint8> Code, const FSHAHash& Hash)
    FGeometryShaderRHIRef CreateGeometryShader(TArrayView<const uint8> Code, const FSHAHash& Hash)
    FComputeShaderRHIRef CreateComputeShader(TArrayView<const uint8> Code, const FSHAHash& Hash)
    FComputeFenceRHIRef CreateComputeFence(const FName& Name)
    FGPUFenceRHIRef CreateGPUFence(const FName& Name)
    FStagingBufferRHIRef CreateStagingBuffer()
    FBoundShaderStateRHIRef CreateBoundShaderState(...)
    FGraphicsPipelineStateRHIRef CreateGraphicsPipelineState(const FGraphicsPipelineStateInitializer& Initializer)
    TRefCountPtr<FRHIComputePipelineState> CreateComputePipelineState(FRHIComputeShader* ComputeShader)
    FUniformBufferRHIRef CreateUniformBuffer(...)
    FIndexBufferRHIRef CreateAndLockIndexBuffer(uint32 Stride, uint32 Size, EBufferUsageFlags InUsage, ERHIAccess InResourceState, FRHIResourceCreateInfo& CreateInfo, void*& OutDataBuffer)
    FIndexBufferRHIRef CreateAndLockIndexBuffer(uint32 Stride, uint32 Size, uint32 InUsage, FRHIResourceCreateInfo& CreateInfo, void*& OutDataBuffer)
    
    // 顶点/索引接口.
    void* LockIndexBuffer(FRHIIndexBuffer* IndexBuffer, uint32 Offset, uint32 SizeRHI, EResourceLockMode LockMode);
    void UnlockIndexBuffer(FRHIIndexBuffer* IndexBuffer);
    void* LockStagingBuffer(FRHIStagingBuffer* StagingBuffer, FRHIGPUFence* Fence, uint32 Offset, uint32 SizeRHI);
    void UnlockStagingBuffer(FRHIStagingBuffer* StagingBuffer);
    FVertexBufferRHIRef CreateAndLockVertexBuffer(uint32 Size, EBufferUsageFlags InUsage, ...);
    FVertexBufferRHIRef CreateAndLockVertexBuffer(uint32 Size, uint32 InUsage, FRHIResourceCreateInfo& CreateInfo, void*& OutDataBuffer);
    void* LockVertexBuffer(FRHIVertexBuffer* VertexBuffer, uint32 Offset, uint32 SizeRHI, EResourceLockMode LockMode);
    void UnlockVertexBuffer(FRHIVertexBuffer* VertexBuffer);
    void CopyVertexBuffer(FRHIVertexBuffer* SourceBuffer, FRHIVertexBuffer* DestBuffer);
    void* LockStructuredBuffer(FRHIStructuredBuffer* StructuredBuffer, uint32 Offset, uint32 SizeRHI, EResourceLockMode LockMode);
    void UnlockStructuredBuffer(FRHIStructuredBuffer* StructuredBuffer);
    
    // UAV/SRV创建.
    FUnorderedAccessViewRHIRef CreateUnorderedAccessView(FRHIStructuredBuffer* StructuredBuffer, bool bUseUAVCounter, bool bAppendBuffer)
    FUnorderedAccessViewRHIRef CreateUnorderedAccessView(FRHITexture* Texture, uint32 MipLevel)
    FUnorderedAccessViewRHIRef CreateUnorderedAccessView(FRHITexture* Texture, uint32 MipLevel, uint8 Format)
    FUnorderedAccessViewRHIRef CreateUnorderedAccessView(FRHIVertexBuffer* VertexBuffer, uint8 Format)
    FUnorderedAccessViewRHIRef CreateUnorderedAccessView(FRHIIndexBuffer* IndexBuffer, uint8 Format)
    FShaderResourceViewRHIRef CreateShaderResourceView(FRHIStructuredBuffer* StructuredBuffer)
    FShaderResourceViewRHIRef CreateShaderResourceView(FRHIVertexBuffer* VertexBuffer, uint32 Stride, uint8 Format)
    FShaderResourceViewRHIRef CreateShaderResourceView(const FShaderResourceViewInitializer& Initializer)
    FShaderResourceViewRHIRef CreateShaderResourceView(FRHIIndexBuffer* Buffer)
        
    uint64 CalcTexture2DPlatformSize(...);
    uint64 CalcTexture3DPlatformSize(...);
    uint64 CalcTextureCubePlatformSize(...);
    
    // 纹理操作.
    void GetTextureMemoryStats(FTextureMemoryStats& OutStats);
    bool GetTextureMemoryVisualizeData(...);
    void CopySharedMips(FRHITexture2D* DestTexture2D, FRHITexture2D* SrcTexture2D);
    void TransferTexture(FRHITexture2D* Texture, FIntRect Rect, uint32 SrcGPUIndex, uint32 DestGPUIndex, bool PullData);
    void TransferTextures(const TArrayView<const FTransferTextureParams> Params);
    void GetResourceInfo(FRHITexture* Ref, FRHIResourceInfo& OutInfo);
    FShaderResourceViewRHIRef CreateShaderResourceView(FRHITexture* Texture, const FRHITextureSRVCreateInfo& CreateInfo);
    FShaderResourceViewRHIRef CreateShaderResourceView(FRHITexture* Texture, uint8 MipLevel);
    FShaderResourceViewRHIRef CreateShaderResourceView(FRHITexture* Texture, uint8 MipLevel, uint8 NumMipLevels, uint8 Format);
    FShaderResourceViewRHIRef CreateShaderResourceViewWriteMask(FRHITexture2D* Texture2DRHI);
    FShaderResourceViewRHIRef CreateShaderResourceViewFMask(FRHITexture2D* Texture2DRHI);
    uint32 ComputeMemorySize(FRHITexture* TextureRHI);
    FTexture2DRHIRef AsyncReallocateTexture2D(...);
    ETextureReallocationStatus FinalizeAsyncReallocateTexture2D(FRHITexture2D* Texture2D, bool bBlockUntilCompleted);
    ETextureReallocationStatus CancelAsyncReallocateTexture2D(FRHITexture2D* Texture2D, bool bBlockUntilCompleted);
    void* LockTexture2D(...);
    void UnlockTexture2D(FRHITexture2D* Texture, uint32 MipIndex, bool bLockWithinMiptail, bool bFlushRHIThread = true);
    void* LockTexture2DArray(...);
    void UnlockTexture2DArray(FRHITexture2DArray* Texture, uint32 TextureIndex, uint32 MipIndex, bool bLockWithinMiptail);
    void UpdateTexture2D(...);
    void UpdateFromBufferTexture2D(...);
    FUpdateTexture3DData BeginUpdateTexture3D(...);
    void EndUpdateTexture3D(FUpdateTexture3DData& UpdateData);
    void EndMultiUpdateTexture3D(TArray<FUpdateTexture3DData>& UpdateDataArray);
    void UpdateTexture3D(...);
    void* LockTextureCubeFace(...);
    void UnlockTextureCubeFace(FRHITextureCube* Texture, ...);

    // 读取纹理表面数据.
    void ReadSurfaceData(FRHITexture* Texture, ...);
    void ReadSurfaceData(FRHITexture* Texture, ...);
    void MapStagingSurface(FRHITexture* Texture, void*& OutData, int32& OutWidth, int32& OutHeight);
    void MapStagingSurface(FRHITexture* Texture, ...);
    void UnmapStagingSurface(FRHITexture* Texture);
    void ReadSurfaceFloatData(FRHITexture* Texture, ...);
    void ReadSurfaceFloatData(FRHITexture* Texture, ...);
    void Read3DSurfaceFloatData(FRHITexture* Texture,...);
    
    // 渲染线程的资源状态转换.
    void AcquireTransientResource_RenderThread(FRHITexture* Texture);
    void DiscardTransientResource_RenderThread(FRHITexture* Texture);
    void AcquireTransientResource_RenderThread(FRHIVertexBuffer* Buffer);
    void DiscardTransientResource_RenderThread(FRHIVertexBuffer* Buffer);
    void AcquireTransientResource_RenderThread(FRHIStructuredBuffer* Buffer);
    void DiscardTransientResource_RenderThread(FRHIStructuredBuffer* Buffer);
   
    // 获取渲染查询结果.
    bool GetRenderQueryResult(FRHIRenderQuery* RenderQuery, ...);
    void PollRenderQueryResults();
    
    // 视口
    FViewportRHIRef CreateViewport(void* WindowHandle, ...);
    uint32 GetViewportNextPresentGPUIndex(FRHIViewport* Viewport);
    FTexture2DRHIRef GetViewportBackBuffer(FRHIViewport* Viewport);
    void AdvanceFrameForGetViewportBackBuffer(FRHIViewport* Viewport);
    void ResizeViewport(FRHIViewport* Viewport, ...);
    
    void AcquireThreadOwnership();
    void ReleaseThreadOwnership();
    
    // 提交命令并刷新到GPU.
    void SubmitCommandsAndFlushGPU();
    // 执行命令队列.
    void ExecuteCommandList(FRHICommandList* CmdList);
    
    // 更新资源.
    void UpdateTextureReference(FRHITextureReference* TextureRef, FRHITexture* NewTexture);
    void UpdateRHIResources(FRHIResourceUpdateInfo* UpdateInfos, int32 Num, bool bNeedReleaseRefs);
    // 刷新资源.
    void FlushResources();
    
    // 帧更新.
    void Tick(float DeltaTime);
    // 阻塞直到GPU空闲. // 强制把当前的所有rhi中指令执行完毕,并且把commandbuffer发送给gpu,并且等待gpu执行完成,相当于一个强制同步到GPU的过程.
    void BlockUntilGPUIdle();
    
    // 暂停/开启渲染.
    void SuspendRendering();
    void ResumeRendering();
    bool IsRenderingSuspended();
    
    // 压缩/解压数据.
    bool EnqueueDecompress(uint8_t* SrcBuffer, uint8_t* DestBuffer, int CompressedSize, void* ErrorCodeBuffer);
    bool EnqueueCompress(uint8_t* SrcBuffer, uint8_t* DestBuffer, int UnCompressedSize, void* ErrorCodeBuffer);
    
    // 其它接口.
    bool GetAvailableResolutions(FScreenResolutionArray& Resolutions, bool bIgnoreRefreshRate);
    void GetSupportedResolution(uint32& Width, uint32& Height);
    void VirtualTextureSetFirstMipInMemory(FRHITexture2D* Texture, uint32 FirstMip);
    void VirtualTextureSetFirstMipVisible(FRHITexture2D* Texture, uint32 FirstMip);

    // 获取原生的数据.
    void* GetNativeDevice();
    void* GetNativeInstance();
    // 获取立即模式的命令上下文.
    IRHICommandContext* GetDefaultContext();
    // 获取命令上下文容器.
    IRHICommandContextContainer* GetCommandContextContainer(int32 Index, int32 Num);
    
    uint32 GetGPUFrameCycles();
};

// 在RHI实现中标记命令列表的递归使用的类型定义.
class FRHICommandList_RecursiveHazardous : public FRHICommandList
{
public:
    FRHICommandList_RecursiveHazardous(IRHICommandContext *Context, FRHIGPUMask InGPUMask = FRHIGPUMask::All());
};

// RHI内部使用的工具类,以更安全地使用FRHICommandList_RecursiveHazardous
template <typename ContextType>
class TRHICommandList_RecursiveHazardous : public FRHICommandList_RecursiveHazardous
{
    template <typename LAMBDA>
    struct TRHILambdaCommand final : public FRHICommandBase
    {
        LAMBDA Lambda;

        TRHILambdaCommand(LAMBDA&& InLambda);
        void ExecuteAndDestruct(FRHICommandListBase& CmdList, FRHICommandListDebugContext&) override final;
    };

public:
    TRHICommandList_RecursiveHazardous(ContextType *Context, FRHIGPUMask GPUMask = FRHIGPUMask::All());

    template <typename LAMBDA>
    void RunOnContext(LAMBDA&& Lambda);
};

 

在RHI内部使用TRHICommandList_RecursiveHazardous来执行Computer Shader,下文为使用其来清理某个UAV(UnorderedAccessView)为数值Values:

 

FRHICommandListImmediate封装了立即模式的图形API接口,在UE渲染体系中被应用得非常广泛。它额外定义了资源的操作、创建、更新、读取和状态转换接口,也增加了线程同步和GPU同步的接口。

 

下面对FRHICommandList核心继承体系来个UML图总结一下:

 

RHI体系总览

若抛开图形API的具体实现细节和众多的RHI具体子类,将RHI Context/CommandList/Command/Resource等的顶层概念汇总成UML关系图,则是如下模样:

 

下图是在上面的基础上细化了子类的UML:

 

RHI命令执行

FRHICommandListExecutor

FRHICommandListExecutor负责将Renderer层的RHI中间指令转译(或直接调用)到目标平台的图形API,它在RHI体系中起着举足轻重的作用,定义如下:

// Engine\Source\Runtime\RHI\Public\RHICommandList.h

class RHI_API FRHICommandListExecutor
{
public:
    enum
    {
        DefaultBypass = PLATFORM_RHITHREAD_DEFAULT_BYPASS
    };
    FRHICommandListExecutor()
        : bLatchedBypass(!!DefaultBypass)
        , bLatchedUseParallelAlgorithms(false)
    {
    }
    
    // 静态接口, 获取立即命令列表.
    static inline FRHICommandListImmediate& GetImmediateCommandList();
    // 静态接口, 获取立即异步计算命令列表.
    static inline FRHIAsyncComputeCommandListImmediate& GetImmediateAsyncComputeCommandList();

    // 执行命令列表.
    void ExecuteList(FRHICommandListBase& CmdList);
    void ExecuteList(FRHICommandListImmediate& CmdList);
    void LatchBypass();

    // 等待RHI线程栅栏.
    static void WaitOnRHIThreadFence(FGraphEventRef& Fence);

    // 是否绕过命令生成模式, 如果是, 则直接调用目标平台的图形API.
    FORCEINLINE_DEBUGGABLE bool Bypass()
    {
#if CAN_TOGGLE_COMMAND_LIST_BYPASS
        return bLatchedBypass;
#else
        return !!DefaultBypass;
#endif
    }
    // 是否使用并行算法.
    FORCEINLINE_DEBUGGABLE bool UseParallelAlgorithms()
    {
#if CAN_TOGGLE_COMMAND_LIST_BYPASS
        return bLatchedUseParallelAlgorithms;
#else
        return  FApp::ShouldUseThreadingForPerformance() && !Bypass() && (GSupportsParallelRenderingTasksWithSeparateRHIThread || !IsRunningRHIInSeparateThread());
#endif
    }
    static void CheckNoOutstandingCmdLists();
    static bool IsRHIThreadActive();
    static bool IsRHIThreadCompletelyFlushed();

private:
    // 内部执行.
    void ExecuteInner(FRHICommandListBase& CmdList);
    // 内部执行, 真正执行转译.
    static void ExecuteInner_DoExecute(FRHICommandListBase& CmdList);

    bool bLatchedBypass;
    bool bLatchedUseParallelAlgorithms;
    
    // 同步变量.
    FThreadSafeCounter UIDCounter;
    FThreadSafeCounter OutstandingCmdListCount;
    
    // 立即模式的命令队列.
    FRHICommandListImmediate CommandListImmediate;
    // 立即模式的异步计算命令队列.
    FRHIAsyncComputeCommandListImmediate AsyncComputeCmdListImmediate;
};

下面是FRHICommandListExecutor部分重要接口的实现代码:

// Engine\Source\Runtime\RHI\Private\RHICommandList.cpp

// 检测RHI线程是否激活状态.
bool FRHICommandListExecutor::IsRHIThreadActive()
{
    // 是否异步提交.
    bool bAsyncSubmit = CVarRHICmdAsyncRHIThreadDispatch.GetValueOnRenderThread() > 0;  // r.RHICmdAsyncRHIThreadDispatch
    // 1. 先检测是否存在未完成的子命令列表提交任务.
    if (bAsyncSubmit)
    {
        if (RenderThreadSublistDispatchTask.GetReference() && RenderThreadSublistDispatchTask->IsComplete())
        {
            RenderThreadSublistDispatchTask = nullptr;
        }
        if (RenderThreadSublistDispatchTask.GetReference())
        {
            return true; // it might become active at any time
        }
        // otherwise we can safely look at RHIThreadTask
    }

    // 2. 再检测是否存在未完成的RHI线程任务.
    if (RHIThreadTask.GetReference() && RHIThreadTask->IsComplete())
    {
        RHIThreadTask = nullptr;
        PrevRHIThreadTask = nullptr;
    }
    return !!RHIThreadTask.GetReference();
}

// 检测RHI线程是否完全刷新了数据.
bool FRHICommandListExecutor::IsRHIThreadCompletelyFlushed()
{
    if (IsRHIThreadActive() || GetImmediateCommandList().HasCommands())
    {
        return false;
    }
    if (RenderThreadSublistDispatchTask.GetReference() && RenderThreadSublistDispatchTask->IsComplete())
    {
#if NEEDS_DEBUG_INFO_ON_PRESENT_HANG
        bRenderThreadSublistDispatchTaskClearedOnRT = IsInActualRenderingThread();
        bRenderThreadSublistDispatchTaskClearedOnGT = IsInGameThread();
#endif
        RenderThreadSublistDispatchTask = nullptr;
    }
    return !RenderThreadSublistDispatchTask;
}

void FRHICommandListExecutor::ExecuteList(FRHICommandListImmediate& CmdList)
{
    {
        SCOPE_CYCLE_COUNTER(STAT_ImmedCmdListExecuteTime);
        ExecuteInner(CmdList);
    }
}

void FRHICommandListExecutor::ExecuteList(FRHICommandListBase& CmdList)
{
    // 执行命令队列转换之前先刷新已有的命令.
    if (IsInRenderingThread() && !GetImmediateCommandList().IsExecuting())
    {
        GetImmediateCommandList().ImmediateFlush(EImmediateFlushType::DispatchToRHIThread);
    }

    // 内部执行.
    ExecuteInner(CmdList);
}

void FRHICommandListExecutor::ExecuteInner(FRHICommandListBase& CmdList)
{
    // 是否在渲染线程中.
    bool bIsInRenderingThread = IsInRenderingThread();
    // 是否在游戏线程中.
    bool bIsInGameThread = IsInGameThread();
    
    // 开启了专用的RHI线程.
    if (IsRunningRHIInSeparateThread())
    {
        bool bAsyncSubmit = false;
        ENamedThreads::Type RenderThread_Local = ENamedThreads::GetRenderThread_Local();
        if (bIsInRenderingThread)
        {
            if (!bIsInGameThread && !FTaskGraphInterface::Get().IsThreadProcessingTasks(RenderThread_Local))
            {
                // 把所有需要传递的东西都处理掉.
                FTaskGraphInterface::Get().ProcessThreadUntilIdle(RenderThread_Local);
            }
            // 检测子命令列表任务是否完成.
            bAsyncSubmit = CVarRHICmdAsyncRHIThreadDispatch.GetValueOnRenderThread() > 0;  // r.RHICmdAsyncRHIThreadDispatch
            if (RenderThreadSublistDispatchTask.GetReference() && RenderThreadSublistDispatchTask->IsComplete())
            {
                RenderThreadSublistDispatchTask = nullptr;
                if (bAsyncSubmit && RHIThreadTask.GetReference() && RHIThreadTask->IsComplete())
                {
                    RHIThreadTask = nullptr;
                    PrevRHIThreadTask = nullptr;
                }
            }
            // 检测RHI线程任务是否完成.
            if (!bAsyncSubmit && RHIThreadTask.GetReference() && RHIThreadTask->IsComplete())
            {
                RHIThreadTask = nullptr;
                PrevRHIThreadTask = nullptr;
            }
        }
        
        if (CVarRHICmdUseThread.GetValueOnRenderThread() > 0 && bIsInRenderingThread && !bIsInGameThread) // r.RHICmdUseThread
        {
             // 交换前序和RT线程任务的列表.
            FRHICommandList* SwapCmdList;
            FGraphEventArray Prereq;
            Exchange(Prereq, CmdList.RTTasks); 
            {
                QUICK_SCOPE_CYCLE_COUNTER(STAT_FRHICommandListExecutor_SwapCmdLists);
                SwapCmdList = new FRHICommandList(CmdList.GetGPUMask());

                static_assert(sizeof(FRHICommandList) == sizeof(FRHICommandListImmediate), "We are memswapping FRHICommandList and FRHICommandListImmediate; they need to be swappable.");
                SwapCmdList->ExchangeCmdList(CmdList);
                CmdList.CopyContext(*SwapCmdList);
                CmdList.GPUMask = SwapCmdList->GPUMask;
                CmdList.InitialGPUMask = SwapCmdList->GPUMask;
                CmdList.PSOContext = SwapCmdList->PSOContext;
                CmdList.Data.bInsideRenderPass = SwapCmdList->Data.bInsideRenderPass;
                CmdList.Data.bInsideComputePass = SwapCmdList->Data.bInsideComputePass;
            }
            
            // 提交任务.
            QUICK_SCOPE_CYCLE_COUNTER(STAT_FRHICommandListExecutor_SubmitTasks);

            // 创建FDispatchRHIThreadTask, 并将AllOutstandingTasks和RenderThreadSublistDispatchTask作为它的前序任务.
            if (AllOutstandingTasks.Num() || RenderThreadSublistDispatchTask.GetReference())
            {
                Prereq.Append(AllOutstandingTasks);
                AllOutstandingTasks.Reset();
                if (RenderThreadSublistDispatchTask.GetReference())
                {
                    Prereq.Add(RenderThreadSublistDispatchTask);
                }
                RenderThreadSublistDispatchTask = TGraphTask<FDispatchRHIThreadTask>::CreateTask(&Prereq, ENamedThreads::GetRenderThread()).ConstructAndDispatchWhenReady(SwapCmdList, bAsyncSubmit);
            }
            // 创建FExecuteRHIThreadTask, 并将RHIThreadTask作为它的前序任务.
            else
            {
                if (RHIThreadTask.GetReference())
                {
                    Prereq.Add(RHIThreadTask);
                }
                PrevRHIThreadTask = RHIThreadTask;
                RHIThreadTask = TGraphTask<FExecuteRHIThreadTask>::CreateTask(&Prereq, ENamedThreads::GetRenderThread()).ConstructAndDispatchWhenReady(SwapCmdList);
            }
            
            if (CVarRHICmdForceRHIFlush.GetValueOnRenderThread() > 0 ) // r.RHICmdForceRHIFlush
            {
                // 检测渲染线程是否死锁.
                if (FTaskGraphInterface::Get().IsThreadProcessingTasks(RenderThread_Local))
                {
                    // this is a deadlock. RT tasks must be done by now or they won't be done. We could add a third queue...
                    UE_LOG(LogRHI, Fatal, TEXT("Deadlock in FRHICommandListExecutor::ExecuteInner 2."));
                }
                
                // 检测RenderThreadSublistDispatchTask是否完成.
                if (RenderThreadSublistDispatchTask.GetReference())
                {
                    FTaskGraphInterface::Get().WaitUntilTaskCompletes(RenderThreadSublistDispatchTask, RenderThread_Local);
                    RenderThreadSublistDispatchTask = nullptr;
                }
                
                // 等待RHIThreadTask完成.
                while (RHIThreadTask.GetReference())
                {
                    FTaskGraphInterface::Get().WaitUntilTaskCompletes(RHIThreadTask, RenderThread_Local);
                    if (RHIThreadTask.GetReference() && RHIThreadTask->IsComplete())
                    {
                        RHIThreadTask = nullptr;
                        PrevRHIThreadTask = nullptr;
                    }
                }
            }
            
            return;
        }
        
        // 执行RTTasks/RenderThreadSublistDispatchTask/RHIThreadTask等任务.
        if (bIsInRenderingThread)
        {
            if (CmdList.RTTasks.Num())
            {
                if (FTaskGraphInterface::Get().IsThreadProcessingTasks(RenderThread_Local))
                {
                    UE_LOG(LogRHI, Fatal, TEXT("Deadlock in FRHICommandListExecutor::ExecuteInner (RTTasks)."));
                }
                FTaskGraphInterface::Get().WaitUntilTasksComplete(CmdList.RTTasks, RenderThread_Local);
                CmdList.RTTasks.Reset();

            }
            if (RenderThreadSublistDispatchTask.GetReference())
            {
                if (FTaskGraphInterface::Get().IsThreadProcessingTasks(RenderThread_Local))
                {
                    // this is a deadlock. RT tasks must be done by now or they won't be done. We could add a third queue...
                    UE_LOG(LogRHI, Fatal, TEXT("Deadlock in FRHICommandListExecutor::ExecuteInner (RenderThreadSublistDispatchTask)."));
                }
                FTaskGraphInterface::Get().WaitUntilTaskCompletes(RenderThreadSublistDispatchTask, RenderThread_Local);
#if NEEDS_DEBUG_INFO_ON_PRESENT_HANG
                bRenderThreadSublistDispatchTaskClearedOnRT = IsInActualRenderingThread();
                bRenderThreadSublistDispatchTaskClearedOnGT = bIsInGameThread;
#endif
                RenderThreadSublistDispatchTask = nullptr;
            }
            while (RHIThreadTask.GetReference())
            {
                if (FTaskGraphInterface::Get().IsThreadProcessingTasks(RenderThread_Local))
                {
                    // this is a deadlock. RT tasks must be done by now or they won't be done. We could add a third queue...
                    UE_LOG(LogRHI, Fatal, TEXT("Deadlock in FRHICommandListExecutor::ExecuteInner (RHIThreadTask)."));
                }
                FTaskGraphInterface::Get().WaitUntilTaskCompletes(RHIThreadTask, RenderThread_Local);
                if (RHIThreadTask.GetReference() && RHIThreadTask->IsComplete())
                {
                    RHIThreadTask = nullptr;
                    PrevRHIThreadTask = nullptr;
                }
            }
        }
    }
    // 非RHI专用线程.
    else
    {
        if (bIsInRenderingThread && CmdList.RTTasks.Num())
        {
            ENamedThreads::Type RenderThread_Local = ENamedThreads::GetRenderThread_Local();
            if (FTaskGraphInterface::Get().IsThreadProcessingTasks(RenderThread_Local))
            {
                // this is a deadlock. RT tasks must be done by now or they won't be done. We could add a third queue...
                UE_LOG(LogRHI, Fatal, TEXT("Deadlock in FRHICommandListExecutor::ExecuteInner (RTTasks)."));
            }
            FTaskGraphInterface::Get().WaitUntilTasksComplete(CmdList.RTTasks, RenderThread_Local);
            CmdList.RTTasks.Reset();
        }
    }

    // 内部执行命令.
    ExecuteInner_DoExecute(CmdList);
}

void FRHICommandListExecutor::ExecuteInner_DoExecute(FRHICommandListBase& CmdList)
{
    FScopeCycleCounter ScopeOuter(CmdList.ExecuteStat);

    CmdList.bExecuting = true;
    check(CmdList.Context || CmdList.ComputeContext);

    FMemMark Mark(FMemStack::Get());

    // 设置多GPU的Mask.
#if WITH_MGPU
    if (CmdList.Context != nullptr)
    {
        CmdList.Context->RHISetGPUMask(CmdList.InitialGPUMask);
    }
    if (CmdList.ComputeContext != nullptr && CmdList.ComputeContext != CmdList.Context)
    {
        CmdList.ComputeContext->RHISetGPUMask(CmdList.InitialGPUMask);
    }
#endif

    FRHICommandListDebugContext DebugContext;
    FRHICommandListIterator Iter(CmdList);
    // 统计执行信息.
#if STATS
    bool bDoStats =  CVarRHICmdCollectRHIThreadStatsFromHighLevel.GetValueOnRenderThread() > 0 && FThreadStats::IsCollectingData() && (IsInRenderingThread() || IsInRHIThread()); //r.RHICmdCollectRHIThreadStatsFromHighLevel
    if (bDoStats)
    {
        while (Iter.HasCommandsLeft())
        {
            TStatIdData const* Stat = GCurrentExecuteStat.GetRawPointer();
            FScopeCycleCounter Scope(GCurrentExecuteStat);
            while (Iter.HasCommandsLeft() && Stat == GCurrentExecuteStat.GetRawPointer())
            {
                FRHICommandBase* Cmd = Iter.NextCommand();
                Cmd->ExecuteAndDestruct(CmdList, DebugContext);
            }
        }
    }
    else
    // 统计指定事件.
#elif ENABLE_STATNAMEDEVENTS
    bool bDoStats = CVarRHICmdCollectRHIThreadStatsFromHighLevel.GetValueOnRenderThread() > 0 && GCycleStatsShouldEmitNamedEvents && (IsInRenderingThread() || IsInRHIThread()); //r.RHICmdCollectRHIThreadStatsFromHighLevel
    if (bDoStats)
    {
        while (Iter.HasCommandsLeft())
        {
            PROFILER_CHAR const* Stat = GCurrentExecuteStat.StatString;
            FScopeCycleCounter Scope(GCurrentExecuteStat);
            while (Iter.HasCommandsLeft() && Stat == GCurrentExecuteStat.StatString)
            {
                FRHICommandBase* Cmd = Iter.NextCommand();
                Cmd->ExecuteAndDestruct(CmdList, DebugContext);
            }
        }
    }
    else
#endif
    // 不调试或不统计信息的版本.
    {
        // 循环所有命令, 执行并销毁之.
        while (Iter.HasCommandsLeft())
        {
            FRHICommandBase* Cmd = Iter.NextCommand();
            GCurrentCommand = Cmd;
            Cmd->ExecuteAndDestruct(CmdList, DebugContext);
        }
    }
    // 充值命令列表.
    CmdList.Reset();
}

由此可知,FRHICommandListExecutor处理了复杂的各类任务,并且要判定任务的前序、等待、依赖关系,还有各个线程之间的依赖和等待关系。上述代码中涉及到了两个重要的任务类型:

// 派发RHI线程任务.
class FDispatchRHIThreadTask
{
    FRHICommandListBase* RHICmdList; // 待派发的命令列表.
    bool bRHIThread; // 是否在RHI线程中派发.

public:
    FDispatchRHIThreadTask(FRHICommandListBase* InRHICmdList, bool bInRHIThread)
        : RHICmdList(InRHICmdList)
        , bRHIThread(bInRHIThread)
    {        
    }
    FORCEINLINE TStatId GetStatId() const;
    static ESubsequentsMode::Type GetSubsequentsMode() { return ESubsequentsMode::TrackSubsequents; }

    // 预期的线程由是否在RHI线程/是否在独立的RHI线程等变量决定.
    ENamedThreads::Type GetDesiredThread()
    {
        return bRHIThread ? (IsRunningRHIInDedicatedThread() ? ENamedThreads::RHIThread : CPrio_RHIThreadOnTaskThreads.Get()) : ENamedThreads::GetRenderThread_Local();
    }
    
    void DoTask(ENamedThreads::Type CurrentThread, const FGraphEventRef& MyCompletionGraphEvent)
    {
        // 前序任务是RHIThreadTask.
        FGraphEventArray Prereq;
        if (RHIThreadTask.GetReference())
        {
            Prereq.Add(RHIThreadTask);
        }
        // 将当前任务放到PrevRHIThreadTask中.
        PrevRHIThreadTask = RHIThreadTask;
        // 创建FExecuteRHIThreadTask任务并赋值到RHIThreadTask.
        RHIThreadTask = TGraphTask<FExecuteRHIThreadTask>::CreateTask(&Prereq, CurrentThread).ConstructAndDispatchWhenReady(RHICmdList);
    }
};

// 执行RHI线程任务.
class FExecuteRHIThreadTask
{
    FRHICommandListBase* RHICmdList;

public:
    FExecuteRHIThreadTask(FRHICommandListBase* InRHICmdList)
        : RHICmdList(InRHICmdList)
    {
    }

    FORCEINLINE TStatId GetStatId() const;
    static ESubsequentsMode::Type GetSubsequentsMode() { return ESubsequentsMode::TrackSubsequents; }

    // 根据是否在专用的RHI线程而选择RHI或渲染线程.
    ENamedThreads::Type GetDesiredThread()
    {
        return IsRunningRHIInDedicatedThread() ? ENamedThreads::RHIThread : CPrio_RHIThreadOnTaskThreads.Get();
    }
    
    void DoTask(ENamedThreads::Type CurrentThread, const FGraphEventRef& MyCompletionGraphEvent)
    {
        // 设置全局变量GRHIThreadId
        if (IsRunningRHIInTaskThread())
        {
            GRHIThreadId = FPlatformTLS::GetCurrentThreadId();
        }
        
        // 执行RHI命令队列.
        {
            // 临界区, 保证线程访问安全.
            FScopeLock Lock(&GRHIThreadOnTasksCritical);
            
            FRHICommandListExecutor::ExecuteInner_DoExecute(*RHICmdList);
            delete RHICmdList;
        }
        
        // 清空全局变量GRHIThreadId
        if (IsRunningRHIInTaskThread())
        {
            GRHIThreadId = 0;
        }
    }
};

由上可知,在派发和转译命令队列时,可能在专用的RHI线程执行,也可能在渲染线程或工作线程执行。

 

GRHICommandList

GRHICommandList乍一看以为是FRHICommandListBase的实例,但实际类型是FRHICommandListExecutor。它的声明和实现如下:

// Engine\Source\Runtime\RHI\Public\RHICommandList.h
extern RHI_API FRHICommandListExecutor GRHICommandList;

// Engine\Source\Runtime\RHI\Private\RHICommandList.cpp
RHI_API FRHICommandListExecutor GRHICommandList;

有关GRHICommandList的全局或静态接口如下:

FRHICommandListImmediate& FRHICommandListExecutor::GetImmediateCommandList()
{
    return GRHICommandList.CommandListImmediate;
}

FRHIAsyncComputeCommandListImmediate& FRHICommandListExecutor::GetImmediateAsyncComputeCommandList()
{
    return GRHICommandList.AsyncComputeCmdListImmediate;
}

 

在UE的渲染模块和RHI模块中拥有大量的GRHICommandList使用案例,取其中之一:

// Engine\Source\Runtime\Renderer\Private\DeferredShadingRenderer.cpp

void ServiceLocalQueue()
{
    FTaskGraphInterface::Get().ProcessThreadUntilIdle(ENamedThreads::GetRenderThread_Local());

    if (IsRunningRHIInSeparateThread())
    {
        FRHICommandListExecutor::GetImmediateCommandList().ImmediateFlush(EImmediateFlushType::DispatchToRHIThread);
    }
}

在RHI命令队列模块,除了涉及GRHICommandList,还涉及诸多全局的任务变量:

// Engine\Source\Runtime\RHI\Private\RHICommandList.cpp

static FGraphEventArray AllOutstandingTasks;
static FGraphEventArray WaitOutstandingTasks;
static FGraphEventRef RHIThreadTask;
static FGraphEventRef PrevRHIThreadTask;
static FGraphEventRef RenderThreadSublistDispatchTask;

它们的创建或添加任务的代码如下:

void FRHICommandListBase::QueueParallelAsyncCommandListSubmit(FGraphEventRef* AnyThreadCompletionEvents, ...)
{
    (......)
    
    if (Num && IsRunningRHIInSeparateThread())
    {
        (......)
            
        // 创建FParallelTranslateSetupCommandList任务.
        FGraphEventRef TranslateSetupCompletionEvent = TGraphTask<FParallelTranslateSetupCommandList>::CreateTask(&Prereq, ENamedThreads::GetRenderThread()).ConstructAndDispatchWhenReady(CmdList, &RHICmdLists[0], Num, bIsPrepass);
        QueueCommandListSubmit(CmdList);
        // 添加到AllOutstandingTasks.
        AllOutstandingTasks.Add(TranslateSetupCompletionEvent);
        
        (......)
        
        FGraphEventArray Prereq;
        FRHICommandListBase** RHICmdLists = (FRHICommandListBase**)Alloc(sizeof(FRHICommandListBase*) * (1 + Last - Start), alignof(FRHICommandListBase*));
        // 将所有外部任务AnyThreadCompletionEvents加入到对应的列表中.
        for (int32 Index = Start; Index <= Last; Index++)
        {
            FGraphEventRef& AnyThreadCompletionEvent = AnyThreadCompletionEvents[Index];
            FRHICommandList* CmdList = CmdLists[Index];
            RHICmdLists[Index - Start] = CmdList;
            if (AnyThreadCompletionEvent.GetReference())
            {
                Prereq.Add(AnyThreadCompletionEvent);
                AllOutstandingTasks.Add(AnyThreadCompletionEvent);
                WaitOutstandingTasks.Add(AnyThreadCompletionEvent);
            }
        }
        
        (......)
        
        // 并行转译任务FParallelTranslateCommandList.
        FGraphEventRef TranslateCompletionEvent = TGraphTask<FParallelTranslateCommandList>::CreateTask(&Prereq, ENamedThreads::GetRenderThread()).ConstructAndDispatchWhenReady(&RHICmdLists[0], 1 + Last - Start, ContextContainer, bIsPrepass);
        AllOutstandingTasks.Add(TranslateCompletionEvent);
        
        (......)
}
    
void FRHICommandListBase::QueueAsyncCommandListSubmit(FGraphEventRef& AnyThreadCompletionEvent, class FRHICommandList* CmdList)
{
    (......)
    
    // 处理外部任务AnyThreadCompletionEvent
    if (AnyThreadCompletionEvent.GetReference())
    {
        if (IsRunningRHIInSeparateThread())
        {
            AllOutstandingTasks.Add(AnyThreadCompletionEvent);
        }
        WaitOutstandingTasks.Add(AnyThreadCompletionEvent);
    }
    
    (......)
}
    
class FDispatchRHIThreadTask
{
    void DoTask(ENamedThreads::Type CurrentThread, const FGraphEventRef& MyCompletionGraphEvent)
    {
        (......)
        
        // 创建RHI线程任务FExecuteRHIThreadTask.
        PrevRHIThreadTask = RHIThreadTask;
        RHIThreadTask = TGraphTask<FExecuteRHIThreadTask>::CreateTask(&Prereq, CurrentThread).ConstructAndDispatchWhenReady(RHICmdList);
    }
};
    
class FParallelTranslateSetupCommandList
{
    void DoTask(ENamedThreads::Type CurrentThread, const FGraphEventRef& MyCompletionGraphEvent)
    {
        (......)

        // 创建并行转译任务FParallelTranslateCommandList.
        FGraphEventRef TranslateCompletionEvent = TGraphTask<FParallelTranslateCommandList>::CreateTask(nullptr, ENamedThreads::GetRenderThread()).ConstructAndDispatchWhenReady(&RHICmdLists[Start], 1 + Last - Start, ContextContainer, bIsPrepass);
        MyCompletionGraphEvent->DontCompleteUntil(TranslateCompletionEvent);
        // 利用RHICmdList的接口FRHICommandWaitForAndSubmitSubListParallel提交任务, 最终会进入AllOutstandingTasks和WaitOutstandingTasks.
        ALLOC_COMMAND_CL(*RHICmdList, FRHICommandWaitForAndSubmitSubListParallel)(TranslateCompletionEvent, ContextContainer, EffectiveThreads, ThreadIndex++);
    
};
    
void FRHICommandListExecutor::ExecuteInner(FRHICommandListBase& CmdList)
{
    (......)
    
    if (IsRunningRHIInSeparateThread())
    {
        (......)
        
        if (AllOutstandingTasks.Num() || RenderThreadSublistDispatchTask.GetReference())
        {
            (......)
            // 创建渲染线程子命令派发(提交)任务FDispatchRHIThreadTask.
            RenderThreadSublistDispatchTask = TGraphTask<FDispatchRHIThreadTask>::CreateTask(&Prereq, ENamedThreads::GetRenderThread()).ConstructAndDispatchWhenReady(SwapCmdList, bAsyncSubmit);
        }
        else
        {
            (......)
            PrevRHIThreadTask = RHIThreadTask;
            // 创建渲染线程子命令转译任务FExecuteRHIThreadTask.
            RHIThreadTask = TGraphTask<FExecuteRHIThreadTask>::CreateTask(&Prereq, ENamedThreads::GetRenderThread()).ConstructAndDispatchWhenReady(SwapCmdList);
        }
        
        (......)
}

总结一下这些任务变量的作用:

任务变量执行线程描述
AllOutstandingTasks 渲染、RHI、工作 所有在处理或待处理的任务列表。类型是FParallelTranslateSetupCommandList、FParallelTranslateCommandList。
WaitOutstandingTasks 渲染、RHI、工作 待处理的任务列表。类型是FParallelTranslateSetupCommandList、FParallelTranslateCommandList。
RHIThreadTask RHI、工作 正在处理的RHI线程任务。类型是FExecuteRHIThreadTask。
PrevRHIThreadTask RHI、工作 上一次处理的RHIThreadTask。类型是FExecuteRHIThreadTask。
RenderThreadSublistDispatchTask 渲染、RHI、工作 正在派发(提交)的任务。类型是FDispatchRHIThreadTask。注:与并行渲染AllOutstandingTasks、WaitOutstandingTasks有关

 

D3D11命令执行

本节将研究UE4.26在PC平台的通用RHI及D3D11命令运行过程和机制。由于UE4.26在PC平台默认的RHI是D3D11,并且关键的几个控制台变量的默认值如下:

 

也就是说开启了命令跳过模式,并且禁用了RHI线程。在此情况下,FRHICommandList的某个接口被调用时,不会生成单独的FRHICommand,而是直接调用Context的方法。以FRHICommandList::DrawPrimitive为例:

class RHI_API FRHICommandList : public FRHIComputeCommandList
{
    void DrawPrimitive(uint32 BaseVertexIndex, uint32 NumPrimitives, uint32 NumInstances)
    {
        // 默认情况下Bypass为1, 进入此分支.
        if (Bypass())
        {
            // 直接调用图形API的上下文的对应方法.
            GetContext().RHIDrawPrimitive(BaseVertexIndex, NumPrimitives, NumInstances);
            return;
        }
        
        // 分配单独的FRHICommandDrawPrimitive命令.
        ALLOC_COMMAND(FRHICommandDrawPrimitive)(BaseVertexIndex, NumPrimitives, NumInstances);

// 展开后变为:

          // new(AllocCommand(sizeof(FRHICommandDrawPrimitive), alignof(FRHICommandDrawPrimitive))) FRHICommandDrawPrimitive(
          //        BaseVertexIndex, NumPrimitives, NumInstances);

    }
}

因此,在PC的默认图形API(D3D11)下,r.RHICmdBypass 1且r.RHIThread.Enable 0,FRHICommandList将直接调用图形API的上下文的接口,相当于同步调用图形API,此时的图形API运行于渲染线程(如果开启)。

接着将r.RHICmdBypass设为0,但保持r.RHIThread.Enable为0,此时不再直接调用Context的方法,而是通过生成一条条单独的FRHICommand,然后由FRHICommandList相关的对象执行。还是以FRHICommandList::DrawPrimitive为例,调用堆栈如下所示:

class RHI_API FRHICommandList : public FRHIComputeCommandList
{
    void FRHICommandList::DrawPrimitive(uint32 BaseVertexIndex, uint32 NumPrimitives, uint32 NumInstances)
    {
        // 默认情况下Bypass为1, 进入此分支.
        if (Bypass())
        {
            // 直接调用图形API的上下文的对应方法.
            GetContext().RHIDrawPrimitive(BaseVertexIndex, NumPrimitives, NumInstances);
            return;
        }
        
        // 分配单独的FRHICommandDrawPrimitive命令.
        // ALLOC_COMMAND宏会调用AllocCommand接口.
        ALLOC_COMMAND(FRHICommandDrawPrimitive)(BaseVertexIndex, NumPrimitives, NumInstances);
    }
    
    template <typename TCmd>
    void* AllocCommand()
    {
        return AllocCommand(sizeof(TCmd), alignof(TCmd));
    }
    
    void* AllocCommand(int32 AllocSize, int32 Alignment)
    {
        FRHICommandBase* Result = (FRHICommandBase*) MemManager.Alloc(AllocSize, Alignment);
        ++NumCommands;
        // CommandLink指向了上一个命令节点的Next.
        *CommandLink = Result;
        // 将CommandLink赋值为当前节点的Next.
        CommandLink = &Result->Next;
        return Result;
    }
}

利用ALLOC_COMMAND宏分配的命令实例会进入FRHICommandListBase的命令链表,但此时并未执行,而是等待其它合适的时机执行,例如在FRHICommandListImmediate::ImmediateFlush。下面是执行FRHICommandList的调用堆栈:

 

由调用堆栈可以得知,在此情况下,命令执行的过程变得复杂起来,多了很多中间执行步骤。还是以FRHICommandList::DrawPrimitive为例,调用流程示意图如下:

 

上图的使用了宏INTERNAL_DECORATOR,其和相关宏的定义如下:

// Engine\Source\Runtime\RHI\Public\RHICommandListCommandExecutes.inl

#define INTERNAL_DECORATOR(Method) CmdList.GetContext().Method
#define INTERNAL_DECORATOR_COMPUTE(Method) CmdList.GetComputeContext().Method

相当于通过宏来调用CommandList的Context接口。

在RHI禁用(r.RHIThread.Enable 0)情况下,以上的调用在渲染线程执行:

 

接下来将r.RHIThread.Enable设为1,以开启RHI线程。此时运行命令的线程变成了RHI:

 

并且调用堆栈是从TaskGraph的RHI线程发起任务:

 

此时,命令执行的流程图如下:

上面流程图中,方角表示在渲染线程执行,而圆角在RHI线程执行(绿框中)。

 

开启RHI线程后,将出现它的统计数据:

左:未开启RHI线程的统计数据;右:开启RHI线程后的统计数据。

 

下面绘制出开启或关闭Bypass和RHI线程的流程图(以调用D3D11的DrawPrimitive为例):

上面流程图中,方角表示在渲染线程中执行,圆角表示在RHI线程中执行(绿框中)。

 

ImmediateFlush

在FDynamicRHI中,提及了刷新类型(FlushType),是指EImmediateFlushType定义的类型:

// Engine\Source\Runtime\RHI\Public\RHICommandList.h

namespace EImmediateFlushType
{
    enum Type
    { 
        WaitForOutstandingTasksOnly = 0, // 等待仅正在处理的任务完成.
        DispatchToRHIThread,             // 派发到RHI线程. 注:创建一个FExecuteRHIThreadTask任务并投递到RHIThread的TaskGraph任务队列中
        WaitForDispatchToRHIThread,      // 等待派发到RHI线程.
        FlushRHIThread,                  // 刷新RHI线程.
        FlushRHIThreadFlushResources,    // 刷新RHI线程和资源
        FlushRHIThreadFlushResourcesFlushDeferredDeletes // 刷新RHI线程/资源和延迟删除.
    };
};

EImmediateFlushType中各个值的区别在FRHICommandListImmediate::ImmediateFlush的实现代码中体现出来:

// Engine\Source\Runtime\RHI\Public\RHICommandList.inl

void FRHICommandListImmediate::ImmediateFlush(EImmediateFlushType::Type FlushType)
{
    switch (FlushType)
    {
    // 等待任务完成.
    case EImmediateFlushType::WaitForOutstandingTasksOnly:
        {
            WaitForTasks();
        }
        break;
    // 派发RHI线程(执行命令队列)
    case EImmediateFlushType::DispatchToRHIThread:
        {
            if (HasCommands())
            {
                GRHICommandList.ExecuteList(*this);
            }
        }
        break;
    // 等待RHI线程派发.
    case EImmediateFlushType::WaitForDispatchToRHIThread:
        {
            if (HasCommands())
            {
                GRHICommandList.ExecuteList(*this);
            }
            WaitForDispatch();
        }
        break;
    // 刷新RHI线程.
    case EImmediateFlushType::FlushRHIThread:
        {
            // 派发并等待RHI线程.
            if (HasCommands())
            {
                GRHICommandList.ExecuteList(*this);
            }
            WaitForDispatch();
            
            // 等待RHI线程任务.
            if (IsRunningRHIInSeparateThread())
            {
                WaitForRHIThreadTasks();
            }
            
            // 重置正在处理的任务列表.
            WaitForTasks(true);
        }
        break;
    case EImmediateFlushType::FlushRHIThreadFlushResources:
    case EImmediateFlushType::FlushRHIThreadFlushResourcesFlushDeferredDeletes:
        {
            if (HasCommands())
            {
                GRHICommandList.ExecuteList(*this);
            }
            WaitForDispatch();
            WaitForRHIThreadTasks();
            WaitForTasks(true);
            
            // 刷新管线状态缓存的资源.
            PipelineStateCache::FlushResources();
            // 刷新将要删除的资源.
            FRHIResource::FlushPendingDeletes(FlushType == EImmediateFlushType::FlushRHIThreadFlushResourcesFlushDeferredDeletes);
        }
        break;
    }
}

上面代码中涉及到了若干种处理和等待任务的接口,它们的实现如下:

// 等待任务完成.
void FRHICommandListBase::WaitForTasks(bool bKnownToBeComplete)
{
    if (WaitOutstandingTasks.Num())
    {
        // 检测是否存在未完成的等待任务.
        bool bAny = false;
        for (int32 Index = 0; Index < WaitOutstandingTasks.Num(); Index++)
        {
            if (!WaitOutstandingTasks[Index]->IsComplete())
            {
                bAny = true;
                break;
            }
        }
        // 存在就利用TaskGraph的接口开启线程等待.
        if (bAny)
        {
            ENamedThreads::Type RenderThread_Local = ENamedThreads::GetRenderThread_Local();
            FTaskGraphInterface::Get().WaitUntilTasksComplete(WaitOutstandingTasks, RenderThread_Local);
        }
        // 重置等待任务列表.
        WaitOutstandingTasks.Reset();
    }
}

// 等待渲染线程派发完成.
void FRHICommandListBase::WaitForDispatch()
{
    // 如果RenderThreadSublistDispatchTask已完成, 则置空.
    if (RenderThreadSublistDispatchTask.GetReference() && RenderThreadSublistDispatchTask->IsComplete())
    {
        RenderThreadSublistDispatchTask = nullptr;
    }
    
    // RenderThreadSublistDispatchTask有未完成的任务.
    while (RenderThreadSublistDispatchTask.GetReference())
    {
        ENamedThreads::Type RenderThread_Local = ENamedThreads::GetRenderThread_Local();
        FTaskGraphInterface::Get().WaitUntilTaskCompletes(RenderThreadSublistDispatchTask, RenderThread_Local);
        if (RenderThreadSublistDispatchTask.GetReference() && RenderThreadSublistDispatchTask->IsComplete())
        {
            RenderThreadSublistDispatchTask = nullptr;
        }
    }
}

// 等待RHI线程任务完成.
void FRHICommandListBase::WaitForRHIThreadTasks()
{
    bool bAsyncSubmit = CVarRHICmdAsyncRHIThreadDispatch.GetValueOnRenderThread() > 0; //r.RHICmdAsyncRHIThreadDispatch
    ENamedThreads::Type RenderThread_Local = ENamedThreads::GetRenderThread_Local();
    
    // 相当于执行FRHICommandListBase::WaitForDispatch()
    if (bAsyncSubmit)
    {
        if (RenderThreadSublistDispatchTask.GetReference() && RenderThreadSublistDispatchTask->IsComplete())
        {
            RenderThreadSublistDispatchTask = nullptr;
        }
        while (RenderThreadSublistDispatchTask.GetReference())
        {
            if (FTaskGraphInterface::Get().IsThreadProcessingTasks(RenderThread_Local))
            {
                while (!RenderThreadSublistDispatchTask->IsComplete())
                {
                    FPlatformProcess::SleepNoStats(0);
                }
            }
            else
            {
                FTaskGraphInterface::Get().WaitUntilTaskCompletes(RenderThreadSublistDispatchTask, RenderThread_Local);
            }
            
            if (RenderThreadSublistDispatchTask.GetReference() && RenderThreadSublistDispatchTask->IsComplete())
            {
                RenderThreadSublistDispatchTask = nullptr;
            }
        }
        // now we can safely look at RHIThreadTask
    }
    
    // 如果RHI线程任务已完成, 则置空任务.
    if (RHIThreadTask.GetReference() && RHIThreadTask->IsComplete())
    {
        RHIThreadTask = nullptr;
        PrevRHIThreadTask = nullptr;
    }
    
    // 如果RHI线程有任务未完成, 则执行并等待.
    while (RHIThreadTask.GetReference())
    {
        // 如果已在处理, 则用sleep(0)跳过此时间片.
        if (FTaskGraphInterface::Get().IsThreadProcessingTasks(RenderThread_Local))
        {
            while (!RHIThreadTask->IsComplete())
            {
                FPlatformProcess::SleepNoStats(0);
            }
        }
        // 任务尚未处理, 开始并等待之.
        else
        {
            FTaskGraphInterface::Get().WaitUntilTaskCompletes(RHIThreadTask, RenderThread_Local);
        }
        
        // 如果RHI线程任务已完成, 则置空任务.
        if (RHIThreadTask.GetReference() && RHIThreadTask->IsComplete())
        {
            RHIThreadTask = nullptr;
            PrevRHIThreadTask = nullptr;
        }
    }
}

 

RHI控制台变量

前面章节的代码也显示RHI体系涉及的控制台变量非常多,下面列出部分控制台变量,以便调试、优化RHI渲染效果或效率:

名称描述
r.RHI.Name 显示当前RHI的名字,如D3D11。
r.RHICmdAsyncRHIThreadDispatch 实验选项,是否执行RHI调度异步。可使数据更快地刷新到RHI线程,避免帧末尾出现卡顿。
r.RHICmdBalanceParallelLists 允许启用DrawList的预处理,以尝试在命令列表之间均衡负载。0:关闭,1:开启,2:实验选项,使用上一帧的结果(在分屏等不做任何事情)。
r.RHICmdBalanceTranslatesAfterTasks 实验选项,平衡并行翻译后的渲染任务完成。可最小化延迟上下文的数量,但会增加启动转译的延迟。
r.RHICmdBufferWriteLocks 仅与RHI线程相关。用于诊断缓冲锁问题的调试选项。
r.RHICmdBypass 是否绕过RHI命令列表,立即发送RHI命令。0:禁用(需开启多线程渲染),1:开启。
r.RHICmdCollectRHIThreadStatsFromHighLevel 这将在执行的RHI线程上推送统计信息,这样就可以确定它们来自哪个高层级的Pass。对帧速率有不利影响。默认开启。
r.RHICmdFlushOnQueueParallelSubmit 在提交后立即等待并行命令列表的完成。问题诊断。只适用于部分RHI。
r.RHICmdFlushRenderThreadTasks 如果为真,则每次调用时都刷新渲染线程任务。问题诊断。这是一个更细粒度cvars的主开关。
r.RHICmdForceRHIFlush 对每个任务强制刷新发送给RHI线程。问题诊断。
r.RHICmdMergeSmallDeferredContexts 合并小的并行转译任务,基于r.RHICmdMinDrawsPerParallelCmdList。
r.RHICmdUseDeferredContexts 使用延迟上下文并行执行命令列表。只适用于部分RHI。
r.RHICmdUseParallelAlgorithms True使用并行算法。如果r.RHICmdBypass为1则忽略。
r.RHICmdUseThread 使用RHI线程。问题诊断。
r.RHICmdWidth 控制并行渲染器中大量事物的任务粒度。
r.RHIThread.Enable 启用/禁用RHI线程,并确定RHI工作是否在专用线程上运行。
RHI.GPUHitchThreshold GPU上检测卡顿的阈值(毫秒)。
RHI.MaximumFrameLatency 可以排队进行渲染的帧数。
RHI.SyncThreshold 在垂直同步功能启用前的连续“快速”帧数。
RHI.TargetRefreshRate 如果非零,则显示的更新频率永远不会超过目标刷新率(以Hz为单位)。

注:以上只列出部分RHI相关的变量,还有很多未列出。

 

参考

剖析虚幻渲染体系(10)- RHI

 

posted on 2021-12-16 23:14  可可西  阅读(4445)  评论(0编辑  收藏  举报

导航