UE4 stats性能埋点
某个Stats文件中所统计到的大类(Group Name)如下:
某个Stats文件中,Group Name为Memory的分组所统计到的细项如下:
统计项类型:
为int或float数字类型 // 用于Stat HUD展示,如下图所示
Stat GPU // 显示帧的GPU统计数据 注:android平台上没有输出
为Memory类型 // 用于Stat HUD展示,如下图所示
Stat Memory // 显示有关虚幻引擎中各个子系统使用多少内存的统计数据
为hierarchy类别,可以嵌套子节点,包含CallCount、InclusiveTime、ExclusiveTime等字段 // 用于Stat HUD展示和Profiler工具CallStack展示 如下图所示
Stat Component // 显示组件列表及组件性能信息
Profiler工具CallStack展示
本文重点讲述如何用自定义hierarchy类别来埋点,并在Profiler工具的CallStack树中查看数据。
定义分组
DECLARE_STATS_GROUP(TEXT("AI"),STATGROUP_AI, STATCAT_Advanced); // The three arguments are Description, GroupName and GroupCategory
// Expands to the following struct (enabled by default, compiled in, not sorted by name): struct FStatGroup_STATGROUP_AI { enum { DefaultEnable = true, CompileTimeEnable = true, SortByName = false }; static __forceinline const char* GetGroupName() { return "STATGROUP_AI"; } static __forceinline const char* GetGroupCategory() { return "STATCAT_Advanced"; } static __forceinline const TCHAR* GetDescription() { return L"AI"; } static __forceinline bool IsDefaultEnabled() { return (bool)DefaultEnable; } static __forceinline bool IsCompileTimeEnable() { return (bool)CompileTimeEnable; } static __forceinline bool GetSortByName() { return (bool)SortByName; } };;;
DECLARE_STATS_GROUP_VERBOSE(TEXT("LoadTimeVerbose"), STATGROUP_LoadTimeVerbose, STATCAT_Advanced); // Stat collection for this group is disabled by default
// Expands to the following struct (note DefaultEnable = false): struct FStatGroup_STATGROUP_LoadTimeVerbose { enum { DefaultEnable = false, CompileTimeEnable = true, SortByName = false }; static __forceinline const char* GetGroupName() { return "STATGROUP_LoadTimeVerbose"; } static __forceinline const char* GetGroupCategory() { return "STATCAT_Advanced"; } static __forceinline const TCHAR* GetDescription() { return L"LoadTimeVerbose"; } static __forceinline bool IsDefaultEnabled() { return (bool)DefaultEnable; } static __forceinline bool IsCompileTimeEnable() { return (bool)CompileTimeEnable; } static __forceinline bool GetSortByName() { return (bool)SortByName; } };;;
DECLARE_STATS_GROUP_SORTBYNAME(TEXT("Streaming Overview"),STATGROUP_StreamingOverview, STATCAT_Advanced); // Stats in this group are sorted by name; per the author's note this costs somewhat more
// Expands to the following struct (note SortByName = true): struct FStatGroup_STATGROUP_StreamingOverview { enum { DefaultEnable = true, CompileTimeEnable = true, SortByName = true }; static __forceinline const char* GetGroupName() { return "STATGROUP_StreamingOverview"; } static __forceinline const char* GetGroupCategory() { return "STATCAT_Advanced"; } static __forceinline const TCHAR* GetDescription() { return L"Streaming Overview"; } static __forceinline bool IsDefaultEnabled() { return (bool)DefaultEnable; } static __forceinline bool IsCompileTimeEnable() { return (bool)CompileTimeEnable; } static __forceinline bool GetSortByName() { return (bool)SortByName; } };;;
DECLARE_STATS_GROUP_MAYBE_COMPILED_OUT(TEXT("SlateVeryVerbose"), STATGROUP_SlateVeryVerbose, STATCAT_Advanced, WITH_VERY_VERBOSE_SLATE_STATS); // Disabled by default; when WITH_VERY_VERBOSE_SLATE_STATS is 0 the logic for this group's stats is not compiled
// Expands to the following struct (shown with WITH_VERY_VERBOSE_SLATE_STATS == 0, hence CompileTimeEnable = 0): struct FStatGroup_STATGROUP_SlateVeryVerbose { enum { DefaultEnable = false, CompileTimeEnable = 0, SortByName = false }; static __forceinline const char* GetGroupName() { return "STATGROUP_SlateVeryVerbose"; } static __forceinline const char* GetGroupCategory() { return "STATCAT_Advanced"; } static __forceinline const TCHAR* GetDescription() { return L"SlateVeryVerbose"; } static __forceinline bool IsDefaultEnabled() { return (bool)DefaultEnable; } static __forceinline bool IsCompileTimeEnable() { return (bool)CompileTimeEnable; } static __forceinline bool GetSortByName() { return (bool)SortByName; } };;;
定义埋点方式1
在cpp代码的全局区域,定义埋点结构体类型和static全局变量
DECLARE_CYCLE_STAT(TEXT("Test1"), STAT_Test1, STATGROUP_TestGroup); // The three arguments are Description, the stat id (the generated struct is named FStat_<id>), and GroupName
// The macro expands to a stat descriptor struct plus a static FThreadSafeStaticStat variable: struct FStat_STAT_Test1 { typedef FStatGroup_STATGROUP_TestGroup TGroup; static __forceinline const char* GetStatName() { return "STAT_Test1"; } static __forceinline const TCHAR* GetDescription() { return L"Test1"; } static __forceinline EStatDataType::Type GetStatType() { return EStatDataType::ST_int64; } static __forceinline bool IsClearEveryFrame() { return true; } static __forceinline bool IsCycleStat() { return true; } static __forceinline FPlatformMemory::EMemoryCounterRegion GetMemoryRegion() { return FPlatformMemory::MCR_Invalid; } };; static struct FThreadSafeStaticStat<FStat_STAT_Test1> StatPtr_STAT_Test1;;
在函数中插入埋点
DECLARE_STATS_GROUP(TEXT("TestGroupDesc"), STATGROUP_TestGroup, STATCAT_Advanced); // 定义名为TestGroup的分组
DECLARE_CYCLE_STAT(TEXT("Test1"), STAT_Test1, STATGROUP_TestGroup); // 定义Test1的埋点类型与static埋点变量,并放在TestGroup分组中 DECLARE_CYCLE_STAT(TEXT("Test2"), STAT_Test2, STATGROUP_TestGroup); // 定义Test2的埋点类型与static埋点变量,并放在TestGroup分组中 DECLARE_CYCLE_STAT(TEXT("Test3"), STAT_Test3, STATGROUP_TestGroup); // 定义Test3的埋点类型与static埋点变量,并放在TestGroup分组中 DECLARE_CYCLE_STAT(TEXT("Test4"), STAT_Test4, STATGROUP_TestGroup); // 定义Test4的埋点类型与static埋点变量,并放在TestGroup分组中 DECLARE_CYCLE_STAT(TEXT("Test5"), STAT_Test5, STATGROUP_TestGroup); // 定义Test5的埋点类型与static埋点变量,并放在TestGroup分组中 DECLARE_CYCLE_STAT(TEXT("Test6"), STAT_Test6, STATGROUP_TestGroup); // 定义Test6的埋点类型与static埋点变量,并放在TestGroup分组中 // LoopCall(1)在我的电脑耗时约为16ms #define LoopCall(n) \ { \ uint64 sum = 1; \ for (int32 i = 1; i < 10000000*n; i++) \ { \ sum *= i; \ } \ } void AMyTest1Character::StatTest() { SCOPE_CYCLE_COUNTER(STAT_Test1); // 宏展开后的代码为:FScopeCycleCounter CycleCount_STAT_Test1((StatPtr_STAT_Test1.GetStatId()));; FPlatformProcess::Sleep(0.002); // 统计到CPU Stall - Sleep
// 条件埋点: 条件成立时,才会埋点成功
int a = 100; CONDITIONAL_SCOPE_CYCLE_COUNTER(STAT_Test2, a > 50); // 宏展开后的代码为:FScopeCycleCounter CycleCount_STAT_Test2(a > 50 ? (StatPtr_STAT_Test2.GetStatId()) : TStatId());; LoopCall(1); SCOPE_CYCLE_COUNTER(STAT_Test3); // 宏展开后的代码为:FScopeCycleCounter CycleCount_STAT_Test3((StatPtr_STAT_Test3.GetStatId()));; FPlatformProcess::Sleep(0.005); // 统计到CPU Stall - Sleep { SCOPE_CYCLE_COUNTER(STAT_Test6); // 宏展开后的代码为:FScopeCycleCounter CycleCount_STAT_Test6((StatPtr_STAT_Test6.GetStatId()));; } FPlatformProcess::Sleep(0.003); // 统计到CPU Stall - Sleep SCOPE_CYCLE_COUNTER(STAT_Test4); // 宏展开后的代码为:FScopeCycleCounter CycleCount_STAT_Test4((StatPtr_STAT_Test4.GetStatId()));; { SCOPE_CYCLE_COUNTER(STAT_Test5); // 宏展开后的代码为:FScopeCycleCounter CycleCount_STAT_Test5((StatPtr_STAT_Test5.GetStatId()));; LoopCall(2); } { SCOPE_CYCLE_COUNTER(STAT_Test5); // 宏展开后的代码为:FScopeCycleCounter CycleCount_STAT_Test5((StatPtr_STAT_Test5.GetStatId()));; FPlatformProcess::SleepNoStats(0.5); // 会被统计到当前作用域埋点的IncTime中 } SCOPE_CYCLE_COUNTER(STAT_Test6); // 宏展开后的代码为:FScopeCycleCounter CycleCount_STAT_Test6((StatPtr_STAT_Test6.GetStatId()));; }
定义埋点方式2
相比方式1,该方式不需要提前定义埋点类型,比较方便
DECLARE_SCOPE_CYCLE_COUNTER(TEXT("UnhashUnreachableObjects"), STAT_UnhashUnreachableObjects, STATGROUP_GC)展开为如下代码:
// Preprocessed output of DECLARE_SCOPE_CYCLE_COUNTER(TEXT("UnhashUnreachableObjects"),
// STAT_UnhashUnreachableObjects, STATGROUP_GC). It consists of three parts:
//   1. a stat descriptor struct bound to the group struct FStatGroup_STATGROUP_GC,
//   2. a static FThreadSafeStaticStat variable from which the stat id is obtained,
//   3. an FScopeCycleCounter constructed from that stat id.
// The doubled semicolons are left exactly as the preprocessor emitted them.
struct FStat_STAT_UnhashUnreachableObjects
{
    typedef FStatGroup_STATGROUP_GC TGroup;
    static __forceinline const char* GetStatName() { return "STAT_UnhashUnreachableObjects"; }
    static __forceinline const TCHAR* GetDescription() { return L"UnhashUnreachableObjects"; }
    static __forceinline EStatDataType::Type GetStatType() { return EStatDataType::ST_int64; }
    static __forceinline bool IsClearEveryFrame() { return true; }
    static __forceinline bool IsCycleStat() { return true; }
    static __forceinline FPlatformMemory::EMemoryCounterRegion GetMemoryRegion() { return FPlatformMemory::MCR_Invalid; }
};;
static struct FThreadSafeStaticStat<FStat_STAT_UnhashUnreachableObjects> StatPtr_STAT_UnhashUnreachableObjects;
FScopeCycleCounter CycleCount_STAT_UnhashUnreachableObjects((StatPtr_STAT_UnhashUnreachableObjects.GetStatId()));;
在函数中,定义埋点结构体类型和static局部变量,并插入埋点
// Excerpt showing DECLARE_SCOPE_CYCLE_COUNTER used directly inside a function:
// the macro declares the stat type, its static variable and the scope counter
// in one statement, so no separate file-scope declaration is required.
bool UnhashUnreachableObjects(bool bUseTimeLimit, float TimeLimit)
{
    DECLARE_SCOPE_CYCLE_COUNTER(TEXT("UnhashUnreachableObjects"), STAT_UnhashUnreachableObjects, STATGROUP_GC);

    // ... ... (the remainder of the function body is elided in this excerpt)
}
定义埋点方式3
在DECLARE_SCOPE_CYCLE_COUNTER基础上封装,放到Quick分组下,更易于使用
QUICK_SCOPE_CYCLE_COUNTER(STAT_QuickTest1); // Equivalent to: DECLARE_SCOPE_CYCLE_COUNTER(TEXT("STAT_QuickTest1"),STAT_QuickTest1,STATGROUP_Quick)
// Stats defined with QUICK_SCOPE_CYCLE_COUNTER are placed in the group named STATGROUP_Quick (its group struct is FStatGroup_STATGROUP_Quick)
QUICK_SCOPE_CYCLE_COUNTER(STAT_QuickTest1)展开为:
// Preprocessed output of QUICK_SCOPE_CYCLE_COUNTER(STAT_QuickTest1). The stat id
// string doubles as the description, and the stat is bound to the group struct
// FStatGroup_STATGROUP_Quick. As with DECLARE_SCOPE_CYCLE_COUNTER, the output is
// a descriptor struct, a static FThreadSafeStaticStat variable and the
// FScopeCycleCounter itself. The doubled semicolons are left as emitted.
struct FStat_STAT_QuickTest1
{
    typedef FStatGroup_STATGROUP_Quick TGroup;
    static __forceinline const char* GetStatName() { return "STAT_QuickTest1"; }
    static __forceinline const TCHAR* GetDescription() { return L"STAT_QuickTest1"; }
    static __forceinline EStatDataType::Type GetStatType() { return EStatDataType::ST_int64; }
    static __forceinline bool IsClearEveryFrame() { return true; }
    static __forceinline bool IsCycleStat() { return true; }
    static __forceinline FPlatformMemory::EMemoryCounterRegion GetMemoryRegion() { return FPlatformMemory::MCR_Invalid; }
};;
static struct FThreadSafeStaticStat<FStat_STAT_QuickTest1> StatPtr_STAT_QuickTest1;
FScopeCycleCounter CycleCount_STAT_QuickTest1((StatPtr_STAT_QuickTest1.GetStatId()));;
在函数中,定义埋点结构体类型和static局部变量,并插入埋点
// Shows the shortest form of instrumentation: one QUICK_SCOPE_CYCLE_COUNTER
// statement at the top of the scope to be measured.
void AMyTest1Character::StatTest()
{
    QUICK_SCOPE_CYCLE_COUNTER(STAT_QuickTest1);

    // ... ... (the remainder of the function body is elided in this excerpt)
}
对UObject对象埋点
DECLARE_STATS_GROUP(TEXT("TestGroupDesc"), STATGROUP_TestGroup, STATCAT_Advanced); // 定义名为TestGroup的分组 DECLARE_CYCLE_STAT(TEXT("ObjTest1"), STAT_ObjTest1, STATGROUP_TestGroup); // 定义ObjTest1的埋点类型与static埋点变量,并放在TestGroup分组中 DECLARE_CYCLE_STAT(TEXT("ObjTest2"), STAT_ObjTest2, STATGROUP_TestGroup); // 定义ObjTest2的埋点类型与static埋点变量,并放在TestGroup分组中 void AMyTest1Character::StatTest() { FString MyBPObjectPath = TEXT("/Game/ThirdPersonCPP/Blueprints/MyBlueprintObject.MyBlueprintObject_C"); UClass* MyBPObjectClass = LoadClass<UObject>(nullptr, *MyBPObjectPath); UMyBPObject* BPObj1 = NewObject<UMyBPObject>(this, MyBPObjectClass); FString TexturePath1 = TEXT("/Engine/EngineMaterials/DefaultDiffuse_TC_Masks"); UTexture2D* TextureObj1 = LoadObject<UTexture2D>(nullptr, *TexturePath1); { FScopeCycleCounterUObject ObjScope(MyBPObjectClass); LoopCall(1); FPlatformProcess::Sleep(0.002); { FScopeCycleCounterUObject ObjScope2(MyBPObjectClass); LoopCall(1); {
// 通过GET_STATID宏来获取,会被展开为:(StatPtr_STAT_ObjTest1.GetStatId())
TStatId StatBPObj1 = GET_STATID(STAT_ObjTest1); FScopeCycleCounterUObject ObjScope3(BPObj1, StatBPObj1); FPlatformProcess::SleepNoStats(0.5); } {
// 通过GET_STATID宏来获取,会被展开为:(StatPtr_STAT_ObjTest2.GetStatId()) FScopeCycleCounterUObject ObjScope4(this, GET_STATID(STAT_ObjTest2)); FPlatformProcess::Sleep(0.003); } } LoopCall(2); // 动态创建TStatId对象 TStatId StatObjTest3 = FDynamicStats::CreateStatId<FStatGroup_STATGROUP_TestGroup>(FString(TEXT("ObjTest3"))); FScopeCycleCounterUObject ObjScope3(MyBPObjectClass, StatObjTest3); } }
即使对UObject指定了埋点类型,对UObject的统计也会放到STATGROUP_UObjects分组中,如下所示:
Tickable对象的Tick耗时
UCLASS() class UMyBPObject : public UObject, public FTickableGameObject { GENERATED_BODY() public: UMyBPObject(); ~UMyBPObject(); virtual TStatId GetStatId() const override { RETURN_QUICK_DECLARE_CYCLE_STAT(MyBPObject, STATGROUP_Tickables); // 如果不希望被统计,直接返回return TStatId();即可 } virtual bool IsTickable() const override { return !this->IsDefaultSubobject(); } virtual void Tick(float DeltaTime) override { if (GFrameCounter % 300 == 0) { FPlatformProcess::SleepNoStats(0.03); } } }; void AMyTest1Character::StatTest() { FString MyBPObjectPath = TEXT("/Game/ThirdPersonCPP/Blueprints/MyBlueprintObject.MyBlueprintObject_C"); UClass* MyBPObjectClass = LoadClass<UObject>(nullptr, *MyBPObjectPath); // MyBPObjectClass为UBlueprintGeneratedClass*类型 // 创建UMyBPObject对象,并赋值给成员变量UMyBPObject* m_BPObj1 m_BPObj1 = NewObject<UMyBPObject>(this, MyBPObjectClass); }
Tickable对象一般会放在STATGROUP_Tickables分组中。以下为某个Stats文件收集到的Tickable对象名称:
Task执行任务的耗时
DECLARE_STATS_GROUP(TEXT("TestGroupDesc"), STATGROUP_TestGroup, STATCAT_Advanced); // 定义名为TestGroup的分组 class FMyTestTask { public: FMyTestTask() { } static const TCHAR* GetTaskName() { return TEXT("FMyTestTask"); } FORCEINLINE static TStatId GetStatId() { RETURN_QUICK_DECLARE_CYCLE_STAT(FMyTestTask, STATGROUP_TestGroup); } /** return the thread for this task **/ static ENamedThreads::Type GetDesiredThread() { return ENamedThreads::AnyThread; } static ESubsequentsMode::Type GetSubsequentsMode() { return ESubsequentsMode::TrackSubsequents; } void DoTask(ENamedThreads::Type CurrentThread, const FGraphEventRef& MyCompletionGraphEvent) { LoopCall(5); } }; void AMyTest1Character::StatTest() { FGraphEventRef MyTestTaskEvent = TGraphTask<FMyTestTask>::CreateTask().ConstructAndDispatchWhenReady();
// 当前线程挂起,等待Task任务执行完成 FTaskGraphInterface::Get().WaitUntilTaskCompletes(MyTestTaskEvent); // 统计到CPU Stall - Wait For Event }
TaskGraph的任务一般会放在STATGROUP_TaskGraphTasks和STATGROUP_ThreadPoolAsyncTasks分组中。以下为某个Stats文件收集到的Task名称:
其他统计说明
CPU停转的统计:
Stat系统自己开销的统计:
总结
① 通过线程安全的static变量(FThreadSafeStaticStat)获取StatId,并用它定义作用域埋点变量(FScopeCycleCounter),在其构造函数(从变量定义的地方开始记录)与析构函数(结束记录)中记录时间,从而计算埋点变量在生命周期范围内的耗时
② 在记录数据时,会带上Thread Id。因此,在Profiler工具中展示数据时,会以线程为大类进行分类
③ Profiler工具中的CallStack树反映的是埋点变量之间的嵌套关系,与代码的函数调用CallStack没有关系
参考