【Remotery】 Remotery-轻量级的远程实时 CPU/GPU 分析器设计浅析
1.Remotery简介
Remotery是一个轻量级的远程实时CPU/GPU分析器,主要用于监控CPU和GPU上多线程的活动。它提供了一个C文件,可以很容易的集成到项目中,并配置一个实时监控的Web界面,可以通过远程观察和分析程序的性能,适用于监控游戏的实时运行性能和分析移动端应用的性能场景。
2.Remotery编译运行
Remotery可以定义一些额外的宏来修改要编译到Remotery中的功能:
Macro Default Description
RMT_ENABLED 1 Disable this to not include any bits of Remotery in your build
RMT_USE_TINYCRT 0 Used by the Celtoys TinyCRT library (not released yet)
RMT_USE_CUDA 0 Assuming CUDA headers/libs are setup, allow CUDA profiling
RMT_USE_D3D11 0 Assuming Direct3D 11 headers/libs are setup, allow D3D11 GPU profiling
RMT_USE_OPENGL 0 Allow OpenGL GPU profiling (dynamically links OpenGL libraries on available platforms)
RMT_USE_METAL 0 Allow Metal profiling of command buffers
2.1 Linux平台下
Linux(GCC):将lib文件夹中的源代码添加到工程中。编译时需要加上-pthread选项以链接线程库。例如,编译Remotery库及示例程序:
cc lib/Remotery.c sample/sample.c -I lib -pthread -lm
编译后,会在当前目录下生成可执行文件a.out;运行./a.out后,用浏览器打开vis/index.html,即可看到各线程的运行情况。
2.2 Windows平台下
Windows(MSVC):将lib/Remotery.c和lib/Remotery.h添加到工程中,并在include目录设置中加入Remotery/lib路径。所需的库ws2_32.lib会通过Remotery.c中的#pragma comment(lib, "ws2_32.lib")指令自动链接。
3.Remotery各功能类简介
3.1 基础功能类
3.1.1 New
带有错误值的新建/删除运算符,用于简化对象创建/销毁
// Delete(type, obj): counterpart of the New_* macros.
// Ensures the pointer is non-NULL, calls type##_Destructor(obj), frees the memory with rmtFree
// and sets the pointer to NULL. A NULL obj is left untouched.
// Caveats: obj is evaluated several times and is assigned to, so it must be a side-effect-free lvalue.
// The expansion is a bare `if` block rather than a do { } while (0) wrapper, so take care when
// using it as the body of an unbraced if/else.
#define Delete(type, obj) \
if (obj != NULL) \
{ \
type##_Destructor(obj); \
rmtFree(obj); \
obj = NULL; \
}
// BeginNew(type, obj): first half of the New_* macros.
// Allocates sizeof(type) bytes with rmtMalloc into `obj`. On allocation failure it sets the
// caller's `error` variable to RMT_ERROR_MALLOC_FAIL; otherwise it opens an `else` block that
// is deliberately left unclosed so the constructor call can be placed between BeginNew and EndNew.
// The final line must NOT end with a backslash, otherwise the following #define is spliced
// into this macro's body.
#define BeginNew(type, obj)                     \
    {                                           \
        obj = (type*)rmtMalloc(sizeof(type));   \
        if (obj == NULL)                        \
        {                                       \
            error = RMT_ERROR_MALLOC_FAIL;      \
        }                                       \
        else                                    \
        {

// EndNew(type, obj): second half of the New_* macros.
// If `error` is anything other than RMT_ERROR_NONE after construction, the object is destroyed
// and `obj` reset to NULL via Delete. Then closes the two blocks opened by BeginNew.
#define EndNew(type, obj)                       \
            if (error != RMT_ERROR_NONE)        \
                Delete(type, obj);              \
        }                                       \
    }
// New_N(type, obj, a0 .. aN-1): allocate `obj` and construct it with N extra arguments.
// Every variant expands to BeginNew, an assignment of type##_Constructor's result to the
// caller's `error` variable, and EndNew. The caller supplies the trailing semicolon.
#define New_0(type, obj)                            \
    BeginNew(type, obj);                            \
    error = type##_Constructor(obj);                \
    EndNew(type, obj)

#define New_1(type, obj, a0)                        \
    BeginNew(type, obj);                            \
    error = type##_Constructor(obj, a0);            \
    EndNew(type, obj)

#define New_2(type, obj, a0, a1)                    \
    BeginNew(type, obj);                            \
    error = type##_Constructor(obj, a0, a1);        \
    EndNew(type, obj)

#define New_3(type, obj, a0, a1, a2)                \
    BeginNew(type, obj);                            \
    error = type##_Constructor(obj, a0, a1, a2);    \
    EndNew(type, obj)
3.1.2 Deps
外部依赖项
rmtU8 minU8(rmtU8 a, rmtU8 b);
rmtU16 maxU16(rmtU16 a, rmtU16 b);
rmtS64 maxS64(rmtS64 a, rmtS64 b);
void* rmtMalloc( rmtU32 size );
void* rmtRealloc( void* ptr, rmtU32 size);
void rmtFree( void* ptr );
3.1.3 ObjAlloc
可重用对象分配器
//
// Singly-linked list node embedded in allocatable objects.
// All objects that require free-list-backed allocation need to inherit from this type
// (in C: place an ObjectLink as the first member, as the Sample struct does).
//
typedef struct ObjectLink_s
{
// Next node in the list; the pointer itself is volatile-qualified
struct ObjectLink_s* volatile next;
} ObjectLink;
// State of the reusable object allocator: per-object construction parameters,
// usage counters and the free list of ObjectLink nodes.
typedef struct
{
// Object create/destroy parameters
rmtU32 object_size;
ObjConstructor constructor;
ObjDestructor destructor;
// Number of objects in the free list
volatile rmtS32 nb_free;
// Number of objects used by callers
volatile rmtS32 nb_inuse;
// Total allocation count
volatile rmtS32 nb_allocated;
// NOTE(review): presumably the head of the free list - confirm against ObjectAllocator_Push/Pop
ObjectLink* first_free;
} ObjectAllocator;
void ObjectLink_Constructor(ObjectLink* link);
rmtError ObjectAllocator_Constructor(ObjectAllocator* allocator, rmtU32 object_size, ObjConstructor constructor, ObjDestructor destructor);
void ObjectAllocator_Destructor(ObjectAllocator* allocator);
void ObjectAllocator_Push(ObjectAllocator* allocator, ObjectLink* start, ObjectLink* end);
ObjectLink* ObjectAllocator_Pop(ObjectAllocator* allocator);
rmtError ObjectAllocator_Alloc(ObjectAllocator* allocator, void** object);
void ObjectAllocator_Free(ObjectAllocator* allocator, void* object);
void ObjectAllocator_FreeRange(ObjectAllocator* allocator, void* start, void* end, rmtU32 count);
3.1.4 Safec
安全C库摘录
r_size_t strnlen_s (const char *dest, r_size_t dmax);
errno_t strstr_s (char *dest, r_size_t dmax,
const char *src, r_size_t slen, char **substring);
errno_t strncat_s (char *dest, r_size_t dmax, const char *src, r_size_t slen);
errno_t strcpy_s(char *dest, r_size_t dmax, const char *src);
void itoahex_s( char *dest, r_size_t dmax, rmtS32 value );
3.1.5 SHA1
SHA-1加密哈希函数
typedef struct
{
rmtU8 data[20];
} SHA1;
unsigned int rol(const unsigned int value, const unsigned int steps);
void clearWBuffert(unsigned int* buffert);
void innerHash(unsigned int* result, unsigned int* w);
void calc(const void* src, const int bytelength, unsigned char* hash);
SHA1 SHA1_Calculate(const void* src, unsigned int length);
3.1.6 BASE64
Base-64编码器
rmtU32 Base64_CalculateEncodedLength(rmtU32 length);
void Base64_Encode(const rmtU8* in_bytes, rmtU32 length, rmtU8* out_bytes);
3.1.7 Murmurhash
Murmur-Hash 3
rmtU32 rotl32(rmtU32 x, rmtS8 r);
rmtU32 getblock32(const rmtU32* p, int i);
rmtU32 fmix32(rmtU32 h);
rmtU32 MurmurHash3_x86_32(const void* key, int len, rmtU32 seed);
3.2 线程并发功能类
3.2.1 Tls
线程局部存储(pthread_key_create/pthread_setspecific)
rmtError tlsAlloc(rmtTLS* handle);
void tlsFree(rmtTLS handle);
void tlsSet(rmtTLS handle, void* value);
void* tlsGet(rmtTLS handle);
3.2.2 Atomic
原子操作
rmtBool AtomicCompareAndSwap(rmtU32 volatile* val, long old_val, long new_val);
rmtBool AtomicCompareAndSwapPointer(long* volatile* ptr, long* old_ptr, long* new_ptr);
rmtS32 AtomicAdd(rmtS32 volatile* value, rmtS32 add);
void AtomicSub(rmtS32 volatile* value, rmtS32 sub);
void CompilerWriteFence();
void CompilerReadFence();
rmtU32 LoadAcquire(rmtU32* volatile address);
long* LoadAcquirePointer(long* volatile* ptr);
void StoreRelease(rmtU32* volatile address, rmtU32 value);
void StoreReleasePointer(long* volatile* ptr, long* value);
3.2.3 Threads
线程处理
// Thread object (rmtThread) and the signature of the callback it carries.
// The callback receives the rmtThread itself and returns an rmtError.
typedef struct Thread_t rmtThread;
typedef rmtError(*ThreadProc)(rmtThread* thread);
struct Thread_t
{
// OS-specific data: a HANDLE on Windows, a pthread_t on every other platform
#if defined(RMT_PLATFORM_WINDOWS)
HANDLE handle;
#else
pthread_t handle;
#endif
// Callback executed when the thread is created
ThreadProc callback;
// Caller-specified parameter passed to Thread_Create
void* param;
// Error state returned from callback
rmtError error;
// External threads can set this to request an exit
volatile rmtBool request_exit;
};
int rmtThread_Valid(rmtThread* thread);
rmtError rmtThread_Constructor(rmtThread* thread, ThreadProc callback, void* param);
void rmtThread_RequestExit(rmtThread* thread);
void rmtThread_Join(rmtThread* thread);
void rmtThread_Destructor(rmtThread* thread);
3.2.4 DynBuf
动态缓冲器
// Dynamic byte buffer state.
// NOTE(review): the field descriptions below are inferred from the field names only;
// confirm against Buffer_Grow / Buffer_Write before relying on them.
typedef struct
{
// Presumably the step size used when the allocation grows
rmtU32 alloc_granularity;
// Presumably the current capacity of `data` in bytes
rmtU32 bytes_allocated;
// Presumably the number of bytes of `data` written so far
rmtU32 bytes_used;
// Pointer to the buffer's bytes
rmtU8* data;
} Buffer;
rmtError Buffer_Constructor(Buffer* buffer, rmtU32 alloc_granularity);
void Buffer_Destructor(Buffer* buffer);
rmtError Buffer_Grow(Buffer* buffer, rmtU32 length);
rmtError Buffer_Write(Buffer* buffer, const void* data, rmtU32 length);
rmtError Buffer_WriteStringZ(Buffer* buffer, rmtPStr string);
void U32ToByteArray(rmtU8* dest, rmtU32 value);
rmtError Buffer_WriteU32(Buffer* buffer, rmtU32 value);
rmtBool IsLittleEndian();
rmtError Buffer_WriteU64(Buffer* buffer, rmtU64 value);
rmtError Buffer_WriteStringWithLength(Buffer* buffer, rmtPStr string);
3.3 网络服务功能类
3.3.1 Sockets
TCP/IP Sockets
// Wrapper around a single SOCKET handle.
typedef struct
{
SOCKET socket;
} TCPSocket;
// Socket state as returned by value from TCPSocket_PollStatus / WebSocket_PollStatus:
// two boolean flags plus an error code.
typedef struct
{
// NOTE(review): presumably "data can be read without blocking" - confirm
rmtBool can_read;
// NOTE(review): presumably "data can be written without blocking" - confirm
rmtBool can_write;
rmtError error_state;
} SocketStatus;
rmtError TCPSocket_Constructor(TCPSocket* tcp_socket);
void TCPSocket_Destructor(TCPSocket* tcp_socket);
rmtError TCPSocket_RunServer(TCPSocket* tcp_socket, rmtU16 port, rmtBool reuse_open_port, rmtBool limit_connections_to_localhost);
void TCPSocket_Close(TCPSocket* tcp_socket);
SocketStatus TCPSocket_PollStatus(TCPSocket* tcp_socket);
rmtError TCPSocket_AcceptConnection(TCPSocket* tcp_socket, TCPSocket** client_socket);
int TCPSocketWouldBlock();
rmtError TCPSocket_Send(TCPSocket* tcp_socket, const void* data, rmtU32 length, rmtU32 timeout_ms);
rmtError TCPSocket_Receive(TCPSocket* tcp_socket, void* data, rmtU32 length, rmtU32 timeout_ms);
3.3.2 WebSockets
WebSockets
// Mode of a WebSocket: none, text or binary.
enum WebSocketMode
{
WEBSOCKET_NONE = 0,
WEBSOCKET_TEXT = 1,
WEBSOCKET_BINARY = 2,
};
// A WebSocket layered on top of a TCPSocket.
typedef struct
{
// Underlying TCP socket
TCPSocket* tcp_socket;
enum WebSocketMode mode;
// NOTE(review): presumably the bytes still unread in the frame being received - confirm
rmtU32 frame_bytes_remaining;
// NOTE(review): presumably the current position within the 4-byte mask - confirm
rmtU32 mask_offset;
// The mask, accessible either as 4 separate bytes or as a single 32-bit value
union
{
rmtU8 mask[4];
rmtU32 mask_u32;
} data;
} WebSocket;
char* GetField(char* buffer, r_size_t buffer_length, rmtPStr field_name);
rmtError WebSocketHandshake(TCPSocket* tcp_socket, rmtPStr limit_host);
rmtError WebSocket_Constructor(WebSocket* web_socket, TCPSocket* tcp_socket);
void WebSocket_Destructor(WebSocket* web_socket);
rmtError WebSocket_RunServer(WebSocket* web_socket, rmtU16 port, rmtBool reuse_open_port, rmtBool limit_connections_to_localhost, enum WebSocketMode mode);
void WebSocket_Close(WebSocket* web_socket);
SocketStatus WebSocket_PollStatus(WebSocket* web_socket);
rmtError WebSocket_AcceptConnection(WebSocket* web_socket, WebSocket** client_socket);
void WriteSize(rmtU32 size, rmtU8* dest, rmtU32 dest_size, rmtU32 dest_offset);
void WebSocket_PrepareBuffer(Buffer* buffer);
rmtU32 WebSocket_FrameHeaderSize(rmtU32 length);
void WebSocket_WriteFrameHeader(WebSocket* web_socket, rmtU8* dest, rmtU32 length);
rmtError WebSocket_Send(WebSocket* web_socket, const void* data, rmtU32 length, rmtU32 timeout_ms);
rmtError ReceiveFrameHeader(WebSocket* web_socket);
rmtError WebSocket_Receive(WebSocket* web_socket, void* data, rmtU32* msg_len, rmtU32 length, rmtU32 timeout_ms);
3.3.3 Network
网络服务器
// Callback type stored in Server::receive_handler. Remotery_ReceiveMessage has the matching
// signature, where the parameters are named (context, message_data, message_length).
typedef rmtError (*Server_ReceiveHandler)(void*, char*, rmtU32);
// Network server state: a listening WebSocket, a client WebSocket, the listen settings
// and the receive callback.
typedef struct
{
WebSocket* listen_socket;
WebSocket* client_socket;
// NOTE(review): unit is presumably milliseconds (msTimer_Get returns rmtU32) - confirm
rmtU32 last_ping_time;
// Listen settings; same names and types as the Server_Constructor parameters
rmtU16 port;
rmtBool reuse_open_port;
rmtBool limit_connections_to_localhost;
// A dynamically-sized buffer used for binary-encoding messages and sending to the client
Buffer* bin_buf;
// Handler for receiving messages from the client
Server_ReceiveHandler receive_handler;
// NOTE(review): presumably passed as the handler's first (void*) argument - confirm
void* receive_handler_context;
} Server;
rmtError Server_CreateListenSocket(Server* server, rmtU16 port, rmtBool reuse_open_port, rmtBool limit_connections_to_localhost);
rmtError Server_Constructor(Server* server, rmtU16 port, rmtBool reuse_open_port, rmtBool limit_connections_to_localhost);
void Server_Destructor(Server* server);
rmtBool Server_IsClientConnected(Server* server);
void Server_DisconnectClient(Server* server);
rmtError Server_Send(Server* server, const void* data, rmtU32 length, rmtU32 timeout);
rmtError Server_ReceiveMessage(Server* server, char message_first_byte, rmtU32 message_length);
void Server_Update(Server* server);
3.4 负载率采样功能类
3.4.1 Timers
特定于平台的计时器
功能类函数接口:
// Microsecond-accuracy high-performance counter.
// On non-Windows platforms LARGE_INTEGER is not provided by the OS headers, so it is
// defined here as a 64-bit unsigned integer.
#ifndef RMT_PLATFORM_WINDOWS
typedef rmtU64 LARGE_INTEGER;
#endif
typedef struct
{
// NOTE(review): presumably the counter value captured by usTimer_Init - confirm
LARGE_INTEGER counter_start;
// NOTE(review): presumably the factor converting counter ticks to microseconds - confirm
double counter_scale;
} usTimer;
void usTimer_Init(usTimer* timer);
rmtU32 msTimer_Get(); // Get the time value in milliseconds
rmtU64 usTimer_Get(usTimer* timer);
void msSleep(rmtU32 time_ms);
3.4.2 Sample
基本采样说明(默认情况下为CPU)
// Kind of sample: CPU or one of the GPU back-ends.
// SampleType_Count is the last enumerator, so it equals the number of sample types;
// ThreadSampler uses it to size its sample_trees array.
typedef enum SampleType
{
SampleType_CPU,
SampleType_CUDA,
SampleType_D3D11,
SampleType_OpenGL,
SampleType_Metal,
SampleType_Count,
} SampleType;
// Base sample record (CPU by default). One node of a per-thread sample tree.
typedef struct Sample
{
// Inherit so that samples can be quickly allocated
ObjectLink Link;
enum SampleType type;
// Used to anonymously copy sample data without knowing its type
rmtU32 size_bytes;
// Hash generated from sample name
rmtU32 name_hash;
// Unique, persistent ID among all samples
rmtU32 unique_id;
// Null-terminated string storing the hash-prefixed 6-digit colour
rmtU8 unique_id_html_colour[8];
// Links to related samples in the tree
struct Sample* parent;
struct Sample* first_child;
struct Sample* last_child;
struct Sample* next_sibling;
// Keep track of child count to distinguish from repeated calls to the same function at the same stack level
// This is also mixed with the callstack hash to allow consistent addressing of any point in the tree
rmtU32 nb_children;
// Sample end points and length in microseconds
rmtU64 us_start;
rmtU64 us_end;
rmtU64 us_length;
// Total sampled length of all children
rmtU64 us_sampled_length;
// Number of times this sample was used in a call in aggregate mode, 1 otherwise
rmtU32 call_count;
// Current and maximum sample recursion depths
rmtU16 recurse_depth;
rmtU16 max_recurse_depth;
} Sample;
rmtError Sample_Constructor(Sample* sample);
void Sample_Destructor(Sample* sample);
void Sample_Prepare(Sample* sample, rmtU32 name_hash, Sample* parent);
rmtError bin_Sample(Buffer* buffer, Sample* sample);
rmtError bin_SampleArray(Buffer* buffer, Sample* parent_sample);
3.4.3 SampleTree
带有分配器的样本树
// A tree of samples together with the allocator that its samples come from.
typedef struct SampleTree
{
// Allocator for all samples
ObjectAllocator* allocator;
// Root sample for all samples created by this thread
Sample* root;
// Most recently pushed sample
Sample* current_parent;
} SampleTree;
// Message payload describing a sample tree: its root sample, the allocator and a thread name.
// NOTE(review): presumably the payload of a MsgID_SampleTree message - confirm.
typedef struct Msg_SampleTree
{
Sample* root_sample;
ObjectAllocator* allocator;
rmtPStr thread_name;
} Msg_SampleTree;
rmtError SampleTree_Constructor(SampleTree* tree, rmtU32 sample_size, ObjConstructor constructor, ObjDestructor destructor);
void SampleTree_Destructor(SampleTree* tree);
rmtU32 HashCombine(rmtU32 hash_a, rmtU32 hash_b);
rmtError SampleTree_Push(SampleTree* tree, rmtU32 name_hash, rmtU32 flags, Sample** sample);
void SampleTree_Pop(SampleTree* tree, Sample* sample);
ObjectLink* FlattenSampleTree(Sample* sample, rmtU32* nb_samples);
void FreeSampleTree(Sample* sample, ObjectAllocator* allocator);
// Takes a message queue, a sample, its allocator, a thread name and the thread sampler.
// NOTE(review): presumably posts the sample tree to the queue as a message - confirm against the definition.
void AddSampleTreeMessage(rmtMessageQueue* queue, Sample* sample, ObjectAllocator* allocator, rmtPStr thread_name, struct ThreadSampler* thread_sampler);
3.4.4 Tsampler
每个线程的采样器
// Per-thread sampler state. Instances are chained into a global list through `next`.
typedef struct ThreadSampler
{
// Name to assign to the thread in the viewer
rmtS8 name[256];
// Store a unique sample tree for each type
SampleTree* sample_trees[SampleType_Count];
// Table of all sample names encountered on this thread
StringTable* names;
// Only present when D3D11 profiling is compiled in
#if RMT_USE_D3D11
D3D11* d3d11;
#endif
// Next in the global list of active thread samplers
struct ThreadSampler* volatile next;
} ThreadSampler;
rmtError ThreadSampler_Constructor(ThreadSampler* thread_sampler);
void ThreadSampler_Destructor(ThreadSampler* ts);
rmtError ThreadSampler_Push(SampleTree* tree, rmtU32 name_hash, rmtU32 flags, Sample** sample);
rmtBool ThreadSampler_Pop(ThreadSampler* ts, rmtMessageQueue* queue, Sample* sample);
rmtU32 ThreadSampler_GetNameHash(ThreadSampler* ts, rmtPStr name, rmtU32* hash_cache);
3.5 消息队列类
3.5.1 Vmbuffer
利用虚拟内存映射实现自动回绕(wrap-around)的镜像缓冲区
// Mirrored buffer: the same `size` bytes are visible twice, back to back, starting at `ptr`.
typedef struct VirtualMirrorBuffer
{
// Page-rounded size of the buffer without mirroring
rmtU32 size;
// Pointer to the first part of the mirror
// The second part comes directly after at ptr+size bytes
rmtU8* ptr;
// Extra platform state exists only on Windows builds: page bookkeeping on Xbox One,
// a file-mapping handle otherwise
#ifdef RMT_PLATFORM_WINDOWS
#ifdef _XBOX_ONE
size_t page_count;
size_t* page_mapping;
#else
HANDLE file_map_handle;
#endif
#endif
} VirtualMirrorBuffer;
rmtError VirtualMirrorBuffer_Constructor(VirtualMirrorBuffer* buffer, rmtU32 size, int nb_attempts);
void VirtualMirrorBuffer_Destructor(VirtualMirrorBuffer* buffer);
3.5.2 HashTable
用于插入/查找的整数键值对哈希表。为保持实现简单,不支持删除操作。
// One key/value slot of the hash table.
typedef struct
{
// Non-zero, pre-hashed key
rmtU32 key;
// Value that's not equal to RMT_NOT_FOUND
rmtU32 value;
} HashSlot;
// Integer-pair hash table.
typedef struct
{
// Stats
// NOTE(review): presumably capacity of `slots` and number of slots in use, respectively - confirm
rmtU32 max_nb_slots;
rmtU32 nb_slots;
// Data
HashSlot* slots;
} rmtHashTable;
rmtError rmtHashTable_Constructor(rmtHashTable* table, rmtU32 max_nb_slots);
void rmtHashTable_Destructor(rmtHashTable* table);
rmtError rmtHashTable_Insert(rmtHashTable* table, rmtU32 key, rmtU32 value);
rmtError rmtHashTable_Resize(rmtHashTable* table);
rmtU32 rmtHashTable_Find(rmtHashTable* table, rmtU32 key);
3.5.3 StringTable
从字符串哈希映射到本地缓冲区中的字符串偏移
// Table of strings addressed by hash: the text lives in one growable buffer and a hash
// table maps each text hash to its location in that buffer.
typedef struct
{
// Growable dynamic array of strings added so far
Buffer* text;
// Map from text hash to text location in the buffer
rmtHashTable* text_map;
} StringTable;
rmtError StringTable_Constructor(StringTable* table);
void StringTable_Destructor(StringTable* table);
rmtPStr StringTable_Find(StringTable* table, rmtU32 name_hash);
void StringTable_Insert(StringTable* table, rmtU32 name_hash, rmtPStr name);
3.5.4 Messageq
多生产者、单消费者消息队列
// Identifier stored in every queued Message.
typedef enum MessageID
{
MsgID_NotReady,
MsgID_LogText,
MsgID_SampleTree,
MsgID_None,
// Per its name, present so the enum can hold a full 32-bit value
MsgID_Force32Bits = 0xFFFFFFFF,
} MessageID;
// Header of a queued message, followed by its payload bytes.
typedef struct Message
{
MessageID id;
rmtU32 payload_size;
// For telling which thread the message came from in the debugger
struct ThreadSampler* thread_sampler;
// First byte of the payload.
// NOTE(review): presumably payload_size bytes are stored here, beyond the declared [1] - confirm
rmtU8 payload[1];
} Message;
// Message queue backed by a VirtualMirrorBuffer.
typedef struct rmtMessageQueue
{
rmtU32 size;
// The physical address of this data buffer is pointed to by two sequential
// virtual memory pages, allowing automatic wrap-around of any reads or writes
// that exceed the limits of the buffer.
VirtualMirrorBuffer* data;
// Read/write position never wrap allowing trivial overflow checks
// with easier debugging
rmtU32 read_pos;
rmtU32 write_pos;
} rmtMessageQueue;
rmtError rmtMessageQueue_Constructor(rmtMessageQueue* queue, rmtU32 size);
void rmtMessageQueue_Destructor(rmtMessageQueue* queue);
rmtU32 rmtMessageQueue_SizeForPayload(rmtU32 payload_size);
static Message* rmtMessageQueue_AllocMessage(rmtMessageQueue* queue, rmtU32 payload_size, struct ThreadSampler* thread_sampler);
void rmtMessageQueue_CommitMessage(Message* message, MessageID id);
Message* rmtMessageQueue_PeekNextMessage(rmtMessageQueue* queue);
void rmtMessageQueue_ConsumeNextMessage(rmtMessageQueue* queue, Message* message);
3.6 主功能类
3.6.1 Remotery
// Top-level profiler state: the network server, the CPU timer, the list of sampled threads,
// the message queue feeding the Remotery thread, and optional GPU back-end state.
struct Remotery
{
Server* server;
// Microsecond accuracy timer for CPU timestamps
usTimer timer;
// TLS handle; NOTE(review): presumably stores each thread's ThreadSampler - confirm
rmtTLS thread_sampler_tls_handle;
// Linked list of all known threads being sampled
ThreadSampler* volatile first_thread_sampler;
// Queue between clients and main remotery thread
rmtMessageQueue* mq_to_rmt_thread;
// The main server thread
rmtThread* thread;
// Set to trigger a map of each message on the remotery thread message queue
void (*map_message_queue_fn)(Remotery* rmt, Message*);
void* map_message_queue_data;
// Back-end state below is compiled in only when the matching RMT_USE_* macro is enabled
#if RMT_USE_CUDA
rmtCUDABind cuda;
#endif
#if RMT_USE_OPENGL
OpenGL* opengl;
#endif
#if RMT_USE_METAL
Metal* metal;
#endif
};
void GetSampleDigest(Sample* sample, rmtU32* digest_hash, rmtU32* nb_samples);
rmtError Remotery_SendLogTextMessage(Remotery* rmt, Message* message);
rmtError bin_SampleTree(Buffer* buffer, Msg_SampleTree* msg);
rmtError Remotery_SendSampleTreeMessage(Remotery* rmt, Message* message);
rmtError Remotery_ConsumeMessageQueue(Remotery* rmt);
void Remotery_FlushMessageQueue(Remotery* rmt);
void Remotery_MapMessageQueue(Remotery* rmt);
rmtError Remotery_ThreadMain(rmtThread* thread);
rmtError Remotery_ReceiveMessage(void* context, char* message_data, rmtU32 message_length);
rmtError Remotery_Constructor(Remotery* rmt);
void Remotery_Destructor(Remotery* rmt);
// Returns an rmtError status and hands back a ThreadSampler through the thread_sampler out-pointer.
// NOTE(review): presumably returns the calling thread's sampler, creating it on first use - confirm against the definition.
rmtError Remotery_GetThreadSampler(Remotery* rmt, ThreadSampler** thread_sampler);
void Remotery_DestroyThreadSamplers(Remotery* rmt);
void* CRTMalloc(void* mm_context, rmtU32 size);
void CRTFree(void* mm_context, void* ptr);
void* CRTRealloc(void* mm_context, void* ptr, rmtU32 size);
以下接口通过RMT_API在头文件中声明,供Remotery作为第三方库时被其他模块调用:
RMT_API rmtSettings* _rmt_Settings( void );
RMT_API enum rmtError _rmt_CreateGlobalInstance(Remotery** remotery);
RMT_API void _rmt_DestroyGlobalInstance(Remotery* remotery);
RMT_API void _rmt_SetGlobalInstance(Remotery* remotery);
RMT_API Remotery* _rmt_GetGlobalInstance(void);
RMT_API void _rmt_SetCurrentThreadName(rmtPStr thread_name);
RMT_API void _rmt_LogText(rmtPStr text);
RMT_API void _rmt_BeginCPUSample(rmtPStr name, rmtU32 flags, rmtU32* hash_cache);
RMT_API void _rmt_EndCPUSample(void);
3.6.2 CUDA
CUDA事件负载率采样
// Sample specialisation for CUDA: a base Sample extended with a start/end event pair.
typedef struct CUDASample
{
// IS-A inheritance relationship
// (Sample is the first member, so the struct begins with a Sample)
Sample base;
// Pair of events that wrap the sample
CUevent event_start;
CUevent event_end;
} CUDASample;
rmtError MapCUDAResult(CUresult result);
rmtError CUDASetContext(void* context);
rmtError CUDAGetContext(void** context);
rmtError CUDAEnsureContext();
rmtError CUDAEventCreate(CUevent* phEvent, unsigned int Flags);
rmtError CUDAEventDestroy(CUevent hEvent);
rmtError CUDAEventRecord(CUevent hEvent, void* hStream);
rmtError CUDAEventQuery(CUevent hEvent);
rmtError CUDAEventElapsedTime(float* pMilliseconds, CUevent hStart, CUevent hEnd);
rmtError CUDASample_Constructor(CUDASample* sample);
void CUDASample_Destructor(CUDASample* sample);
rmtBool AreCUDASamplesReady(Sample* sample);
rmtBool GetCUDASampleTimes(Sample* root_sample, Sample* sample);
RMT_API void _rmt_BindCUDA(const rmtCUDABind* bind);
RMT_API void _rmt_BeginCUDASample(rmtPStr name, rmtU32* hash_cache, void* stream);
RMT_API void _rmt_EndCUDASample(void* stream);
3.6.3 D3D11
Direct3D 11事件采样
3.6.4 OPENGL
OpenGL事件采样
3.6.5 METAL
Metal事件采样
4.Remotery主体设计
主函数主流程:
rmtError Remotery_ThreadMain(rmtThread* thread)
网络功能类继承关系:
如何计算的每个函数的CPU负载呢?
初步分析:Remotery在被测线程/函数开始执行前调用_rmt_BeginCPUSample(rmtPStr name, rmtU32 flags, rmtU32* hash_cache),将其名称(name)加入哈希表并记录当前时间;待该线程/函数执行完毕后,再调用_rmt_EndCPUSample()获取一次时间。两次时间之差即为该函数的运行时间,并以此作为计算负载的依据。