【Remotery】 Remotery-轻量级的远程实时 CPU/GPU 分析器设计浅析

1.Remotery简介

Remotery是一个轻量级的远程实时CPU/GPU分析器,主要用于监控CPU和GPU上多线程的活动。它提供了一个C文件,可以很容易的集成到项目中,并配置一个实时监控的Web界面,可以通过远程观察和分析程序的性能,适用于监控游戏的实时运行性能和分析移动端应用的性能场景。

image

2.Remotery编译运行

Remotery可以定义一些额外的宏来修改要编译到Remotery中的功能:

Macro               Default     Description

    RMT_ENABLED         1           Disable this to not include any bits of Remotery in your build
    RMT_USE_TINYCRT     0           Used by the Celtoys TinyCRT library (not released yet)
    RMT_USE_CUDA        0           Assuming CUDA headers/libs are setup, allow CUDA profiling
    RMT_USE_D3D11       0           Assuming Direct3D 11 headers/libs are setup, allow D3D11 GPU profiling
    RMT_USE_OPENGL      0           Allow OpenGL GPU profiling (dynamically links OpenGL libraries on available platforms)
    RMT_USE_METAL       0           Allow Metal profiling of command buffers

2.1 Linux平台下

Linux(GCC):将lib文件夹中的源代码添加到工程中。编译时需要使用-pthread选项来链接线程库。例如,编译Remotery库本身及示例程序:

cc lib/Remotery.c sample/sample.c -I lib -pthread -lm

编译后,会在当前目录下生成一个a.out可执行文件;执行./a.out后,双击vis/index.html(在浏览器中打开),即可看到线程运行情况
image

2.2 Windows平台下

Windows(MSVC)-将lib/Remotery.c和lib/Remotery.h添加到程序中。设置include目录以添加Remotery/lib路径。所需的库ws2_32.lib会通过Remotery.c中的#pragma comment(lib, "ws2_32.lib")指令自动链接。

3.Remotery各功能类简介

3.1 基础功能类

3.1.1 New

带有错误值的新建/删除运算符,用于简化对象创建/销毁

// Destroys and frees an object.
// If obj is non-NULL: calls type##_Destructor(obj), releases the memory with rmtFree
// and resets obj to NULL. A NULL obj is left untouched.
// The body is wrapped in do { } while (0) so the macro expands to exactly one
// statement and stays correct inside an unbraced if/else. obj is evaluated more than
// once and must be an assignable pointer expression.
#define Delete(type, obj)               \
    do                                  \
    {                                   \
        if ((obj) != NULL)              \
        {                               \
            type##_Destructor(obj);     \
            rmtFree(obj);               \
            (obj) = NULL;               \
        }                               \
    } while (0)

// BeginNew opens a scope, allocates sizeof(type) bytes with rmtMalloc into obj and,
// when the allocation fails, stores RMT_ERROR_MALLOC_FAIL in a variable named `error`
// that must already exist at the expansion site. On success it leaves an `else {`
// branch open, so it must always be followed by a matching EndNew; the constructor
// call is placed between the two.
#define BeginNew(type, obj)                 \
    {                                       \
        obj = (type*)rmtMalloc(sizeof(type));  \
        if (obj == NULL)                    \
        {                                   \
            error = RMT_ERROR_MALLOC_FAIL;  \
        }                                   \
        else                                \
        {                                   \


// EndNew closes the scopes opened by BeginNew. If `error` is anything other than
// RMT_ERROR_NONE at that point, the object is torn down again with Delete.
#define EndNew(type, obj)                   \
            if (error != RMT_ERROR_NONE)    \
                Delete(type, obj);          \
        }                                   \
    }


// Specialisations for New with varying constructor parameter counts.
// Each New_N allocates obj via BeginNew, calls type##_Constructor(obj, a0..aN-1) and
// assigns its return value to a variable named `error` at the expansion site, then
// lets EndNew delete obj again if `error` is not RMT_ERROR_NONE.
#define New_0(type, obj)    \
    BeginNew(type, obj); error = type##_Constructor(obj); EndNew(type, obj)
#define New_1(type, obj, a0)    \
    BeginNew(type, obj); error = type##_Constructor(obj, a0); EndNew(type, obj)
#define New_2(type, obj, a0, a1)    \
    BeginNew(type, obj); error = type##_Constructor(obj, a0, a1); EndNew(type, obj)
#define New_3(type, obj, a0, a1, a2)    \
    BeginNew(type, obj); error = type##_Constructor(obj, a0, a1, a2); EndNew(type, obj)

3.1.2 Deps

外部依赖项

rmtU8 minU8(rmtU8 a, rmtU8 b);
rmtU16 maxU16(rmtU16 a, rmtU16 b);
rmtS64 maxS64(rmtS64 a, rmtS64 b);
void* rmtMalloc( rmtU32 size );
void* rmtRealloc( void* ptr, rmtU32 size);
void rmtFree( void* ptr );

3.1.3 ObjAlloc

可重用对象分配器

//
// All objects that require free-list-backed allocation need to inherit from this type,
// i.e. embed an ObjectLink so they can be chained through `next`.
//
typedef struct ObjectLink_s
{
    struct ObjectLink_s* volatile next;
} ObjectLink;

// Reusable-object allocator: keeps a free list of ObjectLink-based objects together
// with the parameters needed to create/destroy them and usage counters.
typedef struct
{
    // Object create/destroy parameters
    rmtU32 object_size;
    ObjConstructor constructor;
    ObjDestructor destructor;

    // Number of objects in the free list
    volatile rmtS32 nb_free;

    // Number of objects used by callers
    volatile rmtS32 nb_inuse;

    // Total allocation count
    volatile rmtS32 nb_allocated;

    // Head of the free list
    ObjectLink* first_free;
} ObjectAllocator;

void ObjectLink_Constructor(ObjectLink* link);
rmtError ObjectAllocator_Constructor(ObjectAllocator* allocator, rmtU32 object_size, ObjConstructor constructor, ObjDestructor destructor);
void ObjectAllocator_Destructor(ObjectAllocator* allocator);
void ObjectAllocator_Push(ObjectAllocator* allocator, ObjectLink* start, ObjectLink* end);
ObjectLink* ObjectAllocator_Pop(ObjectAllocator* allocator);
rmtError ObjectAllocator_Alloc(ObjectAllocator* allocator, void** object);
void ObjectAllocator_Free(ObjectAllocator* allocator, void* object);
void ObjectAllocator_FreeRange(ObjectAllocator* allocator, void* start, void* end, rmtU32 count);

3.1.4 Safec

安全C库摘录

r_size_t strnlen_s (const char *dest, r_size_t dmax);
errno_t strstr_s (char *dest, r_size_t dmax,
          const char *src, r_size_t slen, char **substring);
errno_t strncat_s (char *dest, r_size_t dmax, const char *src, r_size_t slen);
errno_t strcpy_s(char *dest, r_size_t dmax, const char *src);
void itoahex_s( char *dest, r_size_t dmax, rmtS32 value );

3.1.5 SHA1

SHA-1加密哈希函数

// Value type holding a SHA-1 digest as 20 raw bytes (160 bits).
typedef struct
{
    rmtU8 data[20];
} SHA1;

unsigned int rol(const unsigned int value, const unsigned int steps);
void clearWBuffert(unsigned int* buffert);
void innerHash(unsigned int* result, unsigned int* w);
void calc(const void* src, const int bytelength, unsigned char* hash);
SHA1 SHA1_Calculate(const void* src, unsigned int length);

3.1.6 BASE64

Base-64编码器

rmtU32 Base64_CalculateEncodedLength(rmtU32 length);
void Base64_Encode(const rmtU8* in_bytes, rmtU32 length, rmtU8* out_bytes);

3.1.7 Murmurhash

Murmur-Hash 3

rmtU32 rotl32(rmtU32 x, rmtS8 r);
rmtU32 getblock32(const rmtU32* p, int i);
rmtU32 fmix32(rmtU32 h);
rmtU32 MurmurHash3_x86_32(const void* key, int len, rmtU32 seed);

3.2 线程并发功能类

3.2.1 Tls

线程局部存储(pthread_key_create/pthread_setspecific)

rmtError tlsAlloc(rmtTLS* handle);
void tlsFree(rmtTLS handle);
void tlsSet(rmtTLS handle, void* value);
void* tlsGet(rmtTLS handle);

3.2.2 Atomic

原子操作

rmtBool AtomicCompareAndSwap(rmtU32 volatile* val, long old_val, long new_val);
rmtBool AtomicCompareAndSwapPointer(long* volatile* ptr, long* old_ptr, long* new_ptr);
rmtS32 AtomicAdd(rmtS32 volatile* value, rmtS32 add);
void AtomicSub(rmtS32 volatile* value, rmtS32 sub);
void CompilerWriteFence();
void CompilerReadFence();
rmtU32 LoadAcquire(rmtU32* volatile address);
long* LoadAcquirePointer(long* volatile* ptr);
void StoreRelease(rmtU32* volatile address, rmtU32 value);
void StoreReleasePointer(long* volatile* ptr, long* value);

3.2.3 Threads

线程处理

// Forward declaration so ThreadProc can refer to the thread object it receives.
typedef struct Thread_t rmtThread;
// Signature of a thread callback: receives its rmtThread and returns an rmtError.
typedef rmtError(*ThreadProc)(rmtThread* thread);

// Per-thread state: OS handle, callback with its parameter, the callback's error
// result and an exit-request flag.
struct Thread_t
{
    // OS-specific data
    #if defined(RMT_PLATFORM_WINDOWS)
        HANDLE handle;
    #else
        pthread_t handle;
    #endif

    // Callback executed when the thread is created
    ThreadProc callback;

    // Caller-specified parameter passed to Thread_Create
    void* param;

    // Error state returned from callback
    rmtError error;

    // External threads can set this to request an exit
    volatile rmtBool request_exit;

};

int rmtThread_Valid(rmtThread* thread);
rmtError rmtThread_Constructor(rmtThread* thread, ThreadProc callback, void* param);
void rmtThread_RequestExit(rmtThread* thread);
void rmtThread_Join(rmtThread* thread);
void rmtThread_Destructor(rmtThread* thread);

3.2.4 DynBuf

动态缓冲器

// Dynamic byte buffer.
// NOTE(review): field meanings below are inferred from their names - confirm against
// Buffer_Grow / Buffer_Write.
typedef struct
{
    // Presumably the step size used when the buffer has to grow
    rmtU32 alloc_granularity;

    // Presumably the capacity of `data` and the number of bytes written so far
    rmtU32 bytes_allocated;
    rmtU32 bytes_used;

    // Storage
    rmtU8* data;
} Buffer;

rmtError Buffer_Constructor(Buffer* buffer, rmtU32 alloc_granularity);
void Buffer_Destructor(Buffer* buffer);
rmtError Buffer_Grow(Buffer* buffer, rmtU32 length);
rmtError Buffer_Write(Buffer* buffer, const void* data, rmtU32 length);
rmtError Buffer_WriteStringZ(Buffer* buffer, rmtPStr string);
void U32ToByteArray(rmtU8* dest, rmtU32 value);
rmtError Buffer_WriteU32(Buffer* buffer, rmtU32 value);
rmtBool IsLittleEndian();
rmtError Buffer_WriteU64(Buffer* buffer, rmtU64 value);
rmtError Buffer_WriteStringWithLength(Buffer* buffer, rmtPStr string);

3.3 网络服务功能类

3.3.1 Sockets

TCP/IP Sockets

// Wrapper around a single platform socket handle.
typedef struct
{
    SOCKET socket;
} TCPSocket;


// Result of polling a socket: readability, writability and an error code.
typedef struct
{
    rmtBool can_read;
    rmtBool can_write;
    rmtError error_state;
} SocketStatus;

rmtError TCPSocket_Constructor(TCPSocket* tcp_socket);
void TCPSocket_Destructor(TCPSocket* tcp_socket);
rmtError TCPSocket_RunServer(TCPSocket* tcp_socket, rmtU16 port, rmtBool reuse_open_port, rmtBool limit_connections_to_localhost);
void TCPSocket_Close(TCPSocket* tcp_socket);
SocketStatus TCPSocket_PollStatus(TCPSocket* tcp_socket);
rmtError TCPSocket_AcceptConnection(TCPSocket* tcp_socket, TCPSocket** client_socket);
int TCPSocketWouldBlock();
rmtError TCPSocket_Send(TCPSocket* tcp_socket, const void* data, rmtU32 length, rmtU32 timeout_ms);
rmtError TCPSocket_Receive(TCPSocket* tcp_socket, void* data, rmtU32 length, rmtU32 timeout_ms);

3.3.2 WebSockets

WebSockets

// Mode of a WebSocket: none (0), text (1) or binary (2).
enum WebSocketMode
{
    WEBSOCKET_NONE = 0,
    WEBSOCKET_TEXT = 1,
    WEBSOCKET_BINARY = 2,
};


// WebSocket layered on top of a TCPSocket.
typedef struct
{
    // Underlying TCP connection
    TCPSocket* tcp_socket;

    enum WebSocketMode mode;

    // NOTE(review): presumably the unread byte count of the frame currently being
    // received and the current position within the mask - confirm in WebSocket_Receive
    rmtU32 frame_bytes_remaining;
    rmtU32 mask_offset;

    // 4-byte mask, accessible either byte-wise or as one 32-bit value
    union
    {
        rmtU8 mask[4];
        rmtU32 mask_u32;
    } data;

} WebSocket;

char* GetField(char* buffer, r_size_t buffer_length, rmtPStr field_name);
rmtError WebSocketHandshake(TCPSocket* tcp_socket, rmtPStr limit_host);
rmtError WebSocket_Constructor(WebSocket* web_socket, TCPSocket* tcp_socket);
void WebSocket_Destructor(WebSocket* web_socket);
rmtError WebSocket_RunServer(WebSocket* web_socket, rmtU16 port, rmtBool reuse_open_port, rmtBool limit_connections_to_localhost, enum WebSocketMode mode);
void WebSocket_Close(WebSocket* web_socket);
SocketStatus WebSocket_PollStatus(WebSocket* web_socket);
rmtError WebSocket_AcceptConnection(WebSocket* web_socket, WebSocket** client_socket);
void WriteSize(rmtU32 size, rmtU8* dest, rmtU32 dest_size, rmtU32 dest_offset);
void WebSocket_PrepareBuffer(Buffer* buffer);
rmtU32 WebSocket_FrameHeaderSize(rmtU32 length);
void WebSocket_WriteFrameHeader(WebSocket* web_socket, rmtU8* dest, rmtU32 length);
rmtError WebSocket_Send(WebSocket* web_socket, const void* data, rmtU32 length, rmtU32 timeout_ms);
rmtError ReceiveFrameHeader(WebSocket* web_socket);
rmtError WebSocket_Receive(WebSocket* web_socket, void* data, rmtU32* msg_len, rmtU32 length, rmtU32 timeout_ms);

3.3.3 Network

网络服务器

// Callback type stored in Server::receive_handler. Takes a void* context, a char*
// and an rmtU32 (presumably message data and its length - confirm at the call site).
typedef rmtError (*Server_ReceiveHandler)(void*, char*, rmtU32);

// Network server state: a listening WebSocket, at most one client WebSocket, the
// listen settings, a send buffer and the handler for incoming messages.
typedef struct
{
    WebSocket* listen_socket;

    WebSocket* client_socket;

    rmtU32 last_ping_time;

    rmtU16 port;

    rmtBool reuse_open_port;
    rmtBool limit_connections_to_localhost;

    // A dynamically-sized buffer used for binary-encoding messages and sending to the client
    Buffer* bin_buf;

    // Handler for receiving messages from the client
    Server_ReceiveHandler receive_handler;
    void* receive_handler_context;
} Server;

rmtError Server_CreateListenSocket(Server* server, rmtU16 port, rmtBool reuse_open_port, rmtBool limit_connections_to_localhost);
rmtError Server_Constructor(Server* server, rmtU16 port, rmtBool reuse_open_port, rmtBool limit_connections_to_localhost);
void Server_Destructor(Server* server);
rmtBool Server_IsClientConnected(Server* server);
void Server_DisconnectClient(Server* server);
rmtError Server_Send(Server* server, const void* data, rmtU32 length, rmtU32 timeout);
rmtError Server_ReceiveMessage(Server* server, char message_first_byte, rmtU32 message_length);
void Server_Update(Server* server);

3.4 负载率采样功能类

3.4.1 Timers

特定于平台的计时器

功能类函数接口:

// Microsecond-precision high-performance counter.
// On non-Windows platforms LARGE_INTEGER is defined as a plain 64-bit unsigned
// integer so the struct below has the same shape everywhere.
#ifndef RMT_PLATFORM_WINDOWS
    typedef rmtU64 LARGE_INTEGER;
#endif
typedef struct
{
    LARGE_INTEGER counter_start;
    double counter_scale;
} usTimer;

void usTimer_Init(usTimer* timer);
rmtU32 msTimer_Get();       // Returns the current time value in milliseconds
rmtU64 usTimer_Get(usTimer* timer);
void msSleep(rmtU32 time_ms);

3.4.2 Sample

基本采样说明(默认情况下为CPU)

// Kinds of sample, one per profiling backend.
// SampleType_Count is last, so it equals the number of real sample types and can be
// used as an array size.
typedef enum SampleType
{
    SampleType_CPU,
    SampleType_CUDA,
    SampleType_D3D11,
    SampleType_OpenGL,
    SampleType_Metal,
    SampleType_Count,
} SampleType;

// Base sample description: one node of a per-thread sample tree, carrying its
// identity, tree links and timing information.
typedef struct Sample
{
    // Inherit so that samples can be quickly allocated
    ObjectLink Link;

    enum SampleType type;

    // Used to anonymously copy sample data without knowing its type
    rmtU32 size_bytes;

    // Hash generated from sample name
    rmtU32 name_hash;

    // Unique, persistent ID among all samples
    rmtU32 unique_id;

    // Null-terminated string storing the hash-prefixed 6-digit colour
    rmtU8 unique_id_html_colour[8];

    // Links to related samples in the tree
    struct Sample* parent;
    struct Sample* first_child;
    struct Sample* last_child;
    struct Sample* next_sibling;

    // Keep track of child count to distinguish from repeated calls to the same function at the same stack level
    // This is also mixed with the callstack hash to allow consistent addressing of any point in the tree
    rmtU32 nb_children;

    // Sample end points and length in microseconds
    rmtU64 us_start;
    rmtU64 us_end;
    rmtU64 us_length;

    // Total sampled length of all children
    rmtU64 us_sampled_length;

    // Number of times this sample was used in a call in aggregate mode, 1 otherwise
    rmtU32 call_count;

    // Current and maximum sample recursion depths
    rmtU16 recurse_depth;
    rmtU16 max_recurse_depth;

} Sample;

rmtError Sample_Constructor(Sample* sample);
void Sample_Destructor(Sample* sample);
void Sample_Prepare(Sample* sample, rmtU32 name_hash, Sample* parent);
rmtError bin_Sample(Buffer* buffer, Sample* sample);
rmtError bin_SampleArray(Buffer* buffer, Sample* parent_sample);

3.4.3 SampleTree

带有分配器的样本树

// A tree of samples together with the allocator its samples come from.
typedef struct SampleTree
{
    // Allocator for all samples
    ObjectAllocator* allocator;

    // Root sample for all samples created by this thread
    Sample* root;

    // Most recently pushed sample
    Sample* current_parent;

} SampleTree;

// Message describing a sample tree: its root sample, an allocator and the name of
// the thread it belongs to.
// NOTE(review): the allocator is presumably the one the samples must be returned to -
// confirm against FreeSampleTree.
typedef struct Msg_SampleTree
{
    Sample* root_sample;

    ObjectAllocator* allocator;

    rmtPStr thread_name;
} Msg_SampleTree;

rmtError SampleTree_Constructor(SampleTree* tree, rmtU32 sample_size, ObjConstructor constructor, ObjDestructor destructor);
void SampleTree_Destructor(SampleTree* tree);
rmtU32 HashCombine(rmtU32 hash_a, rmtU32 hash_b);
rmtError SampleTree_Push(SampleTree* tree, rmtU32 name_hash, rmtU32 flags, Sample** sample);
void SampleTree_Pop(SampleTree* tree, Sample* sample);
ObjectLink* FlattenSampleTree(Sample* sample, rmtU32* nb_samples);
void FreeSampleTree(Sample* sample, ObjectAllocator* allocator);

// Declaration only. The queue, root sample, allocator and thread name are passed
// explicitly; presumably it posts a sample-tree message to `queue` (confirm against
// the definition).
void AddSampleTreeMessage(rmtMessageQueue* queue, Sample* sample, ObjectAllocator* allocator, rmtPStr thread_name, struct ThreadSampler* thread_sampler);

3.4.4 Tsampler

每个线程的采样器

// Per-thread sampler state; instances are chained into a global list through `next`.
typedef struct ThreadSampler
{
    // Name to assign to the thread in the viewer
    rmtS8 name[256];

    // Store a unique sample tree for each type
    SampleTree* sample_trees[SampleType_Count];

    // Table of all sample names encountered on this thread
    StringTable* names;

#if RMT_USE_D3D11
    D3D11* d3d11;
#endif

    // Next in the global list of active thread samplers
    struct ThreadSampler* volatile next;

} ThreadSampler;

rmtError ThreadSampler_Constructor(ThreadSampler* thread_sampler);
void ThreadSampler_Destructor(ThreadSampler* ts);
rmtError ThreadSampler_Push(SampleTree* tree, rmtU32 name_hash, rmtU32 flags, Sample** sample);
rmtBool ThreadSampler_Pop(ThreadSampler* ts, rmtMessageQueue* queue, Sample* sample);
rmtU32 ThreadSampler_GetNameHash(ThreadSampler* ts, rmtPStr name, rmtU32* hash_cache);

3.5 消息队列类

3.5.1 Vmbuffer

使用虚拟内存进行自动换行的镜像缓冲区

// Buffer whose contents appear twice back to back in the address space (mirror at
// ptr and again at ptr+size), with platform-specific bookkeeping on Windows/Xbox One.
typedef struct VirtualMirrorBuffer
{
    // Page-rounded size of the buffer without mirroring
    rmtU32 size;

    // Pointer to the first part of the mirror
    // The second part comes directly after at ptr+size bytes
    rmtU8* ptr;

#ifdef RMT_PLATFORM_WINDOWS
    #ifdef _XBOX_ONE
        size_t page_count;
        size_t* page_mapping;
    #else
        HANDLE file_map_handle;
    #endif
#endif

} VirtualMirrorBuffer;

rmtError VirtualMirrorBuffer_Constructor(VirtualMirrorBuffer* buffer, rmtU32 size, int nb_attempts);
void VirtualMirrorBuffer_Destructor(VirtualMirrorBuffer* buffer);

3.5.2 HashTable

用于插入/查找的整数对哈希映射。为了增加简单性,没有删除。

// One key/value entry of the integer hash table.
typedef struct
{
    // Non-zero, pre-hashed key
    rmtU32 key;

    // Value that's not equal to RMT_NOT_FOUND
    rmtU32 value;
} HashSlot;

// Integer-pair hash map backed by an array of HashSlot entries.
typedef struct
{
    // Stats
    // NOTE(review): presumably slot capacity and number of occupied slots - confirm
    // against rmtHashTable_Insert / rmtHashTable_Resize
    rmtU32 max_nb_slots;
    rmtU32 nb_slots;

    // Data
    HashSlot* slots;
} rmtHashTable;

rmtError rmtHashTable_Constructor(rmtHashTable* table, rmtU32 max_nb_slots);
void rmtHashTable_Destructor(rmtHashTable* table);
rmtError rmtHashTable_Insert(rmtHashTable* table, rmtU32 key, rmtU32 value);
rmtError rmtHashTable_Resize(rmtHashTable* table);
rmtU32 rmtHashTable_Find(rmtHashTable* table, rmtU32 key);

3.5.3 StringTable

从字符串哈希映射到本地缓冲区中的字符串偏移

// Maps string hashes to the location of the string inside a local text buffer.
typedef struct
{
    // Growable dynamic array of strings added so far
    Buffer* text;

    // Map from text hash to text location in the buffer
    rmtHashTable* text_map;
} StringTable;

rmtError StringTable_Constructor(StringTable* table);
void StringTable_Destructor(StringTable* table);
rmtPStr StringTable_Find(StringTable* table, rmtU32 name_hash);
void StringTable_Insert(StringTable* table, rmtU32 name_hash, rmtPStr name);

3.5.4 Messageq

多生产者、单消费者消息队列

// Identifies the kind of a queued Message.
// MsgID_Force32Bits is a sentinel with value 0xFFFFFFFF; presumably it exists only to
// force a 32-bit representation of the enum (confirm).
typedef enum MessageID
{
    MsgID_NotReady,
    MsgID_LogText,
    MsgID_SampleTree,
    MsgID_None,
    MsgID_Force32Bits = 0xFFFFFFFF,
} MessageID;

// Header of a message stored in the message queue, followed by its payload bytes.
typedef struct Message
{
    MessageID id;

    // Size of the payload in bytes
    rmtU32 payload_size;

    // For telling which thread the message came from in the debugger
    struct ThreadSampler* thread_sampler;

    // First byte of the payload. Declared with one element; presumably the real
    // payload of payload_size bytes extends past the end of the struct (confirm in
    // rmtMessageQueue_AllocMessage)
    rmtU8 payload[1];
} Message;

// Message queue backed by a VirtualMirrorBuffer (described in the article as
// multi-producer, single-consumer).
typedef struct rmtMessageQueue
{
    rmtU32 size;

    // The physical address of this data buffer is pointed to by two sequential
    // virtual memory pages, allowing automatic wrap-around of any reads or writes
    // that exceed the limits of the buffer.
    VirtualMirrorBuffer* data;

    // Read/write position never wrap allowing trivial overflow checks
    // with easier debugging
    rmtU32 read_pos;
    rmtU32 write_pos;

} rmtMessageQueue;

rmtError rmtMessageQueue_Constructor(rmtMessageQueue* queue, rmtU32 size);
void rmtMessageQueue_Destructor(rmtMessageQueue* queue);
rmtU32 rmtMessageQueue_SizeForPayload(rmtU32 payload_size);
static Message* rmtMessageQueue_AllocMessage(rmtMessageQueue* queue, rmtU32 payload_size, struct ThreadSampler* thread_sampler);
void rmtMessageQueue_CommitMessage(Message* message, MessageID id);
Message* rmtMessageQueue_PeekNextMessage(rmtMessageQueue* queue);
void rmtMessageQueue_ConsumeNextMessage(rmtMessageQueue* queue, Message* message);

3.6 主功能类

3.6.1 Remotery

// Top-level profiler state: network server, CPU timer, per-thread sampler bookkeeping,
// the message queue feeding the Remotery thread, and optional GPU backend state.
struct Remotery
{
    Server* server;

    // Microsecond accuracy timer for CPU timestamps
    usTimer timer;

    // TLS handle (presumably holds each thread's ThreadSampler - confirm)
    rmtTLS thread_sampler_tls_handle;

    // Linked list of all known threads being sampled
    ThreadSampler* volatile first_thread_sampler;

    // Queue between clients and main remotery thread
    rmtMessageQueue* mq_to_rmt_thread;

    // The main server thread
    rmtThread* thread;

    // Set to trigger a map of each message on the remotery thread message queue
    void (*map_message_queue_fn)(Remotery* rmt, Message*);
    void* map_message_queue_data;

#if RMT_USE_CUDA
    rmtCUDABind cuda;
#endif

#if RMT_USE_OPENGL
    OpenGL* opengl;
#endif

#if RMT_USE_METAL
    Metal* metal;
#endif
};

void GetSampleDigest(Sample* sample, rmtU32* digest_hash, rmtU32* nb_samples);
rmtError Remotery_SendLogTextMessage(Remotery* rmt, Message* message);
rmtError bin_SampleTree(Buffer* buffer, Msg_SampleTree* msg);
rmtError Remotery_SendSampleTreeMessage(Remotery* rmt, Message* message);
rmtError Remotery_ConsumeMessageQueue(Remotery* rmt);
void Remotery_FlushMessageQueue(Remotery* rmt);
void Remotery_MapMessageQueue(Remotery* rmt);
rmtError Remotery_ThreadMain(rmtThread* thread);
rmtError Remotery_ReceiveMessage(void* context, char* message_data, rmtU32 message_length);
rmtError Remotery_Constructor(Remotery* rmt);
void Remotery_Destructor(Remotery* rmt);
// Declaration only. Returns an rmtError and hands the ThreadSampler back through the
// thread_sampler out-parameter (presumably the calling thread's sampler - confirm).
rmtError Remotery_GetThreadSampler(Remotery* rmt, ThreadSampler** thread_sampler);
void Remotery_DestroyThreadSamplers(Remotery* rmt);
void* CRTMalloc(void* mm_context, rmtU32 size);
void CRTFree(void* mm_context, void* ptr);
void* CRTRealloc(void* mm_context, void* ptr, rmtU32 size);

RMT API在头文件中进行了声明,当Remotery作为第三方库使用时,供其他库调用:

RMT_API rmtSettings* _rmt_Settings( void );
RMT_API enum rmtError _rmt_CreateGlobalInstance(Remotery** remotery);
RMT_API void _rmt_DestroyGlobalInstance(Remotery* remotery);
RMT_API void _rmt_SetGlobalInstance(Remotery* remotery);
RMT_API Remotery* _rmt_GetGlobalInstance(void);
RMT_API void _rmt_SetCurrentThreadName(rmtPStr thread_name);
RMT_API void _rmt_LogText(rmtPStr text);
RMT_API void _rmt_BeginCPUSample(rmtPStr name, rmtU32 flags, rmtU32* hash_cache);
RMT_API void _rmt_EndCPUSample(void);

3.6.2 CUDA

CUDA事件负载率采样

// Sample specialisation for CUDA: the base Sample plus the two CUDA events that
// bracket the sampled region.
typedef struct CUDASample
{
    // IS-A inheritance relationship
    Sample base;

    // Pair of events that wrap the sample
    CUevent event_start;
    CUevent event_end;

} CUDASample;

rmtError MapCUDAResult(CUresult result);
rmtError CUDASetContext(void* context);
rmtError CUDAGetContext(void** context);
rmtError CUDAEnsureContext();
rmtError CUDAEventCreate(CUevent* phEvent, unsigned int Flags);
rmtError CUDAEventDestroy(CUevent hEvent);
rmtError CUDAEventRecord(CUevent hEvent, void* hStream);
rmtError CUDAEventQuery(CUevent hEvent);
rmtError CUDAEventElapsedTime(float* pMilliseconds, CUevent hStart, CUevent hEnd);
rmtError CUDASample_Constructor(CUDASample* sample);
void CUDASample_Destructor(CUDASample* sample);
rmtBool AreCUDASamplesReady(Sample* sample);
rmtBool GetCUDASampleTimes(Sample* root_sample, Sample* sample);

RMT_API void _rmt_BindCUDA(const rmtCUDABind* bind);
RMT_API void _rmt_BeginCUDASample(rmtPStr name, rmtU32* hash_cache, void* stream);
RMT_API void _rmt_EndCUDASample(void* stream);

3.6.3 D3D11

Direct3D 11事件采样

3.6.4 OPENGL

OpenGL事件采样

3.6.5 METAL

metal事件采样

4.Remotery主体设计

主函数主流程:

rmtError Remotery_ThreadMain(rmtThread* thread)

image

网络功能类继承关系:

image

如何计算的每个函数的CPU负载呢?

初步分析,Remotery是通过在运行一个线程/函数之前,将该线程/函数的名称(name)加入一个hash表中(_rmt_BeginCPUSample(rmtPStr name, rmtU32 flags, rmtU32* hash_cache)),并记录当前时间;然后运行此线程/函数,结束时调用_rmt_EndCPUSample()再记录一次时间。两次时间之差就是该函数的运行时间,并以此作为计算负载的依据。










开源地址:
https://gitee.com/stlstl/Remotery.git

posted @ 2024-07-22 17:48  Emma1111  阅读(65)  评论(0编辑  收藏  举报