IL modification at runtime (step III)

Here is one more incremental step in instrumenting IL at runtime. This time, the method call that will be inserted will be implemented in a separate dll and will take an int as an input.
If you missed the previous episodes you can find them here: Step I, Step II and Step II+.

Calling a method from a separate assembly requires that some metadata be created so that we can reference the foreign method using tokens.
Multiple references (tokens) need to be generated: first the dll containing the Logger::Log method (using DefineAssemblyRef), then the Logger class itself (using DefineTypeRefByName) and finally the Logger::Log(int) method (using DefineMemberRef).
When we have the token for the Logger::Log method we can insert a call to it, with the regular IL code: 0x28 (call) followed by the method token.

When you are ready to call the Log(int) method, you need to decide which integer to pass to it. You can either load a constant onto the stack (with an ldc.i4 IL opcode, to load the integer 4 for example) or pass a copy of an int taken from the current method's context. Here we'll use the second technique, where the method being instrumented takes some integers as input parameters and one of these inputs gets logged.

Now to the code:

HRESULT CProfilerCallback::JITCompilationStarted(UINT functionId,      BOOL fIsSafeToBlock) {   wchar_t wszClass[512];   wchar_t wszMethod[512];

  HRESULT hr = S_OK;

  ClassID classId = 0;
  ModuleID moduleId = 0;
  mdToken tkMethod = 0;

  // Only execute for the blah method
  if (GetMethodNameFromFunctionId(functionId, wszClass, wszMethod))
  {
   ProfilerPrintf("JITCompilationStarted: %ls::%ls\n",wszClass,wszMethod);
   if (wcscmp(wszClass, L"Hello") != 0 ||
     wcscmp(wszMethod, L"blah") != 0) {
    goto exit;
   }
  } else {
   ProfilerPrintf( "JITCompilationStarted\n" );
   goto exit;
  }

  // Get the moduleID and tkMethod
  hr = m_pICorProfilerInfo->GetFunctionInfo(functionId, &classId, &moduleId, &tkMethod);
  if (FAILED(hr))
   { goto exit; }

  // Get the metadata import
  IMetaDataImport* pMetaDataImport = NULL;
  hr = m_pICorProfilerInfo->GetModuleMetaData(moduleId, ofRead, IID_IMetaDataImport,
     (IUnknown** )&pMetaDataImport);
  if (FAILED(hr))
   { goto exit; }


  //
  // Metadata modification
  //
  IMetaDataEmit* pMetaDataEmit = NULL;
  IMetaDataAssemblyEmit* pMetaDataAssemblyEmit = NULL;
  mdAssemblyRef tkLoggerLib;

  hr = m_pICorProfilerInfo->GetModuleMetaData(
     moduleId, ofRead | ofWrite, IID_IMetaDataEmit,
     (IUnknown** )&pMetaDataEmit);
  if (FAILED(hr)) { goto exit; }
  hr = pMetaDataEmit->QueryInterface(IID_IMetaDataAssemblyEmit,
(void**)&pMetaDataAssemblyEmit);
  if (FAILED(hr)) { goto exit; }

  // Get the token for the Logger class and its Log method
  mdTypeDef tkLogger = 0;
  mdMethodDef tkLog = 0;

  // Create a token for the Logger.dll assembly
  ASSEMBLYMETADATA amd;
  ZeroMemory(&amd, sizeof(amd));
  amd.usMajorVersion = 0;
  amd.usMinorVersion = 0;
  amd.usBuildNumber = 0;
  amd.usRevisionNumber = 0;
  hr = pMetaDataAssemblyEmit->DefineAssemblyRef(
     NULL, 0, // No public key token
     L"Logger",
     &amd, NULL, 0, 0,
     &tkLoggerLib);
  if (FAILED(hr)) { goto exit; }

  // Create a token for the Logger class
  hr = pMetaDataEmit->DefineTypeRefByName(tkLoggerLib,
     L"DumkyNamespace.Logger", &tkLogger);
  if (FAILED(hr)) { goto exit; }

  // Create a token for the Log method
  BYTE Sig_void_String[] = {
   0, // IMAGE_CEE_CS_CALLCONV_DEFAULT
   0x1, // argument count
   0x1, // ret = ELEMENT_TYPE_VOID
   ELEMENT_TYPE_I4
  };

  hr = pMetaDataEmit->DefineMemberRef(tkLogger,
     L"Log",
     Sig_void_String, sizeof(Sig_void_String),
     &tkLog);
  if (FAILED(hr)) { goto exit; }

  //
  // IL modification
  //

#include <pshpack1.h>
  struct {
   BYTE opCode1;
   BYTE call; DWORD method_token;
  } ILCode;
#include <poppack.h>

  //ILCode.opCode1 = 0x19; // load integer '3' or CEE_LDC_I4_3 from opcode.def
  ILCode.opCode1 = 0x02; // load arg 0 opdcode (ldarg.0 or CEE_LDARG_0)
  ILCode.call = 0x28;
  ILCode.method_token = tkLog;

  InsertIL(moduleId, tkMethod, (BYTE*) &ILCode, sizeof(ILCode));

exit:
  return hr;
}


// Prepends a sequence of IL bytes to the body of a method.
//
// The method's current IL is fetched, a new body is allocated from the
// module's IL allocator, the fat header is copied, the new IL is written at
// the start of the code and the original code is appended after it. CodeSize
// grows by iNewILLen and MaxStack by one before the body is handed back with
// SetILFunctionBody.
//
// Parameters:
//   moduleId  - module that contains the method.
//   tkMethod  - metadata token of the method to rewrite.
//   pbNewIL   - IL bytes to insert at the start of the method.
//   iNewILLen - number of bytes in pbNewIL.
//
// Returns:
//   S_OK          - the body was replaced, or the method does not have a fat
//                   header and was deliberately left untouched.
//   E_INVALIDARG  - negative length, or NULL buffer with a non-zero length.
//   E_NOTIMPL     - the method has extra sections (e.g. exception handling
//                   clauses); these are neither copied nor re-based here.
//   E_OUTOFMEMORY - the IL allocator could not provide the new body.
//   otherwise the failing HRESULT of the profiler API call.
HRESULT CProfilerCallback::InsertIL(ModuleID moduleId, mdToken tkMethod, BYTE* pbNewIL, int iNewILLen) {
  HRESULT hr = S_OK;

  // Declared up front so that no "goto exit" jumps over an initialization
  // and the allocator can be released on every path.
  LPCBYTE pMethodHeader = NULL;
  ULONG iMethodSize = 0;
  IMAGE_COR_ILMETHOD* pMethod = NULL;
  COR_ILMETHOD_FAT* fatImage = NULL;
  IMethodMalloc* pIMethodMalloc = NULL;
  IMAGE_COR_ILMETHOD* pNewMethod = NULL;
  COR_ILMETHOD_FAT* newFatImage = NULL;

  if (iNewILLen < 0 || (iNewILLen > 0 && pbNewIL == NULL)) {
   hr = E_INVALIDARG;
   goto exit;
  }

  //
  // Get the existing IL
  //
  hr = m_pICorProfilerInfo->GetILFunctionBody(moduleId, tkMethod, &pMethodHeader, &iMethodSize);
  if (FAILED(hr))
   { goto exit; }

  pMethod = (IMAGE_COR_ILMETHOD*)pMethodHeader;
  fatImage = (COR_ILMETHOD_FAT*)&pMethod->Fat;

  // Only methods with a fat header are rewritten; anything else is left
  // untouched and reported as success.
  if(!fatImage->IsFat()) {
   goto exit;
  }

  // Extra sections follow the code and hold offsets into it. Copying only
  // header + code would leave a header that advertises sections which are
  // not there, so refuse such methods rather than emit a corrupt body.
  if (fatImage->More()) {
   hr = E_NOTIMPL;
   goto exit;
  }

  //
  // Print the existing IL
  //
  ProfilerPrintf("\n");
  ProfilerPrintIL(fatImage);

  //
  // Get the IL Allocator
  //
  hr = m_pICorProfilerInfo->GetILFunctionBodyAllocator(moduleId, &pIMethodMalloc);
  if (FAILED(hr))
   { goto exit; }

  //
  // Allocate IL space and copy the IL in it
  //
  pNewMethod = (IMAGE_COR_ILMETHOD*) pIMethodMalloc->Alloc(iMethodSize+iNewILLen);
  if (pNewMethod == NULL) {
   hr = E_OUTOFMEMORY;
   goto exit;
  }
  newFatImage = (COR_ILMETHOD_FAT*)&pNewMethod->Fat;

  //
  // Modify IL
  //
  // Copy the header (its Size field is expressed in DWORDs)
  memcpy((BYTE*)newFatImage, (BYTE*)fatImage, fatImage->Size * sizeof(DWORD));

  // Add the new IL (the call to "Log") at the very start of the code
  memcpy(newFatImage->GetCode(), pbNewIL, iNewILLen);

  // Copy the original code right after the inserted bytes
  memcpy(newFatImage->GetCode() + iNewILLen,
   fatImage->GetCode(),
   fatImage->CodeSize);

  // Update the code size; one extra stack slot covers the value pushed for
  // the inserted call
  newFatImage->CodeSize += iNewILLen;
  newFatImage->MaxStack += 1;

  // Print modified IL
  ProfilerPrintf("\n");
  ProfilerPrintIL(newFatImage);

  // Push IL back in
  hr = m_pICorProfilerInfo->SetILFunctionBody(moduleId, tkMethod, (LPCBYTE) pNewMethod);

exit:
  // Release the allocator on every path, not only on success.
  if (pIMethodMalloc != NULL) { pIMethodMalloc->Release(); }
  return hr;
}

A couple of notes:

Increasing the MaxStack might not be necessary in all cases; strictly speaking it should be set to the maximum of the stack depth needed by the injected code and the stack depth needed by the existing code. The current code is nevertheless safe: the injected code only ever has one value on the stack, so after the increment MaxStack is always at least as large as the maximum stack depth actually used.

If you mistype the names of the assembly, class or method, then you get quite interesting errors at runtime, that are actually quite helpful. My thanks to the CLR team for that.


Here is the code for the assembly containing the "instrumentation" (Logger.cs):

using System;

namespace DumkyNamespace
{
  /// <summary>
  /// Holds the method that the profiler injects a call to. The profiler
  /// looks it up by name as "DumkyNamespace.Logger", so the namespace, class
  /// and method names must stay exactly as they are.
  /// </summary>
  public class Logger
  {
   /// <summary>
   /// Writes "Log!" immediately followed by the given value on one console line.
   /// </summary>
   /// <param name="i">Value to log.</param>
   public static void Log(int i)
   {
    string line = "Log!" + i;
    Console.WriteLine(line);
   }
  }
}

You can compile it with "csc /t:library Logger.cs".

And as usual the code to be instrumented (Hello.cs):

using System;

// Program that gets instrumented: the profiler matches the class name "Hello"
// and the method name "blah" when the method is about to be JIT-compiled.
public class Hello
{
  // Entry point: prints "main!" and then calls blah(4,5).
  public static void Main(string[] prms)
  {
   Console.WriteLine("main!");
   blah(4,5);
  }

  // Instrumentation target: prints "blah!" and then each of its two arguments.
  // The method is static, so i is argument 0 - the value the injected ldarg.0
  // loads before the call to Logger::Log(int).
  public static void blah(int i, int j) {
   Console.WriteLine("blah!");
   Console.WriteLine(i);
   Console.WriteLine(j);
  }
}

When the blah method gets JITed, a call to Logger::Log(i) is inserted at its start, so the program's console output (leaving aside the profiler's own trace messages) becomes:

main!
Log!4
blah!
4
5

posted @ 2004-05-19 13:28  dudu  阅读(2003)  评论(0)  编辑  收藏  举报