gdb的pretty-printer实现及使用

一、pretty printer

即使在有调试器的情况下,打印(print)信息也是定位问题的重要和有效手段,所以一些变量的打印就非常重要,例如C++中流行的libfmt库,gcc扩展的"__PRETTY_FUNCTION__"预定义变量。gdb作为调试程序的主要通用工具,也包含了对类型的pretty-print。

C++的容器类通常使用模板实现,源代码看起来比较繁琐,如果尝试从源代码的角度理解相关实现,成本会比较高。如果不理解模板实现的内部结构,在遇到容器类问题时又不太容易定位,这个时候最优的解决方案就是gdb可以优雅地打印这些结构的内容。

但是这些容器类结构不是gdb维护的内容,必然会经常变化,而stl容器类的实现细节也只有代码作者最清楚,所以这个pretty-print最好由stl库的作者来配合代码一起实现。

这反过来就需要gdb提供一种机制,可以让源代码(C++源代码)的作者注册特定结构的printer。gdb提供的这种注册机制就是基于python实现的pretty-printer。

使用python的优势比较明显:python是一种脚本语言,扩展性强,避免了用C语言实现时会遇到的二进制兼容以及各种各样的头文件依赖、编译、链接等问题;而且gdb的打印并不是性能关键的地方,使用python没有问题。

gdb框架中的python脚本位于gdb-10.1\gdb\python\lib\gdb\command,C++容器类的打印脚本位于libstdc++-v3\python\libstdcxx\v6文件夹下。

二、gdb的初始化

///gdb-10.1\gdb\python\python.c
/* Excerpt from gdb's do_start_initialization; the lines marked "///..."
   were elided by the article.

   The part shown creates Python string objects for the names
   "to_string", "children", "display_hint", "__doc__", "enabled" and
   "value" and stores each in the corresponding gdbpy_*_cst variable.
   The function returns false as soon as one of these strings cannot be
   created.  */
static bool
do_start_initialization ()
{
///...
  gdbpy_to_string_cst = PyString_FromString ("to_string");
  if (gdbpy_to_string_cst == NULL)
    return false;
  gdbpy_children_cst = PyString_FromString ("children");
  if (gdbpy_children_cst == NULL)
    return false;
  gdbpy_display_hint_cst = PyString_FromString ("display_hint");
  if (gdbpy_display_hint_cst == NULL)
    return false;
  gdbpy_doc_cst = PyString_FromString ("__doc__");
  if (gdbpy_doc_cst == NULL)
    return false;
  gdbpy_enabled_cst = PyString_FromString ("enabled");
  if (gdbpy_enabled_cst == NULL)
    return false;
  gdbpy_value_cst = PyString_FromString ("value");
  if (gdbpy_value_cst == NULL)
    return false;
///...
}

三、对于pretty-printer调用

在找到对应的printer之后,会(在print_string_repr函数中)调用printer的"to_string"方法,然后(在print_children函数中)调用printer的children接口。

///gdb-10.1\gdb\python\py-prettyprint.c
/* Try to print VALUE on STREAM with a Python pretty-printer.

   Returns EXT_LANG_RC_NOP when the bytes of VALUE are not available,
   when Python has not been initialized, or when the printer lookup
   yields None.  Returns EXT_LANG_RC_ERROR when VALUE cannot be wrapped
   in a Python object or when the printer lookup returns NULL.  Returns
   EXT_LANG_RC_OK otherwise, including the case where
   val_print_check_max_depth returns true and this function prints
   nothing further.  */
enum ext_lang_rc
gdbpy_apply_val_pretty_printer (const struct extension_language_defn *extlang,
				struct value *value,
				struct ui_file *stream, int recurse,
				const struct value_print_options *options,
				const struct language_defn *language)
{
  struct type *type = value_type (value);
  struct gdbarch *gdbarch = get_type_arch (type);
  enum string_repr_result print_result;

  /* Fetch a lazy value's contents before checking their availability.  */
  if (value_lazy (value))
    value_fetch_lazy (value);

  /* No pretty-printer support for unavailable values.  */
  if (!value_bytes_available (value, 0, TYPE_LENGTH (type)))
    return EXT_LANG_RC_NOP;

  if (!gdb_python_initialized)
    return EXT_LANG_RC_NOP;

  gdbpy_enter enter_py (gdbarch, language);

  /* Wrap VALUE in a Python object that can be handed to the lookup
     functions.  */
  gdbpy_ref<> val_obj (value_to_value_object_no_release (value));
  if (val_obj == NULL)
    {
      print_stack_unless_memory_error (stream);
      return EXT_LANG_RC_ERROR;
    }

  /* Find the constructor.  */
  gdbpy_ref<> printer (find_pretty_printer (val_obj.get ()));
  if (printer == NULL)
    {
      print_stack_unless_memory_error (stream);
      return EXT_LANG_RC_ERROR;
    }

  /* None means the lookup found no printer for this value.  */
  if (printer == Py_None)
    return EXT_LANG_RC_NOP;

  if (val_print_check_max_depth (stream, recurse, options, language))
    return EXT_LANG_RC_OK;

  /* If we are printing a map, we want some special formatting.  */
  gdb::unique_xmalloc_ptr<char> hint (gdbpy_get_display_hint (printer.get ()));

  /* Print the section */
  print_result = print_string_repr (printer.get (), hint.get (), stream,
				    recurse, options, language, gdbarch);
  /* print_children is skipped only when print_string_repr reported
     string_repr_error; its last argument tells it whether
     print_string_repr returned string_repr_none.  */
  if (print_result != string_repr_error)
    print_children (printer.get (), hint.get (), stream, recurse, options,
		    language, print_result == string_repr_none);

  /* Hand any Python error that is still set to
     print_stack_unless_memory_error.  */
  if (PyErr_Occurred ())
    print_stack_unless_memory_error (stream);
  return EXT_LANG_RC_OK;
}

四、如何查找printer

传入一个value_object_type类型的变量,然后依次调用列表中每个匹配函数(即可调用对象的__call__函数),如果返回值不是None,则认为匹配成功,该返回值就是printer对象。由于value_object_type提供了各种各样的接口,所以匹配函数可以方便地进行各种匹配。

///gdb-10.1\gdb\python\py-prettyprint.c
/* Helper function for find_pretty_printer which iterates over a list,
   calls each function and inspects output.  This will return a
   printer object if one recognizes VALUE.  If no printer is found, it
   will return None.  On error, it will set the Python error and
   return NULL.

   LIST is walked in index order, so the first function that returns
   something other than None wins.  */

static gdbpy_ref<>
search_pp_list (PyObject *list, PyObject *value)
{
  Py_ssize_t pp_list_size, list_index;

  pp_list_size = PyList_Size (list);
  for (list_index = 0; list_index < pp_list_size; list_index++)
    {
      /* PyList_GetItem hands back a borrowed reference (per the Python
	 C API), so FUNCTION is kept as a plain pointer.  */
      PyObject *function = PyList_GetItem (list, list_index);
      if (! function)
	return NULL;

      /* Skip if disabled.  A function without the "enabled" attribute
	 is always tried; one whose attribute evaluates false is
	 skipped.  */
      if (PyObject_HasAttr (function, gdbpy_enabled_cst))
	{
	  gdbpy_ref<> attr (PyObject_GetAttr (function, gdbpy_enabled_cst));
	  int cmp;

	  if (attr == NULL)
	    return NULL;
	  cmp = PyObject_IsTrue (attr.get ());
	  if (cmp == -1)
	    return NULL;

	  if (!cmp)
	    continue;
	}

      /* Call FUNCTION (VALUE).  A NULL result is returned as an error;
	 None means this function does not handle VALUE, so keep
	 looking.  */
      gdbpy_ref<> printer (PyObject_CallFunctionObjArgs (function, value,
							 NULL));
      if (printer == NULL)
	return NULL;
      else if (printer != Py_None)
	return printer;
    }

  /* No function in LIST produced a printer.  */
  return gdbpy_ref<>::new_reference (Py_None);
}

其中最常用的就是变量的type字段,有了type字段之后就可以获得它的name字段,并进行名字匹配。

/* Excerpt of the attribute table of the Python value object; the
   remaining entries and the terminator were elided by the article.
   The "type" entry routes attribute reads to valpy_get_type.
   NOTE(review): entries appear to follow the PyGetSetDef layout
   (name, getter, setter, doc string, closure) -- confirm against the
   definition of gdb_PyGetSetDef.  */
static gdb_PyGetSetDef value_object_getset[] = {
  { "address", valpy_get_address, NULL, "The address of the value.",
    NULL },
  { "is_optimized_out", valpy_get_is_optimized_out, NULL,
    "Boolean telling whether the value is optimized "
    "out (i.e., not available).",
    NULL },
  { "type", valpy_get_type, NULL, "Type of the value.", NULL },
  ///.
  }
  
/* Return type of the value.

   The Python type object is created on first access and cached in
   obj->type.  Every successful call increments the reference count of
   the cached object before returning it.  Returns NULL when
   type_to_type_object fails.  */
static PyObject *
valpy_get_type (PyObject *self, void *closure)
{
  value_object *obj = (value_object *) self;

  /* Build the type object only once, on first access.  */
  if (!obj->type)
    {
      obj->type = type_to_type_object (value_type (obj->value));
      if (!obj->type)
	return NULL;
    }
  Py_INCREF (obj->type);
  return obj->type;
}

/* Return the type's name, or None.

   None is returned when type->name () is NULL; otherwise the name is
   converted to a Python string with PyString_FromString.  */

static PyObject *
typy_get_name (PyObject *self, void *closure)
{
  struct type *type = ((type_object *) self)->type;

  if (type->name () == NULL)
    Py_RETURN_NONE;
  return PyString_FromString (type->name ());
}

/* Attribute table of the Python type object, terminated by a NULL
   entry.  The "name" entry routes attribute reads to typy_get_name and
   the "tag" entry to typy_get_tag.
   NOTE(review): entries appear to follow the PyGetSetDef layout
   (name, getter, setter, doc string, closure) -- confirm against the
   definition of gdb_PyGetSetDef.  */
static gdb_PyGetSetDef type_object_getset[] =
{
  { "alignof", typy_get_alignof, NULL,
    "The alignment of this type, in bytes.", NULL },
  { "code", typy_get_code, NULL,
    "The code for this type.", NULL },
  { "dynamic", typy_get_dynamic, NULL,
    "Whether this type is dynamic.", NULL },
  { "name", typy_get_name, NULL,
    "The name for this type, or None.", NULL },
  { "sizeof", typy_get_sizeof, NULL,
    "The size of this type, in bytes.", NULL },
  { "tag", typy_get_tag, NULL,
    "The tag name for this type, or None.", NULL },
  { "objfile", typy_get_objfile, NULL,
    "The objfile this type was defined in, or None.", NULL },
  { NULL }
};

五、stl库中的例子

在std库的python代码中,通过变量获得类型(val.type),然后通过tag获得类型名称(type.tag),使用这个名字和注册的printer进行匹配(self.lookup[basename].invoke(val))。

'''libstdc++-v3\python\libstdcxx\v6\printers.py
    @staticmethod
    def get_basic_type(type):
        """Return the ``tag`` of ``type`` after normalising it.

        A reference type is first replaced by its target type.  The result
        is then stripped of qualifiers and typedefs, and its ``tag``
        attribute is returned unchanged.
        """
        # If it points to a reference, get the reference.
        if type.code == gdb.TYPE_CODE_REF:
            type = type.target ()

        # Get the unqualified type, stripped of typedefs.
        type = type.unqualified ().strip_typedefs ()

        return type.tag

    def __call__(self, val):
        """Return a printer for ``val``, or None when none applies.

        The normalised type name from ``get_basic_type`` is matched against
        ``self.compiled_rx``; group 1 of the match is the key looked up in
        ``self.lookup``.  None is returned when the type name is falsy, when
        the regex does not match, or when the key is not registered.
        """
        typename = self.get_basic_type(val.type)
        if not typename:
            return None

        # All the types we match are template types, so we can use a
        # dictionary.
        match = self.compiled_rx.match(typename)
        if not match:
            return None

        basename = match.group(1)

        # For a reference, hand the referenced value to the printer, but
        # only when this gdb.Value class provides referenced_value.
        if val.type.code == gdb.TYPE_CODE_REF:
            if hasattr(gdb.Value,"referenced_value"):
                val = val.referenced_value()

        if basename in self.lookup:
            return self.lookup[basename].invoke(val)

        # Cannot find a pretty printer.  Return None.
        return None
        '''....
        libstdcxx_printer.add_container('std::', 'vector', StdVectorPrinter)

六、如何向gdb注册printer

在gcc自带的stl库python脚本中,printers.py脚本执行的时候会自动执行build_libstdcxx_dictionary,主动调用register_libstdcxx_printers则会向gdb注册定义的printer类。

从下面代码也可以看到,注册是通过

obj.pretty_printers.append(libstdcxx_printer)

注册给gdb的。

 ''''libstdc++-v3\python\libstdcxx\v6\printers.py
 def add_one_type_printer(obj, match, name):
    """Register a FilteringTypePrinter for 'std::' + name with obj.

    When _versioned_namespace is set, a second printer is registered for
    the name prefixed with that namespace.
    """
    printer = FilteringTypePrinter(match, 'std::' + name)
    gdb.types.register_type_printer(obj, printer)
    if _versioned_namespace:
        printer = FilteringTypePrinter(match, 'std::' + _versioned_namespace + name)
        gdb.types.register_type_printer(obj, printer)

def register_libstdcxx_printers (obj):
    """Register libstdc++ pretty-printers with objfile Obj.

    When _use_gdb_pp is true, the global libstdcxx_printer is registered
    through gdb.printing.register_pretty_printer.  Otherwise it is appended
    to obj.pretty_printers, where an obj of None is replaced by the gdb
    module itself.  In both cases register_type_printers(obj) is called
    afterwards.
    """

    global _use_gdb_pp
    global libstdcxx_printer

    if _use_gdb_pp:
        gdb.printing.register_pretty_printer(obj, libstdcxx_printer)
    else:
        # No objfile given: fall back to the gdb module's own list.
        if obj is None:
            obj = gdb
        obj.pretty_printers.append(libstdcxx_printer)

    register_type_printers(obj)

def build_libstdcxx_dictionary ():
    """Create the global libstdcxx_printer and register printer classes on it.

    Only the first registration (basic_string -> StdStringPrinter) is shown
    in this excerpt; the rest of the body was elided by the article.
    """
    global libstdcxx_printer

    libstdcxx_printer = Printer("libstdc++-v6")

    # libstdc++ objects requiring pretty-printing.
    # In order from:
    # http://gcc.gnu.org/onlinedocs/libstdc++/latest-doxygen/a01847.html
    libstdcxx_printer.add_version('std::', 'basic_string', StdStringPrinter)
	'''......
build_libstdcxx_dictionary ()

所以,正如这个解决方案所说,可以先找到gcc自带的python脚本,然后在gdb的初始化脚本中用下面的命令注册(其中的/home/bartgol/.gdb/gdb_printers/python要替换为真实环境上libstdcxx包所在的目录,也就是libstdcxx/v6/printers.py往上三级、包含libstdcxx文件夹的那个python目录)。

python
# Put the directory that contains the libstdcxx package at the front of the
# module search path so that the import below can find it.
import sys
sys.path.insert(0, '/home/bartgol/.gdb/gdb_printers/python')
from libstdcxx.v6.printers import register_libstdcxx_printers
# Register the printers without naming a specific objfile.
register_libstdcxx_printers (None)
end

七、举个栗子

有了上面的例子,参考gdb的文档,我们也可以实现一个自定义的printer。

例子中首先把tsecerMatch对象注册到gdb导出的gdb.pretty_printers列表中,这个matcher在__call__函数中判断变量的类型名(val.type.tag)是不是tsecer,如果是的话就创建一个tscerPrinter对象,而tscerPrinter对象的to_string接口完成对变量内容的自定义打印输出。

tsecer@harry: cat gdb.py 
# Put the directory expected to hold tsecer.py at the front of the module
# search path, then import it; the import runs tsecer.py's top-level code.
import sys
sys.path.insert(0, '/home/harry/study/gdb_custom_pretty_printer')
import tsecer

tsecer@harry: cat tsecer.py
import gdb

class tscerPrinter:
    """Printer object that renders a value through its 'x' member."""

    def __init__ (self, val):
        # Keep the value so that to_string can read its members later.
        self.val = val

    def to_string (self):
        """Return the one-line text shown in place of the default output."""
        x_text = str(self.val['x'])
        return 'tsecer with x val %s' % x_text

class tsecerMatch:
    """Lookup callable: returns a tscerPrinter for values tagged 'tsecer'."""

    def __call__(self, val):
        tag = val.type.tag
        if tag == 'tsecer':
            return tscerPrinter(val)
        # Diagnostic output: the tag of every non-matching type is printed
        # before declining the value.
        print(tag)
        return None

# Create one matcher instance and append it to the gdb module's
# pretty_printers list.
tmatch = tsecerMatch()
gdb.pretty_printers.append(tmatch)

tsecer@harry: cat main.cpp 
struct tsecer
{
    int x;
};
tsecer x = {111};
int main()
{
    return 0;
}
tsecer@harry: g++ -g main.cpp 
tsecer@harry: gdb -q a.out 
Reading symbols from a.out...
(gdb) start
Temporary breakpoint 1 at 0x4004fb: file main.cpp, line 8.
Starting program: /home/harry/study/gdb_custom_pretty_printer/a.out 

Temporary breakpoint 1, main () at main.cpp:8
8           return 0;
(gdb) p x
$1 = {x = 111}
(gdb) source gdb.py 
(gdb) p x
$2 = None
tsecer with x val 111
(gdb) 

posted on 2022-08-26 20:03  tsecer  阅读(2030)  评论(0编辑  收藏  举报

导航