python解释器源码函数调用分析

1、编译python代码

1.1 python代码

test.py

1 def ftest():
2     x = 3
3 ftest()

1.2 编译工具

disass_py.py

#-*- coding:utf8 -*-
import dis
import sys

def disassemble_file(file_path):
    """Compile a Python source file and print its bytecode disassembly.

    The file is read as raw bytes and handed to ``compile()`` unchanged, so
    the source encoding is detected from the file itself (BOM or PEP 263
    coding cookie, UTF-8 otherwise) instead of depending on the locale's
    default text encoding.

    Args:
        file_path: Path of the Python source file to disassemble.

    Errors raised while compiling or disassembling are reported on stdout
    instead of being propagated; failures to open or read the file still
    raise to the caller.
    """
    # Binary mode: decoding is left to compile(), which honours the coding
    # declaration inside the source file.
    with open(file_path, 'rb') as file:
        source_code = file.read()
    try:
        compiled_code = compile(source_code, file_path, 'exec')
        dis.dis(compiled_code)
    except Exception as e:
        print("Error disassembling {}: {}".format(file_path, e))

# Script entry point: expects exactly one argument, the path of the Python
# source file to disassemble.
if __name__ == '__main__':
    if len(sys.argv) == 2:
        file_path = sys.argv[1]
        disassemble_file(file_path)
    else:
        # Wrong argument count: show the usage line and exit with status 1.
        print("Usage: python disass_py.py <path_to_python_file>")
        sys.exit(1)

1.3 编译python代码

root(127.0.0.1) /data/ # python3 ./disass_py.py ./test.py
  1           0 LOAD_CONST               0 (<code object ftest at 0x7f34cb4b49d0, file "./test.py", line 1>)
              2 LOAD_CONST               1 ('ftest')                      // 将 ftest 载入
              4 MAKE_FUNCTION            0                                // 构建 ftest 函数
              6 STORE_NAME               0 (ftest)                        // 将 ftest 函数结构体存入 locals

  3           8 LOAD_NAME                0 (ftest)                        // 从 locals 中加载 ftest 压栈
             10 CALL_FUNCTION            0                                // 调用栈顶函数
             12 POP_TOP
             14 LOAD_CONST               2 (None)
             16 RETURN_VALUE

Disassembly of <code object ftest at 0x7f34cb4b49d0, file "./test.py", line 1>:    // 函数实现
  2           0 LOAD_CONST               1 (3)
              2 STORE_FAST               0 (x)
              4 LOAD_CONST               0 (None)
              6 RETURN_VALUE

 

2、分析对应字节码

可以看见python代码非常简单,定义了一个函数,并调用它。

整个字节码也体现得很清晰:

  1. 载入函数名称        2 LOAD_CONST               1 ('ftest')
  2. 构建函数               4 MAKE_FUNCTION            0
  3. 存入函数               6 STORE_NAME               0 (ftest)
  4. 按名称加载函数并压栈  8 LOAD_NAME                0 (ftest)
  5. 调用函数               10 CALL_FUNCTION            0

2.1 LOAD_CONST

        // LOAD_CONST: push the constant stored at index `oparg` of `consts`.
        case TARGET(LOAD_CONST): {
            PREDICTED(LOAD_CONST);
            PyObject *value = GETITEM(consts, oparg);  // look up the constant by index
            Py_INCREF(value);                          // the stack slot holds its own reference
            PUSH(value);                               // push it onto the value stack
            DISPATCH();
        }

2.2 MAKE_FUNCTION

解释器主循环先将函数限定名(qualname)和代码对象依次弹栈,再调用 PyFunction_NewWithQualName 构建一个 Python 函数对象,最后将该函数对象压栈。

        // MAKE_FUNCTION: pop the qualified name and the code object, build a
        // function object from them, attach the optional attributes selected
        // by the flag bits in `oparg`, and push the new function.
        case TARGET(MAKE_FUNCTION): {
            PyObject *qualname = POP();   // top of stack: qualified name
            PyObject *codeobj = POP();    // below it: the code object
            PyFunctionObject *func = (PyFunctionObject *)
                PyFunction_NewWithQualName(codeobj, f->f_globals, qualname);

            // Release the references that were held by the two popped stack slots.
            Py_DECREF(codeobj);
            Py_DECREF(qualname);
            if (func == NULL) {
                goto error;
            }

            // Each set bit means one more value is on the stack; it is popped
            // and stored directly into the corresponding function field.
            if (oparg & 0x08) {
                assert(PyTuple_CheckExact(TOP()));
                func->func_closure = POP();
            }
            if (oparg & 0x04) {
                assert(PyTuple_CheckExact(TOP()));
                func->func_annotations = POP();
            }
            if (oparg & 0x02) {
                assert(PyDict_CheckExact(TOP()));
                func->func_kwdefaults = POP();
            }
            if (oparg & 0x01) {
                assert(PyTuple_CheckExact(TOP()));
                func->func_defaults = POP();
            }

            PUSH((PyObject *)func);  // push the new function object onto the stack
            DISPATCH();
        }

2.2.1 PyFunction_NewWithQualName

创建与 Python 函数代码对象绑定的 PyFunctionObject 结构体;其中 op->vectorcall = _PyFunction_Vectorcall; 这一行把 _PyFunction_Vectorcall 设为该函数对象的 vectorcall 入口,之后调用这个 Python 函数时就由它来引导执行。

// Build a new PyFunctionObject for `code`, using `globals` as the function's
// globals and `qualname` as its qualified name. Returns the new object cast
// to PyObject *. The excerpt is abridged: every `...` marks omitted code, so
// some names used below (e.g. `doc`) are set up in the omitted parts.
PyObject *
PyFunction_NewWithQualName(PyObject *code, PyObject *globals, PyObject *qualname)
{
    ...
    PyThreadState *tstate = _PyThreadState_GET();

    PyCodeObject *code_obj = (PyCodeObject *)code;  // the function's code object
    ...
    PyObject *name = code_obj->co_name;             // the function's name
    ...
    // __module__: Use globals['__name__'] if it exists, or NULL.
    _Py_IDENTIFIER(__name__);
    PyObject *module = _PyDict_GetItemIdWithError(globals, &PyId___name__);
    PyObject *builtins = NULL;
    ...
    builtins = _PyEval_BuiltinsFromGlobals(tstate, globals); // borrowed ref
    if (builtins == NULL) {
        goto error;
    }
    Py_INCREF(builtins);  // turn the borrowed reference into one owned by the function
    PyFunctionObject *op = PyObject_GC_New(PyFunctionObject, &PyFunction_Type);
    ...
    op->func_globals = globals;
    op->func_builtins = builtins;
    op->func_name = name;                 // function name
    op->func_qualname = qualname;
    op->func_code = (PyObject*)code_obj;  // function code
    op->func_defaults = NULL;    // No default positional arguments
    op->func_kwdefaults = NULL;  // No default keyword arguments
    op->func_closure = NULL;
    op->func_doc = doc;
    op->func_dict = NULL;
    op->func_weakreflist = NULL;
    op->func_module = module;
    op->func_annotations = NULL;
    op->vectorcall = _PyFunction_Vectorcall; // vectorcall entry used when this function is called

    _PyObject_GC_TRACK(op);
    return (PyObject *)op;

error:
    ...
}

2.3 STORE_NAME

将栈顶的 Python 对象以指定名称存入当前帧的 locals 中。

        // STORE_NAME: pop the top of the stack and bind it to names[oparg]
        // in the current frame's locals mapping.
        case TARGET(STORE_NAME): {
            PyObject *name = GETITEM(names, oparg);  // the name to bind
            PyObject *v = POP();                     // the value to store
            PyObject *ns = f->f_locals;              // the current frame's locals
            int err;
            ...
            if (PyDict_CheckExact(ns))
                err = PyDict_SetItem(ns, name, v);   // exact dict: store through the dict API
            else
                err = PyObject_SetItem(ns, name, v); // any other object: store through the generic item API
            Py_DECREF(v);                            // drop the reference taken from the stack
            ..
            DISPATCH();
        }

2.4 LOAD_NAME

先在当前帧的 locals 中按名称查找 Python 对象(若未找到,还会依次查找 globals 和 builtins,对应下面代码中省略的部分),并将找到的对象压入栈。

        // LOAD_NAME: look up names[oparg] starting from the current frame's
        // locals and push the result. The excerpt is abridged: `...` marks
        // omitted code.
        case TARGET(LOAD_NAME): {
            PyObject *name = GETITEM(names, oparg);  // the name to look up
            PyObject *locals = f->f_locals;          // the current frame's locals
            PyObject *v;
            if (PyDict_CheckExact(locals)) {
                ...
            }else {
                v = PyObject_GetItem(locals, name);   // non-dict locals: generic item lookup
                ...
            }
            ...
            PUSH(v);                                 // push the object that was found
            DISPATCH();
        }

2.5 CALL_FUNCTION

调用栈顶的函数

        // CALL_FUNCTION: call the callable that sits below `oparg` arguments
        // on the stack and push the result.
        case TARGET(CALL_FUNCTION): {
            PREDICTED(CALL_FUNCTION);
            PyObject **sp, *res;
            sp = stack_pointer; // local copy of the stack pointer, passed by address below
            res = call_function(tstate, &trace_info, &sp, oparg, NULL); // NULL: no keyword names
            stack_pointer = sp; // adopt the stack pointer as adjusted by call_function
            PUSH(res);
            if (res == NULL) {
                goto error;
            }
            CHECK_EVAL_BREAKER();
            DISPATCH();
        }

2.5.1 call_function

在栈上定位参数和函数对象,通过 vectorcall 调用该函数,随后将栈上的参数和函数对象弹出并释放。

// Call the callable located below `oparg` arguments on the value stack.
//   pp_stack: address of the caller's stack pointer; it is moved down past
//             the arguments and the callable before returning.
//   oparg:    total number of arguments on the stack (positional + keyword).
//   kwnames:  tuple of keyword names, or NULL when there are none.
// Returns whatever PyObject_Vectorcall returned.
Py_LOCAL_INLINE(PyObject *) _Py_HOT_FUNCTION
call_function(PyThreadState *tstate,
              PyTraceInfo *trace_info,
              PyObject ***pp_stack,
              Py_ssize_t oparg,
              PyObject *kwnames)
{
    PyObject **pfunc = (*pp_stack) - oparg - 1;  // step back over the oparg arguments plus one slot: that slot holds the callable (oparg is an argument count, not an opcode)
    PyObject *func = *pfunc;
    PyObject *x, *w;
    Py_ssize_t nkwargs = (kwnames == NULL) ? 0 : PyTuple_GET_SIZE(kwnames);
    Py_ssize_t nargs = oparg - nkwargs;                 // number of positional arguments
    PyObject **stack = (*pp_stack) - nargs - nkwargs;   // address of the first argument

    x = PyObject_Vectorcall(func, stack, nargs | PY_VECTORCALL_ARGUMENTS_OFFSET, kwnames); // perform the call through vectorcall

    /* Clear the stack of the function object. */
    // Pops and releases every slot from the top down to and including pfunc.
    while ((*pp_stack) > pfunc) {
        w = EXT_POP(*pp_stack);
        Py_DECREF(w);
    }

    return x;
}

2.5.2 查找执行引导函数

 

// Call `callable` with the given argument array: fetches the current thread
// state and forwards everything to _PyObject_VectorcallTstate.
static inline PyObject *
PyObject_Vectorcall(PyObject *callable, PyObject *const *args,
                     size_t nargsf, PyObject *kwnames)
{
    PyThreadState *tstate = PyThreadState_Get();
    return _PyObject_VectorcallTstate(tstate, callable,
                                      args, nargsf, kwnames);   // locate the vectorcall function and run it
}


// Look up the vectorcall function of `callable` and invoke it with the given
// arguments. The excerpt is abridged: `...` marks omitted code.
static inline PyObject *
_PyObject_VectorcallTstate(PyThreadState *tstate, PyObject *callable,
                           PyObject *const *args, size_t nargsf,
                           PyObject *kwnames)
{
    vectorcallfunc func;
    PyObject *res;
    ...
    func = PyVectorcall_Function(callable);    // fetch the vectorcall function pointer stored in the object
    ...
    res = func(callable, args, nargsf, kwnames); // run the vectorcall function
    // The result is passed through _Py_CheckFunctionResult before being returned.
    return _Py_CheckFunctionResult(tstate, callable, res, NULL);
}

// Return the vectorcall function pointer stored inside `callable`, read from
// the byte offset recorded in its type. The excerpt is abridged: `....`
// marks omitted code.
static inline vectorcallfunc
PyVectorcall_Function(PyObject *callable)
{
    PyTypeObject *tp;
    Py_ssize_t offset;
    vectorcallfunc ptr;

    tp = Py_TYPE(callable);              // fetch the type object of callable (a lookup, not a cast)
    ....
    offset = tp->tp_vectorcall_offset;  // byte offset of the vectorcall pointer inside the object
    assert(offset > 0);
    memcpy(&ptr, (char *) callable + offset, sizeof(ptr)); // copy the function pointer stored at that offset into ptr
    return ptr;  // hand the function pointer back to the caller
}

 

posted @ 2024-07-22 16:37  G1733  阅读(5)  评论(0编辑  收藏  举报