python中module内全局变量的局部性
问题
python中的模块主要通过import实现。一个常见的典型场景是在A模块import模块B的func,并调用B模块的func;而B模块的func又引用了所在模块B的gvar变量。这个过程中A其实只import了模块B的func函数,在A模块调用B模块的func时,func函数能不忘初心,找到自己当初定义模块中的gvar吗?
当然,这个问题很容易验证,这里讨论的是python内部的实现机制。
MAKE_FUNCTION
在执行函数定义(MAKE_FUNCTION)时,新创建的函数对象会把当前栈帧的f_globals(也就是所在模块的global字典)保存到自己的func_globals字段中。
TARGET(MAKE_FUNCTION) {
PyObject *qualname = POP();
PyObject *codeobj = POP();
PyFunctionObject *func = (PyFunctionObject *)
PyFunction_NewWithQualName(codeobj, f->f_globals, qualname);
///
/* Create a new function object for `code` and record `globals` on it.
   This excerpt is abridged (see the elision marker near the end); only the
   part that stores the code object and the globals is shown.
   Returns NULL if interning "__name__" or allocating the object fails. */
PyObject *
PyFunction_NewWithQualName(PyObject *code, PyObject *globals, PyObject *qualname)
{
PyFunctionObject *op;
PyObject *doc, *consts, *module;
/* Interned "__name__" string, created on the first call and cached in a
   function-level static afterwards. */
static PyObject *__name__ = NULL;
if (__name__ == NULL) {
__name__ = PyUnicode_InternFromString("__name__");
if (__name__ == NULL)
return NULL;
}
op = PyObject_GC_New(PyFunctionObject, &PyFunction_Type);
if (op == NULL)
return NULL;
op->func_weakreflist = NULL;
/* The function object takes its own reference to the code object. */
Py_INCREF(code);
op->func_code = code;
/* Likewise for the globals: the pointer stored here is the one that
   PyFunction_GET_GLOBALS reads back when the function is called. */
Py_INCREF(globals);
op->func_globals = globals;
///...
}
CALL_FUNCTION
在调用函数时,解释器从函数对象中取出func_globals字段,并把它作为globals传递给新的函数栈帧去执行。
/* Read the func_globals field of `func`. The argument is cast to
   PyFunctionObject * without any type check. */
#define PyFunction_GET_GLOBALS(func) \
(((PyFunctionObject *)func) -> func_globals)
/* Call the Python function object `func`.
   `stack` points at the arguments, `nargs` is the number of positional
   arguments and `kwnames` is either NULL or a tuple of keyword names.
   The globals used for the call are the ones stored on the function object
   (func_globals), not those of the caller. */
static PyObject *
fast_function(PyObject *func, PyObject **stack,
Py_ssize_t nargs, PyObject *kwnames)
{
PyCodeObject *co = (PyCodeObject *)PyFunction_GET_CODE(func);
/* Globals captured when the function object was created. */
PyObject *globals = PyFunction_GET_GLOBALS(func);
PyObject *argdefs = PyFunction_GET_DEFAULTS(func);
PyObject *kwdefs, *closure, *name, *qualname;
PyObject **d;
Py_ssize_t nkwargs = (kwnames == NULL) ? 0 : PyTuple_GET_SIZE(kwnames);
Py_ssize_t nd;
assert(PyFunction_Check(func));
assert(nargs >= 0);
assert(kwnames == NULL || PyTuple_CheckExact(kwnames));
assert((nargs == 0 && nkwargs == 0) || stack != NULL);
/* kwnames must only contains str strings, no subclass, and all keys must
be unique */
PCALL(PCALL_FUNCTION);
PCALL(PCALL_FAST_FUNCTION);
/* Fast path: no keyword-only parameters, no keyword arguments passed, and
   the code flags are exactly CO_OPTIMIZED | CO_NEWLOCALS | CO_NOFREE. */
if (co->co_kwonlyargcount == 0 && nkwargs == 0 &&
co->co_flags == (CO_OPTIMIZED | CO_NEWLOCALS | CO_NOFREE))
{
/* No defaults and the positional count matches exactly. */
if (argdefs == NULL && co->co_argcount == nargs) {
return _PyFunction_FastCall(co, stack, nargs, globals);
}
else if (nargs == 0 && argdefs != NULL
&& co->co_argcount == Py_SIZE(argdefs)) {
/* function called with no arguments, but all parameters have
a default value: use default values as arguments .*/
stack = &PyTuple_GET_ITEM(argdefs, 0);
return _PyFunction_FastCall(co, stack, Py_SIZE(argdefs), globals);
}
}
/* General path: collect the remaining attributes of the function object
   and hand everything to _PyEval_EvalCodeWithName. */
kwdefs = PyFunction_GET_KW_DEFAULTS(func);
closure = PyFunction_GET_CLOSURE(func);
name = ((PyFunctionObject *)func) -> func_name;
qualname = ((PyFunctionObject *)func) -> func_qualname;
/* d / nd: pointer to the first positional default and their count, or
   NULL / 0 when the function has no defaults. */
if (argdefs != NULL) {
d = &PyTuple_GET_ITEM(argdefs, 0);
nd = Py_SIZE(argdefs);
}
else {
d = NULL;
nd = 0;
}
/* The function's own globals are passed and locals is NULL.
   NOTE(review): stack + nargs is passed as the keyword-value pointer, so
   keyword values presumably follow the positional arguments on the stack;
   confirm against _PyEval_EvalCodeWithName. */
return _PyEval_EvalCodeWithName((PyObject*)co, globals, (PyObject *)NULL,
stack, nargs,
nkwargs ? &PyTuple_GET_ITEM(kwnames, 0) : NULL,
stack + nargs,
nkwargs, 1,
d, (int)nd, kwdefs,
closure, name, qualname);
}
原始global
前面的MAKE_FUNCTION从当前栈帧的f_globals取数据,CALL_FUNCTION从函数对象的func_globals取数据,而栈帧的globals又是调用时从函数对象传进来的,貌似陷入了一个“鸡生蛋,蛋生鸡”的怪圈。那最原始的global是从哪里来的呢?
在创建一个新模块时,会通过module_dict_for_exec函数为模块创建一个新的global字典。在执行PyEval_EvalCode函数的时候,传入的globals和locals都是这个新创建的模块字典。
/* Execute the code object `co` as the body of the module called `name`.
   Steps visible here: obtain the module dictionary via
   module_dict_for_exec(), call _fix_up_module() from the interpreter's
   importlib "_bootstrap_external" attribute, then run the code with that
   dictionary through exec_code_in_module().
   Returns the result of exec_code_in_module(), or NULL on failure. */
PyObject*
PyImport_ExecCodeModuleObject(PyObject *name, PyObject *co, PyObject *pathname,
PyObject *cpathname)
{
PyObject *d, *external, *res;
PyInterpreterState *interp = PyThreadState_GET()->interp;
_Py_IDENTIFIER(_fix_up_module);
/* d is the dictionary that will serve as the module's globals. */
d = module_dict_for_exec(name);
if (d == NULL) {
return NULL;
}
/* Fall back to the filename recorded in the code object. */
if (pathname == NULL) {
pathname = ((PyCodeObject *)co)->co_filename;
}
external = PyObject_GetAttrString(interp->importlib, "_bootstrap_external");
if (external == NULL)
return NULL;
/* Calls external._fix_up_module(d, name, pathname, cpathname). */
res = _PyObject_CallMethodIdObjArgs(external,
&PyId__fix_up_module,
d, name, pathname, cpathname, NULL);
Py_DECREF(external);
/* Only run the module code if the fix-up call succeeded; its own return
   value is discarded. */
if (res != NULL) {
Py_DECREF(res);
res = exec_code_in_module(name, d, co);
}
return res;
}
/* Run `code_object` with `module_dict` as its namespace, then return the
   module registered under `name` in the dictionary returned by
   PyImport_GetModuleDict().
   Returns NULL if evaluation fails (after calling remove_module(name)) or
   if the module cannot be found afterwards (ImportError is set). */
static PyObject *
exec_code_in_module(PyObject *name, PyObject *module_dict, PyObject *code_object)
{
PyObject *modules = PyImport_GetModuleDict();
PyObject *v, *m;
/* The same dictionary is passed as both globals and locals, so at module
   level the two namespaces are one and the same object. */
v = PyEval_EvalCode(code_object, module_dict, module_dict);
if (v == NULL) {
remove_module(name);
return NULL;
}
/* The value produced by evaluating the module body is not needed. */
Py_DECREF(v);
if ((m = PyDict_GetItem(modules, name)) == NULL) {
PyErr_Format(PyExc_ImportError,
"Loaded module %R not found in sys.modules",
name);
return NULL;
}
/* Take a reference for the caller before returning the module. */
Py_INCREF(m);
return m;
}
/* Evaluate the code object `co` with the given `globals` and `locals`.
   Thin wrapper: forwards to PyEval_EvalCodeEx() with every remaining
   argument passed as NULL or 0. */
PyObject *
PyEval_EvalCode(PyObject *co, PyObject *globals, PyObject *locals)
{
return PyEval_EvalCodeEx(co,
globals, locals,
(PyObject **)NULL, 0,
(PyObject **)NULL, 0,
(PyObject **)NULL, 0,
NULL, NULL);
}
import *
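从下面的代码可以明显的看到,这里的流程是将指定module的__all__所列的名字(没有__all__时,则是__dict__中不以下划线开头的名字)导出到当前栈帧的locals(注意:不是globals)命名空间中。需要补充的是:Python 3只允许在模块级别使用from module import *,而模块级别代码的locals和globals是同一个字典(参见前面exec_code_in_module传给PyEval_EvalCode的参数),所以效果上等同于导入到了模块的global中。对于普通的import语句,这也意味着:如果是在一个函数内执行import,那么import的内容只会存在于函数内而不会存在于所在模块中。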
///@file: Python-3.6.0\Python\ceval.c
/* Bytecode evaluation loop. This excerpt is abridged (see the elision
   markers) and shows only the IMPORT_STAR opcode handler. */
PyObject *
_PyEval_EvalFrameDefault(PyFrameObject *f, int throwflag)
{
///...
/* IMPORT_STAR: pop the object being imported from and copy its exported
   names into the current frame's locals via import_all_from(). */
TARGET(IMPORT_STAR) {
PyObject *from = POP(), *locals;
int err;
/* NOTE(review): the two early `goto error` exits below do not
   Py_DECREF(from); the error label is not visible in this excerpt, so
   confirm whether `from` is released there. */
if (PyFrame_FastToLocalsWithError(f) < 0)
goto error;
/* The target namespace is the frame's f_locals, not f_globals. */
locals = f->f_locals;
if (locals == NULL) {
PyErr_SetString(PyExc_SystemError,
"no locals found during 'import *'");
goto error;
}
err = import_all_from(locals, from);
PyFrame_LocalsToFast(f, 0);
Py_DECREF(from);
if (err != 0)
goto error;
DISPATCH();
}
///...
}
/* Copy the names exported by `v` into the mapping `locals`.
   The list of names comes from v.__all__ when that attribute exists;
   otherwise it is the keys of v.__dict__, skipping names that start with
   an underscore.
   Returns 0 on success and a non-zero value on failure. */
static int
import_all_from(PyObject *locals, PyObject *v)
{
_Py_IDENTIFIER(__all__);
_Py_IDENTIFIER(__dict__);
PyObject *all = _PyObject_GetAttrId(v, &PyId___all__);
PyObject *dict, *name, *value;
int skip_leading_underscores = 0;
int pos, err;
if (all == NULL) {
/* Only a missing __all__ (AttributeError) triggers the __dict__
   fallback; any other error is propagated. */
if (!PyErr_ExceptionMatches(PyExc_AttributeError))
return -1; /* Unexpected error */
PyErr_Clear();
dict = _PyObject_GetAttrId(v, &PyId___dict__);
if (dict == NULL) {
if (!PyErr_ExceptionMatches(PyExc_AttributeError))
return -1;
PyErr_SetString(PyExc_ImportError,
"from-import-* object has no __dict__ and no __all__");
return -1;
}
all = PyMapping_Keys(dict);
Py_DECREF(dict);
if (all == NULL)
return -1;
/* Underscore filtering applies only to the __dict__ fallback; names
   listed in an explicit __all__ are never filtered. */
skip_leading_underscores = 1;
}
/* Walk `all` by index until PySequence_GetItem fails; an IndexError marks
   the normal end of the sequence, anything else is a real error. */
for (pos = 0, err = 0; ; pos++) {
name = PySequence_GetItem(all, pos);
if (name == NULL) {
if (!PyErr_ExceptionMatches(PyExc_IndexError))
err = -1;
else
PyErr_Clear();
break;
}
if (skip_leading_underscores &&
PyUnicode_Check(name) &&
PyUnicode_READY(name) != -1 &&
PyUnicode_READ_CHAR(name, 0) == '_')
{
Py_DECREF(name);
continue;
}
/* Fetch the attribute from `v` and store it under the same name in
   `locals`, using the dict API when `locals` is an exact dict and the
   generic item protocol otherwise. */
value = PyObject_GetAttr(v, name);
if (value == NULL)
err = -1;
else if (PyDict_CheckExact(locals))
err = PyDict_SetItem(locals, name, value);
else
err = PyObject_SetItem(locals, name, value);
Py_DECREF(name);
/* XDECREF because value is NULL when the attribute lookup failed. */
Py_XDECREF(value);
if (err != 0)
break;
}
Py_DECREF(all);
return err;
}
可以看到,在导入的时候使用了一个不太常见的__all__变量,这个变量在stackoverflow的一个帖子里有一个详细的说明。大致来说,这个变量(它并不是语言的关键字,只是一个约定的模块级名字)就是为了给 import *这种语句使用的,这样模块就可以自己确定哪些内容希望导出而哪些不希望导出(当然,只是在使用import *的时候)。
而且,这个表达式在一些NameSpace模块中更加常见。随便找一下,在python工程中自带的Python-3.6.0\Lib\dbm\__init__.py文件中就包含了一个选择性导出的__all__变量,控制这个模块的导出内容。
__all__ = ['open', 'whichdb', 'error']
__all__ is a list of strings defining what symbols in a module will be exported when from <module> import * is used on the module.
For example, the following code in a foo.py explicitly exports the symbols bar and baz:
__all__ = ['bar', 'baz']
waz = 5
bar = 10
def baz(): return 'baz'
These symbols can then be imported like so:
from foo import *
print(bar)
print(baz)
# The following will trigger an exception, as "waz" is not exported by the module
print(waz)
If the __all__ above is commented out, this code will then execute to completion, as the default behaviour of import * is to import all symbols that do not begin with an underscore, from the given namespace.
Reference: https://docs.python.org/3/tutorial/modules.html#importing-from-a-package
NOTE: __all__ affects the from <module> import * behavior only. Members that are not mentioned in __all__ are still accessible from outside the module and can be imported with from <module> import <member>.
栗子
经过这么多的分析,终于得到了一个简单的结论:
在python执行定义函数(MAKE_FUNCTION)时,会在函数对象中保存一个指向所在模块global字典的指针(就好像closure包含所有额外引用列表),执行函数(CALL_FUNCTION)时,使用的栈帧中同样会传入这个定义时绑定的global指针。从而在模块外调用该模块内定义的函数时,就好像在该模块内调用一样(可以访问到该模块的所有global变量)。
tsecer@harry: cat tsecer.py
import harry
gvar = "tsecer"
harry.hello()
print(gvar)
tsecer@harry: cat harry.py
def hello():
print(gvar)
"""
Traceback (most recent call last):
File "tsecer.py", line 1, in <module>
import harry
File "/home/harry/py.main.sub.global/harry.py", line 4, in <module>
hello()
File "/home/harry/py.main.sub.global/harry.py", line 2, in hello
print(gvar)
NameError: name 'gvar' is not defined
"""
#hello()
gvar = "harry.ac"
hello()
gvar = "harry.bc"
hello()
tsecer@harry: python3 tsecer.py
harry.ac
harry.bc
harry.bc
tsecer
tsecer@harry: