gcc对构造函数的调用生成
identifier
C++的前端对identifier做了扩展,在每个identifier中还包含了两个cxx_binding字段:namespace_bindings和bindings。当通过字符串找到一个identifier的时候,同时顺带获得了两个binding信息。
/* Language-dependent contents of an identifier.  The C++ front end
   stores two cxx_binding pointers in every identifier, so finding an
   identifier by its spelling also yields its binding information. */
struct GTY(()) lang_identifier {
/* NOTE(review): presumably the identifier part shared by the C-family
   front ends -- confirm against the c-family headers. */
struct c_common_identifier c_common;
/* NOTE(review): presumably the namespace-scope bindings of this name;
   no accessor for it is visible in this excerpt -- confirm. */
cxx_binding *namespace_bindings;
/* NOTE(review): presumably the chain accessed through
   IDENTIFIER_BINDING, innermost binding first -- confirm. */
cxx_binding *bindings;
tree class_template_info;
tree label_value;
};
事实上,在LANG_HOOKS_INITIALIZER的定义中,第一个字段是语言的名字,而第二个字段就是语言中identifier的大小,可见这个结构还是相当重要的。
/* The whole thing. The structure is defined in langhooks.h. */
#define LANG_HOOKS_INITIALIZER { \
LANG_HOOKS_NAME, \
LANG_HOOKS_IDENTIFIER_SIZE, \
LANG_HOOKS_FREE_LANG_DATA, \
LANG_HOOKS_TREE_SIZE, \
binding
在cxx_binding结构中,有value和type两个字段,注释也比较贴心,就是这个标识符的类型和值信息。
由于同一个identifier在不同的scope中可能有不同的意义,所以结构中还包含了previous字段将所有scope中同名的标识符串在一起放在链表中。
/* One binding of a name within one scope.  It records the non-type
   entity (VALUE) and the type entity (TYPE) the name is bound to, the
   scope the binding was made in, and a link to further bindings of the
   same name. */
struct cxx_binding {
/* Link to chain together various bindings for this name. */
cxx_binding *previous;
/* The non-type entity this name is bound to. */
tree value;
/* The type entity this name is bound to. */
tree type;
/* The scope at which this binding was made. */
cp_binding_level *scope;
/* NOTE(review): presumably the bit behind INHERITED_VALUE_BINDING_P
   -- confirm against the macro definition. */
unsigned value_is_inherited : 1;
/* NOTE(review): presumably the bit behind LOCAL_BINDING_P, i.e. set
   for bindings that were not made at class scope -- confirm. */
unsigned is_local : 1;
};
创建
当语法分析到一个变量/类型/函数等声明的时候,会为这个标识符创建一个对应的binding。在push_binding函数中会记录这个标识符的声明(decl),并把它放在链表的最开始(And put it on the front of the list of bindings for ID)。
/* Make DECL the innermost binding for ID. The LEVEL is the binding
level at which this declaration is being bound.
ID is the identifier being bound, DECL becomes the binding's value and
the binding's type is left as NULL_TREE.  Nothing is returned; the new
binding becomes the head of ID's binding chain. */
void
push_binding (tree id, tree decl, cp_binding_level* level)
{
cxx_binding *binding;
/* Bindings outside the current class scope are created directly and
   tagged with LEVEL here; class-scope bindings are delegated to
   new_class_binding, which receives ID and LEVEL itself. */
if (level != class_binding_level)
{
binding = cxx_binding_make (decl, NULL_TREE);
binding->scope = level;
}
else
binding = new_class_binding (id, decl, /*type=*/NULL_TREE, level);
/* Now, fill in the binding information. */
/* The binding that was innermost until now becomes the next link. */
binding->previous = IDENTIFIER_BINDING (id);
INHERITED_VALUE_BINDING_P (binding) = 0;
/* A binding counts as local exactly when it was not made at the
   current class binding level. */
LOCAL_BINDING_P (binding) = (level != class_binding_level);
/* And put it on the front of the list of bindings for ID. */
IDENTIFIER_BINDING (id) = binding;
}
查找
在语法解析的时候,如果遇到一个新的标识符,首先会根据这个标识符的字符串找到对应的结构,然后从当前scope逐层向外搜索这个标识符的信息。 实现上看主要就是从查找到的identifier的binding信息逐层向外找到最近的标识符声明。
/* Look up NAME in the current binding level and its superiors in the
namespace of variables, functions and typedefs. Return a ..._DECL
node of some kind representing its definition if there is only one
such declaration, or return a TREE_LIST with all the overloaded
definitions if there are many, or return 0 if it is undefined.
Hidden name, either friend declaration or built-in function, are
not ignored.
If PREFER_TYPE is > 0, we prefer TYPE_DECLs or namespaces.
If PREFER_TYPE is > 1, we reject non-type decls (e.g. namespaces).
Otherwise we prefer non-TYPE_DECLs.
If NONCLASS is nonzero, bindings in class scopes are ignored. If
BLOCK_P is false, bindings in block scopes are ignored. */
static tree
lookup_name_real_1 (tree name, int prefer_type, int nonclass, bool block_p,
int namespaces_only, int flags)
{
///...
/* Walk the non-namespace bindings only if block-scope bindings or
   class-scope bindings (or both) are wanted. */
if (block_p || !nonclass)
/* Passing NULL starts at the current scope; each later call moves
   outward from ITER.  Class bindings take part only when NONCLASS
   is zero. */
for (iter = outer_binding (name, NULL, !nonclass);
iter;
iter = outer_binding (name, iter, !nonclass))
{
tree binding;
/* Skip entities we don't want. */
/* A local binding is skipped when block scopes are excluded; any
   other binding is skipped when NONCLASS is set. */
if (LOCAL_BINDING_P (iter) ? !block_p : nonclass)
continue;
/* If this is the kind of thing we're looking for, we're done. */
/* The value is tried first; the type is considered only when
   FLAGS contains LOOKUP_PREFER_TYPES. */
if (qualify_lookup (iter->value, flags))
binding = iter->value;
else if ((flags & LOOKUP_PREFER_TYPES)
&& qualify_lookup (iter->type, flags))
binding = iter->type;
else
binding = NULL_TREE;
if (binding)
{
if (hidden_name_p (binding))
{
/* A non namespace-scope binding can only be hidden in the
presence of a local class, due to friend declarations.
In particular, consider:
struct C;
void f() {
struct A {
friend struct B;
friend struct C;
void g() {
B* b; // error: B is hidden
C* c; // OK, finds ::C
}
};
B *b; // error: B is hidden
C *c; // OK, finds ::C
struct B {};
B *bb; // OK
}
The standard says that "B" is a local class in "f"
(but not nested within "A") -- but that name lookup
for "B" does not find this declaration until it is
declared directly with "f".
In particular:
[class.friend]
If a friend declaration appears in a local class and
the name specified is an unqualified name, a prior
declaration is looked up without considering scopes
that are outside the innermost enclosing non-class
scope. For a friend function declaration, if there is
no prior declaration, the program is ill-formed. For a
friend class declaration, if there is no prior
declaration, the class that is specified belongs to the
innermost enclosing non-class scope, but if it is
subsequently referenced, its name is not found by name
lookup until a matching declaration is provided in the
innermost enclosing nonclass scope.
So just keep looking for a non-hidden binding.
*/
gcc_assert (TREE_CODE (binding) == TYPE_DECL);
continue;
}
/* The first acceptable, non-hidden binding ends the walk. */
val = binding;
break;
}
}
///...
}
/* Return the innermost non-namespace binding for NAME from a scope
containing BINDING, or, if BINDING is NULL, the current scope.
Please note that for a given template, the template parameters are
considered to be in the scope containing the current scope.
If CLASS_P is false, then class bindings are ignored. */
cxx_binding *
outer_binding (tree name,
cxx_binding *binding,
bool class_p)
{
cxx_binding *outer;
cp_binding_level *scope;
cp_binding_level *outer_scope;
if (binding)
{
/* Resume the walk: step to the next scope on the level chain of
   BINDING's scope and to the next binding on NAME's chain. */
scope = binding->scope->level_chain;
outer = binding->previous;
}
else
{
/* Fresh walk: begin at the current binding level with the binding
   currently at the head of NAME's chain. */
scope = current_binding_level;
outer = IDENTIFIER_BINDING (name);
}
///...
}
生效
以C++最为关心的struct/class为例,在解析完成类似于struct Tsecer;这样合法的结构之后,会立即调用xref_tag来创建一个对于这个类型的前置声明。这种实现感觉也是CRTP(Curiously Recurring Template Pattern)的一个实现基础:在派生类的名字解析完成之后,派生类的tag已经可用,此时可以在基类中把该类型作为模板参数使用,例如struct Derived : Base<Derived> {};。
/* Get the struct, enum or union (TAG_CODE says which) with tag NAME.
Define the tag as a forward-reference if it is not defined.
If a declaration is given, process it here, and report an error if
multiple declarations are not identical.
SCOPE is TS_CURRENT when this is also a definition. Only look in
the current frame for the name (since C++ allows new names in any
scope.) It is TS_WITHIN_ENCLOSING_NON_CLASS if this is a friend
declaration. Only look beginning from the current scope outward up
till the nearest non-class scope. Otherwise it is TS_GLOBAL.
TEMPLATE_HEADER_P is true when this declaration is preceded by
a set of template parameters. */
static tree
xref_tag_1 (enum tag_types tag_code, tree name,
tag_scope orig_scope, bool template_header_p)
{
///...
/* Create a new class type node and record its context.
   NOTE(review): CODE, CONTEXT and SCOPE are set in the elided part;
   presumably this runs only when no existing tag was found --
   confirm. */
t = make_class_type (code);
TYPE_CONTEXT (t) = context;
if (orig_scope == ts_lambda)
/* Remember that we're declaring a lambda to avoid bogus errors
in push_template_decl. */
CLASSTYPE_LAMBDA_EXPR (t) = error_mark_node;
/* Push the tag for NAME in SCOPE; T is replaced by what pushtag
   returns. */
t = pushtag (name, t, scope);
///...
}
构造函数
在cp_finish_decl==>>check_initializer函数中,如果一个变量声明时有Init初始化内容,则会判断是否需要调用构造函数。
/* Verify INIT (the initializer for DECL), and record the
initialization in DECL_INITIAL, if appropriate. CLEANUP is as for
grok_reference_init.
If the return value is non-NULL, it is an expression that must be
evaluated dynamically to initialize DECL. */
static tree
check_initializer (tree decl, tree init, int flags, vec<tree, va_gc> **cleanups)
{
///...
/* Build the initialization through build_aggr_init_full_exprs when
   either
   (a) type_build_ctor_call holds for TYPE or TYPE is a class type,
       FLAGS lacks LOOKUP_ALREADY_DIGESTED, and INIT is not a
       brace-enclosed initializer for an aggregate type that is a
       class type, does not need constructing, or has extended
       temporaries; or
   (b) DECL is a decomposition declaration whose type is an array. */
if (((type_build_ctor_call (type) || CLASS_TYPE_P (type))
&& !(flags & LOOKUP_ALREADY_DIGESTED)
&& !(init && BRACE_ENCLOSED_INITIALIZER_P (init)
&& CP_AGGREGATE_TYPE_P (type)
&& (CLASS_TYPE_P (type)
|| !TYPE_NEEDS_CONSTRUCTING (type)
|| type_has_extended_temps (type))))
|| (DECL_DECOMPOSITION_P (decl) && TREE_CODE (type) == ARRAY_TYPE))
{
init_code = build_aggr_init_full_exprs (decl, init, flags);
/* A constructor call is a non-trivial initializer even if
it isn't explicitly written. */
if (TREE_SIDE_EFFECTS (init_code))
DECL_NONTRIVIALLY_INITIALIZED_P (decl) = true;
///...
}
check_initializer==>>build_aggr_init_full_exprs==>>build_aggr_init==>>expand_aggr_init_1==>>expand_default_init==>>build_special_member_call,这条调用链最终会触发对于构造函数的调用。
/* Excerpt: pick the constructor identifier to use for EXP and build
   the call with build_special_member_call, passing BINFO, FLAGS and
   COMPLAIN through unchanged.
   NOTE(review): presumably TRUE_EXP == EXP means a complete object is
   being initialized rather than a base subobject -- confirm. */
static void
expand_default_init (tree binfo, tree true_exp, tree exp, tree init, int flags,
tsubst_flags_t complain)
{
///...
/* Use complete_ctor_identifier when TRUE_EXP and EXP are the same
   expression, base_ctor_identifier otherwise. */
if (true_exp == exp)
ctor_name = complete_ctor_identifier;
else
ctor_name = base_ctor_identifier;
/* PARMS is prepared in the elided part above. */
rval = build_special_member_call (exp, ctor_name, &parms, binfo, flags,
complain);
///...
}
结论
- 构造和赋值
编译器会在变量定义的时候判断是否需要调用构造函数。对于同一个对象,声明时的初始化和之后的赋值虽然都写作“=”,但是两处有不同的语义。
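在下面代码中,第19行的内容除了没有第18行开头的类型声明之外,赋值符号左右的内容都是相同的,但是因为一个是声明上下文,一个是赋值上下文,编译器执行的动作也大相径庭:一个是拷贝构造,一个是赋值。(注意:从C++17开始,第18行这种用同类型纯右值初始化对象的拷贝属于强制省略,-fno-elide-constructors也无法关闭;要复现下面的输出,需要使用-std=c++14或更早的标准编译。)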
tsecer@harry: cat -n context.cpp
1 #include <stdio.h>
2 struct Tsecer
3 {
4 Tsecer() = default;
5
6 Tsecer(const Tsecer &t)
7 {
8 printf("copy constructor\n");
9 }
10 Tsecer& operator = (const Tsecer &t)
11 {
12 printf("assign operator\n"); return *this;
13 }
14 };
15
16 int main(int argc, const char *argv[])
17 {
18 Tsecer t1 = Tsecer();
19 t1 = Tsecer();
20 return 0;
21 }
22
tsecer@harry: g++ -fno-elide-constructors context.cpp
tsecer@harry: ./a.out
copy constructor
assign operator
tsecer@harry:
- 标识符
gcc对于字符串形式的identifier在不同的scope中具体代表什么意义的实现比较巧妙,但也还算比较直观,只是稍微有一点绕,如果不了解这个实现机制,有些地方代码阅读的时候可能会觉得不容易理解。