Lua GC 之 Finalizer

lua 的 __gc元方法,被称为 finalizer 。这里根据其特点结合源码进行分析:

1. 设置 __gc 元方法

要想对象的 __gc 元方法生效,必须在设置对象的元方法的时候元表已经存在 __gc 方法,(即调用 setmetatable 时,元表已经存在__gc方法)。以后即使对元表增加了 __gc 元方法,也不会生效。见下面的代码

1 mt = {}
2 obj = setmetatable({}, mt)
3 mt.__gc = function print "do gc" end
4 -- obj对象在内存释放前并不会执行__gc元方法。虽然查看obj的元表可以看到__gc元方法
5 
6 setmetatable(obj, mt)
7 -- obj对象的__gc元方法,产生作用。会再内存释放前被调用

  原因在于仅仅只有的 lua_setmetatable 才会调用 luaC_checkfinalizer 函数。luaC_checkfinalizer 函数检查表中是否有 __gc 方法,如果有,则将对象从 allgc 链中移到了 finobj 链中(lua 5.2 中 Table 和 Userdata 类型均支持__gc元方法,但 lua5.1 仅 Userdata 类型支持)。其实现如下:

LUA_API int lua_setmetatable (lua_State *L, int objindex) {
  TValue *obj;
  Table *mt;
  lua_lock(L);
  api_checknelems(L, 1);
  obj = index2addr(L, objindex);
  api_checkvalidindex(L, obj);
  if (ttisnil(L->top - 1))
    mt = NULL;
  else {
    api_check(L, ttistable(L->top - 1), "table expected");
    mt = hvalue(L->top - 1);
  }
  switch (ttypenv(obj)) {
    case LUA_TTABLE: {
      hvalue(obj)->metatable = mt;
      if (mt)
        luaC_objbarrierback(L, gcvalue(obj), mt);
        luaC_checkfinalizer(L, gcvalue(obj), mt);
      break;
    }
    case LUA_TUSERDATA: {
      uvalue(obj)->metatable = mt;
      if (mt) {
        luaC_objbarrier(L, rawuvalue(obj), mt);
        luaC_checkfinalizer(L, gcvalue(obj), mt);
      }
      break;
    }
    default: {
      G(L)->mt[ttypenv(obj)] = mt;
      break;
    }
  }
  L->top--;
  lua_unlock(L);
  return 1;
}

/*
** if object 'o' has a finalizer, remove it from 'allgc' list (must
** search the list to find it) and link it in 'finobj' list.
*/
void luaC_checkfinalizer (lua_State *L, GCObject *o, Table *mt) {
  global_State *g = G(L);
  if (testbit(gch(o)->marked, SEPARATED) || /* obj. is already separated... */
      isfinalized(o) ||                           /* ... or is finalized... */
      gfasttm(g, mt, TM_GC) == NULL)                /* or has no finalizer? */
    return;  /* nothing to be done */
  else {  /* move 'o' to 'finobj' list */
    GCObject **p;
    GCheader *ho = gch(o);
    if (g->sweepgc == &ho->next) {  /* avoid removing current sweep object */
      lua_assert(issweepphase(g));
      g->sweepgc = sweeptolive(L, g->sweepgc, NULL);
    }
    /* search for pointer pointing to 'o' */
    for (p = &g->allgc; *p != o; p = &gch(*p)->next) { /* empty */ }
    *p = ho->next;  /* remove 'o' from root list */
    ho->next = g->finobj;  /* link it in list 'finobj' */
    g->finobj = o;
    l_setbit(ho->marked, SEPARATED);  /* mark it as such */
    if (!keepinvariant(g))  /* not keeping invariant? */
      makewhite(g, o);  /* "sweep" object */
    else
      resetoldbit(o);  /* see MOVE OLD rule */
  }
}

 也就是说你单独对 metatable 的修改并不会触发 luaC_checkfinalizer 函数的对象分类操作。

为什么会这样?原因很简单,性能。表的操作太频繁了,所以不能在对表的赋值操作中插入 luaC_checkfinalizer 函数。

2. __gc 元方法执行

__gc 元方法执行的时候,弱引用特点:

     弱键强值表可访问(以对象自身为键)

     弱值表不可访问(以对象自身为值)

见下面代码:

 1 mt = {__gc = function(self)
 2                 print("self as key:", tk[self])
 3                 print("self as value:", tv[self.i])
 4                 print("self as key and value:", tkv[self])
 5             end
 6         }
 7 
 8 tk = setmetatable({}, {__mode='k'})
 9 tv = setmetatable({}, {__mode='v'})
10 tkv = setmetatable({}, {__mode='kv'})
11 
12 obj = setmetatable({}, mt)
13 obj.i = 1
14 
15 
16 tk[obj] = 'obj value'
17 tv[obj.i] = obj
18 tkv[obj] = obj
19 
20 collectgarbage()
21 
22 print '----'
23 
24 obj = nil
25 
26 
27 collectgarbage()
28 print '----'

 输出如下:

----
self as key:    obj value
self as value:  nil
self as key and value:  nil
----

 从上面的输出可以看到 __gc 元方法调用时,仅仅 tk 表中以自身为键的的值还可以访问。为什么仅仅是键可访问,而不是值?主要是基于这点:通常我们用表以该对象为弱键,来保存该对象的备注信息。可参考 PIL 中的 17.2 关联对象属性 和 lua 邮件组中的讨论

 

筛选可释放对象,实现细节(有删减):

static l_mem atomic (lua_State *L) {
  global_State *g = G(L);

// 弱表(弱值、弱键弱值)中清除不可达的值 /* at this point, all strongly accessible objects are marked. */ /* clear values from weak tables, before checking finalizers */ clearvalues(g, g->weak, NULL); clearvalues(g, g->allweak, NULL);
// 将不可达对象从 finobj 链移入 tobefnz 链 separatetobefnz(L, 0); /* separate objects to be finalized */

// 将 tobefnz 链中所有对象及其引用的对象,都标记为可达对象 markbeingfnz(g); /* mark objects that will be finalized */ propagateall(g); /* remark, to propagate `preserveness' */
// 弱表(弱键、弱键弱值)中清理不可达的 键 /* at this point, all resurrected objects are marked. */ /* remove dead objects from weak tables */ clearkeys(g, g->ephemeron, NULL); /* clear keys from all ephemeron tables */ clearkeys(g, g->allweak, NULL); /* clear keys from all allweak tables */ return work; /* estimate of memory marked by 'atomic' */ }

 可以看到, 先清理弱表中的键,再复活待执行对象,这时才开始清理弱表中的值。

g->allweak、g->weak、g->ephemeron 这三个链表存放的是弱表,下篇文章会详细介绍弱表

执行 __gc 元方法,实现细节:

/*
** performs a basic GC step
*/
void luaC_forcestep (lua_State *L) {
  global_State *g = G(L);
  int i;
  if (isgenerational(g)) generationalcollection(L);
  else incstep(L);
// 一次最多执行 GCFINALIZENUM 个对象的 __gc 元方法
  /* run a few finalizers (or all of them at the end of a collect cycle) */ 
  for (i = 0; g->tobefnz && (i < GCFINALIZENUM || g->gcstate == GCSpause); i++)
    GCTM(L, 1);  /* call one finalizer */
}

static void GCTM (lua_State *L, int propagateerrors) {
  global_State *g = G(L);
  const TValue *tm;
  TValue v;
  setgcovalue(L, &v, udata2finalize(g)); // udata2finalize(g) 将对象从 tobefnz 链中移入 allgc 链
  tm = luaT_gettmbyobj(L, &v, TM_GC);
  if (tm != NULL && ttisfunction(tm)) {  /* is there a finalizer? */
    int status;
    lu_byte oldah = L->allowhook;
    int running  = g->gcrunning;
    L->allowhook = 0;  /* stop debug hooks during GC metamethod */
    g->gcrunning = 0;  /* avoid GC steps */
    setobj2s(L, L->top, tm);  /* push finalizer... */
    setobj2s(L, L->top + 1, &v);  /* ... and its argument */
    L->top += 2;  /* and (next line) call the finalizer */
    status = luaD_pcall(L, dothecall, NULL, savestack(L, L->top - 2), 0); // 执行 __gc 元方法
    L->allowhook = oldah;  /* restore hooks */
    g->gcrunning = running;  /* restore state */
    if (status != LUA_OK && propagateerrors) {  /* error while running __gc? */
      if (status == LUA_ERRRUN) {  /* is there an error object? */
        const char *msg = (ttisstring(L->top - 1))
                            ? svalue(L->top - 1)
                            : "no message";
        luaO_pushfstring(L, "error in __gc metamethod (%s)", msg);
        status = LUA_ERRGCMM;  /* error in __gc metamethod */
      }
      luaD_throw(L, status);  /* re-throw error */
    }
  }
}

 

 GCTM 函数会从 tobefnz 链中移出一个对象,并放入 allobj 链,然后执行该对象的 __gc 元方法。

 

 

最后总结下对象的流向:

             lua_setmetable                               separatetobefnz                               GCTM

   allgc --------------------------> finobj ------------------------> tobefnz ---------------------> allgc

 

 

posted on 2012-12-27 18:05  JesseFang  阅读(3208)  评论(0编辑  收藏  举报