Lua 5.2.3源码阅读(02):TString字符串对象

lua中的字符串是对象,主要分析一下对象的结构和申请的方式。

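TString是一个union,为了保证对齐,其中加入了一个L_Umaxalign类型的dummy成员。按照C语言的规则,

union的对齐要求取所有成员中最严格的一个,其大小必须是该对齐值的整数倍;按照目前的定义,L_Umaxalign由double、void *、long组成,因此sizeof(TString)是这三者中最大对齐值的整数倍(常见平台上为8字节),紧跟在头部之后的字符串内容也就从对齐的地址开始。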

 1 /* type to ensure maximum alignment */
 2 #if !defined(LUAI_USER_ALIGNMENT_T)
 3 #define LUAI_USER_ALIGNMENT_T    union { double u; void *s; long l; }
 4 #endif
 5 
 6 typedef LUAI_USER_ALIGNMENT_T L_Umaxalign;
 7 
 8 /*
 9 ** Header for string value; string bytes follow the end of this structure
10 */
11 typedef union TString {
12   L_Umaxalign dummy;  /* ensures maximum alignment for strings */
13   struct {
14     CommonHeader;
15     lu_byte extra;  /* reserved words for short strings; "has hash" for longs */
16     unsigned int hash;
17     size_t len;  /* number of characters in string */
18   } tsv;
19 } TString;
20 
21 /*
22 ** Common Header for all collectable objects (in macro form, to be
23 ** included in other objects)
24 */
25 #define CommonHeader    GCObject *next; lu_byte tt; lu_byte marked

展开上述定义,实际TString的关键定义:

1   struct {
2     GCObject *next; 
3     lu_byte tt; 
4     lu_byte marked;
5     lu_byte extra;  /* reserved words for short strings; "has hash" for longs */
6     unsigned int hash;
7     size_t len;  /* number of characters in string */
8   } tsv;

tt用于表示GC对象的类型,marked是GC使用的标记位(对象的颜色等状态),后续再分析;extra对短字符串表示是否为

lua的保留字,对长字符串表示是否已经计算过hash;hash为字符串的哈希值(长字符串创建时先存放seed);len为字符串的长度。

 

TString对象在lstring.h中定义的函数:

1 LUAI_FUNC int luaS_eqlngstr (TString *a, TString *b);
2 LUAI_FUNC int luaS_eqstr (TString *a, TString *b);
3 LUAI_FUNC unsigned int luaS_hash (const char *str, size_t l, unsigned int seed);
4 LUAI_FUNC void luaS_resize (lua_State *L, int newsize);
5 LUAI_FUNC TString *luaS_newlstr (lua_State *L, const char *str, size_t l);
6 LUAI_FUNC TString *luaS_new (lua_State *L, const char *str);

主要看后面两个函数:luaS_newlstr和luaS_new。两者的区别只在参数:luaS_newlstr显式传入长度l,字符串中间可以包含'\0';luaS_new要求字符串以'\0'结尾,用strlen求出长度后直接调用luaS_newlstr。

创建字符串的过程中,根据字符串的长度,进行不同的处理:长度不超过LUAI_MAXSHORTLEN(即l <= 40)的短字符串,

先计算hash,在全局的字符串hash表中查找,存在则重用,不存在才创建;对于长度大于LUAI_MAXSHORTLEN的长字符串,则每次都创建一个新的

TString对象。另外一个区别是,对象挂载的GC链表位置不同:短字符串挂在global_State的字符串hash表(strt)中,

长字符串挂在global_State的全局GC链表allgc中。

 1 /*
 2 @@ LUAI_MAXSHORTLEN is the maximum length for short strings, that is,
 3 ** strings that are internalized. (Cannot be smaller than reserved words
 4 ** or tags for metamethods, as these strings must be internalized;
 5 ** #("function") = 8, #("__newindex") = 10.)
 6 */
 7 #define LUAI_MAXSHORTLEN        40
 8 
 9 /*
10 ** new string (with explicit length)
11 */
12 TString *luaS_newlstr (lua_State *L, const char *str, size_t l) {
13   if (l <= LUAI_MAXSHORTLEN)  /* short string? */
14     return internshrstr(L, str, l);
15   else {
16     if (l + 1 > (MAX_SIZET - sizeof(TString))/sizeof(char))
17       luaM_toobig(L);
18     return createstrobj(L, str, l, LUA_TLNGSTR, G(L)->seed, NULL);
19   }
20 }
21 
22 /*
23 ** new zero-terminated string
24 */
25 TString *luaS_new (lua_State *L, const char *str) {
26   return luaS_newlstr(L, str, strlen(str));
27 }

 

先看短字符串的流程,代码中直接说明了流程:
1、首先根据hash值找到hash表中对应的链表;
2、如果链表不为空,则遍历链表,依次比较hash值、长度和内容,看看是否存在相同的字符串对象;
3、如果查找失败,创建新的短字符串对象;
4、如果查找成功,若该对象已被GC标记为死亡(但尚未被回收),则先将其复活(changewhite),然后直接返回该对象。
 1 /*
 2 ** checks whether short string exists and reuses it or creates a new one
 3 */
 4 static TString *internshrstr (lua_State *L, const char *str, size_t l) {
 5   GCObject *o;
 6   global_State *g = G(L);
 7   unsigned int h = luaS_hash(str, l, g->seed);
 8   for (o = g->strt.hash[lmod(h, g->strt.size)];
 9        o != NULL;
10        o = gch(o)->next) {
11     TString *ts = rawgco2ts(o);
12     if (h == ts->tsv.hash &&
13         l == ts->tsv.len &&
14         (memcmp(str, getstr(ts), l * sizeof(char)) == 0)) {
15       if (isdead(G(L), o))  /* string is dead (but was not collected yet)? */
16         changewhite(o);  /* resurrect it */
17       return ts;
18     }
19   }
20   return newshrstr(L, str, l, h);  /* not found; create a new string */
21 }

 

创建过程中,首先检查字符串hash表是否过于拥挤(nuse >= size),必要时将表扩大一倍,然后获取hash所在的链表表头:

 1 /*
 2 ** creates a new short string, inserting it into string table
 3 */
 4 static TString *newshrstr (lua_State *L, const char *str, size_t l,
 5                                        unsigned int h) {
 6   GCObject **list;  /* (pointer to) list where it will be inserted */
 7   stringtable *tb = &G(L)->strt;
 8   TString *s;
 9   if (tb->nuse >= cast(lu_int32, tb->size) && tb->size <= MAX_INT/2)
10     luaS_resize(L, tb->size*2);  /* too crowded */
11   list = &tb->hash[lmod(h, tb->size)];
12   s = createstrobj(L, str, l, LUA_TSHRSTR, h, list);
13   tb->nuse++;
14   return s;
15 }
 
Lua中所有可回收对象都是一块内存:根据对象的大小申请对应的内存块(字符串对象为TString头部加上l+1字节的内容),初始化其中的GCObject公共头部,
再挂载到对应的位置:hash表或者全局gc链表:
 1 /*
 2 ** creates a new string object
 3 */
 4 static TString *createstrobj (lua_State *L, const char *str, size_t l,
 5                               int tag, unsigned int h, GCObject **list) {
 6   TString *ts;
 7   size_t totalsize;  /* total size of TString object */
 8   totalsize = sizeof(TString) + ((l + 1) * sizeof(char));
 9   ts = &luaC_newobj(L, tag, totalsize, list, 0)->ts;
10   ts->tsv.len = l;
11   ts->tsv.hash = h;
12   ts->tsv.extra = 0;
13   memcpy(ts+1, str, l*sizeof(char));
14   ((char *)(ts+1))[l] = '\0';  /* ending 0 */
15   return ts;
16 }
17 
18 /*
19 ** create a new collectable object (with given type and size) and link
20 ** it to '*list'. 'offset' tells how many bytes to allocate before the
21 ** object itself (used only by states).
22 */
23 GCObject *luaC_newobj (lua_State *L, int tt, size_t sz, GCObject **list,  int offset) {
24   global_State *g = G(L);
25   char *raw = cast(char *, luaM_newobject(L, novariant(tt), sz));
26   GCObject *o = obj2gco(raw + offset);
27   if (list == NULL)
28     list = &g->allgc;  /* standard list for collectable objects */
29   gch(o)->marked = luaC_white(g);
30   gch(o)->tt = tt;
31   gch(o)->next = *list;
32   *list = o;
33   return o;
34 }

 

让我们看看实际上Lua默认的内存申请函数,默认申请内存的方法采用realloc方法,在lua_State创建过程中初始化:

 1 static void *l_alloc (void *ud, void *ptr, size_t osize, size_t nsize) {
 2   (void)ud; (void)osize;  /* not used */
 3   if (nsize == 0) {
 4     free(ptr);
 5     return NULL;
 6   }
 7   else
 8     return realloc(ptr, nsize);
 9 }
10 
11 LUALIB_API lua_State *luaL_newstate (void) {
12   lua_State *L = lua_newstate(l_alloc, NULL);
13   if (L) lua_atpanic(L, &panic);
14   return L;
15 }

 

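那么默认的申请过程(luaM_realloc_):

1、调用分配函数申请内存,申请成功则进入第3步;

2、如果内存申请失败,并且GC处于运行状态(gcrunning),先进行一次完整GC,释放不再使用的对象,然后重新申请;如果仍然失败,抛出LUA_ERRMEM错误;

3、更新g->GCdebt(加上新申请的大小nsize,减去原来的大小realosize),记录本次内存大小的变化,最后返回新内存块: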

 1 #define luaM_newobject(L,tag,s)    luaM_realloc_(L, NULL, tag, (s))
 2 
 3 /*
 4 ** generic allocation routine.
 5 */
 6 void *luaM_realloc_ (lua_State *L, void *block, size_t osize, size_t nsize) {
 7   void *newblock;
 8   global_State *g = G(L);
 9   size_t realosize = (block) ? osize : 0;
10   lua_assert((realosize == 0) == (block == NULL));
11 #if defined(HARDMEMTESTS)
12   if (nsize > realosize && g->gcrunning)
13     luaC_fullgc(L, 1);  /* force a GC whenever possible */
14 #endif
15   newblock = (*g->frealloc)(g->ud, block, osize, nsize);
16   if (newblock == NULL && nsize > 0) {
17     api_check(L, nsize > realosize,
18                  "realloc cannot fail when shrinking a block");
19     if (g->gcrunning) {
20       luaC_fullgc(L, 1);  /* try to free some memory... */
21       newblock = (*g->frealloc)(g->ud, block, osize, nsize);  /* try again */
22     }
23     if (newblock == NULL)
24       luaD_throw(L, LUA_ERRMEM);
25   }
26   lua_assert((nsize == 0) == (newblock == NULL));
27   g->GCdebt = (g->GCdebt + nsize) - realosize;
28   return newblock;
29 }

 

posted @ 2014-12-28 19:14  #shany  阅读(952)  评论(0)  编辑  收藏  举报