lua.5.2.3源码阅读(02):TString字符串对象
lua中的字符串是对象,主要分析一下对象的结构和申请的方式。
TString是一个union:其中的L_Umaxalign dummy成员并不存放数据,只是为了让整个对象按最严格的方式对齐。
按照union的定义,union的对齐要求取其成员中最严格的一个(这里是double、void *、long三者之一),union的大小必须是该对齐值的整数倍;在常见平台上,也就是double大小的整数倍。
1 /* type to ensure maximum alignment */ 2 #if !defined(LUAI_USER_ALIGNMENT_T) 3 #define LUAI_USER_ALIGNMENT_T union { double u; void *s; long l; } 4 #endif 5 6 typedef LUAI_USER_ALIGNMENT_T L_Umaxalign; 7 8 /* 9 ** Header for string value; string bytes follow the end of this structure 10 */ 11 typedef union TString { 12 L_Umaxalign dummy; /* ensures maximum alignment for strings */ 13 struct { 14 CommonHeader; 15 lu_byte extra; /* reserved words for short strings; "has hash" for longs */ 16 unsigned int hash; 17 size_t len; /* number of characters in string */ 18 } tsv; 19 } TString; 20 21 /* 22 ** Common Header for all collectable objects (in macro form, to be 23 ** included in other objects) 24 */ 25 #define CommonHeader GCObject *next; lu_byte tt; lu_byte marked
展开上述定义,实际TString的关键定义:
1 struct { 2 GCObject *next; 3 lu_byte tt; 4 lu_byte marked; 5 lu_byte extra; /* reserved words for short strings; "has hash" for longs */ 6 unsigned int hash; 7 size_t len; /* number of characters in string */ 8 } tsv;
tt用于表示GC对象的类型;marked是GC收集时使用的标记位,后续再分析;extra对短字符串标记其是否为
lua的保留字,对长字符串则表示"has hash";hash为字符串的哈希值(长字符串创建时先存入seed);len为字符串的长度。
TString对象在lstring.h中定义的函数:
1 LUAI_FUNC int luaS_eqlngstr (TString *a, TString *b); 2 LUAI_FUNC int luaS_eqstr (TString *a, TString *b); 3 LUAI_FUNC unsigned int luaS_hash (const char *str, size_t l, unsigned int seed); 4 LUAI_FUNC void luaS_resize (lua_State *L, int newsize); 5 LUAI_FUNC TString *luaS_newlstr (lua_State *L, const char *str, size_t l); 6 LUAI_FUNC TString *luaS_new (lua_State *L, const char *str);
主要看后面两个函数:luaS_newlstr和luaS_new。两者的区别只在参数:luaS_newlstr需要显式传入长度(不依赖'\0'结束符),luaS_new接收以'\0'结尾的字符串,用strlen求出长度后再调用luaS_newlstr。
创建字符串的过程中,根据字符串的长度进行不同的处理:长度小于等于LUAI_MAXSHORTLEN的短字符串,
先进行hash,放置在一个hash表中重用;对于长度大于LUAI_MAXSHORTLEN的长字符串,则一定创建一个新的
TString对象。另外一个区别是,GC对象所挂载的位置不同:短字符串挂在global_State的字符串hash表中,
长字符串挂在global_State的全局GC链表allgc中。
1 /* 2 @@ LUAI_MAXSHORTLEN is the maximum length for short strings, that is, 3 ** strings that are internalized. (Cannot be smaller than reserved words 4 ** or tags for metamethods, as these strings must be internalized; 5 ** #("function") = 8, #("__newindex") = 10.) 6 */ 7 #define LUAI_MAXSHORTLEN 40 8 9 /* 10 ** new string (with explicit length) 11 */ 12 TString *luaS_newlstr (lua_State *L, const char *str, size_t l) { 13 if (l <= LUAI_MAXSHORTLEN) /* short string? */ 14 return internshrstr(L, str, l); 15 else { 16 if (l + 1 > (MAX_SIZET - sizeof(TString))/sizeof(char)) 17 luaM_toobig(L); 18 return createstrobj(L, str, l, LUA_TLNGSTR, G(L)->seed, NULL); 19 } 20 } 21 22 /* 23 ** new zero-terminated string 24 */ 25 TString *luaS_new (lua_State *L, const char *str) { 26 return luaS_newlstr(L, str, strlen(str)); 27 }
先看短字符串的流程,代码中直接说明了流程:
1、首先计算字符串的hash,找到hash表中对应的链表;
2、遍历该链表,看看是否存在hash、长度和内容都相同的字符串对象;
3、如果查找失败,创建新的短字符串对象;
4、如果查找成功,直接返回该对象;若该对象已经死亡但尚未被回收,先修改其GC状态(changewhite)使其复活;
1 /* 2 ** checks whether short string exists and reuses it or creates a new one 3 */ 4 static TString *internshrstr (lua_State *L, const char *str, size_t l) { 5 GCObject *o; 6 global_State *g = G(L); 7 unsigned int h = luaS_hash(str, l, g->seed); 8 for (o = g->strt.hash[lmod(h, g->strt.size)]; 9 o != NULL; 10 o = gch(o)->next) { 11 TString *ts = rawgco2ts(o); 12 if (h == ts->tsv.hash && 13 l == ts->tsv.len && 14 (memcmp(str, getstr(ts), l * sizeof(char)) == 0)) { 15 if (isdead(G(L), o)) /* string is dead (but was not collected yet)? */ 16 changewhite(o); /* resurrect it */ 17 return ts; 18 } 19 } 20 return newshrstr(L, str, l, h); /* not found; create a new string */ 21 }
创建过程中,首先看看字符串hash表中的空间是否足够(已用个数nuse达到表大小size时,将表扩大一倍),然后获取hash所在的链表表头:
1 /* 2 ** creates a new short string, inserting it into string table 3 */ 4 static TString *newshrstr (lua_State *L, const char *str, size_t l, 5 unsigned int h) { 6 GCObject **list; /* (pointer to) list where it will be inserted */ 7 stringtable *tb = &G(L)->strt; 8 TString *s; 9 if (tb->nuse >= cast(lu_int32, tb->size) && tb->size <= MAX_INT/2) 10 luaS_resize(L, tb->size*2); /* too crowded */ 11 list = &tb->hash[lmod(h, tb->size)]; 12 s = createstrobj(L, str, l, LUA_TSHRSTR, h, list); 13 tb->nuse++; 14 return s; 15 }
所有对象在lua中都是一块内存,根据对象的大小申请对应的内存块,申请的内存初始化GCObject结构,
挂载到对应的位置:hash表或者全局gc链表:
1 /* 2 ** creates a new string object 3 */ 4 static TString *createstrobj (lua_State *L, const char *str, size_t l, 5 int tag, unsigned int h, GCObject **list) { 6 TString *ts; 7 size_t totalsize; /* total size of TString object */ 8 totalsize = sizeof(TString) + ((l + 1) * sizeof(char)); 9 ts = &luaC_newobj(L, tag, totalsize, list, 0)->ts; 10 ts->tsv.len = l; 11 ts->tsv.hash = h; 12 ts->tsv.extra = 0; 13 memcpy(ts+1, str, l*sizeof(char)); 14 ((char *)(ts+1))[l] = '\0'; /* ending 0 */ 15 return ts; 16 } 17 18 /* 19 ** create a new collectable object (with given type and size) and link 20 ** it to '*list'. 'offset' tells how many bytes to allocate before the 21 ** object itself (used only by states). 22 */ 23 GCObject *luaC_newobj (lua_State *L, int tt, size_t sz, GCObject **list, int offset) { 24 global_State *g = G(L); 25 char *raw = cast(char *, luaM_newobject(L, novariant(tt), sz)); 26 GCObject *o = obj2gco(raw + offset); 27 if (list == NULL) 28 list = &g->allgc; /* standard list for collectable objects */ 29 gch(o)->marked = luaC_white(g); 30 gch(o)->tt = tt; 31 gch(o)->next = *list; 32 *list = o; 33 return o; 34 }
让我们看看实际上Lua默认的内存申请函数,默认申请内存的方法采用realloc方法,在lua_State创建过程中初始化:
1 static void *l_alloc (void *ud, void *ptr, size_t osize, size_t nsize) { 2 (void)ud; (void)osize; /* not used */ 3 if (nsize == 0) { 4 free(ptr); 5 return NULL; 6 } 7 else 8 return realloc(ptr, nsize); 9 } 10 11 LUALIB_API lua_State *luaL_newstate (void) { 12 lua_State *L = lua_newstate(l_alloc, NULL); 13 if (L) lua_atpanic(L, &panic); 14 return L; 15 }
那么默认的申请过程:
1、申请成功时,更新内存统计后直接返回;
2、如果内存申请失败且GC处于运行状态,先进行一次完整的GC处理(luaC_fullgc),删除多余的不用的内存结构,然后重新申请;若仍然失败,则抛出LUA_ERRMEM错误;
3、最后更新内存统计信息g->GCdebt(加上新申请的大小,减去原来的大小):
1 #define luaM_newobject(L,tag,s) luaM_realloc_(L, NULL, tag, (s)) 2 3 /* 4 ** generic allocation routine. 5 */ 6 void *luaM_realloc_ (lua_State *L, void *block, size_t osize, size_t nsize) { 7 void *newblock; 8 global_State *g = G(L); 9 size_t realosize = (block) ? osize : 0; 10 lua_assert((realosize == 0) == (block == NULL)); 11 #if defined(HARDMEMTESTS) 12 if (nsize > realosize && g->gcrunning) 13 luaC_fullgc(L, 1); /* force a GC whenever possible */ 14 #endif 15 newblock = (*g->frealloc)(g->ud, block, osize, nsize); 16 if (newblock == NULL && nsize > 0) { 17 api_check(L, nsize > realosize, 18 "realloc cannot fail when shrinking a block"); 19 if (g->gcrunning) { 20 luaC_fullgc(L, 1); /* try to free some memory... */ 21 newblock = (*g->frealloc)(g->ud, block, osize, nsize); /* try again */ 22 } 23 if (newblock == NULL) 24 luaD_throw(L, LUA_ERRMEM); 25 } 26 lua_assert((nsize == 0) == (newblock == NULL)); 27 g->GCdebt = (g->GCdebt + nsize) - realosize; 28 return newblock; 29 }