yyqng

  博客园  :: 首页  :: 新随笔  :: 联系 :: 订阅 订阅  :: 管理

3.1 概述

    字符串在Lua中是不可变的数据。每当使用不存在的字符串时,就会创建一份新的数据,创建之后是不可更改的。

3.2 字符串实现

//luaconf.h:595
// 用于8字节对齐
#define LUAI_USER_ALIGNMENT_T    union { double u; void *s; long l; }

//limits.h:47
// 用于8字节对齐typedef LUAI_USER_ALIGNMENT_T L_Umaxalign;

//lobject.h:199
typedef union TString {
  L_Umaxalign dummy;  /* ensures maximum alignment for strings */
  struct {
    CommonHeader;
    lu_byte reserved;  //1:系统保留,不会在GC阶段回收
    unsigned int hash;
    size_t len;
  } tsv;
} TString;


//lstate.h:68
typedef struct global_State {
  stringtable strt;  /* hash table for strings */
  //......
} global_State;

//lstate.h:38
typedef struct stringtable {
  GCObject **hash; //哈希桶,每个槽又是一个GCObject *,数据TString使用链式存储
  lu_int32 nuse;   /* number of elements */
  int size;
} stringtable;

//为了避免数据(TString)量太大导致查找退化成线性操作,需要重新散列:
//lstring.c:22
void luaS_resize (lua_State *L, int newsize) {
  GCObject **newhash;
  stringtable *tb;
  int i;
  if (G(L)->gcstate == GCSsweepstring)
    return;  /* cannot resize during GC traverse */
  newhash = luaM_newvector(L, newsize, GCObject *);
  tb = &G(L)->strt;
  for (i=0; i<newsize; i++) newhash[i] = NULL;
  /* rehash 重新散列*/
  for (i=0; i<tb->size; i++) {
    GCObject *p = tb->hash[i];
    while (p) {  /* for each node in the list */
      GCObject *next = p->gch.next;  /* save next */
      unsigned int h = gco2ts(p)->hash;
      int h1 = lmod(h, newsize);  /* new position */
      lua_assert(cast_int(h%newsize) == lmod(h, newsize));
      p->gch.next = newhash[h1];  /* chain it */
      newhash[h1] = p;
      p = next;
    }
  }
  luaM_freearray(L, tb->hash, tb->size, TString *); //释放旧的散列桶
  tb->size = newsize;
  tb->hash = newhash;
}

     有两处关于luaS_resize函数的调用:

//lgc.c:431
//这里会进行检查,如果此时桶的数量太大(利用率不到1/4 且大于 MINSTRTABSIZE * 2),
//则会将散列桶数组减少为原来的一半
static void checkSizes (lua_State *L) {
  global_State *g = G(L);
  /* check size of string hash */
  if (g->strt.nuse < cast(lu_int32, g->strt.size/4) &&
      g->strt.size > MINSTRTABSIZE*2)
    luaS_resize(L, g->strt.size/2);  /* table is too big */
  /* check size of buffer */
  if (luaZ_sizebuffer(&g->buff) > LUA_MINBUFFER*2) {  /* buffer too big? */
    size_t newsize = luaZ_sizebuffer(&g->buff) / 2;
    luaZ_resizebuffer(L, &g->buff, newsize);
  }
}

//lstring.c:75
//分配一个新的字符串
TString *luaS_newlstr (lua_State *L, const char *str, size_t l) {
  GCObject *o;
  unsigned int h = cast(unsigned int, l);  /* seed */
  size_t step = (l>>5)+1;  /* if string is too long, don't hash all its chars */
  size_t l1;
  for (l1=l; l1>=step; l1-=step)  /* compute hash */
    h = h ^ ((h<<5)+(h>>2)+cast(unsigned char, str[l1-1]));
  for (o = G(L)->strt.hash[lmod(h, G(L)->strt.size)];
       o != NULL;
       o = o->gch.next) {
    TString *ts = rawgco2ts(o);
    if (ts->tsv.len == l && (memcmp(str, getstr(ts), l) == 0)) {
      /* string may be dead: 需要更改为不需要在GC阶段回收*/
      if (isdead(G(L), o)) changewhite(o);
      return ts; //该字符串已经存在,直接返回结果
    }
  }
  return newlstr(L, str, l, h);  /* not found: 分配新的字符串*/
}

//lstring.c:50
//分配一个新的字符串
static TString *newlstr (lua_State *L, const char *str, size_t l,
                                       unsigned int h) {
  TString *ts;
  stringtable *tb;
  if (l+1 > (MAX_SIZET - sizeof(TString))/sizeof(char))
    luaM_toobig(L);
  ts = cast(TString *, luaM_malloc(L, (l+1)*sizeof(char)+sizeof(TString)));
  ts->tsv.len = l;
  ts->tsv.hash = h;
  ts->tsv.marked = luaC_white(G(L));
  ts->tsv.tt = LUA_TSTRING;
  ts->tsv.reserved = 0;
  memcpy(ts+1, str, l*sizeof(char));
  ((char *)(ts+1))[l] = '\0';  /* ending 0 */
  tb = &G(L)->strt;
  h = lmod(h, tb->size);
  ts->tsv.next = tb->hash[h];  /* chain new entry */
  tb->hash[h] = obj2gco(ts);
  tb->nuse++;
  if (tb->nuse > cast(lu_int32, tb->size) && tb->size <= MAX_INT/2)
    luaS_resize(L, tb->size*2);  /* too crowded */
  return ts;
}

 

    TString结构体中的字段reserved用于标示是不是保留字

//llex.c:37
/* ORDER RESERVED */
const char *const luaX_tokens [] = {
    "and", "break", "do", "else", "elseif",
    "end", "false", "for", "function", "if",
    "in", "local", "nil", "not", "or", "repeat",
    "return", "then", "true", "until", "while",
    "..", "...", "==", ">=", "<=", "~=",
    "<number>", "<name>", "<string>", "<eof>",
    NULL
};

//llex.h:24
enum RESERVED {
  /* terminal symbols denoted by reserved words */
  TK_AND = FIRST_RESERVED, TK_BREAK,
  TK_DO, TK_ELSE, TK_ELSEIF, TK_END, TK_FALSE, TK_FOR, TK_FUNCTION,
  TK_IF, TK_IN, TK_LOCAL, TK_NIL, TK_NOT, TK_OR, TK_REPEAT,
  TK_RETURN, TK_THEN, TK_TRUE, TK_UNTIL, TK_WHILE,
  /* other terminal symbols */
  TK_CONCAT, TK_DOTS, TK_EQ, TK_GE, TK_LE, TK_NE, TK_NUMBER,
  TK_NAME, TK_STRING, TK_EOS
};

//lstring.h:20
#define luaS_new(L, s)    (luaS_newlstr(L, s, strlen(s)))

//lgc.h:60
#define FIXEDBIT  5

//lstring.h:24
#define luaS_fix(s) l_setbit((s)->tsv.marked, FIXEDBIT)

//llex.h:36
/* number of reserved words */
#define NUM_RESERVED    (cast(int, TK_WHILE-FIRST_RESERVED+1))

//llex.c:64
//最开始新建保留字,并对reserved赋值
void luaX_init (lua_State *L) {
  int i;
  for (i=0; i<NUM_RESERVED; i++) {
    TString *ts = luaS_new(L, luaX_tokens[i]);
    luaS_fix(ts);  /* reserved words are never collected */
    lua_assert(strlen(luaX_tokens[i])+1 <= TOKEN_LEN);
    ts->tsv.reserved = cast_byte(i+1);  /* reserved word */
  }
}

 

posted on 2021-01-19 07:10  zziii  阅读(101)  评论(0编辑  收藏  举报