mysql分析(二)mysql语法分析

一、mysqld中对于SQL语句的分析
客户端和服务器之间交互的时候,客户端发送的同样是字符串形式的查询和执行命令,服务器返回的则是特定格式的数据库内容(?)。这个过程就需要在服务器端对指令进行实时翻译,生成特定的查询指令。在sqlite的实现中,sqlite定义了一个专用的虚拟机环境,和通常的真正的汇编指令相同,它有自己的指令集,有特定的指令格式和操作数,有自己的寄存器和内存内容,这个虚拟环境叫做vdbe(virtual database engine)。这一点其实并不奇怪,但是也并不是那么简单的,因为这是要设计出一种语言,这种语言要尽量的精简,但是为了方便,也可以增加一些相对复杂的指令。想一下RISC和CISC体系结构的指令的区别就可以看出,其实设计一种语言并不是那么简单的。特别是对于我们最常使用的C语言来说,能够使用一种语言和能够设计并实现一种语言是有天大区别的。
二、词法分析
和很多的编译器一样,其词法分析并没有使用lex来生成词法分析器,而是自己定义了一个词法分析器,其实现相对简单一些,可能和SQL语言本身的语法比较简单有关。其可以识别的单词定义在lex.h文件的symbols数组中:
/* One entry of the lexer's symbol table (see the symbols[] array). */
typedef struct st_symbol {
  const char *name;            /* literal text of the symbol, e.g. "&&" or "ALTER" */
  uint    tok;                 /* token value; SYM() supplies it through SYM_OR_NULL() */
  uint length;                 /* SYM() initialises this to 0; presumably set to the length of name later -- TODO confirm */
  struct st_sym_group *group;  /* SYM() points this at sym_group_common */
} SYMBOL;
/*
  Expands to the last three initialisers of a SYMBOL entry
  (tok, length, group), so an entry is written as { "text", SYM(TOKEN) }.
*/
#define SYM(A) SYM_OR_NULL(A),0,&sym_group_common

/*
  Symbols are broken into separated arrays to allow field names with
  same name as functions.
  These are kept sorted for human lookup (the symbols are hashed).

  NOTE! The symbol tables should be the same regardless of what features
  are compiled into the server. Don't add ifdef'ed symbols to the
  lists
*/

/*
  Table of operators and keywords recognised by the lexer. Each entry pairs
  the literal text with its token through SYM(). The "……" line marks
  entries that were left out of this excerpt.
*/
static SYMBOL symbols[] = {
  { "&&",        SYM(AND_AND_SYM)},
  { "<",        SYM(LT)},
  { "<=",        SYM(LE)},
  { "<>",        SYM(NE)},
  { "!=",        SYM(NE)}, /* same token as "<>" */
  { "=",        SYM(EQ)},
  { ">",        SYM(GT_SYM)},
  { ">=",        SYM(GE)},
  { "<<",        SYM(SHIFT_LEFT)},
  { ">>",        SYM(SHIFT_RIGHT)},
  { "<=>",        SYM(EQUAL_SYM)},
  { "ACCESSIBLE",    SYM(ACCESSIBLE_SYM)},
  { "ACTION",        SYM(ACTION)},
  { "ADD",        SYM(ADD)},
  { "AFTER",        SYM(AFTER_SYM)},
  { "AGAINST",          SYM(AGAINST)},
  { "AGGREGATE",    SYM(AGGREGATE_SYM)},
  { "ALL",        SYM(ALL)},
  { "ALGORITHM",    SYM(ALGORITHM_SYM)},
  { "ALTER",        SYM(ALTER)},
  { "ANALYZE",        SYM(ANALYZE_SYM)},
  { "AND",        SYM(AND_SYM)},
  { "ANY",              SYM(ANY_SYM)},
……
  { "XML",              SYM(XML_SYM)}, /* LOAD XML Arnold/Erik */
  { "YEAR",        SYM(YEAR_SYM)},
  { "YEAR_MONTH",    SYM(YEAR_MONTH_SYM)},
  { "ZEROFILL",        SYM(ZEROFILL)},
  { "||",        SYM(OR_OR_SYM)}
};
然后gen_lex_hash.cc会在构建过程中生成一个文件(应该是lex_hash.h),只是这个文件到底是如何生成、如何被使用的并不是很清楚,因为当时执行make的时候没有重定向make的输出,好在现在也不是很关心这个东西。生成的文件里面有一个get_hash_symbol函数以及一些简单的表格,get_hash_symbol也是在真正的词法分析中使用的一个函数。
三、语法分析
语法分析始终是一个比较复杂的东东,所以虽然很多人不愿意,但是还是使用了yacc来进行语法分析,语法分析是在sql_yacc.yy文件中实现的,该文件相对复杂一些。
/*
  INSERT statement.
  The first mid-rule action runs right after the INSERT token: it records
  SQLCOM_INSERT as the command, sets duplicates to DUP_ERROR and calls
  mysql_init_select(). opt_insert_update may later change duplicates to
  DUP_UPDATE.
*/
insert:
          INSERT
          {
            LEX *lex= Lex;
            lex->sql_command= SQLCOM_INSERT;
            lex->duplicates= DUP_ERROR; 
            mysql_init_select(lex);
          }
          insert_lock_option
          opt_ignore insert2
          {
            /*
              $1 is INSERT and $2 is the mid-rule action above, so $3 is
              the value produced by insert_lock_option.
            */
            Select->set_lock_for_tables($3);
            Lex->current_select= &Lex->select_lex;
          }
          insert_field_spec opt_insert_update
          {}
        ;
insert2:
          INTO insert_table {}  /* 这说明insert之后的into是可选的,有没有into效果都相同 */
        | insert_table {}
        ;

/*
  Target table of the INSERT. Once the table name is parsed, the action
  resets the insert-related members of LEX: field_list, many_values and
  insert_list. (empty() presumably clears the list -- TODO confirm against
  the List class.)
*/
insert_table:
          table_name
          {
            LEX *lex=Lex;
            lex->field_list.empty();
            lex->many_values.empty();
            lex->insert_list=0;
          };

/*
  What follows the target table. Four forms are accepted:
    - insert_values with no column list,
    - an empty column list '(' ')' followed by insert_values,
    - an explicit column list '(' fields ')' followed by insert_values,
    - the SET form, followed by ident_eq_list.
*/
insert_field_spec:
          insert_values {}
        | '(' ')' insert_values {}
        | '(' fields ')' insert_values {}
        | SET
          {
            /*
              Create a new List_item, keep it in insert_list and append it
              to many_values; MYSQL_YYABORT is taken if the allocation
              yields NULL or push_back() reports failure.
            */
            LEX *lex=Lex;
            if (!(lex->insert_list = new List_item) ||
                lex->many_values.push_back(lex->insert_list))
              MYSQL_YYABORT;
          }
          ident_eq_list
        ;

/*
  Comma-separated column list (left-recursive). Every insert_ident is
  appended to Lex->field_list.
*/
fields:
          fields ',' insert_ident { Lex->field_list.push_back($3); }
        | insert_ident { Lex->field_list.push_back($1); }
        ;

/*
  Source of the rows to insert: either a values_list introduced by VALUES
  or VALUE_SYM, or a create_select. For the select forms, set_braces()
  receives 0 when the select is not parenthesised and 1 when it is.
*/
insert_values:
          VALUES values_list {}
        | VALUE_SYM values_list {}
        | create_select
          { Select->set_braces(0);}
          union_clause {}
        | '(' create_select ')'
          { Select->set_braces(1);}
          union_opt {}
        ;
……
/*
  Optional "ON DUPLICATE KEY UPDATE ..." clause. As soon as ON DUPLICATE_SYM
  has been seen, duplicates is switched to DUP_UPDATE.
*/
opt_insert_update:
          /* empty */
        | ON DUPLICATE_SYM { Lex->duplicates= DUP_UPDATE; }
          KEY_SYM UPDATE_SYM insert_update_list
        ;
/* One or more rows (no_braces), separated by commas; left-recursive. */
values_list:
          values_list ','  no_braces
        | no_braces
        ;
/*
  One parenthesised row of values. A fresh List_item is stored in
  Lex->insert_list right after '(' and is appended to many_values after
  ')'. Either action takes MYSQL_YYABORT on failure.
*/
no_braces:
          '('
          {
              if (!(Lex->insert_list = new List_item))
                MYSQL_YYABORT;
          }
          opt_values ')'
          {
            LEX *lex=Lex;
            if (lex->many_values.push_back(lex->insert_list))
              MYSQL_YYABORT;
          }
        ;
/* The content of a row may be empty, i.e. "()" is accepted. */
opt_values:
          /* empty */ {}
        | values
        ;
/*
  Comma-separated values of one row (left-recursive). Each expr_or_default
  is appended to the current Lex->insert_list; MYSQL_YYABORT is taken if
  push_back() reports failure.
*/
values:
          values ','  expr_or_default
          {
            if (Lex->insert_list->push_back($3))
              MYSQL_YYABORT;
          }
        | expr_or_default
          {
            if (Lex->insert_list->push_back($1))
              MYSQL_YYABORT;
          }
        ;
/*
  A single value: either an ordinary expression, passed through unchanged,
  or the DEFAULT keyword, for which an Item_default_value is allocated on
  YYTHD->mem_root. MYSQL_YYABORT is taken if that allocation yields NULL.
*/
expr_or_default:
          expr { $$= $1;}
        | DEFAULT
          {
            $$= new (YYTHD->mem_root) Item_default_value(Lex->current_context());
            if ($$ == NULL)
              MYSQL_YYABORT;
          }
        ;
四、查询指令开始
/*
  Start of a query. Three forms are accepted:
    - END_OF_INPUT alone (empty query),
    - verb_clause ';' opt_end_of_input,
    - verb_clause END_OF_INPUT (no terminating ';').
  Every form sets found_semicolon on the input stream: NULL, or the
  current read pointer when more statements may follow.
*/
query:
          END_OF_INPUT
          {
            /*
              Empty input is reported as ER_EMPTY_QUERY unless the thread
              is in bootstrap mode or OPTION_FOUND_COMMENT is set in the
              select options.
            */
            THD *thd= YYTHD;
            if (!thd->bootstrap &&
              (!(thd->lex->select_lex.options & OPTION_FOUND_COMMENT)))
            {
              my_message(ER_EMPTY_QUERY, ER(ER_EMPTY_QUERY), MYF(0));
              MYSQL_YYABORT;
            }
            thd->lex->sql_command= SQLCOM_EMPTY_QUERY;
            YYLIP->found_semicolon= NULL;
          }
        | verb_clause
          {
            /* Mid-rule action placed between verb_clause and the ';' token. */
            Lex_input_stream *lip = YYLIP;

            if ((YYTHD->client_capabilities & CLIENT_MULTI_QUERIES) &&
                lip->multi_statements &&
                ! lip->eof())
            {
              /*
                We found a well formed query, and multi queries are allowed:
                - force the parser to stop after the ';'
                - mark the start of the next query for the next invocation
                  of the parser.
              */
              lip->next_state= MY_LEX_END;
              lip->found_semicolon= lip->get_ptr();
            }
            else
            {
              /* Single query, terminated. */
              lip->found_semicolon= NULL;
            }
          }
          ';'
          opt_end_of_input
        | verb_clause END_OF_INPUT
          {
            /* Single query, not terminated. */
            YYLIP->found_semicolon= NULL;
          }
        ;

/* END_OF_INPUT is optional after the ';' that terminates a statement. */
opt_end_of_input:
          /* empty */
        | END_OF_INPUT
        ;
这说明每条查询命令都是以一个分号或者是输入结束标志结尾的,这也说明这个语法分析的功能是相对比较单一的,它一次只处理一条指令,对于一个
use mysql; select ;
句型的语句,mysql客户端是分为两次发送的,这样的好处就是能够及时发现错误。
(gdb) r -e "use mysql;select * from help_keyword limit 1;select * from help_topic limit 2"
The program being debugged has been started already.
Start it from the beginning? (y or n) y

Starting program: /usr/local/mysql/bin/mysql -e "use mysql;select * from help_keyword limit 1;select * from help_topic limit 2"
[Thread debugging using libthread_db enabled]
[New Thread 0xb7fe6b70 (LWP 9826)]
[Thread 0xb7fe6b70 (LWP 9826) exited]

Breakpoint 1, com_go (buffer=0x833c090, line=0x0)
    at /home/tsecer/Downloads/mysql-5.5.28/client/mysql.cc:2967
2967      ulong        timer, warnings= 0;
(gdb) p *buffer
$3 = {Ptr = 0x83441e8 "select * from help_keyword limit 1", str_length = 34, 
  Alloced_length = 520, alloced = true, str_charset = 0x80ceee0}
(gdb) c
Continuing.
+-----------------+------+
| help_keyword_id | name |
+-----------------+------+
|               0 | JOIN |
+-----------------+------+

Breakpoint 1, com_go (buffer=0x833c090, line=0x0)
    at /home/tsecer/Downloads/mysql-5.5.28/client/mysql.cc:2967
2967      ulong        timer, warnings= 0;
(gdb) p *buffer
$4 = {Ptr = 0x83441e8 "select * from help_topic limit 2\n", str_length = 32, 
  Alloced_length = 520, alloced = true, str_charset = 0x80ceee0}
(gdb) c
五、从开始到write调用链
Breakpoint 4, vio_write (vio=0x8344600, buf=0x8348798 "#", size=39)
    at /home/tsecer/Downloads/mysql-5.5.28/vio/viosocket.c:127
127      r = write(vio->sd, buf, size);
Current language:  auto
The current source language is "auto; currently c".
(gdb) p/50c buf
Item count other than 1 is meaningless in "print" command.
(gdb) x/50c buf
0x8348798:    35 '#'    0 '\000'    0 '\000'    0 '\000'    3 '\003'    115 's'    101 'e'    108 'l'
0x83487a0:    101 'e'    99 'c'    116 't'    32 ' '    42 '*'    32 ' '    102 'f'    114 'r'
0x83487a8:    111 'o'    109 'm'    32 ' '    104 'h'    101 'e'    108 'l'    112 'p'    95 '_'
0x83487b0:    107 'k'    101 'e'    121 'y'    119 'w'    111 'o'    114 'r'    100 'd'    32 ' '
0x83487b8:    108 'l'    105 'i'    109 'm'    105 'i'    116 't'    32 ' '    49 '1'    0 '\000'
0x83487c0:    0 '\000'    0 '\000'    109 'm'    121 'y'    115 's'    113 'q'    108 'l'    95 '_'
0x83487c8:    110 'n'    97 'a'
(gdb) bt
#0  vio_write (vio=0x8344600, buf=0x8348798 "#", size=39)
    at /home/tsecer/Downloads/mysql-5.5.28/vio/viosocket.c:127
#1  0x08061800 in net_real_write (net=0x833bc60, packet=0x8348798 "#", len=39)
    at /home/tsecer/Downloads/mysql-5.5.28/sql/net_serv.cc:632
#2  0x08061112 in net_flush (net=0x833bc60)
    at /home/tsecer/Downloads/mysql-5.5.28/sql/net_serv.cc:338
#3  0x08061477 in net_write_command (net=0x833bc60, command=3 '\003', header=0x0, 
    head_len=0, packet=0x83441e8 "select * from help_keyword limit 1", len=34)
    at /home/tsecer/Downloads/mysql-5.5.28/sql/net_serv.cc:478
#4  0x08064bbb in cli_advanced_command (mysql=0x833bc60, command=COM_QUERY, 
    header=0x0, header_length=0, 
    arg=0x83441e8 "select * from help_keyword limit 1", arg_length=34, 
    skip_check=1 '\001', stmt=0x0)
    at /home/tsecer/Downloads/mysql-5.5.28/sql-common/client.c:841
#5  0x08068fa6 in mysql_send_query (mysql=0x833bc60, 
    query=0x83441e8 "select * from help_keyword limit 1", length=34)
    at /home/tsecer/Downloads/mysql-5.5.28/sql-common/client.c:3903
#6  0x08068fd6 in mysql_real_query (mysql=0x833bc60, 
    query=0x83441e8 "select * from help_keyword limit 1", length=34)
    at /home/tsecer/Downloads/mysql-5.5.28/sql-common/client.c:3914
#7  0x080522cd in mysql_real_query_for_lazy (
    buf=0x83441e8 "select * from help_keyword limit 1", length=34)
    at /home/tsecer/Downloads/mysql-5.5.28/client/mysql.cc:2738
---Type <return> to continue, or q <return> to quit---
#8  0x08052e7f in com_go (buffer=0x833c090, line=0x0)
    at /home/tsecer/Downloads/mysql-5.5.28/client/mysql.cc:3008
#9  0x080511cc in add_line (buffer=..., 
    line=0x8343398 "select * from help_keyword limit 1rd limit 1;select * from help_topic limit 2", in_string=0xbffff03f "", ml_comment=0xbffff03e, truncated=false)
    at /home/tsecer/Downloads/mysql-5.5.28/client/mysql.cc:2178
#10 0x08050938 in read_and_execute (interactive=false)
    at /home/tsecer/Downloads/mysql-5.5.28/client/mysql.cc:1951
#11 0x0804fa31 in main (argc=6, argv=0x8343220)
    at /home/tsecer/Downloads/mysql-5.5.28/client/mysql.cc:1232
(gdb) 


vio_write函数定义在mysql-5.5.28\vio\viosocket.c中:
/*
  Write a buffer to the socket descriptor of a Vio connection.

  @param vio   connection whose descriptor vio->sd is written to
  @param buf   bytes to send
  @param size  number of bytes in buf

  @return the result of write() (or send() when __WIN__ is defined)
          converted to size_t; a failed call therefore appears as
          (size_t) -1
*/
size_t vio_write(Vio * vio, const uchar* buf, size_t size)
{
  size_t written;
  DBUG_ENTER("vio_write");
  DBUG_PRINT("enter", ("sd: %d  buf: 0x%lx  size: %u", vio->sd, (long) buf,
                       (uint) size));
#ifndef __WIN__
  written = write(vio->sd, buf, size);
#else
  /* With __WIN__ defined the data goes out through send() with flags 0. */
  written = send(vio->sd, buf, size, 0);
#endif /* __WIN__ */
#ifndef DBUG_OFF
  /* Debug builds only: trace a failed call together with socket_errno. */
  if ((size_t) -1 == written)
  {
    DBUG_PRINT("vio_error", ("Got error on write: %d", socket_errno));
  }
#endif /* DBUG_OFF */
  DBUG_PRINT("exit", ("%u", (uint) written));
  DBUG_RETURN(written);
}
其实都是一些应用层的皮毛内容,和编译器一样,真正的语义分析及优化才是关键。对于数据库来说,真正的核心操作同样在于查询指令的生成,而这一点可能需要花费大量的时间(和智商),由于两者都不具备,有时间再慢慢来。

posted on 2019-03-07 10:12  tsecer  阅读(927)  评论(0编辑  收藏  举报

导航