从括弧类匹配高亮看vim内置plugin及用户自定义command

引入

vim内置命令中包含了text object相关功能,这些功能可以在vim的
:h motion.txt
帮助文档中找到相关帮助。这个功能看起来实现比较简单,而且是一个非常实用的功能。这个功能之所以加入比较晚,在于它使用的a和i并不是传统的motion动作,而是normal模式下的追加(append)和插入(insert)命令,所以在vim的command+motion模式下有些异类。

正统的motion是通过[]引导的括弧匹配,例如

[(                      go to [count] previous unmatched '('.
                        exclusive motion. {not in Vi}

                                                [{
[{                      go to [count] previous unmatched '{'.
                        exclusive motion. {not in Vi}

                                                ])
])                      go to [count] next unmatched ')'.
                        exclusive motion. {not in Vi}

                                                ]}
]}                      go to [count] next unmatched '}'.
                        exclusive motion. {not in Vi}

这些命令其实还隐含了一个pair的概念:也就是左括弧一定和对应的右括弧匹配。尽管这个看起来比较直观,但是在vim中还是有对应的配置选项描述所有的pair对应关系。这个对应关系不仅在这里使用,而且在后面提到的pair高亮中也会使用。

'matchpairs' 'mps'      string  (default "(:),{:},[:]")
                        local to buffer 
                        {not in Vi}
        Characters that form pairs.  The % command jumps from one to the
        other.
        Only character pairs are allowed that are different, thus you cannot
        jump between two double quotes. 
        The characters must be separated by a colon.
        The pairs must be separated by a comma.  Example for including '<' and
        '>' (HTML):                             
                :set mps+=<:> 
                        
        A more exotic example, to jump between the '=' and ';' in an
        assignment, useful for languages like C and Java:
                :au FileType c,cpp,java set mps+==:;
                        
        For a more advanced way of using "%", see the matchit.vim plugin in
        the $VIMRUNTIME/macros directory. add-local-help

嵌套

在括弧匹配的时候,不可避免地会遇到括弧嵌套的问题,如果在匹配的过程中不考虑嵌套,那匹配的结果几乎是不可使用的。例如考虑下面的代码片段

{
	if ()
	{
		while ()
		{
		
		}
	}
}

在最外层括弧中,左括弧和逻辑上对应的、最后一行的右括弧之间还分布着两个额外的右括弧,在匹配的时候都要设法避免匹配到这两个括弧,而绕开这些括弧又需要先确定它们对应的左括弧。不过这个问题相对还是比较简单,就是在匹配的时候记录pair的数量,遇到相同的递增,遇到对应的递减,直到计数为零就认为找到了对应的匹配(这个过程中更精细的还要跳过字符串中的符号)。

vim的代码是通过对pair进行计数来实现嵌套的匹配,并且考虑到了括弧是否位于引号(字符串)内部,以及括弧前面是否有反斜杠转义。

/*
 * findmatchlimit -- find the matching paren or brace, if it exists within
 * maxtravel lines of the cursor.  A maxtravel of 0 means search until falling
 * off the edge of the file.
 *
 * "initc" is the character to find a match for.  NUL means to find the
 * character at or after the cursor. Special values:
 * '*'  look for C-style comment / *
 * '/'  look for C-style comment / *, ignoring comment-end
 * '#'  look for preprocessor directives
 * 'R'  look for raw string start: R"delim(text)delim" (only backwards)
 *
 * flags: FM_BACKWARD	search backwards (when initc is '/', '*' or '#')
 *	  FM_FORWARD	search forwards (when initc is '/', '*' or '#')
 *	  FM_BLOCKSTOP	stop at start/end of block ({ or } in column 0)
 *	  FM_SKIPCOMM	skip comments (not implemented yet!)
 *
 * "oap" is only used to set oap->motion_type for a linewise motion, it can be
 * NULL
 */

    pos_T *
findmatchlimit(
    oparg_T	*oap,
    int		initc,
    int		flags,
    int		maxtravel)
{
///...
	    /* Check for match outside of quotes, and inside of
	     * quotes when the start is also inside of quotes. */
	    if ((!inquote || start_in_quotes == TRUE)
		    && (c == initc || c == findc))
	    {
		int	col, bslcnt = 0;

		/* Unless cpo_bsl is set, count how many times
		 * check_prevcol() succeeds for a backslash, starting at
		 * pos.col (presumably the run of backslashes right before
		 * this character -- confirm against check_prevcol()). */
		if (!cpo_bsl)
		{
		    for (col = pos.col; check_prevcol(linep, col, '\\', &col);)
			bslcnt++;
		}
		/* Only accept a match when 'M' is in 'cpo' or when escaping
		 * is what we expect. */
		if (cpo_bsl || (bslcnt & 1) == match_escaped)
		{
		    /* Nesting is handled with a counter: seeing "initc"
		     * again opens one more level, seeing "findc" closes
		     * one. */
		    if (c == initc)
			count++;
		    else
		    {
			/* "findc" with no nested level pending: this is
			 * the matching character. */
			if (count == 0)
			    return &pos;
			count--;
		    }
		}
	    }
///...
}

matchparen插件

即使不定义任何自定义配置,在vim中把光标移动到括弧位置也会高亮显示对应的括弧,这个功能就是通过vim内置插件matchparen来实现的。在vim源代码的runtime\plugin文件夹,可以看到matchparen.vim这个插件,该插件就基于vim内置的matchparen功能实现了括弧的自动高亮。

从这个脚本可以看到,它主要是通过3match这个内置command实现的高亮。

186   " If a match is found setup match highlighting.                               
187   if m_lnum > 0 && m_lnum >= stoplinetop && m_lnum <= stoplinebottom            
188     if exists('*matchaddpos')                                                   
189       call matchaddpos('MatchParen', [[c_lnum, c_col - before], [m_lnum, m_col]], 10, 3)
190     else                                                                        
191       exe '3match MatchParen /\(\%' . c_lnum . 'l\%' . (c_col - before) .       
192         \ 'c\)\|\(\%' . m_lnum . 'l\%' . m_col . 'c\)/'                         
193     endif                                                                       
194     let w:paren_hl_on = 1                                                       
195   endif       

:3mat[ch] none                                                                  
                Just like :match above, but set a separate match.  Thus         
                there can be three matches active at the same time.  The match  
                with the lowest number has priority if several match at the     
                same position.                                                  
                The ":3match" command is used by the matchparen plugin.  You                                                                                                                
                are suggested to use ":match" for manual matching and           
                ":2match" for another plugin. 

3match

从代码实现上看,命令前面的数字前缀表示的是id,不同的id对应(并清除)不同的高亮,这样可以同时存在1、2、3三种高亮。括弧高亮和常规的高亮是可以并行的,这也就是文档说明中说的通常这个3match只是给matchparen使用的原因。

/*
 * ":[N]match {group} {pattern}"
 * Sets nextcmd to the start of the next command, if any.  Also called when
 * skipping commands to find the next command.
 */
    static void
ex_match(exarg_T *eap)
{
    int		id;
    int		c;
    char_u	*p;
    char_u	*end;
    char_u	*g = NULL;

    /* The number given in front of ":match" arrives in eap->line2 and is
     * used as the match id; anything above 3 is an invalid command. */
    if (eap->line2 > 3)
    {
	EMSG(e_invcmd);
	return;
    }
    id = eap->line2;

    /* Remove the pattern previously set for this id, unless the command is
     * only being skipped over. */
    if (!eap->skip)
	match_delete(curwin, id, FALSE);

}

内置正则表达式

matchparen.vim插件中的3match命令后面有复杂的pattern,但是从其中的变量命名可以猜测到,其中的%l和%c应该分别对应line和column,也就是让vim在指定的两个(行,列)位置显示MatchParen类型高亮。这一点在vim的帮助手册中也有说明。

/\%l    \%23l   \%23l   in line 23 /zero-width
/\%c    \%23c   \%23c   in column 23 /zero-width

Overview of ordinary atoms.                             /ordinary-atom          
More explanation and examples below, follow the links.                          
                                                                                
      ordinary atom                                                             
      magic   nomagic   matches                                                 
/^      ^       ^       start-of-line (at start of pattern) /zero-width         
/\^     \^      \^      literal '^'                                             
/\_^    \_^     \_^     start-of-line (used anywhere) /zero-width               
/$      $       $       end-of-line (at end of pattern) /zero-width             
/\$     \$      \$      literal '$'                                             
/\_$    \_$     \_$     end-of-line (used anywhere) /zero-width                 
/.      .       \.      any single character (not an end-of-line)               
/\_.    \_.     \_.     any single character or end-of-line                     
/\<     \<      \<      beginning of a word /zero-width                         
/\>     \>      \>      end of a word /zero-width                                                                       
/\zs    \zs     \zs     anything, sets start of match                           
/\ze    \ze     \ze     anything, sets end of match                             
/\%^    \%^     \%^     beginning of file /zero-width           E71             
/\%$    \%$     \%$     end of file /zero-width                                 
/\%V    \%V     \%V     inside Visual area /zero-width                          
/\%#    \%#     \%#     cursor position /zero-width                             
/\%'m   \%'m    \%'m    mark m position /zero-width                             
/\%l    \%23l   \%23l   in line 23 /zero-width                                  
/\%c    \%23c   \%23c   in column 23 /zero-width                                
/\%v    \%23v   \%23v   in virtual column 23 /zero-width        

对应代码在regexp.c文件中

/*
 * Parse the lowest level.
 *
 * Optimization:  gobbles an entire sequence of ordinary characters so that
 * it can turn them into a single node, which is smaller to store and
 * faster to run.  Don't do this when one_exactly is set.
 */
    static char_u *
regatom(int *flagp)
{
///....
      case Magic('%'):
	{
	    c = no_Magic(getchr());
	    switch (c)
	    {
	    ///....
	    		default:
			  /* Handles "\%" followed by a digit, '<', '>' or a
			   * single quote: the line/column/virtual-column atoms
			   * and the mark atom. */
			  if (VIM_ISDIGIT(c) || c == '<' || c == '>'
								 || c == '\'')
			  {
			      long_u	n = 0;
			      int	cmp;

			      /* "cmp" keeps the first character after "\%".
			       * Only when it is '<' or '>' is one more
			       * character fetched before reading digits. */
			      cmp = c;
			      if (cmp == '<' || cmp == '>')
				  c = getchr();
			      /* Accumulate the decimal number into "n". */
			      while (VIM_ISDIGIT(c))
			      {
				  n = n * 10 + (c - '0');
				  c = getchr();
			      }
			      if (c == '\'' && n == 0)
			      {
				  /* "\%'m", "\%<'m" and "\%>'m": Mark */
				  c = getchr();
				  ret = regnode(RE_MARK);
				  /* Two extra bytes follow the opcode: the
				   * character read after the quote and
				   * "cmp". */
				  if (ret == JUST_CALC_SIZE)
				      regsize += 2;
				  else
				  {
				      *regcode++ = c;
				      *regcode++ = cmp;
				  }
				  break;
			      }
			      else if (c == 'l' || c == 'c' || c == 'v')
			      {
				  /* The letter after the number selects the
				   * node type: 'l' -> RE_LNUM, 'c' -> RE_COL,
				   * 'v' -> RE_VCOL. */
				  if (c == 'l')
				  {
				      ret = regnode(RE_LNUM);
				      if (save_prev_at_start)
					  at_start = TRUE;
				  }
				  else if (c == 'c')
				      ret = regnode(RE_COL);
				  else
				      ret = regnode(RE_VCOL);
				  /* Five bytes are reserved when only sizing;
				   * presumably four for the number written by
				   * re_put_long() plus one for "cmp" --
				   * confirm against re_put_long(). */
				  if (ret == JUST_CALC_SIZE)
				      regsize += 5;
				  else
				  {
				      /* put the number and the optional
				       * comparator after the opcode */
				      regcode = re_put_long(regcode, n);
				      *regcode++ = cmp;
				  }
				  break;
			      }
			  }
///....
}

VIMRUNTIME

$VIMRUNTIME目录下的这些plugin是vim内置的,它们作为vim项目的一部分存在,所以在vim启动的时候默认会执行这些plugin中的内容。
这些plugin中还包含了netrwPlugin.vim插件,该插件包含了内置的Explore、Vex等文件夹浏览/重命名等命令。

查看所有自定义命令

在vim的ex模式下执行
com
可以看到当前所有的用户自定义命令列表(在没有额外配置时,其中就是内置plugin定义的那些命令)。

:com
    Name        Args Range Complete  Definition
    DoMatchParen 0                    runtime plugin/matchparen.vim | windo doau CursorMoved
!   Explore     *    0c    dir       call netrw#Explore(<count>,0,0+<bang>0,<q-args>)
    GLVS        0                    call getscript#GetLatestVimScripts()
    GetLatestVimScripts 0                    call getscript#GetLatestVimScripts()
    GetScripts  0                    call getscript#GetLatestVimScripts()
!   Hexplore    *    0c    dir       call netrw#Explore(<count>,1,2+<bang>0,<q-args>)
!   MkVimball   +    .     file      call vimball#MkVimball(<line1>,<line2>,<bang>0,<f-args>)
    NetUserPass *                    call NetUserPass(<f-args>)
!   NetrwClean  0                    call netrw#NetrwClean(<bang>0)
    NetrwSettings 0                    call netrwSettings#NetrwSettings()
!   Nexplore    *                    call netrw#Explore(-1,0,0,<q-args>)
    NoMatchParen 0                    windo 3match none | unlet! g:loaded_matchparen | au! matchparen
    Nread       *    1c              call netrw#NetrwSavePosn()|call netrw#NetRead(<count>,<f-args>)|call netrw#NetrwRestorePosn()
    Nsource     *                    call netrw#NetrwSavePosn()|call netrw#NetSource(<f-args>)|call netrw#NetrwRestorePosn()
    Nwrite      *    %               call netrw#NetrwSavePosn()|<line1>,<line2>call netrw#NetWrite(<f-args>)|call netrw#NetrwRestorePosn()
!   Pexplore    *                    call netrw#Explore(-2,0,0,<q-args>)
    Rexplore    0                    if exists("w:netrw_rexlocal")|call s:NetrwRexplore(w:netrw_rexlocal,exists("w:netrw_rexdir")? w:netrw_rexdir : ".")|else|call netrw#ErrorMsg(s:WARNING,"not a former netrw window",79)|endif
    RmVimball   *          dir       call vimball#SaveSettings()|call vimball#RmVimball(<f-args>)|call vimball#RestoreSettings()
!   Sexplore    *    0c    dir       call netrw#Explore(<count>,1,0+<bang>0,<q-args>)
    TOhtml      0    %               :call tohtml#Convert2HTML(<line1>, <line2>)
    Texplore    *    0c    dir       call netrw#Explore(<count>,0,6        ,<q-args>)
    UseVimball  ?          dir       call vimball#Vimball(1,<f-args>)
!   Vexplore    *    0c    dir       call netrw#Explore(<count>,1,4+<bang>0,<q-args>)
    VimballList 0                    call vimball#Vimball(0)
    Vimuntar    ?          file      call tar#Vimuntar(<q-args>)

用户自定义的一些说明

所有用户自定义命令必须大写开头

All user defined commands must start with an uppercase letter, to avoid
confusion with builtin commands. Exceptions are these builtin commands:
:Next
:X
They cannot be used for a user defined command. ":Print" is also an existing
command, but it is deprecated and can be overruled.

用户命令二义性

When using a user-defined command, the command can be abbreviated. However, if
an abbreviation is not unique, an error will be issued. Furthermore, a
built-in command will always take precedence.

代码实现

ex内容的解析

前面看到的3match,在ex解析时,前缀3是被当作行号范围来解析的:只给出一个地址时,起始行和结束行都是第3行(line1 == line2 == 3)。只是在match命令的实现中,把这个行号(eap->line2)解释为了高亮的id。

    static char_u *
do_one_cmd(
    char_u		**cmdlinep,
    int			sourcing,
#ifdef FEAT_EVAL
    struct condstack	*cstack,
#endif
    char_u		*(*fgetline)(int, void *, int),
    void		*cookie)		/* argument for fgetline() */
{
///...
/*
 * 3. Skip over the range to find the command.  Let "p" point to after it.
 *
 * We need the command to know what kind of range it uses.
 */
    /* "cmd" remembers where the range text starts; ea.cmd is moved past the
     * range so that find_command() looks at the command name itself. */
    cmd = ea.cmd;
    ea.cmd = skip_range(ea.cmd, NULL);
    /* A '*' right after the range is stepped over too, unless CPO_STAR is
     * present in 'cpoptions'. */
    if (*ea.cmd == '*' && vim_strchr(p_cpo, CPO_STAR) == NULL)
	ea.cmd = skipwhite(ea.cmd + 1);
    p = find_command(&ea, NULL);
///...
/*
 * 4. parse a range specifier of the form: addr [,addr] [;addr] ..
 *
 * where 'addr' is:
 *
 * %	      (entire file)
 * $  [+-NUM]
 * 'x [+-NUM] (where x denotes a currently defined mark)
 * .  [+-NUM]
 * [+-NUM]..
 * NUM
 *
 * The ea.cmd pointer is updated to point to the first character following the
 * range spec. If an initial address is found, but no second, the upper bound
 * is equal to the lower.
 */
///...

    /* One address given: set start and end lines */
    /* With a single address both ends of the range become that line, so a
     * lone number N in front of a command gives line1 == line2 == N. */
    if (ea.addr_count == 1)
    {
	ea.line1 = ea.line2;
	    /* ... but only implicit: really no address given */
	if (lnum == MAXLNUM)
	    ea.addr_count = 0;
    }

/*
 * 5. Parse the command.
 */

    /*
     * Skip ':' and any white space
     */
    ea.cmd = skipwhite(ea.cmd);
    while (*ea.cmd == ':')
	ea.cmd = skipwhite(ea.cmd + 1);
///...
}

command解析

从代码可以看到,的确是判断了命令的首字母是否是大写,如果是大写的话则会尝试从用户自定义命令中查找。

/*
 * Find an Ex command by its name, either built-in or user.
 * Start of the name can be found at eap->cmd.
 * Returns pointer to char after the command name.
 * "full" is set to TRUE if the whole command name matched.
 * Returns NULL for an ambiguous user command.
 */
    static char_u *
find_command(exarg_T *eap, int *full UNUSED)
{
    int		len;
    char_u	*p;
    int		i;

    /*
     * Isolate the command and search for it in the command table.
     * Exceptions:
     * - the 'k' command can directly be followed by any character.
     * - the 's' command can be followed directly by 'c', 'g', 'i', 'I' or 'r'
     *	    but :sre[wind] is another command, as are :scr[iptnames],
     *	    :scs[cope], :sim[alt], :sig[ns] and :sil[ent].
     * - the "d" command can directly be followed by 'l' or 'p' flag.
     */
    p = eap->cmd;
    if (*p == 'k')
    {
	/* A leading 'k' is always CMD_k; only that one character is
	 * consumed. */
	eap->cmdidx = CMD_k;
	++p;
    }
    else if (p[0] == 's'
	    && ((p[1] == 'c' && (p[2] == NUL || (p[2] != 's' && p[2] != 'r'
			&& (p[3] == NUL || (p[3] != 'i' && p[4] != 'p')))))
		|| p[1] == 'g'
		|| (p[1] == 'i' && p[2] != 'm' && p[2] != 'l' && p[2] != 'g')
		|| p[1] == 'I'
		|| (p[1] == 'r' && p[2] != 'e')))
    {
	/* 's' directly followed by a flag character: treated as
	 * CMD_substitute, and only the 's' itself is consumed. */
	eap->cmdidx = CMD_substitute;
	++p;
    }
    else
    {
	/* The command name is the leading run of ASCII letters. */
	while (ASCII_ISALPHA(*p))
	    ++p;
	/* for python 3.x support ":py3", ":python3", ":py3file", etc. */
	if (eap->cmd[0] == 'p' && eap->cmd[1] == 'y')
	    while (ASCII_ISALNUM(*p))
		++p;

	/* check for non-alpha command */
	if (p == eap->cmd && vim_strchr((char_u *)"@*!=><&~#", *p) != NULL)
	    ++p;
	len = (int)(p - eap->cmd);
	if (*eap->cmd == 'd' && (p[-1] == 'l' || p[-1] == 'p'))
	{
	    /* Check for ":dl", ":dell", etc. to ":deletel": that's
	     * :delete with the 'l' flag.  Same for 'p'. */
	    for (i = 0; i < len; ++i)
		if (eap->cmd[i] != ((char_u *)"delete")[i])
		    break;
	    /* Everything except the last character agrees with "delete":
	     * drop that last character from the name and record it as a
	     * flag. */
	    if (i == len - 1)
	    {
		--len;
		if (p[-1] == 'l')
		    eap->flags |= EXFLAG_LIST;
		else
		    eap->flags |= EXFLAG_PRINT;
	    }
	}

	if (ASCII_ISLOWER(eap->cmd[0]))
	{
	    int c1 = eap->cmd[0];
	    int c2 = eap->cmd[1];

	    /* Internal consistency check: exits via getout(1) when
	     * command_count and CMD_SIZE disagree. */
	    if (command_count != (int)CMD_SIZE)
	    {
		iemsg((char_u *)_("E943: Command table needs to be updated, run 'make cmdidxs'"));
		getout(1);
	    }

	    /* Use a precomputed index for fast look-up in cmdnames[]
	     * taking into account the first 2 letters of eap->cmd. */
	    eap->cmdidx = cmdidxs1[CharOrdLow(c1)];
	    if (ASCII_ISLOWER(c2))
		eap->cmdidx += cmdidxs2[CharOrdLow(c1)][CharOrdLow(c2)];
	}
	else
	    /* Names that do not start with a lowercase letter begin the
	     * scan at CMD_bang. */
	    eap->cmdidx = CMD_bang;

	/* Linear scan from the starting index: stop at the first table
	 * entry whose first "len" characters equal the typed name.  When
	 * nothing matches, cmdidx ends up as CMD_SIZE. */
	for ( ; (int)eap->cmdidx < (int)CMD_SIZE;
			       eap->cmdidx = (cmdidx_T)((int)eap->cmdidx + 1))
	    if (STRNCMP(cmdnames[(int)eap->cmdidx].cmd_name, (char *)eap->cmd,
							    (size_t)len) == 0)
	    {
#ifdef FEAT_EVAL
		/* Full match when the table name ends exactly where the
		 * typed name ends. */
		if (full != NULL
			   && cmdnames[(int)eap->cmdidx].cmd_name[len] == NUL)
		    *full = TRUE;
#endif
		break;
	    }

#ifdef FEAT_USR_CMDS
	/* Look for a user defined command as a last resort.  Let ":Print" be
	 * overruled by a user defined command. */
	/* Only tried when the name starts with an uppercase ASCII letter. */
	if ((eap->cmdidx == CMD_SIZE || eap->cmdidx == CMD_Print)
		&& *eap->cmd >= 'A' && *eap->cmd <= 'Z')
	{
	    /* User defined commands may contain digits. */
	    while (ASCII_ISALNUM(*p))
		++p;
	    p = find_ucmd(eap, p, full, NULL, NULL);
	}
#endif
	/* No character was consumed: report "no command" via CMD_SIZE. */
	if (p == eap->cmd)
	    eap->cmdidx = CMD_SIZE;
    }

    return p;
}

用户自定义命令查找

用户自定义命令是按照命令的字典序排列在数组中的,当查找命令的时候,是拿着用户输入命令匹配所有用户自定义命令。在这个匹配的时候,如果匹配之后,用户自定义列表中的命令还有剩余字符没有被匹配,认为是一个潜在的匹配;如果之后没有剩余(*np == NUL)则是完美匹配;在遇到一个潜在匹配之后再次遇到一个潜在匹配,则认为存在歧义。
以当前vim8.0内置的插件集合来看,除了Explore之外没有其它用户自定义命令以字母E开头,所以即使只输入E也可以执行Explore命令。但是随着插件的增多,用户自定义命令也会越来越多,单单一个字符E就很难保证是唯一的了。

/*
 * Search for a user command that matches "eap->cmd".
 * Return cmdidx in "eap->cmdidx", flags in "eap->argt", idx in "eap->useridx".
 * Return a pointer to just after the command.
 * Return NULL if the typed name is ambiguous (it abbreviates more than one
 * user command and none of them is an exact match).
 * When no user command matches at all, "p" is returned unchanged and the
 * fields of "eap" are left untouched.
 */
    static char_u *
find_ucmd(
    exarg_T	*eap,
    char_u	*p,	/* end of the command (possibly including count) */
    int		*full,	/* set to TRUE for a full match */
    expand_T	*xp,	/* used for completion, NULL otherwise */
    int		*compl)	/* completion flags or NULL */
{
    int		len = (int)(p - eap->cmd);
    int		j, k, matchlen = 0;
    ucmd_T	*uc;
    int		found = FALSE;
    int		possible = FALSE;
    char_u	*cp, *np;	    /* Point into typed cmd and test name */
    garray_T	*gap;
    int		amb_local = FALSE;  /* Found ambiguous buffer-local command,
				       only full match global is accepted. */

    /*
     * Look for buffer-local user commands first, then global ones.
     */
    gap = &curbuf->b_ucmds;
    for (;;)
    {
	for (j = 0; j < gap->ga_len; ++j)
	{
	    uc = USER_CMD_GA(gap, j);
	    cp = eap->cmd;
	    np = uc->uc_name;
	    /* "k" counts how many leading characters of the typed text
	     * agree with this command's name (at most "len"). */
	    k = 0;
	    while (k < len && *np != NUL && *cp++ == *np++)
		k++;
	    /* Candidate when the whole typed text was matched (k == len),
	     * or when the name was used up and the next typed character is
	     * a digit. */
	    if (k == len || (*np == NUL && vim_isdigit(eap->cmd[k])))
	    {
		/* If finding a second match, the command is ambiguous.  But
		 * not if a buffer-local command wasn't a full match and a
		 * global command is a full match. */
		if (k == len && found && *np != NUL)
		{
		    if (gap == &ucmds)
			return NULL;
		    amb_local = TRUE;
		}

		/* Record the first candidate; a later candidate only
		 * replaces it when it is an exact match. */
		if (!found || (k == len && *np == NUL))
		{
		    /* If we matched up to a digit, then there could
		     * be another command including the digit that we
		     * should use instead.
		     */
		    if (k == len)
			found = TRUE;
		    else
			possible = TRUE;

		    if (gap == &ucmds)
			eap->cmdidx = CMD_USER;
		    else
			eap->cmdidx = CMD_USER_BUF;
		    eap->argt = (long)uc->uc_argt;
		    eap->useridx = j;
		    eap->addr_type = uc->uc_addr_type;

# ifdef FEAT_CMDL_COMPL
		    if (compl != NULL)
			*compl = uc->uc_compl;
#  ifdef FEAT_EVAL
		    if (xp != NULL)
		    {
			xp->xp_arg = uc->uc_compl_arg;
			xp->xp_scriptID = uc->uc_scriptID;
		    }
#  endif
# endif
		    /* Do not search for further abbreviations
		     * if this is an exact match. */
		    matchlen = k;
		    if (k == len && *np == NUL)
		    {
			if (full != NULL)
			    *full = TRUE;
			amb_local = FALSE;
			break;
		    }
		}
	    }
	}

	/* Stop if we found a full match or searched all. */
	/* "j < gap->ga_len" holds only when the inner loop was left through
	 * the "break" above, i.e. on an exact match. */
	if (j < gap->ga_len || gap == &ucmds)
	    break;
	gap = &ucmds;
    }

    /* Only found ambiguous matches. */
    if (amb_local)
    {
	if (xp != NULL)
	    xp->xp_context = EXPAND_UNSUCCESSFUL;
	return NULL;
    }

    /* The match we found may be followed immediately by a number.  Move "p"
     * back to point to it. */
    /* When the match stopped at a digit, matchlen < len, so the offset is
     * negative; otherwise matchlen == len and "p" is unchanged. */
    if (found || possible)
	return p + (matchlen - len);
    return p;
}

posted on 2022-09-07 19:43  tsecer  阅读(162)  评论(0编辑  收藏  举报

导航