YouCompleteMe completer及插件模型
问题
尽管VS Code提供了在windows下通过ssh访问linux机器的各种便利,但是使用vim在linux环境下开发,可以方便地使用从Unix系统中继承的强大工具集(例如grep命令),并且构建、部署也更加方便。再加上vim丝滑的操作流,linux环境下的vim开发依然有着很大的吸引力。
但是由于vim的定位只是单个文件的“手术刀”,主要是对文件内容进行读写操作,所以没有语法级别的智能提示;更没有类似于VS的project概念,所以更没有工程级别的智能提示。
不过YouCompleteMe的出现解决了单个编译单元(Compilation Unit)内智能提示的问题:
- 异步
由于是通过启动异步进程(ycmd)来完成completion,所以主线程没有卡顿。
- 全能
不仅支持常见的C/C++,对于python,csharp,go,甚至rust都有支持。架构上看添加一个新的completer也有标准流程。
在文本文件中输入文件路径,还可以自动完成文件名的输入。
- 准确
由于C++的completer是基于llvm的C++编译器实现的,所以提示非常准确。
completer
请求
当在vim中触发completion动作时,vim中的python插件会将这次请求的上下文信息(文件路径filepath、光标所在的行号/列号、当前工作目录、当前编辑buffer的数据file_data等)打包,通过HTTP协议发送给ycmd进程来处理。
##@file:YouCompleteMe/python/ycm/client/base_request.py
# Builds the dict describing one buffer for a request: its file path, a
# line/column position, the working directory and the buffer data.
def BuildRequestData( buffer_number = None ):
"""Build request for the current buffer or the buffer with number
|buffer_number| if specified."""
working_dir = GetCurrentDirectory()
current_buffer = vim.current.buffer
# Only taken when a buffer number was given AND it names a buffer other than
# the current one; a falsy buffer_number falls through to the current buffer.
if buffer_number and current_buffer.number != buffer_number:
# Cursor position is irrelevant when filepath is not the current buffer.
buffer_object = vim.buffers[ buffer_number ]
filepath = vimsupport.GetBufferFilepath( buffer_object )
return {
'filepath': filepath,
# Fixed placeholder position (1, 1) for a non-current buffer.
'line_num': 1,
'column_num': 1,
'working_dir': working_dir,
'file_data': vimsupport.GetUnsavedAndSpecifiedBufferData( buffer_object,
filepath )
}
# Current-buffer case: report the real cursor position.
current_filepath = vimsupport.GetBufferFilepath( current_buffer )
line, column = vimsupport.CurrentLineAndColumn()
return {
'filepath': current_filepath,
# NOTE(review): both values are shifted by one here - presumably
# CurrentLineAndColumn() is 0-based and the request is 1-based; confirm
# against vimsupport.
'line_num': line + 1,
'column_num': column + 1,
'working_dir': working_dir,
'file_data': vimsupport.GetUnsavedAndSpecifiedBufferData( current_buffer,
current_filepath )
}
file_data中包含了buffer的内容(contents)和buffer的文件类型(filetypes)。
## @file:YouCompleteMe/python/ycm/vimsupport.py
# Returns a dict with the text of |buffer_object| ('contents') and its
# filetypes ('filetypes').
def GetBufferData( buffer_object ):
return {
# Add a newline to match what gets saved to disk. See #1455 for details.
'contents': JoinLinesAsUnicode( buffer_object ) + '\n',
'filetypes': FiletypesForBuffer( buffer_object )
}
请求命令字为"completions"
##@file: YouCompleteMe/python/ycm/client/completion_request.py
# Request type that posts its request data to the 'completions' handler.
class CompletionRequest( BaseRequest ):
# Sends self.request_data to the 'completions' handler via
# PostDataToHandlerAsync and keeps the returned object in
# self._response_future.
# NOTE(review): presumably a future that is resolved later by the caller -
# confirm in BaseRequest.
def Start( self ):
self._response_future = self.PostDataToHandlerAsync( self.request_data,
'completions' )
接收
在ycmd服务侧注册了对该命令字的处理函数为GetCompletions函数,也就是说:当ycmd收到completions命令之后进入到该函数处理流程。
这个函数实现中有一个细节:如果文件类型(filetype)对应的completer不可用、执行时抛出了异常,或者没有返回任何结果,并且没有设置force_semantic,就会改用通用completer(GetGeneralCompleter)。
###@file: YouCompleteMe/third_party/ycmd/ycmd/handlers.py
# Handler for POST '/completions': tries the filetype (semantic) completer
# first and falls back to the general completer when that yields nothing.
@app.post( '/completions' )
def GetCompletions():
_logger.info( 'Received completion request' )
request_data = RequestWrap( request.json )
do_filetype_completion = _server_state.ShouldUseFiletypeCompleter(
request_data )
_logger.debug( 'Using filetype completion: %s', do_filetype_completion )
# Both stay None unless the semantic step below sets them.
errors = None
completions = None
if do_filetype_completion:
try:
completions = ( _server_state.GetFiletypeCompleter(
request_data[ 'filetypes' ] )
.ComputeCandidates( request_data ) )
except Exception as exception:
if request_data[ 'force_semantic' ]:
# user explicitly asked for semantic completion, so just pass the error
# back
raise
# store the error to be returned with results from the identifier
# completer
stack = traceback.format_exc()
_logger.error( 'Exception from semantic completer (using general): ' +
"".join( stack ) )
errors = [ BuildExceptionResponse( exception, stack ) ]
# Fallback: no semantic results (step not run, empty result, or a swallowed
# exception) and semantic completion was not forced by the client.
if not completions and not request_data[ 'force_semantic' ]:
completions = _server_state.GetGeneralCompleter().ComputeCandidates(
request_data )
# A falsy completions value is sent as an empty list; any error recorded from
# the semantic completer is returned alongside the results.
return _JsonResponse(
BuildCompletionResponse( completions if completions else [],
request_data[ 'start_column' ],
errors = errors ) )
查找
把文件类型作为文件夹名称,在completer文件夹下查找对应文件夹中的hook.py文件;如果该文件存在,就加载它并调用其中的GetCompleter来创建completer。
###@file:third_party/ycmd/ycmd/server_state.py
# Returns the completer for |filetype|, loading it from its hook.py plugin
# loader and caching the result on first use. May return None.
def _GetFiletypeCompleterForFiletype( self, filetype ):
# NOTE(review): indentation is lost in this excerpt; presumably the whole
# lookup-and-load sequence below runs while holding this lock - confirm
# against server_state.py.
with self._filetype_completers_lock:
try:
return self._filetype_completers[ filetype ]
except KeyError:
# Not cached yet: fall through and try to load the plugin.
pass
module_path = PathToFiletypeCompleterPluginLoader( filetype )
completer = None
supported_filetypes = { filetype }
# completer stays None when no loader file exists at module_path.
if os.path.exists( module_path ):
module = LoadPythonSource( filetype, module_path )
completer = module.GetCompleter( self._user_options )
if completer:
supported_filetypes.update( completer.SupportedFiletypes() )
# Cache the result (possibly None) under the requested filetype and under
# every filetype the completer reports supporting.
for supported_filetype in supported_filetypes:
self._filetype_completers[ supported_filetype ] = completer
return completer
# Returns the path of the 'hook.py' file inside the sub-folder named
# |filetype| of the folder returned by _PathToCompletersFolder().
def PathToFiletypeCompleterPluginLoader( filetype ):
return os.path.join( _PathToCompletersFolder(), filetype, 'hook.py' )
在completer文件夹可以看到,当前支持的completer包括下面列表,其中也包含了通用的general文件夹,这个也是前面看到的GetCompletions函数中使用的last resort completer。
tsecer@harry: find . -maxdepth 1 -type d
.
./general
./cs
./objcpp
./typescript
./all
./go
./python
./java
./rust
./objc
./cpp
./javascript
./__pycache__
./c
./cuda
./language_server
tsecer@harry:
ycm_core
入口
在ycmd程序的入口,注释中说明了不能全局import ycmd 中handler文件的原因在于ycm_core文件需要依赖extra_conf文件的加载。也就是说:ycm_core模块是必定会被加载的,只是为了让conf先加载而推迟到了函数内部。可以通过打开任一个非cpp文件,看运行ycmd的python进程地址空间中始终是有ycm_core.so这个文件来验证。
# YouCompleteMe\third_party\ycmd\ycmd\__main__.py
def Main():
# This ensures that ycm_core is not loaded before extra conf
# preload was run.
YcmCoreSanityCheck()
extra_conf_store.CallGlobalExtraConfYcmCorePreloadIfExists()
code = ImportAndCheckCore()
if code:
sys.exit( code )
PossiblyDetachFromTerminal()
# These can't be top-level imports because they transitively import
# ycm_core which we want to be imported ONLY after extra conf
# preload has executed.
from ycmd import handlers
from ycmd.watchdog_plugin import WatchdogPlugin
handlers.UpdateUserOptions( options )
handlers.SetHmacSecret( hmac_secret )
handlers.KeepSubserversAlive( args.check_interval_seconds )
SetUpSignalHandler()
在handlers文件中直接加载了ycm_core.so文件(关于python加载so模块的问题后面会讨论),
# third_party/ycmd/ycmd/handlers.py
from bottle import request
import ycm_core
from ycmd import extra_conf_store, hmac_plugin, server_state, user_options_store
蜕化
在没有专门的语法提示工具的时候,ycm_core可以蜕化为一个简单的标识符提示器(identifier completer),从这个名字也可以看出来,大致的意思就是不再识别语法(syntax),而是把文件作为一个单纯的文本文件来处理。
从效果上看,这是ycmd内置的、last resort的提示器:比方说对于文本文件,或者对应的语义completer没有启用的文件类型(例如没有开启rust支持时的rust文件),就可以通过这个提示器来完成提示。
# ycmd/completers/all/identifier_completer.py
# General completer that delegates to the ycm_core.IdentifierCompleter object
# created in __init__.
class IdentifierCompleter( GeneralCompleter ):
def __init__( self, user_options ):
super( IdentifierCompleter, self ).__init__( user_options )
# The identifier completer object provided by the ycm_core module.
self._completer = ycm_core.IdentifierCompleter()
# defaultdict( int ): keys that were never set read as 0.
# NOTE(review): presumably maps a tags file to its last seen mtime - confirm.
self._tags_file_last_mtime = defaultdict( int )
self._logger = logging.getLogger( __name__ )
# Taken from the 'max_num_identifier_candidates' user option.
self._max_candidates = user_options[ 'max_num_identifier_candidates' ]
在general_completer_store中引用的也是IdentifierCompleter
# ycmd/ycmd/completers/general/general_completer_store.py
from ycmd.completers.completer import Completer
from ycmd.completers.all.identifier_completer import IdentifierCompleter
from ycmd.completers.general.filename_completer import FilenameCompleter
#...
# Completer that owns the general (filetype-independent) completers.
class GeneralCompleterStore( Completer ):
"""
Holds a list of completers that can be used in all filetypes.
It overrides all Competer API methods so that specific calls to
GeneralCompleterStore are passed to all general completers.
"""
def __init__( self, user_options ):
super( GeneralCompleterStore, self ).__init__( user_options )
# The identifier completer is created here with the same user options.
self._identifier_completer = IdentifierCompleter( user_options )
如果需要使用C++的completer,则需要在编译的时候提供libclang.so文件。从ycmd的实现来看,可以通过编译宏控制ycm_core.so中是否开启clang相关部分的编译;如果开启,则需要链接libclang.so文件,而这个文件是根据静态配置从llvm上直接下载获得的。
general completer
正如YouCompleteMe文档所说,除了基于语法的提示之外,还有超级快速的identifier提示、filename提示、UltiSnips提示:
- Super-fast identifier completer including tags files and syntax elements
- Intelligent suggestion ranking and filtering
- File and path suggestions
- Suggestions from Vim's omnifunc
- UltiSnips snippet suggestions
其中在general文件夹下,包含了上面提到的file和UltiSnips两个功能相关的文件,而identifier则位于all/identifier_completer.py文件中。
tsecer@harry: ls *.py
filename_completer.py general_completer_store.py __init__.py ultisnips_completer.py
tsecer@harry:
identifier
正如文档所说,在文件就绪之后,通过_AddBufferIdentifiers将buffer中内容分割成identifier;通过_AddIdentifiersFromTagFiles将tag文件中的标识符添加进来;通过_AddIdentifiersFromSyntax将语法关键字添加进来。
###@file:third_party/ycmd/ycmd/completers/all/identifier_completer.py
# Collects identifiers from the request: buffer identifiers always, and
# identifiers from tag files / syntax keywords only when the corresponding
# key is present in |request_data|.
def OnFileReadyToParse( self, request_data ):
self._AddBufferIdentifiers( request_data )
if 'tag_files' in request_data:
self._AddIdentifiersFromTagFiles( request_data[ 'tag_files' ] )
if 'syntax_keywords' in request_data:
# Syntax keywords are added together with the request's 'first_filetype'.
self._AddIdentifiersFromSyntax( request_data[ 'syntax_keywords' ],
request_data[ 'first_filetype' ] )
可以注意到,completion的计算和identifier的存储都是在ycm_core这个so文件中完成的。
为什么要这么做呢?能考虑到的原因是C++对于大规模字符串的存储/查询效率更高?为了和支持clang时的ycm_core流程一致?...,不得而知。
class IdentifierCompleter( GeneralCompleter ):
def __init__( self, user_options ):
super( IdentifierCompleter, self ).__init__( user_options )
self._completer = ycm_core.IdentifierCompleter()
self._tags_file_last_mtime = defaultdict( int )
self._logger = logging.getLogger( __name__ )
####....
def ComputeCandidates( self, request_data ):
if not self.ShouldUseNow( request_data ):
return []
completions = self._completer.CandidatesForQueryAndType(
###...
def _AddPreviousIdentifier( self, request_data ):
self._AddIdentifier(
_PreviousIdentifier(
self.user_options[ 'min_num_of_chars_for_completion' ],
self.user_options[ 'collect_identifiers_from_comments_and_strings' ],
request_data ),
request_data )
file and path
这个其实相对比较简单,根据相对路径或者绝对路径读取目录中文件列表。相关代码在third_party/ycmd/ycmd/completers/general/filename_completer.py文件中。
###@file:third_party/ycmd/ycmd/completers/general/filename_completer.py
# Lists the directory resolved from |path_dir| and returns a list of
# ( entry name, path type ) tuples, one per directory entry.
def _GetPathCompletionCandidates( path_dir, use_working_dir,
filepath, working_dir ):
absolute_path_dir = _GetAbsolutePathForCompletions( path_dir,
use_working_dir,
filepath,
working_dir )
entries = []
unicode_path = ToUnicode( absolute_path_dir )
try:
# We need to pass a unicode string to get unicode strings out of
# listdir.
relative_paths = os.listdir( unicode_path )
except Exception:
# Any failure to list the directory is logged and treated as "no entries".
_logger.exception( 'Error while listing %s folder.', absolute_path_dir )
relative_paths = []
for rel_path in relative_paths:
# The path type is computed from the full path; the entry keeps the bare name.
absolute_path = os.path.join( unicode_path, rel_path )
entries.append( ( rel_path, GetPathType( absolute_path ) ) )
return entries
UltiSnips
UltiSnips是一个vim插件,YouCompleteMe只是通过约定的UltiSnips函数来提前获得所有片段列表,然后将这个信息发送给ycmd,ycmd用这个信息来丰富了自己的提示库。或者再通俗的说:就是UltiSnips主要是用来完成代码片段的识别和替换,而ycmd则主要负责在提示框中展示内容。
###@file: python/ycm/youcompleteme.py
# Stores a list of { 'trigger', 'description' } dicts, one per UltiSnips
# snippet, in extra_data[ 'ultisnips_snippets' ]. Leaves |extra_data|
# untouched when the UltiSnips call raises vim.error.
def _AddUltiSnipsDataIfNeeded( self, extra_data ):
# See :h UltiSnips#SnippetsInCurrentScope.
try:
vim.eval( 'UltiSnips#SnippetsInCurrentScope( 1 )' )
except vim.error:
# NOTE(review): presumably UltiSnips is not available - nothing to add.
return
# NOTE(review): presumably this variable is filled in by the call above -
# confirm in the UltiSnips help.
snippets = vimsupport.GetVariableValue( 'g:current_ulti_dict_info' )
extra_data[ 'ultisnips_snippets' ] = [
{ 'trigger': trigger,
'description': snippet[ 'description' ] }
for trigger, snippet in iteritems( snippets )
]
栗子
在使用ycm时,提示内容通常不会为空,因为根据下面的流程,如果语法识别出来的匹配为空(而不是文件类型对应的completer为空),默认会继续执行通用的匹配,通用的匹配会经过文件匹配和标识符匹配。这也意味着,即使在c++代码中,如果单独输入一个文件的绝对路径或者相对路径,在C++语法识别的匹配为空的情况下,依然会(由file匹配来生成)有基于文件路径的有效匹配。对于文本文件来说,由于没有对应的completer,所以也是这个流程。
相关代码再贴一份。可以看到,如果文件类型对应的completer返回的completion为空,就会再从general completer中获取:
###@file:third_party/ycmd/ycmd/handlers.py
# Handler for POST '/completions': tries the filetype (semantic) completer
# first and falls back to the general completer when that yields nothing.
@app.post( '/completions' )
def GetCompletions():
_logger.info( 'Received completion request' )
request_data = RequestWrap( request.json )
do_filetype_completion = _server_state.ShouldUseFiletypeCompleter(
request_data )
_logger.debug( 'Using filetype completion: %s', do_filetype_completion )
# Both stay None unless the semantic step below sets them.
errors = None
completions = None
if do_filetype_completion:
try:
completions = ( _server_state.GetFiletypeCompleter(
request_data[ 'filetypes' ] )
.ComputeCandidates( request_data ) )
except Exception as exception:
if request_data[ 'force_semantic' ]:
# user explicitly asked for semantic completion, so just pass the error
# back
raise
# store the error to be returned with results from the identifier
# completer
stack = traceback.format_exc()
_logger.error( 'Exception from semantic completer (using general): ' +
"".join( stack ) )
errors = [ BuildExceptionResponse( exception, stack ) ]
# Fallback: no semantic results (step not run, empty result, or a swallowed
# exception) and semantic completion was not forced by the client.
if not completions and not request_data[ 'force_semantic' ]:
completions = _server_state.GetGeneralCompleter().ComputeCandidates(
request_data )
# A falsy completions value is sent as an empty list; any error recorded from
# the semantic completer is returned alongside the results.
return _JsonResponse(
BuildCompletionResponse( completions if completions else [],
request_data[ 'start_column' ],
errors = errors ) )
SharedLibrary
前缀
当加载一个扩展模块(共享库)的时候,会按照固定的前缀去查找模块的初始化函数,这个前缀通常就是PyInit。
//importdl.c
static const char * const ascii_only_prefix = "PyInit";
static const char * const nonascii_prefix = "PyInitU";
/* Get the variable part of a module's export symbol name.
* Returns a bytes instance. For non-ASCII-named modules, the name is
* encoded as per PEP 489.
* The hook_prefix pointer is set to either ascii_only_prefix or
* nonascii_prefix, as appropriate.
*/
static PyObject *
get_encoded_name(PyObject *name, const char **hook_prefix) {
///...
/* Encode to ASCII or Punycode, as needed */
encoded = PyUnicode_AsEncodedString(name, "ascii", NULL);
if (encoded != NULL) {
*hook_prefix = ascii_only_prefix;
} else {
if (PyErr_ExceptionMatches(PyExc_UnicodeEncodeError)) {
PyErr_Clear();
encoded = PyUnicode_AsEncodedString(name, "punycode", NULL);
if (encoded == NULL) {
goto error;
}
*hook_prefix = nonascii_prefix;
} else {
goto error;
}
}
///...
}
接口
效果上看,就是通过PyInit_ModuleName来获得函数名。这个其实看起来是有些冗余的,但是“冗余”通常还有一个优点就是容错:在加载的时候会检测模块名和模块中函数的一致性?
PyObject *
_PyImport_LoadDynamicModuleWithSpec(PyObject *spec, FILE *fp)
{
#ifndef MS_WINDOWS
PyObject *pathbytes = NULL;
#endif
PyObject *name_unicode = NULL, *name = NULL, *path = NULL, *m = NULL;
const char *name_buf, *hook_prefix;
char *oldcontext;
dl_funcptr exportfunc;
PyModuleDef *def;
PyObject *(*p0)(void);
name_unicode = PyObject_GetAttrString(spec, "name");
if (name_unicode == NULL) {
return NULL;
}
name = get_encoded_name(name_unicode, &hook_prefix);
if (name == NULL) {
goto error;
}
name_buf = PyBytes_AS_STRING(name);
path = PyObject_GetAttrString(spec, "origin");
if (path == NULL)
goto error;
#ifdef MS_WINDOWS
exportfunc = _PyImport_FindSharedFuncptrWindows(hook_prefix, name_buf,
path, fp);
#else
pathbytes = PyUnicode_EncodeFSDefault(path);
if (pathbytes == NULL)
goto error;
exportfunc = _PyImport_FindSharedFuncptr(hook_prefix, name_buf,
PyBytes_AS_STRING(pathbytes),
fp);
Py_DECREF(pathbytes);
#endif
///...
}
dl_funcptr
_PyImport_FindSharedFuncptr(const char *prefix,
const char *shortname,
const char *pathname, FILE *fp)
{
dl_funcptr p;
void *handle;
char funcname[258];
char pathbuf[260];
int dlopenflags=0;
if (strchr(pathname, '/') == NULL) {
/* Prefix bare filename with "./" */
PyOS_snprintf(pathbuf, sizeof(pathbuf), "./%-.255s", pathname);
pathname = pathbuf;
}
PyOS_snprintf(funcname, sizeof(funcname),
LEAD_UNDERSCORE "%.20s_%.200s", prefix, shortname);
///...
}
文档
Python文档的“1.4. The Module’s Method Table and Initialization Function”一节说明了初始化函数必须命名为PyInit_name,其中name为模块(Module)的名字:
This structure, in turn, must be passed to the interpreter in the module’s initialization function. The initialization function must be named PyInit_name(), where name is the name of the module, and should be the only non-static item defined in the module file:
插件
UE
每个模块需要通过IMPLEMENT_MODULE宏来定义模块导出的内容。可以看到其中一个关键的内容就是定义了InitializeModule接口,该接口返回一个新创建的ModuleImplClass类型的对象。
///ModuleManager.h
/* Per-module boilerplate macro. For the given ModuleImplClass / ModuleName it
   defines the exported C-linkage InitializeModule() entry point, which returns
   a newly allocated ModuleImplClass, plus an empty C-linkage marker function
   IMPLEMENT_MODULE_<ModuleName>(), and then expands PER_MODULE_BOILERPLATE and
   PER_MODULE_BOILERPLATE_ANYLINK. */
#define IMPLEMENT_MODULE( ModuleImplClass, ModuleName ) \
\
/**/ \
/* InitializeModule function, called by module manager after this module's DLL has been loaded */ \
/**/ \
/* @return Returns an instance of this module */ \
/**/ \
extern "C" DLLEXPORT IModuleInterface* InitializeModule() \
{ \
return new ModuleImplClass(); \
} \
/* Forced reference to this function is added by the linker to check that each module uses IMPLEMENT_MODULE */ \
extern "C" void IMPLEMENT_MODULE_##ModuleName() { } \
PER_MODULE_BOILERPLATE \
PER_MODULE_BOILERPLATE_ANYLINK(ModuleImplClass, ModuleName)
在加载模块之后,调用模块中的约定好的InitializeModule函数,该函数返回一个IModuleInterface类型对象。
IModuleInterface* FModuleManager::LoadModuleWithFailureReason(const FName InModuleName, EModuleLoadResult& OutFailureReason, ELoadModuleFlags InLoadModuleFlags)
{
///...
// Find our "InitializeModule" global function, which must exist for all module DLLs
FInitializeModuleFunctionPtr InitializeModuleFunctionPtr =
(FInitializeModuleFunctionPtr)FPlatformProcess::GetDllExport(ModuleInfo->Handle, TEXT("InitializeModule"));
if (InitializeModuleFunctionPtr != nullptr)
{
if ( ModuleInfo->Module.IsValid() )
{
// Assign the already loaded module into the return value, otherwise the return value gives the impression the module failed load!
LoadedModule = ModuleInfo->Module.Get();
}
else
{
// Initialize the module!
ModuleInfo->Module = TUniquePtr<IModuleInterface>(InitializeModuleFunctionPtr());
///...
}
gcc
gcc同样支持插件式开发。从实现上看,插件也是通过调用约定名称的plugin_init函数来完成初始化的,gcc文档中的说明如下:
Every plugin should export a function called plugin_init that is called right after the plugin is loaded. This function is responsible for registering all the callbacks required by the plugin and do any other required initialization.
对应的声明和加载代码如下:
///@file:gcc/plugin.h
/* Declaration for "plugin_init" function so that it doesn't need to be
duplicated in every plugin. */
extern int plugin_init (struct plugin_name_args *plugin_info,
struct plugin_gcc_version *version);
///@file:gcc/plugin.c
/* Each plugin should define an initialization function with exactly
this name. */
static const char *str_plugin_init_func_name = "plugin_init";
/* Try to initialize PLUGIN. Return true if successful. */
/* Steps: dlopen the shared object named by plugin->full_name, require the
   symbol named by str_license to be present, look up the symbol named by
   str_plugin_init_func_name and call it with PLUGIN and &gcc_version.
   Returns false after reporting an error when the object cannot be loaded,
   the init symbol cannot be found, or the init routine returns non-zero;
   a missing license symbol is reported through fatal_error.  */
static bool
try_init_one_plugin (struct plugin_name_args *plugin)
{
void *dl_handle;
plugin_init_func plugin_init;
const char *err;
PTR_UNION_TYPE (plugin_init_func) plugin_init_union;
/* We use RTLD_NOW to accelerate binding and detect any mismatch
between the API expected by the plugin and the GCC API; we use
RTLD_GLOBAL which is useful to plugins which themselves call
dlopen. */
dl_handle = dlopen (plugin->full_name, RTLD_NOW | RTLD_GLOBAL);
if (!dl_handle)
{
error ("cannot load plugin %s\n%s", plugin->full_name, dlerror ());
return false;
}
/* Clear any existing error. */
dlerror ();
/* Check the plugin license. */
if (dlsym (dl_handle, str_license) == NULL)
fatal_error (input_location,
"plugin %s is not licensed under a GPL-compatible license\n"
"%s", plugin->full_name, dlerror ());
/* The lookup result goes through a pointer union: stored as a void
   pointer, then read back as the function-pointer type.  */
PTR_UNION_AS_VOID_PTR (plugin_init_union) =
dlsym (dl_handle, str_plugin_init_func_name);
plugin_init = PTR_UNION_AS_CAST_PTR (plugin_init_union);
/* The lookup is judged by dlerror () rather than by the returned
   pointer.  */
if ((err = dlerror ()) != NULL)
{
error ("cannot find %s in plugin %s\n%s", str_plugin_init_func_name,
plugin->full_name, err);
return false;
}
/* Call the plugin-provided initialization routine with the arguments. */
/* A non-zero return value from the routine is treated as failure.  */
if ((*plugin_init) (plugin, &gcc_version))
{
error ("fail to initialize plugin %s", plugin->full_name);
return false;
}
return true;
}
vim
vim文档说明了添加一个插件的方式就是把脚本文件放到合适的文件夹中。由于脚本语言没有约定的入口(或者说入口就是文件的第一行),所以只需要把文件放到指定文件夹位置即可。
add-global-plugin
You can add a global plugin to add functionality that will always be present
when you use Vim. There are only two steps for adding a global plugin:
- Get a copy of the plugin.
- Drop it in the right directory.
从代码上看,vim是通过wildcard执行指定文件夹下的所有脚本文件,尽管通常文件夹只有一个文件。
int
vim_main2(void)
{
///...
source_in_path(rtp_copy == NULL ? p_rtp : rtp_copy,
# ifdef VMS /* Somehow VMS doesn't handle the "**". */
(char_u *)"plugin/*.vim",
# else
(char_u *)"plugin/**/*.vim",
# endif
DIP_ALL | DIP_NOAFTER);
///...
}
/*
 * Just like source_runtime(), but use "path" instead of 'runtimepath'.
 * Forwards "path", "name" and "flags" to do_in_path_and_pp() with
 * source_callback as the callback and a NULL cookie, and returns its result.
 */
int
source_in_path(char_u *path, char_u *name, int flags)
{
return do_in_path_and_pp(path, name, flags, source_callback, NULL);
}
int
do_in_path(
char_u *path,
char_u *name,
int flags,
void (*callback)(char_u *fname, void *ck),
void *cookie)
{
///...
/* Expand wildcards, invoke the callback for each match. */
if (gen_expand_wildcards(1, &buf, &num_files, &files,
(flags & DIP_DIR) ? EW_DIR : EW_FILE) == OK)
{
for (i = 0; i < num_files; ++i)
{
(*callback)(files[i], cookie);
did_one = TRUE;
if (!(flags & DIP_ALL))
break;
}
FreeWild(num_files, files);
}
///...
}