dbt adapter 注册处理简单说明
以下简单说明下dbt 是如何加载其他adapter 的
参考注册处理
实际上是在cli 的manifest 装饰器的parse_manifest 中处理的
- 参考代码
def parse_manifest(runtime_config, write_perf_info, write, write_json):
    """Register the adapter, build the full manifest and optionally persist it.

    Args:
        runtime_config: Runtime configuration; passed to the adapter
            registration/lookup helpers and to the manifest loader.
        write_perf_info: Forwarded to ``ManifestLoader.get_full_manifest``.
        write: The manifest is written only when both ``write`` and
            ``write_json`` are truthy.
        write_json: See ``write``.

    Returns:
        The manifest produced by ``ManifestLoader.get_full_manifest``.
    """
    # The adapter must be registered before it can be fetched; this is the
    # point where adapter registration is triggered.
    register_adapter(runtime_config, get_mp_context())
    adapter = get_adapter(runtime_config)
    adapter.set_macro_context_generator(generate_runtime_macro_context)
    manifest = ManifestLoader.get_full_manifest(
        runtime_config,
        write_perf_info=write_perf_info,
    )

    # NOTE(review): the indentation of this excerpt was lost; the plugin
    # artifact handling is assumed to belong to the ``if`` body -- confirm
    # against the upstream dbt-core source.
    if write and write_json:
        write_manifest(manifest, runtime_config.project_target_path)
        pm = plugins.get_plugin_manager(runtime_config.project_name)
        plugin_artifacts = pm.get_manifest_artifacts(manifest)
        for path, plugin_artifact in plugin_artifacts.items():
            plugin_artifact.write(path)
    return manifest
- register_adapter 处理
调用dbt-adapters 包中的方法,具体是dbt/adapters/factory.py 模块中的方法
def register_adapter(self, config: AdapterRequiredConfig, mp_context: SpawnContext) -> None:
    """Instantiate the adapter named by the credentials and cache it.

    Args:
        config: Configuration whose ``credentials.type`` gives the adapter
            name; also passed to the adapter constructor.
        mp_context: Multiprocessing context passed to the adapter constructor.
    """
    adapter_name = config.credentials.type
    # This only looks up an adapter class that has already been loaded. The
    # actual loading happens earlier, via the ``profile`` decorator, because
    # the profile file has to be read first to learn the connection details.
    adapter_type = self.get_adapter_class_by_name(adapter_name)
    adapter_version = self._adapter_version(adapter_name)
    fire_event(
        AdapterRegistered(adapter_name=adapter_name, adapter_version=adapter_version)
    )
    with self.lock:
        if adapter_name in self.adapters:
            # this shouldn't really happen...
            return

        adapter: Adapter = adapter_type(config, mp_context)  # type: ignore
        self.adapters[adapter_name] = adapter
- profile 装饰器使用
参考run cli
@cli.command("run")
@click.pass_context
@global_flags
@p.exclude
@p.full_refresh
@p.profile
@p.profiles_dir
@p.project_dir
@p.empty
@p.select
@p.selector
@p.target
@p.target_path
@p.threads
@p.vars
@requires.postflight
@requires.preflight
@requires.profile
@requires.project
- profile 装饰器处理
def profile(func):
    """Decorator that loads the profile and stores it on the click context.

    The wrapped callable must receive the click ``Context`` as its first
    positional argument, and ``ctx.obj["flags"]`` must already be populated.
    The loaded profile is stored under ``ctx.obj["profile"]`` before ``func``
    is invoked.
    """

    def wrapper(*args, **kwargs):
        ctx = args[0]
        assert isinstance(ctx, Context)

        flags = ctx.obj["flags"]
        # TODO: Generalize safe access to flags.THREADS:
        # https://github.com/dbt-labs/dbt-core/issues/6259
        threads = getattr(flags, "THREADS", None)
        profile = load_profile(flags.PROJECT_DIR, flags.VARS, flags.PROFILE, flags.TARGET, threads)
        ctx.obj["profile"] = profile

        return func(*args, **kwargs)

    # Copy func's metadata (name, docstring, ...) onto the wrapper.
    return update_wrapper(wrapper, func)
adapter 插件处理
- adapter 插件定义
adapter 中的dbt/adapters/base/plugin.py
class AdapterPlugin:
    """Defines the basic requirements for a dbt adapter plugin.

    :param adapter: The adapter class provided by this plugin.
    :param credentials: The credentials class provided by this plugin.
    :param include_path: The path to this adapter plugin's root
    :param dependencies: A list of adapter names that this adapter depends
        upon. ``None`` is treated as "no dependencies".
    :param project_name: Optional explicit project name. When omitted (or
        empty) it defaults to ``dbt_<last component of include_path>``.
    """

    def __init__(
        self,
        adapter: Type[AdapterProtocol],
        credentials: Type[Credentials],
        include_path: str,
        dependencies: Optional[List[str]] = None,
        project_name: Optional[str] = None,
    ) -> None:
        self.adapter: Type[AdapterProtocol] = adapter
        self.credentials: Type[Credentials] = credentials
        self.include_path: str = include_path
        # Fall back to a name derived from the include directory.
        self.project_name: str = project_name or f"dbt_{Path(include_path).name}"
        self.dependencies: List[str]
        if dependencies is None:
            self.dependencies = []
        else:
            self.dependencies = dependencies
- adapter 插件加载
是在获取profile 过程中获取到的(读取认证信息)
core 中的profile
@staticmethod
def _credentials_from_profile(
    profile: Dict[str, Any], profile_name: str, target_name: str
) -> Credentials:
    """Build a credentials object from a raw profile target dictionary.

    The ``"type"`` entry is removed from ``profile`` (the dict is mutated)
    and used to load the matching adapter plugin, whose credentials class
    then translates, validates and parses the remaining entries.

    Args:
        profile: Raw profile data for one target; must contain ``"type"``.
        profile_name: Profile name, used only in error messages.
        target_name: Target name, used only in error messages.

    Returns:
        The credentials instance built by the plugin's credentials class.

    Raises:
        DbtProfileError: If ``"type"`` is missing, or if loading the plugin
            or validating the data raises ``DbtRuntimeError`` or
            ``ValidationError``.
    """
    # avoid an import cycle
    from dbt.adapters.factory import load_plugin

    # credentials carry their 'type' in their actual type, not their
    # attributes. We do want this in order to pick our Credentials class.
    if "type" not in profile:
        raise DbtProfileError(
            'required field "type" not found in profile {} and target {}'.format(
                profile_name, target_name
            )
        )

    typename = profile.pop("type")
    try:
        # Loading the plugin is what actually imports the adapter. Per the
        # accompanying notes it is then kept in the factory module's
        # module-level ``FACTORY: AdapterContainer`` instance, so later
        # lookups can go through that module.
        cls = load_plugin(typename)
        data = cls.translate_aliases(profile)
        cls.validate(data)
        credentials = cls.from_dict(data)
    except (DbtRuntimeError, ValidationError) as e:
        msg = str(e) if isinstance(e, DbtRuntimeError) else e.message
        raise DbtProfileError(
            'Credentials in profile "{}", target "{}" invalid: {}'.format(
                profile_name, target_name, msg
            )
        ) from e

    return credentials
- 实际load_plugin 处理
内部处理是基于 importlib 的 import_module 实现的
def load_plugin(self, name: str) -> Type[Credentials]:
    """Import the adapter plugin called ``name`` and record it.

    Args:
        name: Adapter type name; the module ``dbt.adapters.<name>`` is
            imported.

    Returns:
        The credentials class declared by the plugin.

    Raises:
        DbtRuntimeError: If the adapter module itself cannot be found, or if
            the plugin's adapter reports a type different from ``name``.
        ModuleNotFoundError: Re-raised when the missing module is some other
            library rather than the adapter module itself.
    """
    # this doesn't need a lock: in the worst case we'll overwrite packages
    # and adapter_type entries with the same value, as they're all
    # singletons
    try:
        # The module that gets imported is dbt.adapters.<adapter name>,
        # e.g. dbt.adapters.dremio for the dremio adapter.
        # mypy doesn't think modules have any attributes.
        mod: Any = import_module("." + name, "dbt.adapters")
    except ModuleNotFoundError as exc:
        # if we failed to import the target module in particular, inform
        # the user about it via a runtime error
        if exc.name == "dbt.adapters." + name:
            fire_event(AdapterImportError(exc=str(exc)))
            raise DbtRuntimeError(f"Could not find adapter type {name}!")
        # otherwise, the error had to have come from some underlying
        # library. Log the stack trace.
        fire_event(PluginLoadError(exc_info=traceback.format_exc()))
        raise

    # Every adapter module is expected to expose a module-level ``Plugin``
    # variable; all adapters must follow this convention.
    plugin: AdapterPlugin = mod.Plugin
    plugin_type = plugin.adapter.type()

    if plugin_type != name:
        raise DbtRuntimeError(
            f"Expected to find adapter with type named {name}, got "
            f"adapter with type {plugin_type}"
        )

    with self.lock:
        # things do hold the lock to iterate over it so we need it to add
        self.plugins[name] = plugin
        self.packages[plugin.project_name] = Path(plugin.include_path)

    # If the plugin declares dependencies, load those adapters as well.
    # NOTE(review): the indentation of this excerpt was lost; the loop is
    # assumed to run after the lock is released so the recursive call can
    # take the lock itself -- confirm against the upstream source.
    for dep in plugin.dependencies:
        self.load_plugin(dep)

    return plugin.credentials
说明
dbt 对于adapter 的加载实际上是基于python 的动态模块加载机制(importlib 的 import_module)实现的,加载之后会放到一个模块变量中方便共享使用,而加载的触发实际上也是通过
装饰器(profile)处理的。以上只是一个简单说明,实际可以结合完整代码学习
参考资料
core/dbt/cli/requires.py (core )
dbt/adapters/base/plugin.py (adapters)
core/dbt/config/profile.py (core)
https://github.com/dbt-labs/dbt-adapters
https://docs.python.org/3/library/importlib.html