dbt doc 函数内部处理简单说明
dbt 提供了一个方便的 doc 函数,可以像使用 ref 一样,方便地引用 docs block 中定义的文档内容
引用参考处理
- 示例
# schema.yml: the model description is pulled from the docs block named
# "table_events" through the doc() function.
version: 2

models:
  - name: events
    description: '{{ doc("table_events") }}'

    columns:
      - name: event_id
        description: This is a unique identifier for the event
        tests:
          - unique
          - not_null
参考处理
主要是通过 DocsRuntimeContext 中的 doc 方法处理的
class DocsRuntimeContext(SchemaYamlContext):
    """Context that provides the ``doc`` function for a single node.

    Stores the node being processed and the manifest so that ``doc`` can
    resolve docs blocks on behalf of that node.
    """

    def __init__(
        self,
        config: RuntimeConfig,
        node: Union[Macro, ResultNode],
        manifest: Manifest,
        current_project: str,
    ) -> None:
        """Initialise the parent context and remember the node and manifest.

        Args:
            config: Runtime configuration, forwarded to the parent context.
            node: The macro or node on whose behalf ``doc`` is resolved.
            manifest: Manifest used to look up docs blocks and source files.
            current_project: Project name, forwarded to the parent context.
        """
        super().__init__(config, current_project, None)
        self.node = node
        self.manifest = manifest

    # Method marked with the @contextmember decorator
    @contextmember
    def doc(self, *args: str) -> str:
        """The `doc` function is used to reference docs blocks in schema.yml
        files. It is analogous to the `ref` function. For more information,
        consult the Documentation guide.

        > orders.md:

            {% docs orders %}
            # docs
            - go
            - here
            {% enddocs %}

        > schema.yml

            version: 2

            models:
              - name: orders
                description: "{{ doc('orders') }}"

        Args:
            *args: Either ``(doc_name,)`` or ``(doc_package_name, doc_name)``.

        Returns:
            The ``block_contents`` of the resolved docs block.

        Raises:
            DocArgsError: If called with anything other than one or two
                arguments.
            DocTargetNotFoundError: If the manifest cannot resolve the
                requested docs block.
        """
        # when you call doc(), this is what happens at runtime
        if len(args) == 1:
            doc_package_name = None
            doc_name = args[0]
        elif len(args) == 2:
            doc_package_name, doc_name = args
        else:
            raise DocArgsError(self.node, args)

        # The Documentation object is actually obtained through the
        # manifest's resolve_doc.
        # NOTE(review): self._project_name is not assigned in this class;
        # presumably the parent context sets it -- confirm.
        target_doc = self.manifest.resolve_doc(
            doc_name,
            doc_package_name,
            self._project_name,
            self.node.package_name,
        )

        if target_doc:
            file_id = target_doc.file_id
            if file_id in self.manifest.files:
                # Register this node's unique_id on the source file that
                # holds the docs block.
                source_file = self.manifest.files[file_id]
                # TODO CT-211
                source_file.add_node(self.node.unique_id)  # type: ignore[union-attr]
        else:
            raise DocTargetNotFoundError(
                node=self.node, target_doc_name=doc_name, target_doc_package=doc_package_name
            )

        return target_doc.block_contents
def generate_runtime_docs_context(
    config: RuntimeConfig,
    target: Any,
    manifest: Manifest,
    current_project: str,
) -> Dict[str, Any]:
    """Build a DocsRuntimeContext for ``target`` and return it as a dict.

    Args:
        config: Runtime configuration passed to the context.
        target: Object passed to the context as its node.
        manifest: Manifest passed to the context.
        current_project: Project name passed to the context.

    Returns:
        The result of calling ``to_dict()`` on the constructed context.
    """
    ctx = DocsRuntimeContext(config, target, manifest, current_project)
    # This is not a Mashumaro to_dict call
    return ctx.to_dict()
- resolve_doc 处理
核心是返回一个 Documentation 对象(所有候选包中都找不到时返回 None)
def resolve_doc(
    self,
    name: str,
    package: Optional[str],
    current_project: str,
    node_package: str,
) -> Optional[Documentation]:
    """Resolve the given documentation. This follows the same algorithm as
    resolve_ref except the is_enabled checks are unnecessary as docs are
    always enabled.

    Args:
        name: Name of the docs block to find.
        package: Explicit package name, or None when not specified.
        current_project: Name of the current project.
        node_package: Package name of the node requesting the docs block.

    Returns:
        The first match found among the candidate packages, or None when
        no candidate package contains the docs block.
    """
    # Candidate packages are tried in the order _search_packages yields them;
    # the first hit wins.
    candidates = _search_packages(current_project, node_package, package)
    for pkg in candidates:
        result = self.doc_lookup.find(name, pkg, self)
        if result is not None:
            return result
    return None
- doc_lookup 处理
内部是通过 DocLookup 类处理的,最终也是根据 unique_id 从 manifest 的 docs 中取值
def perform_lookup(self, unique_id: UniqueID, manifest) -> Documentation:
    """Return the entry stored in ``manifest.docs`` under ``unique_id``.

    Args:
        unique_id: Key to look up in ``manifest.docs``.
        manifest: Object whose ``docs`` mapping holds the documentation
            entries.

    Returns:
        The value stored in ``manifest.docs`` for ``unique_id``.

    Raises:
        dbt.exceptions.DbtInternalError: If ``unique_id`` is not present in
            ``manifest.docs``.
    """
    if unique_id not in manifest.docs:
        raise dbt.exceptions.DbtInternalError(
            f"Doc {unique_id} found in cache but not found in manifest"
        )
    return manifest.docs[unique_id]
- 实际上docs manifest 参考
可以看到,上边的解析处理最终取到的就是下边这种格式的条目,doc 函数返回的是其中的 block_contents
"doc.dremio_demo_app.__dremio_demo_app__": {
"name": "__dremio_demo_app__",
"resource_type": "doc",
"package_name": "dremio_demo_app",
"path": "dalongdemo.md",
"original_file_path": "models/dalongdemo.md",
"unique_id": "doc.dremio_demo_app.__dremio_demo_app__",
"block_contents": "## this is my pacakge demo\n\n\n### contains \n\n- mymodel-v2\n- mymodel-v3"
},
说明
dbt 的 manifest 是一个比较重要的组件,很多地方都会用到,后边会详细说明 manifest 的内部解析以及执行过程
参考资料
core/dbt/context/docs.py
core/dbt/contracts/graph/manifest.py
https://docs.getdbt.com/reference/dbt-jinja-functions/doc