dremio 的SourceCapabilities 能力简单说明
SourceCapabilities 用于描述存储插件所具备的能力,在 dremio 生成执行计划的过程中起着比较重要的作用,以下做一个简单说明
SourceCapabilities 类定义
每个存储扩展的实现都有一个 getSourceCapabilities 方法;SourceCapabilities 本身是一个集合,其中包含了各类 CapabilityValue 子类的实例
public final class SourceCapabilities {
/** Shared instance that carries no capability values (built from an empty list). */
public static final SourceCapabilities NONE =
new SourceCapabilities(ImmutableList.<CapabilityValue<?, ?>>of());
// NOTE(review): the second constructor argument of each BooleanCapability below looks like the
// value reported when a source does not set the capability explicitly (see the
// getDefaultValue() fallback in getCapability) — confirm against BooleanCapability.
public static final BooleanCapability REQUIRES_HARD_AFFINITY =
new BooleanCapability("requires_hard_affinity", false);
public static final BooleanCapability SUPPORTS_CONTAINS =
new BooleanCapability("supports_contains_operation", false);
// SourceCapabilities already ships with the built-in capabilities declared in this class, but
// additional capabilities can also be defined as extensions.
// Indicates that the plugin disallows ScanCrel nodes in plans produced for it. By making the cost
// infinite,
// the planner is forced to substitute a ScanCrel for a plugin-specific node.
public static final BooleanCapability TREAT_CALCITE_SCAN_COST_AS_INFINITE =
new BooleanCapability("treat_calcite_scan_cost_as_infinite", false);
// Indicates that the plugin is capable of pushing down sub queries.
public static final BooleanCapability SUBQUERY_PUSHDOWNABLE =
new BooleanCapability("subquery_pushdownable", false);
// Indicates that the plugin is capable of pushing down correlated sub queries.
public static final BooleanCapability CORRELATED_SUBQUERY_PUSHDOWN =
new BooleanCapability("correlated_pushdownable", true);
public static final BooleanCapability VARCHARS_WITH_WIDTH =
new BooleanCapability("varchars_with_width", false);
// Indicates to use the native privileges set directly on the source.
public static final BooleanCapability USE_NATIVE_PRIVILEGES =
new BooleanCapability("use_native_privileges", false);
// Any plugin that supports Iceberg tables can support this capability if the planner can depend
// on partition stats while planning.
// Deliberately not final: setCanUsePartitionStatsCapability replaces this object (test-only hook).
private static BooleanCapability CAN_USE_PARTITION_STATS =
new BooleanCapability("can_use_partition_stats", false);
// Capability values indexed by the capability each one reports via getCapability().
private final ImmutableMap<Capability<?>, CapabilityValue<?, ?>> values;
@JsonCreator
public SourceCapabilities(
@JsonProperty("capabilitiesList") List<CapabilityValue<?, ?>> capabilities) {
if (capabilities == null) {
capabilities = ImmutableList.of();
}
ImmutableMap.Builder<Capability<?>, CapabilityValue<?, ?>> builder = ImmutableMap.builder();
for (CapabilityValue<?, ?> c : capabilities) {
builder.put(c.getCapability(), c);
}
this.values = builder.build();
}
/**
 * Creates a capability set from the given capability values.
 *
 * <p>Each value is indexed by the capability it reports via {@code getCapability()}. Guava's
 * {@code ImmutableMap.Builder} rejects duplicate keys when the map is built.
 *
 * @param capabilities the capability values to include; may be empty
 */
public SourceCapabilities(CapabilityValue<?, ?>... capabilities) {
  ImmutableMap.Builder<Capability<?>, CapabilityValue<?, ?>> builder = ImmutableMap.builder();
  for (CapabilityValue<?, ?> c : capabilities) {
    builder.put(c.getCapability(), c);
  }
  this.values = builder.build();
}
/**
 * Looks up the value recorded for a long-valued capability.
 *
 * @param capability the capability to look up
 * @return the recorded value, or the capability's default value when nothing matching is recorded
 */
public long getCapability(LongCapability capability) {
  final CapabilityValue<?, ?> stored = values.get(capability);
  // Fall back to the default when nothing is stored or the stored entry is for another capability.
  if (stored == null || !stored.getCapability().equals(capability)) {
    return capability.getDefaultValue();
  }
  return (Long) stored.getValue();
}
/**
 * Looks up the value recorded for a boolean capability.
 *
 * @param capability the capability to look up
 * @return the recorded value, or the capability's default value when nothing matching is recorded
 */
public boolean getCapability(BooleanCapability capability) {
  final CapabilityValue<?, ?> stored = values.get(capability);
  // Fall back to the default when nothing is stored or the stored entry is for another capability.
  if (stored == null || !stored.getCapability().equals(capability)) {
    return capability.getDefaultValue();
  }
  return (Boolean) stored.getValue();
}
/**
 * Looks up the value recorded for a double-valued capability.
 *
 * @param capability the capability to look up
 * @return the recorded value, or the capability's default value when nothing matching is recorded
 */
public double getCapability(DoubleCapability capability) {
  final CapabilityValue<?, ?> stored = values.get(capability);
  // Fall back to the default when nothing is stored or the stored entry is for another capability.
  if (stored == null || !stored.getCapability().equals(capability)) {
    return capability.getDefaultValue();
  }
  return (Double) stored.getValue();
}
/**
 * Looks up the value recorded for a string-valued capability.
 *
 * @param capability the capability to look up
 * @return the recorded value, or the capability's default value when nothing matching is recorded
 */
public String getCapability(StringCapability capability) {
  final CapabilityValue<?, ?> stored = values.get(capability);
  // Fall back to the default when nothing is stored or the stored entry is for another capability.
  if (stored == null || !stored.getCapability().equals(capability)) {
    return capability.getDefaultValue();
  }
  return (String) stored.getValue();
}
/**
 * Returns the stored capability values as an immutable list. Exists for serialization; the
 * property is left out of the JSON output when the list is empty.
 *
 * @return an immutable copy of the stored capability values
 */
@JsonInclude(JsonInclude.Include.NON_EMPTY)
public List<CapabilityValue<?, ?>> getCapabilitiesList() {
  final ImmutableList<CapabilityValue<?, ?>> snapshot = ImmutableList.copyOf(values.values());
  return snapshot;
}
/**
 * Two capability sets are equal when they hold equal capability-value maps.
 *
 * @param other the object to compare with
 * @return {@code true} if {@code other} is a {@code SourceCapabilities} with equal values
 */
@Override
public boolean equals(final Object other) {
  if (other instanceof SourceCapabilities) {
    final SourceCapabilities that = (SourceCapabilities) other;
    return Objects.equal(this.values, that.values);
  }
  return false;
}
/** Hash code derived from the stored capability values, the same field that equals compares. */
@Override
public int hashCode() {
return Objects.hashCode(values);
}
// For testing purposes only. Don't use elsewhere
/**
 * Replaces the static {@code CAN_USE_PARTITION_STATS} capability with a new
 * {@code "can_use_partition_stats"} capability built from the given flag.
 *
 * @param capability the boolean passed to the new {@code BooleanCapability}
 */
@VisibleForTesting
public static void setCanUsePartitionStatsCapability(boolean capability) {
  final BooleanCapability replacement =
      new BooleanCapability("can_use_partition_stats", capability);
  CAN_USE_PARTITION_STATS = replacement;
}
/**
 * Returns the current {@code CAN_USE_PARTITION_STATS} capability object. Marked as visible for
 * testing.
 */
@VisibleForTesting
public static BooleanCapability getCanUsePartitionStats() {
return CAN_USE_PARTITION_STATS;
}
}
相关实现
- 一些实现
前面已经说明,存储插件都需要实现此接口方法;以下是开源版本中的一个实现(不包含 jdbc 的)
DataplanePlugin的(nessie 类数据源)
/**
 * Reports this plugin's source capabilities.
 *
 * <p>When the {@code VERSIONED_SOURCE_CAPABILITIES_USE_NATIVE_PRIVILEGES_ENABLED} option is off,
 * no capabilities are reported. Otherwise a single {@code USE_NATIVE_PRIVILEGES} value is
 * reported, taken from the plugin configuration.
 *
 * @return {@code SourceCapabilities.NONE}, or a set holding only {@code USE_NATIVE_PRIVILEGES}
 */
public SourceCapabilities getSourceCapabilities() {
  final boolean nativePrivilegesOptionEnabled =
      this.context
          .getOptionManager()
          .getOption(VERSIONED_SOURCE_CAPABILITIES_USE_NATIVE_PRIVILEGES_ENABLED);
  if (!nativePrivilegesOptionEnabled) {
    return SourceCapabilities.NONE;
  }
  return new SourceCapabilities(
      new BooleanCapabilityValue(
          USE_NATIVE_PRIVILEGES,
          this.pluginConfig.useNativePrivileges(this.context.getOptionManager())));
}
jdbc的 比较多,因为可优化点比较多
/**
 * Reports the capabilities of this JDBC source, derived from its SQL dialect.
 *
 * <p>Without a dialect, an empty capability set is returned. Otherwise the Calcite scan cost is
 * always treated as infinite, and the remaining flags are taken from the dialect.
 *
 * @return the capability set for this source
 */
public SourceCapabilities getSourceCapabilities() {
  if (this.dialect == null) {
    return new SourceCapabilities(new CapabilityValue[0]);
  }
  return new SourceCapabilities(
      new CapabilityValue[] {
        new BooleanCapabilityValue(SourceCapabilities.TREAT_CALCITE_SCAN_COST_AS_INFINITE, true),
        new BooleanCapabilityValue(
            SourceCapabilities.SUBQUERY_PUSHDOWNABLE, this.dialect.supportsSubquery()),
        new BooleanCapabilityValue(
            SourceCapabilities.CORRELATED_SUBQUERY_PUSHDOWN,
            this.dialect.supportsCorrelatedSubquery()),
        new BooleanCapabilityValue(REQUIRE_TRIMS_ON_CHARS, this.dialect.requiresTrimOnChars()),
        new BooleanCapabilityValue(COERCE_TIMES_TO_UTC, this.dialect.coerceTimesToUTC()),
        new BooleanCapabilityValue(
            COERCE_TIMESTAMPS_TO_UTC, this.dialect.coerceTimestampsToUTC()),
        new BooleanCapabilityValue(ADJUST_DATE_TIMEZONE, this.dialect.adjustDateTimezone())
      });
}
使用
- 直接使用的
目前直接使用 SourceCapabilities 的是如下图所示的几类存储插件,实际上可以分为两大类:一类是反射存储插件,一类是托管存储插件(即对实际存储插件的包装,用于实现管理能力)。到实际查询的时候,反射存储插件也会被托管存储插件包装,成为一个 StoragePluginId
- StoragePluginId 类getCapabilities的处理
对于实际的计划处理会使用到,一些使用参考下图,包含了逻辑计划以及物理计划
说明
以上是对 SourceCapabilities 的一个简单说明。它在 dremio 的内部优化中比较重要,会影响 dremio 的查询效率;更完整的使用方式后续会结合逻辑计划以及物理计划的处理再进行说明
参考资料
services/namespace/src/main/java/com/dremio/service/namespace/capabilities/SourceCapabilities.java
services/namespace/src/main/java/com/dremio/service/namespace/capabilities/Capability.java
services/namespace/src/main/java/com/dremio/service/namespace/capabilities/CapabilityValue.java
sabot/kernel/src/main/java/com/dremio/exec/store/StoragePlugin.java
sabot/kernel/src/main/java/com/dremio/exec/catalog/StoragePluginId.java