dremio 的SourceCapabilities 能力简单说明

SourceCapabilities 用于描述存储插件所具备的能力,在 dremio 的执行计划中起着比较重要的作用,以下做一个简单说明

SourceCapabilities 类定义

每个存储扩展的实现都有一个 getSourceCapabilities 方法,SourceCapabilities 本身是一个集合,其中包含了各类 CapabilityValue 子类的实例

public final class SourceCapabilities {
 
  public static final SourceCapabilities NONE =
      new SourceCapabilities(ImmutableList.<CapabilityValue<?, ?>>of());
 
  public static final BooleanCapability REQUIRES_HARD_AFFINITY =
      new BooleanCapability("requires_hard_affinity", false);
  public static final BooleanCapability SUPPORTS_CONTAINS =
      new BooleanCapability("supports_contains_operation", false);
  // SourceCapabilities 已经包含了一些内部实现,但是我们也可以自己扩展
  // Indicates that the plugin disallows ScanCrel nodes in plans produced for it. By making the cost
  // infinite,
  // the planner is forced to substitute a ScanCrel for a plugin-specific node.
  public static final BooleanCapability TREAT_CALCITE_SCAN_COST_AS_INFINITE =
      new BooleanCapability("treat_calcite_scan_cost_as_infinite", false);
 
  // Indicates that the plugin is capable of pushing down sub queries.
  public static final BooleanCapability SUBQUERY_PUSHDOWNABLE =
      new BooleanCapability("subquery_pushdownable", false);
 
  // Indicates that the plugin is capable of pushing down correlated sub queries.
  public static final BooleanCapability CORRELATED_SUBQUERY_PUSHDOWN =
      new BooleanCapability("correlated_pushdownable", true);
 
  public static final BooleanCapability VARCHARS_WITH_WIDTH =
      new BooleanCapability("varchars_with_width", false);
 
  // Indicates to use the native privileges set directly on the source.
  public static final BooleanCapability USE_NATIVE_PRIVILEGES =
      new BooleanCapability("use_native_privileges", false);
 
  // Any plugin that supports Iceberg tables can support this capabilities if the planner can depend
  // on partition stats while planning
  private static BooleanCapability CAN_USE_PARTITION_STATS =
      new BooleanCapability("can_use_partition_stats", false);
 
  private final ImmutableMap<Capability<?>, CapabilityValue<?, ?>> values;
 
  @JsonCreator
  public SourceCapabilities(
      @JsonProperty("capabilitiesList") List<CapabilityValue<?, ?>> capabilities) {
    if (capabilities == null) {
      capabilities = ImmutableList.of();
    }
    ImmutableMap.Builder<Capability<?>, CapabilityValue<?, ?>> builder = ImmutableMap.builder();
    for (CapabilityValue<?, ?> c : capabilities) {
      builder.put(c.getCapability(), c);
    }
    this.values = builder.build();
  }
 
  public SourceCapabilities(CapabilityValue<?, ?>... capabilities) {
    ImmutableMap.Builder<Capability<?>, CapabilityValue<?, ?>> builder = ImmutableMap.builder();
    for (CapabilityValue<?, ?> c : capabilities) {
      builder.put(c.getCapability(), c);
    }
    this.values = builder.build();
  }
 
  public long getCapability(LongCapability capability) {
    CapabilityValue<?, ?> value = values.get(capability);
    if (value != null && value.getCapability().equals(capability)) {
      return (Long) value.getValue();
    } else {
      return capability.getDefaultValue();
    }
  }
 
  public boolean getCapability(BooleanCapability capability) {
    CapabilityValue<?, ?> value = values.get(capability);
    if (value != null && value.getCapability().equals(capability)) {
      return (Boolean) value.getValue();
    } else {
      return capability.getDefaultValue();
    }
  }
 
  public double getCapability(DoubleCapability capability) {
    CapabilityValue<?, ?> value = values.get(capability);
    if (value != null && value.getCapability().equals(capability)) {
      return (Double) value.getValue();
    } else {
      return capability.getDefaultValue();
    }
  }
 
  public String getCapability(StringCapability capability) {
    CapabilityValue<?, ?> value = values.get(capability);
    if (value != null && value.getCapability().equals(capability)) {
      return (String) value.getValue();
    } else {
      return capability.getDefaultValue();
    }
  }
 
  // for serialization.
  @JsonInclude(JsonInclude.Include.NON_EMPTY)
  public List<CapabilityValue<?, ?>> getCapabilitiesList() {
    return ImmutableList.copyOf(values.values());
  }
 
  @Override
  public boolean equals(final Object other) {
    if (!(other instanceof SourceCapabilities)) {
      return false;
    }
    SourceCapabilities castOther = (SourceCapabilities) other;
    return Objects.equal(values, castOther.values);
  }
 
  @Override
  public int hashCode() {
    return Objects.hashCode(values);
  }
 
  // For testing purposes only. Don't use elsewhere
  @VisibleForTesting
  public static void setCanUsePartitionStatsCapability(boolean capability) {
    CAN_USE_PARTITION_STATS = new BooleanCapability("can_use_partition_stats", capability);
  }
 
  @VisibleForTesting
  public static BooleanCapability getCanUsePartitionStats() {
    return CAN_USE_PARTITION_STATS;
  }
}

相关实现

  • 一些实现

之前也说明过,存储插件都需要实现此接口方法,以下是开源版本中的一个实现(不包含 jdbc 的)

DataplanePlugin的(nessie 类数据源)

public SourceCapabilities getSourceCapabilities() {
    if (this.context
        .getOptionManager()
        .getOption(VERSIONED_SOURCE_CAPABILITIES_USE_NATIVE_PRIVILEGES_ENABLED)) {
      return new SourceCapabilities(
          new BooleanCapabilityValue(
              USE_NATIVE_PRIVILEGES,
              this.pluginConfig.useNativePrivileges(this.context.getOptionManager())));
    }
 
    return SourceCapabilities.NONE;
  }

jdbc的 比较多,因为可优化点比较多

/**
 * Reports the capabilities of this source, taking most of the flag values from its dialect.
 *
 * @return an empty capability set when no dialect is set; otherwise a set holding the scan-cost,
 *     subquery, trim and timezone related flags listed below
 */
public SourceCapabilities getSourceCapabilities() {
   // No dialect available: nothing can be reported.
   if (this.dialect == null) {
      return new SourceCapabilities(new CapabilityValue[0]);
   }

   // All flags except the first are answered by the dialect.
   return new SourceCapabilities(new CapabilityValue[]{
      new BooleanCapabilityValue(SourceCapabilities.TREAT_CALCITE_SCAN_COST_AS_INFINITE, true),
      new BooleanCapabilityValue(
         SourceCapabilities.SUBQUERY_PUSHDOWNABLE, this.dialect.supportsSubquery()),
      new BooleanCapabilityValue(
         SourceCapabilities.CORRELATED_SUBQUERY_PUSHDOWN,
         this.dialect.supportsCorrelatedSubquery()),
      new BooleanCapabilityValue(REQUIRE_TRIMS_ON_CHARS, this.dialect.requiresTrimOnChars()),
      new BooleanCapabilityValue(COERCE_TIMES_TO_UTC, this.dialect.coerceTimesToUTC()),
      new BooleanCapabilityValue(COERCE_TIMESTAMPS_TO_UTC, this.dialect.coerceTimestampsToUTC()),
      new BooleanCapabilityValue(ADJUST_DATE_TIMEZONE, this.dialect.adjustDateTimezone())
   });
}

使用

  • 直接使用的

目前直接使用的如下图所示,有几类存储插件,实际上可以分为两大类:一类是反射存储插件,一类是托管存储插件(实际上就是对实际存储插件的包装,用于实现管理能力)。到实际查询的时候,反射存储插件也会被托管存储插件进行包装,成为一个 StoragePluginId 类

  • StoragePluginId 类getCapabilities的处理

对于实际的计划处理会使用到,一些使用参考下图,包含了逻辑计划以及物理计划

说明

以上是对于 SourceCapabilities 的一个简单说明,实际上它在 dremio 内部优化中还是比较重要的,会影响 dremio 的查询效率;更完整的使用,后面会结合逻辑计划以及物理计划的处理进行说明

参考资料

services/namespace/src/main/java/com/dremio/service/namespace/capabilities/SourceCapabilities.java
services/namespace/src/main/java/com/dremio/service/namespace/capabilities/Capability.java
services/namespace/src/main/java/com/dremio/service/namespace/capabilities/CapabilityValue.java
sabot/kernel/src/main/java/com/dremio/exec/store/StoragePlugin.java
sabot/kernel/src/main/java/com/dremio/exec/catalog/StoragePluginId.java

posted on 2024-05-15 07:56  荣锋亮  阅读(18)  评论(0)  编辑  收藏  举报

导航