Calcite分析 - RelTrait
RelTrait 表示RelNode的物理属性
由RelTraitDef代表RelTrait的类型
/** * RelTrait represents the manifestation of a relational expression trait within * a trait definition. For example, a {@code CallingConvention.JAVA} is a trait * of the {@link ConventionTraitDef} trait definition. * * <h3><a id="EqualsHashCodeNote">Note about equals() and hashCode()</a></h3> * * <p>If all instances of RelTrait for a particular RelTraitDef are defined in * an {@code enum} and no new RelTraits can be introduced at runtime, you need * not override {@link #hashCode()} and {@link #equals(Object)}. If, however, * new RelTrait instances are generated at runtime (e.g. based on state external * to the planner), you must implement {@link #hashCode()} and * {@link #equals(Object)} for proper {@link RelTraitDef#canonize canonization} * of your RelTrait objects.</p> */ public interface RelTrait { //~ Methods ---------------------------------------------------------------- /** * Returns the RelTraitDef that defines this RelTrait. * * @return the RelTraitDef that defines this RelTrait */ RelTraitDef getTraitDef(); }
RelTraitDef,主要可以分为3种,
RelCollationTraitDef,排序
/** * Definition of the ordering trait. * * <p>Ordering is a physical property (i.e. a trait) because it can be changed * without loss of information. The converter to do this is the * {@link org.apache.calcite.rel.core.Sort} operator. * * <p>Unlike other current traits, a {@link RelNode} can have more than one * value of this trait simultaneously. For example, * <code>LogicalTableScan(table=TIME_BY_DAY)</code> might be sorted by * <code>{the_year, the_month, the_date}</code> and also by * <code>{time_id}</code>. We have to allow a RelNode to belong to more than * one RelSubset (these RelSubsets are always in the same set).</p> */ public class RelCollationTraitDef extends RelTraitDef<RelCollation>
RelDistributionTraitDef,分布
/** * Definition of the distribution trait. * * <p>Distribution is a physical property (i.e. a trait) because it can be * changed without loss of information. The converter to do this is the * {@link Exchange} operator. */ public class RelDistributionTraitDef extends RelTraitDef<RelDistribution>
ConventionTraitDef,转换
/**
 * Definition of the convention trait.
 * A new set of conversion information is created for
 * each planner that registers at least one {@link ConverterRule} instance.
 *
 * <p>Conversion data is held in a {@link LoadingCache}
 * with weak keys so that the JVM's garbage
 * collector may reclaim the conversion data after the planner itself has been
 * garbage collected. The conversion information consists of a graph of
 * conversions (from one calling convention to another) and a map of graph arcs
 * to {@link ConverterRule}s.
 */
public class ConventionTraitDef extends RelTraitDef<Convention>
RelTraitDef中最关键的函数是:
/**
 * Produces a relational expression equivalent to {@code rel} that carries
 * the trait {@code toTrait}.
 *
 * @param planner the planner on whose behalf the conversion is performed
 * @param rel the relational expression to be converted
 * @param toTrait the trait the result should have
 * @param allowInfiniteCostConverters whether converters of infinite cost
 *     may be used
 * @return the converted RelNode, or null when no conversion is possible
 */
public abstract RelNode convert(
    RelOptPlanner planner,
    RelNode rel,
    T toTrait,
    boolean allowInfiniteCostConverters);

/**
 * Reports whether a conversion from one trait value to another is possible.
 *
 * @param planner the planner on whose behalf the test is performed
 * @param fromTrait the trait to start from
 * @param toTrait the trait to arrive at
 * @return true if fromTrait can be converted to toTrait
 */
public abstract boolean canConvert(
    RelOptPlanner planner,
    T fromTrait,
    T toTrait);
RelTrait的调用流程
在这两个函数中,
RelSubset.propagateCostImprovements0
VolcanoPlanner.registerImpl
会调用到checkForSatisfiedConverters,所以当RelNode发生新增或变化时,需要检测一下是否需要根据RelTrait进行convert
void checkForSatisfiedConverters( RelSet set, RelNode rel) { int i = 0; while (i < set.abstractConverters.size()) { //遍历RelSet所有的abstractConverters AbstractConverter converter = set.abstractConverters.get(i); RelNode converted = changeTraitsUsingConverters( rel, converter.getTraitSet()); //试图对RelNode进行转换 if (converted == null) { i++; // couldn't convert this; move on to the next } else { //如果转换成功 if (!isRegistered(converted)) { registerImpl(converted, set); //注册新产生的RelNode } set.abstractConverters.remove(converter); // 删除已经完成转换的abstractConverters } } }
changeTraitsUsingConverters
private RelNode changeTraitsUsingConverters( RelNode rel, RelTraitSet toTraits, boolean allowAbstractConverters) { final RelTraitSet fromTraits = rel.getTraitSet(); //RelNode本身的Traits就是from,toTraits是传入的,代表需要变成啥样 assert fromTraits.size() >= toTraits.size(); //toTraits的个数需要小于等于fromTraits // Traits may build on top of another...for example a collation trait // would typically come after a distribution trait since distribution // destroys collation; so when doing the conversion below we use // fromTraits as the trait of the just previously converted RelNode. // Also, toTraits may have fewer traits than fromTraits, excess traits // will be left as is. Finally, any null entries in toTraits are // ignored. RelNode converted = rel; for (int i = 0; (converted != null) && (i < toTraits.size()); i++) { RelTrait fromTrait = converted.getTraitSet().getTrait(i); final RelTraitDef traitDef = fromTrait.getTraitDef(); RelTrait toTrait = toTraits.getTrait(i); if (toTrait == null) { continue; } if (fromTrait.equals(toTrait)) { //from等于to,不需转换 // No need to convert; it's already correct. continue; } rel = traitDef.convert( //真正的convert this, converted, toTrait, allowInfiniteCostConverters); if (rel != null) { assert rel.getTraitSet().getTrait(traitDef).satisfies(toTrait); //判断一下是否真的转换成功if (rel != null) { register(rel, converted); } } if ((rel == null) && allowAbstractConverters) { RelTraitSet stepTraits = converted.getTraitSet().replace(toTrait); rel = getSubset(converted, stepTraits); } converted = rel; } // make sure final converted traitset subsumes what was required if (converted != null) { assert converted.getTraitSet().satisfies(toTraits); } return converted; }
核心就是调用TraitDef的Convert函数
Convert是个抽象函数,每种不同类型的Trait实现是不一样的,
我们就看下RelCollationTraitDef
/** * Definition of the ordering trait. * * <p>Ordering is a physical property (i.e. a trait) because it can be changed * without loss of information. The converter to do this is the * {@link org.apache.calcite.rel.core.Sort} operator. * * <p>Unlike other current traits, a {@link RelNode} can have more than one * value of this trait simultaneously. For example, * <code>LogicalTableScan(table=TIME_BY_DAY)</code> might be sorted by * <code>{the_year, the_month, the_date}</code> and also by * <code>{time_id}</code>. We have to allow a RelNode to belong to more than * one RelSubset (these RelSubsets are always in the same set).</p> */ public class RelCollationTraitDef extends RelTraitDef<RelCollation> { public static final RelCollationTraitDef INSTANCE = new RelCollationTraitDef(); public RelNode convert( RelOptPlanner planner, RelNode rel, RelCollation toCollation, boolean allowInfiniteCostConverters) { if (toCollation.getFieldCollations().isEmpty()) { // An empty sort doesn't make sense. return null; } // Create a logical sort, then ask the planner to convert its remaining // traits (e.g. convert it to an EnumerableSortRel if rel is enumerable // convention) final Sort sort = LogicalSort.create(rel, toCollation, null, null); //LogicalSort是SingleRel,单输入的RelNode RelNode newRel = planner.register(sort, rel); //因为新产生了RelNode,需要register,加入SubSet,更新cost,importance final RelTraitSet newTraitSet = rel.getTraitSet().replace(toCollation); //完成convention,所以替换成新的trait return newRel; } @Override public boolean canConvert(RelOptPlanner planner, RelCollation fromTrait, RelCollation toTrait, RelNode fromRel) { // Returns true only if we can convert. In this case, we can only convert // if the fromTrait (the input) has fields that the toTrait wants to sort. 
//判断field数是否匹配,toTrait的每个RelFieldCollation中的field数都需要大于from的fieldCount for (RelFieldCollation field : toTrait.getFieldCollations()) { int index = field.getFieldIndex(); if (index >= fromRel.getRowType().getFieldCount()) { return false; } } return true; } }
Convert里面的replace实现如下,
RelTraitSet类 /** * Returns a trait set consisting of the current set plus a new trait. * * <p>If the set does not contain a trait of the same {@link RelTraitDef}, * the trait is ignored, and this trait set is returned. * * @param trait the new trait * @return New set * @see #plus(RelTrait) */ public RelTraitSet replace( RelTrait trait) { // Quick check for common case if (containsShallow(traits, trait)) { return this; } final RelTraitDef traitDef = trait.getTraitDef(); //替换的前提是TraitDef相同,RelCollationTraitDef的trait只能替换相同def的trait int index = findIndex(traitDef); if (index < 0) { // Trait is not present. Ignore it. return this; } return replace(index, trait); }
再看下RelDistributionTraitDef的实现,除了创建的是Exchange以外,和Collation相同
public RelNode convert(RelOptPlanner planner, RelNode rel, RelDistribution toDistribution, boolean allowInfiniteCostConverters) { if (toDistribution == RelDistributions.ANY) { return rel; } //对于distribution,就是创建LogicalExchange final Exchange exchange = LogicalExchange.create(rel, toDistribution); RelNode newRel = planner.register(exchange, rel); final RelTraitSet newTraitSet = rel.getTraitSet().replace(toDistribution); if (!newRel.getTraitSet().equals(newTraitSet)) { newRel = planner.changeTraits(newRel, newTraitSet); } return newRel; }
ConventionTraitDef
对于这种情况,会复杂些,
Convention
RelTrait的子类,增加如下接口,
/**
 * Calling convention trait.
 */
public interface Convention extends RelTrait {
  /**
   * Convention for a relational expression that does not support any
   * convention. It is not implementable, and has to be transformed to
   * something else in order to be implemented.
   *
   * <p>Relational expressions generally start off in this form.</p>
   *
   * <p>Such expressions always have infinite cost.</p>
   */
  Convention NONE = new Impl("NONE", RelNode.class);

  // NOTE(review): presumably the interface that relational expressions of
  // this convention implement (NONE passes RelNode.class) - confirm.
  Class getInterface();

  // NOTE(review): presumably the convention's display name (NONE passes
  // "NONE") - confirm.
  String getName();

  /**
   * Returns whether we should convert from this convention to
   * {@code toConvention}. Used by {@link ConventionTraitDef}.
   *
   * @param toConvention Desired convention to convert to
   * @return Whether we should convert from this convention to toConvention
   */
  boolean canConvertConvention(Convention toConvention);

  /**
   * Returns whether we should convert from this trait set to the other trait
   * set.
   *
   * <p>The convention decides whether it wants to handle other trait
   * conversions, e.g. collation, distribution, etc. For a given convention, we
   * will only add abstract converters to handle the trait (convention,
   * collation, distribution, etc.) conversions if this function returns true.
   *
   * @param fromTraits Traits of the RelNode that we are converting from
   * @param toTraits Target traits
   * @return Whether we should add converters
   */
  boolean useAbstractConvertersForConversion(RelTraitSet fromTraits,
      RelTraitSet toTraits);
  // NOTE(review): the excerpt ends here; the rest of the interface
  // (including the Impl class used by NONE and the closing brace) is not
  // shown.
ConversionData
这里之所以复杂,是因为convention不止一个,多个convention之间的转换关系构成一张有向图(DirectedGraph)
而且convention之间的Edge,也可能会包含多个Rule
可以看到ConventionTraitDef是RelDistributionTraitDef、RelCollationTraitDef的通用形式
/** Workspace for converting from one convention to another. */ private static final class ConversionData { final DirectedGraph<Convention, DefaultEdge> conversionGraph = //记录Convention之间的关系 DefaultDirectedGraph.create(); /** * For a given source/target convention, there may be several possible * conversion rules. Maps {@link DefaultEdge} to a * collection of {@link ConverterRule} objects. */ final Multimap<Pair<Convention, Convention>, ConverterRule> mapArcToConverterRule = //记录某一Edge上对应的rules HashMultimap.create(); private Graphs.FrozenGraph<Convention, DefaultEdge> pathMap; public List<List<Convention>> getPaths( Convention fromConvention, Convention toConvention) { return getPathMap().getPaths(fromConvention, toConvention); } private Graphs.FrozenGraph<Convention, DefaultEdge> getPathMap() { //获取Graph的静态Snapshot if (pathMap == null) { pathMap = Graphs.makeImmutable(conversionGraph); } return pathMap; } public List<Convention> getShortestPath( //获取Convention之间的最短路径 Convention fromConvention, Convention toConvention) { return getPathMap().getShortestPath(fromConvention, toConvention); } } }
ConverterRule
和普通的RelOptRule差不多,
主要触发逻辑在onMatch中,主要调用convert
/**
 * Tells whether this rule is able to convert <em>every</em> relational
 * expression of its input convention.
 *
 * <p>A union-to-java converter, for instance, gives no such guarantee,
 * since it handles unions only.</p>
 *
 * @return {@code true} if this rule can convert <em>any</em> relational
 *     expression; this implementation always answers {@code false}
 */
public boolean isGuaranteed() {
  return false;
}

/**
 * Fires the rule: when the matched expression carries the rule's input
 * trait, it is converted, and a non-null result is handed to the call as
 * the transformation.
 *
 * @param call the rule call holding the matched expression at position 0
 */
public void onMatch(RelOptRuleCall call) {
  final RelNode matched = call.rel(0);
  if (!matched.getTraitSet().contains(inTrait)) {
    // The expression does not have the trait this rule converts from.
    return;
  }
  final RelNode result = convert(matched);
  if (result == null) {
    // The rule declined to convert this expression.
    return;
  }
  call.transformTo(result);
}
看一个convert实现的例子,
/**
 * Converter rule that turns a {@code Sort} in {@code Convention.NONE} into
 * an {@code EnumerableSort} in the enumerable convention.
 */
class EnumerableSortRule extends ConverterRule {
  EnumerableSortRule() {
    super(
        Sort.class,
        Convention.NONE,
        EnumerableConvention.INSTANCE,
        "EnumerableSortRule");
  }

  /**
   * Converts a sort without offset and fetch into an enumerable sort over
   * its input converted to the enumerable convention.
   *
   * @param rel the expression to convert; cast to {@code Sort}
   * @return the enumerable sort, or null when the sort has an offset or a
   *     fetch
   */
  public RelNode convert(RelNode rel) {
    final Sort original = (Sort) rel;
    if (original.offset == null && original.fetch == null) {
      final RelNode child = original.getInput();
      // The input has to be in the enumerable convention as well.
      final RelNode enumerableChild =
          convert(
              child,
              child.getTraitSet().replace(EnumerableConvention.INSTANCE));
      return EnumerableSort.create(
          enumerableChild, original.getCollation(), null, null);
    }
    // Sorts carrying an offset or a fetch are not handled by this rule.
    return null;
  }
}
ConventionTraitDef
最后看下ConventionTraitDef的核心逻辑,
// implement RelTraitDef public RelNode convert( RelOptPlanner planner, RelNode rel, Convention toConvention, boolean allowInfiniteCostConverters) { final RelMetadataQuery mq = rel.getCluster().getMetadataQuery(); final ConversionData conversionData = getConversionData(planner); final Convention fromConvention = rel.getConvention(); List<List<Convention>> conversionPaths = //根据from和to,取出Paths conversionData.getPaths(fromConvention, toConvention); loop: for (List<Convention> conversionPath : conversionPaths) { RelNode converted = rel; Convention previous = null; for (Convention arc : conversionPath) { if (planner.getCost(converted, mq).isInfinite() //如果cost infinite,忽略 && !allowInfiniteCostConverters) { continue loop; } if (previous != null) { converted = changeConvention( //变换Convention converted, previous, arc, conversionData.mapArcToConverterRule); if (converted == null) { throw new AssertionError("Converter from " + previous + " to " + arc + " guaranteed that it could convert any relexp"); } } previous = arc; } return converted; } return null; } /** * Tries to convert a relational expression to the target convention of an * arc. */ private RelNode changeConvention( RelNode rel, Convention source, Convention target, final Multimap<Pair<Convention, Convention>, ConverterRule> mapArcToConverterRule) { // Try to apply each converter rule for this arc's source/target calling // conventions. final Pair<Convention, Convention> key = Pair.of(source, target); for (ConverterRule rule : mapArcToConverterRule.get(key)) { //取出Path或edge对应的Rules assert rule.getInTrait() == source; assert rule.getOutTrait() == target; RelNode converted = rule.convert(rel); //逐个Rule Convert if (converted != null) { return converted; } } return null; }