You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@calcite.apache.org by hy...@apache.org on 2020/06/09 00:55:30 UTC

[calcite] branch master updated: [CALCITE-4041] Support trait propagation for EnumerableCorrelate

This is an automated email from the ASF dual-hosted git repository.

hyuan pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/calcite.git


The following commit(s) were added to refs/heads/master by this push:
     new 61cf2bf  [CALCITE-4041] Support trait propagation for EnumerableCorrelate
61cf2bf is described below

commit 61cf2bf30d0496cf795fd0f5d4cf9dc9f468c9b5
Author: rubenada <ru...@gmail.com>
AuthorDate: Fri Jun 5 14:45:31 2020 +0200

    [CALCITE-4041] Support trait propagation for EnumerableCorrelate
    
    Close #2005
---
 .../adapter/enumerable/EnumerableCorrelate.java    |  49 +++++++++
 .../org/apache/calcite/test/TopDownOptTest.java    |  68 ++++++++++++
 .../org/apache/calcite/test/TopDownOptTest.xml     | 120 +++++++++++++++++++++
 3 files changed, 237 insertions(+)

diff --git a/core/src/main/java/org/apache/calcite/adapter/enumerable/EnumerableCorrelate.java b/core/src/main/java/org/apache/calcite/adapter/enumerable/EnumerableCorrelate.java
index aa30f14..cc1e56c 100644
--- a/core/src/main/java/org/apache/calcite/adapter/enumerable/EnumerableCorrelate.java
+++ b/core/src/main/java/org/apache/calcite/adapter/enumerable/EnumerableCorrelate.java
@@ -21,9 +21,13 @@ import org.apache.calcite.linq4j.tree.Expression;
 import org.apache.calcite.linq4j.tree.Expressions;
 import org.apache.calcite.linq4j.tree.ParameterExpression;
 import org.apache.calcite.linq4j.tree.Primitive;
+import org.apache.calcite.plan.DeriveMode;
 import org.apache.calcite.plan.RelOptCluster;
 import org.apache.calcite.plan.RelTraitSet;
+import org.apache.calcite.rel.RelCollation;
 import org.apache.calcite.rel.RelCollationTraitDef;
+import org.apache.calcite.rel.RelCollations;
+import org.apache.calcite.rel.RelFieldCollation;
 import org.apache.calcite.rel.RelNode;
 import org.apache.calcite.rel.core.Correlate;
 import org.apache.calcite.rel.core.CorrelationId;
@@ -32,11 +36,13 @@ import org.apache.calcite.rel.metadata.RelMdCollation;
 import org.apache.calcite.rel.metadata.RelMetadataQuery;
 import org.apache.calcite.util.BuiltInMethod;
 import org.apache.calcite.util.ImmutableBitSet;
+import org.apache.calcite.util.Pair;
 
 import com.google.common.collect.ImmutableList;
 
 import java.lang.reflect.Modifier;
 import java.lang.reflect.Type;
+import java.util.List;
 
 /** Implementation of {@link org.apache.calcite.rel.core.Correlate} in
  * {@link org.apache.calcite.adapter.enumerable.EnumerableConvention enumerable calling convention}. */
@@ -81,6 +87,49 @@ public class EnumerableCorrelate extends Correlate
         traitSet, left, right, correlationId, requiredColumns, joinType);
   }
 
+  @Override public Pair<RelTraitSet, List<RelTraitSet>> passThroughTraits(
+      final RelTraitSet required) {
+    final RelCollation collation = required.getCollation();
+    if (collation == null || collation == RelCollations.EMPTY) {
+      return null;
+    }
+
+    // EnumerableCorrelate traits passdown shall only pass through collation to left input.
+    // This is because for EnumerableCorrelate always uses left input as the outer loop,
+    // thus only left input can preserve ordering.
+
+    for (RelFieldCollation relFieldCollation : collation.getFieldCollations()) {
+      // If field collation belongs to right input: bail out.
+      if (relFieldCollation.getFieldIndex() >= getLeft().getRowType().getFieldCount()) {
+        return null;
+      }
+    }
+
+    final RelTraitSet passThroughTraitSet = traitSet.replace(collation);
+    return Pair.of(passThroughTraitSet,
+        ImmutableList.of(
+            passThroughTraitSet,
+            passThroughTraitSet.replace(RelCollations.EMPTY)));
+  }
+
+  @Override public Pair<RelTraitSet, List<RelTraitSet>> deriveTraits(
+      final RelTraitSet childTraits, final int childId) {
+    // should only derive traits (limited to collation for now) from left input.
+    assert childId == 0;
+
+    final RelCollation collation = childTraits.getCollation();
+    if (collation == null || collation == RelCollations.EMPTY) {
+      return null;
+    }
+
+    final RelTraitSet traits = traitSet.replace(collation);
+    return Pair.of(traits, ImmutableList.of(traits, right.getTraitSet()));
+  }
+
+  @Override public DeriveMode getDeriveMode() {
+    return DeriveMode.LEFT_FIRST;
+  }
+
   public Result implement(EnumerableRelImplementor implementor,
       Prefer pref) {
     final BlockBuilder builder = new BlockBuilder();
diff --git a/core/src/test/java/org/apache/calcite/test/TopDownOptTest.java b/core/src/test/java/org/apache/calcite/test/TopDownOptTest.java
index fdac1d3..ce73180 100644
--- a/core/src/test/java/org/apache/calcite/test/TopDownOptTest.java
+++ b/core/src/test/java/org/apache/calcite/test/TopDownOptTest.java
@@ -25,6 +25,8 @@ import org.apache.calcite.plan.volcano.VolcanoPlanner;
 import org.apache.calcite.rel.RelCollationTraitDef;
 import org.apache.calcite.rel.rules.JoinCommuteRule;
 import org.apache.calcite.rel.rules.JoinPushThroughJoinRule;
+import org.apache.calcite.rel.rules.JoinToCorrelateRule;
+import org.apache.calcite.rel.rules.SemiJoinRule;
 import org.apache.calcite.rel.rules.SortJoinCopyRule;
 import org.apache.calcite.rel.rules.SortJoinTransposeRule;
 import org.apache.calcite.rel.rules.SortProjectTransposeRule;
@@ -134,6 +136,72 @@ class TopDownOptTest extends RelOptTestBase {
         .check();
   }
 
+  // Order by left field(s): push down sort to left input.
+  @Test void testCorrelateInnerJoinDeriveLeft() {
+    final String sql = "select * from emp e\n"
+        + "join dept d on e.deptno=d.deptno\n"
+        + "order by e.ename";
+    Query.create(sql)
+        .addRule(JoinToCorrelateRule.INSTANCE)
+        .removeRule(EnumerableRules.ENUMERABLE_JOIN_RULE)
+        .removeRule(EnumerableRules.ENUMERABLE_MERGE_JOIN_RULE)
+        .removeRule(EnumerableRules.ENUMERABLE_SORT_RULE)
+        .check();
+  }
+
+  // Order by contains right field: sort cannot be pushed down.
+  @Test void testCorrelateInnerJoinNoDerive() {
+    final String sql = "select * from emp e\n"
+        + "join dept d on e.deptno=d.deptno\n"
+        + "order by e.ename, d.name";
+    Query.create(sql)
+        .addRule(JoinToCorrelateRule.INSTANCE)
+        .removeRule(EnumerableRules.ENUMERABLE_JOIN_RULE)
+        .removeRule(EnumerableRules.ENUMERABLE_MERGE_JOIN_RULE)
+        .removeRule(EnumerableRules.ENUMERABLE_SORT_RULE)
+        .check();
+  }
+
+  // Order by left field(s): push down sort to left input.
+  @Test void testCorrelateLeftJoinDeriveLeft() {
+    final String sql = "select * from emp e\n"
+        + "left join dept d on e.deptno=d.deptno\n"
+        + "order by e.ename";
+    Query.create(sql)
+        .addRule(JoinToCorrelateRule.INSTANCE)
+        .removeRule(EnumerableRules.ENUMERABLE_JOIN_RULE)
+        .removeRule(EnumerableRules.ENUMERABLE_MERGE_JOIN_RULE)
+        .removeRule(EnumerableRules.ENUMERABLE_SORT_RULE)
+        .check();
+  }
+
+  // Order by contains right field: sort cannot be pushed down.
+  @Test void testCorrelateLeftJoinNoDerive() {
+    final String sql = "select * from emp e\n"
+        + "left join dept d on e.deptno=d.deptno\n"
+        + "order by e.ename, d.name";
+    Query.create(sql)
+        .addRule(JoinToCorrelateRule.INSTANCE)
+        .removeRule(EnumerableRules.ENUMERABLE_JOIN_RULE)
+        .removeRule(EnumerableRules.ENUMERABLE_MERGE_JOIN_RULE)
+        .removeRule(EnumerableRules.ENUMERABLE_SORT_RULE)
+        .check();
+  }
+
+  // Order by left field(s): push down sort to left input.
+  @Test void testCorrelateSemiJoinDeriveLeft() {
+    final String sql = "select * from dept d\n"
+        + "where exists (select 1 from emp e where e.deptno=d.deptno)\n"
+        + "order by d.name";
+    Query.create(sql)
+        .addRule(JoinToCorrelateRule.INSTANCE)
+        .addRule(SemiJoinRule.JOIN)
+        .removeRule(EnumerableRules.ENUMERABLE_JOIN_RULE)
+        .removeRule(EnumerableRules.ENUMERABLE_MERGE_JOIN_RULE)
+        .removeRule(EnumerableRules.ENUMERABLE_SORT_RULE)
+        .check();
+  }
+
   // test if "order by mgr desc nulls last" can be pushed through the projection ("select mgr").
   @Test void testSortProject() {
     final String sql = "select mgr from sales.emp order by mgr desc nulls last";
diff --git a/core/src/test/resources/org/apache/calcite/test/TopDownOptTest.xml b/core/src/test/resources/org/apache/calcite/test/TopDownOptTest.xml
index 135dee9..e151e29 100644
--- a/core/src/test/resources/org/apache/calcite/test/TopDownOptTest.xml
+++ b/core/src/test/resources/org/apache/calcite/test/TopDownOptTest.xml
@@ -1116,4 +1116,124 @@ EnumerableProject(CONTACTNO=[$0], EMAIL=[$3], ACCTNO=[$10], TYPE=[$11])
 ]]>
         </Resource>
     </TestCase>
+  <TestCase name="testCorrelateInnerJoinDeriveLeft">
+    <Resource name="sql">
+      <![CDATA[select * from emp e join dept d on e.deptno=d.deptno order by e.ename]]>
+    </Resource>
+    <Resource name="planBefore">
+      <![CDATA[
+LogicalSort(sort0=[$1], dir0=[ASC])
+  LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4], SAL=[$5], COMM=[$6], DEPTNO=[$7], SLACKER=[$8], DEPTNO0=[$9], NAME=[$10])
+    LogicalJoin(condition=[=($7, $9)], joinType=[inner])
+      LogicalTableScan(table=[[CATALOG, SALES, EMP]])
+      LogicalTableScan(table=[[CATALOG, SALES, DEPT]])
+]]>
+    </Resource>
+    <Resource name="planAfter">
+      <![CDATA[
+EnumerableCorrelate(correlation=[$cor0], joinType=[inner], requiredColumns=[{7}])
+  EnumerableSort(sort0=[$1], dir0=[ASC])
+    EnumerableTableScan(table=[[CATALOG, SALES, EMP]])
+  EnumerableFilter(condition=[=($cor0.DEPTNO, $0)])
+    EnumerableTableScan(table=[[CATALOG, SALES, DEPT]])
+]]>
+    </Resource>
+  </TestCase>
+  <TestCase name="testCorrelateInnerJoinNoDerive">
+    <Resource name="sql">
+      <![CDATA[select * from emp e join dept d on e.deptno=d.deptno order by e.ename, d.name]]>
+    </Resource>
+    <Resource name="planBefore">
+      <![CDATA[
+LogicalSort(sort0=[$1], sort1=[$10], dir0=[ASC], dir1=[ASC])
+  LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4], SAL=[$5], COMM=[$6], DEPTNO=[$7], SLACKER=[$8], DEPTNO0=[$9], NAME=[$10])
+    LogicalJoin(condition=[=($7, $9)], joinType=[inner])
+      LogicalTableScan(table=[[CATALOG, SALES, EMP]])
+      LogicalTableScan(table=[[CATALOG, SALES, DEPT]])
+]]>
+    </Resource>
+    <Resource name="planAfter">
+      <![CDATA[
+EnumerableSort(sort0=[$1], sort1=[$10], dir0=[ASC], dir1=[ASC])
+  EnumerableCorrelate(correlation=[$cor0], joinType=[inner], requiredColumns=[{7}])
+    EnumerableTableScan(table=[[CATALOG, SALES, EMP]])
+    EnumerableFilter(condition=[=($cor0.DEPTNO, $0)])
+      EnumerableTableScan(table=[[CATALOG, SALES, DEPT]])
+]]>
+    </Resource>
+  </TestCase>
+  <TestCase name="testCorrelateLeftJoinDeriveLeft">
+    <Resource name="sql">
+      <![CDATA[select * from emp e left join dept d on e.deptno=d.deptno order by e.ename]]>
+    </Resource>
+    <Resource name="planBefore">
+      <![CDATA[
+LogicalSort(sort0=[$1], dir0=[ASC])
+  LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4], SAL=[$5], COMM=[$6], DEPTNO=[$7], SLACKER=[$8], DEPTNO0=[$9], NAME=[$10])
+    LogicalJoin(condition=[=($7, $9)], joinType=[left])
+      LogicalTableScan(table=[[CATALOG, SALES, EMP]])
+      LogicalTableScan(table=[[CATALOG, SALES, DEPT]])
+]]>
+    </Resource>
+    <Resource name="planAfter">
+      <![CDATA[
+EnumerableCorrelate(correlation=[$cor0], joinType=[left], requiredColumns=[{7}])
+  EnumerableSort(sort0=[$1], dir0=[ASC])
+    EnumerableTableScan(table=[[CATALOG, SALES, EMP]])
+  EnumerableFilter(condition=[=($cor0.DEPTNO, $0)])
+    EnumerableTableScan(table=[[CATALOG, SALES, DEPT]])
+]]>
+    </Resource>
+  </TestCase>
+  <TestCase name="testCorrelateLeftJoinNoDerive">
+    <Resource name="sql">
+      <![CDATA[select * from emp e left join dept d on e.deptno=d.deptno order by e.ename, d.name]]>
+    </Resource>
+    <Resource name="planBefore">
+      <![CDATA[
+LogicalSort(sort0=[$1], sort1=[$10], dir0=[ASC], dir1=[ASC])
+  LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4], SAL=[$5], COMM=[$6], DEPTNO=[$7], SLACKER=[$8], DEPTNO0=[$9], NAME=[$10])
+    LogicalJoin(condition=[=($7, $9)], joinType=[left])
+      LogicalTableScan(table=[[CATALOG, SALES, EMP]])
+      LogicalTableScan(table=[[CATALOG, SALES, DEPT]])
+]]>
+    </Resource>
+    <Resource name="planAfter">
+      <![CDATA[
+EnumerableSort(sort0=[$1], sort1=[$10], dir0=[ASC], dir1=[ASC])
+  EnumerableCorrelate(correlation=[$cor0], joinType=[left], requiredColumns=[{7}])
+    EnumerableTableScan(table=[[CATALOG, SALES, EMP]])
+    EnumerableFilter(condition=[=($cor0.DEPTNO, $0)])
+      EnumerableTableScan(table=[[CATALOG, SALES, DEPT]])
+]]>
+    </Resource>
+  </TestCase>
+  <TestCase name="testCorrelateSemiJoinDeriveLeft">
+    <Resource name="sql">
+      <![CDATA[select * from dept d where exists
+            (select 1 from emp e where e.deptno=d.deptno) order by d.name]]>
+    </Resource>
+    <Resource name="planBefore">
+      <![CDATA[
+LogicalSort(sort0=[$1], dir0=[ASC])
+  LogicalProject(DEPTNO=[$0], NAME=[$1])
+    LogicalProject(DEPTNO=[$0], NAME=[$1], DEPTNO0=[CAST($2):INTEGER], $f1=[CAST($3):BOOLEAN])
+      LogicalJoin(condition=[=($0, $2)], joinType=[inner])
+        LogicalTableScan(table=[[CATALOG, SALES, DEPT]])
+        LogicalAggregate(group=[{0}], agg#0=[MIN($1)])
+          LogicalProject(DEPTNO=[$7], $f0=[true])
+            LogicalTableScan(table=[[CATALOG, SALES, EMP]])
+]]>
+    </Resource>
+    <Resource name="planAfter">
+      <![CDATA[
+EnumerableCorrelate(correlation=[$cor2], joinType=[semi], requiredColumns=[{0}])
+  EnumerableSort(sort0=[$1], dir0=[ASC])
+    EnumerableTableScan(table=[[CATALOG, SALES, DEPT]])
+  EnumerableFilter(condition=[=($cor2.DEPTNO, $0)])
+    EnumerableProject(DEPTNO=[$7], $f0=[true])
+      EnumerableTableScan(table=[[CATALOG, SALES, EMP]])
+]]>
+    </Resource>
+  </TestCase>
 </Root>