You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@calcite.apache.org by da...@apache.org on 2019/07/09 01:29:18 UTC

[calcite] branch master updated: [CALCITE-3170] ANTI join on conditions push down generates wrong plan

This is an automated email from the ASF dual-hosted git repository.

danny0405 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/calcite.git


The following commit(s) were added to refs/heads/master by this push:
     new cb364ce  [CALCITE-3170] ANTI join on conditions push down generates wrong plan
cb364ce is described below

commit cb364ce8109f92c80c17bb37d1cb1a03e6222bbe
Author: yuzhao.cyz <yu...@alibaba-inc.com>
AuthorDate: Sun Jul 7 13:56:17 2019 +0800

    [CALCITE-3170] ANTI join on conditions push down generates wrong plan
    
    Antijoin on conditions should not be pushed down to either left or right hand
    side.
---
 .../java/org/apache/calcite/plan/RelOptUtil.java   | 13 +---
 .../calcite/rel/rules/FilterCorrelateRule.java     |  5 +-
 .../apache/calcite/rel/rules/FilterJoinRule.java   | 15 ++++-
 .../org/apache/calcite/test/RelOptRulesTest.java   | 76 ++++++++++++++++++++++
 .../test/enumerable/EnumerableJoinTest.java        | 30 +++++++++
 .../org/apache/calcite/test/RelOptRulesTest.xml    | 30 +++++++++
 6 files changed, 153 insertions(+), 16 deletions(-)

diff --git a/core/src/main/java/org/apache/calcite/plan/RelOptUtil.java b/core/src/main/java/org/apache/calcite/plan/RelOptUtil.java
index 9487b0b..b795a42 100644
--- a/core/src/main/java/org/apache/calcite/plan/RelOptUtil.java
+++ b/core/src/main/java/org/apache/calcite/plan/RelOptUtil.java
@@ -33,7 +33,6 @@ import org.apache.calcite.rel.RelWriter;
 import org.apache.calcite.rel.core.Aggregate;
 import org.apache.calcite.rel.core.AggregateCall;
 import org.apache.calcite.rel.core.Calc;
-import org.apache.calcite.rel.core.Correlate;
 import org.apache.calcite.rel.core.CorrelationId;
 import org.apache.calcite.rel.core.EquiJoin;
 import org.apache.calcite.rel.core.Filter;
@@ -2510,7 +2509,8 @@ public abstract class RelOptUtil {
         joinRel.getInputs().get(1).getRowType().getFieldList();
     final int nFieldsRight = rightFields.size();
 
-    assert nTotalFields == (returnsJustFirstInput(joinRel)
+    // SemiJoin, CorrelateSemiJoin, CorrelateAntiJoin: right fields are not returned
+    assert nTotalFields == (!joinType.projectsRight()
             ? nSysFields + nFieldsLeft
             : nSysFields + nFieldsLeft + nFieldsRight);
 
@@ -2599,14 +2599,6 @@ public abstract class RelOptUtil {
     return !filtersToRemove.isEmpty();
   }
 
-  private static boolean returnsJustFirstInput(RelNode joinRel) {
-    // SemiJoin, CorrelateSemiJoin, CorrelateAntiJoin: right fields are not returned
-    return (joinRel instanceof Join
-                && !((Join) joinRel).getJoinType().projectsRight())
-            || (joinRel instanceof Correlate
-                && !((Correlate) joinRel).getJoinType().projectsRight());
-  }
-
   private static RexNode shiftFilter(
       int start,
       int end,
@@ -3449,7 +3441,6 @@ public abstract class RelOptUtil {
       for (int i = 0; i < operands.size(); i++) {
         RexNode operand = operands.get(i);
         final int left2 = leftCount + extraLeftExprs.size();
-        final int right2 = rightCount + extraRightExprs.size();
         final RexNode e =
             pushDownEqualJoinConditions(
                 operand,
diff --git a/core/src/main/java/org/apache/calcite/rel/rules/FilterCorrelateRule.java b/core/src/main/java/org/apache/calcite/rel/rules/FilterCorrelateRule.java
index a72e2a8..d768055 100644
--- a/core/src/main/java/org/apache/calcite/rel/rules/FilterCorrelateRule.java
+++ b/core/src/main/java/org/apache/calcite/rel/rules/FilterCorrelateRule.java
@@ -22,7 +22,6 @@ import org.apache.calcite.plan.RelOptUtil;
 import org.apache.calcite.rel.RelNode;
 import org.apache.calcite.rel.core.Correlate;
 import org.apache.calcite.rel.core.Filter;
-import org.apache.calcite.rel.core.JoinRelType;
 import org.apache.calcite.rel.core.RelFactories;
 import org.apache.calcite.rex.RexBuilder;
 import org.apache.calcite.rex.RexNode;
@@ -84,10 +83,10 @@ public class FilterCorrelateRule extends RelOptRule {
     RelOptUtil.classifyFilters(
         corr,
         aboveFilters,
-        JoinRelType.INNER,
+        corr.getJoinType(),
         false,
         true,
-        corr.getJoinType() == JoinRelType.INNER,
+        !corr.getJoinType().generatesNullsOnRight(),
         aboveFilters,
         leftFilters,
         rightFilters);
diff --git a/core/src/main/java/org/apache/calcite/rel/rules/FilterJoinRule.java b/core/src/main/java/org/apache/calcite/rel/rules/FilterJoinRule.java
index a3f429b..9087195 100644
--- a/core/src/main/java/org/apache/calcite/rel/rules/FilterJoinRule.java
+++ b/core/src/main/java/org/apache/calcite/rel/rules/FilterJoinRule.java
@@ -120,7 +120,7 @@ public abstract class FilterJoinRule extends RelOptRule {
    */
   private boolean needsPushInto(Join join) {
     // If the join force the join info to be based on column equality,
-    // Or it is a non-correlated semijoin, returns false.
+    // or it is a non-correlated semijoin, returns false.
     return !RelOptUtil.forceEquiJoin(join);
   }
 
@@ -195,7 +195,18 @@ public abstract class FilterJoinRule extends RelOptRule {
     // Try to push down filters in ON clause. A ON clause filter can only be
     // pushed down if it does not affect the non-matching set, i.e. it is
     // not on the side which is preserved.
-    if (RelOptUtil.classifyFilters(
+
+    // Anti-join on conditions can not be pushed into left or right, e.g. for plan:
+    //
+    //     Join(condition=[AND(cond1, $2)], joinType=[anti])
+    //     :  - prj(f0=[$0], f1=[$1], f2=[$2])
+    //     :  - prj(f0=[$0])
+    //
+    // The semantic would change if join condition $2 is pushed into left,
+    // that is, the result set may be smaller. The right can not be pushed
+    // into for the same reason.
+    if (joinType != JoinRelType.ANTI
+        && RelOptUtil.classifyFilters(
         join,
         joinFilters,
         joinType,
diff --git a/core/src/test/java/org/apache/calcite/test/RelOptRulesTest.java b/core/src/test/java/org/apache/calcite/test/RelOptRulesTest.java
index b25cb7b..0bb3c54 100644
--- a/core/src/test/java/org/apache/calcite/test/RelOptRulesTest.java
+++ b/core/src/test/java/org/apache/calcite/test/RelOptRulesTest.java
@@ -416,6 +416,82 @@ public class RelOptRulesTest extends RelOptTestBase {
   }
 
   /** Test case for
+   * <a href="https://issues.apache.org/jira/browse/CALCITE-3170">[CALCITE-3170]
+   * ANTI join on conditions push down generates wrong plan</a>. */
+  @Test public void testCanNotPushAntiJoinConditionsToLeft() {
+    final RelBuilder relBuilder = RelBuilder.create(RelBuilderTest.config().build());
+    // build a rel equivalent to sql:
+    // select * from emp
+    // where emp.deptno
+    // not in (select dept.deptno from dept where emp.deptno > 20)
+    RelNode left = relBuilder.scan("EMP").build();
+    RelNode right = relBuilder.scan("DEPT").build();
+    RelNode join = relBuilder.push(left)
+        .push(right)
+        .antiJoin(
+            relBuilder.call(SqlStdOperatorTable.IS_NOT_DISTINCT_FROM,
+                relBuilder.field(2, 0, "DEPTNO"),
+                relBuilder.field(2, 1, "DEPTNO")),
+            relBuilder.call(SqlStdOperatorTable.GREATER_THAN,
+            RexInputRef.of(0, left.getRowType()),
+            relBuilder.literal(20)))
+        .build();
+
+    relBuilder.push(join);
+    RelNode relNode = relBuilder.project(relBuilder.field(0))
+        .build();
+
+    HepProgram program = new HepProgramBuilder()
+        .addRuleInstance(FilterJoinRule.JOIN)
+        .build();
+
+    HepPlanner hepPlanner = new HepPlanner(program);
+    hepPlanner.setRoot(relNode);
+    RelNode output = hepPlanner.findBestExp();
+
+    final String planAfter = NL + RelOptUtil.toString(output);
+    final DiffRepository diffRepos = getDiffRepos();
+    diffRepos.assertEquals("planAfter", "${planAfter}", planAfter);
+    SqlToRelTestBase.assertValid(output);
+  }
+
+  @Test public void testCanNotPushAntiJoinConditionsToRight() {
+    final RelBuilder relBuilder = RelBuilder.create(RelBuilderTest.config().build());
+    // build a rel equivalent to sql:
+    // select * from emp
+    // where emp.deptno
+    // not in (select dept.deptno from dept where dept.dname = 'ddd')
+    RelNode left = relBuilder.scan("EMP").build();
+    RelNode right = relBuilder.scan("DEPT").build();
+    RelNode join = relBuilder.push(left)
+        .push(right)
+        .antiJoin(
+            relBuilder.call(SqlStdOperatorTable.IS_NOT_DISTINCT_FROM,
+                relBuilder.field(2, 0, "DEPTNO"),
+                relBuilder.field(2, 1, "DEPTNO")),
+            relBuilder.equals(relBuilder.field(2, 1, "DNAME"),
+                relBuilder.literal("ddd")))
+        .build();
+
+    relBuilder.push(join);
+    RelNode relNode = relBuilder.project(relBuilder.field(0))
+        .build();
+
+    HepProgram program = new HepProgramBuilder()
+        .addRuleInstance(FilterJoinRule.JOIN)
+        .build();
+
+    HepPlanner hepPlanner = new HepPlanner(program);
+    hepPlanner.setRoot(relNode);
+    RelNode output = hepPlanner.findBestExp();
+
+    final String planAfter = NL + RelOptUtil.toString(output);
+    final DiffRepository diffRepos = getDiffRepos();
+    diffRepos.assertEquals("planAfter", "${planAfter}", planAfter);
+    SqlToRelTestBase.assertValid(output);
+  }
+
+  /** Test case for
    * <a href="https://issues.apache.org/jira/browse/CALCITE-3171">[CALCITE-3171]
    * SemiJoin on conditions push down throws IndexOutOfBoundsException</a>. */
   @Test public void testPushSemiJoinConditionsToLeft() {
diff --git a/core/src/test/java/org/apache/calcite/test/enumerable/EnumerableJoinTest.java b/core/src/test/java/org/apache/calcite/test/enumerable/EnumerableJoinTest.java
index e800eb9..dde43c2 100644
--- a/core/src/test/java/org/apache/calcite/test/enumerable/EnumerableJoinTest.java
+++ b/core/src/test/java/org/apache/calcite/test/enumerable/EnumerableJoinTest.java
@@ -128,6 +128,36 @@ public class EnumerableJoinTest {
         .returnsUnordered("empid=200; name=Eric");
   }
 
+  /** Test case for
+   * <a href="https://issues.apache.org/jira/browse/CALCITE-3170">[CALCITE-3170]
+   * ANTI join on conditions push down generates wrong plan</a>. */
+  @Test public void testCanNotPushAntiJoinConditionsToLeft() {
+    tester(false, new JdbcTest.HrSchema())
+        .query("?").withRel(
+            // build a rel equivalent to sql:
+            // select * from emps
+            // where emps.deptno
+            // not in (select depts.deptno from depts where emps.name = 'ddd')
+
+            // Use `equals` instead of `is not distinct from` only for testing.
+            builder -> builder
+                .scan("s", "emps")
+                .scan("s", "depts")
+                .antiJoin(
+                    builder.equals(
+                        builder.field(2, 0, "deptno"),
+                        builder.field(2, 1, "deptno")),
+                    builder.equals(builder.field(2, 0, "name"),
+                        builder.literal("ddd")))
+                .project(builder.field(0))
+                .build()
+    ).returnsUnordered(
+        "empid=100",
+        "empid=110",
+        "empid=150",
+        "empid=200");
+  }
+
   private CalciteAssert.AssertThat tester(boolean forceDecorrelate,
       Object schema) {
     return CalciteAssert.that()
diff --git a/core/src/test/resources/org/apache/calcite/test/RelOptRulesTest.xml b/core/src/test/resources/org/apache/calcite/test/RelOptRulesTest.xml
index 1edf4f1..13ea276 100644
--- a/core/src/test/resources/org/apache/calcite/test/RelOptRulesTest.xml
+++ b/core/src/test/resources/org/apache/calcite/test/RelOptRulesTest.xml
@@ -5987,6 +5987,36 @@ LogicalProject(DEPTNO=[$0], NAME=[$1], EMPNO=[$2], ENAME=[$3], JOB=[$4], MGR=[$5
 ]]>
         </Resource>
     </TestCase>
+    <TestCase name="testCanNotPushAntiJoinConditionsToLeft">
+        <Resource name="sql">
+            <![CDATA[select * from emp
+where emp.deptno not in
+(select dept.deptno from dept where emp.deptno > 20)]]>
+        </Resource>
+        <Resource name="planAfter">
+            <![CDATA[
+LogicalProject(EMPNO=[$0])
+  LogicalJoin(condition=[AND(IS NOT DISTINCT FROM($7, $8), >($0, 20))], joinType=[anti])
+    LogicalTableScan(table=[[scott, EMP]])
+    LogicalTableScan(table=[[scott, DEPT]])
+]]>
+        </Resource>
+    </TestCase>
+    <TestCase name="testCanNotPushAntiJoinConditionsToRight">
+        <Resource name="sql">
+            <![CDATA[select * from emp
+where emp.deptno
+not in (select dept.deptno from dept where dept.dname = 'ddd')]]>
+        </Resource>
+        <Resource name="planAfter">
+            <![CDATA[
+LogicalProject(EMPNO=[$0])
+  LogicalJoin(condition=[AND(IS NOT DISTINCT FROM($7, $8), =($9, 'ddd'))], joinType=[anti])
+    LogicalTableScan(table=[[scott, EMP]])
+    LogicalTableScan(table=[[scott, DEPT]])
+]]>
+        </Resource>
+    </TestCase>
     <TestCase name="testPushSemiJoinConditionsToLeft">
         <Resource name="sql">
             <![CDATA[select * from emp