You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@calcite.apache.org by jc...@apache.org on 2021/06/03 23:38:07 UTC

[calcite] branch master updated: [CALCITE-4499] FilterJoinRule misses opportunity to push filter to semijoin input

This is an automated email from the ASF dual-hosted git repository.

jcamacho pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/calcite.git


The following commit(s) were added to refs/heads/master by this push:
     new de48e55  [CALCITE-4499] FilterJoinRule misses opportunity to push filter to semijoin input
de48e55 is described below

commit de48e55783a140e6927f88a445d9cbdf2e7623b5
Author: Jesus Camacho Rodriguez <jc...@cloudera.com>
AuthorDate: Mon Feb 15 20:17:14 2021 -0800

    [CALCITE-4499] FilterJoinRule misses opportunity to push filter to semijoin input
    
    Close apache/calcite#2349
---
 .../java/org/apache/calcite/plan/RelOptUtil.java   | 123 +++++++++++++++++++++
 .../org/apache/calcite/rel/core/JoinRelType.java   |  32 ++++++
 .../org/apache/calcite/rel/metadata/RelMdUtil.java |   7 +-
 .../calcite/rel/rules/FilterCorrelateRule.java     |   3 +-
 .../apache/calcite/rel/rules/FilterJoinRule.java   |  15 +--
 .../org/apache/calcite/test/RelOptRulesTest.java   |  46 ++++++++
 .../org/apache/calcite/test/RelOptRulesTest.xml    |  19 ++++
 7 files changed, 230 insertions(+), 15 deletions(-)

diff --git a/core/src/main/java/org/apache/calcite/plan/RelOptUtil.java b/core/src/main/java/org/apache/calcite/plan/RelOptUtil.java
index 939a8a1..545943b 100644
--- a/core/src/main/java/org/apache/calcite/plan/RelOptUtil.java
+++ b/core/src/main/java/org/apache/calcite/plan/RelOptUtil.java
@@ -2793,6 +2793,125 @@ public abstract class RelOptUtil {
    *
    * @param joinRel      join node
    * @param filters      filters to be classified
+   * @param pushInto     whether filters can be pushed into the join
+   * @param pushLeft     true if filters can be pushed to the left
+   * @param pushRight    true if filters can be pushed to the right
+   * @param joinFilters  list of filters to push to the join
+   * @param leftFilters  list of filters to push to the left child
+   * @param rightFilters list of filters to push to the right child
+   * @return whether at least one filter was pushed
+   */
+  public static boolean classifyFilters(
+      RelNode joinRel,
+      List<RexNode> filters,
+      boolean pushInto,
+      boolean pushLeft,
+      boolean pushRight,
+      List<RexNode> joinFilters,
+      List<RexNode> leftFilters,
+      List<RexNode> rightFilters) {
+    RexBuilder rexBuilder = joinRel.getCluster().getRexBuilder();
+    List<RelDataTypeField> joinFields = joinRel.getRowType().getFieldList();
+    final int nSysFields = 0; // joinRel.getSystemFieldList().size();
+    final List<RelDataTypeField> leftFields =
+        joinRel.getInputs().get(0).getRowType().getFieldList();
+    final int nFieldsLeft = leftFields.size();
+    final List<RelDataTypeField> rightFields =
+        joinRel.getInputs().get(1).getRowType().getFieldList();
+    final int nFieldsRight = rightFields.size();
+    final int nTotalFields = nFieldsLeft + nFieldsRight;
+
+    // set the reference bitmaps for the left and right children
+    ImmutableBitSet leftBitmap =
+        ImmutableBitSet.range(nSysFields, nSysFields + nFieldsLeft);
+    ImmutableBitSet rightBitmap =
+        ImmutableBitSet.range(nSysFields + nFieldsLeft, nTotalFields);
+
+    final List<RexNode> filtersToRemove = new ArrayList<>();
+    for (RexNode filter : filters) {
+      final InputFinder inputFinder = InputFinder.analyze(filter);
+      final ImmutableBitSet inputBits = inputFinder.build();
+
+      // REVIEW - are there any expressions that need special handling
+      // and therefore cannot be pushed?
+
+      // filters can be pushed to the left child if the left child
+      // does not generate NULLs and the only columns referenced in
+      // the filter originate from the left child
+      if (pushLeft && leftBitmap.contains(inputBits)) {
+        // ignore filters that always evaluate to true
+        if (!filter.isAlwaysTrue()) {
+          // adjust the field references in the filter to reflect
+          // that fields in the left now shift over by the number
+          // of system fields
+          final RexNode shiftedFilter =
+              shiftFilter(
+                  nSysFields,
+                  nSysFields + nFieldsLeft,
+                  -nSysFields,
+                  rexBuilder,
+                  joinFields,
+                  nTotalFields,
+                  leftFields,
+                  filter);
+
+          leftFilters.add(shiftedFilter);
+        }
+        filtersToRemove.add(filter);
+
+        // filters can be pushed to the right child if the right child
+        // does not generate NULLs and the only columns referenced in
+        // the filter originate from the right child
+      } else if (pushRight && rightBitmap.contains(inputBits)) {
+        if (!filter.isAlwaysTrue()) {
+          // adjust the field references in the filter to reflect
+          // that fields in the right now shift over to the left;
+          // since we never push filters to a NULL generating
+          // child, the types of the source should match the dest
+          // so we don't need to explicitly pass the destination
+          // fields to RexInputConverter
+          final RexNode shiftedFilter =
+              shiftFilter(
+                  nSysFields + nFieldsLeft,
+                  nTotalFields,
+                  -(nSysFields + nFieldsLeft),
+                  rexBuilder,
+                  joinFields,
+                  nTotalFields,
+                  rightFields,
+                  filter);
+          rightFilters.add(shiftedFilter);
+        }
+        filtersToRemove.add(filter);
+
+      } else {
+        // If the filter can't be pushed to either child, we may push them into the join
+        if (pushInto) {
+          if (!joinFilters.contains(filter)) {
+            joinFilters.add(filter);
+          }
+          filtersToRemove.add(filter);
+        }
+      }
+    }
+
+    // Remove filters after the loop, to prevent concurrent modification.
+    if (!filtersToRemove.isEmpty()) {
+      filters.removeAll(filtersToRemove);
+    }
+
+    // Did anything change?
+    return !filtersToRemove.isEmpty();
+  }
+
+  /**
+   * Classifies filters according to where they should be processed. They
+   * either stay where they are, are pushed to the join (if they originated
+   * from above the join), or are pushed to one of the children. Filters that
+   * are pushed are added to list passed in as input parameters.
+   *
+   * @param joinRel      join node
+   * @param filters      filters to be classified
    * @param joinType     join type
    * @param pushInto     whether filters can be pushed into the ON clause
    * @param pushLeft     true if filters can be pushed to the left
@@ -2801,7 +2920,11 @@ public abstract class RelOptUtil {
    * @param leftFilters  list of filters to push to the left child
    * @param rightFilters list of filters to push to the right child
    * @return whether at least one filter was pushed
+   *
+   * @deprecated Use
+   * {@link RelOptUtil#classifyFilters(RelNode, List, boolean, boolean, boolean, List, List, List)}
    */
+  @Deprecated // to be removed before 2.0
   public static boolean classifyFilters(
       RelNode joinRel,
       List<RexNode> filters,
diff --git a/core/src/main/java/org/apache/calcite/rel/core/JoinRelType.java b/core/src/main/java/org/apache/calcite/rel/core/JoinRelType.java
index 97c2b89..eef436c 100644
--- a/core/src/main/java/org/apache/calcite/rel/core/JoinRelType.java
+++ b/core/src/main/java/org/apache/calcite/rel/core/JoinRelType.java
@@ -16,6 +16,8 @@
  */
 package org.apache.calcite.rel.core;
 
+import org.apiguardian.api.API;
+
 import java.util.Locale;
 
 /**
@@ -152,4 +154,34 @@ public enum JoinRelType {
   public boolean projectsRight() {
     return this != SEMI && this != ANTI;
   }
+
+  /** Returns whether this join type accepts pushing predicates from above into its predicate. */
+  @API(since = "1.28", status = API.Status.EXPERIMENTAL)
+  public boolean canPushIntoFromAbove() {
+    return (this == INNER) || (this == SEMI);
+  }
+
+  /** Returns whether this join type accepts pushing predicates from above into its left input. */
+  @API(since = "1.28", status = API.Status.EXPERIMENTAL)
+  public boolean canPushLeftFromAbove() {
+    return (this == INNER) || (this == LEFT) || (this == SEMI) || (this == ANTI);
+  }
+
+  /** Returns whether this join type accepts pushing predicates from above into its right input. */
+  @API(since = "1.28", status = API.Status.EXPERIMENTAL)
+  public boolean canPushRightFromAbove() {
+    return (this == INNER) || (this == RIGHT);
+  }
+
+  /** Returns whether this join type accepts pushing predicates from within into its left input. */
+  @API(since = "1.28", status = API.Status.EXPERIMENTAL)
+  public boolean canPushLeftFromWithin() {
+    return (this == INNER) || (this == RIGHT) || (this == SEMI);
+  }
+
+  /** Returns whether this join type accepts pushing predicates from within into its right input. */
+  @API(since = "1.28", status = API.Status.EXPERIMENTAL)
+  public boolean canPushRightFromWithin() {
+    return (this == INNER) || (this == LEFT) || (this == SEMI);
+  }
 }
diff --git a/core/src/main/java/org/apache/calcite/rel/metadata/RelMdUtil.java b/core/src/main/java/org/apache/calcite/rel/metadata/RelMdUtil.java
index efb2373..8370bab 100644
--- a/core/src/main/java/org/apache/calcite/rel/metadata/RelMdUtil.java
+++ b/core/src/main/java/org/apache/calcite/rel/metadata/RelMdUtil.java
@@ -657,10 +657,9 @@ public class RelMdUtil {
       RelOptUtil.classifyFilters(
           joinRel,
           predList,
-          joinType,
-          !joinType.isOuterJoin(),
-          !joinType.generatesNullsOnLeft(),
-          !joinType.generatesNullsOnRight(),
+          joinType.canPushIntoFromAbove(),
+          joinType.canPushLeftFromAbove(),
+          joinType.canPushRightFromAbove(),
           joinFilters,
           leftFilters,
           rightFilters);
diff --git a/core/src/main/java/org/apache/calcite/rel/rules/FilterCorrelateRule.java b/core/src/main/java/org/apache/calcite/rel/rules/FilterCorrelateRule.java
index 71ed11a..06c22dc 100644
--- a/core/src/main/java/org/apache/calcite/rel/rules/FilterCorrelateRule.java
+++ b/core/src/main/java/org/apache/calcite/rel/rules/FilterCorrelateRule.java
@@ -79,10 +79,9 @@ public class FilterCorrelateRule
     RelOptUtil.classifyFilters(
         corr,
         aboveFilters,
-        corr.getJoinType(),
         false,
         true,
-        !corr.getJoinType().generatesNullsOnRight(),
+        corr.getJoinType().canPushRightFromAbove(),
         aboveFilters,
         leftFilters,
         rightFilters);
diff --git a/core/src/main/java/org/apache/calcite/rel/rules/FilterJoinRule.java b/core/src/main/java/org/apache/calcite/rel/rules/FilterJoinRule.java
index 7cbabf7..756d18a 100644
--- a/core/src/main/java/org/apache/calcite/rel/rules/FilterJoinRule.java
+++ b/core/src/main/java/org/apache/calcite/rel/rules/FilterJoinRule.java
@@ -110,10 +110,9 @@ public abstract class FilterJoinRule<C extends FilterJoinRule.Config>
     if (RelOptUtil.classifyFilters(
         join,
         aboveFilters,
-        joinType,
-        true,
-        !joinType.generatesNullsOnLeft(),
-        !joinType.generatesNullsOnRight(),
+        joinType.canPushIntoFromAbove(),
+        joinType.canPushLeftFromAbove(),
+        joinType.canPushRightFromAbove(),
         joinFilters,
         leftFilters,
         rightFilters)) {
@@ -147,14 +146,12 @@ public abstract class FilterJoinRule<C extends FilterJoinRule.Config>
     // The semantic would change if join condition $2 is pushed into left,
     // that is, the result set may be smaller. The right can not be pushed
     // into for the same reason.
-    if (joinType != JoinRelType.ANTI
-        && RelOptUtil.classifyFilters(
+    if (RelOptUtil.classifyFilters(
         join,
         joinFilters,
-        joinType,
         false,
-        !joinType.generatesNullsOnRight(),
-        !joinType.generatesNullsOnLeft(),
+        joinType.canPushLeftFromWithin(),
+        joinType.canPushRightFromWithin(),
         joinFilters,
         leftFilters,
         rightFilters)) {
diff --git a/core/src/test/java/org/apache/calcite/test/RelOptRulesTest.java b/core/src/test/java/org/apache/calcite/test/RelOptRulesTest.java
index d988d72..f76b62e 100644
--- a/core/src/test/java/org/apache/calcite/test/RelOptRulesTest.java
+++ b/core/src/test/java/org/apache/calcite/test/RelOptRulesTest.java
@@ -790,6 +790,52 @@ class RelOptRulesTest extends RelOptTestBase {
         .check();
   }
 
+  /** Test case for
+   * <a href="https://issues.apache.org/jira/browse/CALCITE-4499">[CALCITE-4499]
+   * FilterJoinRule misses opportunity to push filter to semijoin input</a>. */
+  @Test void testPushFilterSemijoin() {
+    final FilterJoinRule.Predicate predicate =
+        (join, joinType, exp) -> joinType != JoinRelType.INNER;
+    final FilterJoinRule.JoinConditionPushRule join =
+        CoreRules.JOIN_CONDITION_PUSH.config
+            .withPredicate(predicate)
+            .withDescription("FilterJoinRule:no-filter")
+            .as(FilterJoinRule.JoinConditionPushRule.Config.class)
+            .toRule();
+    final RelBuilder relBuilder = RelBuilder.create(RelBuilderTest.config().build());
+
+    RelNode left = relBuilder.scan("DEPT").build();
+    RelNode right = relBuilder.scan("EMP").build();
+    RelNode plan = relBuilder.push(left)
+        .push(right)
+        .semiJoin(
+            relBuilder.and(
+                relBuilder.call(SqlStdOperatorTable.EQUALS,
+                    relBuilder.field(2, 0, 0),
+                    relBuilder.field(2, 1, 7)),
+                relBuilder.call(SqlStdOperatorTable.EQUALS,
+                    relBuilder.field(2, 1, 5),
+                    relBuilder.literal(100))))
+        .project(relBuilder.field(1))
+        .build();
+
+    final String planBefore = NL + RelOptUtil.toString(plan);
+
+    HepProgram program = new HepProgramBuilder()
+        .addRuleInstance(join)
+        .build();
+
+    HepPlanner hepPlanner = new HepPlanner(program);
+    hepPlanner.setRoot(plan);
+    RelNode output = hepPlanner.findBestExp();
+
+    final String planAfter = NL + RelOptUtil.toString(output);
+    final DiffRepository diffRepos = getDiffRepos();
+    diffRepos.assertEquals("planBefore", "${planBefore}", planBefore);
+    diffRepos.assertEquals("planAfter", "${planAfter}", planAfter);
+    SqlToRelTestBase.assertValid(output);
+  }
+
   @Test void testSemiJoinProjectTranspose() {
     final RelBuilder relBuilder = RelBuilder.create(RelBuilderTest.config().build());
     // build a rel equivalent to sql:
diff --git a/core/src/test/resources/org/apache/calcite/test/RelOptRulesTest.xml b/core/src/test/resources/org/apache/calcite/test/RelOptRulesTest.xml
index 0005a68..1a131c1 100644
--- a/core/src/test/resources/org/apache/calcite/test/RelOptRulesTest.xml
+++ b/core/src/test/resources/org/apache/calcite/test/RelOptRulesTest.xml
@@ -8088,6 +8088,25 @@ LogicalProject(NAME=[$1])
 ]]>
     </Resource>
   </TestCase>
+  <TestCase name="testPushFilterSemijoin">
+    <Resource name="planBefore">
+      <![CDATA[
+LogicalProject(DNAME=[$1])
+  LogicalJoin(condition=[AND(=($0, $10), =($8, 100))], joinType=[semi])
+    LogicalTableScan(table=[[scott, DEPT]])
+    LogicalTableScan(table=[[scott, EMP]])
+]]>
+    </Resource>
+    <Resource name="planAfter">
+      <![CDATA[
+LogicalProject(DNAME=[$1])
+  LogicalJoin(condition=[=($0, $10)], joinType=[semi])
+    LogicalTableScan(table=[[scott, DEPT]])
+    LogicalFilter(condition=[=($5, 100)])
+      LogicalTableScan(table=[[scott, EMP]])
+]]>
+    </Resource>
+  </TestCase>
   <TestCase name="testPushFilterThroughOuterJoin">
     <Resource name="sql">
       <![CDATA[select 1 from sales.dept d left outer join sales.emp e on d.deptno = e.deptno where d.name = 'Charlie']]>