You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@calcite.apache.org by jc...@apache.org on 2021/06/03 23:38:07 UTC
[calcite] branch master updated: [CALCITE-4499] FilterJoinRule
misses opportunity to push filter to semijoin input
This is an automated email from the ASF dual-hosted git repository.
jcamacho pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/calcite.git
The following commit(s) were added to refs/heads/master by this push:
new de48e55 [CALCITE-4499] FilterJoinRule misses opportunity to push filter to semijoin input
de48e55 is described below
commit de48e55783a140e6927f88a445d9cbdf2e7623b5
Author: Jesus Camacho Rodriguez <jc...@cloudera.com>
AuthorDate: Mon Feb 15 20:17:14 2021 -0800
[CALCITE-4499] FilterJoinRule misses opportunity to push filter to semijoin input
Close apache/calcite#2349
---
.../java/org/apache/calcite/plan/RelOptUtil.java | 123 +++++++++++++++++++++
.../org/apache/calcite/rel/core/JoinRelType.java | 32 ++++++
.../org/apache/calcite/rel/metadata/RelMdUtil.java | 7 +-
.../calcite/rel/rules/FilterCorrelateRule.java | 3 +-
.../apache/calcite/rel/rules/FilterJoinRule.java | 15 +--
.../org/apache/calcite/test/RelOptRulesTest.java | 46 ++++++++
.../org/apache/calcite/test/RelOptRulesTest.xml | 19 ++++
7 files changed, 230 insertions(+), 15 deletions(-)
diff --git a/core/src/main/java/org/apache/calcite/plan/RelOptUtil.java b/core/src/main/java/org/apache/calcite/plan/RelOptUtil.java
index 939a8a1..545943b 100644
--- a/core/src/main/java/org/apache/calcite/plan/RelOptUtil.java
+++ b/core/src/main/java/org/apache/calcite/plan/RelOptUtil.java
@@ -2793,6 +2793,125 @@ public abstract class RelOptUtil {
*
* @param joinRel join node
* @param filters filters to be classified
+ * @param pushInto whether filters can be pushed into the join
+ * @param pushLeft true if filters can be pushed to the left
+ * @param pushRight true if filters can be pushed to the right
+ * @param joinFilters list of filters to push to the join
+ * @param leftFilters list of filters to push to the left child
+ * @param rightFilters list of filters to push to the right child
+ * @return whether at least one filter was pushed
+ */
+ public static boolean classifyFilters(
+ RelNode joinRel,
+ List<RexNode> filters,
+ boolean pushInto,
+ boolean pushLeft,
+ boolean pushRight,
+ List<RexNode> joinFilters,
+ List<RexNode> leftFilters,
+ List<RexNode> rightFilters) {
+ RexBuilder rexBuilder = joinRel.getCluster().getRexBuilder();
+ List<RelDataTypeField> joinFields = joinRel.getRowType().getFieldList();
+ final int nSysFields = 0; // joinRel.getSystemFieldList().size();
+ final List<RelDataTypeField> leftFields =
+ joinRel.getInputs().get(0).getRowType().getFieldList();
+ final int nFieldsLeft = leftFields.size();
+ final List<RelDataTypeField> rightFields =
+ joinRel.getInputs().get(1).getRowType().getFieldList();
+ final int nFieldsRight = rightFields.size();
+ final int nTotalFields = nFieldsLeft + nFieldsRight;
+
+ // set the reference bitmaps for the left and right children
+ ImmutableBitSet leftBitmap =
+ ImmutableBitSet.range(nSysFields, nSysFields + nFieldsLeft);
+ ImmutableBitSet rightBitmap =
+ ImmutableBitSet.range(nSysFields + nFieldsLeft, nTotalFields);
+
+ final List<RexNode> filtersToRemove = new ArrayList<>();
+ for (RexNode filter : filters) {
+ final InputFinder inputFinder = InputFinder.analyze(filter);
+ final ImmutableBitSet inputBits = inputFinder.build();
+
+ // REVIEW - are there any expressions that need special handling
+ // and therefore cannot be pushed?
+
+ // filters can be pushed to the left child if the left child
+ // does not generate NULLs and the only columns referenced in
+ // the filter originate from the left child
+ if (pushLeft && leftBitmap.contains(inputBits)) {
+ // ignore filters that always evaluate to true
+ if (!filter.isAlwaysTrue()) {
+ // adjust the field references in the filter to reflect
+ // that fields in the left now shift over by the number
+ // of system fields
+ final RexNode shiftedFilter =
+ shiftFilter(
+ nSysFields,
+ nSysFields + nFieldsLeft,
+ -nSysFields,
+ rexBuilder,
+ joinFields,
+ nTotalFields,
+ leftFields,
+ filter);
+
+ leftFilters.add(shiftedFilter);
+ }
+ filtersToRemove.add(filter);
+
+ // filters can be pushed to the right child if the right child
+ // does not generate NULLs and the only columns referenced in
+ // the filter originate from the right child
+ } else if (pushRight && rightBitmap.contains(inputBits)) {
+ if (!filter.isAlwaysTrue()) {
+ // adjust the field references in the filter to reflect
+ // that fields in the right now shift over to the left;
+ // since we never push filters to a NULL generating
+ // child, the types of the source should match the dest
+ // so we don't need to explicitly pass the destination
+ // fields to RexInputConverter
+ final RexNode shiftedFilter =
+ shiftFilter(
+ nSysFields + nFieldsLeft,
+ nTotalFields,
+ -(nSysFields + nFieldsLeft),
+ rexBuilder,
+ joinFields,
+ nTotalFields,
+ rightFields,
+ filter);
+ rightFilters.add(shiftedFilter);
+ }
+ filtersToRemove.add(filter);
+
+ } else {
+ // If the filter can't be pushed to either child, we may push them into the join
+ if (pushInto) {
+ if (!joinFilters.contains(filter)) {
+ joinFilters.add(filter);
+ }
+ filtersToRemove.add(filter);
+ }
+ }
+ }
+
+ // Remove filters after the loop, to prevent concurrent modification.
+ if (!filtersToRemove.isEmpty()) {
+ filters.removeAll(filtersToRemove);
+ }
+
+ // Did anything change?
+ return !filtersToRemove.isEmpty();
+ }
+
+ /**
+ * Classifies filters according to where they should be processed. They
+ * either stay where they are, are pushed to the join (if they originated
+ * from above the join), or are pushed to one of the children. Filters that
+ * are pushed are added to list passed in as input parameters.
+ *
+ * @param joinRel join node
+ * @param filters filters to be classified
* @param joinType join type
* @param pushInto whether filters can be pushed into the ON clause
* @param pushLeft true if filters can be pushed to the left
@@ -2801,7 +2920,11 @@ public abstract class RelOptUtil {
* @param leftFilters list of filters to push to the left child
* @param rightFilters list of filters to push to the right child
* @return whether at least one filter was pushed
+ *
+ * @deprecated Use
+ * {@link RelOptUtil#classifyFilters(RelNode, List, boolean, boolean, boolean, List, List, List)}
*/
+ @Deprecated // to be removed before 2.0
public static boolean classifyFilters(
RelNode joinRel,
List<RexNode> filters,
diff --git a/core/src/main/java/org/apache/calcite/rel/core/JoinRelType.java b/core/src/main/java/org/apache/calcite/rel/core/JoinRelType.java
index 97c2b89..eef436c 100644
--- a/core/src/main/java/org/apache/calcite/rel/core/JoinRelType.java
+++ b/core/src/main/java/org/apache/calcite/rel/core/JoinRelType.java
@@ -16,6 +16,8 @@
*/
package org.apache.calcite.rel.core;
+import org.apiguardian.api.API;
+
import java.util.Locale;
/**
@@ -152,4 +154,34 @@ public enum JoinRelType {
public boolean projectsRight() {
return this != SEMI && this != ANTI;
}
+
+ /** Returns whether this join type accepts pushing predicates from above into its predicate. */
+ @API(since = "1.28", status = API.Status.EXPERIMENTAL)
+ public boolean canPushIntoFromAbove() {
+ return (this == INNER) || (this == SEMI);
+ }
+
+ /** Returns whether this join type accepts pushing predicates from above into its left input. */
+ @API(since = "1.28", status = API.Status.EXPERIMENTAL)
+ public boolean canPushLeftFromAbove() {
+ return (this == INNER) || (this == LEFT) || (this == SEMI) || (this == ANTI);
+ }
+
+ /** Returns whether this join type accepts pushing predicates from above into its right input. */
+ @API(since = "1.28", status = API.Status.EXPERIMENTAL)
+ public boolean canPushRightFromAbove() {
+ return (this == INNER) || (this == RIGHT);
+ }
+
+ /** Returns whether this join type accepts pushing predicates from within into its left input. */
+ @API(since = "1.28", status = API.Status.EXPERIMENTAL)
+ public boolean canPushLeftFromWithin() {
+ return (this == INNER) || (this == RIGHT) || (this == SEMI);
+ }
+
+ /** Returns whether this join type accepts pushing predicates from within into its right input. */
+ @API(since = "1.28", status = API.Status.EXPERIMENTAL)
+ public boolean canPushRightFromWithin() {
+ return (this == INNER) || (this == LEFT) || (this == SEMI);
+ }
}
diff --git a/core/src/main/java/org/apache/calcite/rel/metadata/RelMdUtil.java b/core/src/main/java/org/apache/calcite/rel/metadata/RelMdUtil.java
index efb2373..8370bab 100644
--- a/core/src/main/java/org/apache/calcite/rel/metadata/RelMdUtil.java
+++ b/core/src/main/java/org/apache/calcite/rel/metadata/RelMdUtil.java
@@ -657,10 +657,9 @@ public class RelMdUtil {
RelOptUtil.classifyFilters(
joinRel,
predList,
- joinType,
- !joinType.isOuterJoin(),
- !joinType.generatesNullsOnLeft(),
- !joinType.generatesNullsOnRight(),
+ joinType.canPushIntoFromAbove(),
+ joinType.canPushLeftFromAbove(),
+ joinType.canPushRightFromAbove(),
joinFilters,
leftFilters,
rightFilters);
diff --git a/core/src/main/java/org/apache/calcite/rel/rules/FilterCorrelateRule.java b/core/src/main/java/org/apache/calcite/rel/rules/FilterCorrelateRule.java
index 71ed11a..06c22dc 100644
--- a/core/src/main/java/org/apache/calcite/rel/rules/FilterCorrelateRule.java
+++ b/core/src/main/java/org/apache/calcite/rel/rules/FilterCorrelateRule.java
@@ -79,10 +79,9 @@ public class FilterCorrelateRule
RelOptUtil.classifyFilters(
corr,
aboveFilters,
- corr.getJoinType(),
false,
true,
- !corr.getJoinType().generatesNullsOnRight(),
+ corr.getJoinType().canPushRightFromAbove(),
aboveFilters,
leftFilters,
rightFilters);
diff --git a/core/src/main/java/org/apache/calcite/rel/rules/FilterJoinRule.java b/core/src/main/java/org/apache/calcite/rel/rules/FilterJoinRule.java
index 7cbabf7..756d18a 100644
--- a/core/src/main/java/org/apache/calcite/rel/rules/FilterJoinRule.java
+++ b/core/src/main/java/org/apache/calcite/rel/rules/FilterJoinRule.java
@@ -110,10 +110,9 @@ public abstract class FilterJoinRule<C extends FilterJoinRule.Config>
if (RelOptUtil.classifyFilters(
join,
aboveFilters,
- joinType,
- true,
- !joinType.generatesNullsOnLeft(),
- !joinType.generatesNullsOnRight(),
+ joinType.canPushIntoFromAbove(),
+ joinType.canPushLeftFromAbove(),
+ joinType.canPushRightFromAbove(),
joinFilters,
leftFilters,
rightFilters)) {
@@ -147,14 +146,12 @@ public abstract class FilterJoinRule<C extends FilterJoinRule.Config>
// The semantic would change if join condition $2 is pushed into left,
// that is, the result set may be smaller. The right can not be pushed
// into for the same reason.
- if (joinType != JoinRelType.ANTI
- && RelOptUtil.classifyFilters(
+ if (RelOptUtil.classifyFilters(
join,
joinFilters,
- joinType,
false,
- !joinType.generatesNullsOnRight(),
- !joinType.generatesNullsOnLeft(),
+ joinType.canPushLeftFromWithin(),
+ joinType.canPushRightFromWithin(),
joinFilters,
leftFilters,
rightFilters)) {
diff --git a/core/src/test/java/org/apache/calcite/test/RelOptRulesTest.java b/core/src/test/java/org/apache/calcite/test/RelOptRulesTest.java
index d988d72..f76b62e 100644
--- a/core/src/test/java/org/apache/calcite/test/RelOptRulesTest.java
+++ b/core/src/test/java/org/apache/calcite/test/RelOptRulesTest.java
@@ -790,6 +790,52 @@ class RelOptRulesTest extends RelOptTestBase {
.check();
}
+ /** Test case for
+ * <a href="https://issues.apache.org/jira/browse/CALCITE-4499">[CALCITE-4499]
+ * FilterJoinRule misses opportunity to push filter to semijoin input</a>. */
+ @Test void testPushFilterSemijoin() {
+ final FilterJoinRule.Predicate predicate =
+ (join, joinType, exp) -> joinType != JoinRelType.INNER;
+ final FilterJoinRule.JoinConditionPushRule join =
+ CoreRules.JOIN_CONDITION_PUSH.config
+ .withPredicate(predicate)
+ .withDescription("FilterJoinRule:no-filter")
+ .as(FilterJoinRule.JoinConditionPushRule.Config.class)
+ .toRule();
+ final RelBuilder relBuilder = RelBuilder.create(RelBuilderTest.config().build());
+
+ RelNode left = relBuilder.scan("DEPT").build();
+ RelNode right = relBuilder.scan("EMP").build();
+ RelNode plan = relBuilder.push(left)
+ .push(right)
+ .semiJoin(
+ relBuilder.and(
+ relBuilder.call(SqlStdOperatorTable.EQUALS,
+ relBuilder.field(2, 0, 0),
+ relBuilder.field(2, 1, 7)),
+ relBuilder.call(SqlStdOperatorTable.EQUALS,
+ relBuilder.field(2, 1, 5),
+ relBuilder.literal(100))))
+ .project(relBuilder.field(1))
+ .build();
+
+ final String planBefore = NL + RelOptUtil.toString(plan);
+
+ HepProgram program = new HepProgramBuilder()
+ .addRuleInstance(join)
+ .build();
+
+ HepPlanner hepPlanner = new HepPlanner(program);
+ hepPlanner.setRoot(plan);
+ RelNode output = hepPlanner.findBestExp();
+
+ final String planAfter = NL + RelOptUtil.toString(output);
+ final DiffRepository diffRepos = getDiffRepos();
+ diffRepos.assertEquals("planBefore", "${planBefore}", planBefore);
+ diffRepos.assertEquals("planAfter", "${planAfter}", planAfter);
+ SqlToRelTestBase.assertValid(output);
+ }
+
@Test void testSemiJoinProjectTranspose() {
final RelBuilder relBuilder = RelBuilder.create(RelBuilderTest.config().build());
// build a rel equivalent to sql:
diff --git a/core/src/test/resources/org/apache/calcite/test/RelOptRulesTest.xml b/core/src/test/resources/org/apache/calcite/test/RelOptRulesTest.xml
index 0005a68..1a131c1 100644
--- a/core/src/test/resources/org/apache/calcite/test/RelOptRulesTest.xml
+++ b/core/src/test/resources/org/apache/calcite/test/RelOptRulesTest.xml
@@ -8088,6 +8088,25 @@ LogicalProject(NAME=[$1])
]]>
</Resource>
</TestCase>
+ <TestCase name="testPushFilterSemijoin">
+ <Resource name="planBefore">
+ <![CDATA[
+LogicalProject(DNAME=[$1])
+ LogicalJoin(condition=[AND(=($0, $10), =($8, 100))], joinType=[semi])
+ LogicalTableScan(table=[[scott, DEPT]])
+ LogicalTableScan(table=[[scott, EMP]])
+]]>
+ </Resource>
+ <Resource name="planAfter">
+ <![CDATA[
+LogicalProject(DNAME=[$1])
+ LogicalJoin(condition=[=($0, $10)], joinType=[semi])
+ LogicalTableScan(table=[[scott, DEPT]])
+ LogicalFilter(condition=[=($5, 100)])
+ LogicalTableScan(table=[[scott, EMP]])
+]]>
+ </Resource>
+ </TestCase>
<TestCase name="testPushFilterThroughOuterJoin">
<Resource name="sql">
<![CDATA[select 1 from sales.dept d left outer join sales.emp e on d.deptno = e.deptno where d.name = 'Charlie']]>