You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@calcite.apache.org by li...@apache.org on 2022/09/01 02:50:52 UTC

[calcite] branch main updated: [CALCITE-5247] FilterJoinRule cannot simplify left join to inner join for `WHERE RHS.C1 IS NOT NULL OR RHS.C2 IS NOT NULL`

This is an automated email from the ASF dual-hosted git repository.

libenchao pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/calcite.git


The following commit(s) were added to refs/heads/main by this push:
     new fe14559360 [CALCITE-5247] FilterJoinRule cannot simplify left join to inner join for `WHERE RHS.C1 IS NOT NULL OR RHS.C2 IS NOT NULL`
fe14559360 is described below

commit fe14559360f6b72aa8c427add63450ead20f037b
Author: almansour <Al...@murex.com>
AuthorDate: Sat Aug 27 19:03:28 2022 +0300

    [CALCITE-5247] FilterJoinRule cannot simplify left join to inner join for `WHERE RHS.C1 IS NOT NULL OR RHS.C2 IS NOT NULL`
    
    This closes #2887
---
 .../main/java/org/apache/calcite/plan/Strong.java  | 27 +++++++++-
 .../org/apache/calcite/rex/RexProgramTest.java     |  3 ++
 .../org/apache/calcite/test/RelOptRulesTest.java   | 40 +++++++++++++++
 .../org/apache/calcite/test/RelOptRulesTest.xml    | 60 +++++++++++++++++++++-
 core/src/test/resources/sql/sub-query.iq           |  9 ++--
 5 files changed, 131 insertions(+), 8 deletions(-)

diff --git a/core/src/main/java/org/apache/calcite/plan/Strong.java b/core/src/main/java/org/apache/calcite/plan/Strong.java
index 52b7f2686e..5e6612d520 100644
--- a/core/src/main/java/org/apache/calcite/plan/Strong.java
+++ b/core/src/main/java/org/apache/calcite/plan/Strong.java
@@ -152,13 +152,38 @@ public class Strong {
   /** Returns whether an expression is definitely not true. */
   public boolean isNotTrue(RexNode node) {
     switch (node.getKind()) {
+    //TODO Enrich with more possible cases?
     case IS_NOT_NULL:
-      return anyNull(((RexCall) node).getOperands());
+      return isNull(((RexCall) node).getOperands().get(0));
+    case OR:
+      return allNotTrue(((RexCall) node).getOperands());
+    case AND:
+      return anyNotTrue(((RexCall) node).getOperands());
     default:
       return isNull(node);
     }
   }
 
+  /** Returns whether all expressions in a list are definitely not true. */
+  private boolean allNotTrue(List<RexNode> operands) {
+    for (RexNode operand : operands) {
+      if (!isNotTrue(operand)) {
+        return false;
+      }
+    }
+    return true;
+  }
+
+  /** Returns whether any expressions in a list are definitely not true. */
+  private boolean anyNotTrue(List<RexNode> operands) {
+    for (RexNode operand : operands) {
+      if (isNotTrue(operand)) {
+        return true;
+      }
+    }
+    return false;
+  }
+
   /** Returns whether an expression is definitely null.
    *
    * <p>The answer is based on calls to {@link #isNull} for its constituent
diff --git a/core/src/test/java/org/apache/calcite/rex/RexProgramTest.java b/core/src/test/java/org/apache/calcite/rex/RexProgramTest.java
index b3aea34139..bd548eb311 100644
--- a/core/src/test/java/org/apache/calcite/rex/RexProgramTest.java
+++ b/core/src/test/java/org/apache/calcite/rex/RexProgramTest.java
@@ -405,8 +405,11 @@ class RexProgramTest extends RexProgramTestBase {
 
     // If i0 is null, then "i0 is not null" is false
     RexNode i0NotNull = isNotNull(i0);
+    RexNode i1NotNull = isNotNull(i1);
     assertThat(Strong.isNull(i0NotNull, c0), is(false));
     assertThat(Strong.isNotTrue(i0NotNull, c0), is(true));
+    assertThat(Strong.isNotTrue(or(i0NotNull, i1NotNull), c01), is(true));
+    assertThat(Strong.isNotTrue(and(i0NotNull, i1NotNull), c1), is(true));
 
     // If i0 is null, then "not(i0 is not null)" is true.
     // Join-strengthening relies on this.
diff --git a/core/src/test/java/org/apache/calcite/test/RelOptRulesTest.java b/core/src/test/java/org/apache/calcite/test/RelOptRulesTest.java
index 6cb5d81e6b..a5cdf6c0a2 100644
--- a/core/src/test/java/org/apache/calcite/test/RelOptRulesTest.java
+++ b/core/src/test/java/org/apache/calcite/test/RelOptRulesTest.java
@@ -7145,6 +7145,46 @@ class RelOptRulesTest extends RelOptTestBase {
         .checkUnchanged();
   }
 
+  /**
+   * Test case for <a href="https://issues.apache.org/jira/browse/CALCITE-5247">[CALCITE-5247]
+   *    * FilterJoinRule cannot simplify left join to inner join for `WHERE RHS.C1 IS NOT NULL OR
+   *    RHS.C2 IS NOT NULL`</a>.
+   *
+   *    This tests the case where the condition contains an OR between the IS NOT NULL filters
+   */
+  @Test void testFilterJoinRuleOrIsNotNull() {
+    final String sql = "select * from\n"
+        + "emp LHS\n"
+        + "left join dept RHS on LHS.EMPNO = RHS.DEPTNO\n"
+        + "where\n"
+        + "RHS.DEPTNO is not null\n"
+        + "OR RHS.NAME is not null";
+
+    sql(sql)
+        .withRule(CoreRules.FILTER_INTO_JOIN)
+        .check();
+  }
+
+  /**
+   * Test case for <a href="https://issues.apache.org/jira/browse/CALCITE-5247">[CALCITE-5247]
+   *    * FilterJoinRule cannot simplify left join to inner join for `WHERE RHS.C1 IS NOT NULL OR
+   *    RHS.C2 IS NOT NULL`</a>.
+   *
+   *    This tests the case where the condition contains an AND between the IS NOT NULL filters
+   */
+  @Test void testFilterJoinRuleAndIsNotNull() {
+    final String sql = "select * from\n"
+        + "emp LHS\n"
+        + "left join dept RHS on LHS.EMPNO = RHS.DEPTNO\n"
+        + "where\n"
+        + "RHS.DEPTNO is not null\n"
+        + "AND RHS.NAME is not null";
+
+    sql(sql)
+        .withRule(CoreRules.FILTER_INTO_JOIN)
+        .check();
+  }
+
   @Test void testJoinCommuteRuleWithAlwaysTrueConditionAllowed() {
     checkJoinCommuteRuleWithAlwaysTrueConditionDisallowed(true);
   }
diff --git a/core/src/test/resources/org/apache/calcite/test/RelOptRulesTest.xml b/core/src/test/resources/org/apache/calcite/test/RelOptRulesTest.xml
index a3ad0a8f6f..5426348933 100644
--- a/core/src/test/resources/org/apache/calcite/test/RelOptRulesTest.xml
+++ b/core/src/test/resources/org/apache/calcite/test/RelOptRulesTest.xml
@@ -3916,6 +3916,62 @@ MyProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4], SAL=[$5], C
 MultiJoin(joinFilter=[true], isFullOuterJoin=[false], joinTypes=[[INNER, LEFT]], outerJoinConditions=[[NULL, =($7, $9)]], projFields=[[{0, 1, 2, 3, 4, 5, 6, 7, 8}, {0, 1}]], postJoinFilter=[>($9, 3)])
   LogicalTableScan(table=[[CATALOG, SALES, EMP]])
   LogicalTableScan(table=[[CATALOG, SALES, DEPT]])
+]]>
+    </Resource>
+  </TestCase>
+  <TestCase name="testFilterJoinRuleAndIsNotNull">
+    <Resource name="sql">
+      <![CDATA[select * from
+emp LHS
+left join dept RHS on LHS.EMPNO = RHS.DEPTNO
+where
+RHS.DEPTNO is not null
+AND LHS.EMPNO is not null]]>
+    </Resource>
+    <Resource name="planBefore">
+      <![CDATA[
+LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4], SAL=[$5], COMM=[$6], DEPTNO=[$7], SLACKER=[$8], DEPTNO0=[$9], NAME=[$10])
+  LogicalFilter(condition=[AND(IS NOT NULL($9), IS NOT NULL($10))])
+    LogicalJoin(condition=[=($0, $9)], joinType=[left])
+      LogicalTableScan(table=[[CATALOG, SALES, EMP]])
+      LogicalTableScan(table=[[CATALOG, SALES, DEPT]])
+]]>
+    </Resource>
+    <Resource name="planAfter">
+      <![CDATA[
+LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4], SAL=[$5], COMM=[$6], DEPTNO=[$7], SLACKER=[$8], DEPTNO0=[$9], NAME=[$10])
+  LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4], SAL=[$5], COMM=[$6], DEPTNO=[$7], SLACKER=[$8], DEPTNO0=[CAST($9):INTEGER], NAME=[CAST($10):VARCHAR(10)])
+    LogicalJoin(condition=[=($0, $9)], joinType=[inner])
+      LogicalTableScan(table=[[CATALOG, SALES, EMP]])
+      LogicalTableScan(table=[[CATALOG, SALES, DEPT]])
+]]>
+    </Resource>
+  </TestCase>
+  <TestCase name="testFilterJoinRuleOrIsNotNull">
+    <Resource name="sql">
+      <![CDATA[select * from
+emp LHS
+left join dept RHS on LHS.EMPNO = RHS.DEPTNO
+where
+RHS.DEPTNO is not null
+OR RHS.NAME is not null]]>
+    </Resource>
+    <Resource name="planBefore">
+      <![CDATA[
+LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4], SAL=[$5], COMM=[$6], DEPTNO=[$7], SLACKER=[$8], DEPTNO0=[$9], NAME=[$10])
+  LogicalFilter(condition=[OR(IS NOT NULL($9), IS NOT NULL($10))])
+    LogicalJoin(condition=[=($0, $9)], joinType=[left])
+      LogicalTableScan(table=[[CATALOG, SALES, EMP]])
+      LogicalTableScan(table=[[CATALOG, SALES, DEPT]])
+]]>
+    </Resource>
+    <Resource name="planAfter">
+      <![CDATA[
+LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4], SAL=[$5], COMM=[$6], DEPTNO=[$7], SLACKER=[$8], DEPTNO0=[$9], NAME=[$10])
+  LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4], SAL=[$5], COMM=[$6], DEPTNO=[$7], SLACKER=[$8], DEPTNO0=[CAST($9):INTEGER], NAME=[CAST($10):VARCHAR(10)])
+    LogicalJoin(condition=[=($0, $9)], joinType=[inner])
+      LogicalTableScan(table=[[CATALOG, SALES, EMP]])
+      LogicalTableScan(table=[[CATALOG, SALES, DEPT]])
 ]]>
     </Resource>
   </TestCase>
@@ -14046,8 +14102,8 @@ LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4], SAL=[$
     <Resource name="planAfter">
       <![CDATA[
 LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4], SAL=[$5], COMM=[$6], DEPTNO=[$7], SLACKER=[$8])
-  LogicalFilter(condition=[OR(AND(>($0, $9), IS NOT TRUE(OR(IS NULL($12), =($10, 0)))), AND(>($0, $9), IS NOT TRUE(OR(IS NULL($12), =($10, 0))), IS NOT TRUE(>($0, $9)), IS NOT TRUE(>($10, $11))))])
-    LogicalJoin(condition=[=($2, $13)], joinType=[left])
+  LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4], SAL=[$5], COMM=[$6], DEPTNO=[$7], SLACKER=[$8], m=[CAST($9):INTEGER], c=[CAST($10):BIGINT], d=[CAST($11):BIGINT], trueLiteral=[CAST($12):BOOLEAN], NAME=[CAST($13):VARCHAR(10)])
+    LogicalJoin(condition=[AND(=($2, $13), OR(AND(>($0, $9), IS NOT TRUE(OR(IS NULL($12), =($10, 0)))), AND(>($0, $9), IS NOT TRUE(OR(IS NULL($12), =($10, 0))), IS NOT TRUE(>($0, $9)), IS NOT TRUE(>($10, $11)))))], joinType=[inner])
       LogicalTableScan(table=[[CATALOG, SALES, EMP]])
       LogicalProject(m=[$1], c=[$2], d=[$2], trueLiteral=[true], NAME=[$0])
         LogicalAggregate(group=[{0}], m=[MIN($1)], c=[COUNT()])
diff --git a/core/src/test/resources/sql/sub-query.iq b/core/src/test/resources/sql/sub-query.iq
index 0664f6b78c..c50ab43c0f 100644
--- a/core/src/test/resources/sql/sub-query.iq
+++ b/core/src/test/resources/sql/sub-query.iq
@@ -2053,11 +2053,10 @@ EnumerableAggregate(group=[{}], C=[COUNT()])
 select empno from "scott".emp as e
 where e.empno > ANY(
   select 2 from "scott".dept e2 where e2.deptno = e.deptno) ;
-EnumerableCalc(expr#0..6=[{inputs}], expr#7=[>($t0, $t2)], expr#8=[IS NULL($t5)], expr#9=[0], expr#10=[=($t3, $t9)], expr#11=[OR($t8, $t10)], expr#12=[IS NOT TRUE($t11)], expr#13=[AND($t7, $t12)], expr#14=[IS NOT TRUE($t7)], expr#15=[>($t3, $t4)], expr#16=[IS NOT TRUE($t15)], expr#17=[AND($t7, $t12, $t14, $t16)], expr#18=[OR($t13, $t17)], EMPNO=[$t0], $condition=[$t18])
-  EnumerableMergeJoin(condition=[=($1, $6)], joinType=[left])
-    EnumerableSort(sort0=[$1], dir0=[ASC])
-      EnumerableCalc(expr#0..7=[{inputs}], EMPNO=[$t0], DEPTNO=[$t7])
-        EnumerableTableScan(table=[[scott, EMP]])
+EnumerableCalc(expr#0..1=[{inputs}], EMPNO=[$t0])
+  EnumerableHashJoin(condition=[AND(=($1, $6), OR(AND(>($0, $2), <>($3, 0)), AND(>($0, $2), <>($3, 0), IS NOT TRUE(>($0, $2)), IS NOT TRUE(>($3, $4)))))], joinType=[semi])
+    EnumerableCalc(expr#0..7=[{inputs}], EMPNO=[$t0], DEPTNO=[$t7])
+      EnumerableTableScan(table=[[scott, EMP]])
     EnumerableCalc(expr#0..2=[{inputs}], expr#3=[2], expr#4=[1:BIGINT], expr#5=[true], m=[$t3], c=[$t4], d=[$t4], trueLiteral=[$t5], DEPTNO=[$t0])
       EnumerableTableScan(table=[[scott, DEPT]])
 !plan