You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@calcite.apache.org by jh...@apache.org on 2022/09/24 05:13:08 UTC

[calcite] 01/04: [CALCITE-5294] Prune the null-generating side of an outer join if it is empty

This is an automated email from the ASF dual-hosted git repository.

jhyde pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/calcite.git

commit 457927ada244618c4752099d8d5d65d6d5fe425f
Author: kasakrisz <ka...@gmail.com>
AuthorDate: Wed Sep 21 08:38:36 2022 +0200

    [CALCITE-5294] Prune the null-generating side of an outer join if it is empty
    
    Improve the join rules in PruneEmptyRules to remove a Join
    if it generates null and one branch is empty.
    
    Close apache/calcite#2916
    
    Co-authored-by: Julian Hyde <jh...@apache.org>
---
 .../apache/calcite/rel/rules/PruneEmptyRules.java  | 72 +++++++++++++++-------
 .../org/apache/calcite/test/RelOptRulesTest.xml    | 12 ++--
 core/src/test/resources/sql/sub-query.iq           | 14 ++---
 3 files changed, 57 insertions(+), 41 deletions(-)

diff --git a/core/src/main/java/org/apache/calcite/rel/rules/PruneEmptyRules.java b/core/src/main/java/org/apache/calcite/rel/rules/PruneEmptyRules.java
index cf8884a091..a5977f5ae6 100644
--- a/core/src/main/java/org/apache/calcite/rel/rules/PruneEmptyRules.java
+++ b/core/src/main/java/org/apache/calcite/rel/rules/PruneEmptyRules.java
@@ -47,6 +47,8 @@ import java.util.Collections;
 import java.util.List;
 import java.util.function.Predicate;
 
+import static com.google.common.collect.Iterables.concat;
+
 /**
  * Collection of rules which remove sections of a query plan known never to
  * produce any rows.
@@ -371,11 +373,11 @@ public abstract class PruneEmptyRules {
           final Union union = call.rel(0);
           final List<RelNode> inputs = union.getInputs();
           assert inputs != null;
-          final RelBuilder builder = call.builder();
+          final RelBuilder relBuilder = call.builder();
           int nonEmptyInputs = 0;
           for (RelNode input : inputs) {
             if (!isEmpty(input)) {
-              builder.push(input);
+              relBuilder.push(input);
               nonEmptyInputs++;
             }
           }
@@ -383,12 +385,12 @@ public abstract class PruneEmptyRules {
               : "planner promised us at least one Empty child: "
               + RelOptUtil.toString(union);
           if (nonEmptyInputs == 0) {
-            builder.push(union).empty();
+            relBuilder.push(union).empty();
           } else {
-            builder.union(union.all, nonEmptyInputs);
-            builder.convert(union.getRowType(), true);
+            relBuilder.union(union.all, nonEmptyInputs);
+            relBuilder.convert(union.getRowType(), true);
           }
-          call.transformTo(builder.build());
+          call.transformTo(relBuilder.build());
         }
       };
     }
@@ -404,10 +406,10 @@ public abstract class PruneEmptyRules {
           final List<RelNode> inputs = minus.getInputs();
           assert inputs != null;
           int nonEmptyInputs = 0;
-          final RelBuilder builder = call.builder();
+          final RelBuilder relBuilder = call.builder();
           for (RelNode input : inputs) {
             if (!isEmpty(input)) {
-              builder.push(input);
+              relBuilder.push(input);
               nonEmptyInputs++;
             } else if (nonEmptyInputs == 0) {
               // If the first input of Minus is empty, the whole thing is
@@ -419,12 +421,12 @@ public abstract class PruneEmptyRules {
               : "planner promised us at least one Empty child: "
               + RelOptUtil.toString(minus);
           if (nonEmptyInputs == 0) {
-            builder.push(minus).empty();
+            relBuilder.push(minus).empty();
           } else {
-            builder.minus(minus.all, nonEmptyInputs);
-            builder.convert(minus.getRowType(), true);
+            relBuilder.minus(minus.all, nonEmptyInputs);
+            relBuilder.convert(minus.getRowType(), true);
           }
-          call.transformTo(builder.build());
+          call.transformTo(relBuilder.build());
         }
       };
     }
@@ -439,9 +441,9 @@ public abstract class PruneEmptyRules {
       return new PruneEmptyRule(this) {
         @Override public void onMatch(RelOptRuleCall call) {
           Intersect intersect = call.rel(0);
-          final RelBuilder builder = call.builder();
-          builder.push(intersect).empty();
-          call.transformTo(builder.build());
+          final RelBuilder relBuilder = call.builder();
+          relBuilder.push(intersect).empty();
+          call.transformTo(relBuilder.build());
         }
       };
     }
@@ -479,13 +481,25 @@ public abstract class PruneEmptyRules {
     @Override default PruneEmptyRule toRule() {
       return new PruneEmptyRule(this) {
         @Override public void onMatch(RelOptRuleCall call) {
-          Join join = call.rel(0);
+          final Join join = call.rel(0);
+          final Values empty = call.rel(1);
+          final RelNode right = call.rel(2);
+          final RelBuilder relBuilder = call.builder();
           if (join.getJoinType().generatesNullsOnLeft()) {
-            // "select * from emp right join dept" is not necessarily empty if
-            // emp is empty
+            // If "emp" is empty, "select * from emp right join dept" will have
+            // the same number of rows as "dept", and null values for the
+            // columns from "emp". The left side of the join can be removed.
+            final List<RexLiteral> nullLiterals =
+                Collections.nCopies(empty.getRowType().getFieldCount(),
+                    relBuilder.literal(null));
+            call.transformTo(
+                relBuilder.push(right)
+                    .project(concat(nullLiterals, relBuilder.fields()))
+                    .convert(join.getRowType(), true)
+                    .build());
             return;
           }
-          call.transformTo(call.builder().push(join).empty().build());
+          call.transformTo(relBuilder.push(join).empty().build());
         }
       };
     }
@@ -498,10 +512,22 @@ public abstract class PruneEmptyRules {
     @Override default PruneEmptyRule toRule() {
       return new PruneEmptyRule(this) {
         @Override public void onMatch(RelOptRuleCall call) {
-          Join join = call.rel(0);
+          final Join join = call.rel(0);
+          final RelNode left = call.rel(1);
+          final Values empty = call.rel(2);
+          final RelBuilder relBuilder = call.builder();
           if (join.getJoinType().generatesNullsOnRight()) {
-            // "select * from emp left join dept" is not necessarily empty if
-            // dept is empty
+            // If "dept" is empty, "select * from emp left join dept" will have
+            // the same number of rows as "emp", and null values for the
+            // columns from "dept". The right side of the join can be removed.
+            final List<RexLiteral> nullLiterals =
+                Collections.nCopies(empty.getRowType().getFieldCount(),
+                    relBuilder.literal(null));
+            call.transformTo(
+                relBuilder.push(left)
+                    .project(concat(relBuilder.fields(), nullLiterals))
+                    .convert(join.getRowType(), true)
+                    .build());
             return;
           }
           if (join.getJoinType() == JoinRelType.ANTI) {
@@ -509,7 +535,7 @@ public abstract class PruneEmptyRules {
             call.transformTo(join.getLeft());
             return;
           }
-          call.transformTo(call.builder().push(join).empty().build());
+          call.transformTo(relBuilder.push(join).empty().build());
         }
       };
     }
diff --git a/core/src/test/resources/org/apache/calcite/test/RelOptRulesTest.xml b/core/src/test/resources/org/apache/calcite/test/RelOptRulesTest.xml
index 6b12293717..b146e735c5 100644
--- a/core/src/test/resources/org/apache/calcite/test/RelOptRulesTest.xml
+++ b/core/src/test/resources/org/apache/calcite/test/RelOptRulesTest.xml
@@ -5335,8 +5335,7 @@ LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4], SAL=[$
     <Resource name="planAfter">
       <![CDATA[
 LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4], SAL=[$5], COMM=[$6], DEPTNO=[$7], SLACKER=[$8], DEPTNO0=[$9], NAME=[$10])
-  LogicalJoin(condition=[=($7, $9)], joinType=[full])
-    LogicalValues(tuples=[[]])
+  LogicalProject(EMPNO=[null:INTEGER], ENAME=[null:VARCHAR(20)], JOB=[null:VARCHAR(10)], MGR=[null:INTEGER], HIREDATE=[null:TIMESTAMP(0)], SAL=[null:INTEGER], COMM=[null:INTEGER], DEPTNO=[null:INTEGER], SLACKER=[null:BOOLEAN], DEPTNO0=[CAST($0):INTEGER], NAME=[CAST($1):VARCHAR(10)])
     LogicalTableScan(table=[[CATALOG, SALES, DEPT]])
 ]]>
     </Resource>
@@ -5406,8 +5405,7 @@ LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4], SAL=[$
     <Resource name="planAfter">
       <![CDATA[
 LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4], SAL=[$5], COMM=[$6], DEPTNO=[$7], SLACKER=[$8], DEPTNO0=[$9], NAME=[$10])
-  LogicalJoin(condition=[=($7, $9)], joinType=[right])
-    LogicalValues(tuples=[[]])
+  LogicalProject(EMPNO=[null:INTEGER], ENAME=[null:VARCHAR(20)], JOB=[null:VARCHAR(10)], MGR=[null:INTEGER], HIREDATE=[null:TIMESTAMP(0)], SAL=[null:INTEGER], COMM=[null:INTEGER], DEPTNO=[null:INTEGER], SLACKER=[null:BOOLEAN], DEPTNO0=[$0], NAME=[$1])
     LogicalTableScan(table=[[CATALOG, SALES, DEPT]])
 ]]>
     </Resource>
@@ -11810,9 +11808,8 @@ LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4], SAL=[$
     <Resource name="planAfter">
       <![CDATA[
 LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4], SAL=[$5], COMM=[$6], DEPTNO=[$7], SLACKER=[$8], DEPTNO0=[$9], NAME=[$10])
-  LogicalJoin(condition=[=($7, $9)], joinType=[full])
+  LogicalProject(EMPNO=[CAST($0):INTEGER], ENAME=[CAST($1):VARCHAR(20)], JOB=[CAST($2):VARCHAR(10)], MGR=[$3], HIREDATE=[CAST($4):TIMESTAMP(0)], SAL=[CAST($5):INTEGER], COMM=[CAST($6):INTEGER], DEPTNO=[CAST($7):INTEGER], SLACKER=[CAST($8):BOOLEAN], DEPTNO0=[null:INTEGER], NAME=[null:VARCHAR(10)])
     LogicalTableScan(table=[[CATALOG, SALES, EMP]])
-    LogicalValues(tuples=[[]])
 ]]>
     </Resource>
   </TestCase>
@@ -11858,9 +11855,8 @@ LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4], SAL=[$
     <Resource name="planAfter">
       <![CDATA[
 LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4], SAL=[$5], COMM=[$6], DEPTNO=[$7], SLACKER=[$8], DEPTNO0=[$9], NAME=[$10])
-  LogicalJoin(condition=[=($7, $9)], joinType=[left])
+  LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4], SAL=[$5], COMM=[$6], DEPTNO=[$7], SLACKER=[$8], DEPTNO0=[null:INTEGER], NAME=[null:VARCHAR(10)])
     LogicalTableScan(table=[[CATALOG, SALES, EMP]])
-    LogicalValues(tuples=[[]])
 ]]>
     </Resource>
   </TestCase>
diff --git a/core/src/test/resources/sql/sub-query.iq b/core/src/test/resources/sql/sub-query.iq
index 7772c743f7..e33e094b67 100644
--- a/core/src/test/resources/sql/sub-query.iq
+++ b/core/src/test/resources/sql/sub-query.iq
@@ -2613,11 +2613,8 @@ where unique (select comm from "scott".emp where comm is null);
 
 !ok
 
-EnumerableCalc(expr#0..1=[{inputs}], expr#2=[IS NULL($t1)], DEPTNO=[$t0], $condition=[$t2])
-  EnumerableNestedLoopJoin(condition=[true], joinType=[left])
-    EnumerableCalc(expr#0..2=[{inputs}], DEPTNO=[$t0])
-      EnumerableTableScan(table=[[scott, DEPT]])
-    EnumerableValues(tuples=[[]])
+EnumerableCalc(expr#0..2=[{inputs}], DEPTNO=[$t0])
+  EnumerableTableScan(table=[[scott, DEPT]])
 !plan
 
 # Previous, as scalar sub-query.
@@ -2636,11 +2633,8 @@ from "scott".dept;
 
 !ok
 
-EnumerableCalc(expr#0..1=[{inputs}], expr#2=[IS NULL($t1)], DEPTNO=[$t0], U=[$t2])
-  EnumerableNestedLoopJoin(condition=[true], joinType=[left])
-    EnumerableCalc(expr#0..2=[{inputs}], DEPTNO=[$t0])
-      EnumerableTableScan(table=[[scott, DEPT]])
-    EnumerableValues(tuples=[[]])
+EnumerableCalc(expr#0..2=[{inputs}], expr#3=[true], DEPTNO=[$t0], U=[$t3])
+  EnumerableTableScan(table=[[scott, DEPT]])
 !plan
 
 # composite keys have unique value which excludes fully or partially null rows.