You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@calcite.apache.org by jh...@apache.org on 2022/09/24 05:13:08 UTC
[calcite] 01/04: [CALCITE-5294] Prune the null-generating side of an outer join if it is empty
This is an automated email from the ASF dual-hosted git repository.
jhyde pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/calcite.git
commit 457927ada244618c4752099d8d5d65d6d5fe425f
Author: kasakrisz <ka...@gmail.com>
AuthorDate: Wed Sep 21 08:38:36 2022 +0200
[CALCITE-5294] Prune the null-generating side of an outer join if it is empty
Improve the join rules in PruneEmptyRules to remove a Join
if it generates null and one branch is empty.
Close apache/calcite#2916
Co-authored-by: Julian Hyde <jh...@apache.org>
---
.../apache/calcite/rel/rules/PruneEmptyRules.java | 72 +++++++++++++++-------
.../org/apache/calcite/test/RelOptRulesTest.xml | 12 ++--
core/src/test/resources/sql/sub-query.iq | 14 ++---
3 files changed, 57 insertions(+), 41 deletions(-)
diff --git a/core/src/main/java/org/apache/calcite/rel/rules/PruneEmptyRules.java b/core/src/main/java/org/apache/calcite/rel/rules/PruneEmptyRules.java
index cf8884a091..a5977f5ae6 100644
--- a/core/src/main/java/org/apache/calcite/rel/rules/PruneEmptyRules.java
+++ b/core/src/main/java/org/apache/calcite/rel/rules/PruneEmptyRules.java
@@ -47,6 +47,8 @@ import java.util.Collections;
import java.util.List;
import java.util.function.Predicate;
+import static com.google.common.collect.Iterables.concat;
+
/**
* Collection of rules which remove sections of a query plan known never to
* produce any rows.
@@ -371,11 +373,11 @@ public abstract class PruneEmptyRules {
final Union union = call.rel(0);
final List<RelNode> inputs = union.getInputs();
assert inputs != null;
- final RelBuilder builder = call.builder();
+ final RelBuilder relBuilder = call.builder();
int nonEmptyInputs = 0;
for (RelNode input : inputs) {
if (!isEmpty(input)) {
- builder.push(input);
+ relBuilder.push(input);
nonEmptyInputs++;
}
}
@@ -383,12 +385,12 @@ public abstract class PruneEmptyRules {
: "planner promised us at least one Empty child: "
+ RelOptUtil.toString(union);
if (nonEmptyInputs == 0) {
- builder.push(union).empty();
+ relBuilder.push(union).empty();
} else {
- builder.union(union.all, nonEmptyInputs);
- builder.convert(union.getRowType(), true);
+ relBuilder.union(union.all, nonEmptyInputs);
+ relBuilder.convert(union.getRowType(), true);
}
- call.transformTo(builder.build());
+ call.transformTo(relBuilder.build());
}
};
}
@@ -404,10 +406,10 @@ public abstract class PruneEmptyRules {
final List<RelNode> inputs = minus.getInputs();
assert inputs != null;
int nonEmptyInputs = 0;
- final RelBuilder builder = call.builder();
+ final RelBuilder relBuilder = call.builder();
for (RelNode input : inputs) {
if (!isEmpty(input)) {
- builder.push(input);
+ relBuilder.push(input);
nonEmptyInputs++;
} else if (nonEmptyInputs == 0) {
// If the first input of Minus is empty, the whole thing is
@@ -419,12 +421,12 @@ public abstract class PruneEmptyRules {
: "planner promised us at least one Empty child: "
+ RelOptUtil.toString(minus);
if (nonEmptyInputs == 0) {
- builder.push(minus).empty();
+ relBuilder.push(minus).empty();
} else {
- builder.minus(minus.all, nonEmptyInputs);
- builder.convert(minus.getRowType(), true);
+ relBuilder.minus(minus.all, nonEmptyInputs);
+ relBuilder.convert(minus.getRowType(), true);
}
- call.transformTo(builder.build());
+ call.transformTo(relBuilder.build());
}
};
}
@@ -439,9 +441,9 @@ public abstract class PruneEmptyRules {
return new PruneEmptyRule(this) {
@Override public void onMatch(RelOptRuleCall call) {
Intersect intersect = call.rel(0);
- final RelBuilder builder = call.builder();
- builder.push(intersect).empty();
- call.transformTo(builder.build());
+ final RelBuilder relBuilder = call.builder();
+ relBuilder.push(intersect).empty();
+ call.transformTo(relBuilder.build());
}
};
}
@@ -479,13 +481,25 @@ public abstract class PruneEmptyRules {
@Override default PruneEmptyRule toRule() {
return new PruneEmptyRule(this) {
@Override public void onMatch(RelOptRuleCall call) {
- Join join = call.rel(0);
+ final Join join = call.rel(0);
+ final Values empty = call.rel(1);
+ final RelNode right = call.rel(2);
+ final RelBuilder relBuilder = call.builder();
if (join.getJoinType().generatesNullsOnLeft()) {
- // "select * from emp right join dept" is not necessarily empty if
- // emp is empty
+ // If "emp" is empty, "select * from emp right join dept" will have
+ // the same number of rows as "dept", and null values for the
+ // columns from "emp". The left side of the join can be removed.
+ final List<RexLiteral> nullLiterals =
+ Collections.nCopies(empty.getRowType().getFieldCount(),
+ relBuilder.literal(null));
+ call.transformTo(
+ relBuilder.push(right)
+ .project(concat(nullLiterals, relBuilder.fields()))
+ .convert(join.getRowType(), true)
+ .build());
return;
}
- call.transformTo(call.builder().push(join).empty().build());
+ call.transformTo(relBuilder.push(join).empty().build());
}
};
}
@@ -498,10 +512,22 @@ public abstract class PruneEmptyRules {
@Override default PruneEmptyRule toRule() {
return new PruneEmptyRule(this) {
@Override public void onMatch(RelOptRuleCall call) {
- Join join = call.rel(0);
+ final Join join = call.rel(0);
+ final RelNode left = call.rel(1);
+ final Values empty = call.rel(2);
+ final RelBuilder relBuilder = call.builder();
if (join.getJoinType().generatesNullsOnRight()) {
- // "select * from emp left join dept" is not necessarily empty if
- // dept is empty
+ // If "dept" is empty, "select * from emp left join dept" will have
+ // the same number of rows as "emp", and null values for the
+ // columns from "dept". The right side of the join can be removed.
+ final List<RexLiteral> nullLiterals =
+ Collections.nCopies(empty.getRowType().getFieldCount(),
+ relBuilder.literal(null));
+ call.transformTo(
+ relBuilder.push(left)
+ .project(concat(relBuilder.fields(), nullLiterals))
+ .convert(join.getRowType(), true)
+ .build());
return;
}
if (join.getJoinType() == JoinRelType.ANTI) {
@@ -509,7 +535,7 @@ public abstract class PruneEmptyRules {
call.transformTo(join.getLeft());
return;
}
- call.transformTo(call.builder().push(join).empty().build());
+ call.transformTo(relBuilder.push(join).empty().build());
}
};
}
diff --git a/core/src/test/resources/org/apache/calcite/test/RelOptRulesTest.xml b/core/src/test/resources/org/apache/calcite/test/RelOptRulesTest.xml
index 6b12293717..b146e735c5 100644
--- a/core/src/test/resources/org/apache/calcite/test/RelOptRulesTest.xml
+++ b/core/src/test/resources/org/apache/calcite/test/RelOptRulesTest.xml
@@ -5335,8 +5335,7 @@ LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4], SAL=[$
<Resource name="planAfter">
<![CDATA[
LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4], SAL=[$5], COMM=[$6], DEPTNO=[$7], SLACKER=[$8], DEPTNO0=[$9], NAME=[$10])
- LogicalJoin(condition=[=($7, $9)], joinType=[full])
- LogicalValues(tuples=[[]])
+ LogicalProject(EMPNO=[null:INTEGER], ENAME=[null:VARCHAR(20)], JOB=[null:VARCHAR(10)], MGR=[null:INTEGER], HIREDATE=[null:TIMESTAMP(0)], SAL=[null:INTEGER], COMM=[null:INTEGER], DEPTNO=[null:INTEGER], SLACKER=[null:BOOLEAN], DEPTNO0=[CAST($0):INTEGER], NAME=[CAST($1):VARCHAR(10)])
LogicalTableScan(table=[[CATALOG, SALES, DEPT]])
]]>
</Resource>
@@ -5406,8 +5405,7 @@ LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4], SAL=[$
<Resource name="planAfter">
<![CDATA[
LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4], SAL=[$5], COMM=[$6], DEPTNO=[$7], SLACKER=[$8], DEPTNO0=[$9], NAME=[$10])
- LogicalJoin(condition=[=($7, $9)], joinType=[right])
- LogicalValues(tuples=[[]])
+ LogicalProject(EMPNO=[null:INTEGER], ENAME=[null:VARCHAR(20)], JOB=[null:VARCHAR(10)], MGR=[null:INTEGER], HIREDATE=[null:TIMESTAMP(0)], SAL=[null:INTEGER], COMM=[null:INTEGER], DEPTNO=[null:INTEGER], SLACKER=[null:BOOLEAN], DEPTNO0=[$0], NAME=[$1])
LogicalTableScan(table=[[CATALOG, SALES, DEPT]])
]]>
</Resource>
@@ -11810,9 +11808,8 @@ LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4], SAL=[$
<Resource name="planAfter">
<![CDATA[
LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4], SAL=[$5], COMM=[$6], DEPTNO=[$7], SLACKER=[$8], DEPTNO0=[$9], NAME=[$10])
- LogicalJoin(condition=[=($7, $9)], joinType=[full])
+ LogicalProject(EMPNO=[CAST($0):INTEGER], ENAME=[CAST($1):VARCHAR(20)], JOB=[CAST($2):VARCHAR(10)], MGR=[$3], HIREDATE=[CAST($4):TIMESTAMP(0)], SAL=[CAST($5):INTEGER], COMM=[CAST($6):INTEGER], DEPTNO=[CAST($7):INTEGER], SLACKER=[CAST($8):BOOLEAN], DEPTNO0=[null:INTEGER], NAME=[null:VARCHAR(10)])
LogicalTableScan(table=[[CATALOG, SALES, EMP]])
- LogicalValues(tuples=[[]])
]]>
</Resource>
</TestCase>
@@ -11858,9 +11855,8 @@ LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4], SAL=[$
<Resource name="planAfter">
<![CDATA[
LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4], SAL=[$5], COMM=[$6], DEPTNO=[$7], SLACKER=[$8], DEPTNO0=[$9], NAME=[$10])
- LogicalJoin(condition=[=($7, $9)], joinType=[left])
+ LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4], SAL=[$5], COMM=[$6], DEPTNO=[$7], SLACKER=[$8], DEPTNO0=[null:INTEGER], NAME=[null:VARCHAR(10)])
LogicalTableScan(table=[[CATALOG, SALES, EMP]])
- LogicalValues(tuples=[[]])
]]>
</Resource>
</TestCase>
diff --git a/core/src/test/resources/sql/sub-query.iq b/core/src/test/resources/sql/sub-query.iq
index 7772c743f7..e33e094b67 100644
--- a/core/src/test/resources/sql/sub-query.iq
+++ b/core/src/test/resources/sql/sub-query.iq
@@ -2613,11 +2613,8 @@ where unique (select comm from "scott".emp where comm is null);
!ok
-EnumerableCalc(expr#0..1=[{inputs}], expr#2=[IS NULL($t1)], DEPTNO=[$t0], $condition=[$t2])
- EnumerableNestedLoopJoin(condition=[true], joinType=[left])
- EnumerableCalc(expr#0..2=[{inputs}], DEPTNO=[$t0])
- EnumerableTableScan(table=[[scott, DEPT]])
- EnumerableValues(tuples=[[]])
+EnumerableCalc(expr#0..2=[{inputs}], DEPTNO=[$t0])
+ EnumerableTableScan(table=[[scott, DEPT]])
!plan
# Previous, as scalar sub-query.
@@ -2636,11 +2633,8 @@ from "scott".dept;
!ok
-EnumerableCalc(expr#0..1=[{inputs}], expr#2=[IS NULL($t1)], DEPTNO=[$t0], U=[$t2])
- EnumerableNestedLoopJoin(condition=[true], joinType=[left])
- EnumerableCalc(expr#0..2=[{inputs}], DEPTNO=[$t0])
- EnumerableTableScan(table=[[scott, DEPT]])
- EnumerableValues(tuples=[[]])
+EnumerableCalc(expr#0..2=[{inputs}], expr#3=[true], DEPTNO=[$t0], U=[$t3])
+ EnumerableTableScan(table=[[scott, DEPT]])
!plan
# composite keys have unique value which excludes fully or partially null rows.