You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@calcite.apache.org by ru...@apache.org on 2020/08/24 09:17:32 UTC
[calcite] branch master updated: [CALCITE-4113] Support LEFT join
in EnumerableMergeJoin
This is an automated email from the ASF dual-hosted git repository.
rubenql pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/calcite.git
The following commit(s) were added to refs/heads/master by this push:
new 672ed7a [CALCITE-4113] Support LEFT join in EnumerableMergeJoin
672ed7a is described below
commit 672ed7a1d0dbf87760d37e52b424f16bc8c43b4d
Author: rubenada <ru...@gmail.com>
AuthorDate: Wed Aug 12 17:14:57 2020 +0100
[CALCITE-4113] Support LEFT join in EnumerableMergeJoin
---
.../apache/calcite/runtime/EnumerablesTest.java | 188 ++++++++++++++++++++-
.../java/org/apache/calcite/test/JdbcTest.java | 12 +-
.../test/enumerable/EnumerableCorrelateTest.java | 1 +
.../test/enumerable/EnumerableHashJoinTest.java | 4 +
core/src/test/resources/sql/blank.iq | 24 +--
core/src/test/resources/sql/misc.iq | 36 ++--
core/src/test/resources/sql/sub-query.iq | 58 ++++---
.../apache/calcite/linq4j/EnumerableDefaults.java | 93 +++++-----
8 files changed, 317 insertions(+), 99 deletions(-)
diff --git a/core/src/test/java/org/apache/calcite/runtime/EnumerablesTest.java b/core/src/test/java/org/apache/calcite/runtime/EnumerablesTest.java
index 20ed84a..f615d92 100644
--- a/core/src/test/java/org/apache/calcite/runtime/EnumerablesTest.java
+++ b/core/src/test/java/org/apache/calcite/runtime/EnumerablesTest.java
@@ -212,6 +212,43 @@ class EnumerablesTest {
newArrayList(1, 1, 4, 4),
equalTo("[3]"),
JoinType.ANTI);
+
+ // LEFT join tests:
+ // Matching keys at start
+ testIntersect(
+ newArrayList(1, 3, 4),
+ newArrayList(1, 4),
+ equalTo("[1-1, 3-null, 4-4]"),
+ equalTo("[1-1, 3-null, 4-4, null-null]"),
+ JoinType.LEFT);
+ // Matching key at start and end of right, not of left
+ testIntersect(
+ newArrayList(0, 1, 3, 4, 5),
+ newArrayList(1, 4),
+ equalTo("[0-null, 1-1, 3-null, 4-4, 5-null]"),
+ equalTo("[0-null, 1-1, 3-null, 4-4, 5-null, null-null]"),
+ JoinType.LEFT);
+ // Matching key at start and end of left, not right
+ testIntersect(
+ newArrayList(1, 3, 4),
+ newArrayList(0, 1, 4, 5),
+ equalTo("[1-1, 3-null, 4-4]"),
+ equalTo("[1-1, 3-null, 4-4, null-null]"),
+ JoinType.LEFT);
+ // Matching key not at start or end of left or right
+ testIntersect(
+ newArrayList(0, 2, 3, 4, 5),
+ newArrayList(1, 3, 4, 6),
+ equalTo("[0-null, 2-null, 3-3, 4-4, 5-null]"),
+ equalTo("[0-null, 2-null, 3-3, 4-4, 5-null, null-null]"),
+ JoinType.LEFT);
+ // Matching duplicated keys
+ testIntersect(
+ newArrayList(1, 3, 4),
+ newArrayList(1, 1, 4, 4),
+ equalTo("[1-1, 1-1, 3-null, 4-4, 4-4]"),
+ equalTo("[1-1, 1-1, 3-null, 4-4, 4-4, null-null]"),
+ JoinType.LEFT);
}
@Test void testMergeJoin3() {
@@ -268,21 +305,57 @@ class EnumerablesTest {
new ArrayList<>(),
equalTo("[]"),
JoinType.ANTI);
+
+ // LEFT join tests:
+ // No overlap
+ testIntersect(
+ newArrayList(0, 2, 4),
+ newArrayList(1, 3, 5),
+ equalTo("[0-null, 2-null, 4-null]"),
+ equalTo("[0-null, 2-null, 4-null, null-null]"),
+ JoinType.LEFT);
+ // Left empty
+ testIntersect(
+ new ArrayList<>(),
+ newArrayList(1, 3, 4, 6),
+ equalTo("[]"),
+ equalTo("[null-null]"),
+ JoinType.LEFT);
+ // Right empty
+ testIntersect(
+ newArrayList(3, 7),
+ new ArrayList<>(),
+ equalTo("[3-null, 7-null]"),
+ equalTo("[3-null, 7-null, null-null]"),
+ JoinType.LEFT);
+ // Both empty
+ testIntersect(
+ new ArrayList<Integer>(),
+ new ArrayList<>(),
+ equalTo("[]"),
+ equalTo("[null-null]"),
+ JoinType.LEFT);
}
private static <T extends Comparable<T>> void testIntersect(
List<T> list0, List<T> list1, org.hamcrest.Matcher<String> matcher, JoinType joinType) {
+ testIntersect(list0, list1, matcher, matcher, joinType);
+ }
+
+ private static <T extends Comparable<T>> void testIntersect(
+ List<T> list0, List<T> list1, org.hamcrest.Matcher<String> matcher,
+ org.hamcrest.Matcher<String> matcherNullLeft, JoinType joinType) {
assertThat(
intersect(list0, list1, joinType).toList().toString(),
matcher);
- // Repeat test with nulls at the end of left / right: result should not be impacted
+ // Repeat test with nulls at the end of left / right
// Null at the end of left
list0.add(null);
assertThat(
intersect(list0, list1, joinType).toList().toString(),
- matcher);
+ matcherNullLeft);
// Null at the end of right
list0.remove(list0.size() - 1);
@@ -295,17 +368,27 @@ class EnumerablesTest {
list0.add(null);
assertThat(
intersect(list0, list1, joinType).toList().toString(),
- matcher);
+ matcherNullLeft);
}
- private static <T extends Comparable<T>> Enumerable<T> intersect(
+ private static <T extends Comparable<T>> Enumerable<String> intersect(
List<T> list0, List<T> list1, JoinType joinType) {
+ if (joinType == JoinType.LEFT) {
+ return EnumerableDefaults.mergeJoin(
+ Linq4j.asEnumerable(list0),
+ Linq4j.asEnumerable(list1),
+ Functions.identitySelector(),
+ Functions.identitySelector(),
+ (v0, v1) -> String.valueOf(v0) + "-" + String.valueOf(v1),
+ JoinType.LEFT,
+ null);
+ }
return EnumerableDefaults.mergeJoin(
Linq4j.asEnumerable(list0),
Linq4j.asEnumerable(list1),
Functions.identitySelector(),
Functions.identitySelector(),
- (v0, v1) -> v0,
+ (v0, v1) -> String.valueOf(v0),
joinType,
null);
}
@@ -550,6 +633,101 @@ class EnumerablesTest {
equalTo("[Emp(30, Fred), Emp(20, Sebastian), Emp(20, Zoey)]"));
}
+ @Test void testMergeLeftJoin() {
+ assertThat(
+ EnumerableDefaults.mergeJoin(
+ Linq4j.asEnumerable(
+ Arrays.asList(
+ new Dept(10, "Marketing"),
+ new Dept(20, "Sales"),
+ new Dept(25, "HR"),
+ new Dept(30, "Research"),
+ new Dept(40, "Development"))),
+ Linq4j.asEnumerable(
+ Arrays.asList(
+ new Emp(10, "Fred"),
+ new Emp(20, "Theodore"),
+ new Emp(20, "Sebastian"),
+ new Emp(30, "Joe"),
+ new Emp(30, "Greg"),
+ new Emp(50, "Mary"))),
+ d -> d.deptno,
+ e -> e.deptno,
+ null,
+ (v0, v1) -> String.valueOf(v0) + "-" + String.valueOf(v1),
+ JoinType.LEFT,
+ null).toList().toString(), equalTo("[Dept(10, Marketing)-Emp(10, Fred),"
+ + " Dept(20, Sales)-Emp(20, Theodore),"
+ + " Dept(20, Sales)-Emp(20, Sebastian),"
+ + " Dept(25, HR)-null,"
+ + " Dept(30, Research)-Emp(30, Joe),"
+ + " Dept(30, Research)-Emp(30, Greg),"
+ + " Dept(40, Development)-null]"));
+ }
+
+ @Test void testMergeLeftJoinWithPredicate() {
+ assertThat(
+ EnumerableDefaults.mergeJoin(
+ Linq4j.asEnumerable(
+ Arrays.asList(
+ new Dept(10, "Marketing"),
+ new Dept(20, "Sales"),
+ new Dept(25, "HR"),
+ new Dept(30, "Research"),
+ new Dept(40, "Development"))),
+ Linq4j.asEnumerable(
+ Arrays.asList(
+ new Emp(10, "Fred"),
+ new Emp(20, "Theodore"),
+ new Emp(20, "Sebastian"),
+ new Emp(30, "Joe"),
+ new Emp(30, "Greg"),
+ new Emp(50, "Mary"))),
+ d -> d.deptno,
+ e -> e.deptno,
+ (d, e) -> e.name.contains("a"),
+ (v0, v1) -> String.valueOf(v0) + "-" + String.valueOf(v1),
+ JoinType.LEFT,
+ null).toList().toString(), equalTo("[Dept(10, Marketing)-null,"
+ + " Dept(20, Sales)-Emp(20, Sebastian),"
+ + " Dept(25, HR)-null,"
+ + " Dept(30, Research)-null,"
+ + " Dept(40, Development)-null]"));
+ }
+
+ @Test void testMergeLeftJoinWithNullKeys() {
+ assertThat(
+ EnumerableDefaults.mergeJoin(
+ Linq4j.asEnumerable(
+ Arrays.asList(
+ new Emp(30, "Fred"),
+ new Emp(20, "Sebastian"),
+ new Emp(30, "Theodore"),
+ new Emp(20, "Zoey"),
+ new Emp(40, null),
+ new Emp(30, null))),
+ Linq4j.asEnumerable(
+ Arrays.asList(
+ new Dept(15, "Marketing"),
+ new Dept(20, "Sales"),
+ new Dept(30, "Theodore"),
+ new Dept(25, "Theodore"),
+ new Dept(33, "Zoey"),
+ new Dept(40, null))),
+ e -> e.name,
+ d -> d.name,
+ (e, d) -> e.name.startsWith("T"),
+ (v0, v1) -> String.valueOf(v0) + "-" + String.valueOf(v1),
+ JoinType.LEFT,
+ null).toList().toString(), equalTo("[Emp(30, Fred)-null,"
+ + " Emp(20, Sebastian)-null,"
+ + " Emp(30, Theodore)-Dept(30, Theodore),"
+ + " Emp(30, Theodore)-Dept(25, Theodore),"
+ + " Emp(20, Zoey)-null,"
+ + " Emp(40, null)-null,"
+ + " Emp(30, null)-null]"));
+ }
+
@Test void testNestedLoopJoin() {
assertThat(
EnumerableDefaults.nestedLoopJoin(EMPS, DEPTS, EMP_DEPT_EQUAL_DEPTNO,
diff --git a/core/src/test/java/org/apache/calcite/test/JdbcTest.java b/core/src/test/java/org/apache/calcite/test/JdbcTest.java
index 2322d89..aae1f46 100644
--- a/core/src/test/java/org/apache/calcite/test/JdbcTest.java
+++ b/core/src/test/java/org/apache/calcite/test/JdbcTest.java
@@ -4922,11 +4922,13 @@ public class JdbcTest {
+ " where e.\"deptno\"=\"depts\".\"deptno\") on true";
final String explain = ""
+ "EnumerableCalc(expr#0..6=[{inputs}], proj#0..4=[{exprs}], I=[$t6])\n"
- + " EnumerableHashJoin(condition=[=($1, $5)], joinType=[left])\n"
- + " EnumerableTableScan(table=[[hr, emps]])\n"
- + " EnumerableCalc(expr#0=[{inputs}], expr#1=[true], proj#0..1=[{exprs}])\n"
- + " EnumerableAggregate(group=[{0}])\n"
- + " EnumerableTableScan(table=[[hr, depts]])";
+ + " EnumerableMergeJoin(condition=[=($1, $5)], joinType=[left])\n"
+ + " EnumerableSort(sort0=[$1], dir0=[ASC])\n"
+ + " EnumerableTableScan(table=[[hr, emps]])\n"
+ + " EnumerableSort(sort0=[$0], dir0=[ASC])\n"
+ + " EnumerableCalc(expr#0=[{inputs}], expr#1=[true], proj#0..1=[{exprs}])\n"
+ + " EnumerableAggregate(group=[{0}])\n"
+ + " EnumerableTableScan(table=[[hr, depts]])";
CalciteAssert.hr()
.query(sql)
.explainContains(explain)
diff --git a/core/src/test/java/org/apache/calcite/test/enumerable/EnumerableCorrelateTest.java b/core/src/test/java/org/apache/calcite/test/enumerable/EnumerableCorrelateTest.java
index f326e9e..fa15900 100644
--- a/core/src/test/java/org/apache/calcite/test/enumerable/EnumerableCorrelateTest.java
+++ b/core/src/test/java/org/apache/calcite/test/enumerable/EnumerableCorrelateTest.java
@@ -50,6 +50,7 @@ class EnumerableCorrelateTest {
// instead of EnumerableHashJoin
planner.addRule(CoreRules.JOIN_TO_CORRELATE);
planner.removeRule(EnumerableRules.ENUMERABLE_JOIN_RULE);
+ planner.removeRule(EnumerableRules.ENUMERABLE_MERGE_JOIN_RULE);
})
.explainContains(""
+ "EnumerableCalc(expr#0..4=[{inputs}], empid=[$t0], name=[$t2], dept=[$t4])\n"
diff --git a/core/src/test/java/org/apache/calcite/test/enumerable/EnumerableHashJoinTest.java b/core/src/test/java/org/apache/calcite/test/enumerable/EnumerableHashJoinTest.java
index 8870f65..183dccd 100644
--- a/core/src/test/java/org/apache/calcite/test/enumerable/EnumerableHashJoinTest.java
+++ b/core/src/test/java/org/apache/calcite/test/enumerable/EnumerableHashJoinTest.java
@@ -79,6 +79,8 @@ class EnumerableHashJoinTest {
.query(
"select e.empid, e.name, d.name as dept from emps e left outer "
+ "join depts d on e.deptno=d.deptno")
+ .withHook(Hook.PLANNER, (Consumer<RelOptPlanner>) planner ->
+ planner.removeRule(EnumerableRules.ENUMERABLE_MERGE_JOIN_RULE))
.explainContains("EnumerableCalc(expr#0..4=[{inputs}], empid=[$t0], "
+ "name=[$t2], dept=[$t4])\n"
+ " EnumerableHashJoin(condition=[=($1, $3)], joinType=[left])\n"
@@ -119,6 +121,8 @@ class EnumerableHashJoinTest {
"select e.empid, e.name, d.name as dept from emps e left outer "
+ "join depts d on e.deptno=d.deptno and e.empid<150 and e"
+ ".empid>d.deptno")
+ .withHook(Hook.PLANNER, (Consumer<RelOptPlanner>) planner ->
+ planner.removeRule(EnumerableRules.ENUMERABLE_MERGE_JOIN_RULE))
.explainContains("EnumerableCalc(expr#0..4=[{inputs}], empid=[$t0], "
+ "name=[$t2], dept=[$t4])\n"
+ " EnumerableHashJoin(condition=[AND(=($1, $3), <($0, 150), >"
diff --git a/core/src/test/resources/sql/blank.iq b/core/src/test/resources/sql/blank.iq
index 2d69aaf..b8b125c 100644
--- a/core/src/test/resources/sql/blank.iq
+++ b/core/src/test/resources/sql/blank.iq
@@ -90,16 +90,20 @@ insert into table2 values (NULL, 1), (2, 1);
!set lateDecorrelate true
select i, j from table1 where table1.j NOT IN (select i from table2 where table1.i=table2.j);
EnumerableCalc(expr#0..7=[{inputs}], expr#8=[0], expr#9=[=($t3, $t8)], expr#10=[IS NULL($t1)], expr#11=[IS NOT NULL($t7)], expr#12=[<($t4, $t3)], expr#13=[OR($t10, $t11, $t12)], expr#14=[IS NOT TRUE($t13)], expr#15=[OR($t9, $t14)], proj#0..1=[{exprs}], $condition=[$t15])
- EnumerableHashJoin(condition=[AND(=($0, $6), =($1, $5))], joinType=[left])
- EnumerableHashJoin(condition=[=($0, $2)], joinType=[left])
- EnumerableTableScan(table=[[BLANK, TABLE1]])
- EnumerableAggregate(group=[{1}], c=[COUNT()], ck=[COUNT($0)])
- EnumerableCalc(expr#0..1=[{inputs}], expr#2=[IS NOT NULL($t1)], proj#0..1=[{exprs}], $condition=[$t2])
- EnumerableTableScan(table=[[BLANK, TABLE2]])
- EnumerableCalc(expr#0..1=[{inputs}], expr#2=[true], proj#0..2=[{exprs}])
- EnumerableAggregate(group=[{0, 1}])
- EnumerableCalc(expr#0..1=[{inputs}], expr#2=[IS NOT NULL($t1)], expr#3=[IS NOT NULL($t0)], expr#4=[AND($t2, $t3)], proj#0..1=[{exprs}], $condition=[$t4])
- EnumerableTableScan(table=[[BLANK, TABLE2]])
+ EnumerableMergeJoin(condition=[AND(=($0, $6), =($1, $5))], joinType=[left])
+ EnumerableSort(sort0=[$0], sort1=[$1], dir0=[ASC], dir1=[ASC])
+ EnumerableMergeJoin(condition=[=($0, $2)], joinType=[left])
+ EnumerableSort(sort0=[$0], dir0=[ASC])
+ EnumerableTableScan(table=[[BLANK, TABLE1]])
+ EnumerableSort(sort0=[$0], dir0=[ASC])
+ EnumerableAggregate(group=[{1}], c=[COUNT()], ck=[COUNT($0)])
+ EnumerableCalc(expr#0..1=[{inputs}], expr#2=[IS NOT NULL($t1)], proj#0..1=[{exprs}], $condition=[$t2])
+ EnumerableTableScan(table=[[BLANK, TABLE2]])
+ EnumerableSort(sort0=[$1], sort1=[$0], dir0=[ASC], dir1=[ASC])
+ EnumerableCalc(expr#0..1=[{inputs}], expr#2=[true], proj#0..2=[{exprs}])
+ EnumerableAggregate(group=[{0, 1}])
+ EnumerableCalc(expr#0..1=[{inputs}], expr#2=[IS NOT NULL($t1)], expr#3=[IS NOT NULL($t0)], expr#4=[AND($t2, $t3)], proj#0..1=[{exprs}], $condition=[$t4])
+ EnumerableTableScan(table=[[BLANK, TABLE2]])
!plan
+---+---+
| I | J |
diff --git a/core/src/test/resources/sql/misc.iq b/core/src/test/resources/sql/misc.iq
index 029525c..625e211 100644
--- a/core/src/test/resources/sql/misc.iq
+++ b/core/src/test/resources/sql/misc.iq
@@ -443,11 +443,13 @@ where not exists (
!ok
EnumerableCalc(expr#0..6=[{inputs}], expr#7=[IS NULL($t6)], proj#0..4=[{exprs}], $condition=[$t7])
- EnumerableHashJoin(condition=[=($1, $5)], joinType=[left])
- EnumerableTableScan(table=[[hr, emps]])
- EnumerableAggregate(group=[{0}], agg#0=[MIN($1)])
- EnumerableCalc(expr#0..3=[{inputs}], expr#4=[true], deptno=[$t0], $f0=[$t4])
- EnumerableTableScan(table=[[hr, depts]])
+ EnumerableMergeJoin(condition=[=($1, $5)], joinType=[left])
+ EnumerableSort(sort0=[$1], dir0=[ASC])
+ EnumerableTableScan(table=[[hr, emps]])
+ EnumerableSort(sort0=[$0], dir0=[ASC])
+ EnumerableAggregate(group=[{0}], agg#0=[MIN($1)])
+ EnumerableCalc(expr#0..3=[{inputs}], expr#4=[true], deptno=[$t0], $f0=[$t4])
+ EnumerableTableScan(table=[[hr, depts]])
!plan
# NOT EXISTS .. OR NOT EXISTS
@@ -469,16 +471,20 @@ or not exists (
!ok
EnumerableCalc(expr#0..7=[{inputs}], expr#8=[IS NULL($t5)], expr#9=[IS NULL($t7)], expr#10=[OR($t8, $t9)], proj#0..4=[{exprs}], $condition=[$t10])
- EnumerableHashJoin(condition=[=($0, $6)], joinType=[left])
- EnumerableCalc(expr#0..6=[{inputs}], proj#0..4=[{exprs}], $f0=[$t6])
- EnumerableHashJoin(condition=[=($1, $5)], joinType=[left])
- EnumerableTableScan(table=[[hr, emps]])
- EnumerableAggregate(group=[{0}], agg#0=[MIN($1)])
- EnumerableCalc(expr#0..3=[{inputs}], expr#4=[true], deptno=[$t0], $f0=[$t4])
- EnumerableTableScan(table=[[hr, depts]])
- EnumerableAggregate(group=[{0}], agg#0=[MIN($1)])
- EnumerableCalc(expr#0..3=[{inputs}], expr#4=[90], expr#5=[+($t0, $t4)], expr#6=[true], $f4=[$t5], $f0=[$t6])
- EnumerableTableScan(table=[[hr, depts]])
+ EnumerableMergeJoin(condition=[=($0, $6)], joinType=[left])
+ EnumerableSort(sort0=[$0], dir0=[ASC])
+ EnumerableCalc(expr#0..6=[{inputs}], proj#0..4=[{exprs}], $f0=[$t6])
+ EnumerableMergeJoin(condition=[=($1, $5)], joinType=[left])
+ EnumerableSort(sort0=[$1], dir0=[ASC])
+ EnumerableTableScan(table=[[hr, emps]])
+ EnumerableSort(sort0=[$0], dir0=[ASC])
+ EnumerableAggregate(group=[{0}], agg#0=[MIN($1)])
+ EnumerableCalc(expr#0..3=[{inputs}], expr#4=[true], deptno=[$t0], $f0=[$t4])
+ EnumerableTableScan(table=[[hr, depts]])
+ EnumerableSort(sort0=[$0], dir0=[ASC])
+ EnumerableAggregate(group=[{0}], agg#0=[MIN($1)])
+ EnumerableCalc(expr#0..3=[{inputs}], expr#4=[90], expr#5=[+($t0, $t4)], expr#6=[true], $f4=[$t5], $f0=[$t6])
+ EnumerableTableScan(table=[[hr, depts]])
!plan
# Left join to a relation with one row is recognized as a trivial semi-join
diff --git a/core/src/test/resources/sql/sub-query.iq b/core/src/test/resources/sql/sub-query.iq
index 9a9df2f..85cd94f 100644
--- a/core/src/test/resources/sql/sub-query.iq
+++ b/core/src/test/resources/sql/sub-query.iq
@@ -1761,9 +1761,10 @@ select sal from "scott".emp e
!ok
EnumerableCalc(expr#0..4=[{inputs}], expr#5=[NOT($t4)], expr#6=[IS NOT NULL($t4)], expr#7=[OR($t5, $t6)], expr#8=[IS NOT TRUE($t7)], SAL=[$t1], $condition=[$t8])
- EnumerableHashJoin(condition=[=($2, $3)], joinType=[left])
- EnumerableCalc(expr#0..7=[{inputs}], EMPNO=[$t0], SAL=[$t5], DEPTNO=[$t7])
- EnumerableTableScan(table=[[scott, EMP]])
+ EnumerableMergeJoin(condition=[=($2, $3)], joinType=[left])
+ EnumerableSort(sort0=[$2], dir0=[ASC])
+ EnumerableCalc(expr#0..7=[{inputs}], EMPNO=[$t0], SAL=[$t5], DEPTNO=[$t7])
+ EnumerableTableScan(table=[[scott, EMP]])
EnumerableCalc(expr#0..2=[{inputs}], expr#3=[false], DEPTNO=[$t0], $f1=[$t3])
EnumerableTableScan(table=[[scott, DEPT]])
!plan
@@ -1829,9 +1830,10 @@ select sal from "scott".emp e
!ok
EnumerableCalc(expr#0..4=[{inputs}], expr#5=[NOT($t4)], expr#6=[IS NOT NULL($t4)], expr#7=[OR($t5, $t6)], expr#8=[IS NOT TRUE($t7)], SAL=[$t1], $condition=[$t8])
- EnumerableHashJoin(condition=[=($2, $3)], joinType=[left])
- EnumerableCalc(expr#0..7=[{inputs}], EMPNO=[$t0], SAL=[$t5], DEPTNO=[$t7])
- EnumerableTableScan(table=[[scott, EMP]])
+ EnumerableMergeJoin(condition=[=($2, $3)], joinType=[left])
+ EnumerableSort(sort0=[$2], dir0=[ASC])
+ EnumerableCalc(expr#0..7=[{inputs}], EMPNO=[$t0], SAL=[$t5], DEPTNO=[$t7])
+ EnumerableTableScan(table=[[scott, EMP]])
EnumerableCalc(expr#0..2=[{inputs}], expr#3=[true], expr#4=[10], expr#5=[=($t4, $t0)], DEPTNO1=[$t0], $f1=[$t3], $condition=[$t5])
EnumerableTableScan(table=[[scott, DEPT]])
!plan
@@ -1858,9 +1860,10 @@ select sal from "scott".emp e
!ok
EnumerableCalc(expr#0..4=[{inputs}], expr#5=[NOT($t4)], expr#6=[IS NOT NULL($t4)], expr#7=[OR($t5, $t6)], expr#8=[IS NOT TRUE($t7)], SAL=[$t1], $condition=[$t8])
- EnumerableHashJoin(condition=[=($2, $3)], joinType=[left])
- EnumerableCalc(expr#0..7=[{inputs}], EMPNO=[$t0], SAL=[$t5], DEPTNO=[$t7])
- EnumerableTableScan(table=[[scott, EMP]])
+ EnumerableMergeJoin(condition=[=($2, $3)], joinType=[left])
+ EnumerableSort(sort0=[$2], dir0=[ASC])
+ EnumerableCalc(expr#0..7=[{inputs}], EMPNO=[$t0], SAL=[$t5], DEPTNO=[$t7])
+ EnumerableTableScan(table=[[scott, EMP]])
EnumerableCalc(expr#0..2=[{inputs}], expr#3=[true], expr#4=[10], expr#5=[=($t4, $t0)], DEPTNO=[$t0], $f1=[$t3], $condition=[$t5])
EnumerableTableScan(table=[[scott, DEPT]])
!plan
@@ -1890,9 +1893,10 @@ select sal from "scott".emp e
!ok
EnumerableCalc(expr#0..3=[{inputs}], SAL=[$t1])
- EnumerableHashJoin(condition=[=($2, $3)], joinType=[left])
- EnumerableCalc(expr#0..7=[{inputs}], EMPNO=[$t0], SAL=[$t5], DEPTNO=[$t7])
- EnumerableTableScan(table=[[scott, EMP]])
+ EnumerableMergeJoin(condition=[=($2, $3)], joinType=[left])
+ EnumerableSort(sort0=[$2], dir0=[ASC])
+ EnumerableCalc(expr#0..7=[{inputs}], EMPNO=[$t0], SAL=[$t5], DEPTNO=[$t7])
+ EnumerableTableScan(table=[[scott, EMP]])
EnumerableCalc(expr#0..2=[{inputs}], DEPTNO=[$t0])
EnumerableTableScan(table=[[scott, DEPT]])
!plan
@@ -2050,11 +2054,13 @@ where sal + 100 not in (
EnumerableAggregate(group=[{}], C=[COUNT()])
EnumerableCalc(expr#0..9=[{inputs}], expr#10=[0], expr#11=[=($t4, $t10)], expr#12=[IS NULL($t2)], expr#13=[IS NOT NULL($t7)], expr#14=[<($t5, $t4)], expr#15=[OR($t12, $t13, $t14)], expr#16=[IS NOT TRUE($t15)], expr#17=[OR($t11, $t16)], proj#0..9=[{exprs}], $condition=[$t17])
EnumerableHashJoin(condition=[AND(=($1, $8), =($2, $9))], joinType=[left])
- EnumerableHashJoin(condition=[=($1, $3)], joinType=[left])
- EnumerableCalc(expr#0..7=[{inputs}], proj#0..1=[{exprs}], SAL=[$t5])
- EnumerableTableScan(table=[[scott, EMP]])
- EnumerableCalc(expr#0..2=[{inputs}], expr#3=[1:BIGINT], expr#4=[IS NOT NULL($t1)], DNAME=[$t1], $f1=[$t3], $f2=[$t3], $condition=[$t4])
- EnumerableTableScan(table=[[scott, DEPT]])
+ EnumerableMergeJoin(condition=[=($1, $3)], joinType=[left])
+ EnumerableSort(sort0=[$1], dir0=[ASC])
+ EnumerableCalc(expr#0..7=[{inputs}], proj#0..1=[{exprs}], SAL=[$t5])
+ EnumerableTableScan(table=[[scott, EMP]])
+ EnumerableSort(sort0=[$0], dir0=[ASC])
+ EnumerableCalc(expr#0..2=[{inputs}], expr#3=[1:BIGINT], expr#4=[IS NOT NULL($t1)], DNAME=[$t1], $f1=[$t3], $f2=[$t3], $condition=[$t4])
+ EnumerableTableScan(table=[[scott, DEPT]])
EnumerableCalc(expr#0..4=[{inputs}], DEPTNO=[$t2], i=[$t3], DNAME=[$t4], SAL=[$t0])
EnumerableHashJoin(condition=[=($1, $2)], joinType=[inner])
EnumerableCalc(expr#0=[{inputs}], expr#1=[100], expr#2=[+($t0, $t1)], SAL=[$t0], $f1=[$t2])
@@ -2069,9 +2075,10 @@ select empno from "scott".emp as e
where e.empno > ANY(
select 2 from "scott".dept e2 where e2.deptno = e.deptno) ;
EnumerableCalc(expr#0..6=[{inputs}], expr#7=[>($t0, $t2)], expr#8=[IS NULL($t5)], expr#9=[0], expr#10=[=($t3, $t9)], expr#11=[OR($t8, $t10)], expr#12=[IS NOT TRUE($t11)], expr#13=[AND($t7, $t12)], expr#14=[IS NOT TRUE($t7)], expr#15=[>($t3, $t4)], expr#16=[IS NOT TRUE($t15)], expr#17=[AND($t7, $t12, $t14, $t16)], expr#18=[OR($t13, $t17)], EMPNO=[$t0], $condition=[$t18])
- EnumerableHashJoin(condition=[=($1, $6)], joinType=[left])
- EnumerableCalc(expr#0..7=[{inputs}], EMPNO=[$t0], DEPTNO=[$t7])
- EnumerableTableScan(table=[[scott, EMP]])
+ EnumerableMergeJoin(condition=[=($1, $6)], joinType=[left])
+ EnumerableSort(sort0=[$1], dir0=[ASC])
+ EnumerableCalc(expr#0..7=[{inputs}], EMPNO=[$t0], DEPTNO=[$t7])
+ EnumerableTableScan(table=[[scott, EMP]])
EnumerableCalc(expr#0..2=[{inputs}], expr#3=[2], expr#4=[1:BIGINT], expr#5=[true], m=[$t3], c=[$t4], d=[$t4], trueLiteral=[$t5], DEPTNO=[$t0])
EnumerableTableScan(table=[[scott, DEPT]])
!plan
@@ -2101,13 +2108,14 @@ e.deptno > ANY(
select 2 from "scott".dept e2 where e2.deptno = e.empno) from "scott".emp as e;
EnumerableCalc(expr#0..6=[{inputs}], expr#7=[>($t1, $t2)], expr#8=[IS TRUE($t7)], expr#9=[IS NULL($t5)], expr#10=[0], expr#11=[=($t3, $t10)], expr#12=[OR($t9, $t11)], expr#13=[IS NOT TRUE($t12)], expr#14=[AND($t8, $t13)], expr#15=[>($t3, $t4)], expr#16=[IS TRUE($t15)], expr#17=[null:BOOLEAN], expr#18=[IS NOT TRUE($t7)], expr#19=[AND($t16, $t17, $t13, $t18)], expr#20=[IS NOT TRUE($t15)], expr#21=[AND($t7, $t13, $t18, $t20)], expr#22=[OR($t14, $t19, $t21)], EMPNO=[$t0], EXPR$1=[$t22])
- EnumerableHashJoin(condition=[=($0, $6)], joinType=[left])
+ EnumerableMergeJoin(condition=[=($0, $6)], joinType=[left])
EnumerableCalc(expr#0..7=[{inputs}], EMPNO=[$t0], DEPTNO=[$t7])
EnumerableTableScan(table=[[scott, EMP]])
- EnumerableCalc(expr#0..3=[{inputs}], expr#4=[true], m=[$t1], c=[$t2], d=[$t3], trueLiteral=[$t4], DEPTNO0=[$t0])
- EnumerableAggregate(group=[{0}], m=[MIN($1)], c=[COUNT()], d=[COUNT($1)])
- EnumerableCalc(expr#0..2=[{inputs}], expr#3=[CAST($t0):SMALLINT NOT NULL], expr#4=[2], DEPTNO0=[$t3], EXPR$0=[$t4])
- EnumerableTableScan(table=[[scott, DEPT]])
+ EnumerableSort(sort0=[$4], dir0=[ASC])
+ EnumerableCalc(expr#0..3=[{inputs}], expr#4=[true], m=[$t1], c=[$t2], d=[$t3], trueLiteral=[$t4], DEPTNO0=[$t0])
+ EnumerableAggregate(group=[{0}], m=[MIN($1)], c=[COUNT()], d=[COUNT($1)])
+ EnumerableCalc(expr#0..2=[{inputs}], expr#3=[CAST($t0):SMALLINT NOT NULL], expr#4=[2], DEPTNO0=[$t3], EXPR$0=[$t4])
+ EnumerableTableScan(table=[[scott, DEPT]])
!plan
EMPNO | EXPR$1
-------+--------
diff --git a/linq4j/src/main/java/org/apache/calcite/linq4j/EnumerableDefaults.java b/linq4j/src/main/java/org/apache/calcite/linq4j/EnumerableDefaults.java
index db13c9a..512130d 100644
--- a/linq4j/src/main/java/org/apache/calcite/linq4j/EnumerableDefaults.java
+++ b/linq4j/src/main/java/org/apache/calcite/linq4j/EnumerableDefaults.java
@@ -2130,6 +2130,7 @@ public abstract class EnumerableDefaults {
case INNER:
case SEMI:
case ANTI:
+ case LEFT:
return true;
default:
return false;
@@ -3995,6 +3996,7 @@ public abstract class EnumerableDefaults {
* @param <TSource> left input record type
* @param <TKey> key type
* @param <TInner> right input record type */
+ @SuppressWarnings("unchecked")
private static class MergeJoinEnumerator<TResult, TSource, TInner, TKey extends Comparable<TKey>>
implements Enumerator<TResult> {
private final List<TSource> lefts = new ArrayList<>();
@@ -4012,9 +4014,10 @@ public abstract class EnumerableDefaults {
// key comparator, possibly null (Comparable#compareTo to be used in that case)
private final Comparator<TKey> comparator;
private boolean done;
- private Enumerator<TResult> results;
- // only used for ANTI join: if right input is over, all remaining elements from left are results
+ private Enumerator<TResult> results = null;
+ // used for LEFT/ANTI join: if right input is over, all remaining elements from left are results
private boolean remainingLeft;
+ private TResult current = (TResult) DUMMY;
MergeJoinEnumerator(Enumerable<TSource> leftEnumerable,
Enumerable<TInner> rightEnumerable,
@@ -4050,10 +4053,12 @@ public abstract class EnumerableDefaults {
}
/** Returns whether the left enumerator was successfully advanced to the next
- * element, and it does not have a null key. */
+ * element, and it does not have a null key (except for LEFT join, that needs to process
+ * all elements from left. */
private boolean leftMoveNext() {
return getLeftEnumerator().moveNext()
- && outerKeySelector.apply(getLeftEnumerator().current()) != null;
+ && (joinType == JoinType.LEFT
+ || outerKeySelector.apply(getLeftEnumerator().current()) != null);
}
/** Returns whether the right enumerator was successfully advanced to the
@@ -4063,21 +4068,13 @@ public abstract class EnumerableDefaults {
&& innerKeySelector.apply(getRightEnumerator().current()) != null;
}
- /**
- * Only for joinType ANTI: if right input is over, all remaining elements from left are results.
- * @param currentLeft current element from left iterator.
- */
- private void computeLeftRemainder(TSource currentLeft) {
- // before reaching this point, we have always performed a leftEnumerator moveNext,
- // so we must save the leftEnumerator current item as a result, otherwise it would be lost
- results = Linq4j.enumerator(
- Collections.singletonList(resultSelector.apply(currentLeft, null)));
- remainingLeft = true;
+ private boolean isLeftOrAntiJoin() {
+ return joinType == JoinType.LEFT || joinType == JoinType.ANTI;
}
private void start() {
- if (joinType == JoinType.ANTI) {
- startAntiJoin();
+ if (isLeftOrAntiJoin()) {
+ startLeftOrAntiJoin();
} else {
// joinType INNER or SEMI
if (!leftMoveNext() || !rightMoveNext() || !advance()) {
@@ -4086,13 +4083,13 @@ public abstract class EnumerableDefaults {
}
}
- private void startAntiJoin() {
+ private void startLeftOrAntiJoin() {
if (!leftMoveNext()) {
finish();
} else {
if (!rightMoveNext()) {
// all remaining items in left are results for anti join
- computeLeftRemainder(getLeftEnumerator().current());
+ remainingLeft = true;
} else {
if (!advance()) {
finish();
@@ -4107,7 +4104,14 @@ public abstract class EnumerableDefaults {
}
private int compare(TKey key1, TKey key2) {
- return comparator != null ? comparator.compare(key1, key2) : key1.compareTo(key2);
+ return comparator != null ? comparator.compare(key1, key2) : compareNullsLast(key1, key2);
+ }
+
+ private int compareNullsLast(TKey v0, TKey v1) {
+ return v0 == v1 ? 0
+ : v0 == null ? 1
+ : v1 == null ? -1
+ : v0.compareTo(v1);
}
/** Moves to the next key that is present in both sides. Populates
@@ -4124,9 +4128,9 @@ public abstract class EnumerableDefaults {
// mergeJoin assumes inputs sorted in ascending order with nulls last,
// if we reach a null key, we are done.
if (leftKey == null || rightKey == null) {
- if (joinType == JoinType.ANTI && leftKey != null) {
- // all remaining items in left are results for anti join
- computeLeftRemainder(left);
+ if (joinType == JoinType.LEFT || (joinType == JoinType.ANTI && leftKey != null)) {
+ // all remaining items in left are results for left/anti join
+ remainingLeft = true;
return true;
}
done = true;
@@ -4137,8 +4141,8 @@ public abstract class EnumerableDefaults {
break;
}
if (c < 0) {
- if (joinType == JoinType.ANTI) {
- // left (and all other items with the same key) are results for anti join
+ if (isLeftOrAntiJoin()) {
+ // left (and all other items with the same key) are results for left/anti join
if (!advanceLeft(left, leftKey)) {
done = true;
}
@@ -4154,9 +4158,9 @@ public abstract class EnumerableDefaults {
leftKey = outerKeySelector.apply(left);
} else {
if (!getRightEnumerator().moveNext()) {
- if (joinType == JoinType.ANTI) {
- // all remaining items in left are results for anti join
- computeLeftRemainder(left);
+ if (isLeftOrAntiJoin()) {
+ // all remaining items in left are results for left/anti join
+ remainingLeft = true;
return true;
}
done = true;
@@ -4172,9 +4176,9 @@ public abstract class EnumerableDefaults {
}
if (!advanceRight(right, rightKey)) {
- if (!done && joinType == JoinType.ANTI) {
- // all remaining items in left are results for anti join
- computeLeftRemainder(getLeftEnumerator().current());
+ if (!done && isLeftOrAntiJoin()) {
+ // all remaining items in left are results for left/anti join
+ remainingLeft = true;
} else {
done = true;
}
@@ -4221,9 +4225,9 @@ public abstract class EnumerableDefaults {
while (getLeftEnumerator().moveNext()) {
left = getLeftEnumerator().current();
TKey leftKey2 = outerKeySelector.apply(left);
- if (leftKey2 == null) {
+ if (leftKey2 == null && joinType != JoinType.LEFT) {
// mergeJoin assumes inputs sorted in ascending order with nulls last,
- // if we reach a null key, we are done
+ // if we reach a null key, we are done (except LEFT join, that needs to process LHS fully)
break;
}
int c = compare(leftKey, leftKey2);
@@ -4273,20 +4277,29 @@ public abstract class EnumerableDefaults {
}
public TResult current() {
- if (remainingLeft) {
- final TSource left = getLeftEnumerator().current();
- return resultSelector.apply(left, null);
+ if (current == DUMMY) {
+ throw new NoSuchElementException();
}
- return results.current();
+ return current;
}
public boolean moveNext() {
for (;;) {
- if (results.moveNext()) {
- return true;
+ if (results != null) {
+ if (results.moveNext()) {
+ current = results.current();
+ return true;
+ } else {
+ results = null;
+ }
}
if (remainingLeft) {
- return leftMoveNext();
+ current = resultSelector.apply(getLeftEnumerator().current(), null);
+ if (!leftMoveNext()) {
+ remainingLeft = false;
+ done = true;
+ }
+ return true;
}
if (done) {
return false;
@@ -4299,6 +4312,8 @@ public abstract class EnumerableDefaults {
public void reset() {
done = false;
+ results = null;
+ current = (TResult) DUMMY;
remainingLeft = false;
leftEnumerator.reset();
if (rightEnumerator != null) {