You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@calcite.apache.org by ru...@apache.org on 2020/08/24 09:17:32 UTC

[calcite] branch master updated: [CALCITE-4113] Support LEFT join in EnumerableMergeJoin

This is an automated email from the ASF dual-hosted git repository.

rubenql pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/calcite.git


The following commit(s) were added to refs/heads/master by this push:
     new 672ed7a  [CALCITE-4113] Support LEFT join in EnumerableMergeJoin
672ed7a is described below

commit 672ed7a1d0dbf87760d37e52b424f16bc8c43b4d
Author: rubenada <ru...@gmail.com>
AuthorDate: Wed Aug 12 17:14:57 2020 +0100

    [CALCITE-4113] Support LEFT join in EnumerableMergeJoin
---
 .../apache/calcite/runtime/EnumerablesTest.java    | 188 ++++++++++++++++++++-
 .../java/org/apache/calcite/test/JdbcTest.java     |  12 +-
 .../test/enumerable/EnumerableCorrelateTest.java   |   1 +
 .../test/enumerable/EnumerableHashJoinTest.java    |   4 +
 core/src/test/resources/sql/blank.iq               |  24 +--
 core/src/test/resources/sql/misc.iq                |  36 ++--
 core/src/test/resources/sql/sub-query.iq           |  58 ++++---
 .../apache/calcite/linq4j/EnumerableDefaults.java  |  93 +++++-----
 8 files changed, 317 insertions(+), 99 deletions(-)

diff --git a/core/src/test/java/org/apache/calcite/runtime/EnumerablesTest.java b/core/src/test/java/org/apache/calcite/runtime/EnumerablesTest.java
index 20ed84a..f615d92 100644
--- a/core/src/test/java/org/apache/calcite/runtime/EnumerablesTest.java
+++ b/core/src/test/java/org/apache/calcite/runtime/EnumerablesTest.java
@@ -212,6 +212,43 @@ class EnumerablesTest {
         newArrayList(1, 1, 4, 4),
         equalTo("[3]"),
         JoinType.ANTI);
+
+    // LEFT join tests:
+    // Matching keys at start
+    testIntersect(
+        newArrayList(1, 3, 4),
+        newArrayList(1, 4),
+        equalTo("[1-1, 3-null, 4-4]"),
+        equalTo("[1-1, 3-null, 4-4, null-null]"),
+        JoinType.LEFT);
+    // Matching key at start and end of right, not of left
+    testIntersect(
+        newArrayList(0, 1, 3, 4, 5),
+        newArrayList(1, 4),
+        equalTo("[0-null, 1-1, 3-null, 4-4, 5-null]"),
+        equalTo("[0-null, 1-1, 3-null, 4-4, 5-null, null-null]"),
+        JoinType.LEFT);
+    // Matching key at start and end of left, not right
+    testIntersect(
+        newArrayList(1, 3, 4),
+        newArrayList(0, 1, 4, 5),
+        equalTo("[1-1, 3-null, 4-4]"),
+        equalTo("[1-1, 3-null, 4-4, null-null]"),
+        JoinType.LEFT);
+    // Matching key not at start or end of left or right
+    testIntersect(
+        newArrayList(0, 2, 3, 4, 5),
+        newArrayList(1, 3, 4, 6),
+        equalTo("[0-null, 2-null, 3-3, 4-4, 5-null]"),
+        equalTo("[0-null, 2-null, 3-3, 4-4, 5-null, null-null]"),
+        JoinType.LEFT);
+    // Matching duplicated keys
+    testIntersect(
+        newArrayList(1, 3, 4),
+        newArrayList(1, 1, 4, 4),
+        equalTo("[1-1, 1-1, 3-null, 4-4, 4-4]"),
+        equalTo("[1-1, 1-1, 3-null, 4-4, 4-4, null-null]"),
+        JoinType.LEFT);
   }
 
   @Test void testMergeJoin3() {
@@ -268,21 +305,57 @@ class EnumerablesTest {
         new ArrayList<>(),
         equalTo("[]"),
         JoinType.ANTI);
+
+    // LEFT join tests:
+    // No overlap
+    testIntersect(
+        newArrayList(0, 2, 4),
+        newArrayList(1, 3, 5),
+        equalTo("[0-null, 2-null, 4-null]"),
+        equalTo("[0-null, 2-null, 4-null, null-null]"),
+        JoinType.LEFT);
+    // Left empty
+    testIntersect(
+        new ArrayList<>(),
+        newArrayList(1, 3, 4, 6),
+        equalTo("[]"),
+        equalTo("[null-null]"),
+        JoinType.LEFT);
+    // Right empty
+    testIntersect(
+        newArrayList(3, 7),
+        new ArrayList<>(),
+        equalTo("[3-null, 7-null]"),
+        equalTo("[3-null, 7-null, null-null]"),
+        JoinType.LEFT);
+    // Both empty
+    testIntersect(
+        new ArrayList<Integer>(),
+        new ArrayList<>(),
+        equalTo("[]"),
+        equalTo("[null-null]"),
+        JoinType.LEFT);
   }
 
   private static <T extends Comparable<T>> void testIntersect(
       List<T> list0, List<T> list1, org.hamcrest.Matcher<String> matcher, JoinType joinType) {
+    testIntersect(list0, list1, matcher, matcher, joinType);
+  }
+
+  private static <T extends Comparable<T>> void testIntersect(
+      List<T> list0, List<T> list1, org.hamcrest.Matcher<String> matcher,
+      org.hamcrest.Matcher<String> matcherNullLeft, JoinType joinType) {
     assertThat(
         intersect(list0, list1, joinType).toList().toString(),
         matcher);
 
-    // Repeat test with nulls at the end of left / right: result should not be impacted
+    // Repeat test with nulls at the end of left / right
 
     // Null at the end of left
     list0.add(null);
     assertThat(
         intersect(list0, list1, joinType).toList().toString(),
-        matcher);
+        matcherNullLeft);
 
     // Null at the end of right
     list0.remove(list0.size() - 1);
@@ -295,17 +368,27 @@ class EnumerablesTest {
     list0.add(null);
     assertThat(
         intersect(list0, list1, joinType).toList().toString(),
-        matcher);
+        matcherNullLeft);
   }
 
-  private static <T extends Comparable<T>> Enumerable<T> intersect(
+  private static <T extends Comparable<T>> Enumerable<String> intersect(
       List<T> list0, List<T> list1, JoinType joinType) {
+    if (joinType == JoinType.LEFT) {
+      return EnumerableDefaults.mergeJoin(
+          Linq4j.asEnumerable(list0),
+          Linq4j.asEnumerable(list1),
+          Functions.identitySelector(),
+          Functions.identitySelector(),
+          (v0, v1) -> String.valueOf(v0) + "-" + String.valueOf(v1),
+          JoinType.LEFT,
+          null);
+    }
     return EnumerableDefaults.mergeJoin(
         Linq4j.asEnumerable(list0),
         Linq4j.asEnumerable(list1),
         Functions.identitySelector(),
         Functions.identitySelector(),
-        (v0, v1) -> v0,
+        (v0, v1) -> String.valueOf(v0),
         joinType,
         null);
   }
@@ -550,6 +633,101 @@ class EnumerablesTest {
         equalTo("[Emp(30, Fred), Emp(20, Sebastian), Emp(20, Zoey)]"));
   }
 
+  @Test void testMergeLeftJoin() {
+    assertThat(
+        EnumerableDefaults.mergeJoin(
+            Linq4j.asEnumerable(
+                Arrays.asList(
+                    new Dept(10, "Marketing"),
+                    new Dept(20, "Sales"),
+                    new Dept(25, "HR"),
+                    new Dept(30, "Research"),
+                    new Dept(40, "Development"))),
+            Linq4j.asEnumerable(
+                Arrays.asList(
+                    new Emp(10, "Fred"),
+                    new Emp(20, "Theodore"),
+                    new Emp(20, "Sebastian"),
+                    new Emp(30, "Joe"),
+                    new Emp(30, "Greg"),
+                    new Emp(50, "Mary"))),
+            d -> d.deptno,
+            e -> e.deptno,
+            null,
+            (v0, v1) -> String.valueOf(v0) + "-" + String.valueOf(v1),
+            JoinType.LEFT,
+            null).toList().toString(), equalTo("[Dept(10, Marketing)-Emp(10, Fred),"
+            + " Dept(20, Sales)-Emp(20, Theodore),"
+            + " Dept(20, Sales)-Emp(20, Sebastian),"
+            + " Dept(25, HR)-null,"
+            + " Dept(30, Research)-Emp(30, Joe),"
+            + " Dept(30, Research)-Emp(30, Greg),"
+            + " Dept(40, Development)-null]"));
+  }
+
+  @Test void testMergeLeftJoinWithPredicate() {
+    assertThat(
+        EnumerableDefaults.mergeJoin(
+            Linq4j.asEnumerable(
+                Arrays.asList(
+                    new Dept(10, "Marketing"),
+                    new Dept(20, "Sales"),
+                    new Dept(25, "HR"),
+                    new Dept(30, "Research"),
+                    new Dept(40, "Development"))),
+            Linq4j.asEnumerable(
+                Arrays.asList(
+                    new Emp(10, "Fred"),
+                    new Emp(20, "Theodore"),
+                    new Emp(20, "Sebastian"),
+                    new Emp(30, "Joe"),
+                    new Emp(30, "Greg"),
+                    new Emp(50, "Mary"))),
+            d -> d.deptno,
+            e -> e.deptno,
+            (d, e) -> e.name.contains("a"),
+            (v0, v1) -> String.valueOf(v0) + "-" + String.valueOf(v1),
+            JoinType.LEFT,
+            null).toList().toString(), equalTo("[Dept(10, Marketing)-null,"
+            + " Dept(20, Sales)-Emp(20, Sebastian),"
+            + " Dept(25, HR)-null,"
+            + " Dept(30, Research)-null,"
+            + " Dept(40, Development)-null]"));
+  }
+
+  @Test void testMergeLeftJoinWithNullKeys() {
+    assertThat(
+        EnumerableDefaults.mergeJoin(
+            Linq4j.asEnumerable(
+                Arrays.asList(
+                    new Emp(30, "Fred"),
+                    new Emp(20, "Sebastian"),
+                    new Emp(30, "Theodore"),
+                    new Emp(20, "Zoey"),
+                    new Emp(40, null),
+                    new Emp(30, null))),
+            Linq4j.asEnumerable(
+                Arrays.asList(
+                    new Dept(15, "Marketing"),
+                    new Dept(20, "Sales"),
+                    new Dept(30, "Theodore"),
+                    new Dept(25, "Theodore"),
+                    new Dept(33, "Zoey"),
+                    new Dept(40, null))),
+            e -> e.name,
+            d -> d.name,
+            (e, d) -> e.name.startsWith("T"),
+            (v0, v1) -> String.valueOf(v0) + "-" + String.valueOf(v1),
+            JoinType.LEFT,
+            null).toList().toString(), equalTo("[Emp(30, Fred)-null,"
+            + " Emp(20, Sebastian)-null,"
+            + " Emp(30, Theodore)-Dept(30, Theodore),"
+            + " Emp(30, Theodore)-Dept(25, Theodore),"
+            + " Emp(20, Zoey)-null,"
+            + " Emp(40, null)-null,"
+            + " Emp(30, null)-null]"));
+  }
+
   @Test void testNestedLoopJoin() {
     assertThat(
         EnumerableDefaults.nestedLoopJoin(EMPS, DEPTS, EMP_DEPT_EQUAL_DEPTNO,
diff --git a/core/src/test/java/org/apache/calcite/test/JdbcTest.java b/core/src/test/java/org/apache/calcite/test/JdbcTest.java
index 2322d89..aae1f46 100644
--- a/core/src/test/java/org/apache/calcite/test/JdbcTest.java
+++ b/core/src/test/java/org/apache/calcite/test/JdbcTest.java
@@ -4922,11 +4922,13 @@ public class JdbcTest {
         + " where e.\"deptno\"=\"depts\".\"deptno\") on true";
     final String explain = ""
         + "EnumerableCalc(expr#0..6=[{inputs}], proj#0..4=[{exprs}], I=[$t6])\n"
-        + "  EnumerableHashJoin(condition=[=($1, $5)], joinType=[left])\n"
-        + "    EnumerableTableScan(table=[[hr, emps]])\n"
-        + "    EnumerableCalc(expr#0=[{inputs}], expr#1=[true], proj#0..1=[{exprs}])\n"
-        + "      EnumerableAggregate(group=[{0}])\n"
-        + "        EnumerableTableScan(table=[[hr, depts]])";
+        + "  EnumerableMergeJoin(condition=[=($1, $5)], joinType=[left])\n"
+        + "    EnumerableSort(sort0=[$1], dir0=[ASC])\n"
+        + "      EnumerableTableScan(table=[[hr, emps]])\n"
+        + "    EnumerableSort(sort0=[$0], dir0=[ASC])\n"
+        + "      EnumerableCalc(expr#0=[{inputs}], expr#1=[true], proj#0..1=[{exprs}])\n"
+        + "        EnumerableAggregate(group=[{0}])\n"
+        + "          EnumerableTableScan(table=[[hr, depts]])";
     CalciteAssert.hr()
         .query(sql)
         .explainContains(explain)
diff --git a/core/src/test/java/org/apache/calcite/test/enumerable/EnumerableCorrelateTest.java b/core/src/test/java/org/apache/calcite/test/enumerable/EnumerableCorrelateTest.java
index f326e9e..fa15900 100644
--- a/core/src/test/java/org/apache/calcite/test/enumerable/EnumerableCorrelateTest.java
+++ b/core/src/test/java/org/apache/calcite/test/enumerable/EnumerableCorrelateTest.java
@@ -50,6 +50,7 @@ class EnumerableCorrelateTest {
           // instead of EnumerableHashJoin
           planner.addRule(CoreRules.JOIN_TO_CORRELATE);
           planner.removeRule(EnumerableRules.ENUMERABLE_JOIN_RULE);
+          planner.removeRule(EnumerableRules.ENUMERABLE_MERGE_JOIN_RULE);
         })
         .explainContains(""
             + "EnumerableCalc(expr#0..4=[{inputs}], empid=[$t0], name=[$t2], dept=[$t4])\n"
diff --git a/core/src/test/java/org/apache/calcite/test/enumerable/EnumerableHashJoinTest.java b/core/src/test/java/org/apache/calcite/test/enumerable/EnumerableHashJoinTest.java
index 8870f65..183dccd 100644
--- a/core/src/test/java/org/apache/calcite/test/enumerable/EnumerableHashJoinTest.java
+++ b/core/src/test/java/org/apache/calcite/test/enumerable/EnumerableHashJoinTest.java
@@ -79,6 +79,8 @@ class EnumerableHashJoinTest {
         .query(
             "select e.empid, e.name, d.name as dept from emps e  left outer "
                 + "join depts d on e.deptno=d.deptno")
+        .withHook(Hook.PLANNER, (Consumer<RelOptPlanner>) planner ->
+            planner.removeRule(EnumerableRules.ENUMERABLE_MERGE_JOIN_RULE))
         .explainContains("EnumerableCalc(expr#0..4=[{inputs}], empid=[$t0], "
             + "name=[$t2], dept=[$t4])\n"
             + "  EnumerableHashJoin(condition=[=($1, $3)], joinType=[left])\n"
@@ -119,6 +121,8 @@ class EnumerableHashJoinTest {
             "select e.empid, e.name, d.name as dept from emps e left outer "
                 + "join depts d on e.deptno=d.deptno and e.empid<150 and e"
                 + ".empid>d.deptno")
+        .withHook(Hook.PLANNER, (Consumer<RelOptPlanner>) planner ->
+            planner.removeRule(EnumerableRules.ENUMERABLE_MERGE_JOIN_RULE))
         .explainContains("EnumerableCalc(expr#0..4=[{inputs}], empid=[$t0], "
             + "name=[$t2], dept=[$t4])\n"
             + "  EnumerableHashJoin(condition=[AND(=($1, $3), <($0, 150), >"
diff --git a/core/src/test/resources/sql/blank.iq b/core/src/test/resources/sql/blank.iq
index 2d69aaf..b8b125c 100644
--- a/core/src/test/resources/sql/blank.iq
+++ b/core/src/test/resources/sql/blank.iq
@@ -90,16 +90,20 @@ insert into table2 values (NULL, 1), (2, 1);
 !set lateDecorrelate true
 select i, j from table1 where table1.j NOT IN (select i from table2 where table1.i=table2.j);
 EnumerableCalc(expr#0..7=[{inputs}], expr#8=[0], expr#9=[=($t3, $t8)], expr#10=[IS NULL($t1)], expr#11=[IS NOT NULL($t7)], expr#12=[<($t4, $t3)], expr#13=[OR($t10, $t11, $t12)], expr#14=[IS NOT TRUE($t13)], expr#15=[OR($t9, $t14)], proj#0..1=[{exprs}], $condition=[$t15])
-  EnumerableHashJoin(condition=[AND(=($0, $6), =($1, $5))], joinType=[left])
-    EnumerableHashJoin(condition=[=($0, $2)], joinType=[left])
-      EnumerableTableScan(table=[[BLANK, TABLE1]])
-      EnumerableAggregate(group=[{1}], c=[COUNT()], ck=[COUNT($0)])
-        EnumerableCalc(expr#0..1=[{inputs}], expr#2=[IS NOT NULL($t1)], proj#0..1=[{exprs}], $condition=[$t2])
-          EnumerableTableScan(table=[[BLANK, TABLE2]])
-    EnumerableCalc(expr#0..1=[{inputs}], expr#2=[true], proj#0..2=[{exprs}])
-      EnumerableAggregate(group=[{0, 1}])
-        EnumerableCalc(expr#0..1=[{inputs}], expr#2=[IS NOT NULL($t1)], expr#3=[IS NOT NULL($t0)], expr#4=[AND($t2, $t3)], proj#0..1=[{exprs}], $condition=[$t4])
-          EnumerableTableScan(table=[[BLANK, TABLE2]])
+  EnumerableMergeJoin(condition=[AND(=($0, $6), =($1, $5))], joinType=[left])
+    EnumerableSort(sort0=[$0], sort1=[$1], dir0=[ASC], dir1=[ASC])
+      EnumerableMergeJoin(condition=[=($0, $2)], joinType=[left])
+        EnumerableSort(sort0=[$0], dir0=[ASC])
+          EnumerableTableScan(table=[[BLANK, TABLE1]])
+        EnumerableSort(sort0=[$0], dir0=[ASC])
+          EnumerableAggregate(group=[{1}], c=[COUNT()], ck=[COUNT($0)])
+            EnumerableCalc(expr#0..1=[{inputs}], expr#2=[IS NOT NULL($t1)], proj#0..1=[{exprs}], $condition=[$t2])
+              EnumerableTableScan(table=[[BLANK, TABLE2]])
+    EnumerableSort(sort0=[$1], sort1=[$0], dir0=[ASC], dir1=[ASC])
+      EnumerableCalc(expr#0..1=[{inputs}], expr#2=[true], proj#0..2=[{exprs}])
+        EnumerableAggregate(group=[{0, 1}])
+          EnumerableCalc(expr#0..1=[{inputs}], expr#2=[IS NOT NULL($t1)], expr#3=[IS NOT NULL($t0)], expr#4=[AND($t2, $t3)], proj#0..1=[{exprs}], $condition=[$t4])
+            EnumerableTableScan(table=[[BLANK, TABLE2]])
 !plan
 +---+---+
 | I | J |
diff --git a/core/src/test/resources/sql/misc.iq b/core/src/test/resources/sql/misc.iq
index 029525c..625e211 100644
--- a/core/src/test/resources/sql/misc.iq
+++ b/core/src/test/resources/sql/misc.iq
@@ -443,11 +443,13 @@ where not exists (
 
 !ok
 EnumerableCalc(expr#0..6=[{inputs}], expr#7=[IS NULL($t6)], proj#0..4=[{exprs}], $condition=[$t7])
-  EnumerableHashJoin(condition=[=($1, $5)], joinType=[left])
-    EnumerableTableScan(table=[[hr, emps]])
-    EnumerableAggregate(group=[{0}], agg#0=[MIN($1)])
-      EnumerableCalc(expr#0..3=[{inputs}], expr#4=[true], deptno=[$t0], $f0=[$t4])
-        EnumerableTableScan(table=[[hr, depts]])
+  EnumerableMergeJoin(condition=[=($1, $5)], joinType=[left])
+    EnumerableSort(sort0=[$1], dir0=[ASC])
+      EnumerableTableScan(table=[[hr, emps]])
+    EnumerableSort(sort0=[$0], dir0=[ASC])
+      EnumerableAggregate(group=[{0}], agg#0=[MIN($1)])
+        EnumerableCalc(expr#0..3=[{inputs}], expr#4=[true], deptno=[$t0], $f0=[$t4])
+          EnumerableTableScan(table=[[hr, depts]])
 !plan
 
 # NOT EXISTS .. OR NOT EXISTS
@@ -469,16 +471,20 @@ or not exists (
 
 !ok
 EnumerableCalc(expr#0..7=[{inputs}], expr#8=[IS NULL($t5)], expr#9=[IS NULL($t7)], expr#10=[OR($t8, $t9)], proj#0..4=[{exprs}], $condition=[$t10])
-  EnumerableHashJoin(condition=[=($0, $6)], joinType=[left])
-    EnumerableCalc(expr#0..6=[{inputs}], proj#0..4=[{exprs}], $f0=[$t6])
-      EnumerableHashJoin(condition=[=($1, $5)], joinType=[left])
-        EnumerableTableScan(table=[[hr, emps]])
-        EnumerableAggregate(group=[{0}], agg#0=[MIN($1)])
-          EnumerableCalc(expr#0..3=[{inputs}], expr#4=[true], deptno=[$t0], $f0=[$t4])
-            EnumerableTableScan(table=[[hr, depts]])
-    EnumerableAggregate(group=[{0}], agg#0=[MIN($1)])
-      EnumerableCalc(expr#0..3=[{inputs}], expr#4=[90], expr#5=[+($t0, $t4)], expr#6=[true], $f4=[$t5], $f0=[$t6])
-        EnumerableTableScan(table=[[hr, depts]])
+  EnumerableMergeJoin(condition=[=($0, $6)], joinType=[left])
+    EnumerableSort(sort0=[$0], dir0=[ASC])
+      EnumerableCalc(expr#0..6=[{inputs}], proj#0..4=[{exprs}], $f0=[$t6])
+        EnumerableMergeJoin(condition=[=($1, $5)], joinType=[left])
+          EnumerableSort(sort0=[$1], dir0=[ASC])
+            EnumerableTableScan(table=[[hr, emps]])
+          EnumerableSort(sort0=[$0], dir0=[ASC])
+            EnumerableAggregate(group=[{0}], agg#0=[MIN($1)])
+              EnumerableCalc(expr#0..3=[{inputs}], expr#4=[true], deptno=[$t0], $f0=[$t4])
+                EnumerableTableScan(table=[[hr, depts]])
+    EnumerableSort(sort0=[$0], dir0=[ASC])
+      EnumerableAggregate(group=[{0}], agg#0=[MIN($1)])
+        EnumerableCalc(expr#0..3=[{inputs}], expr#4=[90], expr#5=[+($t0, $t4)], expr#6=[true], $f4=[$t5], $f0=[$t6])
+          EnumerableTableScan(table=[[hr, depts]])
 !plan
 
 # Left join to a relation with one row is recognized as a trivial semi-join
diff --git a/core/src/test/resources/sql/sub-query.iq b/core/src/test/resources/sql/sub-query.iq
index 9a9df2f..85cd94f 100644
--- a/core/src/test/resources/sql/sub-query.iq
+++ b/core/src/test/resources/sql/sub-query.iq
@@ -1761,9 +1761,10 @@ select sal from "scott".emp e
 
 !ok
 EnumerableCalc(expr#0..4=[{inputs}], expr#5=[NOT($t4)], expr#6=[IS NOT NULL($t4)], expr#7=[OR($t5, $t6)], expr#8=[IS NOT TRUE($t7)], SAL=[$t1], $condition=[$t8])
-  EnumerableHashJoin(condition=[=($2, $3)], joinType=[left])
-    EnumerableCalc(expr#0..7=[{inputs}], EMPNO=[$t0], SAL=[$t5], DEPTNO=[$t7])
-      EnumerableTableScan(table=[[scott, EMP]])
+  EnumerableMergeJoin(condition=[=($2, $3)], joinType=[left])
+    EnumerableSort(sort0=[$2], dir0=[ASC])
+      EnumerableCalc(expr#0..7=[{inputs}], EMPNO=[$t0], SAL=[$t5], DEPTNO=[$t7])
+        EnumerableTableScan(table=[[scott, EMP]])
     EnumerableCalc(expr#0..2=[{inputs}], expr#3=[false], DEPTNO=[$t0], $f1=[$t3])
       EnumerableTableScan(table=[[scott, DEPT]])
 !plan
@@ -1829,9 +1830,10 @@ select sal from "scott".emp e
 
 !ok
 EnumerableCalc(expr#0..4=[{inputs}], expr#5=[NOT($t4)], expr#6=[IS NOT NULL($t4)], expr#7=[OR($t5, $t6)], expr#8=[IS NOT TRUE($t7)], SAL=[$t1], $condition=[$t8])
-  EnumerableHashJoin(condition=[=($2, $3)], joinType=[left])
-    EnumerableCalc(expr#0..7=[{inputs}], EMPNO=[$t0], SAL=[$t5], DEPTNO=[$t7])
-      EnumerableTableScan(table=[[scott, EMP]])
+  EnumerableMergeJoin(condition=[=($2, $3)], joinType=[left])
+    EnumerableSort(sort0=[$2], dir0=[ASC])
+      EnumerableCalc(expr#0..7=[{inputs}], EMPNO=[$t0], SAL=[$t5], DEPTNO=[$t7])
+        EnumerableTableScan(table=[[scott, EMP]])
     EnumerableCalc(expr#0..2=[{inputs}], expr#3=[true], expr#4=[10], expr#5=[=($t4, $t0)], DEPTNO1=[$t0], $f1=[$t3], $condition=[$t5])
       EnumerableTableScan(table=[[scott, DEPT]])
 !plan
@@ -1858,9 +1860,10 @@ select sal from "scott".emp e
 
 !ok
 EnumerableCalc(expr#0..4=[{inputs}], expr#5=[NOT($t4)], expr#6=[IS NOT NULL($t4)], expr#7=[OR($t5, $t6)], expr#8=[IS NOT TRUE($t7)], SAL=[$t1], $condition=[$t8])
-  EnumerableHashJoin(condition=[=($2, $3)], joinType=[left])
-    EnumerableCalc(expr#0..7=[{inputs}], EMPNO=[$t0], SAL=[$t5], DEPTNO=[$t7])
-      EnumerableTableScan(table=[[scott, EMP]])
+  EnumerableMergeJoin(condition=[=($2, $3)], joinType=[left])
+    EnumerableSort(sort0=[$2], dir0=[ASC])
+      EnumerableCalc(expr#0..7=[{inputs}], EMPNO=[$t0], SAL=[$t5], DEPTNO=[$t7])
+        EnumerableTableScan(table=[[scott, EMP]])
     EnumerableCalc(expr#0..2=[{inputs}], expr#3=[true], expr#4=[10], expr#5=[=($t4, $t0)], DEPTNO=[$t0], $f1=[$t3], $condition=[$t5])
       EnumerableTableScan(table=[[scott, DEPT]])
 !plan
@@ -1890,9 +1893,10 @@ select sal from "scott".emp e
 
 !ok
 EnumerableCalc(expr#0..3=[{inputs}], SAL=[$t1])
-  EnumerableHashJoin(condition=[=($2, $3)], joinType=[left])
-    EnumerableCalc(expr#0..7=[{inputs}], EMPNO=[$t0], SAL=[$t5], DEPTNO=[$t7])
-      EnumerableTableScan(table=[[scott, EMP]])
+  EnumerableMergeJoin(condition=[=($2, $3)], joinType=[left])
+    EnumerableSort(sort0=[$2], dir0=[ASC])
+      EnumerableCalc(expr#0..7=[{inputs}], EMPNO=[$t0], SAL=[$t5], DEPTNO=[$t7])
+        EnumerableTableScan(table=[[scott, EMP]])
     EnumerableCalc(expr#0..2=[{inputs}], DEPTNO=[$t0])
       EnumerableTableScan(table=[[scott, DEPT]])
 !plan
@@ -2050,11 +2054,13 @@ where sal + 100 not in (
 EnumerableAggregate(group=[{}], C=[COUNT()])
   EnumerableCalc(expr#0..9=[{inputs}], expr#10=[0], expr#11=[=($t4, $t10)], expr#12=[IS NULL($t2)], expr#13=[IS NOT NULL($t7)], expr#14=[<($t5, $t4)], expr#15=[OR($t12, $t13, $t14)], expr#16=[IS NOT TRUE($t15)], expr#17=[OR($t11, $t16)], proj#0..9=[{exprs}], $condition=[$t17])
     EnumerableHashJoin(condition=[AND(=($1, $8), =($2, $9))], joinType=[left])
-      EnumerableHashJoin(condition=[=($1, $3)], joinType=[left])
-        EnumerableCalc(expr#0..7=[{inputs}], proj#0..1=[{exprs}], SAL=[$t5])
-          EnumerableTableScan(table=[[scott, EMP]])
-        EnumerableCalc(expr#0..2=[{inputs}], expr#3=[1:BIGINT], expr#4=[IS NOT NULL($t1)], DNAME=[$t1], $f1=[$t3], $f2=[$t3], $condition=[$t4])
-          EnumerableTableScan(table=[[scott, DEPT]])
+      EnumerableMergeJoin(condition=[=($1, $3)], joinType=[left])
+        EnumerableSort(sort0=[$1], dir0=[ASC])
+          EnumerableCalc(expr#0..7=[{inputs}], proj#0..1=[{exprs}], SAL=[$t5])
+            EnumerableTableScan(table=[[scott, EMP]])
+        EnumerableSort(sort0=[$0], dir0=[ASC])
+          EnumerableCalc(expr#0..2=[{inputs}], expr#3=[1:BIGINT], expr#4=[IS NOT NULL($t1)], DNAME=[$t1], $f1=[$t3], $f2=[$t3], $condition=[$t4])
+            EnumerableTableScan(table=[[scott, DEPT]])
       EnumerableCalc(expr#0..4=[{inputs}], DEPTNO=[$t2], i=[$t3], DNAME=[$t4], SAL=[$t0])
         EnumerableHashJoin(condition=[=($1, $2)], joinType=[inner])
           EnumerableCalc(expr#0=[{inputs}], expr#1=[100], expr#2=[+($t0, $t1)], SAL=[$t0], $f1=[$t2])
@@ -2069,9 +2075,10 @@ select empno from "scott".emp as e
 where e.empno > ANY(
   select 2 from "scott".dept e2 where e2.deptno = e.deptno) ;
 EnumerableCalc(expr#0..6=[{inputs}], expr#7=[>($t0, $t2)], expr#8=[IS NULL($t5)], expr#9=[0], expr#10=[=($t3, $t9)], expr#11=[OR($t8, $t10)], expr#12=[IS NOT TRUE($t11)], expr#13=[AND($t7, $t12)], expr#14=[IS NOT TRUE($t7)], expr#15=[>($t3, $t4)], expr#16=[IS NOT TRUE($t15)], expr#17=[AND($t7, $t12, $t14, $t16)], expr#18=[OR($t13, $t17)], EMPNO=[$t0], $condition=[$t18])
-  EnumerableHashJoin(condition=[=($1, $6)], joinType=[left])
-    EnumerableCalc(expr#0..7=[{inputs}], EMPNO=[$t0], DEPTNO=[$t7])
-      EnumerableTableScan(table=[[scott, EMP]])
+  EnumerableMergeJoin(condition=[=($1, $6)], joinType=[left])
+    EnumerableSort(sort0=[$1], dir0=[ASC])
+      EnumerableCalc(expr#0..7=[{inputs}], EMPNO=[$t0], DEPTNO=[$t7])
+        EnumerableTableScan(table=[[scott, EMP]])
     EnumerableCalc(expr#0..2=[{inputs}], expr#3=[2], expr#4=[1:BIGINT], expr#5=[true], m=[$t3], c=[$t4], d=[$t4], trueLiteral=[$t5], DEPTNO=[$t0])
       EnumerableTableScan(table=[[scott, DEPT]])
 !plan
@@ -2101,13 +2108,14 @@ e.deptno > ANY(
   select 2 from "scott".dept e2 where e2.deptno = e.empno) from "scott".emp as e;
 
 EnumerableCalc(expr#0..6=[{inputs}], expr#7=[>($t1, $t2)], expr#8=[IS TRUE($t7)], expr#9=[IS NULL($t5)], expr#10=[0], expr#11=[=($t3, $t10)], expr#12=[OR($t9, $t11)], expr#13=[IS NOT TRUE($t12)], expr#14=[AND($t8, $t13)], expr#15=[>($t3, $t4)], expr#16=[IS TRUE($t15)], expr#17=[null:BOOLEAN], expr#18=[IS NOT TRUE($t7)], expr#19=[AND($t16, $t17, $t13, $t18)], expr#20=[IS NOT TRUE($t15)], expr#21=[AND($t7, $t13, $t18, $t20)], expr#22=[OR($t14, $t19, $t21)], EMPNO=[$t0], EXPR$1=[$t22])
-  EnumerableHashJoin(condition=[=($0, $6)], joinType=[left])
+  EnumerableMergeJoin(condition=[=($0, $6)], joinType=[left])
     EnumerableCalc(expr#0..7=[{inputs}], EMPNO=[$t0], DEPTNO=[$t7])
       EnumerableTableScan(table=[[scott, EMP]])
-    EnumerableCalc(expr#0..3=[{inputs}], expr#4=[true], m=[$t1], c=[$t2], d=[$t3], trueLiteral=[$t4], DEPTNO0=[$t0])
-      EnumerableAggregate(group=[{0}], m=[MIN($1)], c=[COUNT()], d=[COUNT($1)])
-        EnumerableCalc(expr#0..2=[{inputs}], expr#3=[CAST($t0):SMALLINT NOT NULL], expr#4=[2], DEPTNO0=[$t3], EXPR$0=[$t4])
-          EnumerableTableScan(table=[[scott, DEPT]])
+    EnumerableSort(sort0=[$4], dir0=[ASC])
+      EnumerableCalc(expr#0..3=[{inputs}], expr#4=[true], m=[$t1], c=[$t2], d=[$t3], trueLiteral=[$t4], DEPTNO0=[$t0])
+        EnumerableAggregate(group=[{0}], m=[MIN($1)], c=[COUNT()], d=[COUNT($1)])
+          EnumerableCalc(expr#0..2=[{inputs}], expr#3=[CAST($t0):SMALLINT NOT NULL], expr#4=[2], DEPTNO0=[$t3], EXPR$0=[$t4])
+            EnumerableTableScan(table=[[scott, DEPT]])
 !plan
  EMPNO | EXPR$1
 -------+--------
diff --git a/linq4j/src/main/java/org/apache/calcite/linq4j/EnumerableDefaults.java b/linq4j/src/main/java/org/apache/calcite/linq4j/EnumerableDefaults.java
index db13c9a..512130d 100644
--- a/linq4j/src/main/java/org/apache/calcite/linq4j/EnumerableDefaults.java
+++ b/linq4j/src/main/java/org/apache/calcite/linq4j/EnumerableDefaults.java
@@ -2130,6 +2130,7 @@ public abstract class EnumerableDefaults {
     case INNER:
     case SEMI:
     case ANTI:
+    case LEFT:
       return true;
     default:
       return false;
@@ -3995,6 +3996,7 @@ public abstract class EnumerableDefaults {
    * @param <TSource> left input record type
    * @param <TKey> key type
    * @param <TInner> right input record type */
+  @SuppressWarnings("unchecked")
   private static class MergeJoinEnumerator<TResult, TSource, TInner, TKey extends Comparable<TKey>>
       implements Enumerator<TResult> {
     private final List<TSource> lefts = new ArrayList<>();
@@ -4012,9 +4014,10 @@ public abstract class EnumerableDefaults {
     // key comparator, possibly null (Comparable#compareTo to be used in that case)
     private final Comparator<TKey> comparator;
     private boolean done;
-    private Enumerator<TResult> results;
-    // only used for ANTI join: if right input is over, all remaining elements from left are results
+    private Enumerator<TResult> results = null;
+    // used for LEFT/ANTI join: if right input is over, all remaining elements from left are results
     private boolean remainingLeft;
+    private TResult current = (TResult) DUMMY;
 
     MergeJoinEnumerator(Enumerable<TSource> leftEnumerable,
         Enumerable<TInner> rightEnumerable,
@@ -4050,10 +4053,12 @@ public abstract class EnumerableDefaults {
     }
 
     /** Returns whether the left enumerator was successfully advanced to the next
-     * element, and it does not have a null key. */
+     * element, and it does not have a null key (except for LEFT join, that needs to process
+     * all elements from left. */
     private boolean leftMoveNext() {
       return getLeftEnumerator().moveNext()
-          && outerKeySelector.apply(getLeftEnumerator().current()) != null;
+          && (joinType == JoinType.LEFT
+              || outerKeySelector.apply(getLeftEnumerator().current()) != null);
     }
 
     /** Returns whether the right enumerator was successfully advanced to the
@@ -4063,21 +4068,13 @@ public abstract class EnumerableDefaults {
           && innerKeySelector.apply(getRightEnumerator().current()) != null;
     }
 
-    /**
-     * Only for joinType ANTI: if right input is over, all remaining elements from left are results.
-     * @param currentLeft current element from left iterator.
-     */
-    private void computeLeftRemainder(TSource currentLeft) {
-      // before reaching this point, we have always performed a leftEnumerator moveNext,
-      // so we must save the leftEnumerator current item as a result, otherwise it would be lost
-      results = Linq4j.enumerator(
-          Collections.singletonList(resultSelector.apply(currentLeft, null)));
-      remainingLeft = true;
+    private boolean isLeftOrAntiJoin() {
+      return joinType == JoinType.LEFT || joinType == JoinType.ANTI;
     }
 
     private void start() {
-      if (joinType == JoinType.ANTI) {
-        startAntiJoin();
+      if (isLeftOrAntiJoin()) {
+        startLeftOrAntiJoin();
       } else {
         // joinType INNER or SEMI
         if (!leftMoveNext() || !rightMoveNext() || !advance()) {
@@ -4086,13 +4083,13 @@ public abstract class EnumerableDefaults {
       }
     }
 
-    private void startAntiJoin() {
+    private void startLeftOrAntiJoin() {
       if (!leftMoveNext()) {
         finish();
       } else {
         if (!rightMoveNext()) {
           // all remaining items in left are results for anti join
-          computeLeftRemainder(getLeftEnumerator().current());
+          remainingLeft = true;
         } else {
           if (!advance()) {
             finish();
@@ -4107,7 +4104,14 @@ public abstract class EnumerableDefaults {
     }
 
     private int compare(TKey key1, TKey key2) {
-      return comparator != null ? comparator.compare(key1, key2) : key1.compareTo(key2);
+      return comparator != null ? comparator.compare(key1, key2) : compareNullsLast(key1, key2);
+    }
+
+    private int compareNullsLast(TKey v0, TKey v1) {
+      return v0 == v1 ? 0
+          : v0 == null ? 1
+              : v1 == null ? -1
+                  : v0.compareTo(v1);
     }
 
     /** Moves to the next key that is present in both sides. Populates
@@ -4124,9 +4128,9 @@ public abstract class EnumerableDefaults {
           // mergeJoin assumes inputs sorted in ascending order with nulls last,
           // if we reach a null key, we are done.
           if (leftKey == null || rightKey == null) {
-            if (joinType == JoinType.ANTI && leftKey != null) {
-              // all remaining items in left are results for anti join
-              computeLeftRemainder(left);
+            if (joinType == JoinType.LEFT || (joinType == JoinType.ANTI && leftKey != null)) {
+              // all remaining items in left are results for left/anti join
+              remainingLeft = true;
               return true;
             }
             done = true;
@@ -4137,8 +4141,8 @@ public abstract class EnumerableDefaults {
             break;
           }
           if (c < 0) {
-            if (joinType == JoinType.ANTI) {
-              // left (and all other items with the same key) are results for anti join
+            if (isLeftOrAntiJoin()) {
+              // left (and all other items with the same key) are results for left/anti join
               if (!advanceLeft(left, leftKey)) {
                 done = true;
               }
@@ -4154,9 +4158,9 @@ public abstract class EnumerableDefaults {
             leftKey = outerKeySelector.apply(left);
           } else {
             if (!getRightEnumerator().moveNext()) {
-              if (joinType == JoinType.ANTI) {
-                // all remaining items in left are results for anti join
-                computeLeftRemainder(left);
+              if (isLeftOrAntiJoin()) {
+                // all remaining items in left are results for left/anti join
+                remainingLeft = true;
                 return true;
               }
               done = true;
@@ -4172,9 +4176,9 @@ public abstract class EnumerableDefaults {
         }
 
         if (!advanceRight(right, rightKey)) {
-          if (!done && joinType == JoinType.ANTI) {
-            // all remaining items in left are results for anti join
-            computeLeftRemainder(getLeftEnumerator().current());
+          if (!done && isLeftOrAntiJoin()) {
+            // all remaining items in left are results for left/anti join
+            remainingLeft = true;
           } else {
             done = true;
           }
@@ -4221,9 +4225,9 @@ public abstract class EnumerableDefaults {
       while (getLeftEnumerator().moveNext()) {
         left = getLeftEnumerator().current();
         TKey leftKey2 = outerKeySelector.apply(left);
-        if (leftKey2 == null) {
+        if (leftKey2 == null && joinType != JoinType.LEFT) {
           // mergeJoin assumes inputs sorted in ascending order with nulls last,
-          // if we reach a null key, we are done
+          // if we reach a null key, we are done (except LEFT join, that needs to process LHS fully)
           break;
         }
         int c = compare(leftKey, leftKey2);
@@ -4273,20 +4277,29 @@ public abstract class EnumerableDefaults {
     }
 
     public TResult current() {
-      if (remainingLeft) {
-        final TSource left = getLeftEnumerator().current();
-        return resultSelector.apply(left, null);
+      if (current == DUMMY) {
+        throw new NoSuchElementException();
       }
-      return results.current();
+      return current;
     }
 
     public boolean moveNext() {
       for (;;) {
-        if (results.moveNext()) {
-          return true;
+        if (results != null) {
+          if (results.moveNext()) {
+            current = results.current();
+            return true;
+          } else {
+            results = null;
+          }
         }
         if (remainingLeft) {
-          return leftMoveNext();
+          current = resultSelector.apply(getLeftEnumerator().current(), null);
+          if (!leftMoveNext()) {
+            remainingLeft = false;
+            done = true;
+          }
+          return true;
         }
         if (done) {
           return false;
@@ -4299,6 +4312,8 @@ public abstract class EnumerableDefaults {
 
     public void reset() {
       done = false;
+      results = null;
+      current = (TResult) DUMMY;
       remainingLeft = false;
       leftEnumerator.reset();
       if (rightEnumerator != null) {