You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@calcite.apache.org by za...@apache.org on 2021/07/07 08:38:12 UTC

[calcite] branch master updated: [CALCITE-4668] RelBuilder.join should convert Correlate to Join if correlation variable is unused (James Starr)

This is an automated email from the ASF dual-hosted git repository.

zabetak pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/calcite.git


The following commit(s) were added to refs/heads/master by this push:
     new a59693e  [CALCITE-4668] RelBuilder.join should convert Correlate to Join if correlation variable is unused (James Starr)
a59693e is described below

commit a59693e0342f52fd9eeb48bf55e7f4d02913fa79
Author: James Starr <ja...@gmail.com>
AuthorDate: Fri Jun 25 13:48:47 2021 -0700

    [CALCITE-4668] RelBuilder.join should convert Correlate to Join if correlation variable is unused (James Starr)
    
    Reworking the checking and validating if join is correlated in
    RelBuilder.join to check if the correlated id is used on the right
    side.If the correlated id is not used on the right side, then the join
    is not correlated and a normal Join is emitted.
    
    1.  Updating ToLogicalConverterTest to generate a non trival Correlate
    2.  Adding tests for generating non trivial correlate joins to
    RelBuilderTest.
    3.  Updating plans in sub-query.iq, that are now simplified.
    
    Close apache/calcite#2446
---
 .../java/org/apache/calcite/tools/RelBuilder.java  |  37 +++++-
 .../rel/logical/ToLogicalConverterTest.java        |  12 +-
 .../org/apache/calcite/test/RelBuilderTest.java    | 136 ++++++++++++++++++---
 core/src/test/resources/sql/sub-query.iq           |  51 ++++----
 4 files changed, 188 insertions(+), 48 deletions(-)

diff --git a/core/src/main/java/org/apache/calcite/tools/RelBuilder.java b/core/src/main/java/org/apache/calcite/tools/RelBuilder.java
index 013e097..a5706fa 100644
--- a/core/src/main/java/org/apache/calcite/tools/RelBuilder.java
+++ b/core/src/main/java/org/apache/calcite/tools/RelBuilder.java
@@ -2409,7 +2409,7 @@ public class RelBuilder {
     Frame right = stack.pop();
     final Frame left = stack.pop();
     final RelNode join;
-    final boolean correlate = variablesSet.size() == 1;
+    final boolean correlate = checkIfCorrelated(variablesSet, joinType, left.rel, right.rel);
     RexNode postCondition = literal(true);
     if (config.simplify()) {
       // Normalize expanded versions IS NOT DISTINCT FROM so that simplifier does not
@@ -2422,10 +2422,6 @@ public class RelBuilder {
     }
     if (correlate) {
       final CorrelationId id = Iterables.getOnlyElement(variablesSet);
-      if (!RelOptUtil.notContainsCorrelation(left.rel, id, Litmus.IGNORE)) {
-        throw new IllegalArgumentException("variable " + id
-            + " must not be used by left input to correlation");
-      }
       // Correlate does not have an ON clause.
       switch (joinType) {
       case LEFT:
@@ -3439,6 +3435,37 @@ public class RelBuilder {
     }
   }
 
+  /**
+   * Checks for {@link CorrelationId}, then validates the id is not used on left,
+   * and finally checks if id is actually used on right.
+   *
+   * @return true if a correlate id is present and used
+   *
+   * @throws IllegalArgumentException if the {@link CorrelationId} is used by left side or if the a
+   *   {@link CorrelationId} is present and the {@link JoinRelType} is FULL or RIGHT.
+   */
+  private static boolean checkIfCorrelated(Set<CorrelationId> variablesSet,
+      JoinRelType joinType, RelNode leftNode, RelNode rightRel) {
+    if (variablesSet.size() != 1) {
+      return false;
+    }
+    CorrelationId id = Iterables.getOnlyElement(variablesSet);
+    if (!RelOptUtil.notContainsCorrelation(leftNode, id, Litmus.IGNORE)) {
+      throw new IllegalArgumentException("variable " + id
+          + " must not be used by left input to correlation");
+    }
+    switch (joinType) {
+    case RIGHT:
+    case FULL:
+      throw new IllegalArgumentException("Correlated " + joinType + " join is not supported");
+    default:
+      return !RelOptUtil.correlationColumns(
+          Iterables.getOnlyElement(variablesSet),
+          rightRel).isEmpty();
+    }
+  }
+
+
   /** Implementation of {@link AggCall}. */
   private class AggCallImpl implements AggCallPlus {
     private final SqlAggFunction aggFunction;
diff --git a/core/src/test/java/org/apache/calcite/rel/logical/ToLogicalConverterTest.java b/core/src/test/java/org/apache/calcite/rel/logical/ToLogicalConverterTest.java
index aec9d95..5622817 100644
--- a/core/src/test/java/org/apache/calcite/rel/logical/ToLogicalConverterTest.java
+++ b/core/src/test/java/org/apache/calcite/rel/logical/ToLogicalConverterTest.java
@@ -327,21 +327,25 @@ class ToLogicalConverterTest {
     final RelNode rel = builder.scan("EMP")
         .variable(v)
         .scan("DEPT")
+        .filter(
+            builder.equals(builder.field(0), builder.field(v.get(), "DEPTNO")))
         .join(JoinRelType.LEFT,
             builder.equals(builder.field(2, 0, "SAL"),
                 builder.literal(1000)),
             ImmutableSet.of(v.get().id))
         .build();
     String expectedPhysical = ""
-        + "EnumerableCorrelate(correlation=[$cor0], joinType=[left], requiredColumns=[{5}])\n"
+        + "EnumerableCorrelate(correlation=[$cor0], joinType=[left], requiredColumns=[{5, 7}])\n"
         + "  EnumerableTableScan(table=[[scott, EMP]])\n"
         + "  EnumerableFilter(condition=[=($cor0.SAL, 1000)])\n"
-        + "    EnumerableTableScan(table=[[scott, DEPT]])\n";
+        + "    EnumerableFilter(condition=[=($0, $cor0.DEPTNO)])\n"
+        + "      EnumerableTableScan(table=[[scott, DEPT]])\n";
     String expectedLogical = ""
-        + "LogicalCorrelate(correlation=[$cor0], joinType=[left], requiredColumns=[{5}])\n"
+        + "LogicalCorrelate(correlation=[$cor0], joinType=[left], requiredColumns=[{5, 7}])\n"
         + "  LogicalTableScan(table=[[scott, EMP]])\n"
         + "  LogicalFilter(condition=[=($cor0.SAL, 1000)])\n"
-        + "    LogicalTableScan(table=[[scott, DEPT]])\n";
+        + "    LogicalFilter(condition=[=($0, $cor0.DEPTNO)])\n"
+        + "      LogicalTableScan(table=[[scott, DEPT]])\n";
     verify(rel, expectedPhysical, expectedLogical);
   }
 
diff --git a/core/src/test/java/org/apache/calcite/test/RelBuilderTest.java b/core/src/test/java/org/apache/calcite/test/RelBuilderTest.java
index 8a3c731..2cd6f9d 100644
--- a/core/src/test/java/org/apache/calcite/test/RelBuilderTest.java
+++ b/core/src/test/java/org/apache/calcite/test/RelBuilderTest.java
@@ -2354,6 +2354,25 @@ public class RelBuilderTest {
     assertThat(root, hasTree(expected));
   }
 
+  @Test void testTrivialCorrelation() {
+    final RelBuilder builder = RelBuilder.create(config().build());
+    final Holder<@Nullable RexCorrelVariable> v = Holder.empty();
+    RelNode root = builder.scan("EMP")
+        .variable(v)
+        .scan("DEPT")
+        .join(JoinRelType.LEFT,
+            builder.equals(builder.field(2, 0, "SAL"),
+                builder.literal(1000)),
+            ImmutableSet.of(v.get().id))
+        .build();
+    // Note that the join is emitted since the query is not actually a correlated.
+    final String expected = ""
+        + "LogicalJoin(condition=[=($5, 1000)], joinType=[left])\n"
+        + "  LogicalTableScan(table=[[scott, EMP]])\n"
+        + "  LogicalTableScan(table=[[scott, DEPT]])\n";
+    assertThat(root, hasTree(expected));
+  }
+
   @Test void testAntiJoin() {
     // Equivalent SQL:
     //   SELECT * FROM dept d
@@ -3883,51 +3902,119 @@ public class RelBuilderTest {
   @Test void testSimpleSemiCorrelateViaJoin() {
     RelNode root = buildSimpleCorrelateWithJoin(JoinRelType.SEMI);
     final String expected = ""
-        + "LogicalCorrelate(correlation=[$cor0], joinType=[semi], requiredColumns=[{7}])\n"
+        + "LogicalJoin(condition=[=($7, $8)], joinType=[semi])\n"
+        + "  LogicalTableScan(table=[[scott, EMP]])\n"
+        + "  LogicalTableScan(table=[[scott, DEPT]])\n";
+    assertThat(
+        "Join with correlate id but the id never used should be simplified to a join.",
+        root, hasTree(expected));
+  }
+
+  @Test void testSemiCorrelatedViaJoin() {
+    RelNode root = buildCorrelateWithJoin(JoinRelType.SEMI);
+    final String expected = ""
+        + "LogicalCorrelate(correlation=[$cor0], joinType=[semi], requiredColumns=[{0, 7}])\n"
         + "  LogicalTableScan(table=[[scott, EMP]])\n"
         + "  LogicalFilter(condition=[=($cor0.DEPTNO, $0)])\n"
-        + "    LogicalTableScan(table=[[scott, DEPT]])\n";
-    assertThat(root, hasTree(expected));
+        + "    LogicalFilter(condition=[=($cor0.EMPNO, 'NaN')])\n"
+        + "      LogicalTableScan(table=[[scott, DEPT]])\n";
+    assertThat(
+        "Correlated semi joins should emmit a correlate with a filter on the right side.",
+        root, hasTree(expected));
   }
 
   @Test void testSimpleAntiCorrelateViaJoin() {
     RelNode root = buildSimpleCorrelateWithJoin(JoinRelType.ANTI);
     final String expected = ""
-        + "LogicalCorrelate(correlation=[$cor0], joinType=[anti], requiredColumns=[{7}])\n"
+        + "LogicalJoin(condition=[=($7, $8)], joinType=[anti])\n"
         + "  LogicalTableScan(table=[[scott, EMP]])\n"
-        + "  LogicalFilter(condition=[=($cor0.DEPTNO, $0)])\n"
-        + "    LogicalTableScan(table=[[scott, DEPT]])\n";
-    assertThat(root, hasTree(expected));
+        + "  LogicalTableScan(table=[[scott, DEPT]])\n";
+    assertThat(
+        "Join with correlate id but the id never used should be simplified to a join.",
+        root, hasTree(expected));
   }
 
+  @Test void testAntiCorrelateViaJoin() {
+    RelNode root = buildCorrelateWithJoin(JoinRelType.ANTI);
+    final String expected = ""
+        + "LogicalCorrelate(correlation=[$cor0], joinType=[anti], requiredColumns=[{0, 7}])\n"
+        + "  LogicalTableScan(table=[[scott, EMP]])\n"
+        + "  LogicalFilter(condition=[=($cor0.DEPTNO, $0)])\n"
+        + "    LogicalFilter(condition=[=($cor0.EMPNO, 'NaN')])\n"
+        + "      LogicalTableScan(table=[[scott, DEPT]])\n";
+    assertThat(
+        "Correlated anti joins should emmit a correlate with a filter on the right side.",
+        root, hasTree(expected));  }
+
   @Test void testSimpleLeftCorrelateViaJoin() {
     RelNode root = buildSimpleCorrelateWithJoin(JoinRelType.LEFT);
     final String expected = ""
-        + "LogicalCorrelate(correlation=[$cor0], joinType=[left], requiredColumns=[{7}])\n"
+        + "LogicalJoin(condition=[=($7, $8)], joinType=[left])\n"
+        + "  LogicalTableScan(table=[[scott, EMP]])\n"
+        + "  LogicalTableScan(table=[[scott, DEPT]])\n";
+    assertThat(
+        "Join with correlate id but the id never used should be simplified to a join.",
+        root, hasTree(expected));
+  }
+
+  @Test void testLeftCorrelateViaJoin() {
+    RelNode root = buildCorrelateWithJoin(JoinRelType.LEFT);
+    final String expected = ""
+        + "LogicalCorrelate(correlation=[$cor0], joinType=[left], requiredColumns=[{0, 7}])\n"
         + "  LogicalTableScan(table=[[scott, EMP]])\n"
         + "  LogicalFilter(condition=[=($cor0.DEPTNO, $0)])\n"
-        + "    LogicalTableScan(table=[[scott, DEPT]])\n";
-    assertThat(root, hasTree(expected));
+        + "    LogicalFilter(condition=[=($cor0.EMPNO, 'NaN')])\n"
+        + "      LogicalTableScan(table=[[scott, DEPT]])\n";
+    assertThat(
+        "Correlated left joins should emmit a correlate with a filter on the right side.",
+        root, hasTree(expected));
   }
 
   @Test void testSimpleInnerCorrelateViaJoin() {
     RelNode root = buildSimpleCorrelateWithJoin(JoinRelType.INNER);
     final String expected = ""
+        + "LogicalJoin(condition=[=($7, $8)], joinType=[inner])\n"
+        + "  LogicalTableScan(table=[[scott, EMP]])\n"
+        + "  LogicalTableScan(table=[[scott, DEPT]])\n";
+    assertThat("Join with correlate id but never used should be simplified to a join.",
+        root, hasTree(expected));
+  }
+
+  @Test void testInnerCorrelateViaJoin() {
+    RelNode root = buildCorrelateWithJoin(JoinRelType.INNER);
+    final String expected = ""
         + "LogicalFilter(condition=[=($7, $8)])\n"
-        + "  LogicalCorrelate(correlation=[$cor0], joinType=[inner], requiredColumns=[{}])\n"
+        + "  LogicalCorrelate(correlation=[$cor0], joinType=[inner], requiredColumns=[{0}])\n"
         + "    LogicalTableScan(table=[[scott, EMP]])\n"
-        + "    LogicalTableScan(table=[[scott, DEPT]])\n";
-    assertThat(root, hasTree(expected));
+        + "    LogicalFilter(condition=[=($cor0.EMPNO, 'NaN')])\n"
+        + "      LogicalTableScan(table=[[scott, DEPT]])\n";
+    assertThat(
+        "Correlated inner joins should emmit a correlate with a filter on top.",
+        root, hasTree(expected));
   }
 
   @Test void testSimpleRightCorrelateViaJoinThrowsException() {
     assertThrows(IllegalArgumentException.class,
-        () -> buildSimpleCorrelateWithJoin(JoinRelType.RIGHT));
+        () -> buildSimpleCorrelateWithJoin(JoinRelType.RIGHT),
+        "Right outer joins with correlated ids are invalid even if id is not used.");
   }
 
   @Test void testSimpleFullCorrelateViaJoinThrowsException() {
     assertThrows(IllegalArgumentException.class,
-        () -> buildSimpleCorrelateWithJoin(JoinRelType.FULL));
+        () -> buildSimpleCorrelateWithJoin(JoinRelType.FULL),
+        "Full outer joins with correlated ids are invalid even if id is not used.");
+  }
+
+  @Test void testRightCorrelateViaJoinThrowsException() {
+    assertThrows(IllegalArgumentException.class,
+        () -> buildCorrelateWithJoin(JoinRelType.RIGHT),
+        "Right outer joins with correlated ids are invalid.");
+  }
+
+  @Test void testFullCorrelateViaJoinThrowsException() {
+    assertThrows(IllegalArgumentException.class,
+        () -> buildCorrelateWithJoin(JoinRelType.FULL),
+        "Full outer joins with correlated ids are invalid.");
   }
 
   private static RelNode buildSimpleCorrelateWithJoin(JoinRelType type) {
@@ -3944,6 +4031,25 @@ public class RelBuilderTest {
         .build();
   }
 
+  private static RelNode buildCorrelateWithJoin(JoinRelType type) {
+    final RelBuilder builder = RelBuilder.create(config().build());
+    final RexBuilder rexBuilder = builder.getRexBuilder();
+    final Holder<@Nullable RexCorrelVariable> v = Holder.empty();
+    return builder
+        .scan("EMP")
+        .variable(v)
+        .scan("DEPT")
+        .filter(
+            builder.equals(
+                rexBuilder.makeFieldAccess(v.get(), 0),
+                builder.literal("NaN")))
+        .join(type,
+            builder.equals(
+                builder.field(2, 0, "DEPTNO"),
+                builder.field(2, 1, "DEPTNO")), ImmutableSet.of(v.get().id))
+        .build();
+  }
+
   @Test void testCorrelateWithComplexFields() {
     final RelBuilder builder = RelBuilder.create(config().build());
     final Holder<@Nullable RexCorrelVariable> v = Holder.empty();
diff --git a/core/src/test/resources/sql/sub-query.iq b/core/src/test/resources/sql/sub-query.iq
index 78aff2e..172dbe9 100644
--- a/core/src/test/resources/sql/sub-query.iq
+++ b/core/src/test/resources/sql/sub-query.iq
@@ -1609,11 +1609,7 @@ select sal from "scott".emp e
 (0 rows)
 
 !ok
-EnumerableCalc(expr#0..3=[{inputs}], SAL=[$t1])
-  EnumerableCorrelate(correlation=[$cor0], joinType=[inner], requiredColumns=[{}])
-    EnumerableCalc(expr#0..7=[{inputs}], EMPNO=[$t0], SAL=[$t5], DEPTNO=[$t7])
-      EnumerableTableScan(table=[[scott, EMP]])
-    EnumerableValues(tuples=[[]])
+EnumerableValues(tuples=[[]])
 !plan
 
 # Test filter literal IN null correlated
@@ -1639,11 +1635,7 @@ select sal from "scott".emp e
 (0 rows)
 
 !ok
-EnumerableCalc(expr#0..3=[{inputs}], SAL=[$t1])
-  EnumerableCorrelate(correlation=[$cor0], joinType=[inner], requiredColumns=[{}])
-    EnumerableCalc(expr#0..7=[{inputs}], EMPNO=[$t0], SAL=[$t5], DEPTNO=[$t7])
-      EnumerableTableScan(table=[[scott, EMP]])
-    EnumerableValues(tuples=[[]])
+EnumerableValues(tuples=[[]])
 !plan
 
 # Test filter null IN required correlated
@@ -1656,11 +1648,26 @@ select sal from "scott".emp e
 (0 rows)
 
 !ok
-EnumerableCalc(expr#0..3=[{inputs}], SAL=[$t1])
-  EnumerableCorrelate(correlation=[$cor0], joinType=[inner], requiredColumns=[{}])
-    EnumerableCalc(expr#0..7=[{inputs}], EMPNO=[$t0], SAL=[$t5], DEPTNO=[$t7])
-      EnumerableTableScan(table=[[scott, EMP]])
-    EnumerableValues(tuples=[[]])
+EnumerableValues(tuples=[[]])
+!plan
+
+# Test filter literal IN null liter with query that can not be trivially simplified
+select sal from "scott".emp e
+  where mod(cast(rand() as int), 2) = 3 OR 123 IN (
+    select cast(null as int) from "scott".dept d
+      where d.deptno = e.deptno);
+ SAL
+-----
+(0 rows)
+
+!ok
+EnumerableCalc(expr#0..4=[{inputs}], expr#5=[RAND()], expr#6=[CAST($t5):INTEGER NOT NULL], expr#7=[2], expr#8=[MOD($t6, $t7)], expr#9=[3], expr#10=[=($t8, $t9)], expr#11=[IS NOT NULL($t4)], expr#12=[AND($t4, $t11)], expr#13=[OR($t10, $t12)], SAL=[$t1], $condition=[$t13])
+  EnumerableMergeJoin(condition=[=($2, $3)], joinType=[left])
+    EnumerableSort(sort0=[$2], dir0=[ASC])
+      EnumerableCalc(expr#0..7=[{inputs}], EMPNO=[$t0], SAL=[$t5], DEPTNO=[$t7])
+        EnumerableTableScan(table=[[scott, EMP]])
+    EnumerableCalc(expr#0..2=[{inputs}], expr#3=[false], DEPTNO=[$t0], $f1=[$t3])
+      EnumerableTableScan(table=[[scott, DEPT]])
 !plan
 
 # Test filter null IN nullable correlated
@@ -1673,11 +1680,7 @@ select sal from "scott".emp e
 (0 rows)
 
 !ok
-EnumerableCalc(expr#0..3=[{inputs}], SAL=[$t1])
-  EnumerableCorrelate(correlation=[$cor0], joinType=[inner], requiredColumns=[{}])
-    EnumerableCalc(expr#0..7=[{inputs}], EMPNO=[$t0], SAL=[$t5], DEPTNO=[$t7])
-      EnumerableTableScan(table=[[scott, EMP]])
-    EnumerableValues(tuples=[[]])
+EnumerableValues(tuples=[[]])
 !plan
 
 # Test filter literal IN required correlated
@@ -2121,13 +2124,13 @@ EnumerableCalc(expr#0..6=[{inputs}], expr#7=[>($t1, $t2)], expr#8=[IS TRUE($t7)]
 !ok
 
 # [CALCITE-4560] Wrong plan when decorrelating EXISTS subquery with COALESCE in the predicate
-# The employee KING has no manager (NULL) so before the fix the following query was missing 
-# this employee from the result set. 
+# The employee KING has no manager (NULL) so before the fix the following query was missing
+# this employee from the result set.
 select ename
 from "scott".emp as e1
-where exists 
+where exists
     (select 1 from "scott".emp as e2 where coalesce(e1.mgr,0)=coalesce(e2.mgr,0));
-# The plan before the fix was wrong but also inefficient since it required the generation of 
+# The plan before the fix was wrong but also inefficient since it required the generation of
 # a value generator (see RelDecorrelator code). The value generator is not present in the
 # following plan (two scans of EMP table instead of three).
 EnumerableCalc(expr#0..2=[{inputs}], ENAME=[$t1])