You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@calcite.apache.org by jh...@apache.org on 2019/03/30 02:19:41 UTC

[calcite] 02/04: [CALCITE-2920] In RelBuilder, add antiJoin method (Ruben Quesada Lopez)

This is an automated email from the ASF dual-hosted git repository.

jhyde pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/calcite.git

commit 22fd34f892a2232d393bd5906f8bb1f21f1dff50
Author: rubenada <ru...@gmail.com>
AuthorDate: Fri Mar 15 16:56:54 2019 +0100

    [CALCITE-2920] In RelBuilder, add antiJoin method (Ruben Quesada Lopez)
    
    There is no AntiJoin relational operator (for now), so generate a
    Correlate instead.
    
    Close apache/calcite#1110
---
 .../java/org/apache/calcite/runtime/FlatLists.java |  9 ++-
 .../java/org/apache/calcite/sql/SemiJoinType.java  | 29 ++++++--
 .../java/org/apache/calcite/tools/RelBuilder.java  | 79 +++++++++++++++++++++-
 .../org/apache/calcite/test/CalciteAssert.java     | 24 ++++++-
 .../java/org/apache/calcite/test/LatticeTest.java  | 31 ++++-----
 .../org/apache/calcite/test/RelBuilderTest.java    | 22 ++++++
 .../test/enumerable/EnumerableCorrelateTest.java   | 65 ++++++++++++++++++
 site/_docs/algebra.md                              |  1 +
 8 files changed, 231 insertions(+), 29 deletions(-)

diff --git a/core/src/main/java/org/apache/calcite/runtime/FlatLists.java b/core/src/main/java/org/apache/calcite/runtime/FlatLists.java
index a2418f0..a6ea656 100644
--- a/core/src/main/java/org/apache/calcite/runtime/FlatLists.java
+++ b/core/src/main/java/org/apache/calcite/runtime/FlatLists.java
@@ -263,7 +263,14 @@ public class FlatLists {
   /** Returns a map that consists of a given map plus an (key, value),
    * guaranteed to be an {@link ImmutableMap}. */
   public static <K, V> ImmutableMap<K, V> append(Map<K, V> map, K k, V v) {
-    return ImmutableMap.<K, V>builder().putAll(map).put(k, v).build();
+    final ImmutableMap.Builder<K, V> builder = ImmutableMap.builder();
+    builder.put(k, v);
+    map.forEach((k2, v2) -> {
+      if (!k.equals(k2)) {
+        builder.put(k2, v2);
+      }
+    });
+    return builder.build();
   }
 
   /** Base class for flat lists.
diff --git a/core/src/main/java/org/apache/calcite/sql/SemiJoinType.java b/core/src/main/java/org/apache/calcite/sql/SemiJoinType.java
index 473e812..2569d76 100644
--- a/core/src/main/java/org/apache/calcite/sql/SemiJoinType.java
+++ b/core/src/main/java/org/apache/calcite/sql/SemiJoinType.java
@@ -28,25 +28,40 @@ import java.util.Locale;
  */
 public enum SemiJoinType {
   /**
-   * Inner join
+   * Inner join.
    */
   INNER,
 
   /**
-   * Left-outer join
+   * Left-outer join.
    */
   LEFT,
 
   /**
-   * Semi-join
-   * <p>Similar to from A ... where a in (select b from B ...)</p>
+   * Semi-join.
+   *
+   * <p>For example, {@code EMP semi-join DEPT} finds all {@code EMP} records
+   * that have a corresponding {@code DEPT} record:
+   *
+   * <blockquote><pre>
+   * SELECT * FROM EMP
+   * WHERE EXISTS (SELECT 1 FROM DEPT
+   *     WHERE DEPT.DEPTNO = EMP.DEPTNO)</pre>
+   * </blockquote>
    */
   SEMI,
 
   /**
-   * Anti-join
-   * <p>Similar to from A ... where a NOT in (select b from B ...)</p>
-   * <p>Note: if B.b is nullable and B has nulls, no rows must be returned</p>
+   * Anti-join.
+   *
+   * <p>For example, {@code EMP anti-join DEPT} finds all {@code EMP} records
+   * that do not have a corresponding {@code DEPT} record:
+   *
+   * <blockquote><pre>
+   * SELECT * FROM EMP
+   * WHERE NOT EXISTS (SELECT 1 FROM DEPT
+   *     WHERE DEPT.DEPTNO = EMP.DEPTNO)</pre>
+   * </blockquote>
    */
   ANTI;
 
diff --git a/core/src/main/java/org/apache/calcite/tools/RelBuilder.java b/core/src/main/java/org/apache/calcite/tools/RelBuilder.java
index 0d2972b..68492c9 100644
--- a/core/src/main/java/org/apache/calcite/tools/RelBuilder.java
+++ b/core/src/main/java/org/apache/calcite/tools/RelBuilder.java
@@ -1788,7 +1788,23 @@ public class RelBuilder {
     return join(joinType, conditions);
   }
 
-  /** Creates a {@link SemiJoin}. */
+  /** Creates a {@link SemiJoin}.
+   *
+   * <p>A semi-join is a form of join that combines two relational expressions
+   * according to some condition, and outputs only rows from the left input for
+   * which at least one row from the right input matches. It only outputs
+   * columns from the left input, and ignores duplicates on the right.
+   *
+   * <p>For example, {@code EMP semi-join DEPT} finds all {@code EMP} records
+   * that do not have a corresponding {@code DEPT} record, similar to the
+   * following SQL:
+   *
+   * <blockquote><pre>
+   * SELECT * FROM EMP
+   * WHERE EXISTS (SELECT 1 FROM DEPT
+   *     WHERE DEPT.DEPTNO = EMP.DEPTNO)</pre>
+   * </blockquote>
+   */
   public RelBuilder semiJoin(Iterable<? extends RexNode> conditions) {
     final Frame right = stack.pop();
     final RelNode semiJoin =
@@ -1797,11 +1813,70 @@ public class RelBuilder {
     return this;
   }
 
-  /** Creates a {@link SemiJoin}. */
+  /** Creates a {@link SemiJoin}.
+   *
+   * @see #semiJoin(Iterable) */
   public RelBuilder semiJoin(RexNode... conditions) {
     return semiJoin(ImmutableList.copyOf(conditions));
   }
 
+  /** Creates an anti-join.
+   *
+   * <p>An anti-join is a form of join that combines two relational expressions
+   * according to some condition, but outputs only rows from the left input
+   * for which no rows from the right input match.
+   *
+   * <p>For example, {@code EMP anti-join DEPT} finds all {@code EMP} records
+   * that do not have a corresponding {@code DEPT} record, similar to the
+   * following SQL:
+   *
+   * <blockquote><pre>
+   * SELECT * FROM EMP
+   * WHERE NOT EXISTS (SELECT 1 FROM DEPT
+   *     WHERE DEPT.DEPTNO = EMP.DEPTNO)</pre>
+   * </blockquote>
+   */
+  public RelBuilder antiJoin(Iterable<? extends RexNode> conditions) {
+    // There is currently no "AntiJoin" relational expression, so we
+    // simulate it using a LogicalCorrelate with SemiJoinType.ANTI.
+    final RexBuilder rexBuilder = getRexBuilder();
+    final RelNode right = build();
+    final RelNode left = peek();
+    final int leftFieldCount = left.getRowType().getFieldCount();
+    final CorrelationId correlationId = cluster.createCorrel();
+    final RexNode corrVar =
+        rexBuilder.makeCorrel(left.getRowType(), correlationId);
+    final ImmutableBitSet.Builder requiredColumns = ImmutableBitSet.builder();
+
+    // Replace all references of left input with FieldAccess(corrVar, field)
+    final RexNode condition = and(conditions).accept(new RexShuttle() {
+      @Override public RexNode visitInputRef(RexInputRef input) {
+        final int field = input.getIndex();
+        if (field >= leftFieldCount) {
+          return rexBuilder.makeInputRef(input.getType(), input.getIndex()
+              - leftFieldCount);
+        }
+        requiredColumns.set(field);
+        return rexBuilder.makeFieldAccess(corrVar, field);
+      }
+    });
+
+    final RelNode right2 = push(right).filter(condition).build();
+
+    final RelNode antiJoin =
+        correlateFactory.createCorrelate(left, right2, correlationId,
+            requiredColumns.build(), SemiJoinType.ANTI);
+    replaceTop(antiJoin);
+    return this;
+  }
+
+  /** Creates an anti-join.
+   *
+   * @see #antiJoin(Iterable) */
+  public RelBuilder antiJoin(RexNode... conditions) {
+    return antiJoin(ImmutableList.copyOf(conditions));
+  }
+
   /** Assigns a table alias to the top entry on the stack. */
   public RelBuilder as(final String alias) {
     final Frame pair = stack.pop();
diff --git a/core/src/test/java/org/apache/calcite/test/CalciteAssert.java b/core/src/test/java/org/apache/calcite/test/CalciteAssert.java
index 26e29f2..943b2fd 100644
--- a/core/src/test/java/org/apache/calcite/test/CalciteAssert.java
+++ b/core/src/test/java/org/apache/calcite/test/CalciteAssert.java
@@ -22,6 +22,7 @@ import org.apache.calcite.adapter.java.ReflectiveSchema;
 import org.apache.calcite.adapter.jdbc.JdbcSchema;
 import org.apache.calcite.avatica.ConnectionProperty;
 import org.apache.calcite.avatica.util.DateTimeUtils;
+import org.apache.calcite.config.CalciteConnectionConfigImpl;
 import org.apache.calcite.config.CalciteConnectionProperty;
 import org.apache.calcite.config.CalciteSystemProperty;
 import org.apache.calcite.config.Lex;
@@ -31,6 +32,7 @@ import org.apache.calcite.jdbc.CalcitePrepare;
 import org.apache.calcite.jdbc.CalciteSchema;
 import org.apache.calcite.materialize.Lattice;
 import org.apache.calcite.model.ModelHandler;
+import org.apache.calcite.plan.Contexts;
 import org.apache.calcite.plan.RelOptUtil;
 import org.apache.calcite.rel.RelNode;
 import org.apache.calcite.runtime.CalciteException;
@@ -47,6 +49,7 @@ import org.apache.calcite.schema.impl.ViewTableMacro;
 import org.apache.calcite.sql.validate.SqlConformanceEnum;
 import org.apache.calcite.sql.validate.SqlValidatorException;
 import org.apache.calcite.tools.FrameworkConfig;
+import org.apache.calcite.tools.Frameworks;
 import org.apache.calcite.tools.RelBuilder;
 import org.apache.calcite.util.Closer;
 import org.apache.calcite.util.Holder;
@@ -1368,7 +1371,7 @@ public class CalciteAssert {
       try (Connection c = createConnection()) {
         f.accept(c);
       } catch (SQLException e) {
-        TestUtil.rethrow(e);
+        throw TestUtil.rethrow(e);
       }
       return this;
     }
@@ -1570,7 +1573,7 @@ public class CalciteAssert {
       if (plan != null) {
         return;
       }
-      addHook(Hook.JAVA_PLAN, (Consumer<String>) a0 -> plan = a0);
+      addHook(Hook.JAVA_PLAN, this::setPlan);
       withConnection(connection -> {
         assertQuery(connection, sql, limit, materializationsEnabled,
             hooks, null, checkUpdate, null);
@@ -1578,6 +1581,10 @@ public class CalciteAssert {
       });
     }
 
+    private void setPlan(String plan) {
+      this.plan = plan;
+    }
+
     /** Runs the query and applies a checker to the generated third-party
      * queries. The checker should throw to fail the test if it does not see
      * what it wants. This method can be used to check whether a particular
@@ -1655,10 +1662,21 @@ public class CalciteAssert {
       return withHook(Hook.STRING_TO_QUERY,
           (Consumer<Pair<FrameworkConfig, Holder<CalcitePrepare.Query>>>)
           pair -> {
-            final RelBuilder b = RelBuilder.create(pair.left);
+            final FrameworkConfig config = forceDecorrelate(pair.left);
+            final RelBuilder b = RelBuilder.create(config);
             pair.right.set(CalcitePrepare.Query.of(relFn.apply(b)));
           });
     }
+
+    /** Creates a {@link FrameworkConfig} that does not decorrelate. */
+    private FrameworkConfig forceDecorrelate(FrameworkConfig config) {
+      return Frameworks.newConfigBuilder(config)
+          .context(
+              Contexts.of(new CalciteConnectionConfigImpl(new Properties())
+                  .set(CalciteConnectionProperty.FORCE_DECORRELATE,
+                      Boolean.toString(false))))
+          .build();
+    }
   }
 
   /** Fluent interface for building a metadata query to be tested. */
diff --git a/core/src/test/java/org/apache/calcite/test/LatticeTest.java b/core/src/test/java/org/apache/calcite/test/LatticeTest.java
index ca7cacd..b4cbcba 100644
--- a/core/src/test/java/org/apache/calcite/test/LatticeTest.java
+++ b/core/src/test/java/org/apache/calcite/test/LatticeTest.java
@@ -21,11 +21,11 @@ import org.apache.calcite.materialize.Lattice;
 import org.apache.calcite.materialize.Lattices;
 import org.apache.calcite.materialize.MaterializationService;
 import org.apache.calcite.plan.RelOptPlanner;
+import org.apache.calcite.plan.RelOptRule;
 import org.apache.calcite.plan.RelOptUtil;
 import org.apache.calcite.rel.rules.AbstractMaterializedViewRule;
 import org.apache.calcite.runtime.Hook;
 import org.apache.calcite.schema.SchemaPlus;
-import org.apache.calcite.test.CalciteAssert.AssertQuery;
 import org.apache.calcite.util.ImmutableBitSet;
 import org.apache.calcite.util.TestUtil;
 
@@ -502,35 +502,34 @@ public class LatticeTest {
 
   private void checkTileAlgorithm(String statisticProvider,
       String expectedExplain) {
+    final RelOptRule[] rules = {
+        AbstractMaterializedViewRule.INSTANCE_PROJECT_FILTER,
+        AbstractMaterializedViewRule.INSTANCE_FILTER,
+        AbstractMaterializedViewRule.INSTANCE_PROJECT_JOIN,
+        AbstractMaterializedViewRule.INSTANCE_JOIN,
+        AbstractMaterializedViewRule.INSTANCE_PROJECT_AGGREGATE,
+        AbstractMaterializedViewRule.INSTANCE_AGGREGATE
+    };
     MaterializationService.setThreadLocal();
     MaterializationService.instance().clear();
-    AssertQuery that = foodmartLatticeModel(statisticProvider)
+    foodmartLatticeModel(statisticProvider)
         .query("select distinct t.\"the_year\", t.\"quarter\"\n"
             + "from \"foodmart\".\"sales_fact_1997\" as s\n"
             + "join \"foodmart\".\"time_by_day\" as t using (\"time_id\")\n")
-        .enableMaterializations(true);
+        .enableMaterializations(true)
 
     // Disable materialization rules from this test. For some reason, there is
     // a weird interaction between these rules and the lattice rewriting that
     // produces non-deterministic rewriting (even when only lattices are present).
     // For more context, see
     // <a href="https://issues.apache.org/jira/browse/CALCITE-2953">[CALCITE-2953]</a>.
-    that.withHook(Hook.PLANNER, (Consumer<RelOptPlanner>) planner -> {
-      ImmutableList
-          .of(
-              AbstractMaterializedViewRule.INSTANCE_PROJECT_FILTER,
-              AbstractMaterializedViewRule.INSTANCE_FILTER,
-              AbstractMaterializedViewRule.INSTANCE_PROJECT_JOIN,
-              AbstractMaterializedViewRule.INSTANCE_JOIN,
-              AbstractMaterializedViewRule.INSTANCE_PROJECT_AGGREGATE,
-              AbstractMaterializedViewRule.INSTANCE_AGGREGATE)
-          .forEach(planner::removeRule);
-    });
+        .withHook(Hook.PLANNER, (Consumer<RelOptPlanner>) planner ->
+            Arrays.asList(rules).forEach(planner::removeRule))
 
     // disable for MySQL; times out running star-join query
     // disable for H2; it thinks our generated SQL has invalid syntax
-    that.enable(CalciteAssert.DB != CalciteAssert.DatabaseInstance.MYSQL
-        && CalciteAssert.DB != CalciteAssert.DatabaseInstance.H2)
+        .enable(CalciteAssert.DB != CalciteAssert.DatabaseInstance.MYSQL
+            && CalciteAssert.DB != CalciteAssert.DatabaseInstance.H2)
         .explainContains(expectedExplain)
         .returnsUnordered("the_year=1997; quarter=Q1",
             "the_year=1997; quarter=Q2",
diff --git a/core/src/test/java/org/apache/calcite/test/RelBuilderTest.java b/core/src/test/java/org/apache/calcite/test/RelBuilderTest.java
index 75feb03..dc0240f 100644
--- a/core/src/test/java/org/apache/calcite/test/RelBuilderTest.java
+++ b/core/src/test/java/org/apache/calcite/test/RelBuilderTest.java
@@ -1501,6 +1501,28 @@ public class RelBuilderTest {
     assertThat(root, hasTree(expected));
   }
 
+  @Test public void testAntiJoin() {
+    // Equivalent SQL:
+    //   SELECT * FROM dept d
+    //   WHERE NOT EXISTS (SELECT 1 FROM emp e WHERE e.deptno = d.deptno)
+    final RelBuilder builder = RelBuilder.create(config().build());
+    RelNode root = builder
+        .scan("DEPT")
+        .scan("EMP")
+        .antiJoin(
+            builder.equals(
+                builder.field(2, 0, "DEPTNO"),
+                builder.field(2, 1, "DEPTNO")))
+        .build();
+    // AntiJoin implemented as LogicalCorrelate with joinType=anti
+    final String expected = ""
+        + "LogicalCorrelate(correlation=[$cor0], joinType=[anti], requiredColumns=[{0}])\n"
+        + "  LogicalTableScan(table=[[scott, DEPT]])\n"
+        + "  LogicalFilter(condition=[=($cor0.DEPTNO, $7)])\n"
+        + "    LogicalTableScan(table=[[scott, EMP]])\n";
+    assertThat(root, hasTree(expected));
+  }
+
   @Test public void testAlias() {
     // Equivalent SQL:
     //   SELECT *
diff --git a/core/src/test/java/org/apache/calcite/test/enumerable/EnumerableCorrelateTest.java b/core/src/test/java/org/apache/calcite/test/enumerable/EnumerableCorrelateTest.java
index 9e3793a..451d95d 100644
--- a/core/src/test/java/org/apache/calcite/test/enumerable/EnumerableCorrelateTest.java
+++ b/core/src/test/java/org/apache/calcite/test/enumerable/EnumerableCorrelateTest.java
@@ -133,6 +133,71 @@ public class EnumerableCorrelateTest {
             "empid=200");
   }
 
+  /** Test case for
+   * <a href="https://issues.apache.org/jira/browse/CALCITE-2920">[CALCITE-2920]
+   * RelBuilder: new method to create an anti-join</a>. */
+  @Test public void antiJoinCorrelate() {
+    tester(false, new JdbcTest.HrSchema())
+        .query("?")
+        .withRel(
+            // Retrieve departments without employees. Equivalent SQL:
+            //   SELECT d.deptno, d.name FROM depts d
+            //   WHERE NOT EXISTS (SELECT 1 FROM emps e WHERE e.deptno = d.deptno)
+            builder -> builder
+                .scan("s", "depts").as("d")
+                .scan("s", "emps").as("e")
+                .antiJoin(
+                    builder.equals(
+                        builder.field(2, "d", "deptno"),
+                        builder.field(2, "e", "deptno")))
+                    .project(
+                        builder.field("deptno"),
+                        builder.field("name"))
+                    .build())
+        .returnsUnordered(
+            "deptno=30; name=Marketing",
+            "deptno=40; name=HR");
+  }
+
+  /** Test case for
+   * <a href="https://issues.apache.org/jira/browse/CALCITE-2920">[CALCITE-2920]
+   * RelBuilder: new method to create an antijoin</a> */
+  @Test public void antiJoinCorrelateWithNullValues() {
+    final Integer salesDeptNo = 10;
+    tester(false, new JdbcTest.HrSchema())
+        .query("?")
+        .withRel(
+            // Retrieve employees from any department other than Sales (deptno 10) whose
+            // commission is different from any Sales employee commission. Since there
+            // is a Sales employee with null commission, the goal is to validate that antiJoin
+            // behaves as a NOT EXISTS (and returns results), and not as a NOT IN (which would
+            // not return any result due to its null handling). Equivalent SQL:
+            //   SELECT empOther.empid, empOther.name FROM emps empOther
+            //   WHERE empOther.deptno <> 10 AND NOT EXISTS
+            //     (SELECT 1 FROM emps empSales
+            //      WHERE empSales.deptno = 10 AND empSales.commission = empOther.commission)
+            builder -> builder
+                .scan("s", "emps").as("empOther")
+                .filter(
+                    builder.notEquals(
+                        builder.field("empOther", "deptno"),
+                        builder.literal(salesDeptNo)))
+                .scan("s", "emps").as("empSales")
+                .filter(
+                    builder.equals(
+                        builder.field("empSales", "deptno"),
+                        builder.literal(salesDeptNo)))
+                .antiJoin(
+                    builder.equals(
+                        builder.field(2, "empOther", "commission"),
+                        builder.field(2, "empSales", "commission")))
+                .project(
+                    builder.field("empid"),
+                    builder.field("name"))
+                .build())
+        .returnsUnordered("empid=200; name=Eric");
+  }
+
   private CalciteAssert.AssertThat tester(boolean forceDecorrelate,
       Object schema) {
     return CalciteAssert.that()
diff --git a/site/_docs/algebra.md b/site/_docs/algebra.md
index a68bb89..8802f81 100644
--- a/site/_docs/algebra.md
+++ b/site/_docs/algebra.md
@@ -275,6 +275,7 @@ return the `RelBuilder`.
 | `sortExchange(distribution, collation)` | Creates a [SortExchange]({{ site.apiRoot }}/org/apache/calcite/rel/core/SortExchange.html).
 | `join(joinType, expr...)`<br/>`join(joinType, exprList)`<br/>`join(joinType, fieldName...)` | Creates a [Join]({{ site.apiRoot }}/org/apache/calcite/rel/core/Join.html) of the two most recent relational expressions.<br/><br/>The first form joins on a boolean expression (multiple conditions are combined using AND).<br/><br/>The last form joins on named fields; each side must have a field of each name.
 | `semiJoin(expr)` | Creates a [SemiJoin]({{ site.apiRoot }}/org/apache/calcite/rel/core/SemiJoin.html) of the two most recent relational expressions.
+| `antiJoin(expr)` | Creates an AntiJoin of the two most recent relational expressions.
 | `union(all [, n])` | Creates a [Union]({{ site.apiRoot }}/org/apache/calcite/rel/core/Union.html) of the `n` (default two) most recent relational expressions.
 | `intersect(all [, n])` | Creates an [Intersect]({{ site.apiRoot }}/org/apache/calcite/rel/core/Intersect.html) of the `n` (default two) most recent relational expressions.
 | `minus(all)` | Creates a [Minus]({{ site.apiRoot }}/org/apache/calcite/rel/core/Minus.html) of the two most recent relational expressions.