You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@calcite.apache.org by jh...@apache.org on 2019/03/30 02:19:41 UTC
[calcite] 02/04: [CALCITE-2920] In RelBuilder,
add antiJoin method (Ruben Quesada Lopez)
This is an automated email from the ASF dual-hosted git repository.
jhyde pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/calcite.git
commit 22fd34f892a2232d393bd5906f8bb1f21f1dff50
Author: rubenada <ru...@gmail.com>
AuthorDate: Fri Mar 15 16:56:54 2019 +0100
[CALCITE-2920] In RelBuilder, add antiJoin method (Ruben Quesada Lopez)
There is no AntiJoin relational operator (for now), so generate a
Correlate instead.
Close apache/calcite#1110
---
.../java/org/apache/calcite/runtime/FlatLists.java | 9 ++-
.../java/org/apache/calcite/sql/SemiJoinType.java | 29 ++++++--
.../java/org/apache/calcite/tools/RelBuilder.java | 79 +++++++++++++++++++++-
.../org/apache/calcite/test/CalciteAssert.java | 24 ++++++-
.../java/org/apache/calcite/test/LatticeTest.java | 31 ++++-----
.../org/apache/calcite/test/RelBuilderTest.java | 22 ++++++
.../test/enumerable/EnumerableCorrelateTest.java | 65 ++++++++++++++++++
site/_docs/algebra.md | 1 +
8 files changed, 231 insertions(+), 29 deletions(-)
diff --git a/core/src/main/java/org/apache/calcite/runtime/FlatLists.java b/core/src/main/java/org/apache/calcite/runtime/FlatLists.java
index a2418f0..a6ea656 100644
--- a/core/src/main/java/org/apache/calcite/runtime/FlatLists.java
+++ b/core/src/main/java/org/apache/calcite/runtime/FlatLists.java
@@ -263,7 +263,14 @@ public class FlatLists {
/** Returns a map that consists of a given map plus an (key, value),
* guaranteed to be an {@link ImmutableMap}. */
public static <K, V> ImmutableMap<K, V> append(Map<K, V> map, K k, V v) {
- return ImmutableMap.<K, V>builder().putAll(map).put(k, v).build();
+ final ImmutableMap.Builder<K, V> builder = ImmutableMap.builder();
+ builder.put(k, v);
+ map.forEach((k2, v2) -> {
+ if (!k.equals(k2)) {
+ builder.put(k2, v2);
+ }
+ });
+ return builder.build();
}
/** Base class for flat lists.
diff --git a/core/src/main/java/org/apache/calcite/sql/SemiJoinType.java b/core/src/main/java/org/apache/calcite/sql/SemiJoinType.java
index 473e812..2569d76 100644
--- a/core/src/main/java/org/apache/calcite/sql/SemiJoinType.java
+++ b/core/src/main/java/org/apache/calcite/sql/SemiJoinType.java
@@ -28,25 +28,40 @@ import java.util.Locale;
*/
public enum SemiJoinType {
/**
- * Inner join
+ * Inner join.
*/
INNER,
/**
- * Left-outer join
+ * Left-outer join.
*/
LEFT,
/**
- * Semi-join
- * <p>Similar to from A ... where a in (select b from B ...)</p>
+ * Semi-join.
+ *
+ * <p>For example, {@code EMP semi-join DEPT} finds all {@code EMP} records
+ * that have a corresponding {@code DEPT} record:
+ *
+ * <blockquote><pre>
+ * SELECT * FROM EMP
+ * WHERE EXISTS (SELECT 1 FROM DEPT
+ * WHERE DEPT.DEPTNO = EMP.DEPTNO)</pre>
+ * </blockquote>
*/
SEMI,
/**
- * Anti-join
- * <p>Similar to from A ... where a NOT in (select b from B ...)</p>
- * <p>Note: if B.b is nullable and B has nulls, no rows must be returned</p>
+ * Anti-join.
+ *
+ * <p>For example, {@code EMP anti-join DEPT} finds all {@code EMP} records
+ * that do not have a corresponding {@code DEPT} record:
+ *
+ * <blockquote><pre>
+ * SELECT * FROM EMP
+ * WHERE NOT EXISTS (SELECT 1 FROM DEPT
+ * WHERE DEPT.DEPTNO = EMP.DEPTNO)</pre>
+ * </blockquote>
*/
ANTI;
diff --git a/core/src/main/java/org/apache/calcite/tools/RelBuilder.java b/core/src/main/java/org/apache/calcite/tools/RelBuilder.java
index 0d2972b..68492c9 100644
--- a/core/src/main/java/org/apache/calcite/tools/RelBuilder.java
+++ b/core/src/main/java/org/apache/calcite/tools/RelBuilder.java
@@ -1788,7 +1788,23 @@ public class RelBuilder {
return join(joinType, conditions);
}
- /** Creates a {@link SemiJoin}. */
+ /** Creates a {@link SemiJoin}.
+ *
+ * <p>A semi-join is a form of join that combines two relational expressions
+ * according to some condition, and outputs only rows from the left input for
+ * which at least one row from the right input matches. It only outputs
+ * columns from the left input, and ignores duplicates on the right.
+ *
+ * <p>For example, {@code EMP semi-join DEPT} finds all {@code EMP} records
+ * that do not have a corresponding {@code DEPT} record, similar to the
+ * following SQL:
+ *
+ * <blockquote><pre>
+ * SELECT * FROM EMP
+ * WHERE EXISTS (SELECT 1 FROM DEPT
+ * WHERE DEPT.DEPTNO = EMP.DEPTNO)</pre>
+ * </blockquote>
+ */
public RelBuilder semiJoin(Iterable<? extends RexNode> conditions) {
final Frame right = stack.pop();
final RelNode semiJoin =
@@ -1797,11 +1813,70 @@ public class RelBuilder {
return this;
}
- /** Creates a {@link SemiJoin}. */
+ /** Creates a {@link SemiJoin}.
+ *
+ * @see #semiJoin(Iterable) */
public RelBuilder semiJoin(RexNode... conditions) {
return semiJoin(ImmutableList.copyOf(conditions));
}
+ /** Creates an anti-join.
+ *
+ * <p>An anti-join is a form of join that combines two relational expressions
+ * according to some condition, but outputs only rows from the left input
+ * for which no rows from the right input match.
+ *
+ * <p>For example, {@code EMP anti-join DEPT} finds all {@code EMP} records
+ * that do not have a corresponding {@code DEPT} record, similar to the
+ * following SQL:
+ *
+ * <blockquote><pre>
+ * SELECT * FROM EMP
+ * WHERE NOT EXISTS (SELECT 1 FROM DEPT
+ * WHERE DEPT.DEPTNO = EMP.DEPTNO)</pre>
+ * </blockquote>
+ */
+ public RelBuilder antiJoin(Iterable<? extends RexNode> conditions) {
+ // There is currently no "AntiJoin" relational expression, so we
+ // simulate it using a LogicalCorrelate with SemiJoinType.ANTI.
+ final RexBuilder rexBuilder = getRexBuilder();
+ final RelNode right = build();
+ final RelNode left = peek();
+ final int leftFieldCount = left.getRowType().getFieldCount();
+ final CorrelationId correlationId = cluster.createCorrel();
+ final RexNode corrVar =
+ rexBuilder.makeCorrel(left.getRowType(), correlationId);
+ final ImmutableBitSet.Builder requiredColumns = ImmutableBitSet.builder();
+
+ // Replace all references of left input with FieldAccess(corrVar, field)
+ final RexNode condition = and(conditions).accept(new RexShuttle() {
+ @Override public RexNode visitInputRef(RexInputRef input) {
+ final int field = input.getIndex();
+ if (field >= leftFieldCount) {
+ return rexBuilder.makeInputRef(input.getType(), input.getIndex()
+ - leftFieldCount);
+ }
+ requiredColumns.set(field);
+ return rexBuilder.makeFieldAccess(corrVar, field);
+ }
+ });
+
+ final RelNode right2 = push(right).filter(condition).build();
+
+ final RelNode antiJoin =
+ correlateFactory.createCorrelate(left, right2, correlationId,
+ requiredColumns.build(), SemiJoinType.ANTI);
+ replaceTop(antiJoin);
+ return this;
+ }
+
+ /** Creates an anti-join.
+ *
+ * @see #antiJoin(Iterable) */
+ public RelBuilder antiJoin(RexNode... conditions) {
+ return antiJoin(ImmutableList.copyOf(conditions));
+ }
+
/** Assigns a table alias to the top entry on the stack. */
public RelBuilder as(final String alias) {
final Frame pair = stack.pop();
diff --git a/core/src/test/java/org/apache/calcite/test/CalciteAssert.java b/core/src/test/java/org/apache/calcite/test/CalciteAssert.java
index 26e29f2..943b2fd 100644
--- a/core/src/test/java/org/apache/calcite/test/CalciteAssert.java
+++ b/core/src/test/java/org/apache/calcite/test/CalciteAssert.java
@@ -22,6 +22,7 @@ import org.apache.calcite.adapter.java.ReflectiveSchema;
import org.apache.calcite.adapter.jdbc.JdbcSchema;
import org.apache.calcite.avatica.ConnectionProperty;
import org.apache.calcite.avatica.util.DateTimeUtils;
+import org.apache.calcite.config.CalciteConnectionConfigImpl;
import org.apache.calcite.config.CalciteConnectionProperty;
import org.apache.calcite.config.CalciteSystemProperty;
import org.apache.calcite.config.Lex;
@@ -31,6 +32,7 @@ import org.apache.calcite.jdbc.CalcitePrepare;
import org.apache.calcite.jdbc.CalciteSchema;
import org.apache.calcite.materialize.Lattice;
import org.apache.calcite.model.ModelHandler;
+import org.apache.calcite.plan.Contexts;
import org.apache.calcite.plan.RelOptUtil;
import org.apache.calcite.rel.RelNode;
import org.apache.calcite.runtime.CalciteException;
@@ -47,6 +49,7 @@ import org.apache.calcite.schema.impl.ViewTableMacro;
import org.apache.calcite.sql.validate.SqlConformanceEnum;
import org.apache.calcite.sql.validate.SqlValidatorException;
import org.apache.calcite.tools.FrameworkConfig;
+import org.apache.calcite.tools.Frameworks;
import org.apache.calcite.tools.RelBuilder;
import org.apache.calcite.util.Closer;
import org.apache.calcite.util.Holder;
@@ -1368,7 +1371,7 @@ public class CalciteAssert {
try (Connection c = createConnection()) {
f.accept(c);
} catch (SQLException e) {
- TestUtil.rethrow(e);
+ throw TestUtil.rethrow(e);
}
return this;
}
@@ -1570,7 +1573,7 @@ public class CalciteAssert {
if (plan != null) {
return;
}
- addHook(Hook.JAVA_PLAN, (Consumer<String>) a0 -> plan = a0);
+ addHook(Hook.JAVA_PLAN, this::setPlan);
withConnection(connection -> {
assertQuery(connection, sql, limit, materializationsEnabled,
hooks, null, checkUpdate, null);
@@ -1578,6 +1581,10 @@ public class CalciteAssert {
});
}
+ private void setPlan(String plan) {
+ this.plan = plan;
+ }
+
/** Runs the query and applies a checker to the generated third-party
* queries. The checker should throw to fail the test if it does not see
* what it wants. This method can be used to check whether a particular
@@ -1655,10 +1662,21 @@ public class CalciteAssert {
return withHook(Hook.STRING_TO_QUERY,
(Consumer<Pair<FrameworkConfig, Holder<CalcitePrepare.Query>>>)
pair -> {
- final RelBuilder b = RelBuilder.create(pair.left);
+ final FrameworkConfig config = forceDecorrelate(pair.left);
+ final RelBuilder b = RelBuilder.create(config);
pair.right.set(CalcitePrepare.Query.of(relFn.apply(b)));
});
}
+
+ /** Creates a {@link FrameworkConfig} that does not decorrelate. */
+ private FrameworkConfig forceDecorrelate(FrameworkConfig config) {
+ return Frameworks.newConfigBuilder(config)
+ .context(
+ Contexts.of(new CalciteConnectionConfigImpl(new Properties())
+ .set(CalciteConnectionProperty.FORCE_DECORRELATE,
+ Boolean.toString(false))))
+ .build();
+ }
}
/** Fluent interface for building a metadata query to be tested. */
diff --git a/core/src/test/java/org/apache/calcite/test/LatticeTest.java b/core/src/test/java/org/apache/calcite/test/LatticeTest.java
index ca7cacd..b4cbcba 100644
--- a/core/src/test/java/org/apache/calcite/test/LatticeTest.java
+++ b/core/src/test/java/org/apache/calcite/test/LatticeTest.java
@@ -21,11 +21,11 @@ import org.apache.calcite.materialize.Lattice;
import org.apache.calcite.materialize.Lattices;
import org.apache.calcite.materialize.MaterializationService;
import org.apache.calcite.plan.RelOptPlanner;
+import org.apache.calcite.plan.RelOptRule;
import org.apache.calcite.plan.RelOptUtil;
import org.apache.calcite.rel.rules.AbstractMaterializedViewRule;
import org.apache.calcite.runtime.Hook;
import org.apache.calcite.schema.SchemaPlus;
-import org.apache.calcite.test.CalciteAssert.AssertQuery;
import org.apache.calcite.util.ImmutableBitSet;
import org.apache.calcite.util.TestUtil;
@@ -502,35 +502,34 @@ public class LatticeTest {
private void checkTileAlgorithm(String statisticProvider,
String expectedExplain) {
+ final RelOptRule[] rules = {
+ AbstractMaterializedViewRule.INSTANCE_PROJECT_FILTER,
+ AbstractMaterializedViewRule.INSTANCE_FILTER,
+ AbstractMaterializedViewRule.INSTANCE_PROJECT_JOIN,
+ AbstractMaterializedViewRule.INSTANCE_JOIN,
+ AbstractMaterializedViewRule.INSTANCE_PROJECT_AGGREGATE,
+ AbstractMaterializedViewRule.INSTANCE_AGGREGATE
+ };
MaterializationService.setThreadLocal();
MaterializationService.instance().clear();
- AssertQuery that = foodmartLatticeModel(statisticProvider)
+ foodmartLatticeModel(statisticProvider)
.query("select distinct t.\"the_year\", t.\"quarter\"\n"
+ "from \"foodmart\".\"sales_fact_1997\" as s\n"
+ "join \"foodmart\".\"time_by_day\" as t using (\"time_id\")\n")
- .enableMaterializations(true);
+ .enableMaterializations(true)
// Disable materialization rules from this test. For some reason, there is
// a weird interaction between these rules and the lattice rewriting that
// produces non-deterministic rewriting (even when only lattices are present).
// For more context, see
// <a href="https://issues.apache.org/jira/browse/CALCITE-2953">[CALCITE-2953]</a>.
- that.withHook(Hook.PLANNER, (Consumer<RelOptPlanner>) planner -> {
- ImmutableList
- .of(
- AbstractMaterializedViewRule.INSTANCE_PROJECT_FILTER,
- AbstractMaterializedViewRule.INSTANCE_FILTER,
- AbstractMaterializedViewRule.INSTANCE_PROJECT_JOIN,
- AbstractMaterializedViewRule.INSTANCE_JOIN,
- AbstractMaterializedViewRule.INSTANCE_PROJECT_AGGREGATE,
- AbstractMaterializedViewRule.INSTANCE_AGGREGATE)
- .forEach(planner::removeRule);
- });
+ .withHook(Hook.PLANNER, (Consumer<RelOptPlanner>) planner ->
+ Arrays.asList(rules).forEach(planner::removeRule))
// disable for MySQL; times out running star-join query
// disable for H2; it thinks our generated SQL has invalid syntax
- that.enable(CalciteAssert.DB != CalciteAssert.DatabaseInstance.MYSQL
- && CalciteAssert.DB != CalciteAssert.DatabaseInstance.H2)
+ .enable(CalciteAssert.DB != CalciteAssert.DatabaseInstance.MYSQL
+ && CalciteAssert.DB != CalciteAssert.DatabaseInstance.H2)
.explainContains(expectedExplain)
.returnsUnordered("the_year=1997; quarter=Q1",
"the_year=1997; quarter=Q2",
diff --git a/core/src/test/java/org/apache/calcite/test/RelBuilderTest.java b/core/src/test/java/org/apache/calcite/test/RelBuilderTest.java
index 75feb03..dc0240f 100644
--- a/core/src/test/java/org/apache/calcite/test/RelBuilderTest.java
+++ b/core/src/test/java/org/apache/calcite/test/RelBuilderTest.java
@@ -1501,6 +1501,28 @@ public class RelBuilderTest {
assertThat(root, hasTree(expected));
}
+ @Test public void testAntiJoin() {
+ // Equivalent SQL:
+ // SELECT * FROM dept d
+ // WHERE NOT EXISTS (SELECT 1 FROM emp e WHERE e.deptno = d.deptno)
+ final RelBuilder builder = RelBuilder.create(config().build());
+ RelNode root = builder
+ .scan("DEPT")
+ .scan("EMP")
+ .antiJoin(
+ builder.equals(
+ builder.field(2, 0, "DEPTNO"),
+ builder.field(2, 1, "DEPTNO")))
+ .build();
+ // AntiJoin implemented as LogicalCorrelate with joinType=anti
+ final String expected = ""
+ + "LogicalCorrelate(correlation=[$cor0], joinType=[anti], requiredColumns=[{0}])\n"
+ + " LogicalTableScan(table=[[scott, DEPT]])\n"
+ + " LogicalFilter(condition=[=($cor0.DEPTNO, $7)])\n"
+ + " LogicalTableScan(table=[[scott, EMP]])\n";
+ assertThat(root, hasTree(expected));
+ }
+
@Test public void testAlias() {
// Equivalent SQL:
// SELECT *
diff --git a/core/src/test/java/org/apache/calcite/test/enumerable/EnumerableCorrelateTest.java b/core/src/test/java/org/apache/calcite/test/enumerable/EnumerableCorrelateTest.java
index 9e3793a..451d95d 100644
--- a/core/src/test/java/org/apache/calcite/test/enumerable/EnumerableCorrelateTest.java
+++ b/core/src/test/java/org/apache/calcite/test/enumerable/EnumerableCorrelateTest.java
@@ -133,6 +133,71 @@ public class EnumerableCorrelateTest {
"empid=200");
}
+ /** Test case for
+ * <a href="https://issues.apache.org/jira/browse/CALCITE-2920">[CALCITE-2920]
+ * RelBuilder: new method to create an anti-join</a>. */
+ @Test public void antiJoinCorrelate() {
+ tester(false, new JdbcTest.HrSchema())
+ .query("?")
+ .withRel(
+ // Retrieve departments without employees. Equivalent SQL:
+ // SELECT d.deptno, d.name FROM depts d
+ // WHERE NOT EXISTS (SELECT 1 FROM emps e WHERE e.deptno = d.deptno)
+ builder -> builder
+ .scan("s", "depts").as("d")
+ .scan("s", "emps").as("e")
+ .antiJoin(
+ builder.equals(
+ builder.field(2, "d", "deptno"),
+ builder.field(2, "e", "deptno")))
+ .project(
+ builder.field("deptno"),
+ builder.field("name"))
+ .build())
+ .returnsUnordered(
+ "deptno=30; name=Marketing",
+ "deptno=40; name=HR");
+ }
+
+ /** Test case for
+ * <a href="https://issues.apache.org/jira/browse/CALCITE-2920">[CALCITE-2920]
+ * RelBuilder: new method to create an antijoin</a> */
+ @Test public void antiJoinCorrelateWithNullValues() {
+ final Integer salesDeptNo = 10;
+ tester(false, new JdbcTest.HrSchema())
+ .query("?")
+ .withRel(
+ // Retrieve employees from any department other than Sales (deptno 10) whose
+ // commission is different from any Sales employee commission. Since there
+ // is a Sales employee with null commission, the goal is to validate that antiJoin
+ // behaves as a NOT EXISTS (and returns results), and not as a NOT IN (which would
+ // not return any result due to its null handling). Equivalent SQL:
+ // SELECT empOther.empid, empOther.name FROM emps empOther
+ // WHERE empOther.deptno <> 10 AND NOT EXISTS
+ // (SELECT 1 FROM emps empSales
+ // WHERE empSales.deptno = 10 AND empSales.commission = empOther.commission)
+ builder -> builder
+ .scan("s", "emps").as("empOther")
+ .filter(
+ builder.notEquals(
+ builder.field("empOther", "deptno"),
+ builder.literal(salesDeptNo)))
+ .scan("s", "emps").as("empSales")
+ .filter(
+ builder.equals(
+ builder.field("empSales", "deptno"),
+ builder.literal(salesDeptNo)))
+ .antiJoin(
+ builder.equals(
+ builder.field(2, "empOther", "commission"),
+ builder.field(2, "empSales", "commission")))
+ .project(
+ builder.field("empid"),
+ builder.field("name"))
+ .build())
+ .returnsUnordered("empid=200; name=Eric");
+ }
+
private CalciteAssert.AssertThat tester(boolean forceDecorrelate,
Object schema) {
return CalciteAssert.that()
diff --git a/site/_docs/algebra.md b/site/_docs/algebra.md
index a68bb89..8802f81 100644
--- a/site/_docs/algebra.md
+++ b/site/_docs/algebra.md
@@ -275,6 +275,7 @@ return the `RelBuilder`.
| `sortExchange(distribution, collation)` | Creates a [SortExchange]({{ site.apiRoot }}/org/apache/calcite/rel/core/SortExchange.html).
| `join(joinType, expr...)`<br/>`join(joinType, exprList)`<br/>`join(joinType, fieldName...)` | Creates a [Join]({{ site.apiRoot }}/org/apache/calcite/rel/core/Join.html) of the two most recent relational expressions.<br/><br/>The first form joins on a boolean expression (multiple conditions are combined using AND).<br/><br/>The last form joins on named fields; each side must have a field of each name.
| `semiJoin(expr)` | Creates a [SemiJoin]({{ site.apiRoot }}/org/apache/calcite/rel/core/SemiJoin.html) of the two most recent relational expressions.
+| `antiJoin(expr)` | Creates an AntiJoin of the two most recent relational expressions.
| `union(all [, n])` | Creates a [Union]({{ site.apiRoot }}/org/apache/calcite/rel/core/Union.html) of the `n` (default two) most recent relational expressions.
| `intersect(all [, n])` | Creates an [Intersect]({{ site.apiRoot }}/org/apache/calcite/rel/core/Intersect.html) of the `n` (default two) most recent relational expressions.
| `minus(all)` | Creates a [Minus]({{ site.apiRoot }}/org/apache/calcite/rel/core/Minus.html) of the two most recent relational expressions.