You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@calcite.apache.org by jc...@apache.org on 2017/11/09 04:36:41 UTC

calcite git commit: [CALCITE-1984] Incorrect rewriting with materialized views using DISTINCT in aggregate functions

Repository: calcite
Updated Branches:
  refs/heads/master 77a354917 -> 81691cef5


[CALCITE-1984] Incorrect rewriting with materialized views using DISTINCT in aggregate functions


Project: http://git-wip-us.apache.org/repos/asf/calcite/repo
Commit: http://git-wip-us.apache.org/repos/asf/calcite/commit/81691cef
Tree: http://git-wip-us.apache.org/repos/asf/calcite/tree/81691cef
Diff: http://git-wip-us.apache.org/repos/asf/calcite/diff/81691cef

Branch: refs/heads/master
Commit: 81691cef5c889f27ae25e698373bdf6b46a51722
Parents: 77a3549
Author: Jesus Camacho Rodriguez <jc...@apache.org>
Authored: Wed Nov 8 20:35:20 2017 -0800
Committer: Jesus Camacho Rodriguez <jc...@apache.org>
Committed: Wed Nov 8 20:35:30 2017 -0800

----------------------------------------------------------------------
 .../rel/rules/AbstractMaterializedViewRule.java |  4 +-
 .../org/apache/calcite/tools/RelBuilder.java    |  9 +-
 .../calcite/test/MaterializationTest.java       | 89 +++++++++++++++++++-
 3 files changed, 98 insertions(+), 4 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/calcite/blob/81691cef/core/src/main/java/org/apache/calcite/rel/rules/AbstractMaterializedViewRule.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/calcite/rel/rules/AbstractMaterializedViewRule.java b/core/src/main/java/org/apache/calcite/rel/rules/AbstractMaterializedViewRule.java
index e2ea81d..f7a2334 100644
--- a/core/src/main/java/org/apache/calcite/rel/rules/AbstractMaterializedViewRule.java
+++ b/core/src/main/java/org/apache/calcite/rel/rules/AbstractMaterializedViewRule.java
@@ -1054,7 +1054,7 @@ public abstract class AbstractMaterializedViewRule extends RelOptRule {
                     aggregate.getGroupCount() + i)));
       }
       RelNode result = relBuilder
-          .aggregate(relBuilder.groupKey(groupSet, null), aggregateCalls)
+          .aggregate(relBuilder.groupKey(groupSet, null), aggregateCalls, true)
           .build();
       if (topProject != null) {
         // Top project
@@ -1273,7 +1273,7 @@ public abstract class AbstractMaterializedViewRule extends RelOptRule {
         }
         result = relBuilder
             .push(result)
-            .aggregate(relBuilder.groupKey(groupSet, null), aggregateCalls)
+            .aggregate(relBuilder.groupKey(groupSet, null), aggregateCalls, true)
             .build();
         // We introduce a project on top, as group by columns order is lost
         List<RexNode> projects = new ArrayList<>();

http://git-wip-us.apache.org/repos/asf/calcite/blob/81691cef/core/src/main/java/org/apache/calcite/tools/RelBuilder.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/calcite/tools/RelBuilder.java b/core/src/main/java/org/apache/calcite/tools/RelBuilder.java
index 0b871f9..8000085 100644
--- a/core/src/main/java/org/apache/calcite/tools/RelBuilder.java
+++ b/core/src/main/java/org/apache/calcite/tools/RelBuilder.java
@@ -1168,6 +1168,13 @@ public class RelBuilder {
   /** Creates an {@link org.apache.calcite.rel.core.Aggregate} with a list of
    * calls. */
   public RelBuilder aggregate(GroupKey groupKey, Iterable<AggCall> aggCalls) {
+    return aggregate(groupKey, aggCalls, false);
+  }
+
+  /** Creates an {@link org.apache.calcite.rel.core.Aggregate} with a list of
+   * calls. It allows to force the creation of the Aggregate operator even
+   * if it is not needed. */
+  public RelBuilder aggregate(GroupKey groupKey, Iterable<AggCall> aggCalls, boolean force) {
     final Registrar registrar = new Registrar();
     registrar.extraNodes.addAll(fields());
     registrar.names.addAll(peek().getRowType().getFieldNames());
@@ -1175,7 +1182,7 @@ public class RelBuilder {
     final ImmutableBitSet groupSet =
         ImmutableBitSet.of(registrar.registerExpressions(groupKey_.nodes));
   label:
-    if (Iterables.isEmpty(aggCalls) && !groupKey_.indicator) {
+    if (!force && Iterables.isEmpty(aggCalls) && !groupKey_.indicator) {
       final RelMetadataQuery mq = peek().getCluster().getMetadataQuery();
       if (groupSet.isEmpty()) {
         final Double minRowCount = mq.getMinRowCount(peek());

http://git-wip-us.apache.org/repos/asf/calcite/blob/81691cef/core/src/test/java/org/apache/calcite/test/MaterializationTest.java
----------------------------------------------------------------------
diff --git a/core/src/test/java/org/apache/calcite/test/MaterializationTest.java b/core/src/test/java/org/apache/calcite/test/MaterializationTest.java
index 20093f1..04cbe62 100644
--- a/core/src/test/java/org/apache/calcite/test/MaterializationTest.java
+++ b/core/src/test/java/org/apache/calcite/test/MaterializationTest.java
@@ -1582,6 +1582,19 @@ public class MaterializationTest {
         HR_FKUK_MODEL);
   }
 
+  @Test public void testJoinAggregateMaterializationAggregateFuncs13() {
+    checkNoMaterialize(
+        "select \"dependents\".\"empid\", \"emps\".\"deptno\", count(distinct \"salary\") as s\n"
+            + "from \"emps\"\n"
+            + "join \"dependents\" on (\"emps\".\"empid\" = \"dependents\".\"empid\")\n"
+            + "group by \"dependents\".\"empid\", \"emps\".\"deptno\"",
+        "select \"emps\".\"deptno\", count(\"salary\") as s\n"
+            + "from \"emps\"\n"
+            + "join \"dependents\" on (\"emps\".\"empid\" = \"dependents\".\"empid\")\n"
+            + "group by \"dependents\".\"empid\", \"emps\".\"deptno\"",
+        HR_FKUK_MODEL);
+  }
+
   @Test public void testJoinMaterialization4() {
     checkMaterialize(
         "select \"empid\" \"deptno\" from \"emps\"\n"
@@ -1995,6 +2008,80 @@ public class MaterializationTest {
     }
   }
 
+  @Test public void testAggregateMaterializationOnCountDistinctQuery1() {
+    // The column empid is already unique, thus DISTINCT is not
+    // in the COUNT of the resulting rewriting
+    checkMaterialize(
+        "select \"deptno\", \"empid\", \"salary\"\n"
+            + "from \"emps\"\n"
+            + "group by \"deptno\", \"empid\", \"salary\"",
+        "select \"deptno\", count(distinct \"empid\") as c from (\n"
+            + "select \"deptno\", \"empid\"\n"
+            + "from \"emps\"\n"
+            + "group by \"deptno\", \"empid\")\n"
+            + "group by \"deptno\"",
+        HR_FKUK_MODEL,
+        CalciteAssert.checkResultContains(
+            "EnumerableAggregate(group=[{0}], C=[COUNT($1)])\n"
+                + "  EnumerableTableScan(table=[[hr, m0]]"));
+  }
+
+  @Test public void testAggregateMaterializationOnCountDistinctQuery2() {
+    // The column empid is already unique, thus DISTINCT is not
+    // in the COUNT of the resulting rewriting
+    checkMaterialize(
+        "select \"deptno\", \"salary\", \"empid\"\n"
+            + "from \"emps\"\n"
+            + "group by \"deptno\", \"salary\", \"empid\"",
+        "select \"deptno\", count(distinct \"empid\") as c from (\n"
+            + "select \"deptno\", \"empid\"\n"
+            + "from \"emps\"\n"
+            + "group by \"deptno\", \"empid\")\n"
+            + "group by \"deptno\"",
+        HR_FKUK_MODEL,
+        CalciteAssert.checkResultContains(
+            "EnumerableAggregate(group=[{0}], C=[COUNT($2)])\n"
+                + "  EnumerableTableScan(table=[[hr, m0]]"));
+  }
+
+  @Test public void testAggregateMaterializationOnCountDistinctQuery3() {
+    // The column salary is not unique, thus we end up with
+    // a different rewriting
+    checkMaterialize(
+        "select \"deptno\", \"empid\", \"salary\"\n"
+            + "from \"emps\"\n"
+            + "group by \"deptno\", \"empid\", \"salary\"",
+        "select \"deptno\", count(distinct \"salary\") from (\n"
+            + "select \"deptno\", \"salary\"\n"
+            + "from \"emps\"\n"
+            + "group by \"deptno\", \"salary\")\n"
+            + "group by \"deptno\"",
+        HR_FKUK_MODEL,
+        CalciteAssert.checkResultContains(
+            "EnumerableAggregate(group=[{0}], EXPR$1=[COUNT($1)])\n"
+                + "  EnumerableAggregate(group=[{0, 2}])\n"
+                + "    EnumerableTableScan(table=[[hr, m0]]"));
+  }
+
+  @Test public void testAggregateMaterializationOnCountDistinctQuery4() {
+    // Although there is no DISTINCT in the COUNT, this is
+    // equivalent to previous query
+    checkMaterialize(
+        "select \"deptno\", \"salary\", \"empid\"\n"
+            + "from \"emps\"\n"
+            + "group by \"deptno\", \"salary\", \"empid\"",
+        "select \"deptno\", count(\"salary\") from (\n"
+            + "select \"deptno\", \"salary\"\n"
+            + "from \"emps\"\n"
+            + "group by \"deptno\", \"salary\")\n"
+            + "group by \"deptno\"",
+        HR_FKUK_MODEL,
+        CalciteAssert.checkResultContains(
+            "EnumerableAggregate(group=[{0}], EXPR$1=[COUNT()])\n"
+                + "  EnumerableAggregate(group=[{0, 1}])\n"
+                + "    EnumerableTableScan(table=[[hr, m0]]"));
+  }
+
   @Test public void testMaterializationSubstitution() {
     String q = "select *\n"
         + "from (select * from \"emps\" where \"empid\" < 300)\n"
@@ -2127,7 +2214,7 @@ public class MaterializationTest {
         new Employee(100, 10, "Bill", 10000, 1000),
         new Employee(200, 20, "Eric", 8000, 500),
         new Employee(150, 10, "Sebastian", 7000, null),
-        new Employee(110, 10, "Theodore", 11500, 250),
+        new Employee(110, 10, "Theodore", 10000, 250),
     };
     public final Department[] depts = {
         new Department(10, "Sales", Arrays.asList(emps[0], emps[2], emps[3]),