You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@calcite.apache.org by jh...@apache.org on 2017/01/19 23:47:33 UTC
[1/4] calcite git commit: Druid adapter: Add enum Granularity
Repository: calcite
Updated Branches:
refs/heads/master aadc62d4c -> e5c9f2ed3
Druid adapter: Add enum Granularity
Project: http://git-wip-us.apache.org/repos/asf/calcite/repo
Commit: http://git-wip-us.apache.org/repos/asf/calcite/commit/a6219028
Tree: http://git-wip-us.apache.org/repos/asf/calcite/tree/a6219028
Diff: http://git-wip-us.apache.org/repos/asf/calcite/diff/a6219028
Branch: refs/heads/master
Commit: a621902869d0e95b8b46c58e33e870ef1a76c17e
Parents: aadc62d
Author: Julian Hyde <jh...@apache.org>
Authored: Tue Jan 17 16:13:11 2017 -0800
Committer: Julian Hyde <jh...@apache.org>
Committed: Thu Jan 19 10:43:45 2017 -0800
----------------------------------------------------------------------
.../adapter/druid/DruidDateTimeUtils.java | 11 +++-
.../calcite/adapter/druid/DruidQuery.java | 16 ++---
.../calcite/adapter/druid/Granularity.java | 36 ++++++++++
.../org/apache/calcite/test/DruidAdapterIT.java | 69 +++++++++++++++++---
4 files changed, 112 insertions(+), 20 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/calcite/blob/a6219028/druid/src/main/java/org/apache/calcite/adapter/druid/DruidDateTimeUtils.java
----------------------------------------------------------------------
diff --git a/druid/src/main/java/org/apache/calcite/adapter/druid/DruidDateTimeUtils.java b/druid/src/main/java/org/apache/calcite/adapter/druid/DruidDateTimeUtils.java
index 4c81b60..84ae181 100644
--- a/druid/src/main/java/org/apache/calcite/adapter/druid/DruidDateTimeUtils.java
+++ b/druid/src/main/java/org/apache/calcite/adapter/druid/DruidDateTimeUtils.java
@@ -259,7 +259,7 @@ public class DruidDateTimeUtils {
* @param call the function call
* @return the granularity, or null if it cannot be inferred
*/
- public static String extractGranularity(RexCall call) {
+ public static Granularity extractGranularity(RexCall call) {
if (call.getKind() != SqlKind.FLOOR
|| call.getOperands().size() != 2) {
return null;
@@ -271,14 +271,21 @@ public class DruidDateTimeUtils {
}
switch (timeUnit) {
case YEAR:
+ return Granularity.YEAR;
case QUARTER:
+ return Granularity.QUARTER;
case MONTH:
+ return Granularity.MONTH;
case WEEK:
+ return Granularity.WEEK;
case DAY:
+ return Granularity.DAY;
case HOUR:
+ return Granularity.HOUR;
case MINUTE:
+ return Granularity.MINUTE;
case SECOND:
- return timeUnit.name();
+ return Granularity.SECOND;
default:
return null;
}
http://git-wip-us.apache.org/repos/asf/calcite/blob/a6219028/druid/src/main/java/org/apache/calcite/adapter/druid/DruidQuery.java
----------------------------------------------------------------------
diff --git a/druid/src/main/java/org/apache/calcite/adapter/druid/DruidQuery.java b/druid/src/main/java/org/apache/calcite/adapter/druid/DruidQuery.java
index 46832fa..97d4484 100644
--- a/druid/src/main/java/org/apache/calcite/adapter/druid/DruidQuery.java
+++ b/druid/src/main/java/org/apache/calcite/adapter/druid/DruidQuery.java
@@ -441,7 +441,7 @@ public class DruidQuery extends AbstractRelNode implements BindableRel {
// executed as a Timeseries, TopN, or GroupBy in Druid
final List<String> dimensions = new ArrayList<>();
final List<JsonAggregation> aggregations = new ArrayList<>();
- String granularity = "all";
+ Granularity granularity = Granularity.ALL;
Direction timeSeriesDirection = null;
JsonLimit limit = null;
if (groupSet != null) {
@@ -461,7 +461,7 @@ public class DruidQuery extends AbstractRelNode implements BindableRel {
final String origin = druidTable.getRowType(getCluster().getTypeFactory())
.getFieldList().get(ref.getIndex()).getName();
if (origin.equals(druidTable.timestampFieldName)) {
- granularity = "none";
+ granularity = Granularity.NONE;
builder.add(s);
assert timePositionIdx == -1;
timePositionIdx = groupKey;
@@ -472,7 +472,7 @@ public class DruidQuery extends AbstractRelNode implements BindableRel {
} else if (project instanceof RexCall) {
// Call, check if we should infer granularity
final RexCall call = (RexCall) project;
- final String funcGranularity =
+ final Granularity funcGranularity =
DruidDateTimeUtils.extractGranularity(call);
if (funcGranularity != null) {
granularity = funcGranularity;
@@ -491,7 +491,7 @@ public class DruidQuery extends AbstractRelNode implements BindableRel {
for (int groupKey : groupSet) {
final String s = fieldNames.get(groupKey);
if (s.equals(druidTable.timestampFieldName)) {
- granularity = "NONE";
+ granularity = Granularity.NONE;
builder.add(s);
assert timePositionIdx == -1;
timePositionIdx = groupKey;
@@ -562,7 +562,7 @@ public class DruidQuery extends AbstractRelNode implements BindableRel {
generator.writeStringField("dataSource", druidTable.dataSource);
generator.writeBooleanField("descending", timeSeriesDirection != null
&& timeSeriesDirection == Direction.DESCENDING);
- generator.writeStringField("granularity", granularity);
+ generator.writeStringField("granularity", granularity.value);
writeFieldIf(generator, "filter", jsonFilter);
writeField(generator, "aggregations", aggregations);
writeFieldIf(generator, "postAggregations", null);
@@ -576,7 +576,7 @@ public class DruidQuery extends AbstractRelNode implements BindableRel {
generator.writeStringField("queryType", "topN");
generator.writeStringField("dataSource", druidTable.dataSource);
- generator.writeStringField("granularity", granularity);
+ generator.writeStringField("granularity", granularity.value);
generator.writeStringField("dimension", dimensions.get(0));
generator.writeStringField("metric", fieldNames.get(collationIndexes.get(0)));
writeFieldIf(generator, "filter", jsonFilter);
@@ -600,7 +600,7 @@ public class DruidQuery extends AbstractRelNode implements BindableRel {
generator.writeStringField("queryType", "groupBy");
generator.writeStringField("dataSource", druidTable.dataSource);
- generator.writeStringField("granularity", granularity);
+ generator.writeStringField("granularity", granularity.value);
writeField(generator, "dimensions", dimensions);
writeFieldIf(generator, "limitSpec", limit);
writeFieldIf(generator, "filter", jsonFilter);
@@ -622,7 +622,7 @@ public class DruidQuery extends AbstractRelNode implements BindableRel {
writeFieldIf(generator, "filter", jsonFilter);
writeField(generator, "dimensions", translator.dimensions);
writeField(generator, "metrics", translator.metrics);
- generator.writeStringField("granularity", granularity);
+ generator.writeStringField("granularity", granularity.value);
generator.writeFieldName("pagingSpec");
generator.writeStartObject();
http://git-wip-us.apache.org/repos/asf/calcite/blob/a6219028/druid/src/main/java/org/apache/calcite/adapter/druid/Granularity.java
----------------------------------------------------------------------
diff --git a/druid/src/main/java/org/apache/calcite/adapter/druid/Granularity.java b/druid/src/main/java/org/apache/calcite/adapter/druid/Granularity.java
new file mode 100644
index 0000000..b3f1e85
--- /dev/null
+++ b/druid/src/main/java/org/apache/calcite/adapter/druid/Granularity.java
@@ -0,0 +1,36 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to you under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.calcite.adapter.druid;
+
+/** Granularity of a Druid query. */
+public enum Granularity {
+ ALL,
+ YEAR,
+ QUARTER,
+ MONTH,
+ WEEK,
+ DAY,
+ HOUR,
+ MINUTE,
+ SECOND,
+ NONE;
+
+ /** JSON attribute value in a Druid query. */
+ public final String value = name().toLowerCase();
+}
+
+// End Granularity.java
http://git-wip-us.apache.org/repos/asf/calcite/blob/a6219028/druid/src/test/java/org/apache/calcite/test/DruidAdapterIT.java
----------------------------------------------------------------------
diff --git a/druid/src/test/java/org/apache/calcite/test/DruidAdapterIT.java b/druid/src/test/java/org/apache/calcite/test/DruidAdapterIT.java
index 3a52814..75331b7 100644
--- a/druid/src/test/java/org/apache/calcite/test/DruidAdapterIT.java
+++ b/druid/src/test/java/org/apache/calcite/test/DruidAdapterIT.java
@@ -182,7 +182,7 @@ public class DruidAdapterIT {
+ " BindableProject(EXPR$0=[$1])\n"
+ " DruidQuery(table=[[wiki, wikiticker]], intervals=[[1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z]], projects=[[FLOOR($0, FLAG(DAY)), $1]], groups=[{0}], aggs=[[SUM($1)]])\n";
final String druidQuery = "{'queryType':'timeseries',"
- + "'dataSource':'wikiticker','descending':false,'granularity':'DAY',"
+ + "'dataSource':'wikiticker','descending':false,'granularity':'day',"
+ "'aggregations':[{'type':'longSum','name':'EXPR$0','fieldName':'added'}],"
+ "'intervals':['1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z']}";
sql(sql, WIKI_AUTO2)
@@ -220,7 +220,7 @@ public class DruidAdapterIT {
+ "EnumerableInterpreter\n"
+ " DruidQuery(table=[[wiki, wikiticker]], intervals=[[1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z]], projects=[[FLOOR($0, FLAG(DAY)), $1]], groups=[{0}], aggs=[[SUM($1)]])\n";
final String druidQuery = "{'queryType':'timeseries',"
- + "'dataSource':'wikiticker','descending':false,'granularity':'DAY',"
+ + "'dataSource':'wikiticker','descending':false,'granularity':'day',"
+ "'aggregations':[{'type':'longSum','name':'EXPR$1','fieldName':'added'}],"
+ "'intervals':['1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z']}";
sql(sql, WIKI_AUTO2)
@@ -239,11 +239,11 @@ public class DruidAdapterIT {
+ "order by \"s\" desc";
final String explain = "PLAN="
+ "EnumerableInterpreter\n"
- + " BindableSort(sort0=[$0], dir0=[DESC])\n"
- + " BindableProject(s=[$2], page=[$0], day=[$1])\n"
+ + " BindableProject(s=[$2], page=[$0], day=[$1])\n"
+ + " BindableSort(sort0=[$2], dir0=[DESC])\n"
+ " DruidQuery(table=[[wiki, wikiticker]], intervals=[[1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z]], projects=[[$17, FLOOR($0, FLAG(DAY)), $1]], groups=[{0, 1}], aggs=[[SUM($2)]])\n";
final String druidQuery = "{'queryType':'groupBy',"
- + "'dataSource':'wikiticker','granularity':'DAY','dimensions':['page'],"
+ + "'dataSource':'wikiticker','granularity':'day','dimensions':['page'],"
+ "'limitSpec':{'type':'default'},"
+ "'aggregations':[{'type':'longSum','name':'s','fieldName':'added'}],"
+ "'intervals':['1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z']}";
@@ -433,6 +433,55 @@ public class DruidAdapterIT {
.queryContains(druidChecker(druidQuery));
}
+ /** Test case for
+ * <a href="https://issues.apache.org/jira/browse/CALCITE-1578">[CALCITE-1578]
+ * Druid adapter: wrong semantics of topN query limit with granularity</a>. */
+ @Test public void testGroupBySortLimit() {
+ final String sql = "select \"brand_name\", \"gender\", sum(\"unit_sales\") as s\n"
+ + "from \"foodmart\"\n"
+ + "group by \"brand_name\", \"gender\"\n"
+ + "order by s desc limit 3";
+ final String druidQuery = "{'queryType':'groupBy','dataSource':'foodmart',"
+ + "'granularity':'all','dimensions':['brand_name','gender'],"
+ + "'limitSpec':{'type':'default','limit':3,'columns':[{'dimension':'S','direction':'descending'}]},"
+ + "'aggregations':[{'type':'longSum','name':'S','fieldName':'unit_sales'}],"
+ + "'intervals':['1900-01-09T00:00:00.000Z/2992-01-10T00:00:00.000Z']}";
+ final String explain = "PLAN=EnumerableInterpreter\n"
+ + " DruidQuery(table=[[foodmart, foodmart]], "
+ + "intervals=[[1900-01-09T00:00:00.000Z/2992-01-10T00:00:00.000Z]], "
+ + "groups=[{2, 39}], aggs=[[SUM($89)]], sort0=[2], dir0=[DESC], fetch=[3])\n";
+ sql(sql)
+ .runs()
+ .returnsOrdered("brand_name=Hermanos; gender=M; S=4286",
+ "brand_name=Hermanos; gender=F; S=4183",
+ "brand_name=Tell Tale; gender=F; S=4033")
+ .explainContains(explain)
+ .queryContains(druidChecker(druidQuery));
+ }
+
+ @Test public void testGroupBySingleSortLimit() {
+ final String sql = "select \"brand_name\", sum(\"unit_sales\") as s\n"
+ + "from \"foodmart\"\n"
+ + "group by \"brand_name\"\n"
+ + "order by s desc limit 3";
+ final String druidQuery = "{'queryType':'topN','dataSource':'foodmart',"
+ + "'granularity':'all','dimension':'brand_name','metric':'S',"
+ + "'aggregations':[{'type':'longSum','name':'S','fieldName':'unit_sales'}],"
+ + "'intervals':['1900-01-09T00:00:00.000Z/2992-01-10T00:00:00.000Z'],"
+ + "'threshold':3}";
+ final String explain = "PLAN=EnumerableInterpreter\n"
+ + " DruidQuery(table=[[foodmart, foodmart]], "
+ + "intervals=[[1900-01-09T00:00:00.000Z/2992-01-10T00:00:00.000Z]], "
+ + "groups=[{2}], aggs=[[SUM($89)]], sort0=[1], dir0=[DESC], fetch=[3])\n";
+ sql(sql)
+ .runs()
+ .returnsOrdered("brand_name=Hermanos; S=8469",
+ "brand_name=Tell Tale; S=7877",
+ "brand_name=Ebony; S=7438")
+ .explainContains(explain)
+ .queryContains(druidChecker(druidQuery));
+ }
+
/** Tests a query that contains no GROUP BY and is therefore executed as a
* Druid "select" query. */
@Test public void testFilterSortDesc() {
@@ -693,7 +742,7 @@ public class DruidAdapterIT {
+ "from \"foodmart\"\n"
+ "group by floor(\"timestamp\" to MONTH)";
String druidQuery = "{'queryType':'timeseries','dataSource':'foodmart',"
- + "'descending':false,'granularity':'MONTH',"
+ + "'descending':false,'granularity':'month',"
+ "'aggregations':[{'type':'longSum','name':'S','fieldName':'unit_sales'},"
+ "{'type':'count','name':'C','fieldName':'store_sqft'}],"
+ "'intervals':['1900-01-09T00:00:00.000Z/2992-01-10T00:00:00.000Z']}";
@@ -709,7 +758,7 @@ public class DruidAdapterIT {
+ "from \"foodmart\"\n"
+ "group by floor(\"timestamp\" to DAY)";
String druidQuery = "{'queryType':'timeseries','dataSource':'foodmart',"
- + "'descending':false,'granularity':'DAY',"
+ + "'descending':false,'granularity':'day',"
+ "'aggregations':[{'type':'longSum','name':'S','fieldName':'unit_sales'},"
+ "{'type':'count','name':'C','fieldName':'store_sqft'}],"
+ "'intervals':['1900-01-09T00:00:00.000Z/2992-01-10T00:00:00.000Z']}";
@@ -727,7 +776,7 @@ public class DruidAdapterIT {
+ " \"timestamp\" < '1998-01-01 00:00:00'\n"
+ "group by floor(\"timestamp\" to MONTH)";
String druidQuery = "{'queryType':'timeseries','dataSource':'foodmart',"
- + "'descending':false,'granularity':'MONTH',"
+ + "'descending':false,'granularity':'month',"
+ "'aggregations':[{'type':'longSum','name':'S','fieldName':'unit_sales'},"
+ "{'type':'count','name':'C','fieldName':'store_sqft'}],"
+ "'intervals':['1996-01-01T00:00:00.000Z/1998-01-01T00:00:00.000Z']}";
@@ -749,7 +798,7 @@ public class DruidAdapterIT {
+ " BindableProject(S=[$2], M=[$3], P=[$0])\n"
+ " DruidQuery(table=[[foodmart, foodmart]], intervals=[[1900-01-09T00:00:00.000Z/2992-01-10T00:00:00.000Z]], projects=[[$30, FLOOR($0, FLAG(MONTH)), $89]], groups=[{0, 1}], aggs=[[SUM($2), MAX($2)]], sort0=[2], dir0=[DESC], fetch=[3])";
final String druidQuery = "{'queryType':'topN','dataSource':'foodmart',"
- + "'granularity':'MONTH','dimension':'state_province','metric':'S',"
+ + "'granularity':'month','dimension':'state_province','metric':'S',"
+ "'aggregations':[{'type':'longSum','name':'S','fieldName':'unit_sales'},"
+ "{'type':'longMax','name':'M','fieldName':'unit_sales'}],"
+ "'intervals':['1900-01-09T00:00:00.000Z/2992-01-10T00:00:00.000Z'],'threshold':3}";
@@ -773,7 +822,7 @@ public class DruidAdapterIT {
+ " BindableProject(S=[$2], M=[$3], P=[$0])\n"
+ " DruidQuery(table=[[foodmart, foodmart]], intervals=[[1997-01-01T00:00:00.000Z/1997-09-01T00:00:00.000Z]], projects=[[$30, FLOOR($0, FLAG(DAY)), $89]], groups=[{0, 1}], aggs=[[SUM($2), MAX($2)]], sort0=[2], dir0=[DESC], fetch=[3])";
final String druidQuery = "{'queryType':'topN','dataSource':'foodmart',"
- + "'granularity':'DAY','dimension':'state_province','metric':'S',"
+ + "'granularity':'day','dimension':'state_province','metric':'S',"
+ "'aggregations':[{'type':'longSum','name':'S','fieldName':'unit_sales'},"
+ "{'type':'longMax','name':'M','fieldName':'unit_sales'}],"
+ "'intervals':['1997-01-01T00:00:00.000Z/1997-09-01T00:00:00.000Z'],'threshold':3}";
[4/4] calcite git commit: [CALCITE-1577] Druid adapter: Incorrect
result - limit on timestamp disappears
Posted by jh...@apache.org.
[CALCITE-1577] Druid adapter: Incorrect result - limit on timestamp disappears
Close apache/calcite#355
Project: http://git-wip-us.apache.org/repos/asf/calcite/repo
Commit: http://git-wip-us.apache.org/repos/asf/calcite/commit/e5c9f2ed
Tree: http://git-wip-us.apache.org/repos/asf/calcite/tree/e5c9f2ed
Diff: http://git-wip-us.apache.org/repos/asf/calcite/diff/e5c9f2ed
Branch: refs/heads/master
Commit: e5c9f2ed3d44e690185ff7c5da25a9d655fbfc9e
Parents: 517bf62
Author: Jesus Camacho Rodriguez <jc...@apache.org>
Authored: Thu Jan 19 17:57:21 2017 +0000
Committer: Julian Hyde <jh...@apache.org>
Committed: Thu Jan 19 12:27:59 2017 -0800
----------------------------------------------------------------------
.../calcite/adapter/druid/DruidRules.java | 7 +++
.../org/apache/calcite/test/DruidAdapterIT.java | 54 ++++++++++++++++++++
2 files changed, 61 insertions(+)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/calcite/blob/e5c9f2ed/druid/src/main/java/org/apache/calcite/adapter/druid/DruidRules.java
----------------------------------------------------------------------
diff --git a/druid/src/main/java/org/apache/calcite/adapter/druid/DruidRules.java b/druid/src/main/java/org/apache/calcite/adapter/druid/DruidRules.java
index 1b16984..877df2a 100644
--- a/druid/src/main/java/org/apache/calcite/adapter/druid/DruidRules.java
+++ b/druid/src/main/java/org/apache/calcite/adapter/druid/DruidRules.java
@@ -477,6 +477,7 @@ public class DruidRules {
boolean refsTimestamp =
checkTimestampRefOnQuery(positionsReferenced.build(), topAgg.getInput(), query);
if (refsTimestamp && metricsRefs != 0) {
+ // Metrics reference timestamp too
return false;
}
// If the aggregate is grouping by timestamp (or a function of the
@@ -488,6 +489,12 @@ public class DruidRules {
if (aggregateRefsTimestamp && metricsRefs != 0) {
return false;
}
+ if (refsTimestamp
+ && sort.collation.getFieldCollations().size() == 1
+ && topAgg.getGroupCount() == 1) {
+ // Timeseries query: if it has a limit, we cannot push
+ return !RelOptUtil.isLimit(sort);
+ }
return true;
}
// If it is going to be a Druid select operator, we push the limit if
http://git-wip-us.apache.org/repos/asf/calcite/blob/e5c9f2ed/druid/src/test/java/org/apache/calcite/test/DruidAdapterIT.java
----------------------------------------------------------------------
diff --git a/druid/src/test/java/org/apache/calcite/test/DruidAdapterIT.java b/druid/src/test/java/org/apache/calcite/test/DruidAdapterIT.java
index 965f6ac..97b8188 100644
--- a/druid/src/test/java/org/apache/calcite/test/DruidAdapterIT.java
+++ b/druid/src/test/java/org/apache/calcite/test/DruidAdapterIT.java
@@ -890,6 +890,60 @@ public class DruidAdapterIT {
.queryContains(druidChecker(druidQuery));
}
+ /** Test case for
+ * <a href="https://issues.apache.org/jira/browse/CALCITE-1577">[CALCITE-1577]
+ * Druid adapter: Incorrect result - limit on timestamp disappears</a>. */
+ @Test public void testGroupByMonthGranularitySort() {
+ final String sql = "select floor(\"timestamp\" to MONTH) as m,\n"
+ + " sum(\"unit_sales\") as s,\n"
+ + " count(\"store_sqft\") as c\n"
+ + "from \"foodmart\"\n"
+ + "group by floor(\"timestamp\" to MONTH)\n"
+ + "order by floor(\"timestamp\" to MONTH) ASC";
+ final String explain = "PLAN="
+ + "EnumerableInterpreter\n"
+ + " BindableSort(sort0=[$0], dir0=[ASC])\n"
+ + " DruidQuery(table=[[foodmart, foodmart]], "
+ + "intervals=[[1900-01-09T00:00:00.000Z/2992-01-10T00:00:00.000Z]], "
+ + "projects=[[FLOOR($0, FLAG(MONTH)), $89, $71]], groups=[{0}], "
+ + "aggs=[[SUM($1), COUNT($2)]])";
+ sql(sql)
+ .returnsOrdered("M=1997-01-01 00:00:00; S=21628; C=7033",
+ "M=1997-02-01 00:00:00; S=20957; C=6844",
+ "M=1997-03-01 00:00:00; S=23706; C=7710",
+ "M=1997-04-01 00:00:00; S=20179; C=6588",
+ "M=1997-05-01 00:00:00; S=21081; C=6865",
+ "M=1997-06-01 00:00:00; S=21350; C=6912",
+ "M=1997-07-01 00:00:00; S=23763; C=7752",
+ "M=1997-08-01 00:00:00; S=21697; C=7038",
+ "M=1997-09-01 00:00:00; S=20388; C=6662",
+ "M=1997-10-01 00:00:00; S=19958; C=6478",
+ "M=1997-11-01 00:00:00; S=25270; C=8231",
+ "M=1997-12-01 00:00:00; S=26796; C=8716")
+ .explainContains(explain);
+ }
+
+ @Test public void testGroupByMonthGranularitySortLimit() {
+ final String sql = "select floor(\"timestamp\" to MONTH) as m,\n"
+ + " sum(\"unit_sales\") as s,\n"
+ + " count(\"store_sqft\") as c\n"
+ + "from \"foodmart\"\n"
+ + "group by floor(\"timestamp\" to MONTH)\n"
+ + "order by floor(\"timestamp\" to MONTH) limit 3";
+ final String explain = "PLAN="
+ + "EnumerableInterpreter\n"
+ + " BindableSort(sort0=[$0], dir0=[ASC], fetch=[3])\n"
+ + " DruidQuery(table=[[foodmart, foodmart]], "
+ + "intervals=[[1900-01-09T00:00:00.000Z/2992-01-10T00:00:00.000Z]], "
+ + "projects=[[FLOOR($0, FLAG(MONTH)), $89, $71]], groups=[{0}], "
+ + "aggs=[[SUM($1), COUNT($2)]])";
+ sql(sql)
+ .returnsOrdered("M=1997-01-01 00:00:00; S=21628; C=7033",
+ "M=1997-02-01 00:00:00; S=20957; C=6844",
+ "M=1997-03-01 00:00:00; S=23706; C=7710")
+ .explainContains(explain);
+ }
+
@Test public void testGroupByDayGranularity() {
final String sql = "select sum(\"unit_sales\") as s,\n"
+ " count(\"store_sqft\") as c\n"
[3/4] calcite git commit: [CALCITE-1587] Druid adapter: topN returns
approximate results
Posted by jh...@apache.org.
[CALCITE-1587] Druid adapter: topN returns approximate results
Add connection properties "approximateDistinctCount" and
"approximateTopN", default false, which allow plans with
approximation. Currently they are only set in tests; we plan to allow
them to be set via SQL syntax.
Project: http://git-wip-us.apache.org/repos/asf/calcite/repo
Commit: http://git-wip-us.apache.org/repos/asf/calcite/commit/517bf62e
Tree: http://git-wip-us.apache.org/repos/asf/calcite/tree/517bf62e
Diff: http://git-wip-us.apache.org/repos/asf/calcite/diff/517bf62e
Branch: refs/heads/master
Commit: 517bf62e5b891fa7e927486ab7c4ab22eb04bd54
Parents: a118f82
Author: Julian Hyde <jh...@apache.org>
Authored: Wed Jan 18 15:29:28 2017 -0800
Committer: Julian Hyde <jh...@apache.org>
Committed: Thu Jan 19 11:43:40 2017 -0800
----------------------------------------------------------------------
.../calcite/config/CalciteConnectionConfig.java | 4 +++
.../config/CalciteConnectionConfigImpl.java | 10 +++++++
.../config/CalciteConnectionProperty.java | 9 ++++++
.../calcite/adapter/druid/DruidQuery.java | 7 ++++-
.../org/apache/calcite/test/DruidAdapterIT.java | 31 ++++++++++++++++++--
site/_docs/adapter.md | 2 ++
6 files changed, 60 insertions(+), 3 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/calcite/blob/517bf62e/core/src/main/java/org/apache/calcite/config/CalciteConnectionConfig.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/calcite/config/CalciteConnectionConfig.java b/core/src/main/java/org/apache/calcite/config/CalciteConnectionConfig.java
index 6eac17d..b96f8a9 100644
--- a/core/src/main/java/org/apache/calcite/config/CalciteConnectionConfig.java
+++ b/core/src/main/java/org/apache/calcite/config/CalciteConnectionConfig.java
@@ -26,6 +26,10 @@ import org.apache.calcite.sql.validate.SqlConformance;
* a method for every property. At some point there will be similar config
* classes for system and statement properties. */
public interface CalciteConnectionConfig extends ConnectionConfig {
+ /** @see CalciteConnectionProperty#APPROXIMATE_DISTINCT_COUNT */
+ boolean approximateDistinctCount();
+ /** @see CalciteConnectionProperty#APPROXIMATE_TOP_N */
+ boolean approximateTopN();
/** @see CalciteConnectionProperty#AUTO_TEMP */
boolean autoTemp();
/** @see CalciteConnectionProperty#MATERIALIZATIONS_ENABLED */
http://git-wip-us.apache.org/repos/asf/calcite/blob/517bf62e/core/src/main/java/org/apache/calcite/config/CalciteConnectionConfigImpl.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/calcite/config/CalciteConnectionConfigImpl.java b/core/src/main/java/org/apache/calcite/config/CalciteConnectionConfigImpl.java
index c169d9d..5527752 100644
--- a/core/src/main/java/org/apache/calcite/config/CalciteConnectionConfigImpl.java
+++ b/core/src/main/java/org/apache/calcite/config/CalciteConnectionConfigImpl.java
@@ -46,6 +46,16 @@ public class CalciteConnectionConfigImpl extends ConnectionConfigImpl
return new CalciteConnectionConfigImpl(properties1);
}
+ public boolean approximateDistinctCount() {
+ return CalciteConnectionProperty.APPROXIMATE_DISTINCT_COUNT.wrap(properties)
+ .getBoolean();
+ }
+
+ public boolean approximateTopN() {
+ return CalciteConnectionProperty.APPROXIMATE_TOP_N.wrap(properties)
+ .getBoolean();
+ }
+
public boolean autoTemp() {
return CalciteConnectionProperty.AUTO_TEMP.wrap(properties).getBoolean();
}
http://git-wip-us.apache.org/repos/asf/calcite/blob/517bf62e/core/src/main/java/org/apache/calcite/config/CalciteConnectionProperty.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/calcite/config/CalciteConnectionProperty.java b/core/src/main/java/org/apache/calcite/config/CalciteConnectionProperty.java
index 20baf7f..c2e1027 100644
--- a/core/src/main/java/org/apache/calcite/config/CalciteConnectionProperty.java
+++ b/core/src/main/java/org/apache/calcite/config/CalciteConnectionProperty.java
@@ -33,6 +33,15 @@ import static org.apache.calcite.avatica.ConnectionConfigImpl.parse;
* Properties that may be specified on the JDBC connect string.
*/
public enum CalciteConnectionProperty implements ConnectionProperty {
+ /** Whether approximate results from {@code COUNT(DISTINCT ...)} aggregate
+ * functions are acceptable. */
+ APPROXIMATE_DISTINCT_COUNT("approximateDistinctCount", Type.BOOLEAN, false,
+ false),
+
+ /** Whether approximate results from "Top N" queries
+ * ({@code ORDER BY aggFun DESC LIMIT n}) are acceptable. */
+ APPROXIMATE_TOP_N("approximateTopN", Type.BOOLEAN, false, false),
+
/** Whether to store query results in temporary tables. */
AUTO_TEMP("autoTemp", Type.BOOLEAN, false, false),
http://git-wip-us.apache.org/repos/asf/calcite/blob/517bf62e/druid/src/main/java/org/apache/calcite/adapter/druid/DruidQuery.java
----------------------------------------------------------------------
diff --git a/druid/src/main/java/org/apache/calcite/adapter/druid/DruidQuery.java b/druid/src/main/java/org/apache/calcite/adapter/druid/DruidQuery.java
index 5049d6a..f5c034e 100644
--- a/druid/src/main/java/org/apache/calcite/adapter/druid/DruidQuery.java
+++ b/druid/src/main/java/org/apache/calcite/adapter/druid/DruidQuery.java
@@ -18,6 +18,7 @@ package org.apache.calcite.adapter.druid;
import org.apache.calcite.DataContext;
import org.apache.calcite.avatica.ColumnMetaData;
+import org.apache.calcite.config.CalciteConnectionConfig;
import org.apache.calcite.config.CalciteConnectionProperty;
import org.apache.calcite.interpreter.BindableRel;
import org.apache.calcite.interpreter.Bindables;
@@ -414,6 +415,9 @@ public class DruidQuery extends AbstractRelNode implements BindableRel {
protected QuerySpec getQuery(RelDataType rowType, RexNode filter, List<RexNode> projects,
ImmutableBitSet groupSet, List<AggregateCall> aggCalls, List<String> aggNames,
List<Integer> collationIndexes, List<Direction> collationDirections, Integer fetch) {
+ final CalciteConnectionConfig config =
+ getCluster().getPlanner().getContext()
+ .unwrap(CalciteConnectionConfig.class);
QueryType queryType = QueryType.SELECT;
final Translator translator = new Translator(druidTable, rowType);
List<String> fieldNames = rowType.getFieldNames();
@@ -541,7 +545,8 @@ public class DruidQuery extends AbstractRelNode implements BindableRel {
&& granularity == Granularity.ALL
&& sortsMetric
&& collations.size() == 1
- && fetch != null) {
+ && fetch != null
+ && config.approximateTopN()) {
queryType = QueryType.TOP_N;
} else {
queryType = QueryType.GROUP_BY;
http://git-wip-us.apache.org/repos/asf/calcite/blob/517bf62e/druid/src/test/java/org/apache/calcite/test/DruidAdapterIT.java
----------------------------------------------------------------------
diff --git a/druid/src/test/java/org/apache/calcite/test/DruidAdapterIT.java b/druid/src/test/java/org/apache/calcite/test/DruidAdapterIT.java
index c7807a9..965f6ac 100644
--- a/druid/src/test/java/org/apache/calcite/test/DruidAdapterIT.java
+++ b/druid/src/test/java/org/apache/calcite/test/DruidAdapterIT.java
@@ -17,6 +17,8 @@
package org.apache.calcite.test;
import org.apache.calcite.adapter.druid.DruidQuery;
+import org.apache.calcite.config.CalciteConnectionConfig;
+import org.apache.calcite.config.CalciteConnectionProperty;
import org.apache.calcite.util.Util;
import com.google.common.base.Function;
@@ -459,21 +461,46 @@ public class DruidAdapterIT {
.queryContains(druidChecker(druidQuery));
}
+ /** Test case for
+ * <a href="https://issues.apache.org/jira/browse/CALCITE-1587">[CALCITE-1587]
+ * Druid adapter: topN returns approximate results</a>. */
@Test public void testGroupBySingleSortLimit() {
+ checkGroupBySingleSortLimit(false);
+ }
+
+ /** As {@link #testGroupBySingleSortLimit}, but allowing approximate results
+ * due to {@link CalciteConnectionConfig#approximateDistinctCount()}.
+ * Therefore we send a "topN" query to Druid. */
+ @Test public void testGroupBySingleSortLimitApprox() {
+ checkGroupBySingleSortLimit(true);
+ }
+
+ private void checkGroupBySingleSortLimit(boolean approx) {
final String sql = "select \"brand_name\", sum(\"unit_sales\") as s\n"
+ "from \"foodmart\"\n"
+ "group by \"brand_name\"\n"
+ "order by s desc limit 3";
- final String druidQuery = "{'queryType':'topN','dataSource':'foodmart',"
+ final String approxDruid = "{'queryType':'topN','dataSource':'foodmart',"
+ "'granularity':'all','dimension':'brand_name','metric':'S',"
+ "'aggregations':[{'type':'longSum','name':'S','fieldName':'unit_sales'}],"
+ "'intervals':['1900-01-09T00:00:00.000Z/2992-01-10T00:00:00.000Z'],"
+ "'threshold':3}";
+ final String exactDruid = "{'queryType':'groupBy','dataSource':'foodmart',"
+ + "'granularity':'all','dimensions':['brand_name'],"
+ + "'limitSpec':{'type':'default','limit':3,"
+ + "'columns':[{'dimension':'S','direction':'descending'}]},"
+ + "'aggregations':[{'type':'longSum','name':'S','fieldName':'unit_sales'}],"
+ + "'intervals':['1900-01-09T00:00:00.000Z/2992-01-10T00:00:00.000Z']}";
+ final String druidQuery = approx ? approxDruid : exactDruid;
final String explain = "PLAN=EnumerableInterpreter\n"
+ " DruidQuery(table=[[foodmart, foodmart]], "
+ "intervals=[[1900-01-09T00:00:00.000Z/2992-01-10T00:00:00.000Z]], "
+ "groups=[{2}], aggs=[[SUM($89)]], sort0=[1], dir0=[DESC], fetch=[3])\n";
- sql(sql)
+ CalciteAssert.that()
+ .enable(enabled())
+ .with(ImmutableMap.of("model", FOODMART.getPath()))
+ .with(CalciteConnectionProperty.APPROXIMATE_TOP_N.name(), approx)
+ .query(sql)
.runs()
.returnsOrdered("brand_name=Hermanos; S=8469",
"brand_name=Tell Tale; S=7877",
http://git-wip-us.apache.org/repos/asf/calcite/blob/517bf62e/site/_docs/adapter.md
----------------------------------------------------------------------
diff --git a/site/_docs/adapter.md b/site/_docs/adapter.md
index 5f9a20d..d8462ae 100644
--- a/site/_docs/adapter.md
+++ b/site/_docs/adapter.md
@@ -69,6 +69,8 @@ as implemented by Avatica's
| Property | Description |
|:-------- |:------------|
+| <a href="{{ site.apiRoot }}/org/apache/calcite/config/CalciteConnectionProperty.html#APPROXIMATE_DISTINCT_COUNT">approximateDistinctCount</a> | Whether approximate results from `COUNT(DISTINCT ...)` aggregate functions are acceptable
+| <a href="{{ site.apiRoot }}/org/apache/calcite/config/CalciteConnectionProperty.html#APPROXIMATE_TOP_N">approximateTopN</a> | Whether approximate results from "Top N" queries (`ORDER BY aggFun() DESC LIMIT n`) are acceptable
| <a href="{{ site.apiRoot }}/org/apache/calcite/config/CalciteConnectionProperty.html#CASE_SENSITIVE">caseSensitive</a> | Whether identifiers are matched case-sensitively. If not specified, value from `lex` is used.
| <a href="{{ site.apiRoot }}/org/apache/calcite/config/CalciteConnectionProperty.html#CONFORMANCE">conformance</a> | SQL conformance level. Values: DEFAULT (the default, similar to PRAGMATIC_2003), ORACLE_10, ORACLE_12, PRAGMATIC_99, PRAGMATIC_2003, STRICT_92, STRICT_99, STRICT_2003, SQL_SERVER_2008.
| <a href="{{ site.apiRoot }}/org/apache/calcite/config/CalciteConnectionProperty.html#CREATE_MATERIALIZATIONS">createMaterializations</a> | Whether Calcite should create materializations. Default false.
[2/4] calcite git commit: [CALCITE-1578] Druid adapter: wrong
semantics of topN query limit with granularity
Posted by jh...@apache.org.
[CALCITE-1578] Druid adapter: wrong semantics of topN query limit with granularity
[CALCITE-1579] Druid adapter: wrong semantics of groupBy query limit with granularity
[CALCITE-1580] Druid adapter: Wrong semantics for ordering within groupBy queries
Add CalciteAssert.returnsStartingWith, to allow checking large results
without introducing LIMIT to the query (which might cause Calcite to
use a different plan).
Project: http://git-wip-us.apache.org/repos/asf/calcite/repo
Commit: http://git-wip-us.apache.org/repos/asf/calcite/commit/a118f821
Tree: http://git-wip-us.apache.org/repos/asf/calcite/tree/a118f821
Diff: http://git-wip-us.apache.org/repos/asf/calcite/diff/a118f821
Branch: refs/heads/master
Commit: a118f821e3d49b6fc30c0e1e39d72026f2bf6ea1
Parents: a621902
Author: Julian Hyde <jh...@apache.org>
Authored: Tue Jan 17 16:24:04 2017 -0800
Committer: Julian Hyde <jh...@apache.org>
Committed: Thu Jan 19 11:42:55 2017 -0800
----------------------------------------------------------------------
.../org/apache/calcite/test/CalciteAssert.java | 23 ++-
.../calcite/adapter/druid/DruidQuery.java | 6 +-
.../calcite/adapter/druid/DruidRules.java | 9 +
.../org/apache/calcite/test/DruidAdapterIT.java | 165 ++++++++++++++++---
4 files changed, 177 insertions(+), 26 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/calcite/blob/a118f821/core/src/test/java/org/apache/calcite/test/CalciteAssert.java
----------------------------------------------------------------------
diff --git a/core/src/test/java/org/apache/calcite/test/CalciteAssert.java b/core/src/test/java/org/apache/calcite/test/CalciteAssert.java
index a6ff27f..c430d7b 100644
--- a/core/src/test/java/org/apache/calcite/test/CalciteAssert.java
+++ b/core/src/test/java/org/apache/calcite/test/CalciteAssert.java
@@ -357,11 +357,12 @@ public class CalciteAssert {
/** @see Matchers#returnsUnordered(String...) */
static Function<ResultSet, Void> checkResultUnordered(final String... lines) {
- return checkResult(true, lines);
+ return checkResult(true, false, lines);
}
/** @see Matchers#returnsUnordered(String...) */
- static Function<ResultSet, Void> checkResult(final boolean sort, final String... lines) {
+ static Function<ResultSet, Void> checkResult(final boolean sort,
+ final boolean head, final String... lines) {
return new Function<ResultSet, Void>() {
public Void apply(ResultSet resultSet) {
try {
@@ -374,8 +375,14 @@ public class CalciteAssert {
if (sort) {
Collections.sort(actualList);
}
- if (!actualList.equals(expectedList)) {
- assertThat(Util.lines(actualList),
+ final List<String> trimmedActualList;
+ if (head && actualList.size() > expectedList.size()) {
+ trimmedActualList = actualList.subList(0, expectedList.size());
+ } else {
+ trimmedActualList = actualList;
+ }
+ if (!trimmedActualList.equals(expectedList)) {
+ assertThat(Util.lines(trimmedActualList),
equalTo(Util.lines(expectedList)));
}
return null;
@@ -1238,11 +1245,15 @@ public class CalciteAssert {
}
public AssertQuery returnsUnordered(String... lines) {
- return returns(checkResult(true, lines));
+ return returns(checkResult(true, false, lines));
}
public AssertQuery returnsOrdered(String... lines) {
- return returns(checkResult(false, lines));
+ return returns(checkResult(false, false, lines));
+ }
+
+ public AssertQuery returnsStartingWith(String... lines) {
+ return returns(checkResult(false, true, lines));
}
public AssertQuery throws_(String message) {
http://git-wip-us.apache.org/repos/asf/calcite/blob/a118f821/druid/src/main/java/org/apache/calcite/adapter/druid/DruidQuery.java
----------------------------------------------------------------------
diff --git a/druid/src/main/java/org/apache/calcite/adapter/druid/DruidQuery.java b/druid/src/main/java/org/apache/calcite/adapter/druid/DruidQuery.java
index 97d4484..5049d6a 100644
--- a/druid/src/main/java/org/apache/calcite/adapter/druid/DruidQuery.java
+++ b/druid/src/main/java/org/apache/calcite/adapter/druid/DruidQuery.java
@@ -537,7 +537,11 @@ public class DruidQuery extends AbstractRelNode implements BindableRel {
if (dimensions.isEmpty() && (collations == null || timeSeriesDirection != null)) {
queryType = QueryType.TIMESERIES;
assert fetch == null;
- } else if (dimensions.size() == 1 && sortsMetric && collations.size() == 1 && fetch != null) {
+ } else if (dimensions.size() == 1
+ && granularity == Granularity.ALL
+ && sortsMetric
+ && collations.size() == 1
+ && fetch != null) {
queryType = QueryType.TOP_N;
} else {
queryType = QueryType.GROUP_BY;
http://git-wip-us.apache.org/repos/asf/calcite/blob/a118f821/druid/src/main/java/org/apache/calcite/adapter/druid/DruidRules.java
----------------------------------------------------------------------
diff --git a/druid/src/main/java/org/apache/calcite/adapter/druid/DruidRules.java b/druid/src/main/java/org/apache/calcite/adapter/druid/DruidRules.java
index 769e728..1b16984 100644
--- a/druid/src/main/java/org/apache/calcite/adapter/druid/DruidRules.java
+++ b/druid/src/main/java/org/apache/calcite/adapter/druid/DruidRules.java
@@ -479,6 +479,15 @@ public class DruidRules {
if (refsTimestamp && metricsRefs != 0) {
return false;
}
+ // If the aggregate is grouping by timestamp (or a function of the
+ // timestamp such as month) then we cannot push Sort to Druid.
+ // Druid's topN and groupBy operators would sort only within the
+ // granularity, whereas we want global sort.
+ final boolean aggregateRefsTimestamp =
+ checkTimestampRefOnQuery(topAgg.getGroupSet(), topAgg.getInput(), query);
+ if (aggregateRefsTimestamp && metricsRefs != 0) {
+ return false;
+ }
return true;
}
// If it is going to be a Druid select operator, we push the limit if
http://git-wip-us.apache.org/repos/asf/calcite/blob/a118f821/druid/src/test/java/org/apache/calcite/test/DruidAdapterIT.java
----------------------------------------------------------------------
diff --git a/druid/src/test/java/org/apache/calcite/test/DruidAdapterIT.java b/druid/src/test/java/org/apache/calcite/test/DruidAdapterIT.java
index 75331b7..c7807a9 100644
--- a/druid/src/test/java/org/apache/calcite/test/DruidAdapterIT.java
+++ b/druid/src/test/java/org/apache/calcite/test/DruidAdapterIT.java
@@ -415,7 +415,7 @@ public class DruidAdapterIT {
.queryContains(druidChecker(druidQuery));
}
- @Test public void testGroupByLimit() {
+ @Test public void testDistinctLimit() {
// We do not yet push LIMIT into a Druid "groupBy" query.
final String sql = "select distinct \"gender\", \"state_province\"\n"
+ "from \"foodmart\" fetch next 3 rows only";
@@ -482,6 +482,101 @@ public class DruidAdapterIT {
.queryContains(druidChecker(druidQuery));
}
+ /** Test case for
+ * <a href="https://issues.apache.org/jira/browse/CALCITE-1578">[CALCITE-1578]
+ * Druid adapter: wrong semantics of topN query limit with granularity</a>.
+ *
+ * <p>Before CALCITE-1578 was fixed, this would use a "topN" query but return
+ * the wrong results. */
+ @Test public void testGroupByDaySortDescLimit() {
+ final String sql = "select \"brand_name\", floor(\"timestamp\" to DAY) as d,"
+ + " sum(\"unit_sales\") as s\n"
+ + "from \"foodmart\"\n"
+ + "group by \"brand_name\", floor(\"timestamp\" to DAY)\n"
+ + "order by s desc limit 30";
+ final String druidQuery = "{'queryType':'groupBy','dataSource':'foodmart',"
+ + "'granularity':'day','dimensions':['brand_name'],"
+ + "'limitSpec':{'type':'default'},"
+ + "'aggregations':[{'type':'longSum','name':'S','fieldName':'unit_sales'}],"
+ + "'intervals':['1900-01-09T00:00:00.000Z/2992-01-10T00:00:00.000Z']}";
+ final String explain = "PLAN=EnumerableInterpreter\n"
+ + " BindableSort(sort0=[$2], dir0=[DESC], fetch=[30])\n"
+ + " DruidQuery(table=[[foodmart, foodmart]], "
+ + "intervals=[[1900-01-09T00:00:00.000Z/2992-01-10T00:00:00.000Z]], "
+ + "projects=[[$2, FLOOR($0, FLAG(DAY)), $89]], groups=[{0, 1}], "
+ + "aggs=[[SUM($2)]])\n";
+ sql(sql)
+ .runs()
+ .returnsStartingWith("brand_name=Ebony; D=1997-07-27 00:00:00; S=135",
+ "brand_name=Tri-State; D=1997-05-09 00:00:00; S=120",
+ "brand_name=Hermanos; D=1997-05-09 00:00:00; S=115")
+ .explainContains(explain)
+ .queryContains(druidChecker(druidQuery));
+ }
+
+ /** Test case for
+ * <a href="https://issues.apache.org/jira/browse/CALCITE-1579">[CALCITE-1579]
+ * Druid adapter: wrong semantics of groupBy query limit with
+ * granularity</a>.
+ *
+ * <p>Before CALCITE-1579 was fixed, this would use a "groupBy" query but
+ * wrongly try to use a {@code limitSpec} to sort and filter. (A "topN" query
+ * was not possible because the sort was {@code ASC}.) */
+ @Test public void testGroupByDaySortLimit() {
+ final String sql = "select \"brand_name\", floor(\"timestamp\" to DAY) as d,"
+ + " sum(\"unit_sales\") as s\n"
+ + "from \"foodmart\"\n"
+ + "group by \"brand_name\", floor(\"timestamp\" to DAY)\n"
+ + "order by s desc limit 30";
+ final String druidQuery = "{'queryType':'groupBy','dataSource':'foodmart',"
+ + "'granularity':'day','dimensions':['brand_name'],"
+ + "'limitSpec':{'type':'default'},"
+ + "'aggregations':[{'type':'longSum','name':'S','fieldName':'unit_sales'}],"
+ + "'intervals':['1900-01-09T00:00:00.000Z/2992-01-10T00:00:00.000Z']}";
+ final String explain = "PLAN=EnumerableInterpreter\n"
+ + " BindableSort(sort0=[$2], dir0=[DESC], fetch=[30])\n"
+ + " DruidQuery(table=[[foodmart, foodmart]], "
+ + "intervals=[[1900-01-09T00:00:00.000Z/2992-01-10T00:00:00.000Z]], "
+ + "projects=[[$2, FLOOR($0, FLAG(DAY)), $89]], groups=[{0, 1}], "
+ + "aggs=[[SUM($2)]])\n";
+ sql(sql)
+ .runs()
+ .returnsStartingWith("brand_name=Ebony; D=1997-07-27 00:00:00; S=135",
+ "brand_name=Tri-State; D=1997-05-09 00:00:00; S=120",
+ "brand_name=Hermanos; D=1997-05-09 00:00:00; S=115")
+ .explainContains(explain)
+ .queryContains(druidChecker(druidQuery));
+ }
+
+ /** Test case for
+ * <a href="https://issues.apache.org/jira/browse/CALCITE-1580">[CALCITE-1580]
+ * Druid adapter: Wrong semantics for ordering within groupBy queries</a>. */
+ @Test public void testGroupByDaySortDimension() {
+ final String sql = "select \"brand_name\", floor(\"timestamp\" to DAY) as d,"
+ + " sum(\"unit_sales\") as s\n"
+ + "from \"foodmart\"\n"
+ + "group by \"brand_name\", floor(\"timestamp\" to DAY)\n"
+ + "order by \"brand_name\"";
+ final String druidQuery = "{'queryType':'groupBy','dataSource':'foodmart',"
+ + "'granularity':'day','dimensions':['brand_name'],"
+ + "'limitSpec':{'type':'default'},"
+ + "'aggregations':[{'type':'longSum','name':'S','fieldName':'unit_sales'}],"
+ + "'intervals':['1900-01-09T00:00:00.000Z/2992-01-10T00:00:00.000Z']}";
+ final String explain = "PLAN=EnumerableInterpreter\n"
+ + " BindableSort(sort0=[$0], dir0=[ASC])\n"
+ + " DruidQuery(table=[[foodmart, foodmart]], "
+ + "intervals=[[1900-01-09T00:00:00.000Z/2992-01-10T00:00:00.000Z]], "
+ + "projects=[[$2, FLOOR($0, FLAG(DAY)), $89]], groups=[{0, 1}], "
+ + "aggs=[[SUM($2)]])\n";
+ sql(sql)
+ .runs()
+ .returnsStartingWith("brand_name=ADJ; D=1997-01-11 00:00:00; S=2",
+ "brand_name=ADJ; D=1997-01-12 00:00:00; S=3",
+ "brand_name=ADJ; D=1997-01-17 00:00:00; S=3")
+ .explainContains(explain)
+ .queryContains(druidChecker(druidQuery));
+ }
+
/** Tests a query that contains no GROUP BY and is therefore executed as a
* Druid "select" query. */
@Test public void testFilterSortDesc() {
@@ -666,16 +761,32 @@ public class DruidAdapterIT {
"C=40778");
}
- @Test public void testGroupByTimeAndOneColumnNotProjected() {
+ /** Unlike {@link #testGroupByTimeAndOneColumnNotProjected()}, we cannot use
+ * "topN" because we have a global limit, and that requires
+ * {@code granularity: all}. */
+ @Test public void testGroupByTimeAndOneColumnNotProjectedWithLimit() {
final String sql = "select count(*) as \"c\", floor(\"timestamp\" to MONTH) as \"month\"\n"
+ "from \"foodmart\"\n"
+ "group by floor(\"timestamp\" to MONTH), \"state_province\"\n"
+ "order by \"c\" desc limit 3";
sql(sql)
- .returnsOrdered("c=3072; month=1997-01-01 00:00:00",
- "c=2231; month=1997-01-01 00:00:00",
- "c=1730; month=1997-01-01 00:00:00")
- .queryContains(druidChecker("'queryType':'topN'"));
+ .returnsOrdered("c=4070; month=1997-12-01 00:00:00",
+ "c=4033; month=1997-11-01 00:00:00",
+ "c=3511; month=1997-07-01 00:00:00")
+ .queryContains(druidChecker("'queryType':'groupBy'"));
+ }
+
+ @Test public void testGroupByTimeAndOneColumnNotProjected() {
+ final String sql = "select count(*) as \"c\",\n"
+ + " floor(\"timestamp\" to MONTH) as \"month\"\n"
+ + "from \"foodmart\"\n"
+ + "group by floor(\"timestamp\" to MONTH), \"state_province\"\n"
+ + "having count(*) > 3500";
+ sql(sql)
+ .returnsUnordered("c=3511; month=1997-07-01 00:00:00",
+ "c=4033; month=1997-11-01 00:00:00",
+ "c=4070; month=1997-12-01 00:00:00")
+ .queryContains(druidChecker("'queryType':'groupBy'"));
}
@Test public void testOrderByOneColumnNotProjected() {
@@ -793,17 +904,23 @@ public class DruidAdapterIT {
+ "from \"foodmart\"\n"
+ "group by \"state_province\", floor(\"timestamp\" to MONTH)\n"
+ "order by s desc limit 3";
+ // Cannot use a Druid "topN" query, granularity != "all";
+ // have to use "groupBy" query followed by external Sort and fetch.
final String explain = "PLAN="
+ "EnumerableInterpreter\n"
- + " BindableProject(S=[$2], M=[$3], P=[$0])\n"
- + " DruidQuery(table=[[foodmart, foodmart]], intervals=[[1900-01-09T00:00:00.000Z/2992-01-10T00:00:00.000Z]], projects=[[$30, FLOOR($0, FLAG(MONTH)), $89]], groups=[{0, 1}], aggs=[[SUM($2), MAX($2)]], sort0=[2], dir0=[DESC], fetch=[3])";
- final String druidQuery = "{'queryType':'topN','dataSource':'foodmart',"
- + "'granularity':'month','dimension':'state_province','metric':'S',"
+ + " BindableSort(sort0=[$0], dir0=[DESC], fetch=[3])\n"
+ + " BindableProject(S=[$2], M=[$3], P=[$0])\n"
+ + " DruidQuery(table=[[foodmart, foodmart]], intervals=[[1900-01-09T00:00:00.000Z/2992-01-10T00:00:00.000Z]], projects=[[$30, FLOOR($0, FLAG(MONTH)), $89]], groups=[{0, 1}], aggs=[[SUM($2), MAX($2)]])";
+ final String druidQuery = "{'queryType':'groupBy','dataSource':'foodmart',"
+ + "'granularity':'month','dimensions':['state_province'],"
+ + "'limitSpec':{'type':'default'},"
+ "'aggregations':[{'type':'longSum','name':'S','fieldName':'unit_sales'},"
+ "{'type':'longMax','name':'M','fieldName':'unit_sales'}],"
- + "'intervals':['1900-01-09T00:00:00.000Z/2992-01-10T00:00:00.000Z'],'threshold':3}";
+ + "'intervals':['1900-01-09T00:00:00.000Z/2992-01-10T00:00:00.000Z']}";
sql(sql)
- .returnsOrdered("S=9342; M=6; P=WA", "S=6909; M=6; P=OR", "S=5377; M=7; P=CA")
+ .returnsUnordered("S=12399; M=6; P=WA",
+ "S=12297; M=7; P=WA",
+ "S=10640; M=6; P=WA")
.explainContains(explain)
.queryContains(druidChecker(druidQuery));
}
@@ -816,18 +933,28 @@ public class DruidAdapterIT {
+ "where \"timestamp\" >= '1997-01-01 00:00:00' and "
+ " \"timestamp\" < '1997-09-01 00:00:00'\n"
+ "group by \"state_province\", floor(\"timestamp\" to DAY)\n"
- + "order by s desc limit 3";
+ + "order by s desc limit 6";
final String explain = "PLAN="
+ "EnumerableInterpreter\n"
- + " BindableProject(S=[$2], M=[$3], P=[$0])\n"
- + " DruidQuery(table=[[foodmart, foodmart]], intervals=[[1997-01-01T00:00:00.000Z/1997-09-01T00:00:00.000Z]], projects=[[$30, FLOOR($0, FLAG(DAY)), $89]], groups=[{0, 1}], aggs=[[SUM($2), MAX($2)]], sort0=[2], dir0=[DESC], fetch=[3])";
- final String druidQuery = "{'queryType':'topN','dataSource':'foodmart',"
- + "'granularity':'day','dimension':'state_province','metric':'S',"
+ + " BindableSort(sort0=[$0], dir0=[DESC], fetch=[6])\n"
+ + " BindableProject(S=[$2], M=[$3], P=[$0])\n"
+ + " DruidQuery(table=[[foodmart, foodmart]], "
+ + "intervals=[[1997-01-01T00:00:00.000Z/1997-09-01T00:00:00.000Z]], "
+ + "projects=[[$30, FLOOR($0, FLAG(DAY)), $89]], groups=[{0, 1}], "
+ + "aggs=[[SUM($2), MAX($2)]]";
+ final String druidQuery = "{'queryType':'groupBy','dataSource':'foodmart',"
+ + "'granularity':'day','dimensions':['state_province'],"
+ + "'limitSpec':{'type':'default'},"
+ "'aggregations':[{'type':'longSum','name':'S','fieldName':'unit_sales'},"
+ "{'type':'longMax','name':'M','fieldName':'unit_sales'}],"
- + "'intervals':['1997-01-01T00:00:00.000Z/1997-09-01T00:00:00.000Z'],'threshold':3}";
+ + "'intervals':['1997-01-01T00:00:00.000Z/1997-09-01T00:00:00.000Z']}";
sql(sql)
- .returnsOrdered("S=348; M=5; P=CA")
+ .returnsOrdered("S=2527; M=5; P=OR",
+ "S=2525; M=6; P=OR",
+ "S=2238; M=6; P=OR",
+ "S=1715; M=5; P=OR",
+ "S=1691; M=5; P=OR",
+ "S=1629; M=5; P=WA")
.explainContains(explain)
.queryContains(druidChecker(druidQuery));
}