You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@calcite.apache.org by jh...@apache.org on 2017/01/19 23:47:35 UTC
[3/4] calcite git commit: [CALCITE-1587] Druid adapter: topN returns
approximate results
[CALCITE-1587] Druid adapter: topN returns approximate results
Add connection properties "approximateDistinctCount" and
"approximateTopN", default false, which allow plans with
approximation. Currently they are only set in tests; we plan to allow
them to be set via SQL syntax.
Project: http://git-wip-us.apache.org/repos/asf/calcite/repo
Commit: http://git-wip-us.apache.org/repos/asf/calcite/commit/517bf62e
Tree: http://git-wip-us.apache.org/repos/asf/calcite/tree/517bf62e
Diff: http://git-wip-us.apache.org/repos/asf/calcite/diff/517bf62e
Branch: refs/heads/master
Commit: 517bf62e5b891fa7e927486ab7c4ab22eb04bd54
Parents: a118f82
Author: Julian Hyde <jh...@apache.org>
Authored: Wed Jan 18 15:29:28 2017 -0800
Committer: Julian Hyde <jh...@apache.org>
Committed: Thu Jan 19 11:43:40 2017 -0800
----------------------------------------------------------------------
.../calcite/config/CalciteConnectionConfig.java | 4 +++
.../config/CalciteConnectionConfigImpl.java | 10 +++++++
.../config/CalciteConnectionProperty.java | 9 ++++++
.../calcite/adapter/druid/DruidQuery.java | 7 ++++-
.../org/apache/calcite/test/DruidAdapterIT.java | 31 ++++++++++++++++++--
site/_docs/adapter.md | 2 ++
6 files changed, 60 insertions(+), 3 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/calcite/blob/517bf62e/core/src/main/java/org/apache/calcite/config/CalciteConnectionConfig.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/calcite/config/CalciteConnectionConfig.java b/core/src/main/java/org/apache/calcite/config/CalciteConnectionConfig.java
index 6eac17d..b96f8a9 100644
--- a/core/src/main/java/org/apache/calcite/config/CalciteConnectionConfig.java
+++ b/core/src/main/java/org/apache/calcite/config/CalciteConnectionConfig.java
@@ -26,6 +26,10 @@ import org.apache.calcite.sql.validate.SqlConformance;
* a method for every property. At some point there will be similar config
* classes for system and statement properties. */
public interface CalciteConnectionConfig extends ConnectionConfig {
+ /** @see CalciteConnectionProperty#APPROXIMATE_DISTINCT_COUNT */
+ boolean approximateDistinctCount();
+ /** @see CalciteConnectionProperty#APPROXIMATE_TOP_N */
+ boolean approximateTopN();
/** @see CalciteConnectionProperty#AUTO_TEMP */
boolean autoTemp();
/** @see CalciteConnectionProperty#MATERIALIZATIONS_ENABLED */
http://git-wip-us.apache.org/repos/asf/calcite/blob/517bf62e/core/src/main/java/org/apache/calcite/config/CalciteConnectionConfigImpl.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/calcite/config/CalciteConnectionConfigImpl.java b/core/src/main/java/org/apache/calcite/config/CalciteConnectionConfigImpl.java
index c169d9d..5527752 100644
--- a/core/src/main/java/org/apache/calcite/config/CalciteConnectionConfigImpl.java
+++ b/core/src/main/java/org/apache/calcite/config/CalciteConnectionConfigImpl.java
@@ -46,6 +46,16 @@ public class CalciteConnectionConfigImpl extends ConnectionConfigImpl
return new CalciteConnectionConfigImpl(properties1);
}
+ public boolean approximateDistinctCount() {
+ return CalciteConnectionProperty.APPROXIMATE_DISTINCT_COUNT.wrap(properties)
+ .getBoolean();
+ }
+
+ public boolean approximateTopN() {
+ return CalciteConnectionProperty.APPROXIMATE_TOP_N.wrap(properties)
+ .getBoolean();
+ }
+
public boolean autoTemp() {
return CalciteConnectionProperty.AUTO_TEMP.wrap(properties).getBoolean();
}
http://git-wip-us.apache.org/repos/asf/calcite/blob/517bf62e/core/src/main/java/org/apache/calcite/config/CalciteConnectionProperty.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/calcite/config/CalciteConnectionProperty.java b/core/src/main/java/org/apache/calcite/config/CalciteConnectionProperty.java
index 20baf7f..c2e1027 100644
--- a/core/src/main/java/org/apache/calcite/config/CalciteConnectionProperty.java
+++ b/core/src/main/java/org/apache/calcite/config/CalciteConnectionProperty.java
@@ -33,6 +33,15 @@ import static org.apache.calcite.avatica.ConnectionConfigImpl.parse;
* Properties that may be specified on the JDBC connect string.
*/
public enum CalciteConnectionProperty implements ConnectionProperty {
+ /** Whether approximate results from {@code COUNT(DISTINCT ...)} aggregate
+ * functions are acceptable. */
+ APPROXIMATE_DISTINCT_COUNT("approximateDistinctCount", Type.BOOLEAN, false,
+ false),
+
+ /** Whether approximate results from "Top N" queries
+ * ({@code ORDER BY aggFun DESC LIMIT n}) are acceptable. */
+ APPROXIMATE_TOP_N("approximateTopN", Type.BOOLEAN, false, false),
+
/** Whether to store query results in temporary tables. */
AUTO_TEMP("autoTemp", Type.BOOLEAN, false, false),
http://git-wip-us.apache.org/repos/asf/calcite/blob/517bf62e/druid/src/main/java/org/apache/calcite/adapter/druid/DruidQuery.java
----------------------------------------------------------------------
diff --git a/druid/src/main/java/org/apache/calcite/adapter/druid/DruidQuery.java b/druid/src/main/java/org/apache/calcite/adapter/druid/DruidQuery.java
index 5049d6a..f5c034e 100644
--- a/druid/src/main/java/org/apache/calcite/adapter/druid/DruidQuery.java
+++ b/druid/src/main/java/org/apache/calcite/adapter/druid/DruidQuery.java
@@ -18,6 +18,7 @@ package org.apache.calcite.adapter.druid;
import org.apache.calcite.DataContext;
import org.apache.calcite.avatica.ColumnMetaData;
+import org.apache.calcite.config.CalciteConnectionConfig;
import org.apache.calcite.config.CalciteConnectionProperty;
import org.apache.calcite.interpreter.BindableRel;
import org.apache.calcite.interpreter.Bindables;
@@ -414,6 +415,9 @@ public class DruidQuery extends AbstractRelNode implements BindableRel {
protected QuerySpec getQuery(RelDataType rowType, RexNode filter, List<RexNode> projects,
ImmutableBitSet groupSet, List<AggregateCall> aggCalls, List<String> aggNames,
List<Integer> collationIndexes, List<Direction> collationDirections, Integer fetch) {
+ final CalciteConnectionConfig config =
+ getCluster().getPlanner().getContext()
+ .unwrap(CalciteConnectionConfig.class);
QueryType queryType = QueryType.SELECT;
final Translator translator = new Translator(druidTable, rowType);
List<String> fieldNames = rowType.getFieldNames();
@@ -541,7 +545,8 @@ public class DruidQuery extends AbstractRelNode implements BindableRel {
&& granularity == Granularity.ALL
&& sortsMetric
&& collations.size() == 1
- && fetch != null) {
+ && fetch != null
+ && config.approximateTopN()) {
queryType = QueryType.TOP_N;
} else {
queryType = QueryType.GROUP_BY;
http://git-wip-us.apache.org/repos/asf/calcite/blob/517bf62e/druid/src/test/java/org/apache/calcite/test/DruidAdapterIT.java
----------------------------------------------------------------------
diff --git a/druid/src/test/java/org/apache/calcite/test/DruidAdapterIT.java b/druid/src/test/java/org/apache/calcite/test/DruidAdapterIT.java
index c7807a9..965f6ac 100644
--- a/druid/src/test/java/org/apache/calcite/test/DruidAdapterIT.java
+++ b/druid/src/test/java/org/apache/calcite/test/DruidAdapterIT.java
@@ -17,6 +17,8 @@
package org.apache.calcite.test;
import org.apache.calcite.adapter.druid.DruidQuery;
+import org.apache.calcite.config.CalciteConnectionConfig;
+import org.apache.calcite.config.CalciteConnectionProperty;
import org.apache.calcite.util.Util;
import com.google.common.base.Function;
@@ -459,21 +461,46 @@ public class DruidAdapterIT {
.queryContains(druidChecker(druidQuery));
}
+ /** Test case for
+ * <a href="https://issues.apache.org/jira/browse/CALCITE-1587">[CALCITE-1587]
+ * Druid adapter: topN returns approximate results</a>. */
@Test public void testGroupBySingleSortLimit() {
+ checkGroupBySingleSortLimit(false);
+ }
+
+ /** As {@link #testGroupBySingleSortLimit}, but allowing approximate results
+ * due to {@link CalciteConnectionConfig#approximateTopN()}.
+ * Therefore we send a "topN" query to Druid. */
+ @Test public void testGroupBySingleSortLimitApprox() {
+ checkGroupBySingleSortLimit(true);
+ }
+
+ private void checkGroupBySingleSortLimit(boolean approx) {
final String sql = "select \"brand_name\", sum(\"unit_sales\") as s\n"
+ "from \"foodmart\"\n"
+ "group by \"brand_name\"\n"
+ "order by s desc limit 3";
- final String druidQuery = "{'queryType':'topN','dataSource':'foodmart',"
+ final String approxDruid = "{'queryType':'topN','dataSource':'foodmart',"
+ "'granularity':'all','dimension':'brand_name','metric':'S',"
+ "'aggregations':[{'type':'longSum','name':'S','fieldName':'unit_sales'}],"
+ "'intervals':['1900-01-09T00:00:00.000Z/2992-01-10T00:00:00.000Z'],"
+ "'threshold':3}";
+ final String exactDruid = "{'queryType':'groupBy','dataSource':'foodmart',"
+ + "'granularity':'all','dimensions':['brand_name'],"
+ + "'limitSpec':{'type':'default','limit':3,"
+ + "'columns':[{'dimension':'S','direction':'descending'}]},"
+ + "'aggregations':[{'type':'longSum','name':'S','fieldName':'unit_sales'}],"
+ + "'intervals':['1900-01-09T00:00:00.000Z/2992-01-10T00:00:00.000Z']}";
+ final String druidQuery = approx ? approxDruid : exactDruid;
final String explain = "PLAN=EnumerableInterpreter\n"
+ " DruidQuery(table=[[foodmart, foodmart]], "
+ "intervals=[[1900-01-09T00:00:00.000Z/2992-01-10T00:00:00.000Z]], "
+ "groups=[{2}], aggs=[[SUM($89)]], sort0=[1], dir0=[DESC], fetch=[3])\n";
- sql(sql)
+ CalciteAssert.that()
+ .enable(enabled())
+ .with(ImmutableMap.of("model", FOODMART.getPath()))
+ .with(CalciteConnectionProperty.APPROXIMATE_TOP_N.name(), approx)
+ .query(sql)
.runs()
.returnsOrdered("brand_name=Hermanos; S=8469",
"brand_name=Tell Tale; S=7877",
http://git-wip-us.apache.org/repos/asf/calcite/blob/517bf62e/site/_docs/adapter.md
----------------------------------------------------------------------
diff --git a/site/_docs/adapter.md b/site/_docs/adapter.md
index 5f9a20d..d8462ae 100644
--- a/site/_docs/adapter.md
+++ b/site/_docs/adapter.md
@@ -69,6 +69,8 @@ as implemented by Avatica's
| Property | Description |
|:-------- |:------------|
+| <a href="{{ site.apiRoot }}/org/apache/calcite/config/CalciteConnectionProperty.html#APPROXIMATE_DISTINCT_COUNT">approximateDistinctCount</a> | Whether approximate results from `COUNT(DISTINCT ...)` aggregate functions are acceptable.
+| <a href="{{ site.apiRoot }}/org/apache/calcite/config/CalciteConnectionProperty.html#APPROXIMATE_TOP_N">approximateTopN</a> | Whether approximate results from "Top N" queries (`ORDER BY aggFun() DESC LIMIT n`) are acceptable.
| <a href="{{ site.apiRoot }}/org/apache/calcite/config/CalciteConnectionProperty.html#CASE_SENSITIVE">caseSensitive</a> | Whether identifiers are matched case-sensitively. If not specified, value from `lex` is used.
| <a href="{{ site.apiRoot }}/org/apache/calcite/config/CalciteConnectionProperty.html#CONFORMANCE">conformance</a> | SQL conformance level. Values: DEFAULT (the default, similar to PRAGMATIC_2003), ORACLE_10, ORACLE_12, PRAGMATIC_99, PRAGMATIC_2003, STRICT_92, STRICT_99, STRICT_2003, SQL_SERVER_2008.
| <a href="{{ site.apiRoot }}/org/apache/calcite/config/CalciteConnectionProperty.html#CREATE_MATERIALIZATIONS">createMaterializations</a> | Whether Calcite should create materializations. Default false.