You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@calcite.apache.org by jh...@apache.org on 2017/01/19 23:47:35 UTC

[3/4] calcite git commit: [CALCITE-1587] Druid adapter: topN returns approximate results

[CALCITE-1587] Druid adapter: topN returns approximate results

Add connection properties "approximateDistinctCount" and
"approximateTopN", default false, which allow plans with
approximation. Currently they are only set in tests; we plan to allow
them to be set via SQL syntax.


Project: http://git-wip-us.apache.org/repos/asf/calcite/repo
Commit: http://git-wip-us.apache.org/repos/asf/calcite/commit/517bf62e
Tree: http://git-wip-us.apache.org/repos/asf/calcite/tree/517bf62e
Diff: http://git-wip-us.apache.org/repos/asf/calcite/diff/517bf62e

Branch: refs/heads/master
Commit: 517bf62e5b891fa7e927486ab7c4ab22eb04bd54
Parents: a118f82
Author: Julian Hyde <jh...@apache.org>
Authored: Wed Jan 18 15:29:28 2017 -0800
Committer: Julian Hyde <jh...@apache.org>
Committed: Thu Jan 19 11:43:40 2017 -0800

----------------------------------------------------------------------
 .../calcite/config/CalciteConnectionConfig.java |  4 +++
 .../config/CalciteConnectionConfigImpl.java     | 10 +++++++
 .../config/CalciteConnectionProperty.java       |  9 ++++++
 .../calcite/adapter/druid/DruidQuery.java       |  7 ++++-
 .../org/apache/calcite/test/DruidAdapterIT.java | 31 ++++++++++++++++++--
 site/_docs/adapter.md                           |  2 ++
 6 files changed, 60 insertions(+), 3 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/calcite/blob/517bf62e/core/src/main/java/org/apache/calcite/config/CalciteConnectionConfig.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/calcite/config/CalciteConnectionConfig.java b/core/src/main/java/org/apache/calcite/config/CalciteConnectionConfig.java
index 6eac17d..b96f8a9 100644
--- a/core/src/main/java/org/apache/calcite/config/CalciteConnectionConfig.java
+++ b/core/src/main/java/org/apache/calcite/config/CalciteConnectionConfig.java
@@ -26,6 +26,10 @@ import org.apache.calcite.sql.validate.SqlConformance;
  * a method for every property. At some point there will be similar config
  * classes for system and statement properties. */
 public interface CalciteConnectionConfig extends ConnectionConfig {
+  /** @see CalciteConnectionProperty#APPROXIMATE_DISTINCT_COUNT */
+  boolean approximateDistinctCount();
+  /** @see CalciteConnectionProperty#APPROXIMATE_TOP_N */
+  boolean approximateTopN();
   /** @see CalciteConnectionProperty#AUTO_TEMP */
   boolean autoTemp();
   /** @see CalciteConnectionProperty#MATERIALIZATIONS_ENABLED */

http://git-wip-us.apache.org/repos/asf/calcite/blob/517bf62e/core/src/main/java/org/apache/calcite/config/CalciteConnectionConfigImpl.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/calcite/config/CalciteConnectionConfigImpl.java b/core/src/main/java/org/apache/calcite/config/CalciteConnectionConfigImpl.java
index c169d9d..5527752 100644
--- a/core/src/main/java/org/apache/calcite/config/CalciteConnectionConfigImpl.java
+++ b/core/src/main/java/org/apache/calcite/config/CalciteConnectionConfigImpl.java
@@ -46,6 +46,16 @@ public class CalciteConnectionConfigImpl extends ConnectionConfigImpl
     return new CalciteConnectionConfigImpl(properties1);
   }
 
+  public boolean approximateDistinctCount() {
+    return CalciteConnectionProperty.APPROXIMATE_DISTINCT_COUNT.wrap(properties)
+        .getBoolean();
+  }
+
+  public boolean approximateTopN() {
+    return CalciteConnectionProperty.APPROXIMATE_TOP_N.wrap(properties)
+        .getBoolean();
+  }
+
   public boolean autoTemp() {
     return CalciteConnectionProperty.AUTO_TEMP.wrap(properties).getBoolean();
   }

http://git-wip-us.apache.org/repos/asf/calcite/blob/517bf62e/core/src/main/java/org/apache/calcite/config/CalciteConnectionProperty.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/calcite/config/CalciteConnectionProperty.java b/core/src/main/java/org/apache/calcite/config/CalciteConnectionProperty.java
index 20baf7f..c2e1027 100644
--- a/core/src/main/java/org/apache/calcite/config/CalciteConnectionProperty.java
+++ b/core/src/main/java/org/apache/calcite/config/CalciteConnectionProperty.java
@@ -33,6 +33,15 @@ import static org.apache.calcite.avatica.ConnectionConfigImpl.parse;
  * Properties that may be specified on the JDBC connect string.
  */
 public enum CalciteConnectionProperty implements ConnectionProperty {
+  /** Whether approximate results from {@code COUNT(DISTINCT ...)} aggregate
+   * functions are acceptable. */
+  APPROXIMATE_DISTINCT_COUNT("approximateDistinctCount", Type.BOOLEAN, false,
+      false),
+
+  /** Whether approximate results from "Top N" queries
+   * ({@code ORDER BY aggFun DESC LIMIT n}) are acceptable. */
+  APPROXIMATE_TOP_N("approximateTopN", Type.BOOLEAN, false, false),
+
   /** Whether to store query results in temporary tables. */
   AUTO_TEMP("autoTemp", Type.BOOLEAN, false, false),
 

http://git-wip-us.apache.org/repos/asf/calcite/blob/517bf62e/druid/src/main/java/org/apache/calcite/adapter/druid/DruidQuery.java
----------------------------------------------------------------------
diff --git a/druid/src/main/java/org/apache/calcite/adapter/druid/DruidQuery.java b/druid/src/main/java/org/apache/calcite/adapter/druid/DruidQuery.java
index 5049d6a..f5c034e 100644
--- a/druid/src/main/java/org/apache/calcite/adapter/druid/DruidQuery.java
+++ b/druid/src/main/java/org/apache/calcite/adapter/druid/DruidQuery.java
@@ -18,6 +18,7 @@ package org.apache.calcite.adapter.druid;
 
 import org.apache.calcite.DataContext;
 import org.apache.calcite.avatica.ColumnMetaData;
+import org.apache.calcite.config.CalciteConnectionConfig;
 import org.apache.calcite.config.CalciteConnectionProperty;
 import org.apache.calcite.interpreter.BindableRel;
 import org.apache.calcite.interpreter.Bindables;
@@ -414,6 +415,9 @@ public class DruidQuery extends AbstractRelNode implements BindableRel {
   protected QuerySpec getQuery(RelDataType rowType, RexNode filter, List<RexNode> projects,
       ImmutableBitSet groupSet, List<AggregateCall> aggCalls, List<String> aggNames,
       List<Integer> collationIndexes, List<Direction> collationDirections, Integer fetch) {
+    final CalciteConnectionConfig config =
+        getCluster().getPlanner().getContext()
+            .unwrap(CalciteConnectionConfig.class);
     QueryType queryType = QueryType.SELECT;
     final Translator translator = new Translator(druidTable, rowType);
     List<String> fieldNames = rowType.getFieldNames();
@@ -541,7 +545,8 @@ public class DruidQuery extends AbstractRelNode implements BindableRel {
           && granularity == Granularity.ALL
           && sortsMetric
           && collations.size() == 1
-          && fetch != null) {
+          && fetch != null
+          && config.approximateTopN()) {
         queryType = QueryType.TOP_N;
       } else {
         queryType = QueryType.GROUP_BY;

http://git-wip-us.apache.org/repos/asf/calcite/blob/517bf62e/druid/src/test/java/org/apache/calcite/test/DruidAdapterIT.java
----------------------------------------------------------------------
diff --git a/druid/src/test/java/org/apache/calcite/test/DruidAdapterIT.java b/druid/src/test/java/org/apache/calcite/test/DruidAdapterIT.java
index c7807a9..965f6ac 100644
--- a/druid/src/test/java/org/apache/calcite/test/DruidAdapterIT.java
+++ b/druid/src/test/java/org/apache/calcite/test/DruidAdapterIT.java
@@ -17,6 +17,8 @@
 package org.apache.calcite.test;
 
 import org.apache.calcite.adapter.druid.DruidQuery;
+import org.apache.calcite.config.CalciteConnectionConfig;
+import org.apache.calcite.config.CalciteConnectionProperty;
 import org.apache.calcite.util.Util;
 
 import com.google.common.base.Function;
@@ -459,21 +461,46 @@ public class DruidAdapterIT {
         .queryContains(druidChecker(druidQuery));
   }
 
+  /** Test case for
+   * <a href="https://issues.apache.org/jira/browse/CALCITE-1587">[CALCITE-1587]
+   * Druid adapter: topN returns approximate results</a>. */
   @Test public void testGroupBySingleSortLimit() {
+    checkGroupBySingleSortLimit(false);
+  }
+
+  /** As {@link #testGroupBySingleSortLimit}, but allowing approximate results
+   * due to {@link CalciteConnectionConfig#approximateDistinctCount()}.
+   * Therefore we send a "topN" query to Druid. */
+  @Test public void testGroupBySingleSortLimitApprox() {
+    checkGroupBySingleSortLimit(true);
+  }
+
+  private void checkGroupBySingleSortLimit(boolean approx) {
     final String sql = "select \"brand_name\", sum(\"unit_sales\") as s\n"
         + "from \"foodmart\"\n"
         + "group by \"brand_name\"\n"
         + "order by s desc limit 3";
-    final String druidQuery = "{'queryType':'topN','dataSource':'foodmart',"
+    final String approxDruid = "{'queryType':'topN','dataSource':'foodmart',"
         + "'granularity':'all','dimension':'brand_name','metric':'S',"
         + "'aggregations':[{'type':'longSum','name':'S','fieldName':'unit_sales'}],"
         + "'intervals':['1900-01-09T00:00:00.000Z/2992-01-10T00:00:00.000Z'],"
         + "'threshold':3}";
+    final String exactDruid = "{'queryType':'groupBy','dataSource':'foodmart',"
+        + "'granularity':'all','dimensions':['brand_name'],"
+        + "'limitSpec':{'type':'default','limit':3,"
+        + "'columns':[{'dimension':'S','direction':'descending'}]},"
+        + "'aggregations':[{'type':'longSum','name':'S','fieldName':'unit_sales'}],"
+        + "'intervals':['1900-01-09T00:00:00.000Z/2992-01-10T00:00:00.000Z']}";
+    final String druidQuery = approx ? approxDruid : exactDruid;
     final String explain = "PLAN=EnumerableInterpreter\n"
         + "  DruidQuery(table=[[foodmart, foodmart]], "
         + "intervals=[[1900-01-09T00:00:00.000Z/2992-01-10T00:00:00.000Z]], "
         + "groups=[{2}], aggs=[[SUM($89)]], sort0=[1], dir0=[DESC], fetch=[3])\n";
-    sql(sql)
+    CalciteAssert.that()
+        .enable(enabled())
+        .with(ImmutableMap.of("model", FOODMART.getPath()))
+        .with(CalciteConnectionProperty.APPROXIMATE_TOP_N.name(), approx)
+        .query(sql)
         .runs()
         .returnsOrdered("brand_name=Hermanos; S=8469",
             "brand_name=Tell Tale; S=7877",

http://git-wip-us.apache.org/repos/asf/calcite/blob/517bf62e/site/_docs/adapter.md
----------------------------------------------------------------------
diff --git a/site/_docs/adapter.md b/site/_docs/adapter.md
index 5f9a20d..d8462ae 100644
--- a/site/_docs/adapter.md
+++ b/site/_docs/adapter.md
@@ -69,6 +69,8 @@ as implemented by Avatica's
 
 | Property | Description |
 |:-------- |:------------|
+| <a href="{{ site.apiRoot }}/org/apache/calcite/config/CalciteConnectionProperty.html#APPROXIMATE_DISTINCT_COUNT">approximateDistinctCount</a> | Whether approximate results from `COUNT(DISTINCT ...)` aggregate functions are acceptable
+| <a href="{{ site.apiRoot }}/org/apache/calcite/config/CalciteConnectionProperty.html#APPROXIMATE_TOP_N">approximateTopN</a> | Whether approximate results from "Top N" queries * (`ORDER BY aggFun() DESC LIMIT n`) are acceptable
 | <a href="{{ site.apiRoot }}/org/apache/calcite/config/CalciteConnectionProperty.html#CASE_SENSITIVE">caseSensitive</a> | Whether identifiers are matched case-sensitively. If not specified, value from `lex` is used.
 | <a href="{{ site.apiRoot }}/org/apache/calcite/config/CalciteConnectionProperty.html#CONFORMANCE">conformance</a> | SQL conformance level. Values: DEFAULT (the default, similar to PRAGMATIC_2003), ORACLE_10, ORACLE_12, PRAGMATIC_99, PRAGMATIC_2003, STRICT_92, STRICT_99, STRICT_2003, SQL_SERVER_2008.
 | <a href="{{ site.apiRoot }}/org/apache/calcite/config/CalciteConnectionProperty.html#CREATE_MATERIALIZATIONS">createMaterializations</a> | Whether Calcite should create materializations. Default false.