You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@druid.apache.org by kf...@apache.org on 2022/01/24 08:30:37 UTC

[druid] branch master updated: Change value of `druid.sql.planner.useGroupingSetForExactDistinct` in common.runtime.properties (#12182)

This is an automated email from the ASF dual-hosted git repository.

kfaraz pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/druid.git


The following commit(s) were added to refs/heads/master by this push:
     new dc1703d  Change value of `druid.sql.planner.useGroupingSetForExactDistinct` in common.runtime.properties (#12182)
dc1703d is described below

commit dc1703d5f9723634bae123e9a11a6899b2abf05a
Author: Laksh Singla <30...@users.noreply.github.com>
AuthorDate: Mon Jan 24 14:00:03 2022 +0530

    Change value of `druid.sql.planner.useGroupingSetForExactDistinct` in common.runtime.properties (#12182)
    
    This PR changes the value of the property `druid.sql.planner.useGroupingSetForExactDistinct` from `false` to `true` in the runtime.properties files, so that newer installations have this property as `true`, while the default still remains as `false`.
    
    The flag determines how Calcite plans queries that contain an aggregation over `DISTINCT`, such as `SELECT COUNT(DISTINCT foo.dim1) FILTER(WHERE foo.cnt = 1), SUM(foo.cnt) FROM druid.foo`. When the flag is set to false, such a query is planned using joins; when it is set to true, the query is planned using grouping sets.
    
    There is a known issue with Calcite (https://github.com/apache/druid/issues/7953), where an NPE is thrown while planning the above query with joins. There is no such issue while planning the query using grouping sets.
---
 .../cluster/_common/common.runtime.properties      |  3 +
 .../large/_common/common.runtime.properties        |  3 +
 .../medium/_common/common.runtime.properties       |  3 +
 .../_common/common.runtime.properties              |  3 +
 .../_common/common.runtime.properties              |  3 +
 .../small/_common/common.runtime.properties        |  3 +
 .../xlarge/_common/common.runtime.properties       |  3 +
 .../apache/druid/sql/calcite/CalciteQueryTest.java | 93 ++++++++++++++++++++++
 8 files changed, 114 insertions(+)

diff --git a/examples/conf/druid/cluster/_common/common.runtime.properties b/examples/conf/druid/cluster/_common/common.runtime.properties
index fbd790f..0fa2e40 100644
--- a/examples/conf/druid/cluster/_common/common.runtime.properties
+++ b/examples/conf/druid/cluster/_common/common.runtime.properties
@@ -139,6 +139,9 @@ druid.server.hiddenProperties=["druid.s3.accessKey","druid.s3.secretKey","druid.
 #
 druid.sql.enable=true
 
+# Plan SQL queries that contain exact DISTINCT aggregations using grouping sets
+druid.sql.planner.useGroupingSetForExactDistinct=true
+
 #
 # Lookups
 #
diff --git a/examples/conf/druid/single-server/large/_common/common.runtime.properties b/examples/conf/druid/single-server/large/_common/common.runtime.properties
index fbd790f..0fa2e40 100644
--- a/examples/conf/druid/single-server/large/_common/common.runtime.properties
+++ b/examples/conf/druid/single-server/large/_common/common.runtime.properties
@@ -139,6 +139,9 @@ druid.server.hiddenProperties=["druid.s3.accessKey","druid.s3.secretKey","druid.
 #
 druid.sql.enable=true
 
+# Plan SQL queries that contain exact DISTINCT aggregations using grouping sets
+druid.sql.planner.useGroupingSetForExactDistinct=true
+
 #
 # Lookups
 #
diff --git a/examples/conf/druid/single-server/medium/_common/common.runtime.properties b/examples/conf/druid/single-server/medium/_common/common.runtime.properties
index fbd790f..0fa2e40 100644
--- a/examples/conf/druid/single-server/medium/_common/common.runtime.properties
+++ b/examples/conf/druid/single-server/medium/_common/common.runtime.properties
@@ -139,6 +139,9 @@ druid.server.hiddenProperties=["druid.s3.accessKey","druid.s3.secretKey","druid.
 #
 druid.sql.enable=true
 
+# Plan SQL queries that contain exact DISTINCT aggregations using grouping sets
+druid.sql.planner.useGroupingSetForExactDistinct=true
+
 #
 # Lookups
 #
diff --git a/examples/conf/druid/single-server/micro-quickstart/_common/common.runtime.properties b/examples/conf/druid/single-server/micro-quickstart/_common/common.runtime.properties
index fbd790f..0fa2e40 100644
--- a/examples/conf/druid/single-server/micro-quickstart/_common/common.runtime.properties
+++ b/examples/conf/druid/single-server/micro-quickstart/_common/common.runtime.properties
@@ -139,6 +139,9 @@ druid.server.hiddenProperties=["druid.s3.accessKey","druid.s3.secretKey","druid.
 #
 druid.sql.enable=true
 
+# Plan SQL queries that contain exact DISTINCT aggregations using grouping sets
+druid.sql.planner.useGroupingSetForExactDistinct=true
+
 #
 # Lookups
 #
diff --git a/examples/conf/druid/single-server/nano-quickstart/_common/common.runtime.properties b/examples/conf/druid/single-server/nano-quickstart/_common/common.runtime.properties
index fbd790f..0fa2e40 100644
--- a/examples/conf/druid/single-server/nano-quickstart/_common/common.runtime.properties
+++ b/examples/conf/druid/single-server/nano-quickstart/_common/common.runtime.properties
@@ -139,6 +139,9 @@ druid.server.hiddenProperties=["druid.s3.accessKey","druid.s3.secretKey","druid.
 #
 druid.sql.enable=true
 
+# Plan SQL queries that contain exact DISTINCT aggregations using grouping sets
+druid.sql.planner.useGroupingSetForExactDistinct=true
+
 #
 # Lookups
 #
diff --git a/examples/conf/druid/single-server/small/_common/common.runtime.properties b/examples/conf/druid/single-server/small/_common/common.runtime.properties
index fbd790f..0fa2e40 100644
--- a/examples/conf/druid/single-server/small/_common/common.runtime.properties
+++ b/examples/conf/druid/single-server/small/_common/common.runtime.properties
@@ -139,6 +139,9 @@ druid.server.hiddenProperties=["druid.s3.accessKey","druid.s3.secretKey","druid.
 #
 druid.sql.enable=true
 
+# Plan SQL queries that contain exact DISTINCT aggregations using grouping sets
+druid.sql.planner.useGroupingSetForExactDistinct=true
+
 #
 # Lookups
 #
diff --git a/examples/conf/druid/single-server/xlarge/_common/common.runtime.properties b/examples/conf/druid/single-server/xlarge/_common/common.runtime.properties
index fbd790f..0fa2e40 100644
--- a/examples/conf/druid/single-server/xlarge/_common/common.runtime.properties
+++ b/examples/conf/druid/single-server/xlarge/_common/common.runtime.properties
@@ -139,6 +139,9 @@ druid.server.hiddenProperties=["druid.s3.accessKey","druid.s3.secretKey","druid.
 #
 druid.sql.enable=true
 
+# Plan SQL queries that contain exact DISTINCT aggregations using grouping sets
+druid.sql.planner.useGroupingSetForExactDistinct=true
+
 #
 # Lookups
 #
diff --git a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteQueryTest.java b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteQueryTest.java
index 3c77792..4c63b21 100644
--- a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteQueryTest.java
+++ b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteQueryTest.java
@@ -2144,6 +2144,99 @@ public class CalciteQueryTest extends BaseCalciteQueryTest
   }
 
   @Test
+  public void testExactCountDistinctWithFilter() throws Exception
+  {
+
+    final String sqlQuery = "SELECT COUNT(DISTINCT foo.dim1) FILTER(WHERE foo.cnt = 1), SUM(foo.cnt) FROM druid.foo";
+    // When useApproximateCountDistinct=false and useGroupingSetForExactDistinct=false, planning fails due
+    // to a bug in Calcite's rule (AggregateExpandDistinctAggregatesRule)
+    try {
+      testQuery(
+          PLANNER_CONFIG_NO_HLL.withOverrides(ImmutableMap.of(
+              PlannerConfig.CTX_KEY_USE_GROUPING_SET_FOR_EXACT_DISTINCT,
+              "false"
+          )), // Enable exact count distinct
+          sqlQuery,
+          CalciteTests.REGULAR_USER_AUTH_RESULT,
+          ImmutableList.of(),
+          ImmutableList.of()
+      );
+      Assert.fail("query execution should fail");
+    }
+    catch (RuntimeException e) {
+      Assert.assertTrue(e.getMessage().contains("Error while applying rule AggregateExpandDistinctAggregatesRule"));
+    }
+
+    requireMergeBuffers(3);
+    testQuery(
+        PLANNER_CONFIG_NO_HLL.withOverrides(ImmutableMap.of(
+            PlannerConfig.CTX_KEY_USE_GROUPING_SET_FOR_EXACT_DISTINCT,
+            "true"
+        )),
+        sqlQuery,
+        CalciteTests.REGULAR_USER_AUTH_RESULT,
+        ImmutableList.of(
+            GroupByQuery.builder()
+                        .setDataSource(
+                            new QueryDataSource(
+                                GroupByQuery.builder()
+                                            .setDataSource(CalciteTests.DATASOURCE1)
+                                            .setInterval(querySegmentSpec(Filtration.eternity()))
+                                            .setGranularity(Granularities.ALL)
+                                            .setVirtualColumns(expressionVirtualColumn(
+                                                "v0",
+                                                NullHandling.replaceWithDefault()
+                                                ? "(\"cnt\" == 1)"
+                                                : "((\"cnt\" == 1) > 0)",
+                                                ColumnType.LONG
+                                            ))
+                                            .setDimensions(dimensions(
+                                                new DefaultDimensionSpec("dim1", "d0", ColumnType.STRING),
+                                                new DefaultDimensionSpec("v0", "d1", ColumnType.LONG)
+                                            ))
+                                            .setAggregatorSpecs(
+                                                aggregators(
+                                                    new LongSumAggregatorFactory("a0", "cnt"),
+                                                    new GroupingAggregatorFactory(
+                                                        "a1",
+                                                        Arrays.asList("dim1", "v0")
+                                                    )
+                                                )
+                                            )
+                                            .setSubtotalsSpec(
+                                                ImmutableList.of(
+                                                    ImmutableList.of("d0", "d1"),
+                                                    ImmutableList.of()
+                                                )
+                                            )
+                                            .build()
+                            )
+                        )
+                        .setInterval(querySegmentSpec(Filtration.eternity()))
+                        .setGranularity(Granularities.ALL)
+                        .setAggregatorSpecs(aggregators(
+                            new FilteredAggregatorFactory(
+                                new CountAggregatorFactory("_a0"),
+                                and(
+                                    not(selector("d0", null, null)),
+                                    selector("a1", "0", null)
+                                )
+                            ),
+                            new FilteredAggregatorFactory(
+                                new LongMinAggregatorFactory("_a1", "a0"),
+                                selector("a1", "3", null)
+                            )
+                        ))
+                        .setContext(QUERY_CONTEXT_DEFAULT)
+                        .build()
+        ),
+        ImmutableList.of(
+            new Object[]{NullHandling.replaceWithDefault() ? 5L : 6L, 6L}
+        )
+    );
+  }
+
+  @Test
   public void testHavingOnFloatSum() throws Exception
   {
     testQuery(

---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@druid.apache.org
For additional commands, e-mail: commits-help@druid.apache.org