You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pinot.apache.org by ro...@apache.org on 2023/07/25 00:13:51 UTC

[pinot] branch master updated: [multistage] Register theta sketch aggregation functions in v2 query engine (#11143)

This is an automated email from the ASF dual-hosted git repository.

rongr pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/pinot.git


The following commit(s) were added to refs/heads/master by this push:
     new ff6d527fbf [multistage] Register theta sketch aggregation functions in v2 query engine (#11143)
ff6d527fbf is described below

commit ff6d527fbfe71cfa1afff601627b796ea59b168a
Author: Xiang Fu <xi...@gmail.com>
AuthorDate: Mon Jul 24 17:13:45 2023 -0700

    [multistage] Register theta sketch aggregation functions in v2 query engine (#11143)
    
    * Support distinctCountThetaSketch in v2
    * VARIADIC variants of the ThetaSketch functions are not supported
---
 .../tests/ThetaSketchIntegrationTest.java          | 139 ++++++++++++++++++++-
 .../pinot/segment/spi/AggregationFunctionType.java |  10 +-
 2 files changed, 144 insertions(+), 5 deletions(-)

diff --git a/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/ThetaSketchIntegrationTest.java b/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/ThetaSketchIntegrationTest.java
index e032427513..80e5ad176b 100644
--- a/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/ThetaSketchIntegrationTest.java
+++ b/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/ThetaSketchIntegrationTest.java
@@ -107,9 +107,10 @@ public class ThetaSketchIntegrationTest extends BaseClusterIntegrationTest {
     return 10;
   }
 
-  @Test
-  public void testThetaSketchQuery()
+  @Test(dataProvider = "useV1QueryEngine")
+  public void testThetaSketchQueryV1(boolean useMultiStageQueryEngine)
       throws Exception {
+    setUseMultiStageQueryEngine(useMultiStageQueryEngine);
     /*
     Original data:
 
@@ -206,7 +207,7 @@ public class ThetaSketchIntegrationTest extends BaseClusterIntegrationTest {
       runAndAssert(query, expected);
     }
 
-     // gender = female DIFF course = history
+    // gender = female DIFF course = history
     {
       String query = "select distinctCountThetaSketch(thetaSketchCol, '', "
           + "'dimName = ''gender'' and dimValue = ''Female''', 'dimName = ''course'' and dimValue = ''History''', "
@@ -230,6 +231,138 @@ public class ThetaSketchIntegrationTest extends BaseClusterIntegrationTest {
     }
   }
 
+  @Test(dataProvider = "useV2QueryEngine")
+  public void testThetaSketchQueryV2(boolean useMultiStageQueryEngine)
+      throws Exception {
+    setUseMultiStageQueryEngine(useMultiStageQueryEngine);
+    /*
+    Original data:
+
+    Gender    Course   Shard#1  Shard#2
+    --------  -------  -------  -------
+    Female    Math     50       110
+    Female    History  60       120
+    Female    Biology  70       130
+    Male      Math     80       140
+    Male      History  90       150
+    Male      Biology  100      160
+     */
+
+    // gender = female
+    {
+      String query = "select distinctCountThetaSketch(thetaSketchCol) from " + DEFAULT_TABLE_NAME
+          + " where dimName = 'gender' and dimValue = 'Female'";
+      int expected = 50 + 60 + 70 + 110 + 120 + 130;
+      runAndAssert(query, expected);
+
+      /*
+      query = "select distinctCountThetaSketch(thetaSketchCol, '', 'dimName = ''gender'' and dimValue = ''Female''', "
+          + "'$1') from " + DEFAULT_TABLE_NAME;
+      runAndAssert(query, expected);
+
+      query = "select distinctCountThetaSketch(thetaSketchCol, '', "
+          + "'dimName = ''gender''', 'dimValue = ''Female''', 'SET_INTERSECT($1, $2)') from " + DEFAULT_TABLE_NAME;
+      runAndAssert(query, expected);
+       */
+    }
+
+    // gender = male
+    {
+      String query = "select distinctCountThetaSketch(thetaSketchCol) from " + DEFAULT_TABLE_NAME
+          + " where dimName = 'gender' and dimValue = 'Male'";
+      int expected = 80 + 90 + 100 + 140 + 150 + 160;
+      runAndAssert(query, expected);
+
+      /*
+      query =
+          "select distinctCountThetaSketch(thetaSketchCol, '', 'dimName = ''gender'' and dimValue = ''Male''', '$1') "
+              + "from " + DEFAULT_TABLE_NAME;
+      runAndAssert(query, expected);
+
+      query = "select distinctCountThetaSketch(thetaSketchCol, '', "
+          + "'dimName = ''gender''', 'dimValue = ''Male''', 'SET_INTERSECT($1, $2)') from " + DEFAULT_TABLE_NAME;
+      runAndAssert(query, expected);
+       */
+    }
+
+    // course = math
+    {
+      String query = "select distinctCountThetaSketch(thetaSketchCol) from " + DEFAULT_TABLE_NAME
+          + " where dimName = 'course' AND dimValue = 'Math'";
+      int expected = 50 + 80 + 110 + 140;
+      runAndAssert(query, expected);
+
+      /*
+      query =
+          "select distinctCountThetaSketch(thetaSketchCol, '', 'dimName = ''course'' and dimValue = ''Math''', '$1') "
+              + "from " + DEFAULT_TABLE_NAME;
+      runAndAssert(query, expected);
+
+      query = "select distinctCountThetaSketch(thetaSketchCol, '', "
+          + "'dimName = ''course''', 'dimValue = ''Math''', 'SET_INTERSECT($1, $2)') from " + DEFAULT_TABLE_NAME;
+      runAndAssert(query, expected);
+       */
+    }
+
+    /*
+    // gender = female INTERSECT course = math
+    {
+      String query = "select distinctCountThetaSketch(thetaSketchCol, '', "
+          + "'dimName = ''gender'' and dimValue = ''Female''', 'dimName = ''course'' and dimValue = ''Math''', "
+          + "'SET_INTERSECT($1, $2)') from " + DEFAULT_TABLE_NAME;
+      int expected = 50 + 110;
+      runAndAssert(query, expected);
+
+      query = "select distinctCountThetaSketch(thetaSketchCol, '', "
+          + "'dimName = ''gender''', 'dimValue = ''Female''', 'dimName = ''course''', 'dimValue = ''Math''', "
+          + "'SET_INTERSECT($1, $2, $3, $4)') from " + DEFAULT_TABLE_NAME;
+      runAndAssert(query, expected);
+
+      query = "select distinctCountThetaSketch(thetaSketchCol, '', "
+          + "'dimName = ''gender''', 'dimValue = ''Female''', 'dimName = ''course''', 'dimValue = ''Math''', "
+          + "'SET_INTERSECT(SET_INTERSECT($1, $2), SET_INTERSECT($3, $4))') from " + DEFAULT_TABLE_NAME;
+      runAndAssert(query, expected);
+    }
+
+    // gender = male UNION course = biology
+    {
+      String query = "select distinctCountThetaSketch(thetaSketchCol, '', "
+          + "'dimName = ''gender'' and dimValue = ''Male''', 'dimName = ''course'' and dimValue = ''Biology''', "
+          + "'SET_UNION($1, $2)') from " + DEFAULT_TABLE_NAME;
+      int expected = 70 + 80 + 90 + 100 + 130 + 140 + 150 + 160;
+      runAndAssert(query, expected);
+
+      query = "select distinctCountThetaSketch(thetaSketchCol, '', "
+          + "'dimName = ''gender''', 'dimValue = ''Male''', 'dimName = ''course''', 'dimValue = ''Biology''', "
+          + "'SET_UNION(SET_INTERSECT($1, $2), SET_INTERSECT($3, $4))') from " + DEFAULT_TABLE_NAME;
+      runAndAssert(query, expected);
+    }
+
+    // gender = female DIFF course = history
+    {
+      String query = "select distinctCountThetaSketch(thetaSketchCol, '', "
+          + "'dimName = ''gender'' and dimValue = ''Female''', 'dimName = ''course'' and dimValue = ''History''', "
+          + "'SET_DIFF($1, $2)') from " + DEFAULT_TABLE_NAME;
+      int expected = 50 + 110 + 70 + 130;
+      runAndAssert(query, expected);
+
+      query = "select distinctCountThetaSketch(thetaSketchCol, '', "
+          + "'dimName = ''gender''', 'dimValue = ''Female''', 'dimName = ''course''', 'dimValue = ''History''', "
+          + "'SET_DIFF(SET_INTERSECT($1, $2), SET_INTERSECT($3, $4))') from " + DEFAULT_TABLE_NAME;
+      runAndAssert(query, expected);
+    }
+     */
+
+    // group by gender
+    {
+      String query = "select dimValue, distinctCountThetaSketch(thetaSketchCol) from " + DEFAULT_TABLE_NAME
+          + " where dimName = 'gender' group by dimValue";
+      ImmutableMap<String, Integer> expected =
+          ImmutableMap.of("Female", 50 + 60 + 70 + 110 + 120 + 130, "Male", 80 + 90 + 100 + 140 + 150 + 160);
+      runAndAssert(query, expected);
+    }
+  }
+
   private void runAndAssert(String query, int expected)
       throws Exception {
     JsonNode jsonNode = postQuery(query);
diff --git a/pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/AggregationFunctionType.java b/pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/AggregationFunctionType.java
index dc9137049c..e603f04f9f 100644
--- a/pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/AggregationFunctionType.java
+++ b/pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/AggregationFunctionType.java
@@ -88,8 +88,14 @@ public enum AggregationFunctionType {
   DISTINCTCOUNTRAWHLL("distinctCountRawHLL"),
   DISTINCTCOUNTSMARTHLL("distinctCountSmartHLL"),
   FASTHLL("fastHLL"),
-  DISTINCTCOUNTTHETASKETCH("distinctCountThetaSketch"),
-  DISTINCTCOUNTRAWTHETASKETCH("distinctCountRawThetaSketch"),
+  DISTINCTCOUNTTHETASKETCH("distinctCountThetaSketch", ImmutableList.of("DISTINCT_COUNT_THETA_SKETCH"),
+      SqlKind.OTHER_FUNCTION, SqlFunctionCategory.USER_DEFINED_FUNCTION,
+      OperandTypes.family(ImmutableList.of(SqlTypeFamily.ANY, SqlTypeFamily.CHARACTER), ordinal -> ordinal > 0),
+      ReturnTypes.BIGINT, ReturnTypes.explicit(SqlTypeName.OTHER)),
+  DISTINCTCOUNTRAWTHETASKETCH("distinctCountRawThetaSketch", ImmutableList.of("DISTINCT_COUNT_RAW_THETA_SKETCH"),
+      SqlKind.OTHER_FUNCTION, SqlFunctionCategory.USER_DEFINED_FUNCTION,
+      OperandTypes.family(ImmutableList.of(SqlTypeFamily.ANY, SqlTypeFamily.CHARACTER), ordinal -> ordinal > 0),
+      ReturnTypes.VARCHAR_2000, ReturnTypes.explicit(SqlTypeName.OTHER)),
   DISTINCTSUM("distinctSum"),
   DISTINCTAVG("distinctAvg"),
 


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@pinot.apache.org
For additional commands, e-mail: commits-help@pinot.apache.org