You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pinot.apache.org by ro...@apache.org on 2023/07/25 00:13:51 UTC
[pinot] branch master updated: [multistage] Register theta sketch aggregation functions in v2 query engine (#11143)
This is an automated email from the ASF dual-hosted git repository.
rongr pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/pinot.git
The following commit(s) were added to refs/heads/master by this push:
new ff6d527fbf [multistage] Register theta sketch aggregation functions in v2 query engine (#11143)
ff6d527fbf is described below
commit ff6d527fbfe71cfa1afff601627b796ea59b168a
Author: Xiang Fu <xi...@gmail.com>
AuthorDate: Mon Jul 24 17:13:45 2023 -0700
[multistage] Register theta sketch aggregation functions in v2 query engine (#11143)
* Support distinctCountThetaSketch in v2
* VARIADIC variants of the ThetaSketch functions are not supported
---
.../tests/ThetaSketchIntegrationTest.java | 139 ++++++++++++++++++++-
.../pinot/segment/spi/AggregationFunctionType.java | 10 +-
2 files changed, 144 insertions(+), 5 deletions(-)
diff --git a/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/ThetaSketchIntegrationTest.java b/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/ThetaSketchIntegrationTest.java
index e032427513..80e5ad176b 100644
--- a/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/ThetaSketchIntegrationTest.java
+++ b/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/ThetaSketchIntegrationTest.java
@@ -107,9 +107,10 @@ public class ThetaSketchIntegrationTest extends BaseClusterIntegrationTest {
return 10;
}
- @Test
- public void testThetaSketchQuery()
+ @Test(dataProvider = "useV1QueryEngine")
+ public void testThetaSketchQueryV1(boolean useMultiStageQueryEngine)
throws Exception {
+ setUseMultiStageQueryEngine(useMultiStageQueryEngine);
/*
Original data:
@@ -206,7 +207,7 @@ public class ThetaSketchIntegrationTest extends BaseClusterIntegrationTest {
runAndAssert(query, expected);
}
- // gender = female DIFF course = history
+ // gender = female DIFF course = history
{
String query = "select distinctCountThetaSketch(thetaSketchCol, '', "
+ "'dimName = ''gender'' and dimValue = ''Female''', 'dimName = ''course'' and dimValue = ''History''', "
@@ -230,6 +231,138 @@ public class ThetaSketchIntegrationTest extends BaseClusterIntegrationTest {
}
}
+ @Test(dataProvider = "useV2QueryEngine")
+ public void testThetaSketchQueryV2(boolean useMultiStageQueryEngine)
+ throws Exception {
+ setUseMultiStageQueryEngine(useMultiStageQueryEngine);
+ /*
+ Original data:
+
+ Gender Course Shard#1 Shard#2
+ -------- ------- ------- -------
+ Female Math 50 110
+ Female History 60 120
+ Female Biology 70 130
+ Male Math 80 140
+ Male History 90 150
+ Male Biology 100 160
+ */
+
+ // gender = female
+ {
+ String query = "select distinctCountThetaSketch(thetaSketchCol) from " + DEFAULT_TABLE_NAME
+ + " where dimName = 'gender' and dimValue = 'Female'";
+ int expected = 50 + 60 + 70 + 110 + 120 + 130;
+ runAndAssert(query, expected);
+
+ /*
+ query = "select distinctCountThetaSketch(thetaSketchCol, '', 'dimName = ''gender'' and dimValue = ''Female''', "
+ + "'$1') from " + DEFAULT_TABLE_NAME;
+ runAndAssert(query, expected);
+
+ query = "select distinctCountThetaSketch(thetaSketchCol, '', "
+ + "'dimName = ''gender''', 'dimValue = ''Female''', 'SET_INTERSECT($1, $2)') from " + DEFAULT_TABLE_NAME;
+ runAndAssert(query, expected);
+ */
+ }
+
+ // gender = male
+ {
+ String query = "select distinctCountThetaSketch(thetaSketchCol) from " + DEFAULT_TABLE_NAME
+ + " where dimName = 'gender' and dimValue = 'Male'";
+ int expected = 80 + 90 + 100 + 140 + 150 + 160;
+ runAndAssert(query, expected);
+
+ /*
+ query =
+ "select distinctCountThetaSketch(thetaSketchCol, '', 'dimName = ''gender'' and dimValue = ''Male''', '$1') "
+ + "from " + DEFAULT_TABLE_NAME;
+ runAndAssert(query, expected);
+
+ query = "select distinctCountThetaSketch(thetaSketchCol, '', "
+ + "'dimName = ''gender''', 'dimValue = ''Male''', 'SET_INTERSECT($1, $2)') from " + DEFAULT_TABLE_NAME;
+ runAndAssert(query, expected);
+ */
+ }
+
+ // course = math
+ {
+ String query = "select distinctCountThetaSketch(thetaSketchCol) from " + DEFAULT_TABLE_NAME
+ + " where dimName = 'course' AND dimValue = 'Math'";
+ int expected = 50 + 80 + 110 + 140;
+ runAndAssert(query, expected);
+
+ /*
+ query =
+ "select distinctCountThetaSketch(thetaSketchCol, '', 'dimName = ''course'' and dimValue = ''Math''', '$1') "
+ + "from " + DEFAULT_TABLE_NAME;
+ runAndAssert(query, expected);
+
+ query = "select distinctCountThetaSketch(thetaSketchCol, '', "
+ + "'dimName = ''course''', 'dimValue = ''Math''', 'SET_INTERSECT($1, $2)') from " + DEFAULT_TABLE_NAME;
+ runAndAssert(query, expected);
+ */
+ }
+
+ /*
+ // gender = female INTERSECT course = math
+ {
+ String query = "select distinctCountThetaSketch(thetaSketchCol, '', "
+ + "'dimName = ''gender'' and dimValue = ''Female''', 'dimName = ''course'' and dimValue = ''Math''', "
+ + "'SET_INTERSECT($1, $2)') from " + DEFAULT_TABLE_NAME;
+ int expected = 50 + 110;
+ runAndAssert(query, expected);
+
+ query = "select distinctCountThetaSketch(thetaSketchCol, '', "
+ + "'dimName = ''gender''', 'dimValue = ''Female''', 'dimName = ''course''', 'dimValue = ''Math''', "
+ + "'SET_INTERSECT($1, $2, $3, $4)') from " + DEFAULT_TABLE_NAME;
+ runAndAssert(query, expected);
+
+ query = "select distinctCountThetaSketch(thetaSketchCol, '', "
+ + "'dimName = ''gender''', 'dimValue = ''Female''', 'dimName = ''course''', 'dimValue = ''Math''', "
+ + "'SET_INTERSECT(SET_INTERSECT($1, $2), SET_INTERSECT($3, $4))') from " + DEFAULT_TABLE_NAME;
+ runAndAssert(query, expected);
+ }
+
+ // gender = male UNION course = biology
+ {
+ String query = "select distinctCountThetaSketch(thetaSketchCol, '', "
+ + "'dimName = ''gender'' and dimValue = ''Male''', 'dimName = ''course'' and dimValue = ''Biology''', "
+ + "'SET_UNION($1, $2)') from " + DEFAULT_TABLE_NAME;
+ int expected = 70 + 80 + 90 + 100 + 130 + 140 + 150 + 160;
+ runAndAssert(query, expected);
+
+ query = "select distinctCountThetaSketch(thetaSketchCol, '', "
+ + "'dimName = ''gender''', 'dimValue = ''Male''', 'dimName = ''course''', 'dimValue = ''Biology''', "
+ + "'SET_UNION(SET_INTERSECT($1, $2), SET_INTERSECT($3, $4))') from " + DEFAULT_TABLE_NAME;
+ runAndAssert(query, expected);
+ }
+
+ // gender = female DIFF course = history
+ {
+ String query = "select distinctCountThetaSketch(thetaSketchCol, '', "
+ + "'dimName = ''gender'' and dimValue = ''Female''', 'dimName = ''course'' and dimValue = ''History''', "
+ + "'SET_DIFF($1, $2)') from " + DEFAULT_TABLE_NAME;
+ int expected = 50 + 110 + 70 + 130;
+ runAndAssert(query, expected);
+
+ query = "select distinctCountThetaSketch(thetaSketchCol, '', "
+ + "'dimName = ''gender''', 'dimValue = ''Female''', 'dimName = ''course''', 'dimValue = ''History''', "
+ + "'SET_DIFF(SET_INTERSECT($1, $2), SET_INTERSECT($3, $4))') from " + DEFAULT_TABLE_NAME;
+ runAndAssert(query, expected);
+ }
+ */
+
+ // group by gender
+ {
+ String query = "select dimValue, distinctCountThetaSketch(thetaSketchCol) from " + DEFAULT_TABLE_NAME
+ + " where dimName = 'gender' group by dimValue";
+ ImmutableMap<String, Integer> expected =
+ ImmutableMap.of("Female", 50 + 60 + 70 + 110 + 120 + 130, "Male", 80 + 90 + 100 + 140 + 150 + 160);
+ runAndAssert(query, expected);
+ }
+ }
+
private void runAndAssert(String query, int expected)
throws Exception {
JsonNode jsonNode = postQuery(query);
diff --git a/pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/AggregationFunctionType.java b/pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/AggregationFunctionType.java
index dc9137049c..e603f04f9f 100644
--- a/pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/AggregationFunctionType.java
+++ b/pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/AggregationFunctionType.java
@@ -88,8 +88,14 @@ public enum AggregationFunctionType {
DISTINCTCOUNTRAWHLL("distinctCountRawHLL"),
DISTINCTCOUNTSMARTHLL("distinctCountSmartHLL"),
FASTHLL("fastHLL"),
- DISTINCTCOUNTTHETASKETCH("distinctCountThetaSketch"),
- DISTINCTCOUNTRAWTHETASKETCH("distinctCountRawThetaSketch"),
+ DISTINCTCOUNTTHETASKETCH("distinctCountThetaSketch", ImmutableList.of("DISTINCT_COUNT_THETA_SKETCH"),
+ SqlKind.OTHER_FUNCTION, SqlFunctionCategory.USER_DEFINED_FUNCTION,
+ OperandTypes.family(ImmutableList.of(SqlTypeFamily.ANY, SqlTypeFamily.CHARACTER), ordinal -> ordinal > 0),
+ ReturnTypes.BIGINT, ReturnTypes.explicit(SqlTypeName.OTHER)),
+ DISTINCTCOUNTRAWTHETASKETCH("distinctCountRawThetaSketch", ImmutableList.of("DISTINCT_COUNT_RAW_THETA_SKETCH"),
+ SqlKind.OTHER_FUNCTION, SqlFunctionCategory.USER_DEFINED_FUNCTION,
+ OperandTypes.family(ImmutableList.of(SqlTypeFamily.ANY, SqlTypeFamily.CHARACTER), ordinal -> ordinal > 0),
+ ReturnTypes.VARCHAR_2000, ReturnTypes.explicit(SqlTypeName.OTHER)),
DISTINCTSUM("distinctSum"),
DISTINCTAVG("distinctAvg"),
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@pinot.apache.org
For additional commands, e-mail: commits-help@pinot.apache.org