You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pinot.apache.org by ro...@apache.org on 2023/07/29 18:16:59 UTC
[pinot] branch master updated: [multistage] enable theta sketch integration tests for v2 (#11214)
This is an automated email from the ASF dual-hosted git repository.
rongr pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/pinot.git
The following commit(s) were added to refs/heads/master by this push:
new 834c9707e8 [multistage] enable theta sketch integration tests for v2 (#11214)
834c9707e8 is described below
commit 834c9707e81dc6b40660f6eb0737f5ca3293a2e2
Author: Rong Rong <ro...@apache.org>
AuthorDate: Sat Jul 29 11:16:53 2023 -0700
[multistage] enable theta sketch integration tests for v2 (#11214)
Enable the theta sketch integration tests for v2 after #11144 and #11153.
Co-authored-by: Rong Rong <ro...@startree.ai>
---
.../tests/ThetaSketchIntegrationTest.java | 108 +++++++++++++--------
1 file changed, 65 insertions(+), 43 deletions(-)
diff --git a/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/ThetaSketchIntegrationTest.java b/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/ThetaSketchIntegrationTest.java
index 80e5ad176b..df44244166 100644
--- a/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/ThetaSketchIntegrationTest.java
+++ b/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/ThetaSketchIntegrationTest.java
@@ -255,15 +255,15 @@ public class ThetaSketchIntegrationTest extends BaseClusterIntegrationTest {
int expected = 50 + 60 + 70 + 110 + 120 + 130;
runAndAssert(query, expected);
- /*
- query = "select distinctCountThetaSketch(thetaSketchCol, '', 'dimName = ''gender'' and dimValue = ''Female''', "
- + "'$1') from " + DEFAULT_TABLE_NAME;
+ query = "select getThetaSketchEstimate(distinctCountRAWThetaSketch(thetaSketchCol)"
+ + " FILTER (WHERE dimName = 'gender' and dimValue = 'Female')) from " + DEFAULT_TABLE_NAME;
runAndAssert(query, expected);
- query = "select distinctCountThetaSketch(thetaSketchCol, '', "
- + "'dimName = ''gender''', 'dimValue = ''Female''', 'SET_INTERSECT($1, $2)') from " + DEFAULT_TABLE_NAME;
+ query = "select GET_THETA_SKETCH_ESTIMATE(THETA_SKETCH_INTERSECT( "
+ + " DISTINCT_COUNT_RAW_THETA_SKETCH(thetaSketchCol, '') FILTER (WHERE dimName = 'gender'),"
+ + " DISTINCT_COUNT_RAW_THETA_SKETCH(thetaSketchCol, '') FILTER (WHERE dimValue = 'Female'))) "
+ + " FROM " + DEFAULT_TABLE_NAME;
runAndAssert(query, expected);
- */
}
// gender = male
@@ -273,16 +273,15 @@ public class ThetaSketchIntegrationTest extends BaseClusterIntegrationTest {
int expected = 80 + 90 + 100 + 140 + 150 + 160;
runAndAssert(query, expected);
- /*
- query =
- "select distinctCountThetaSketch(thetaSketchCol, '', 'dimName = ''gender'' and dimValue = ''Male''', '$1') "
- + "from " + DEFAULT_TABLE_NAME;
+ query = "select getThetaSketchEstimate(distinctCountRAWThetaSketch(thetaSketchCol)"
+ + " FILTER (WHERE dimName = 'gender' and dimValue = 'Male')) from " + DEFAULT_TABLE_NAME;
runAndAssert(query, expected);
- query = "select distinctCountThetaSketch(thetaSketchCol, '', "
- + "'dimName = ''gender''', 'dimValue = ''Male''', 'SET_INTERSECT($1, $2)') from " + DEFAULT_TABLE_NAME;
+ query = "select GET_THETA_SKETCH_ESTIMATE(THETA_SKETCH_INTERSECT( "
+ + " DISTINCT_COUNT_RAW_THETA_SKETCH(thetaSketchCol, '') FILTER (WHERE dimName = 'gender'),"
+ + " DISTINCT_COUNT_RAW_THETA_SKETCH(thetaSketchCol, '') FILTER (WHERE dimValue = 'Male'))) "
+ + " FROM " + DEFAULT_TABLE_NAME;
runAndAssert(query, expected);
- */
}
// course = math
@@ -292,66 +291,89 @@ public class ThetaSketchIntegrationTest extends BaseClusterIntegrationTest {
int expected = 50 + 80 + 110 + 140;
runAndAssert(query, expected);
- /*
- query =
- "select distinctCountThetaSketch(thetaSketchCol, '', 'dimName = ''course'' and dimValue = ''Math''', '$1') "
- + "from " + DEFAULT_TABLE_NAME;
+ query = "select getThetaSketchEstimate(distinctCountRAWThetaSketch(thetaSketchCol)"
+ + " FILTER (WHERE dimName = 'course' and dimValue = 'Math')) from " + DEFAULT_TABLE_NAME;
runAndAssert(query, expected);
- query = "select distinctCountThetaSketch(thetaSketchCol, '', "
- + "'dimName = ''course''', 'dimValue = ''Math''', 'SET_INTERSECT($1, $2)') from " + DEFAULT_TABLE_NAME;
+ query = "select GET_THETA_SKETCH_ESTIMATE(THETA_SKETCH_INTERSECT( "
+ + " DISTINCT_COUNT_RAW_THETA_SKETCH(thetaSketchCol, '') FILTER (WHERE dimName = 'course'),"
+ + " DISTINCT_COUNT_RAW_THETA_SKETCH(thetaSketchCol, '') FILTER (WHERE dimValue = 'Math'))) "
+ + " FROM " + DEFAULT_TABLE_NAME;
runAndAssert(query, expected);
- */
}
- /*
// gender = female INTERSECT course = math
{
- String query = "select distinctCountThetaSketch(thetaSketchCol, '', "
- + "'dimName = ''gender'' and dimValue = ''Female''', 'dimName = ''course'' and dimValue = ''Math''', "
- + "'SET_INTERSECT($1, $2)') from " + DEFAULT_TABLE_NAME;
+ String query = "select GET_THETA_SKETCH_ESTIMATE(THETA_SKETCH_INTERSECT( "
+ + " DISTINCT_COUNT_RAW_THETA_SKETCH(thetaSketchCol, '') FILTER ("
+ + " WHERE dimName = 'gender' and dimValue = 'Female'), "
+ + " DISTINCT_COUNT_RAW_THETA_SKETCH(thetaSketchCol, '') FILTER ("
+ + " WHERE dimName = 'course' and dimValue = 'Math'))) "
+ + " FROM " + DEFAULT_TABLE_NAME;
int expected = 50 + 110;
runAndAssert(query, expected);
- query = "select distinctCountThetaSketch(thetaSketchCol, '', "
- + "'dimName = ''gender''', 'dimValue = ''Female''', 'dimName = ''course''', 'dimValue = ''Math''', "
- + "'SET_INTERSECT($1, $2, $3, $4)') from " + DEFAULT_TABLE_NAME;
+ query = "select GET_THETA_SKETCH_ESTIMATE(THETA_SKETCH_INTERSECT( "
+ + " DISTINCT_COUNT_RAW_THETA_SKETCH(thetaSketchCol, '') FILTER (WHERE dimName = 'gender'), "
+ + " DISTINCT_COUNT_RAW_THETA_SKETCH(thetaSketchCol, '') FILTER (WHERE dimValue = 'Female'), "
+ + " DISTINCT_COUNT_RAW_THETA_SKETCH(thetaSketchCol, '') FILTER (WHERE dimName = 'course'), "
+ + " DISTINCT_COUNT_RAW_THETA_SKETCH(thetaSketchCol, '') FILTER (WHERE dimValue = 'Math'))) "
+ + " FROM " + DEFAULT_TABLE_NAME;
runAndAssert(query, expected);
- query = "select distinctCountThetaSketch(thetaSketchCol, '', "
- + "'dimName = ''gender''', 'dimValue = ''Female''', 'dimName = ''course''', 'dimValue = ''Math''', "
- + "'SET_INTERSECT(SET_INTERSECT($1, $2), SET_INTERSECT($3, $4))') from " + DEFAULT_TABLE_NAME;
+ query = "select GET_THETA_SKETCH_ESTIMATE(THETA_SKETCH_INTERSECT(THETA_SKETCH_INTERSECT("
+ + " DISTINCT_COUNT_RAW_THETA_SKETCH(thetaSketchCol, '') FILTER (WHERE dimName = 'gender'), "
+ + " DISTINCT_COUNT_RAW_THETA_SKETCH(thetaSketchCol, '') FILTER (WHERE dimValue = 'Female')), "
+ + " THETA_SKETCH_INTERSECT("
+ + " DISTINCT_COUNT_RAW_THETA_SKETCH(thetaSketchCol, '') FILTER (WHERE dimName = 'course'), "
+ + " DISTINCT_COUNT_RAW_THETA_SKETCH(thetaSketchCol, '') FILTER (WHERE dimValue = 'Math')))) "
+ + " FROM " + DEFAULT_TABLE_NAME;
runAndAssert(query, expected);
}
// gender = male UNION course = biology
{
- String query = "select distinctCountThetaSketch(thetaSketchCol, '', "
- + "'dimName = ''gender'' and dimValue = ''Male''', 'dimName = ''course'' and dimValue = ''Biology''', "
- + "'SET_UNION($1, $2)') from " + DEFAULT_TABLE_NAME;
+ String query = "select GET_THETA_SKETCH_ESTIMATE(THETA_SKETCH_UNION( "
+ + " DISTINCT_COUNT_RAW_THETA_SKETCH(thetaSketchCol, '') FILTER ("
+ + " WHERE dimName = 'gender' and dimValue = 'Male'), "
+ + " DISTINCT_COUNT_RAW_THETA_SKETCH(thetaSketchCol, '') FILTER ("
+ + " WHERE dimName = 'course' and dimValue = 'Biology'))) "
+ + " FROM " + DEFAULT_TABLE_NAME;
int expected = 70 + 80 + 90 + 100 + 130 + 140 + 150 + 160;
runAndAssert(query, expected);
- query = "select distinctCountThetaSketch(thetaSketchCol, '', "
- + "'dimName = ''gender''', 'dimValue = ''Male''', 'dimName = ''course''', 'dimValue = ''Biology''', "
- + "'SET_UNION(SET_INTERSECT($1, $2), SET_INTERSECT($3, $4))') from " + DEFAULT_TABLE_NAME;
+ query = "select GET_THETA_SKETCH_ESTIMATE(THETA_SKETCH_UNION("
+ + " THETA_SKETCH_INTERSECT("
+ + " DISTINCT_COUNT_RAW_THETA_SKETCH(thetaSketchCol, '') FILTER (WHERE dimName = 'gender'), "
+ + " DISTINCT_COUNT_RAW_THETA_SKETCH(thetaSketchCol, '') FILTER (WHERE dimValue = 'Male')), "
+ + " THETA_SKETCH_INTERSECT("
+ + " DISTINCT_COUNT_RAW_THETA_SKETCH(thetaSketchCol, '') FILTER (WHERE dimName = 'course'), "
+ + " DISTINCT_COUNT_RAW_THETA_SKETCH(thetaSketchCol, '') FILTER (WHERE dimValue = 'Biology')))) "
+ + " FROM " + DEFAULT_TABLE_NAME;
runAndAssert(query, expected);
}
// gender = female DIFF course = history
{
- String query = "select distinctCountThetaSketch(thetaSketchCol, '', "
- + "'dimName = ''gender'' and dimValue = ''Female''', 'dimName = ''course'' and dimValue = ''History''', "
- + "'SET_DIFF($1, $2)') from " + DEFAULT_TABLE_NAME;
+ String query = "select GET_THETA_SKETCH_ESTIMATE(THETA_SKETCH_DIFF( "
+ + " DISTINCT_COUNT_RAW_THETA_SKETCH(thetaSketchCol, '') FILTER ("
+ + " WHERE dimName = 'gender' and dimValue = 'Female'), "
+ + " DISTINCT_COUNT_RAW_THETA_SKETCH(thetaSketchCol, '') FILTER ("
+ + " WHERE dimName = 'course' and dimValue = 'History'))) "
+ + " FROM " + DEFAULT_TABLE_NAME;
int expected = 50 + 110 + 70 + 130;
runAndAssert(query, expected);
- query = "select distinctCountThetaSketch(thetaSketchCol, '', "
- + "'dimName = ''gender''', 'dimValue = ''Female''', 'dimName = ''course''', 'dimValue = ''History''', "
- + "'SET_DIFF(SET_INTERSECT($1, $2), SET_INTERSECT($3, $4))') from " + DEFAULT_TABLE_NAME;
+ query = "select GET_THETA_SKETCH_ESTIMATE(THETA_SKETCH_DIFF("
+ + " THETA_SKETCH_INTERSECT("
+ + " DISTINCT_COUNT_RAW_THETA_SKETCH(thetaSketchCol, '') FILTER (WHERE dimName = 'gender'), "
+ + " DISTINCT_COUNT_RAW_THETA_SKETCH(thetaSketchCol, '') FILTER (WHERE dimValue = 'Female')), "
+ + " THETA_SKETCH_INTERSECT("
+ + " DISTINCT_COUNT_RAW_THETA_SKETCH(thetaSketchCol, '') FILTER (WHERE dimName = 'course'), "
+ + " DISTINCT_COUNT_RAW_THETA_SKETCH(thetaSketchCol, '') FILTER (WHERE dimValue = 'History')))) "
+ + " FROM " + DEFAULT_TABLE_NAME;
runAndAssert(query, expected);
}
- */
// group by gender
{
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@pinot.apache.org
For additional commands, e-mail: commits-help@pinot.apache.org