You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pinot.apache.org by ro...@apache.org on 2023/07/29 18:16:59 UTC

[pinot] branch master updated: [multistage] enable theta sketch integration tests for v2 (#11214)

This is an automated email from the ASF dual-hosted git repository.

rongr pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/pinot.git


The following commit(s) were added to refs/heads/master by this push:
     new 834c9707e8 [multistage] enable theta sketch integration tests for v2 (#11214)
834c9707e8 is described below

commit 834c9707e81dc6b40660f6eb0737f5ca3293a2e2
Author: Rong Rong <ro...@apache.org>
AuthorDate: Sat Jul 29 11:16:53 2023 -0700

    [multistage] enable theta sketch integration tests for v2 (#11214)
    
    Enable the theta sketch integration tests for v2 (the multistage engine) after #11144 and #11153.
    
    Co-authored-by: Rong Rong <ro...@startree.ai>
---
 .../tests/ThetaSketchIntegrationTest.java          | 108 +++++++++++++--------
 1 file changed, 65 insertions(+), 43 deletions(-)

diff --git a/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/ThetaSketchIntegrationTest.java b/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/ThetaSketchIntegrationTest.java
index 80e5ad176b..df44244166 100644
--- a/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/ThetaSketchIntegrationTest.java
+++ b/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/ThetaSketchIntegrationTest.java
@@ -255,15 +255,15 @@ public class ThetaSketchIntegrationTest extends BaseClusterIntegrationTest {
       int expected = 50 + 60 + 70 + 110 + 120 + 130;
       runAndAssert(query, expected);
 
-      /*
-      query = "select distinctCountThetaSketch(thetaSketchCol, '', 'dimName = ''gender'' and dimValue = ''Female''', "
-          + "'$1') from " + DEFAULT_TABLE_NAME;
+      query = "select getThetaSketchEstimate(distinctCountRAWThetaSketch(thetaSketchCol)"
+          + " FILTER (WHERE dimName = 'gender' and dimValue = 'Female')) from " + DEFAULT_TABLE_NAME;
       runAndAssert(query, expected);
 
-      query = "select distinctCountThetaSketch(thetaSketchCol, '', "
-          + "'dimName = ''gender''', 'dimValue = ''Female''', 'SET_INTERSECT($1, $2)') from " + DEFAULT_TABLE_NAME;
+      query = "select GET_THETA_SKETCH_ESTIMATE(THETA_SKETCH_INTERSECT( "
+          + "    DISTINCT_COUNT_RAW_THETA_SKETCH(thetaSketchCol, '') FILTER (WHERE dimName = 'gender'),"
+          + "    DISTINCT_COUNT_RAW_THETA_SKETCH(thetaSketchCol, '') FILTER (WHERE dimValue = 'Female'))) "
+          + "  FROM " + DEFAULT_TABLE_NAME;
       runAndAssert(query, expected);
-       */
     }
 
     // gender = male
@@ -273,16 +273,15 @@ public class ThetaSketchIntegrationTest extends BaseClusterIntegrationTest {
       int expected = 80 + 90 + 100 + 140 + 150 + 160;
       runAndAssert(query, expected);
 
-      /*
-      query =
-          "select distinctCountThetaSketch(thetaSketchCol, '', 'dimName = ''gender'' and dimValue = ''Male''', '$1') "
-              + "from " + DEFAULT_TABLE_NAME;
+      query = "select getThetaSketchEstimate(distinctCountRAWThetaSketch(thetaSketchCol)"
+          + " FILTER (WHERE dimName = 'gender' and dimValue = 'Male')) from " + DEFAULT_TABLE_NAME;
       runAndAssert(query, expected);
 
-      query = "select distinctCountThetaSketch(thetaSketchCol, '', "
-          + "'dimName = ''gender''', 'dimValue = ''Male''', 'SET_INTERSECT($1, $2)') from " + DEFAULT_TABLE_NAME;
+      query = "select GET_THETA_SKETCH_ESTIMATE(THETA_SKETCH_INTERSECT( "
+          + "    DISTINCT_COUNT_RAW_THETA_SKETCH(thetaSketchCol, '') FILTER (WHERE dimName = 'gender'),"
+          + "    DISTINCT_COUNT_RAW_THETA_SKETCH(thetaSketchCol, '') FILTER (WHERE dimValue = 'Male'))) "
+          + "  FROM " + DEFAULT_TABLE_NAME;
       runAndAssert(query, expected);
-       */
     }
 
     // course = math
@@ -292,66 +291,89 @@ public class ThetaSketchIntegrationTest extends BaseClusterIntegrationTest {
       int expected = 50 + 80 + 110 + 140;
       runAndAssert(query, expected);
 
-      /*
-      query =
-          "select distinctCountThetaSketch(thetaSketchCol, '', 'dimName = ''course'' and dimValue = ''Math''', '$1') "
-              + "from " + DEFAULT_TABLE_NAME;
+      query = "select getThetaSketchEstimate(distinctCountRAWThetaSketch(thetaSketchCol)"
+          + " FILTER (WHERE dimName = 'course' and dimValue = 'Math')) from " + DEFAULT_TABLE_NAME;
       runAndAssert(query, expected);
 
-      query = "select distinctCountThetaSketch(thetaSketchCol, '', "
-          + "'dimName = ''course''', 'dimValue = ''Math''', 'SET_INTERSECT($1, $2)') from " + DEFAULT_TABLE_NAME;
+      query = "select GET_THETA_SKETCH_ESTIMATE(THETA_SKETCH_INTERSECT( "
+          + "    DISTINCT_COUNT_RAW_THETA_SKETCH(thetaSketchCol, '') FILTER (WHERE dimName = 'course'),"
+          + "    DISTINCT_COUNT_RAW_THETA_SKETCH(thetaSketchCol, '') FILTER (WHERE dimValue = 'Math'))) "
+          + "  FROM " + DEFAULT_TABLE_NAME;
       runAndAssert(query, expected);
-       */
     }
 
-    /*
     // gender = female INTERSECT course = math
     {
-      String query = "select distinctCountThetaSketch(thetaSketchCol, '', "
-          + "'dimName = ''gender'' and dimValue = ''Female''', 'dimName = ''course'' and dimValue = ''Math''', "
-          + "'SET_INTERSECT($1, $2)') from " + DEFAULT_TABLE_NAME;
+      String query = "select GET_THETA_SKETCH_ESTIMATE(THETA_SKETCH_INTERSECT( "
+          + "    DISTINCT_COUNT_RAW_THETA_SKETCH(thetaSketchCol, '') FILTER ("
+          + "        WHERE dimName = 'gender' and dimValue = 'Female'), "
+          + "    DISTINCT_COUNT_RAW_THETA_SKETCH(thetaSketchCol, '') FILTER ("
+          + "        WHERE dimName = 'course' and dimValue = 'Math'))) "
+          + "  FROM " + DEFAULT_TABLE_NAME;
       int expected = 50 + 110;
       runAndAssert(query, expected);
 
-      query = "select distinctCountThetaSketch(thetaSketchCol, '', "
-          + "'dimName = ''gender''', 'dimValue = ''Female''', 'dimName = ''course''', 'dimValue = ''Math''', "
-          + "'SET_INTERSECT($1, $2, $3, $4)') from " + DEFAULT_TABLE_NAME;
+      query = "select GET_THETA_SKETCH_ESTIMATE(THETA_SKETCH_INTERSECT( "
+          + "    DISTINCT_COUNT_RAW_THETA_SKETCH(thetaSketchCol, '') FILTER (WHERE dimName = 'gender'), "
+          + "    DISTINCT_COUNT_RAW_THETA_SKETCH(thetaSketchCol, '') FILTER (WHERE dimValue = 'Female'), "
+          + "    DISTINCT_COUNT_RAW_THETA_SKETCH(thetaSketchCol, '') FILTER (WHERE dimName = 'course'), "
+          + "    DISTINCT_COUNT_RAW_THETA_SKETCH(thetaSketchCol, '') FILTER (WHERE dimValue = 'Math'))) "
+          + "  FROM " + DEFAULT_TABLE_NAME;
       runAndAssert(query, expected);
 
-      query = "select distinctCountThetaSketch(thetaSketchCol, '', "
-          + "'dimName = ''gender''', 'dimValue = ''Female''', 'dimName = ''course''', 'dimValue = ''Math''', "
-          + "'SET_INTERSECT(SET_INTERSECT($1, $2), SET_INTERSECT($3, $4))') from " + DEFAULT_TABLE_NAME;
+      query = "select GET_THETA_SKETCH_ESTIMATE(THETA_SKETCH_INTERSECT(THETA_SKETCH_INTERSECT("
+          + "    DISTINCT_COUNT_RAW_THETA_SKETCH(thetaSketchCol, '') FILTER (WHERE dimName = 'gender'), "
+          + "    DISTINCT_COUNT_RAW_THETA_SKETCH(thetaSketchCol, '') FILTER (WHERE dimValue = 'Female')), "
+          + "  THETA_SKETCH_INTERSECT("
+          + "    DISTINCT_COUNT_RAW_THETA_SKETCH(thetaSketchCol, '') FILTER (WHERE dimName = 'course'), "
+          + "    DISTINCT_COUNT_RAW_THETA_SKETCH(thetaSketchCol, '') FILTER (WHERE dimValue = 'Math')))) "
+          + "  FROM " + DEFAULT_TABLE_NAME;
       runAndAssert(query, expected);
     }
 
     // gender = male UNION course = biology
     {
-      String query = "select distinctCountThetaSketch(thetaSketchCol, '', "
-          + "'dimName = ''gender'' and dimValue = ''Male''', 'dimName = ''course'' and dimValue = ''Biology''', "
-          + "'SET_UNION($1, $2)') from " + DEFAULT_TABLE_NAME;
+      String query = "select GET_THETA_SKETCH_ESTIMATE(THETA_SKETCH_UNION( "
+          + "    DISTINCT_COUNT_RAW_THETA_SKETCH(thetaSketchCol, '') FILTER ("
+          + "        WHERE dimName = 'gender' and dimValue = 'Male'), "
+          + "    DISTINCT_COUNT_RAW_THETA_SKETCH(thetaSketchCol, '') FILTER ("
+          + "        WHERE dimName = 'course' and dimValue = 'Biology'))) "
+          + "  FROM " + DEFAULT_TABLE_NAME;
       int expected = 70 + 80 + 90 + 100 + 130 + 140 + 150 + 160;
       runAndAssert(query, expected);
 
-      query = "select distinctCountThetaSketch(thetaSketchCol, '', "
-          + "'dimName = ''gender''', 'dimValue = ''Male''', 'dimName = ''course''', 'dimValue = ''Biology''', "
-          + "'SET_UNION(SET_INTERSECT($1, $2), SET_INTERSECT($3, $4))') from " + DEFAULT_TABLE_NAME;
+      query = "select GET_THETA_SKETCH_ESTIMATE(THETA_SKETCH_UNION("
+          + "  THETA_SKETCH_INTERSECT("
+          + "    DISTINCT_COUNT_RAW_THETA_SKETCH(thetaSketchCol, '') FILTER (WHERE dimName = 'gender'), "
+          + "    DISTINCT_COUNT_RAW_THETA_SKETCH(thetaSketchCol, '') FILTER (WHERE dimValue = 'Male')), "
+          + "  THETA_SKETCH_INTERSECT("
+          + "    DISTINCT_COUNT_RAW_THETA_SKETCH(thetaSketchCol, '') FILTER (WHERE dimName = 'course'), "
+          + "    DISTINCT_COUNT_RAW_THETA_SKETCH(thetaSketchCol, '') FILTER (WHERE dimValue = 'Biology')))) "
+          + "  FROM " + DEFAULT_TABLE_NAME;
       runAndAssert(query, expected);
     }
 
     // gender = female DIFF course = history
     {
-      String query = "select distinctCountThetaSketch(thetaSketchCol, '', "
-          + "'dimName = ''gender'' and dimValue = ''Female''', 'dimName = ''course'' and dimValue = ''History''', "
-          + "'SET_DIFF($1, $2)') from " + DEFAULT_TABLE_NAME;
+      String query = "select GET_THETA_SKETCH_ESTIMATE(THETA_SKETCH_DIFF( "
+          + "    DISTINCT_COUNT_RAW_THETA_SKETCH(thetaSketchCol, '') FILTER ("
+          + "        WHERE dimName = 'gender' and dimValue = 'Female'), "
+          + "    DISTINCT_COUNT_RAW_THETA_SKETCH(thetaSketchCol, '') FILTER ("
+          + "        WHERE dimName = 'course' and dimValue = 'History'))) "
+          + "  FROM " + DEFAULT_TABLE_NAME;
       int expected = 50 + 110 + 70 + 130;
       runAndAssert(query, expected);
 
-      query = "select distinctCountThetaSketch(thetaSketchCol, '', "
-          + "'dimName = ''gender''', 'dimValue = ''Female''', 'dimName = ''course''', 'dimValue = ''History''', "
-          + "'SET_DIFF(SET_INTERSECT($1, $2), SET_INTERSECT($3, $4))') from " + DEFAULT_TABLE_NAME;
+      query = "select GET_THETA_SKETCH_ESTIMATE(THETA_SKETCH_DIFF("
+          + "  THETA_SKETCH_INTERSECT("
+          + "    DISTINCT_COUNT_RAW_THETA_SKETCH(thetaSketchCol, '') FILTER (WHERE dimName = 'gender'), "
+          + "    DISTINCT_COUNT_RAW_THETA_SKETCH(thetaSketchCol, '') FILTER (WHERE dimValue = 'Female')), "
+          + "  THETA_SKETCH_INTERSECT("
+          + "    DISTINCT_COUNT_RAW_THETA_SKETCH(thetaSketchCol, '') FILTER (WHERE dimName = 'course'), "
+          + "    DISTINCT_COUNT_RAW_THETA_SKETCH(thetaSketchCol, '') FILTER (WHERE dimValue = 'History')))) "
+          + "  FROM " + DEFAULT_TABLE_NAME;
       runAndAssert(query, expected);
     }
-     */
 
     // group by gender
     {


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@pinot.apache.org
For additional commands, e-mail: commits-help@pinot.apache.org