You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pinot.apache.org by ja...@apache.org on 2023/02/15 18:52:41 UTC

[pinot] branch master updated: Update Apache Datasketches to 3.3.0 (#10281)

This is an automated email from the ASF dual-hosted git repository.

jackie pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/pinot.git


The following commit(s) were added to refs/heads/master by this push:
     new e3a70d1186 Update Apache Datasketches to 3.3.0 (#10281)
e3a70d1186 is described below

commit e3a70d1186105349a75c4f8dd2ab3538782a0779
Author: David Cromberge <da...@permutive.com>
AuthorDate: Wed Feb 15 18:52:33 2023 +0000

    Update Apache Datasketches to 3.3.0 (#10281)
---
 ...istinctCountThetaSketchAggregationFunction.java | 26 +++++++++++-----------
 .../tests/ThetaSketchIntegrationTest.java          | 14 ++++++++++++
 pom.xml                                            |  2 +-
 3 files changed, 28 insertions(+), 14 deletions(-)

diff --git a/pinot-core/src/main/java/org/apache/pinot/core/query/aggregation/function/DistinctCountThetaSketchAggregationFunction.java b/pinot-core/src/main/java/org/apache/pinot/core/query/aggregation/function/DistinctCountThetaSketchAggregationFunction.java
index 317cb48693..a9c143bb5c 100644
--- a/pinot-core/src/main/java/org/apache/pinot/core/query/aggregation/function/DistinctCountThetaSketchAggregationFunction.java
+++ b/pinot-core/src/main/java/org/apache/pinot/core/query/aggregation/function/DistinctCountThetaSketchAggregationFunction.java
@@ -406,7 +406,7 @@ public class DistinctCountThetaSketchAggregationFunction
       if (_includeDefaultSketch) {
         Union defaultUnion = unions.get(0);
         for (Sketch sketch : sketches) {
-          defaultUnion.update(sketch);
+          defaultUnion.union(sketch);
         }
       }
       for (int i = 0; i < numFilters; i++) {
@@ -414,7 +414,7 @@ public class DistinctCountThetaSketchAggregationFunction
         Union union = unions.get(i + 1);
         for (int j = 0; j < length; j++) {
           if (filterEvaluator.evaluate(singleValues, valueTypes, valueArrays, j)) {
-            union.update(sketches[j]);
+            union.union(sketches[j]);
           }
         }
       }
@@ -634,11 +634,11 @@ public class DistinctCountThetaSketchAggregationFunction
         List<Union> unions = getUnions(groupByResultHolder, groupKeyArray[i]);
         Sketch sketch = sketches[i];
         if (_includeDefaultSketch) {
-          unions.get(0).update(sketch);
+          unions.get(0).union(sketch);
         }
         for (int j = 0; j < numFilters; j++) {
           if (_filterEvaluators.get(j).evaluate(singleValues, valueTypes, valueArrays, i)) {
-            unions.get(j + 1).update(sketch);
+            unions.get(j + 1).union(sketch);
           }
         }
       }
@@ -907,7 +907,7 @@ public class DistinctCountThetaSketchAggregationFunction
       if (_includeDefaultSketch) {
         for (int i = 0; i < length; i++) {
           for (int groupKey : groupKeysArray[i]) {
-            getUnions(groupByResultHolder, groupKey).get(0).update(sketches[i]);
+            getUnions(groupByResultHolder, groupKey).get(0).union(sketches[i]);
           }
         }
       }
@@ -916,7 +916,7 @@ public class DistinctCountThetaSketchAggregationFunction
         for (int j = 0; j < length; j++) {
           if (filterEvaluator.evaluate(singleValues, valueTypes, valueArrays, j)) {
             for (int groupKey : groupKeysArray[i]) {
-              getUnions(groupByResultHolder, groupKey).get(i + 1).update(sketches[i]);
+              getUnions(groupByResultHolder, groupKey).get(i + 1).union(sketches[i]);
             }
           }
         }
@@ -969,8 +969,8 @@ public class DistinctCountThetaSketchAggregationFunction
         continue;
       }
       Union union = _setOperationBuilder.buildUnion();
-      union.update(sketch1);
-      union.update(sketch2);
+      union.union(sketch1);
+      union.union(sketch2);
       // NOTE: Compact the sketch in unsorted, on-heap fashion for performance concern.
       //       See https://datasketches.apache.org/docs/Theta/ThetaSize.html for more details.
       mergedSketches.add(union.getResult(false, null));
@@ -1278,20 +1278,20 @@ public class DistinctCountThetaSketchAggregationFunction
       case SET_UNION:
         Union union = _setOperationBuilder.buildUnion();
         for (ExpressionContext argument : arguments) {
-          union.update(evaluatePostAggregationExpression(argument, sketches));
+          union.union(evaluatePostAggregationExpression(argument, sketches));
         }
         return union.getResult(false, null);
       case SET_INTERSECT:
         Intersection intersection = _setOperationBuilder.buildIntersection();
         for (ExpressionContext argument : arguments) {
-          intersection.update(evaluatePostAggregationExpression(argument, sketches));
+          intersection.intersect(evaluatePostAggregationExpression(argument, sketches));
         }
         return intersection.getResult(false, null);
       case SET_DIFF:
         AnotB diff = _setOperationBuilder.buildANotB();
-        diff.update(evaluatePostAggregationExpression(arguments.get(0), sketches),
-            evaluatePostAggregationExpression(arguments.get(1), sketches));
-        return diff.getResult(false, null);
+        diff.setA(evaluatePostAggregationExpression(arguments.get(0), sketches));
+        diff.notB(evaluatePostAggregationExpression(arguments.get(1), sketches));
+        return diff.getResult(false, null, false);
       default:
         throw new IllegalStateException();
     }
diff --git a/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/ThetaSketchIntegrationTest.java b/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/ThetaSketchIntegrationTest.java
index 5b0171718b..5361cfb449 100644
--- a/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/ThetaSketchIntegrationTest.java
+++ b/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/ThetaSketchIntegrationTest.java
@@ -206,6 +206,20 @@ public class ThetaSketchIntegrationTest extends BaseClusterIntegrationTest {
       runAndAssert(query, expected);
     }
 
+    // gender = Female DIFF course = History
+    {
+      String query = "select distinctCountThetaSketch(thetaSketchCol, '', "
+          + "'dimName = ''gender'' and dimValue = ''Female''', 'dimName = ''course'' and dimValue = ''History''', "
+          + "'SET_DIFF($1, $2)') from " + DEFAULT_TABLE_NAME;
+      int expected = 50 + 110 + 70 + 130;
+      runAndAssert(query, expected);
+
+      query = "select distinctCountThetaSketch(thetaSketchCol, '', "
+          + "'dimName = ''gender''', 'dimValue = ''Female''', 'dimName = ''course''', 'dimValue = ''History''', "
+          + "'SET_DIFF(SET_INTERSECT($1, $2), SET_INTERSECT($3, $4))') from " + DEFAULT_TABLE_NAME;
+      runAndAssert(query, expected);
+    }
+
     // group by gender
     {
       String query = "select dimValue, distinctCountThetaSketch(thetaSketchCol) from " + DEFAULT_TABLE_NAME
diff --git a/pom.xml b/pom.xml
index b3f4d8409c..83af7f3d73 100644
--- a/pom.xml
+++ b/pom.xml
@@ -886,7 +886,7 @@
       <dependency>
         <groupId>org.apache.datasketches</groupId>
         <artifactId>datasketches-java</artifactId>
-        <version>1.2.0-incubating</version>
+        <version>3.3.0</version>
       </dependency>
       <dependency>
         <groupId>com.tdunning</groupId>


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@pinot.apache.org
For additional commands, e-mail: commits-help@pinot.apache.org