You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pinot.apache.org by ja...@apache.org on 2023/02/15 18:52:41 UTC
[pinot] branch master updated: Update Apache Datasketches to 3.3.0 (#10281)
This is an automated email from the ASF dual-hosted git repository.
jackie pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/pinot.git
The following commit(s) were added to refs/heads/master by this push:
new e3a70d1186 Update Apache Datasketches to 3.3.0 (#10281)
e3a70d1186 is described below
commit e3a70d1186105349a75c4f8dd2ab3538782a0779
Author: David Cromberge <da...@permutive.com>
AuthorDate: Wed Feb 15 18:52:33 2023 +0000
Update Apache Datasketches to 3.3.0 (#10281)
---
...istinctCountThetaSketchAggregationFunction.java | 26 +++++++++++-----------
.../tests/ThetaSketchIntegrationTest.java | 14 ++++++++++++
pom.xml | 2 +-
3 files changed, 28 insertions(+), 14 deletions(-)
diff --git a/pinot-core/src/main/java/org/apache/pinot/core/query/aggregation/function/DistinctCountThetaSketchAggregationFunction.java b/pinot-core/src/main/java/org/apache/pinot/core/query/aggregation/function/DistinctCountThetaSketchAggregationFunction.java
index 317cb48693..a9c143bb5c 100644
--- a/pinot-core/src/main/java/org/apache/pinot/core/query/aggregation/function/DistinctCountThetaSketchAggregationFunction.java
+++ b/pinot-core/src/main/java/org/apache/pinot/core/query/aggregation/function/DistinctCountThetaSketchAggregationFunction.java
@@ -406,7 +406,7 @@ public class DistinctCountThetaSketchAggregationFunction
if (_includeDefaultSketch) {
Union defaultUnion = unions.get(0);
for (Sketch sketch : sketches) {
- defaultUnion.update(sketch);
+ defaultUnion.union(sketch);
}
}
for (int i = 0; i < numFilters; i++) {
@@ -414,7 +414,7 @@ public class DistinctCountThetaSketchAggregationFunction
Union union = unions.get(i + 1);
for (int j = 0; j < length; j++) {
if (filterEvaluator.evaluate(singleValues, valueTypes, valueArrays, j)) {
- union.update(sketches[j]);
+ union.union(sketches[j]);
}
}
}
@@ -634,11 +634,11 @@ public class DistinctCountThetaSketchAggregationFunction
List<Union> unions = getUnions(groupByResultHolder, groupKeyArray[i]);
Sketch sketch = sketches[i];
if (_includeDefaultSketch) {
- unions.get(0).update(sketch);
+ unions.get(0).union(sketch);
}
for (int j = 0; j < numFilters; j++) {
if (_filterEvaluators.get(j).evaluate(singleValues, valueTypes, valueArrays, i)) {
- unions.get(j + 1).update(sketch);
+ unions.get(j + 1).union(sketch);
}
}
}
@@ -907,7 +907,7 @@ public class DistinctCountThetaSketchAggregationFunction
if (_includeDefaultSketch) {
for (int i = 0; i < length; i++) {
for (int groupKey : groupKeysArray[i]) {
- getUnions(groupByResultHolder, groupKey).get(0).update(sketches[i]);
+ getUnions(groupByResultHolder, groupKey).get(0).union(sketches[i]);
}
}
}
@@ -916,7 +916,7 @@ public class DistinctCountThetaSketchAggregationFunction
for (int j = 0; j < length; j++) {
if (filterEvaluator.evaluate(singleValues, valueTypes, valueArrays, j)) {
for (int groupKey : groupKeysArray[i]) {
- getUnions(groupByResultHolder, groupKey).get(i + 1).update(sketches[i]);
+ getUnions(groupByResultHolder, groupKey).get(i + 1).union(sketches[i]);
}
}
}
@@ -969,8 +969,8 @@ public class DistinctCountThetaSketchAggregationFunction
continue;
}
Union union = _setOperationBuilder.buildUnion();
- union.update(sketch1);
- union.update(sketch2);
+ union.union(sketch1);
+ union.union(sketch2);
// NOTE: Compact the sketch in unsorted, on-heap fashion for performance concern.
// See https://datasketches.apache.org/docs/Theta/ThetaSize.html for more details.
mergedSketches.add(union.getResult(false, null));
@@ -1278,20 +1278,20 @@ public class DistinctCountThetaSketchAggregationFunction
case SET_UNION:
Union union = _setOperationBuilder.buildUnion();
for (ExpressionContext argument : arguments) {
- union.update(evaluatePostAggregationExpression(argument, sketches));
+ union.union(evaluatePostAggregationExpression(argument, sketches));
}
return union.getResult(false, null);
case SET_INTERSECT:
Intersection intersection = _setOperationBuilder.buildIntersection();
for (ExpressionContext argument : arguments) {
- intersection.update(evaluatePostAggregationExpression(argument, sketches));
+ intersection.intersect(evaluatePostAggregationExpression(argument, sketches));
}
return intersection.getResult(false, null);
case SET_DIFF:
AnotB diff = _setOperationBuilder.buildANotB();
- diff.update(evaluatePostAggregationExpression(arguments.get(0), sketches),
- evaluatePostAggregationExpression(arguments.get(1), sketches));
- return diff.getResult(false, null);
+ diff.setA(evaluatePostAggregationExpression(arguments.get(0), sketches));
+ diff.notB(evaluatePostAggregationExpression(arguments.get(1), sketches));
+ return diff.getResult(false, null, false);
default:
throw new IllegalStateException();
}
diff --git a/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/ThetaSketchIntegrationTest.java b/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/ThetaSketchIntegrationTest.java
index 5b0171718b..5361cfb449 100644
--- a/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/ThetaSketchIntegrationTest.java
+++ b/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/ThetaSketchIntegrationTest.java
@@ -206,6 +206,20 @@ public class ThetaSketchIntegrationTest extends BaseClusterIntegrationTest {
runAndAssert(query, expected);
}
+ // gender = female DIFF course = history
+ {
+ String query = "select distinctCountThetaSketch(thetaSketchCol, '', "
+ + "'dimName = ''gender'' and dimValue = ''Female''', 'dimName = ''course'' and dimValue = ''History''', "
+ + "'SET_DIFF($1, $2)') from " + DEFAULT_TABLE_NAME;
+ int expected = 50 + 110 + 70 + 130;
+ runAndAssert(query, expected);
+
+ query = "select distinctCountThetaSketch(thetaSketchCol, '', "
+ + "'dimName = ''gender''', 'dimValue = ''Female''', 'dimName = ''course''', 'dimValue = ''History''', "
+ + "'SET_DIFF(SET_INTERSECT($1, $2), SET_INTERSECT($3, $4))') from " + DEFAULT_TABLE_NAME;
+ runAndAssert(query, expected);
+ }
+
// group by gender
{
String query = "select dimValue, distinctCountThetaSketch(thetaSketchCol) from " + DEFAULT_TABLE_NAME
diff --git a/pom.xml b/pom.xml
index b3f4d8409c..83af7f3d73 100644
--- a/pom.xml
+++ b/pom.xml
@@ -886,7 +886,7 @@
<dependency>
<groupId>org.apache.datasketches</groupId>
<artifactId>datasketches-java</artifactId>
- <version>1.2.0-incubating</version>
+ <version>3.3.0</version>
</dependency>
<dependency>
<groupId>com.tdunning</groupId>
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@pinot.apache.org
For additional commands, e-mail: commits-help@pinot.apache.org