You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pinot.apache.org by GitBox <gi...@apache.org> on 2021/10/25 15:27:17 UTC

[GitHub] [pinot] yupeng9 commented on a change in pull request #7630: Apply the optimization on dictIds within the segment to DistinctCountHLL aggregation func

yupeng9 commented on a change in pull request #7630:
URL: https://github.com/apache/pinot/pull/7630#discussion_r735715494



##########
File path: pinot-core/src/main/java/org/apache/pinot/core/query/aggregation/function/DistinctCountHLLAggregationFunction.java
##########
@@ -326,6 +375,54 @@ public Long extractFinalResult(HyperLogLog intermediateResult) {
     return intermediateResult.cardinality();
   }
 
+  /**
+   * Returns the dictionary id bitmap from the result holder or creates a new one if it does not exist.
+   */
+  protected static RoaringBitmap getDictIdBitmap(AggregationResultHolder aggregationResultHolder,
+      Dictionary dictionary) {
+    DistinctCountHLLAggregationFunction.DictIdsWrapper dictIdsWrapper = aggregationResultHolder.getResult();
+    if (dictIdsWrapper == null) {
+      dictIdsWrapper = new DistinctCountHLLAggregationFunction.DictIdsWrapper(dictionary);
+      aggregationResultHolder.setValue(dictIdsWrapper);
+    }
+    return dictIdsWrapper._dictIdBitmap;
+  }
+
+  /**
+   * Returns the dictionary id bitmap for the given group key or creates a new one if it does not exist.
+   */
+  protected static RoaringBitmap getDictIdBitmap(GroupByResultHolder groupByResultHolder, int groupKey,
+      Dictionary dictionary) {
+    DistinctCountHLLAggregationFunction.DictIdsWrapper dictIdsWrapper = groupByResultHolder.getResult(groupKey);
+    if (dictIdsWrapper == null) {
+      dictIdsWrapper = new DistinctCountHLLAggregationFunction.DictIdsWrapper(dictionary);
+      groupByResultHolder.setValueForKey(groupKey, dictIdsWrapper);
+    }
+    return dictIdsWrapper._dictIdBitmap;
+  }
+
+  /**
+   * Helper method to set dictionary id for the given group keys into the result holder.
+   */
+  private static void setDictIdForGroupKeys(GroupByResultHolder groupByResultHolder, int[] groupKeys,
+      Dictionary dictionary, int dictId) {
+    for (int groupKey : groupKeys) {
+      getDictIdBitmap(groupByResultHolder, groupKey, dictionary).add(dictId);
+    }
+  }
+
+  private HyperLogLog convertToHLL(DictIdsWrapper dictIdsWrapper) {
+    Dictionary dictionary = dictIdsWrapper._dictionary;
+    RoaringBitmap dictIdBitmap = dictIdsWrapper._dictIdBitmap;
+    int numValues = dictIdBitmap.getCardinality();
+    PeekableIntIterator iterator = dictIdBitmap.getIntIterator();
+    HyperLogLog hyperLogLog = new HyperLogLog(_log2m);
+    while (iterator.hasNext()) {
+      hyperLogLog.offer(iterator.next());
+    }

Review comment:
       Good to know. Could you share some articles explaining why?




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscribe@pinot.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org



---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@pinot.apache.org
For additional commands, e-mail: commits-help@pinot.apache.org