You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pinot.apache.org by GitBox <gi...@apache.org> on 2021/10/06 19:12:17 UTC

[GitHub] [pinot] Jackie-Jiang commented on a change in pull request #7530: Faster bitmap scans

Jackie-Jiang commented on a change in pull request #7530:
URL: https://github.com/apache/pinot/pull/7530#discussion_r723601916



##########
File path: pinot-core/src/main/java/org/apache/pinot/core/operator/dociditerators/SVScanDocIdIterator.java
##########
@@ -72,16 +73,26 @@ public int advance(int targetDocId) {
 
   @Override
   public MutableRoaringBitmap applyAnd(ImmutableRoaringBitmap docIds) {
-    MutableRoaringBitmap result = new MutableRoaringBitmap();
-    IntIterator docIdIterator = docIds.getIntIterator();
-    int nextDocId;
-    while (docIdIterator.hasNext() && (nextDocId = docIdIterator.next()) < _numDocs) {
-      _numEntriesScanned++;
-      if (_valueMatcher.doesValueMatch(nextDocId)) {
-        result.add(nextDocId);
+    if (docIds.isEmpty()) {
+      return new MutableRoaringBitmap();
+    }
+    RoaringBitmapWriter<MutableRoaringBitmap> result = RoaringBitmapWriter.bufferWriter()
+        .expectedRange(docIds.first(), docIds.last()).runCompress(false).get();
+    BatchIterator docIdIterator = docIds.getBatchIterator();
+    int[] buffer = new int[OPTIMAL_ITERATOR_BATCH_SIZE];
+    while (docIdIterator.hasNext()) {
+      int limit = docIdIterator.nextBatch(buffer);
+      for (int i = 0; i < limit; i++) {
+        int nextDocId = buffer[i];
+        if (nextDocId < _numDocs) {

Review comment:
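       Same here as for the equivalent check in MVScanDocIdIterator: (optional and not introduced in this PR) I think this `if (nextDocId < _numDocs)` check is redundant.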

##########
File path: pinot-core/src/main/java/org/apache/pinot/core/operator/dociditerators/MVScanDocIdIterator.java
##########
@@ -73,17 +74,27 @@ public int advance(int targetDocId) {
 
   @Override
   public MutableRoaringBitmap applyAnd(ImmutableRoaringBitmap docIds) {
-    MutableRoaringBitmap result = new MutableRoaringBitmap();
-    IntIterator docIdIterator = docIds.getIntIterator();
-    int nextDocId;
-    while (docIdIterator.hasNext() && (nextDocId = docIdIterator.next()) < _numDocs) {
-      int length = _reader.getDictIdMV(nextDocId, _dictIdBuffer, _readerContext);
-      _numEntriesScanned += length;
-      if (_predicateEvaluator.applyMV(_dictIdBuffer, length)) {
-        result.add(nextDocId);
+    if (docIds.isEmpty()) {
+      return new MutableRoaringBitmap();
+    }
+    RoaringBitmapWriter<MutableRoaringBitmap> result = RoaringBitmapWriter.bufferWriter()
+        .expectedRange(docIds.first(), docIds.last()).runCompress(false).get();
+    BatchIterator docIdIterator = docIds.getBatchIterator();
+    int[] buffer = new int[OPTIMAL_ITERATOR_BATCH_SIZE];
+    while (docIdIterator.hasNext()) {
+      int limit = docIdIterator.nextBatch(buffer);
+      for (int i = 0; i < limit; i++) {
+        int nextDocId = buffer[i];
+        if (nextDocId < _numDocs) {

Review comment:
       (Optional and not introduced in this PR) I think this `if (nextDocId < _numDocs)` check is redundant. We needed this check only for the old star-tree, which has since been removed and is no longer supported.




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscribe@pinot.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org



---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@pinot.apache.org
For additional commands, e-mail: commits-help@pinot.apache.org