You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pinot.apache.org by GitBox <gi...@apache.org> on 2021/10/08 20:48:41 UTC

[GitHub] [pinot] richardstartin commented on a change in pull request #7535: map bitmaps through a bounded window to avoid excessive disk pressure…

richardstartin commented on a change in pull request #7535:
URL: https://github.com/apache/pinot/pull/7535#discussion_r725293032



##########
File path: pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/creator/impl/inv/BitmapInvertedIndexWriter.java
##########
@@ -48,37 +48,64 @@
  * </pre>
  */
 public final class BitmapInvertedIndexWriter implements Closeable {
+  // 256MB - worst case serialized size of a single bitmap with Integer.MAX_VALUE rows
+  private static final long MAX_INITIAL_BUFFER_SIZE = 256 << 20;
+  // 128KB derived from 1M rows (15 containers), worst case 8KB per container = 120KB + 8KB extra
+  private static final long PESSIMISTIC_BITMAP_SIZE_ESTIMATE = 128 << 10;
   private final FileChannel _fileChannel;
   private final ByteBuffer _offsetBuffer;
-  private final ByteBuffer _bitmapBuffer;
+  private ByteBuffer _bitmapBuffer;
+  private int _bytesWritten;
 
   public BitmapInvertedIndexWriter(File outputFile, int numBitmaps)
       throws IOException {
+    int sizeForOffsets = (numBitmaps + 1) * Integer.BYTES;
+    long bitmapBufferEstimate = Math.min(PESSIMISTIC_BITMAP_SIZE_ESTIMATE * numBitmaps, MAX_INITIAL_BUFFER_SIZE);
     _fileChannel = new RandomAccessFile(outputFile, "rw").getChannel();
-    _offsetBuffer = _fileChannel.map(FileChannel.MapMode.READ_WRITE, 0, Integer.MAX_VALUE);
-    _bitmapBuffer = _offsetBuffer.duplicate().order(ByteOrder.LITTLE_ENDIAN);
-    _bitmapBuffer.position((numBitmaps + 1) * Integer.BYTES);
+    _offsetBuffer = _fileChannel.map(FileChannel.MapMode.READ_WRITE, 0, sizeForOffsets);
+    _bytesWritten = sizeForOffsets;
+    mapBitmapBuffer(bitmapBufferEstimate);
   }
 
   public void add(RoaringBitmap bitmap)
       throws IOException {
-    _offsetBuffer.putInt(_bitmapBuffer.position());
+    int length = bitmap.serializedSizeInBytes();
+    resizeIfNecessary(length);
+    _offsetBuffer.putInt(_bytesWritten);
     bitmap.serialize(_bitmapBuffer);
+    _bytesWritten += length;
   }
 
-  public void add(byte[] bitmapBytes) {
+  public void add(byte[] bitmapBytes)
+      throws IOException {
     add(bitmapBytes, bitmapBytes.length);
   }
 
-  public void add(byte[] bitmapBytes, int length) {
-    _offsetBuffer.putInt(_bitmapBuffer.position());
+  public void add(byte[] bitmapBytes, int length)
+      throws IOException {
+    resizeIfNecessary(length);
+    _offsetBuffer.putInt(_bytesWritten);
     _bitmapBuffer.put(bitmapBytes, 0, length);
+    _bytesWritten += length;
+  }
+
+  private void resizeIfNecessary(int required)
+      throws IOException {
+    if (_bitmapBuffer.capacity() - required < _bitmapBuffer.position()) {
+      mapBitmapBuffer(MAX_INITIAL_BUFFER_SIZE);
+    }
+  }
+
+  private void mapBitmapBuffer(long size)
+      throws IOException {
+    _bitmapBuffer = _fileChannel.map(FileChannel.MapMode.READ_WRITE, _bytesWritten, size)
+        .order(ByteOrder.LITTLE_ENDIAN);
   }
 
   @Override
   public void close()
       throws IOException {
-    int fileLength = _bitmapBuffer.position();
+    int fileLength = _bytesWritten;

Review comment:
       You're right, sorry that completely escape my notice. Please take a look now.




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscribe@pinot.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org



---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@pinot.apache.org
For additional commands, e-mail: commits-help@pinot.apache.org