You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pinot.apache.org by ja...@apache.org on 2020/10/15 17:36:36 UTC

[incubator-pinot] branch master updated: Add OnHeapGuavaBloomFilterReader (#6147)

This is an automated email from the ASF dual-hosted git repository.

jackie pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-pinot.git


The following commit(s) were added to refs/heads/master by this push:
     new a71b960  Add OnHeapGuavaBloomFilterReader (#6147)
a71b960 is described below

commit a71b96094dc58340274d11dad76b2bc58c7e09ff
Author: Xiaotian (Jackie) Jiang <17...@users.noreply.github.com>
AuthorDate: Thu Oct 15 10:36:20 2020 -0700

    Add OnHeapGuavaBloomFilterReader (#6147)
    
    Add the on-heap version of the guava bloom filter reader
    Add 2 new fields into the `BloomFilterConfig`:
    - maxSizeInBytes: if configured, caps the size of the bloom filter (a larger fpp is used when the configured fpp would require a filter bigger than the cap)
    - loadOnHeap: load the bloom filter on-heap or off-heap
---
 .../impl/bloom/OnHeapGuavaBloomFilterCreator.java  | 16 ++++++-
 .../index/column/PhysicalColumnIndexContainer.java | 23 ++++-----
 .../segment/index/loader/IndexLoadingConfig.java   |  2 +-
 .../segment/index/readers/BloomFilterReader.java   |  2 +-
 ...Reader.java => BaseGuavaBloomFilterReader.java} | 36 ++++-----------
 .../readers/bloom/BloomFilterReaderFactory.java    |  6 ++-
 .../readers/bloom/GuavaBloomFilterReaderUtils.java | 25 ++++++++++
 .../bloom/OffHeapGuavaBloomFilterReader.java       | 40 ++--------------
 .../bloom/OnHeapGuavaBloomFilterReader.java        | 54 ++++++++++++++++++++++
 .../index/creator/BloomFilterCreatorTest.java      | 11 +++--
 .../pinot/spi/config/table/BloomFilterConfig.java  | 24 ++++++++--
 11 files changed, 148 insertions(+), 91 deletions(-)

diff --git a/pinot-core/src/main/java/org/apache/pinot/core/segment/creator/impl/bloom/OnHeapGuavaBloomFilterCreator.java b/pinot-core/src/main/java/org/apache/pinot/core/segment/creator/impl/bloom/OnHeapGuavaBloomFilterCreator.java
index 1fb9b55..2ca942a 100644
--- a/pinot-core/src/main/java/org/apache/pinot/core/segment/creator/impl/bloom/OnHeapGuavaBloomFilterCreator.java
+++ b/pinot-core/src/main/java/org/apache/pinot/core/segment/creator/impl/bloom/OnHeapGuavaBloomFilterCreator.java
@@ -27,7 +27,10 @@ import java.io.IOException;
 import java.nio.charset.StandardCharsets;
 import org.apache.pinot.core.segment.creator.BloomFilterCreator;
 import org.apache.pinot.core.segment.creator.impl.V1Constants;
+import org.apache.pinot.core.segment.index.readers.bloom.GuavaBloomFilterReaderUtils;
 import org.apache.pinot.spi.config.table.BloomFilterConfig;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
 
 
 /**
@@ -37,6 +40,8 @@ import org.apache.pinot.spi.config.table.BloomFilterConfig;
  */
 @SuppressWarnings("UnstableApiUsage")
 public class OnHeapGuavaBloomFilterCreator implements BloomFilterCreator {
+  private static final Logger LOGGER = LoggerFactory.getLogger(OnHeapGuavaBloomFilterCreator.class);
+
   public static final int TYPE_VALUE = 1;
   public static final int VERSION = 1;
 
@@ -46,8 +51,15 @@ public class OnHeapGuavaBloomFilterCreator implements BloomFilterCreator {
   public OnHeapGuavaBloomFilterCreator(File indexDir, String columnName, int cardinality,
       BloomFilterConfig bloomFilterConfig) {
     _bloomFilterFile = new File(indexDir, columnName + V1Constants.Indexes.BLOOM_FILTER_FILE_EXTENSION);
-    _bloomFilter =
-        BloomFilter.create(Funnels.stringFunnel(StandardCharsets.UTF_8), cardinality, bloomFilterConfig.getFpp());
+    // Calculate the actual fpp with regard to the max size for the bloom filter
+    double fpp = bloomFilterConfig.getFpp();
+    int maxSizeInBytes = bloomFilterConfig.getMaxSizeInBytes();
+    if (maxSizeInBytes > 0) {
+      double minFpp = GuavaBloomFilterReaderUtils.computeFPP(maxSizeInBytes, cardinality);
+      fpp = Math.max(fpp, minFpp);
+    }
+    LOGGER.info("Creating bloom filter with cardinality: {}, fpp: {}", cardinality, fpp);
+    _bloomFilter = BloomFilter.create(Funnels.stringFunnel(StandardCharsets.UTF_8), cardinality, fpp);
   }
 
   @Override
diff --git a/pinot-core/src/main/java/org/apache/pinot/core/segment/index/column/PhysicalColumnIndexContainer.java b/pinot-core/src/main/java/org/apache/pinot/core/segment/index/column/PhysicalColumnIndexContainer.java
index 0573afe..595a8e0 100644
--- a/pinot-core/src/main/java/org/apache/pinot/core/segment/index/column/PhysicalColumnIndexContainer.java
+++ b/pinot-core/src/main/java/org/apache/pinot/core/segment/index/column/PhysicalColumnIndexContainer.java
@@ -54,6 +54,7 @@ import org.apache.pinot.core.segment.index.readers.text.LuceneTextIndexReader;
 import org.apache.pinot.core.segment.memory.PinotDataBuffer;
 import org.apache.pinot.core.segment.store.ColumnIndexType;
 import org.apache.pinot.core.segment.store.SegmentDirectory;
+import org.apache.pinot.spi.config.table.BloomFilterConfig;
 import org.apache.pinot.spi.data.FieldSpec;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
@@ -74,18 +75,11 @@ public final class PhysicalColumnIndexContainer implements ColumnIndexContainer
       IndexLoadingConfig indexLoadingConfig, File segmentIndexDir)
       throws IOException {
     String columnName = metadata.getColumnName();
-    boolean loadInvertedIndex = false;
-    boolean loadRangeIndex = false;
-    boolean loadTextIndex = false;
-    boolean loadOnHeapDictionary = false;
-    boolean loadBloomFilter = false;
-    if (indexLoadingConfig != null) {
-      loadInvertedIndex = indexLoadingConfig.getInvertedIndexColumns().contains(columnName);
-      loadRangeIndex = indexLoadingConfig.getRangeIndexColumns().contains(columnName);
-      loadOnHeapDictionary = indexLoadingConfig.getOnHeapDictionaryColumns().contains(columnName);
-      loadBloomFilter = indexLoadingConfig.getBloomFilterConfigs().containsKey(columnName);
-      loadTextIndex = indexLoadingConfig.getTextIndexColumns().contains(columnName);
-    }
+    boolean loadInvertedIndex = indexLoadingConfig.getInvertedIndexColumns().contains(columnName);
+    boolean loadRangeIndex = indexLoadingConfig.getRangeIndexColumns().contains(columnName);
+    boolean loadTextIndex = indexLoadingConfig.getTextIndexColumns().contains(columnName);
+    boolean loadOnHeapDictionary = indexLoadingConfig.getOnHeapDictionaryColumns().contains(columnName);
+    BloomFilterConfig bloomFilterConfig = indexLoadingConfig.getBloomFilterConfigs().get(columnName);
 
     if (segmentReader.hasIndexFor(columnName, ColumnIndexType.NULLVALUE_VECTOR)) {
       PinotDataBuffer nullValueVectorBuffer = segmentReader.getIndexFor(columnName, ColumnIndexType.NULLVALUE_VECTOR);
@@ -107,9 +101,10 @@ public final class PhysicalColumnIndexContainer implements ColumnIndexContainer
 
     if (metadata.hasDictionary()) {
       //bloom filter
-      if (loadBloomFilter) {
+      if (bloomFilterConfig != null) {
         PinotDataBuffer bloomFilterBuffer = segmentReader.getIndexFor(columnName, ColumnIndexType.BLOOM_FILTER);
-        _bloomFilter = BloomFilterReaderFactory.getBloomFilterReader(bloomFilterBuffer);
+        _bloomFilter =
+            BloomFilterReaderFactory.getBloomFilterReader(bloomFilterBuffer, bloomFilterConfig.isLoadOnHeap());
       } else {
         _bloomFilter = null;
       }
diff --git a/pinot-core/src/main/java/org/apache/pinot/core/segment/index/loader/IndexLoadingConfig.java b/pinot-core/src/main/java/org/apache/pinot/core/segment/index/loader/IndexLoadingConfig.java
index 798ce61..a6817a0 100644
--- a/pinot-core/src/main/java/org/apache/pinot/core/segment/index/loader/IndexLoadingConfig.java
+++ b/pinot-core/src/main/java/org/apache/pinot/core/segment/index/loader/IndexLoadingConfig.java
@@ -100,7 +100,7 @@ public class IndexLoadingConfig {
     List<String> bloomFilterColumns = indexingConfig.getBloomFilterColumns();
     if (bloomFilterColumns != null) {
       for (String bloomFilterColumn : bloomFilterColumns) {
-        _bloomFilterConfigs.put(bloomFilterColumn, new BloomFilterConfig(BloomFilterConfig.DEFAULT_FPP));
+        _bloomFilterConfigs.put(bloomFilterColumn, new BloomFilterConfig(BloomFilterConfig.DEFAULT_FPP, 0, false));
       }
     }
     Map<String, BloomFilterConfig> bloomFilterConfigs = indexingConfig.getBloomFilterConfigs();
diff --git a/pinot-core/src/main/java/org/apache/pinot/core/segment/index/readers/BloomFilterReader.java b/pinot-core/src/main/java/org/apache/pinot/core/segment/index/readers/BloomFilterReader.java
index 09a1dcf..2a16206 100644
--- a/pinot-core/src/main/java/org/apache/pinot/core/segment/index/readers/BloomFilterReader.java
+++ b/pinot-core/src/main/java/org/apache/pinot/core/segment/index/readers/BloomFilterReader.java
@@ -36,5 +36,5 @@ public interface BloomFilterReader extends Closeable {
    * otherwise.
    * <p>This method is provided to prevent hashing the same value multiple times.
    */
-  boolean mightContain(byte[] hash);
+  boolean mightContain(long hash1, long hash2);
 }
diff --git a/pinot-core/src/main/java/org/apache/pinot/core/segment/index/readers/bloom/OffHeapGuavaBloomFilterReader.java b/pinot-core/src/main/java/org/apache/pinot/core/segment/index/readers/bloom/BaseGuavaBloomFilterReader.java
similarity index 65%
copy from pinot-core/src/main/java/org/apache/pinot/core/segment/index/readers/bloom/OffHeapGuavaBloomFilterReader.java
copy to pinot-core/src/main/java/org/apache/pinot/core/segment/index/readers/bloom/BaseGuavaBloomFilterReader.java
index 56dac45..0c6f5d4 100644
--- a/pinot-core/src/main/java/org/apache/pinot/core/segment/index/readers/bloom/OffHeapGuavaBloomFilterReader.java
+++ b/pinot-core/src/main/java/org/apache/pinot/core/segment/index/readers/bloom/BaseGuavaBloomFilterReader.java
@@ -25,11 +25,10 @@ import org.apache.pinot.core.segment.memory.PinotDataBuffer;
 
 
 /**
- * Off-heap reader for guava bloom filter.
- * <p>The behavior should be aligned with {@link com.google.common.hash.BloomFilter}.
+ * Base implementation of the reader for guava bloom filter.
+ * <p>The format of the data should be aligned with the guava bloom filter.
  */
-@SuppressWarnings("UnstableApiUsage")
-public class OffHeapGuavaBloomFilterReader implements BloomFilterReader {
+public abstract class BaseGuavaBloomFilterReader implements BloomFilterReader {
   // Format of the data buffer header:
   //   - Strategy ordinal: 1 byte
   //   - Number of hash functions: 1 byte
@@ -39,11 +38,11 @@ public class OffHeapGuavaBloomFilterReader implements BloomFilterReader {
   private static final int NUM_LONGS_OFFSET = 2;
   private static final int HEADER_SIZE = 6;
 
-  private final int _numHashFunctions;
-  private final long _numBits;
-  private final PinotDataBuffer _valueBuffer;
+  protected final int _numHashFunctions;
+  protected final long _numBits;
+  protected final PinotDataBuffer _valueBuffer;
 
-  public OffHeapGuavaBloomFilterReader(PinotDataBuffer dataBuffer) {
+  public BaseGuavaBloomFilterReader(PinotDataBuffer dataBuffer) {
     byte strategyOrdinal = dataBuffer.getByte(STRATEGY_ORDINAL_OFFSET);
     Preconditions.checkState(strategyOrdinal == 1, "Unsupported strategy ordinal: %s", strategyOrdinal);
     _numHashFunctions = dataBuffer.getByte(NUM_HASH_FUNCTIONS_OFFSET) & 0xFF;
@@ -53,27 +52,10 @@ public class OffHeapGuavaBloomFilterReader implements BloomFilterReader {
 
   @Override
   public boolean mightContain(String value) {
-    return mightContain(GuavaBloomFilterReaderUtils.hash(value));
-  }
-
-  @Override
-  public boolean mightContain(byte[] hash) {
+    byte[] hash = GuavaBloomFilterReaderUtils.hash(value);
     long hash1 = Longs.fromBytes(hash[7], hash[6], hash[5], hash[4], hash[3], hash[2], hash[1], hash[0]);
     long hash2 = Longs.fromBytes(hash[15], hash[14], hash[13], hash[12], hash[11], hash[10], hash[9], hash[8]);
-    long combinedHash = hash1;
-    for (int i = 0; i < _numHashFunctions; i++) {
-      long bitIndex = (combinedHash & Long.MAX_VALUE) % _numBits;
-      // NOTE: Guava bloom filter stores bits in a long array. Inside each long value, the bits are stored in the
-      //       reverse order (the first bit is stored as the right most bit of the long).
-      int longIndex = (int) (bitIndex >>> 6);
-      int bitIndexInLong = (int) (bitIndex & 0x3F);
-      int byteIndex = (longIndex << 3) | (7 - (bitIndexInLong >>> 3));
-      if ((_valueBuffer.getByte(byteIndex) & (1 << (bitIndexInLong & 7))) == 0) {
-        return false;
-      }
-      combinedHash += hash2;
-    }
-    return true;
+    return mightContain(hash1, hash2);
   }
 
   @Override
diff --git a/pinot-core/src/main/java/org/apache/pinot/core/segment/index/readers/bloom/BloomFilterReaderFactory.java b/pinot-core/src/main/java/org/apache/pinot/core/segment/index/readers/bloom/BloomFilterReaderFactory.java
index 21f0636..728dffa 100644
--- a/pinot-core/src/main/java/org/apache/pinot/core/segment/index/readers/bloom/BloomFilterReaderFactory.java
+++ b/pinot-core/src/main/java/org/apache/pinot/core/segment/index/readers/bloom/BloomFilterReaderFactory.java
@@ -32,12 +32,14 @@ public class BloomFilterReaderFactory {
   private static final int VERSION_OFFSET = 4;
   private static final int HEADER_SIZE = 8;
 
-  public static BloomFilterReader getBloomFilterReader(PinotDataBuffer dataBuffer) {
+  public static BloomFilterReader getBloomFilterReader(PinotDataBuffer dataBuffer, boolean onHeap) {
     int typeValue = dataBuffer.getInt(TYPE_VALUE_OFFSET);
     int version = dataBuffer.getInt(VERSION_OFFSET);
     Preconditions.checkState(
         typeValue == OnHeapGuavaBloomFilterCreator.TYPE_VALUE && version == OnHeapGuavaBloomFilterCreator.VERSION,
         "Unsupported bloom filter type value: %s and version: %s", typeValue, version);
-    return new OffHeapGuavaBloomFilterReader(dataBuffer.view(HEADER_SIZE, dataBuffer.size()));
+    PinotDataBuffer bloomFilterDataBuffer = dataBuffer.view(HEADER_SIZE, dataBuffer.size());
+    return onHeap ? new OnHeapGuavaBloomFilterReader(bloomFilterDataBuffer)
+        : new OffHeapGuavaBloomFilterReader(bloomFilterDataBuffer);
   }
 }
diff --git a/pinot-core/src/main/java/org/apache/pinot/core/segment/index/readers/bloom/GuavaBloomFilterReaderUtils.java b/pinot-core/src/main/java/org/apache/pinot/core/segment/index/readers/bloom/GuavaBloomFilterReaderUtils.java
index 648fdff..c9feb9e 100644
--- a/pinot-core/src/main/java/org/apache/pinot/core/segment/index/readers/bloom/GuavaBloomFilterReaderUtils.java
+++ b/pinot-core/src/main/java/org/apache/pinot/core/segment/index/readers/bloom/GuavaBloomFilterReaderUtils.java
@@ -37,4 +37,29 @@ public class GuavaBloomFilterReaderUtils {
   public static byte[] hash(String value) {
     return HASH_FUNCTION.hashBytes(StringUtils.encodeUtf8(value)).asBytes();
   }
+
+  /* Cheat sheet:
+
+     m: total bits
+     n: expected insertions
+     b: m/n, bits per insertion
+     p: expected false positive probability
+     k: number of hash functions
+
+     1) Optimal k = b * ln2
+     2) p = (1 - e ^ (-kn/m)) ^ k
+     3) For optimal k: p = 2 ^ (-k) ~= 0.6185^b
+     4) For optimal k: m = -nlnp / ((ln2) ^ 2)
+
+     See http://en.wikipedia.org/wiki/Bloom_filter#Probability_of_false_positives for the formula.
+   */
+
+  /**
+   * Calculates the fpp (false positive probability) based on the given bloom filter size and number of insertions.
+   */
+  public static double computeFPP(int sizeInBytes, int numInsertions) {
+    double b = (double) sizeInBytes * Byte.SIZE / numInsertions;
+    double k = b * Math.log(2);
+    return Math.pow(2, -k);
+  }
 }
diff --git a/pinot-core/src/main/java/org/apache/pinot/core/segment/index/readers/bloom/OffHeapGuavaBloomFilterReader.java b/pinot-core/src/main/java/org/apache/pinot/core/segment/index/readers/bloom/OffHeapGuavaBloomFilterReader.java
index 56dac45..a5216e8 100644
--- a/pinot-core/src/main/java/org/apache/pinot/core/segment/index/readers/bloom/OffHeapGuavaBloomFilterReader.java
+++ b/pinot-core/src/main/java/org/apache/pinot/core/segment/index/readers/bloom/OffHeapGuavaBloomFilterReader.java
@@ -18,48 +18,20 @@
  */
 package org.apache.pinot.core.segment.index.readers.bloom;
 
-import com.google.common.base.Preconditions;
-import com.google.common.primitives.Longs;
-import org.apache.pinot.core.segment.index.readers.BloomFilterReader;
 import org.apache.pinot.core.segment.memory.PinotDataBuffer;
 
 
 /**
  * Off-heap reader for guava bloom filter.
- * <p>The behavior should be aligned with {@link com.google.common.hash.BloomFilter}.
  */
-@SuppressWarnings("UnstableApiUsage")
-public class OffHeapGuavaBloomFilterReader implements BloomFilterReader {
-  // Format of the data buffer header:
-  //   - Strategy ordinal: 1 byte
-  //   - Number of hash functions: 1 byte
-  //   - Number of long values: 4 bytes
-  private static final int STRATEGY_ORDINAL_OFFSET = 0;
-  private static final int NUM_HASH_FUNCTIONS_OFFSET = 1;
-  private static final int NUM_LONGS_OFFSET = 2;
-  private static final int HEADER_SIZE = 6;
-
-  private final int _numHashFunctions;
-  private final long _numBits;
-  private final PinotDataBuffer _valueBuffer;
+public class OffHeapGuavaBloomFilterReader extends BaseGuavaBloomFilterReader {
 
   public OffHeapGuavaBloomFilterReader(PinotDataBuffer dataBuffer) {
-    byte strategyOrdinal = dataBuffer.getByte(STRATEGY_ORDINAL_OFFSET);
-    Preconditions.checkState(strategyOrdinal == 1, "Unsupported strategy ordinal: %s", strategyOrdinal);
-    _numHashFunctions = dataBuffer.getByte(NUM_HASH_FUNCTIONS_OFFSET) & 0xFF;
-    _numBits = (long) dataBuffer.getInt(NUM_LONGS_OFFSET) * Long.SIZE;
-    _valueBuffer = dataBuffer.view(HEADER_SIZE, dataBuffer.size());
-  }
-
-  @Override
-  public boolean mightContain(String value) {
-    return mightContain(GuavaBloomFilterReaderUtils.hash(value));
+    super(dataBuffer);
   }
 
   @Override
-  public boolean mightContain(byte[] hash) {
-    long hash1 = Longs.fromBytes(hash[7], hash[6], hash[5], hash[4], hash[3], hash[2], hash[1], hash[0]);
-    long hash2 = Longs.fromBytes(hash[15], hash[14], hash[13], hash[12], hash[11], hash[10], hash[9], hash[8]);
+  public boolean mightContain(long hash1, long hash2) {
     long combinedHash = hash1;
     for (int i = 0; i < _numHashFunctions; i++) {
       long bitIndex = (combinedHash & Long.MAX_VALUE) % _numBits;
@@ -75,10 +47,4 @@ public class OffHeapGuavaBloomFilterReader implements BloomFilterReader {
     }
     return true;
   }
-
-  @Override
-  public void close() {
-    // NOTE: DO NOT close the PinotDataBuffer here because it is tracked by the caller and might be reused later. The
-    // caller is responsible of closing the PinotDataBuffer.
-  }
 }
diff --git a/pinot-core/src/main/java/org/apache/pinot/core/segment/index/readers/bloom/OnHeapGuavaBloomFilterReader.java b/pinot-core/src/main/java/org/apache/pinot/core/segment/index/readers/bloom/OnHeapGuavaBloomFilterReader.java
new file mode 100644
index 0000000..e990712
--- /dev/null
+++ b/pinot-core/src/main/java/org/apache/pinot/core/segment/index/readers/bloom/OnHeapGuavaBloomFilterReader.java
@@ -0,0 +1,54 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.pinot.core.segment.index.readers.bloom;
+
+import org.apache.pinot.core.segment.memory.PinotDataBuffer;
+
+
+/**
+ * On-heap reader for guava bloom filter.
+ */
+public class OnHeapGuavaBloomFilterReader extends BaseGuavaBloomFilterReader {
+  private final long[] _data;
+
+  public OnHeapGuavaBloomFilterReader(PinotDataBuffer dataBuffer) {
+    super(dataBuffer);
+
+    int numLongs = (int) (_numBits / Long.SIZE);
+    _data = new long[numLongs];
+    for (int i = 0; i < numLongs; i++) {
+      _data[i] = _valueBuffer.getLong(i * Long.BYTES);
+    }
+  }
+
+  @Override
+  public boolean mightContain(long hash1, long hash2) {
+    long combinedHash = hash1;
+    for (int i = 0; i < _numHashFunctions; i++) {
+      long bitIndex = (combinedHash & Long.MAX_VALUE) % _numBits;
+      // NOTE: Guava bloom filter stores bits in a long array. Inside each long value, the bits are stored in the
+      //       reverse order (the first bit is stored as the right most bit of the long).
+      if ((_data[(int) (bitIndex >>> 6)] & (1L << bitIndex)) == 0) {
+        return false;
+      }
+      combinedHash += hash2;
+    }
+    return true;
+  }
+}
diff --git a/pinot-core/src/test/java/org/apache/pinot/core/segment/index/creator/BloomFilterCreatorTest.java b/pinot-core/src/test/java/org/apache/pinot/core/segment/index/creator/BloomFilterCreatorTest.java
index 4ecc586..28e5b7f 100644
--- a/pinot-core/src/test/java/org/apache/pinot/core/segment/index/creator/BloomFilterCreatorTest.java
+++ b/pinot-core/src/test/java/org/apache/pinot/core/segment/index/creator/BloomFilterCreatorTest.java
@@ -50,7 +50,7 @@ public class BloomFilterCreatorTest {
     int cardinality = 10000;
     String columnName = "testColumn";
     try (BloomFilterCreator bloomFilterCreator = new OnHeapGuavaBloomFilterCreator(TEMP_DIR, columnName, cardinality,
-        new BloomFilterConfig(BloomFilterConfig.DEFAULT_FPP))) {
+        new BloomFilterConfig(BloomFilterConfig.DEFAULT_FPP, 0, false))) {
       for (int i = 0; i < 5; i++) {
         bloomFilterCreator.add(Integer.toString(i));
       }
@@ -60,12 +60,15 @@ public class BloomFilterCreatorTest {
     // Read the bloom filter
     File bloomFilterFile = new File(TEMP_DIR, columnName + V1Constants.Indexes.BLOOM_FILTER_FILE_EXTENSION);
     try (PinotDataBuffer dataBuffer = PinotDataBuffer.mapReadOnlyBigEndianFile(bloomFilterFile);
-        BloomFilterReader bloomFilterReader = BloomFilterReaderFactory.getBloomFilterReader(dataBuffer)) {
+        BloomFilterReader onHeapBloomFilter = BloomFilterReaderFactory.getBloomFilterReader(dataBuffer, true);
+        BloomFilterReader offHeapBloomFilter = BloomFilterReaderFactory.getBloomFilterReader(dataBuffer, false);) {
       for (int i = 0; i < 5; i++) {
-        Assert.assertTrue(bloomFilterReader.mightContain(Integer.toString(i)));
+        Assert.assertTrue(onHeapBloomFilter.mightContain(Integer.toString(i)));
+        Assert.assertTrue(offHeapBloomFilter.mightContain(Integer.toString(i)));
       }
       for (int i = 5; i < 10; i++) {
-        Assert.assertFalse(bloomFilterReader.mightContain(Integer.toString(i)));
+        Assert.assertFalse(onHeapBloomFilter.mightContain(Integer.toString(i)));
+        Assert.assertFalse(offHeapBloomFilter.mightContain(Integer.toString(i)));
       }
     }
   }
diff --git a/pinot-spi/src/main/java/org/apache/pinot/spi/config/table/BloomFilterConfig.java b/pinot-spi/src/main/java/org/apache/pinot/spi/config/table/BloomFilterConfig.java
index d488ece..22af21b 100644
--- a/pinot-spi/src/main/java/org/apache/pinot/spi/config/table/BloomFilterConfig.java
+++ b/pinot-spi/src/main/java/org/apache/pinot/spi/config/table/BloomFilterConfig.java
@@ -28,14 +28,32 @@ public class BloomFilterConfig extends BaseJsonConfig {
   public static final double DEFAULT_FPP = 0.05;
 
   private final double _fpp;
+  private final int _maxSizeInBytes;
+  private final boolean _loadOnHeap;
 
   @JsonCreator
-  public BloomFilterConfig(@JsonProperty(value = "fpp", required = true) double fpp) {
-    Preconditions.checkArgument(fpp > 0.0 && fpp < 1.0, "Invalid fpp (false positive probability): %s", fpp);
-    _fpp = fpp;
+  public BloomFilterConfig(@JsonProperty(value = "fpp") double fpp,
+      @JsonProperty(value = "maxSizeInBytes") int maxSizeInBytes,
+      @JsonProperty(value = "loadOnHeap") boolean loadOnHeap) {
+    if (fpp != 0.0) {
+      Preconditions.checkArgument(fpp > 0.0 && fpp < 1.0, "Invalid fpp (false positive probability): %s", fpp);
+      _fpp = fpp;
+    } else {
+      _fpp = DEFAULT_FPP;
+    }
+    _maxSizeInBytes = maxSizeInBytes;
+    _loadOnHeap = loadOnHeap;
   }
 
   public double getFpp() {
     return _fpp;
   }
+
+  public int getMaxSizeInBytes() {
+    return _maxSizeInBytes;
+  }
+
+  public boolean isLoadOnHeap() {
+    return _loadOnHeap;
+  }
 }


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@pinot.apache.org
For additional commands, e-mail: commits-help@pinot.apache.org