You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pinot.apache.org by ja...@apache.org on 2020/10/15 17:36:36 UTC
[incubator-pinot] branch master updated: Add OnHeapGuavaBloomFilterReader (#6147)
This is an automated email from the ASF dual-hosted git repository.
jackie pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-pinot.git
The following commit(s) were added to refs/heads/master by this push:
new a71b960 Add OnHeapGuavaBloomFilterReader (#6147)
a71b960 is described below
commit a71b96094dc58340274d11dad76b2bc58c7e09ff
Author: Xiaotian (Jackie) Jiang <17...@users.noreply.github.com>
AuthorDate: Thu Oct 15 10:36:20 2020 -0700
Add OnHeapGuavaBloomFilterReader (#6147)
Add the on-heap version of the Guava bloom filter reader.
Add two new fields to `BloomFilterConfig`:
- maxSizeInBytes: if configured (greater than 0), caps the size of the bloom filter; when the configured fpp would need a filter larger than this cap, the smallest fpp achievable within the cap is used instead (i.e. a larger fpp than configured)
- loadOnHeap: whether to load the bloom filter on-heap (true) or off-heap (false)
---
.../impl/bloom/OnHeapGuavaBloomFilterCreator.java | 16 ++++++-
.../index/column/PhysicalColumnIndexContainer.java | 23 ++++-----
.../segment/index/loader/IndexLoadingConfig.java | 2 +-
.../segment/index/readers/BloomFilterReader.java | 2 +-
...Reader.java => BaseGuavaBloomFilterReader.java} | 36 ++++-----------
.../readers/bloom/BloomFilterReaderFactory.java | 6 ++-
.../readers/bloom/GuavaBloomFilterReaderUtils.java | 25 ++++++++++
.../bloom/OffHeapGuavaBloomFilterReader.java | 40 ++--------------
.../bloom/OnHeapGuavaBloomFilterReader.java | 54 ++++++++++++++++++++++
.../index/creator/BloomFilterCreatorTest.java | 11 +++--
.../pinot/spi/config/table/BloomFilterConfig.java | 24 ++++++++--
11 files changed, 148 insertions(+), 91 deletions(-)
diff --git a/pinot-core/src/main/java/org/apache/pinot/core/segment/creator/impl/bloom/OnHeapGuavaBloomFilterCreator.java b/pinot-core/src/main/java/org/apache/pinot/core/segment/creator/impl/bloom/OnHeapGuavaBloomFilterCreator.java
index 1fb9b55..2ca942a 100644
--- a/pinot-core/src/main/java/org/apache/pinot/core/segment/creator/impl/bloom/OnHeapGuavaBloomFilterCreator.java
+++ b/pinot-core/src/main/java/org/apache/pinot/core/segment/creator/impl/bloom/OnHeapGuavaBloomFilterCreator.java
@@ -27,7 +27,10 @@ import java.io.IOException;
import java.nio.charset.StandardCharsets;
import org.apache.pinot.core.segment.creator.BloomFilterCreator;
import org.apache.pinot.core.segment.creator.impl.V1Constants;
+import org.apache.pinot.core.segment.index.readers.bloom.GuavaBloomFilterReaderUtils;
import org.apache.pinot.spi.config.table.BloomFilterConfig;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
/**
@@ -37,6 +40,8 @@ import org.apache.pinot.spi.config.table.BloomFilterConfig;
*/
@SuppressWarnings("UnstableApiUsage")
public class OnHeapGuavaBloomFilterCreator implements BloomFilterCreator {
+ private static final Logger LOGGER = LoggerFactory.getLogger(OnHeapGuavaBloomFilterCreator.class);
+
public static final int TYPE_VALUE = 1;
public static final int VERSION = 1;
@@ -46,8 +51,15 @@ public class OnHeapGuavaBloomFilterCreator implements BloomFilterCreator {
public OnHeapGuavaBloomFilterCreator(File indexDir, String columnName, int cardinality,
BloomFilterConfig bloomFilterConfig) {
_bloomFilterFile = new File(indexDir, columnName + V1Constants.Indexes.BLOOM_FILTER_FILE_EXTENSION);
- _bloomFilter =
- BloomFilter.create(Funnels.stringFunnel(StandardCharsets.UTF_8), cardinality, bloomFilterConfig.getFpp());
+ // Calculate the actual fpp with regards to the max size for the bloom filter
+ double fpp = bloomFilterConfig.getFpp();
+ int maxSizeInBytes = bloomFilterConfig.getMaxSizeInBytes();
+ if (maxSizeInBytes > 0) {
+ double minFpp = GuavaBloomFilterReaderUtils.computeFPP(maxSizeInBytes, cardinality);
+ fpp = Math.max(fpp, minFpp);
+ }
+ LOGGER.info("Creating bloom filter with cardinality: {}, fpp: {}", cardinality, fpp);
+ _bloomFilter = BloomFilter.create(Funnels.stringFunnel(StandardCharsets.UTF_8), cardinality, fpp);
}
@Override
diff --git a/pinot-core/src/main/java/org/apache/pinot/core/segment/index/column/PhysicalColumnIndexContainer.java b/pinot-core/src/main/java/org/apache/pinot/core/segment/index/column/PhysicalColumnIndexContainer.java
index 0573afe..595a8e0 100644
--- a/pinot-core/src/main/java/org/apache/pinot/core/segment/index/column/PhysicalColumnIndexContainer.java
+++ b/pinot-core/src/main/java/org/apache/pinot/core/segment/index/column/PhysicalColumnIndexContainer.java
@@ -54,6 +54,7 @@ import org.apache.pinot.core.segment.index.readers.text.LuceneTextIndexReader;
import org.apache.pinot.core.segment.memory.PinotDataBuffer;
import org.apache.pinot.core.segment.store.ColumnIndexType;
import org.apache.pinot.core.segment.store.SegmentDirectory;
+import org.apache.pinot.spi.config.table.BloomFilterConfig;
import org.apache.pinot.spi.data.FieldSpec;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -74,18 +75,11 @@ public final class PhysicalColumnIndexContainer implements ColumnIndexContainer
IndexLoadingConfig indexLoadingConfig, File segmentIndexDir)
throws IOException {
String columnName = metadata.getColumnName();
- boolean loadInvertedIndex = false;
- boolean loadRangeIndex = false;
- boolean loadTextIndex = false;
- boolean loadOnHeapDictionary = false;
- boolean loadBloomFilter = false;
- if (indexLoadingConfig != null) {
- loadInvertedIndex = indexLoadingConfig.getInvertedIndexColumns().contains(columnName);
- loadRangeIndex = indexLoadingConfig.getRangeIndexColumns().contains(columnName);
- loadOnHeapDictionary = indexLoadingConfig.getOnHeapDictionaryColumns().contains(columnName);
- loadBloomFilter = indexLoadingConfig.getBloomFilterConfigs().containsKey(columnName);
- loadTextIndex = indexLoadingConfig.getTextIndexColumns().contains(columnName);
- }
+ boolean loadInvertedIndex = indexLoadingConfig.getInvertedIndexColumns().contains(columnName);
+ boolean loadRangeIndex = indexLoadingConfig.getRangeIndexColumns().contains(columnName);
+ boolean loadTextIndex = indexLoadingConfig.getTextIndexColumns().contains(columnName);
+ boolean loadOnHeapDictionary = indexLoadingConfig.getOnHeapDictionaryColumns().contains(columnName);
+ BloomFilterConfig bloomFilterConfig = indexLoadingConfig.getBloomFilterConfigs().get(columnName);
if (segmentReader.hasIndexFor(columnName, ColumnIndexType.NULLVALUE_VECTOR)) {
PinotDataBuffer nullValueVectorBuffer = segmentReader.getIndexFor(columnName, ColumnIndexType.NULLVALUE_VECTOR);
@@ -107,9 +101,10 @@ public final class PhysicalColumnIndexContainer implements ColumnIndexContainer
if (metadata.hasDictionary()) {
//bloom filter
- if (loadBloomFilter) {
+ if (bloomFilterConfig != null) {
PinotDataBuffer bloomFilterBuffer = segmentReader.getIndexFor(columnName, ColumnIndexType.BLOOM_FILTER);
- _bloomFilter = BloomFilterReaderFactory.getBloomFilterReader(bloomFilterBuffer);
+ _bloomFilter =
+ BloomFilterReaderFactory.getBloomFilterReader(bloomFilterBuffer, bloomFilterConfig.isLoadOnHeap());
} else {
_bloomFilter = null;
}
diff --git a/pinot-core/src/main/java/org/apache/pinot/core/segment/index/loader/IndexLoadingConfig.java b/pinot-core/src/main/java/org/apache/pinot/core/segment/index/loader/IndexLoadingConfig.java
index 798ce61..a6817a0 100644
--- a/pinot-core/src/main/java/org/apache/pinot/core/segment/index/loader/IndexLoadingConfig.java
+++ b/pinot-core/src/main/java/org/apache/pinot/core/segment/index/loader/IndexLoadingConfig.java
@@ -100,7 +100,7 @@ public class IndexLoadingConfig {
List<String> bloomFilterColumns = indexingConfig.getBloomFilterColumns();
if (bloomFilterColumns != null) {
for (String bloomFilterColumn : bloomFilterColumns) {
- _bloomFilterConfigs.put(bloomFilterColumn, new BloomFilterConfig(BloomFilterConfig.DEFAULT_FPP));
+ _bloomFilterConfigs.put(bloomFilterColumn, new BloomFilterConfig(BloomFilterConfig.DEFAULT_FPP, 0, false));
}
}
Map<String, BloomFilterConfig> bloomFilterConfigs = indexingConfig.getBloomFilterConfigs();
diff --git a/pinot-core/src/main/java/org/apache/pinot/core/segment/index/readers/BloomFilterReader.java b/pinot-core/src/main/java/org/apache/pinot/core/segment/index/readers/BloomFilterReader.java
index 09a1dcf..2a16206 100644
--- a/pinot-core/src/main/java/org/apache/pinot/core/segment/index/readers/BloomFilterReader.java
+++ b/pinot-core/src/main/java/org/apache/pinot/core/segment/index/readers/BloomFilterReader.java
@@ -36,5 +36,5 @@ public interface BloomFilterReader extends Closeable {
* otherwise.
* <p>This method is provided to prevent hashing the same value multiple times.
*/
- boolean mightContain(byte[] hash);
+ boolean mightContain(long hash1, long hash2);
}
diff --git a/pinot-core/src/main/java/org/apache/pinot/core/segment/index/readers/bloom/OffHeapGuavaBloomFilterReader.java b/pinot-core/src/main/java/org/apache/pinot/core/segment/index/readers/bloom/BaseGuavaBloomFilterReader.java
similarity index 65%
copy from pinot-core/src/main/java/org/apache/pinot/core/segment/index/readers/bloom/OffHeapGuavaBloomFilterReader.java
copy to pinot-core/src/main/java/org/apache/pinot/core/segment/index/readers/bloom/BaseGuavaBloomFilterReader.java
index 56dac45..0c6f5d4 100644
--- a/pinot-core/src/main/java/org/apache/pinot/core/segment/index/readers/bloom/OffHeapGuavaBloomFilterReader.java
+++ b/pinot-core/src/main/java/org/apache/pinot/core/segment/index/readers/bloom/BaseGuavaBloomFilterReader.java
@@ -25,11 +25,10 @@ import org.apache.pinot.core.segment.memory.PinotDataBuffer;
/**
- * Off-heap reader for guava bloom filter.
- * <p>The behavior should be aligned with {@link com.google.common.hash.BloomFilter}.
+ * Base implementation of the reader for guava bloom filter.
+ * <p>The format of the data should be aligned with the guava bloom filter.
*/
-@SuppressWarnings("UnstableApiUsage")
-public class OffHeapGuavaBloomFilterReader implements BloomFilterReader {
+public abstract class BaseGuavaBloomFilterReader implements BloomFilterReader {
// Format of the data buffer header:
// - Strategy ordinal: 1 byte
// - Number of hash functions: 1 byte
@@ -39,11 +38,11 @@ public class OffHeapGuavaBloomFilterReader implements BloomFilterReader {
private static final int NUM_LONGS_OFFSET = 2;
private static final int HEADER_SIZE = 6;
- private final int _numHashFunctions;
- private final long _numBits;
- private final PinotDataBuffer _valueBuffer;
+ protected final int _numHashFunctions;
+ protected final long _numBits;
+ protected final PinotDataBuffer _valueBuffer;
- public OffHeapGuavaBloomFilterReader(PinotDataBuffer dataBuffer) {
+ public BaseGuavaBloomFilterReader(PinotDataBuffer dataBuffer) {
byte strategyOrdinal = dataBuffer.getByte(STRATEGY_ORDINAL_OFFSET);
Preconditions.checkState(strategyOrdinal == 1, "Unsupported strategy ordinal: %s", strategyOrdinal);
_numHashFunctions = dataBuffer.getByte(NUM_HASH_FUNCTIONS_OFFSET) & 0xFF;
@@ -53,27 +52,10 @@ public class OffHeapGuavaBloomFilterReader implements BloomFilterReader {
@Override
public boolean mightContain(String value) {
- return mightContain(GuavaBloomFilterReaderUtils.hash(value));
- }
-
- @Override
- public boolean mightContain(byte[] hash) {
+ byte[] hash = GuavaBloomFilterReaderUtils.hash(value);
long hash1 = Longs.fromBytes(hash[7], hash[6], hash[5], hash[4], hash[3], hash[2], hash[1], hash[0]);
long hash2 = Longs.fromBytes(hash[15], hash[14], hash[13], hash[12], hash[11], hash[10], hash[9], hash[8]);
- long combinedHash = hash1;
- for (int i = 0; i < _numHashFunctions; i++) {
- long bitIndex = (combinedHash & Long.MAX_VALUE) % _numBits;
- // NOTE: Guava bloom filter stores bits in a long array. Inside each long value, the bits are stored in the
- // reverse order (the first bit is stored as the right most bit of the long).
- int longIndex = (int) (bitIndex >>> 6);
- int bitIndexInLong = (int) (bitIndex & 0x3F);
- int byteIndex = (longIndex << 3) | (7 - (bitIndexInLong >>> 3));
- if ((_valueBuffer.getByte(byteIndex) & (1 << (bitIndexInLong & 7))) == 0) {
- return false;
- }
- combinedHash += hash2;
- }
- return true;
+ return mightContain(hash1, hash2);
}
@Override
diff --git a/pinot-core/src/main/java/org/apache/pinot/core/segment/index/readers/bloom/BloomFilterReaderFactory.java b/pinot-core/src/main/java/org/apache/pinot/core/segment/index/readers/bloom/BloomFilterReaderFactory.java
index 21f0636..728dffa 100644
--- a/pinot-core/src/main/java/org/apache/pinot/core/segment/index/readers/bloom/BloomFilterReaderFactory.java
+++ b/pinot-core/src/main/java/org/apache/pinot/core/segment/index/readers/bloom/BloomFilterReaderFactory.java
@@ -32,12 +32,14 @@ public class BloomFilterReaderFactory {
private static final int VERSION_OFFSET = 4;
private static final int HEADER_SIZE = 8;
- public static BloomFilterReader getBloomFilterReader(PinotDataBuffer dataBuffer) {
+ public static BloomFilterReader getBloomFilterReader(PinotDataBuffer dataBuffer, boolean onHeap) {
int typeValue = dataBuffer.getInt(TYPE_VALUE_OFFSET);
int version = dataBuffer.getInt(VERSION_OFFSET);
Preconditions.checkState(
typeValue == OnHeapGuavaBloomFilterCreator.TYPE_VALUE && version == OnHeapGuavaBloomFilterCreator.VERSION,
"Unsupported bloom filter type value: %s and version: %s", typeValue, version);
- return new OffHeapGuavaBloomFilterReader(dataBuffer.view(HEADER_SIZE, dataBuffer.size()));
+ PinotDataBuffer bloomFilterDataBuffer = dataBuffer.view(HEADER_SIZE, dataBuffer.size());
+ return onHeap ? new OnHeapGuavaBloomFilterReader(bloomFilterDataBuffer)
+ : new OffHeapGuavaBloomFilterReader(bloomFilterDataBuffer);
}
}
diff --git a/pinot-core/src/main/java/org/apache/pinot/core/segment/index/readers/bloom/GuavaBloomFilterReaderUtils.java b/pinot-core/src/main/java/org/apache/pinot/core/segment/index/readers/bloom/GuavaBloomFilterReaderUtils.java
index 648fdff..c9feb9e 100644
--- a/pinot-core/src/main/java/org/apache/pinot/core/segment/index/readers/bloom/GuavaBloomFilterReaderUtils.java
+++ b/pinot-core/src/main/java/org/apache/pinot/core/segment/index/readers/bloom/GuavaBloomFilterReaderUtils.java
@@ -37,4 +37,29 @@ public class GuavaBloomFilterReaderUtils {
public static byte[] hash(String value) {
return HASH_FUNCTION.hashBytes(StringUtils.encodeUtf8(value)).asBytes();
}
+
+ /* Cheat sheet:
+
+ m: total bits
+ n: expected insertions
+ b: m/n, bits per insertion
+ p: expected false positive probability
+ k: number of hash functions
+
+ 1) Optimal k = b * ln2
+ 2) p = (1 - e ^ (-kn/m)) ^ k
+ 3) For optimal k: p = 2 ^ (-k) ~= 0.6185^b
+ 4) For optimal k: m = -nlnp / ((ln2) ^ 2)
+
+ See http://en.wikipedia.org/wiki/Bloom_filter#Probability_of_false_positives for the formula.
+ */
+
+ /**
+ * Calculates the fpp (false positive probability) based on the given bloom filter size and number of insertions.
+ */
+ public static double computeFPP(int sizeInBytes, int numInsertions) {
+ double b = (double) sizeInBytes * Byte.SIZE / numInsertions;
+ double k = b * Math.log(2);
+ return Math.pow(2, -k);
+ }
}
diff --git a/pinot-core/src/main/java/org/apache/pinot/core/segment/index/readers/bloom/OffHeapGuavaBloomFilterReader.java b/pinot-core/src/main/java/org/apache/pinot/core/segment/index/readers/bloom/OffHeapGuavaBloomFilterReader.java
index 56dac45..a5216e8 100644
--- a/pinot-core/src/main/java/org/apache/pinot/core/segment/index/readers/bloom/OffHeapGuavaBloomFilterReader.java
+++ b/pinot-core/src/main/java/org/apache/pinot/core/segment/index/readers/bloom/OffHeapGuavaBloomFilterReader.java
@@ -18,48 +18,20 @@
*/
package org.apache.pinot.core.segment.index.readers.bloom;
-import com.google.common.base.Preconditions;
-import com.google.common.primitives.Longs;
-import org.apache.pinot.core.segment.index.readers.BloomFilterReader;
import org.apache.pinot.core.segment.memory.PinotDataBuffer;
/**
* Off-heap reader for guava bloom filter.
- * <p>The behavior should be aligned with {@link com.google.common.hash.BloomFilter}.
*/
-@SuppressWarnings("UnstableApiUsage")
-public class OffHeapGuavaBloomFilterReader implements BloomFilterReader {
- // Format of the data buffer header:
- // - Strategy ordinal: 1 byte
- // - Number of hash functions: 1 byte
- // - Number of long values: 4 bytes
- private static final int STRATEGY_ORDINAL_OFFSET = 0;
- private static final int NUM_HASH_FUNCTIONS_OFFSET = 1;
- private static final int NUM_LONGS_OFFSET = 2;
- private static final int HEADER_SIZE = 6;
-
- private final int _numHashFunctions;
- private final long _numBits;
- private final PinotDataBuffer _valueBuffer;
+public class OffHeapGuavaBloomFilterReader extends BaseGuavaBloomFilterReader {
public OffHeapGuavaBloomFilterReader(PinotDataBuffer dataBuffer) {
- byte strategyOrdinal = dataBuffer.getByte(STRATEGY_ORDINAL_OFFSET);
- Preconditions.checkState(strategyOrdinal == 1, "Unsupported strategy ordinal: %s", strategyOrdinal);
- _numHashFunctions = dataBuffer.getByte(NUM_HASH_FUNCTIONS_OFFSET) & 0xFF;
- _numBits = (long) dataBuffer.getInt(NUM_LONGS_OFFSET) * Long.SIZE;
- _valueBuffer = dataBuffer.view(HEADER_SIZE, dataBuffer.size());
- }
-
- @Override
- public boolean mightContain(String value) {
- return mightContain(GuavaBloomFilterReaderUtils.hash(value));
+ super(dataBuffer);
}
@Override
- public boolean mightContain(byte[] hash) {
- long hash1 = Longs.fromBytes(hash[7], hash[6], hash[5], hash[4], hash[3], hash[2], hash[1], hash[0]);
- long hash2 = Longs.fromBytes(hash[15], hash[14], hash[13], hash[12], hash[11], hash[10], hash[9], hash[8]);
+ public boolean mightContain(long hash1, long hash2) {
long combinedHash = hash1;
for (int i = 0; i < _numHashFunctions; i++) {
long bitIndex = (combinedHash & Long.MAX_VALUE) % _numBits;
@@ -75,10 +47,4 @@ public class OffHeapGuavaBloomFilterReader implements BloomFilterReader {
}
return true;
}
-
- @Override
- public void close() {
- // NOTE: DO NOT close the PinotDataBuffer here because it is tracked by the caller and might be reused later. The
- // caller is responsible of closing the PinotDataBuffer.
- }
}
diff --git a/pinot-core/src/main/java/org/apache/pinot/core/segment/index/readers/bloom/OnHeapGuavaBloomFilterReader.java b/pinot-core/src/main/java/org/apache/pinot/core/segment/index/readers/bloom/OnHeapGuavaBloomFilterReader.java
new file mode 100644
index 0000000..e990712
--- /dev/null
+++ b/pinot-core/src/main/java/org/apache/pinot/core/segment/index/readers/bloom/OnHeapGuavaBloomFilterReader.java
@@ -0,0 +1,54 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.pinot.core.segment.index.readers.bloom;
+
+import org.apache.pinot.core.segment.memory.PinotDataBuffer;
+
+
+/**
+ * On-heap reader for guava bloom filter.
+ */
+public class OnHeapGuavaBloomFilterReader extends BaseGuavaBloomFilterReader {
+ private final long[] _data;
+
+ public OnHeapGuavaBloomFilterReader(PinotDataBuffer dataBuffer) {
+ super(dataBuffer);
+
+ int numLongs = (int) (_numBits / Long.SIZE);
+ _data = new long[numLongs];
+ for (int i = 0; i < numLongs; i++) {
+ _data[i] = _valueBuffer.getLong(i * Long.BYTES);
+ }
+ }
+
+ @Override
+ public boolean mightContain(long hash1, long hash2) {
+ long combinedHash = hash1;
+ for (int i = 0; i < _numHashFunctions; i++) {
+ long bitIndex = (combinedHash & Long.MAX_VALUE) % _numBits;
+ // NOTE: Guava bloom filter stores bits in a long array. Inside each long value, the bits are stored in the
+ // reverse order (the first bit is stored as the right most bit of the long).
+ if ((_data[(int) (bitIndex >>> 6)] & (1L << bitIndex)) == 0) {
+ return false;
+ }
+ combinedHash += hash2;
+ }
+ return true;
+ }
+}
diff --git a/pinot-core/src/test/java/org/apache/pinot/core/segment/index/creator/BloomFilterCreatorTest.java b/pinot-core/src/test/java/org/apache/pinot/core/segment/index/creator/BloomFilterCreatorTest.java
index 4ecc586..28e5b7f 100644
--- a/pinot-core/src/test/java/org/apache/pinot/core/segment/index/creator/BloomFilterCreatorTest.java
+++ b/pinot-core/src/test/java/org/apache/pinot/core/segment/index/creator/BloomFilterCreatorTest.java
@@ -50,7 +50,7 @@ public class BloomFilterCreatorTest {
int cardinality = 10000;
String columnName = "testColumn";
try (BloomFilterCreator bloomFilterCreator = new OnHeapGuavaBloomFilterCreator(TEMP_DIR, columnName, cardinality,
- new BloomFilterConfig(BloomFilterConfig.DEFAULT_FPP))) {
+ new BloomFilterConfig(BloomFilterConfig.DEFAULT_FPP, 0, false))) {
for (int i = 0; i < 5; i++) {
bloomFilterCreator.add(Integer.toString(i));
}
@@ -60,12 +60,15 @@ public class BloomFilterCreatorTest {
// Read the bloom filter
File bloomFilterFile = new File(TEMP_DIR, columnName + V1Constants.Indexes.BLOOM_FILTER_FILE_EXTENSION);
try (PinotDataBuffer dataBuffer = PinotDataBuffer.mapReadOnlyBigEndianFile(bloomFilterFile);
- BloomFilterReader bloomFilterReader = BloomFilterReaderFactory.getBloomFilterReader(dataBuffer)) {
+ BloomFilterReader onHeapBloomFilter = BloomFilterReaderFactory.getBloomFilterReader(dataBuffer, true);
+ BloomFilterReader offHeapBloomFilter = BloomFilterReaderFactory.getBloomFilterReader(dataBuffer, false);) {
for (int i = 0; i < 5; i++) {
- Assert.assertTrue(bloomFilterReader.mightContain(Integer.toString(i)));
+ Assert.assertTrue(onHeapBloomFilter.mightContain(Integer.toString(i)));
+ Assert.assertTrue(offHeapBloomFilter.mightContain(Integer.toString(i)));
}
for (int i = 5; i < 10; i++) {
- Assert.assertFalse(bloomFilterReader.mightContain(Integer.toString(i)));
+ Assert.assertFalse(onHeapBloomFilter.mightContain(Integer.toString(i)));
+ Assert.assertFalse(offHeapBloomFilter.mightContain(Integer.toString(i)));
}
}
}
diff --git a/pinot-spi/src/main/java/org/apache/pinot/spi/config/table/BloomFilterConfig.java b/pinot-spi/src/main/java/org/apache/pinot/spi/config/table/BloomFilterConfig.java
index d488ece..22af21b 100644
--- a/pinot-spi/src/main/java/org/apache/pinot/spi/config/table/BloomFilterConfig.java
+++ b/pinot-spi/src/main/java/org/apache/pinot/spi/config/table/BloomFilterConfig.java
@@ -28,14 +28,32 @@ public class BloomFilterConfig extends BaseJsonConfig {
public static final double DEFAULT_FPP = 0.05;
private final double _fpp;
+ private final int _maxSizeInBytes;
+ private final boolean _loadOnHeap;
@JsonCreator
- public BloomFilterConfig(@JsonProperty(value = "fpp", required = true) double fpp) {
- Preconditions.checkArgument(fpp > 0.0 && fpp < 1.0, "Invalid fpp (false positive probability): %s", fpp);
- _fpp = fpp;
+ public BloomFilterConfig(@JsonProperty(value = "fpp") double fpp,
+ @JsonProperty(value = "maxSizeInBytes") int maxSizeInBytes,
+ @JsonProperty(value = "loadOnHeap") boolean loadOnHeap) {
+ if (fpp != 0.0) {
+ Preconditions.checkArgument(fpp > 0.0 && fpp < 1.0, "Invalid fpp (false positive probability): %s", fpp);
+ _fpp = fpp;
+ } else {
+ _fpp = DEFAULT_FPP;
+ }
+ _maxSizeInBytes = maxSizeInBytes;
+ _loadOnHeap = loadOnHeap;
}
public double getFpp() {
return _fpp;
}
+
+ public int getMaxSizeInBytes() {
+ return _maxSizeInBytes;
+ }
+
+ public boolean isLoadOnHeap() {
+ return _loadOnHeap;
+ }
}
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@pinot.apache.org
For additional commands, e-mail: commits-help@pinot.apache.org