You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@parquet.apache.org by ga...@apache.org on 2020/03/31 07:27:18 UTC
[parquet-mr] branch master updated: PARQUET-1743: Add equals API to BloomFilter interface (#773)
This is an automated email from the ASF dual-hosted git repository.
gabor pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/parquet-mr.git
The following commit(s) were added to refs/heads/master by this push:
new 5d2bf27 PARQUET-1743: Add equals API to BloomFilter interface (#773)
5d2bf27 is described below
commit 5d2bf2789041fc8ff6de48a590775aafe4457db3
Author: Walid Gara <20...@users.noreply.github.com>
AuthorDate: Tue Mar 31 09:27:11 2020 +0200
PARQUET-1743: Add equals API to BloomFilter interface (#773)
---
.../values/bloomfilter/BlockSplitBloomFilter.java | 15 +++++++++++++++
.../column/values/bloomfilter/BloomFilter.java | 8 ++++++++
.../bloomfilter/TestBlockSplitBloomFilter.java | 20 ++++++++++++++++++++
3 files changed, 43 insertions(+)
diff --git a/parquet-column/src/main/java/org/apache/parquet/column/values/bloomfilter/BlockSplitBloomFilter.java b/parquet-column/src/main/java/org/apache/parquet/column/values/bloomfilter/BlockSplitBloomFilter.java
index a74c426..7a9b7a9 100644
--- a/parquet-column/src/main/java/org/apache/parquet/column/values/bloomfilter/BlockSplitBloomFilter.java
+++ b/parquet-column/src/main/java/org/apache/parquet/column/values/bloomfilter/BlockSplitBloomFilter.java
@@ -27,6 +27,7 @@ import java.io.OutputStream;
import java.nio.ByteBuffer;
import java.nio.ByteOrder;
import java.nio.IntBuffer;
+import java.util.Arrays;
/*
* This Bloom filter is implemented using block-based Bloom filter algorithm from Putze et al.'s
@@ -329,6 +330,20 @@ public class BlockSplitBloomFilter implements BloomFilter {
}
@Override
+ public boolean equals(Object object) {
+ if (object == this) {
+ return true;
+ }
+ if (object instanceof BlockSplitBloomFilter) {
+ BlockSplitBloomFilter that = (BlockSplitBloomFilter) object;
+ return Arrays.equals(this.bitset, that.bitset)
+ && this.getAlgorithm() == that.getAlgorithm()
+ && this.hashStrategy == that.hashStrategy;
+ }
+ return false;
+ }
+
+ @Override
public HashStrategy getHashStrategy() {
return HashStrategy.XXH64;
}
diff --git a/parquet-column/src/main/java/org/apache/parquet/column/values/bloomfilter/BloomFilter.java b/parquet-column/src/main/java/org/apache/parquet/column/values/bloomfilter/BloomFilter.java
index 27926e0..19a3de6 100644
--- a/parquet-column/src/main/java/org/apache/parquet/column/values/bloomfilter/BloomFilter.java
+++ b/parquet-column/src/main/java/org/apache/parquet/column/values/bloomfilter/BloomFilter.java
@@ -97,6 +97,14 @@ public interface BloomFilter {
int getBitsetSize();
/**
+ * Compare this Bloom filter to the specified object.
+ *
+ * @param object the object to compare with this Bloom filter
+ * @return true if the given object represents a Bloom filter equivalent to this Bloom filter, false otherwise.
+ */
+ boolean equals(Object object);
+
+ /**
* Compute hash for int value by using its plain encoding result.
*
* @param value the value to hash
diff --git a/parquet-column/src/test/java/org/apache/parquet/column/values/bloomfilter/TestBlockSplitBloomFilter.java b/parquet-column/src/test/java/org/apache/parquet/column/values/bloomfilter/TestBlockSplitBloomFilter.java
index 9d2aacc..6adabe7 100644
--- a/parquet-column/src/test/java/org/apache/parquet/column/values/bloomfilter/TestBlockSplitBloomFilter.java
+++ b/parquet-column/src/test/java/org/apache/parquet/column/values/bloomfilter/TestBlockSplitBloomFilter.java
@@ -31,6 +31,7 @@ import org.junit.Rule;
import org.junit.Test;
import org.junit.rules.TemporaryFolder;
import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertNotEquals;
import static org.junit.Assert.assertTrue;
public class TestBlockSplitBloomFilter {
@@ -144,6 +145,25 @@ public class TestBlockSplitBloomFilter {
}
@Test
+ public void testEquals() {
+ final String[] words = {"hello", "parquet", "bloom", "filter"};
+ BloomFilter bloomFilterOne = new BlockSplitBloomFilter(1024);
+ BloomFilter bloomFilterTwo = new BlockSplitBloomFilter(1024);
+
+ for (String word : words) {
+ bloomFilterOne.insertHash(bloomFilterOne.hash(Binary.fromString(word)));
+ bloomFilterTwo.insertHash(bloomFilterTwo.hash(Binary.fromString(word)));
+ }
+
+ assertEquals(bloomFilterOne, bloomFilterTwo);
+
+ BloomFilter bloomFilterThree = new BlockSplitBloomFilter(1024);
+ bloomFilterThree.insertHash(bloomFilterThree.hash(Binary.fromString("parquet")));
+
+ assertNotEquals(bloomFilterTwo, bloomFilterThree);
+ }
+
+ @Test
public void testBloomFilterNDVs(){
// a row group of 128M with one column of long type.
int ndv = 128 * 1024 * 1024 / 8;