You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@parquet.apache.org by ga...@apache.org on 2020/03/31 07:27:18 UTC

[parquet-mr] branch master updated: PARQUET-1743: Add equals API to BloomFilter interface (#773)

This is an automated email from the ASF dual-hosted git repository.

gabor pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/parquet-mr.git


The following commit(s) were added to refs/heads/master by this push:
     new 5d2bf27  PARQUET-1743: Add equals API to BloomFilter interface (#773)
5d2bf27 is described below

commit 5d2bf2789041fc8ff6de48a590775aafe4457db3
Author: Walid Gara <20...@users.noreply.github.com>
AuthorDate: Tue Mar 31 09:27:11 2020 +0200

    PARQUET-1743: Add equals API to BloomFilter interface (#773)
---
 .../values/bloomfilter/BlockSplitBloomFilter.java    | 15 +++++++++++++++
 .../column/values/bloomfilter/BloomFilter.java       |  8 ++++++++
 .../bloomfilter/TestBlockSplitBloomFilter.java       | 20 ++++++++++++++++++++
 3 files changed, 43 insertions(+)

diff --git a/parquet-column/src/main/java/org/apache/parquet/column/values/bloomfilter/BlockSplitBloomFilter.java b/parquet-column/src/main/java/org/apache/parquet/column/values/bloomfilter/BlockSplitBloomFilter.java
index a74c426..7a9b7a9 100644
--- a/parquet-column/src/main/java/org/apache/parquet/column/values/bloomfilter/BlockSplitBloomFilter.java
+++ b/parquet-column/src/main/java/org/apache/parquet/column/values/bloomfilter/BlockSplitBloomFilter.java
@@ -27,6 +27,7 @@ import java.io.OutputStream;
 import java.nio.ByteBuffer;
 import java.nio.ByteOrder;
 import java.nio.IntBuffer;
+import java.util.Arrays;
 
 /*
  * This Bloom filter is implemented using block-based Bloom filter algorithm from Putze et al.'s
@@ -329,6 +330,20 @@ public class BlockSplitBloomFilter implements BloomFilter {
   }
 
   @Override
+  public boolean equals(Object object) {
+    if (object == this) {
+      return true;
+    }
+    if (object instanceof BlockSplitBloomFilter) {
+      BlockSplitBloomFilter that = (BlockSplitBloomFilter) object;
+      return Arrays.equals(this.bitset, that.bitset)
+        && this.getAlgorithm() == that.getAlgorithm()
+        && this.hashStrategy == that.hashStrategy;
+    }
+    return false;
+  }
+
+  @Override
   public HashStrategy getHashStrategy() {
     return HashStrategy.XXH64;
   }
diff --git a/parquet-column/src/main/java/org/apache/parquet/column/values/bloomfilter/BloomFilter.java b/parquet-column/src/main/java/org/apache/parquet/column/values/bloomfilter/BloomFilter.java
index 27926e0..19a3de6 100644
--- a/parquet-column/src/main/java/org/apache/parquet/column/values/bloomfilter/BloomFilter.java
+++ b/parquet-column/src/main/java/org/apache/parquet/column/values/bloomfilter/BloomFilter.java
@@ -97,6 +97,14 @@ public interface BloomFilter {
   int getBitsetSize();
 
   /**
+   * Compare this Bloom filter to the specified object.
+   *
+   * @param object the object to compare with this Bloom filter
+   * @return true if the given object represents a Bloom filter equivalent to this Bloom filter, false otherwise.
+   */
+  boolean equals(Object object);
+
+  /**
    * Compute hash for int value by using its plain encoding result.
    *
    * @param value the value to hash
diff --git a/parquet-column/src/test/java/org/apache/parquet/column/values/bloomfilter/TestBlockSplitBloomFilter.java b/parquet-column/src/test/java/org/apache/parquet/column/values/bloomfilter/TestBlockSplitBloomFilter.java
index 9d2aacc..6adabe7 100644
--- a/parquet-column/src/test/java/org/apache/parquet/column/values/bloomfilter/TestBlockSplitBloomFilter.java
+++ b/parquet-column/src/test/java/org/apache/parquet/column/values/bloomfilter/TestBlockSplitBloomFilter.java
@@ -31,6 +31,7 @@ import org.junit.Rule;
 import org.junit.Test;
 import org.junit.rules.TemporaryFolder;
 import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertNotEquals;
 import static org.junit.Assert.assertTrue;
 
 public class TestBlockSplitBloomFilter {
@@ -144,6 +145,25 @@ public class TestBlockSplitBloomFilter {
   }
 
   @Test
+  public void testEquals() {
+    final String[] words = {"hello", "parquet", "bloom", "filter"};
+    BloomFilter bloomFilterOne = new BlockSplitBloomFilter(1024);
+    BloomFilter bloomFilterTwo = new BlockSplitBloomFilter(1024);
+
+    for (String word : words) {
+      bloomFilterOne.insertHash(bloomFilterOne.hash(Binary.fromString(word)));
+      bloomFilterTwo.insertHash(bloomFilterTwo.hash(Binary.fromString(word)));
+    }
+
+    assertEquals(bloomFilterOne, bloomFilterTwo);
+
+    BloomFilter bloomFilterThree = new BlockSplitBloomFilter(1024);
+    bloomFilterThree.insertHash(bloomFilterThree.hash(Binary.fromString("parquet")));
+
+    assertNotEquals(bloomFilterTwo, bloomFilterThree);
+  }
+
+  @Test
   public void testBloomFilterNDVs(){
     // a row group of 128M with one column of long type.
     int ndv = 128 * 1024 * 1024 / 8;