You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by go...@apache.org on 2018/07/31 21:10:18 UTC

hive git commit: HIVE-20101: BloomKFilter: Avoid using the local byte[] arrays entirely (Gopal V, reviewed by Prasanth Jayachandran)

Repository: hive
Updated Branches:
  refs/heads/master 3e46515d3 -> 87ce36b45


HIVE-20101: BloomKFilter: Avoid using the local byte[] arrays entirely (Gopal V, reviewed by Prasanth Jayachandran)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/87ce36b4
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/87ce36b4
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/87ce36b4

Branch: refs/heads/master
Commit: 87ce36b458350db141c4cb4b6336a9a01796370f
Parents: 3e46515
Author: Gopal V <go...@apache.org>
Authored: Tue Jul 31 14:10:06 2018 -0700
Committer: Gopal V <go...@apache.org>
Committed: Tue Jul 31 14:10:06 2018 -0700

----------------------------------------------------------------------
 .../apache/hive/common/util/BloomKFilter.java   | 30 +++-----------------
 1 file changed, 4 insertions(+), 26 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/87ce36b4/storage-api/src/java/org/apache/hive/common/util/BloomKFilter.java
----------------------------------------------------------------------
diff --git a/storage-api/src/java/org/apache/hive/common/util/BloomKFilter.java b/storage-api/src/java/org/apache/hive/common/util/BloomKFilter.java
index 5b1914d..3b44d2b 100644
--- a/storage-api/src/java/org/apache/hive/common/util/BloomKFilter.java
+++ b/storage-api/src/java/org/apache/hive/common/util/BloomKFilter.java
@@ -36,8 +36,6 @@ import java.util.Arrays;
  * This implementation has much lesser L1 data cache misses than {@link BloomFilter}.
  */
 public class BloomKFilter {
-  private final byte[] BYTE_ARRAY_4 = new byte[4];
-  private final byte[] BYTE_ARRAY_8 = new byte[8];
   public static final float DEFAULT_FPP = 0.05f;
   private static final int DEFAULT_BLOCK_SIZE = 8;
   private static final int DEFAULT_BLOCK_SIZE_BITS = (int) (Math.log(DEFAULT_BLOCK_SIZE) / Math.log(2));
@@ -149,8 +147,7 @@ public class BloomKFilter {
   }
 
   public void addInt(int val) {
-    // puts int in little endian order
-    addBytes(intToByteArrayLE(val));
+    addHash(Murmur3.hash64(val));
   }
 
 
@@ -184,6 +181,7 @@ public class BloomKFilter {
   private boolean testHash(long hash64) {
     final int hash1 = (int) hash64;
     final int hash2 = (int) (hash64 >>> 32);
+    final long[] bits = bitSet.data;
 
     int firstHash = hash1 + hash2;
     // hashcode should be positive, flip all the bits if it's negative
@@ -216,7 +214,7 @@ public class BloomKFilter {
     long expected = 0;
     for (int i = 0; i < DEFAULT_BLOCK_SIZE; i++) {
       final long mask = masks[i];
-      expected |= (bitSet.data[blockBaseOffset + i] & mask) ^ mask;
+      expected |= (bits[blockBaseOffset + i] & mask) ^ mask;
     }
 
     // clear the mask for array reuse (this is to avoid masks array allocation in inner loop)
@@ -235,7 +233,7 @@ public class BloomKFilter {
   }
 
   public boolean testInt(int val) {
-    return testBytes(intToByteArrayLE(val));
+    return testHash(Murmur3.hash64(val));
   }
 
   public boolean testLong(long val) {
@@ -250,26 +248,6 @@ public class BloomKFilter {
     return testLong(Double.doubleToLongBits(val));
   }
 
-  private byte[] intToByteArrayLE(int val) {
-    BYTE_ARRAY_4[0] = (byte) (val >> 0);
-    BYTE_ARRAY_4[1] = (byte) (val >> 8);
-    BYTE_ARRAY_4[2] = (byte) (val >> 16);
-    BYTE_ARRAY_4[3] = (byte) (val >> 24);
-    return BYTE_ARRAY_4;
-  }
-
-  private byte[] longToByteArrayLE(long val) {
-    BYTE_ARRAY_8[0] = (byte) (val >> 0);
-    BYTE_ARRAY_8[1] = (byte) (val >> 8);
-    BYTE_ARRAY_8[2] = (byte) (val >> 16);
-    BYTE_ARRAY_8[3] = (byte) (val >> 24);
-    BYTE_ARRAY_8[4] = (byte) (val >> 32);
-    BYTE_ARRAY_8[5] = (byte) (val >> 40);
-    BYTE_ARRAY_8[6] = (byte) (val >> 48);
-    BYTE_ARRAY_8[7] = (byte) (val >> 56);
-    return BYTE_ARRAY_8;
-  }
-
   public long sizeInBytes() {
     return getBitSize() / 8;
   }