You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by go...@apache.org on 2018/07/31 21:10:18 UTC
hive git commit: HIVE-20101: BloomKFilter: Avoid using the local byte[] arrays entirely (Gopal V, reviewed by Prasanth Jayachandran)
Repository: hive
Updated Branches:
refs/heads/master 3e46515d3 -> 87ce36b45
HIVE-20101: BloomKFilter: Avoid using the local byte[] arrays entirely (Gopal V, reviewed by Prasanth Jayachandran)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/87ce36b4
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/87ce36b4
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/87ce36b4
Branch: refs/heads/master
Commit: 87ce36b458350db141c4cb4b6336a9a01796370f
Parents: 3e46515
Author: Gopal V <go...@apache.org>
Authored: Tue Jul 31 14:10:06 2018 -0700
Committer: Gopal V <go...@apache.org>
Committed: Tue Jul 31 14:10:06 2018 -0700
----------------------------------------------------------------------
.../apache/hive/common/util/BloomKFilter.java | 30 +++-----------------
1 file changed, 4 insertions(+), 26 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/87ce36b4/storage-api/src/java/org/apache/hive/common/util/BloomKFilter.java
----------------------------------------------------------------------
diff --git a/storage-api/src/java/org/apache/hive/common/util/BloomKFilter.java b/storage-api/src/java/org/apache/hive/common/util/BloomKFilter.java
index 5b1914d..3b44d2b 100644
--- a/storage-api/src/java/org/apache/hive/common/util/BloomKFilter.java
+++ b/storage-api/src/java/org/apache/hive/common/util/BloomKFilter.java
@@ -36,8 +36,6 @@ import java.util.Arrays;
* This implementation has much lesser L1 data cache misses than {@link BloomFilter}.
*/
public class BloomKFilter {
- private final byte[] BYTE_ARRAY_4 = new byte[4];
- private final byte[] BYTE_ARRAY_8 = new byte[8];
public static final float DEFAULT_FPP = 0.05f;
private static final int DEFAULT_BLOCK_SIZE = 8;
private static final int DEFAULT_BLOCK_SIZE_BITS = (int) (Math.log(DEFAULT_BLOCK_SIZE) / Math.log(2));
@@ -149,8 +147,7 @@ public class BloomKFilter {
}
public void addInt(int val) {
- // puts int in little endian order
- addBytes(intToByteArrayLE(val));
+ addHash(Murmur3.hash64(val));
}
@@ -184,6 +181,7 @@ public class BloomKFilter {
private boolean testHash(long hash64) {
final int hash1 = (int) hash64;
final int hash2 = (int) (hash64 >>> 32);
+ final long[] bits = bitSet.data;
int firstHash = hash1 + hash2;
// hashcode should be positive, flip all the bits if it's negative
@@ -216,7 +214,7 @@ public class BloomKFilter {
long expected = 0;
for (int i = 0; i < DEFAULT_BLOCK_SIZE; i++) {
final long mask = masks[i];
- expected |= (bitSet.data[blockBaseOffset + i] & mask) ^ mask;
+ expected |= (bits[blockBaseOffset + i] & mask) ^ mask;
}
// clear the mask for array reuse (this is to avoid masks array allocation in inner loop)
@@ -235,7 +233,7 @@ public class BloomKFilter {
}
public boolean testInt(int val) {
- return testBytes(intToByteArrayLE(val));
+ return testHash(Murmur3.hash64(val));
}
public boolean testLong(long val) {
@@ -250,26 +248,6 @@ public class BloomKFilter {
return testLong(Double.doubleToLongBits(val));
}
- private byte[] intToByteArrayLE(int val) {
- BYTE_ARRAY_4[0] = (byte) (val >> 0);
- BYTE_ARRAY_4[1] = (byte) (val >> 8);
- BYTE_ARRAY_4[2] = (byte) (val >> 16);
- BYTE_ARRAY_4[3] = (byte) (val >> 24);
- return BYTE_ARRAY_4;
- }
-
- private byte[] longToByteArrayLE(long val) {
- BYTE_ARRAY_8[0] = (byte) (val >> 0);
- BYTE_ARRAY_8[1] = (byte) (val >> 8);
- BYTE_ARRAY_8[2] = (byte) (val >> 16);
- BYTE_ARRAY_8[3] = (byte) (val >> 24);
- BYTE_ARRAY_8[4] = (byte) (val >> 32);
- BYTE_ARRAY_8[5] = (byte) (val >> 40);
- BYTE_ARRAY_8[6] = (byte) (val >> 48);
- BYTE_ARRAY_8[7] = (byte) (val >> 56);
- return BYTE_ARRAY_8;
- }
-
public long sizeInBytes() {
return getBitSize() / 8;
}