You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by go...@apache.org on 2017/05/12 08:09:30 UTC

[2/2] hive git commit: HIVE-16592: Vectorization: Long hashCodes should bit-mix into lower bits (Gopal V, reviewed by Sergey Shelukhin)

HIVE-16592: Vectorization: Long hashCodes should bit-mix into lower bits (Gopal V, reviewed by Sergey Shelukhin)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/538c0088
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/538c0088
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/538c0088

Branch: refs/heads/master
Commit: 538c0088ab89cdcc30f4d1ae2c7d2bd266d18235
Parents: 40fe0d7
Author: Gopal V <go...@apache.org>
Authored: Fri May 12 00:11:35 2017 -0700
Committer: Gopal V <go...@apache.org>
Committed: Fri May 12 00:11:44 2017 -0700

----------------------------------------------------------------------
 .../apache/hive/common/util/HashCodeUtil.java   | 30 ++++++--------------
 .../hive/common/type/FastHiveDecimalImpl.java   |  2 +-
 2 files changed, 9 insertions(+), 23 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/538c0088/common/src/java/org/apache/hive/common/util/HashCodeUtil.java
----------------------------------------------------------------------
diff --git a/common/src/java/org/apache/hive/common/util/HashCodeUtil.java b/common/src/java/org/apache/hive/common/util/HashCodeUtil.java
index fa30273..b47479b 100644
--- a/common/src/java/org/apache/hive/common/util/HashCodeUtil.java
+++ b/common/src/java/org/apache/hive/common/util/HashCodeUtil.java
@@ -34,33 +34,19 @@ public class HashCodeUtil {
   }
 
   public static int calculateLongHashCode(long key) {
-
-    key = (~key) + (key << 21); // key = (key << 21) - key - 1;
-    key = key ^ (key >>> 24);
-    key = (key + (key << 3)) + (key << 8); // key * 265
-    key = key ^ (key >>> 14);
-    key = (key + (key << 2)) + (key << 4); // key * 21
-    key = key ^ (key >>> 28);
-    key = key + (key << 31);
-
+    // Mixing down into the lower bits - this produces a worse hashcode in purely
+    // numeric terms, but leaving entropy in the higher bits is not useful for a
+    // 2^n bucketing scheme. See JSR166 ConcurrentHashMap r1.89 (released under Public Domain)
+    // Note: ConcurrentHashMap has since reverted this to retain entropy bits higher
+    // up, to support the 2-level hashing for segment which operates at a higher bitmask
+    key ^= (key >>> 7) ^ (key >>> 4);
+    key ^= (key >>> 20) ^ (key >>> 12);
     return (int) key;
   }
 
   public static void calculateLongArrayHashCodes(long[] longs, int[] hashCodes, final int count) {
-    long key;
     for (int v = 0; v < count; v++) {
-
-      key = longs[v];
-
-      // Hash code logic from calculateLongHashCode.
-      key = (~key) + (key << 21); // key = (key << 21) - key - 1;
-      key = key ^ (key >>> 24);
-      key = (key + (key << 3)) + (key << 8); // key * 265
-      key = key ^ (key >>> 14);
-      key = (key + (key << 2)) + (key << 4); // key * 21
-      key = key ^ (key >>> 28);
-      key = key + (key << 31);
-      hashCodes[v] = (int) key;
+      hashCodes[v] = (int) calculateLongHashCode(longs[v]);
     }
   }
 

http://git-wip-us.apache.org/repos/asf/hive/blob/538c0088/storage-api/src/java/org/apache/hadoop/hive/common/type/FastHiveDecimalImpl.java
----------------------------------------------------------------------
diff --git a/storage-api/src/java/org/apache/hadoop/hive/common/type/FastHiveDecimalImpl.java b/storage-api/src/java/org/apache/hadoop/hive/common/type/FastHiveDecimalImpl.java
index 88abf3c..ef9cbcf 100644
--- a/storage-api/src/java/org/apache/hadoop/hive/common/type/FastHiveDecimalImpl.java
+++ b/storage-api/src/java/org/apache/hadoop/hive/common/type/FastHiveDecimalImpl.java
@@ -3913,7 +3913,7 @@ public class FastHiveDecimalImpl extends FastHiveDecimal {
 
     long key = fast0;
 
-    // Hash code logic from calculateLongHashCode.
+    // Hash code logic from original calculateLongHashCode
 
     key = (~key) + (key << 21); // key = (key << 21) - key - 1;
     key = key ^ (key >>> 24);