You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by ha...@apache.org on 2019/09/18 23:51:07 UTC

[hive] branch master updated: HIVE-20983 : Vectorization: Scale up small hashtables, when collisions are detected (Gopal V, Mustafa Iman via Ashutosh Chauhan)

This is an automated email from the ASF dual-hosted git repository.

hashutosh pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git


The following commit(s) were added to refs/heads/master by this push:
     new f7d5ef2  HIVE-20983 : Vectorization: Scale up small hashtables, when collisions are detected (Gopal V, Mustafa Iman via Ashutosh Chauhan)
f7d5ef2 is described below

commit f7d5ef2cc6335521a3ea119bcb491d283b312093
Author: Gopal V <go...@apache.org>
AuthorDate: Wed Sep 18 16:50:09 2019 -0700

    HIVE-20983 : Vectorization: Scale up small hashtables, when collisions are detected (Gopal V, Mustafa Iman via Ashutosh Chauhan)
    
    Signed-off-by: Ashutosh Chauhan <as...@cloudera.com>
---
 .../exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashMap.java  | 2 +-
 .../vector/mapjoin/fast/VectorMapJoinFastBytesHashMultiSet.java  | 2 +-
 .../exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashSet.java  | 2 +-
 .../vector/mapjoin/fast/VectorMapJoinFastBytesHashTable.java     | 4 ++--
 .../ql/exec/vector/mapjoin/fast/VectorMapJoinFastHashTable.java  | 9 +++++++++
 .../exec/vector/mapjoin/fast/VectorMapJoinFastLongHashTable.java | 4 ++--
 .../vector/mapjoin/fast/TestVectorMapJoinFastBytesHashMap.java   | 2 +-
 .../mapjoin/fast/TestVectorMapJoinFastBytesHashMultiSet.java     | 2 +-
 .../vector/mapjoin/fast/TestVectorMapJoinFastBytesHashSet.java   | 2 +-
 .../vector/mapjoin/fast/TestVectorMapJoinFastLongHashMap.java    | 2 +-
 .../mapjoin/fast/TestVectorMapJoinFastLongHashMapNonMatched.java | 2 +-
 .../mapjoin/fast/TestVectorMapJoinFastLongHashMultiSet.java      | 2 +-
 .../vector/mapjoin/fast/TestVectorMapJoinFastLongHashSet.java    | 2 +-
 13 files changed, 23 insertions(+), 14 deletions(-)

diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashMap.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashMap.java
index add8b9c..d314b6a 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashMap.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashMap.java
@@ -154,7 +154,7 @@ public abstract class VectorMapJoinFastBytesHashMap
 
   public void add(byte[] keyBytes, int keyStart, int keyLength, BytesWritable currentValue) {
 
-    if (resizeThreshold <= keysAssigned) {
+    if (checkResize()) {
       expandAndRehash();
     }
 
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashMultiSet.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashMultiSet.java
index 5ec90b4..c384c09 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashMultiSet.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashMultiSet.java
@@ -51,7 +51,7 @@ public abstract class VectorMapJoinFastBytesHashMultiSet
 
   public void add(byte[] keyBytes, int keyStart, int keyLength, BytesWritable currentValue) {
 
-    if (resizeThreshold <= keysAssigned) {
+    if (checkResize()) {
       expandAndRehash();
     }
 
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashSet.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashSet.java
index 7c73aa6..19b5791 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashSet.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashSet.java
@@ -46,7 +46,7 @@ public abstract class VectorMapJoinFastBytesHashSet
 
   public void add(byte[] keyBytes, int keyStart, int keyLength, BytesWritable currentValue) {
 
-    if (resizeThreshold <= keysAssigned) {
+    if (checkResize()) {
       expandAndRehash();
     }
 
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashTable.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashTable.java
index 3d45a54..79b39b4 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashTable.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashTable.java
@@ -62,8 +62,8 @@ public abstract class VectorMapJoinFastBytesHashTable
     if (logicalHashBucketCount > HIGHEST_INT_POWER_OF_2) {
       throwExpandError(HIGHEST_INT_POWER_OF_2, "Bytes");
     }
-    int newLogicalHashBucketCount = logicalHashBucketCount * 2;
-    int newLogicalHashBucketMask = newLogicalHashBucketCount - 1;
+    final int newLogicalHashBucketCount = Math.max(FIRST_SIZE_UP, logicalHashBucketCount * 2);
+    final int newLogicalHashBucketMask = newLogicalHashBucketCount - 1;
     int newMetricPutConflict = 0;
     int newLargestNumberOfSteps = 0;
 
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastHashTable.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastHashTable.java
index 2d05eab..248d125 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastHashTable.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastHashTable.java
@@ -29,6 +29,9 @@ import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinNonM
 public abstract class VectorMapJoinFastHashTable implements VectorMapJoinHashTable {
   public static final Logger LOG = LoggerFactory.getLogger(VectorMapJoinFastHashTable.class);
 
+  // floor for the bucket count after a rehash: small tables jump directly to 4096 buckets
+  public static final int FIRST_SIZE_UP = 4096;
+
   protected final boolean isFullOuter;
 
   protected int logicalHashBucketCount;
@@ -98,6 +101,12 @@ public abstract class VectorMapJoinFastHashTable implements VectorMapJoinHashTab
     return keysAssigned;
   }
 
+  protected final boolean checkResize() {
+    // Resize at the load threshold, or early if a small table (<= FIRST_SIZE_UP) has collisions.
+    return (resizeThreshold <= keysAssigned)
+        || (logicalHashBucketCount <= FIRST_SIZE_UP && largestNumberOfSteps > 1);
+  }
+
   @Override
   public long getEstimatedMemorySize() {
     int size = 0;
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashTable.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashTable.java
index 03ef249..a35401d 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashTable.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashTable.java
@@ -96,7 +96,7 @@ public abstract class VectorMapJoinFastLongHashTable
 
   public void add(long key, BytesWritable currentValue) {
 
-    if (resizeThreshold <= keysAssigned) {
+    if (checkResize()) {
       expandAndRehash();
     }
 
@@ -157,7 +157,7 @@ public abstract class VectorMapJoinFastLongHashTable
     if (logicalHashBucketCount > ONE_QUARTER_LIMIT) {
       throwExpandError(ONE_QUARTER_LIMIT, "Long");
     }
-    int newLogicalHashBucketCount = logicalHashBucketCount * 2;
+    int newLogicalHashBucketCount = Math.max(FIRST_SIZE_UP, logicalHashBucketCount * 2);
     int newLogicalHashBucketMask = newLogicalHashBucketCount - 1;
     int newMetricPutConflict = 0;
     int newLargestNumberOfSteps = 0;
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/TestVectorMapJoinFastBytesHashMap.java b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/TestVectorMapJoinFastBytesHashMap.java
index 9bf8bbc..0308daf 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/TestVectorMapJoinFastBytesHashMap.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/TestVectorMapJoinFastBytesHashMap.java
@@ -192,7 +192,7 @@ public class TestVectorMapJoinFastBytesHashMap extends CommonFastHashTable {
 
     VerifyFastBytesHashMap verifyTable = new VerifyFastBytesHashMap();
 
-    for (int i = 0; i < 18; ++i) {
+    for (int i = 0; i < 6; ++i) {
       byte[] key;
       while (true) {
         key = new byte[random.nextInt(MAX_KEY_LENGTH)];
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/TestVectorMapJoinFastBytesHashMultiSet.java b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/TestVectorMapJoinFastBytesHashMultiSet.java
index c6a8f7a..507544e 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/TestVectorMapJoinFastBytesHashMultiSet.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/TestVectorMapJoinFastBytesHashMultiSet.java
@@ -172,7 +172,7 @@ public class TestVectorMapJoinFastBytesHashMultiSet extends CommonFastHashTable
 
     VerifyFastBytesHashMultiSet verifyTable = new VerifyFastBytesHashMultiSet();
 
-    for (int i = 0; i < 18; ++i) {
+    for (int i = 0; i < 6; ++i) {
       byte[] key;
       while (true) {
         key = new byte[random.nextInt(MAX_KEY_LENGTH)];
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/TestVectorMapJoinFastBytesHashSet.java b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/TestVectorMapJoinFastBytesHashSet.java
index 6fccde9..1cd6d4d 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/TestVectorMapJoinFastBytesHashSet.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/TestVectorMapJoinFastBytesHashSet.java
@@ -173,7 +173,7 @@ public class TestVectorMapJoinFastBytesHashSet extends CommonFastHashTable {
 
     VerifyFastBytesHashSet verifyTable = new VerifyFastBytesHashSet();
 
-    for (int i = 0; i < 18; ++i) {
+    for (int i = 0; i < 6; ++i) {
       byte[] key;
       while (true) {
         key = new byte[random.nextInt(MAX_KEY_LENGTH)];
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/TestVectorMapJoinFastLongHashMap.java b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/TestVectorMapJoinFastLongHashMap.java
index a21bdcf..f72704c 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/TestVectorMapJoinFastLongHashMap.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/TestVectorMapJoinFastLongHashMap.java
@@ -188,7 +188,7 @@ public class TestVectorMapJoinFastLongHashMap extends CommonFastHashTable {
 
     VerifyFastLongHashMap verifyTable = new VerifyFastLongHashMap();
 
-    for (int i = 0; i < 18; ++i) {
+    for (int i = 0; i < 6; ++i) {
       long key;
       while (true) {
         key = random.nextLong();
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/TestVectorMapJoinFastLongHashMapNonMatched.java b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/TestVectorMapJoinFastLongHashMapNonMatched.java
index 5847787..d40351a 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/TestVectorMapJoinFastLongHashMapNonMatched.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/TestVectorMapJoinFastLongHashMapNonMatched.java
@@ -104,7 +104,7 @@ public class TestVectorMapJoinFastLongHashMapNonMatched extends CommonFastHashTa
 
     VerifyFastLongHashMap verifyTable = new VerifyFastLongHashMap();
 
-    for (int i = 0; i < 18; ++i) {
+    for (int i = 0; i < 6; ++i) {
       long key;
       while (true) {
         key = random.nextLong();
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/TestVectorMapJoinFastLongHashMultiSet.java b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/TestVectorMapJoinFastLongHashMultiSet.java
index 39f6632..d873ca6 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/TestVectorMapJoinFastLongHashMultiSet.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/TestVectorMapJoinFastLongHashMultiSet.java
@@ -173,7 +173,7 @@ public class TestVectorMapJoinFastLongHashMultiSet extends CommonFastHashTable {
 
     VerifyFastLongHashMultiSet verifyTable = new VerifyFastLongHashMultiSet();
 
-    for (int i = 0; i < 18; ++i) {
+    for (int i = 0; i < 6; ++i) {
       long key;
       while (true) {
         key = random.nextLong();
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/TestVectorMapJoinFastLongHashSet.java b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/TestVectorMapJoinFastLongHashSet.java
index bbb5da0..97d4375 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/TestVectorMapJoinFastLongHashSet.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/TestVectorMapJoinFastLongHashSet.java
@@ -171,7 +171,7 @@ public class TestVectorMapJoinFastLongHashSet extends CommonFastHashTable {
 
     VerifyFastLongHashSet verifyTable = new VerifyFastLongHashSet();
 
-    for (int i = 0; i < 18; ++i) {
+    for (int i = 0; i < 6; ++i) {
       long key;
       while (true) {
         key = random.nextLong();