You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by ha...@apache.org on 2019/09/18 23:51:07 UTC
[hive] branch master updated: HIVE-20983 : Vectorization: Scale up
small hashtables, when collisions are detected (Gopal V,
Mustafa Iman via Ashutosh Chauhan)
This is an automated email from the ASF dual-hosted git repository.
hashutosh pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git
The following commit(s) were added to refs/heads/master by this push:
new f7d5ef2 HIVE-20983 : Vectorization: Scale up small hashtables, when collisions are detected (Gopal V, Mustafa Iman via Ashutosh Chauhan)
f7d5ef2 is described below
commit f7d5ef2cc6335521a3ea119bcb491d283b312093
Author: Gopal V <go...@apache.org>
AuthorDate: Wed Sep 18 16:50:09 2019 -0700
HIVE-20983 : Vectorization: Scale up small hashtables, when collisions are detected (Gopal V, Mustafa Iman via Ashutosh Chauhan)
Signed-off-by: Ashutosh Chauhan <as...@cloudera.com>
---
.../exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashMap.java | 2 +-
.../vector/mapjoin/fast/VectorMapJoinFastBytesHashMultiSet.java | 2 +-
.../exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashSet.java | 2 +-
.../vector/mapjoin/fast/VectorMapJoinFastBytesHashTable.java | 4 ++--
.../ql/exec/vector/mapjoin/fast/VectorMapJoinFastHashTable.java | 9 +++++++++
.../exec/vector/mapjoin/fast/VectorMapJoinFastLongHashTable.java | 4 ++--
.../vector/mapjoin/fast/TestVectorMapJoinFastBytesHashMap.java | 2 +-
.../mapjoin/fast/TestVectorMapJoinFastBytesHashMultiSet.java | 2 +-
.../vector/mapjoin/fast/TestVectorMapJoinFastBytesHashSet.java | 2 +-
.../vector/mapjoin/fast/TestVectorMapJoinFastLongHashMap.java | 2 +-
.../mapjoin/fast/TestVectorMapJoinFastLongHashMapNonMatched.java | 2 +-
.../mapjoin/fast/TestVectorMapJoinFastLongHashMultiSet.java | 2 +-
.../vector/mapjoin/fast/TestVectorMapJoinFastLongHashSet.java | 2 +-
13 files changed, 23 insertions(+), 14 deletions(-)
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashMap.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashMap.java
index add8b9c..d314b6a 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashMap.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashMap.java
@@ -154,7 +154,7 @@ public abstract class VectorMapJoinFastBytesHashMap
public void add(byte[] keyBytes, int keyStart, int keyLength, BytesWritable currentValue) {
- if (resizeThreshold <= keysAssigned) {
+ if (checkResize()) {
expandAndRehash();
}
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashMultiSet.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashMultiSet.java
index 5ec90b4..c384c09 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashMultiSet.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashMultiSet.java
@@ -51,7 +51,7 @@ public abstract class VectorMapJoinFastBytesHashMultiSet
public void add(byte[] keyBytes, int keyStart, int keyLength, BytesWritable currentValue) {
- if (resizeThreshold <= keysAssigned) {
+ if (checkResize()) {
expandAndRehash();
}
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashSet.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashSet.java
index 7c73aa6..19b5791 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashSet.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashSet.java
@@ -46,7 +46,7 @@ public abstract class VectorMapJoinFastBytesHashSet
public void add(byte[] keyBytes, int keyStart, int keyLength, BytesWritable currentValue) {
- if (resizeThreshold <= keysAssigned) {
+ if (checkResize()) {
expandAndRehash();
}
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashTable.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashTable.java
index 3d45a54..79b39b4 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashTable.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashTable.java
@@ -62,8 +62,8 @@ public abstract class VectorMapJoinFastBytesHashTable
if (logicalHashBucketCount > HIGHEST_INT_POWER_OF_2) {
throwExpandError(HIGHEST_INT_POWER_OF_2, "Bytes");
}
- int newLogicalHashBucketCount = logicalHashBucketCount * 2;
- int newLogicalHashBucketMask = newLogicalHashBucketCount - 1;
+ final int newLogicalHashBucketCount = Math.max(FIRST_SIZE_UP, logicalHashBucketCount * 2);
+ final int newLogicalHashBucketMask = newLogicalHashBucketCount - 1;
int newMetricPutConflict = 0;
int newLargestNumberOfSteps = 0;
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastHashTable.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastHashTable.java
index 2d05eab..248d125 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastHashTable.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastHashTable.java
@@ -29,6 +29,9 @@ import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinNonM
public abstract class VectorMapJoinFastHashTable implements VectorMapJoinHashTable {
public static final Logger LOG = LoggerFactory.getLogger(VectorMapJoinFastHashTable.class);
+ // when rehashing, jump directly to 4k items
+ public static final int FIRST_SIZE_UP = 4096;
+
protected final boolean isFullOuter;
protected int logicalHashBucketCount;
@@ -98,6 +101,12 @@ public abstract class VectorMapJoinFastHashTable implements VectorMapJoinHashTab
return keysAssigned;
}
+ protected final boolean checkResize() {
+ // resize small hashtables up to a higher width (4096 items), but only when there are collisions
+ return (resizeThreshold <= keysAssigned)
+ || (logicalHashBucketCount <= FIRST_SIZE_UP && largestNumberOfSteps > 1);
+ }
+
@Override
public long getEstimatedMemorySize() {
int size = 0;
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashTable.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashTable.java
index 03ef249..a35401d 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashTable.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashTable.java
@@ -96,7 +96,7 @@ public abstract class VectorMapJoinFastLongHashTable
public void add(long key, BytesWritable currentValue) {
- if (resizeThreshold <= keysAssigned) {
+ if (checkResize()) {
expandAndRehash();
}
@@ -157,7 +157,7 @@ public abstract class VectorMapJoinFastLongHashTable
if (logicalHashBucketCount > ONE_QUARTER_LIMIT) {
throwExpandError(ONE_QUARTER_LIMIT, "Long");
}
- int newLogicalHashBucketCount = logicalHashBucketCount * 2;
+ int newLogicalHashBucketCount = Math.max(FIRST_SIZE_UP, logicalHashBucketCount * 2);
int newLogicalHashBucketMask = newLogicalHashBucketCount - 1;
int newMetricPutConflict = 0;
int newLargestNumberOfSteps = 0;
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/TestVectorMapJoinFastBytesHashMap.java b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/TestVectorMapJoinFastBytesHashMap.java
index 9bf8bbc..0308daf 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/TestVectorMapJoinFastBytesHashMap.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/TestVectorMapJoinFastBytesHashMap.java
@@ -192,7 +192,7 @@ public class TestVectorMapJoinFastBytesHashMap extends CommonFastHashTable {
VerifyFastBytesHashMap verifyTable = new VerifyFastBytesHashMap();
- for (int i = 0; i < 18; ++i) {
+ for (int i = 0; i < 6; ++i) {
byte[] key;
while (true) {
key = new byte[random.nextInt(MAX_KEY_LENGTH)];
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/TestVectorMapJoinFastBytesHashMultiSet.java b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/TestVectorMapJoinFastBytesHashMultiSet.java
index c6a8f7a..507544e 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/TestVectorMapJoinFastBytesHashMultiSet.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/TestVectorMapJoinFastBytesHashMultiSet.java
@@ -172,7 +172,7 @@ public class TestVectorMapJoinFastBytesHashMultiSet extends CommonFastHashTable
VerifyFastBytesHashMultiSet verifyTable = new VerifyFastBytesHashMultiSet();
- for (int i = 0; i < 18; ++i) {
+ for (int i = 0; i < 6; ++i) {
byte[] key;
while (true) {
key = new byte[random.nextInt(MAX_KEY_LENGTH)];
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/TestVectorMapJoinFastBytesHashSet.java b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/TestVectorMapJoinFastBytesHashSet.java
index 6fccde9..1cd6d4d 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/TestVectorMapJoinFastBytesHashSet.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/TestVectorMapJoinFastBytesHashSet.java
@@ -173,7 +173,7 @@ public class TestVectorMapJoinFastBytesHashSet extends CommonFastHashTable {
VerifyFastBytesHashSet verifyTable = new VerifyFastBytesHashSet();
- for (int i = 0; i < 18; ++i) {
+ for (int i = 0; i < 6; ++i) {
byte[] key;
while (true) {
key = new byte[random.nextInt(MAX_KEY_LENGTH)];
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/TestVectorMapJoinFastLongHashMap.java b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/TestVectorMapJoinFastLongHashMap.java
index a21bdcf..f72704c 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/TestVectorMapJoinFastLongHashMap.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/TestVectorMapJoinFastLongHashMap.java
@@ -188,7 +188,7 @@ public class TestVectorMapJoinFastLongHashMap extends CommonFastHashTable {
VerifyFastLongHashMap verifyTable = new VerifyFastLongHashMap();
- for (int i = 0; i < 18; ++i) {
+ for (int i = 0; i < 6; ++i) {
long key;
while (true) {
key = random.nextLong();
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/TestVectorMapJoinFastLongHashMapNonMatched.java b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/TestVectorMapJoinFastLongHashMapNonMatched.java
index 5847787..d40351a 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/TestVectorMapJoinFastLongHashMapNonMatched.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/TestVectorMapJoinFastLongHashMapNonMatched.java
@@ -104,7 +104,7 @@ public class TestVectorMapJoinFastLongHashMapNonMatched extends CommonFastHashTa
VerifyFastLongHashMap verifyTable = new VerifyFastLongHashMap();
- for (int i = 0; i < 18; ++i) {
+ for (int i = 0; i < 6; ++i) {
long key;
while (true) {
key = random.nextLong();
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/TestVectorMapJoinFastLongHashMultiSet.java b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/TestVectorMapJoinFastLongHashMultiSet.java
index 39f6632..d873ca6 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/TestVectorMapJoinFastLongHashMultiSet.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/TestVectorMapJoinFastLongHashMultiSet.java
@@ -173,7 +173,7 @@ public class TestVectorMapJoinFastLongHashMultiSet extends CommonFastHashTable {
VerifyFastLongHashMultiSet verifyTable = new VerifyFastLongHashMultiSet();
- for (int i = 0; i < 18; ++i) {
+ for (int i = 0; i < 6; ++i) {
long key;
while (true) {
key = random.nextLong();
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/TestVectorMapJoinFastLongHashSet.java b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/TestVectorMapJoinFastLongHashSet.java
index bbb5da0..97d4375 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/TestVectorMapJoinFastLongHashSet.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/TestVectorMapJoinFastLongHashSet.java
@@ -171,7 +171,7 @@ public class TestVectorMapJoinFastLongHashSet extends CommonFastHashTable {
VerifyFastLongHashSet verifyTable = new VerifyFastLongHashSet();
- for (int i = 0; i < 18; ++i) {
+ for (int i = 0; i < 6; ++i) {
long key;
while (true) {
key = random.nextLong();