You are viewing a plain text version of this content. The canonical HTML version is available at the original mailing-list archive.
Posted to commits@hive.apache.org by we...@apache.org on 2016/05/18 16:51:45 UTC
hive git commit: HIVE-13730 : Avoid double spilling the same
partition when memory threshold is set very low (Wei Zheng,
reviewed by Vikram Dixit K)
Repository: hive
Updated Branches:
refs/heads/master 8c4b99a4e -> 3726ce590
HIVE-13730 : Avoid double spilling the same partition when memory threshold is set very low (Wei Zheng, reviewed by Vikram Dixit K)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/3726ce59
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/3726ce59
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/3726ce59
Branch: refs/heads/master
Commit: 3726ce590f9dcb0e679ed6faaafa1211e9f881d3
Parents: 8c4b99a
Author: Wei Zheng <we...@apache.org>
Authored: Wed May 18 09:51:31 2016 -0700
Committer: Wei Zheng <we...@apache.org>
Committed: Wed May 18 09:51:31 2016 -0700
----------------------------------------------------------------------
.../persistence/HybridHashTableContainer.java | 22 +++++++++++++++++++-
1 file changed, 21 insertions(+), 1 deletion(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/3726ce59/ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/HybridHashTableContainer.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/HybridHashTableContainer.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/HybridHashTableContainer.java
index 5552dfb..bb35bae 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/HybridHashTableContainer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/HybridHashTableContainer.java
@@ -385,6 +385,11 @@ public class HybridHashTableContainer
memoryUsed += hashPartitions[i].hashMap.memorySize();
}
}
+
+ if (writeBufferSize * (numPartitions - numPartitionsSpilledOnCreation) > memoryThreshold) {
+ LOG.error("There is not enough memory to allocate " +
+ (numPartitions - numPartitionsSpilledOnCreation) + " hash partitions.");
+ }
assert numPartitionsSpilledOnCreation != numPartitions : "All partitions are directly spilled!" +
" It is not supported now.";
LOG.info("Number of partitions created: " + numPartitions);
@@ -558,7 +563,7 @@ public class HybridHashTableContainer
* @return the biggest partition number
*/
private int biggestPartition() {
- int res = 0;
+ int res = -1;
int maxSize = 0;
// If a partition has been spilled to disk, its size will be 0, i.e. it won't be picked
@@ -574,6 +579,17 @@ public class HybridHashTableContainer
res = i;
}
}
+
+ // It can happen that although there are some partitions in memory, their sizes are all 0.
+ // In that case we just pick one and spill.
+ if (res == -1) {
+ for (int i = 0; i < hashPartitions.length; i++) {
+ if (!isOnDisk(i)) {
+ return i;
+ }
+ }
+ }
+
return res;
}
@@ -585,6 +601,10 @@ public class HybridHashTableContainer
public long spillPartition(int partitionId) throws IOException {
HashPartition partition = hashPartitions[partitionId];
int inMemRowCount = partition.hashMap.getNumValues();
+ if (inMemRowCount == 0) {
+ LOG.warn("Trying to spill an empty hash partition! It may be due to " +
+ "hive.auto.convert.join.noconditionaltask.size being set too low.");
+ }
File file = FileUtils.createLocalDirsTempFile(
spillLocalDirs, "partition-" + partitionId + "-", null, false);