You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by om...@apache.org on 2013/05/15 23:11:17 UTC
svn commit: r1483084 - in /hive/trunk:
common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
conf/hive-default.xml.template
ql/src/java/org/apache/hadoop/hive/ql/exec/SMBMapJoinOperator.java
Author: omalley
Date: Wed May 15 21:11:17 2013
New Revision: 1483084
URL: http://svn.apache.org/r1483084
Log:
HIVE-4440 SMB Operator spills to disk like it's 1999 (Gunther Hagleitner via
omalley)
Modified:
hive/trunk/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
hive/trunk/conf/hive-default.xml.template
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/SMBMapJoinOperator.java
Modified: hive/trunk/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
URL: http://svn.apache.org/viewvc/hive/trunk/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java?rev=1483084&r1=1483083&r2=1483084&view=diff
==============================================================================
--- hive/trunk/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java (original)
+++ hive/trunk/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java Wed May 15 21:11:17 2013
@@ -415,7 +415,12 @@ public class HiveConf extends Configurat
true),
HIVEJOINEMITINTERVAL("hive.join.emit.interval", 1000),
HIVEJOINCACHESIZE("hive.join.cache.size", 25000),
+
+ // hive.mapjoin.bucket.cache.size has been replaced by hive.smbjoin.cache.rows,
+ // need to remove by hive 0.13. Also, do not change the default of 100 (the SMB operator compares against it)
HIVEMAPJOINBUCKETCACHESIZE("hive.mapjoin.bucket.cache.size", 100),
+
+ HIVESMBJOINCACHEROWS("hive.smbjoin.cache.rows", 10000),
HIVEGROUPBYMAPINTERVAL("hive.groupby.mapaggr.checkinterval", 100000),
HIVEMAPAGGRHASHMEMORY("hive.map.aggr.hash.percentmemory", (float) 0.5),
HIVEMAPJOINFOLLOWEDBYMAPAGGRHASHMEMORY("hive.mapjoin.followby.map.aggr.hash.percentmemory", (float) 0.3),
Modified: hive/trunk/conf/hive-default.xml.template
URL: http://svn.apache.org/viewvc/hive/trunk/conf/hive-default.xml.template?rev=1483084&r1=1483083&r2=1483084&view=diff
==============================================================================
--- hive/trunk/conf/hive-default.xml.template (original)
+++ hive/trunk/conf/hive-default.xml.template Wed May 15 21:11:17 2013
@@ -589,9 +589,9 @@
</property>
<property>
- <name>hive.mapjoin.bucket.cache.size</name>
- <value>100</value>
- <description>How many values in each keys in the map-joined table should be cached in memory. </description>
+ <name>hive.smbjoin.cache.rows</name>
+ <value>10000</value>
+ <description>How many rows with the same key value should be cached in memory per smb joined table. </description>
</property>
<property>
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/SMBMapJoinOperator.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/SMBMapJoinOperator.java?rev=1483084&r1=1483083&r2=1483084&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/SMBMapJoinOperator.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/SMBMapJoinOperator.java Wed May 15 21:11:17 2013
@@ -123,8 +123,18 @@ public class SMBMapJoinOperator extends
fetchDone = new boolean[maxAlias];
foundNextKeyGroup = new boolean[maxAlias];
- int bucketSize = HiveConf.getIntVar(hconf,
- HiveConf.ConfVars.HIVEMAPJOINBUCKETCACHESIZE);
+ int bucketSize;
+
+ // For backwards compatibility reasons we honor the older
+ // HIVEMAPJOINBUCKETCACHESIZE if it is set to a value other than its default.
+ // By hive 0.13 we should remove this code.
+ int oldVar = HiveConf.getIntVar(hconf, HiveConf.ConfVars.HIVEMAPJOINBUCKETCACHESIZE);
+ if (oldVar != 100) {
+ bucketSize = oldVar;
+ } else {
+ bucketSize = HiveConf.getIntVar(hconf, HiveConf.ConfVars.HIVESMBJOINCACHEROWS);
+ }
+
for (byte pos = 0; pos < order.length; pos++) {
RowContainer rc = JoinUtil.getRowContainer(hconf,
rowContainerStandardObjectInspectors[pos],