You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by om...@apache.org on 2013/05/15 23:11:17 UTC

svn commit: r1483084 - in /hive/trunk: common/src/java/org/apache/hadoop/hive/conf/HiveConf.java conf/hive-default.xml.template ql/src/java/org/apache/hadoop/hive/ql/exec/SMBMapJoinOperator.java

Author: omalley
Date: Wed May 15 21:11:17 2013
New Revision: 1483084

URL: http://svn.apache.org/r1483084
Log:
HIVE-4440 SMB Operator spills to disk like it's 1999 (Gunther Hagleitner via
omalley)

Modified:
    hive/trunk/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
    hive/trunk/conf/hive-default.xml.template
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/SMBMapJoinOperator.java

Modified: hive/trunk/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
URL: http://svn.apache.org/viewvc/hive/trunk/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java?rev=1483084&r1=1483083&r2=1483084&view=diff
==============================================================================
--- hive/trunk/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java (original)
+++ hive/trunk/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java Wed May 15 21:11:17 2013
@@ -415,7 +415,12 @@ public class HiveConf extends Configurat
         true),
     HIVEJOINEMITINTERVAL("hive.join.emit.interval", 1000),
     HIVEJOINCACHESIZE("hive.join.cache.size", 25000),
+
+    // hive.mapjoin.bucket.cache.size has been replaced by hive.smbjoin.cache.rows;
+    // remove it by Hive 0.13. Also, do not change the default (see SMB operator).
     HIVEMAPJOINBUCKETCACHESIZE("hive.mapjoin.bucket.cache.size", 100),
+
+    HIVESMBJOINCACHEROWS("hive.smbjoin.cache.rows", 10000),
     HIVEGROUPBYMAPINTERVAL("hive.groupby.mapaggr.checkinterval", 100000),
     HIVEMAPAGGRHASHMEMORY("hive.map.aggr.hash.percentmemory", (float) 0.5),
     HIVEMAPJOINFOLLOWEDBYMAPAGGRHASHMEMORY("hive.mapjoin.followby.map.aggr.hash.percentmemory", (float) 0.3),

Modified: hive/trunk/conf/hive-default.xml.template
URL: http://svn.apache.org/viewvc/hive/trunk/conf/hive-default.xml.template?rev=1483084&r1=1483083&r2=1483084&view=diff
==============================================================================
--- hive/trunk/conf/hive-default.xml.template (original)
+++ hive/trunk/conf/hive-default.xml.template Wed May 15 21:11:17 2013
@@ -589,9 +589,9 @@
 </property>
 
 <property>
-  <name>hive.mapjoin.bucket.cache.size</name>
-  <value>100</value>
-  <description>How many values in each keys in the map-joined table should be cached in memory. </description>
+  <name>hive.smbjoin.cache.rows</name>
+  <value>10000</value>
+  <description>How many rows with the same key value should be cached in memory per SMB-joined table.</description>
 </property>
 
 <property>

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/SMBMapJoinOperator.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/SMBMapJoinOperator.java?rev=1483084&r1=1483083&r2=1483084&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/SMBMapJoinOperator.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/SMBMapJoinOperator.java Wed May 15 21:11:17 2013
@@ -123,8 +123,18 @@ public class SMBMapJoinOperator extends 
     fetchDone = new boolean[maxAlias];
     foundNextKeyGroup = new boolean[maxAlias];
 
-    int bucketSize = HiveConf.getIntVar(hconf,
-        HiveConf.ConfVars.HIVEMAPJOINBUCKETCACHESIZE);
+    int bucketSize;
+
+    // For backwards compatibility we honor the older
+    // HIVEMAPJOINBUCKETCACHESIZE if it is set to a non-default value (default is 100).
+    // By Hive 0.13 we should remove this code.
+    int oldVar = HiveConf.getIntVar(hconf, HiveConf.ConfVars.HIVEMAPJOINBUCKETCACHESIZE);
+    if (oldVar != 100) {
+      bucketSize = oldVar;
+    } else {
+      bucketSize = HiveConf.getIntVar(hconf, HiveConf.ConfVars.HIVESMBJOINCACHEROWS);
+    }
+
     for (byte pos = 0; pos < order.length; pos++) {
       RowContainer rc = JoinUtil.getRowContainer(hconf,
           rowContainerStandardObjectInspectors[pos],