You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by zs...@apache.org on 2010/02/16 08:07:40 UTC

svn commit: r910413 - in /hadoop/hive/trunk: ./ common/src/java/org/apache/hadoop/hive/conf/ conf/ ql/src/java/org/apache/hadoop/hive/ql/exec/ ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/

Author: zshao
Date: Tue Feb 16 07:07:40 2010
New Revision: 910413

URL: http://svn.apache.org/viewvc?rev=910413&view=rev
Log:
HIVE-1158. Introducing a new parameter for Map-side join bucket size. (Ning Zhang via zshao)

Modified:
    hadoop/hive/trunk/CHANGES.txt
    hadoop/hive/trunk/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
    hadoop/hive/trunk/conf/hive-default.xml
    hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/MapJoinOperator.java
    hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/MapJoinObjectValue.java

Modified: hadoop/hive/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/CHANGES.txt?rev=910413&r1=910412&r2=910413&view=diff
==============================================================================
--- hadoop/hive/trunk/CHANGES.txt (original)
+++ hadoop/hive/trunk/CHANGES.txt Tue Feb 16 07:07:40 2010
@@ -78,6 +78,9 @@
     HIVE-1122. Make ql/metadata/Table and Partition serializable
     (Zheng Shao via He Yongqiang)
 
+    HIVE-1158. Introducing a new parameter for Map-side join bucket size.
+    (Ning Zhang via zshao)
+
   OPTIMIZATIONS
 
   BUG FIXES

Modified: hadoop/hive/trunk/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java?rev=910413&r1=910412&r2=910413&view=diff
==============================================================================
--- hadoop/hive/trunk/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java (original)
+++ hadoop/hive/trunk/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java Tue Feb 16 07:07:40 2010
@@ -133,6 +133,7 @@
     HIVEGROUPBYSKEW("hive.groupby.skewindata", "false"),
     HIVEJOINEMITINTERVAL("hive.join.emit.interval", 1000),
     HIVEJOINCACHESIZE("hive.join.cache.size", 25000),
+    HIVEMAPJOINBUCKETCACHESIZE("hive.mapjoin.bucket.cache.size", 100),
     HIVEMAPJOINROWSIZE("hive.mapjoin.size.key", 10000),
     HIVEMAPJOINCACHEROWS("hive.mapjoin.cache.numrows", 25000),
     HIVEGROUPBYMAPINTERVAL("hive.groupby.mapaggr.checkinterval", 100000),

Modified: hadoop/hive/trunk/conf/hive-default.xml
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/conf/hive-default.xml?rev=910413&r1=910412&r2=910413&view=diff
==============================================================================
--- hadoop/hive/trunk/conf/hive-default.xml (original)
+++ hadoop/hive/trunk/conf/hive-default.xml Tue Feb 16 07:07:40 2010
@@ -268,6 +268,18 @@
 </property>
 
 <property>
+  <name>hive.join.cache.size</name>
+  <value>25000</value>
+  <description>How many rows in the joining tables (except the streaming table) should be cached in memory. </description>
+</property>
+
+<property>
+  <name>hive.mapjoin.bucket.cache.size</name>
+  <value>100</value>
+  <description>How many values for each key in the map-joined table should be cached in memory. </description>
+</property>
+
+<property>
   <name>hive.mapjoin.maxsize</name>
   <value>100000</value>
   <description>Maximum # of rows of the small table that can be handled by map-side join. If the size is reached and hive.task.progress is set, a fatal error counter is set and the job will be killed.</description>

Modified: hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/MapJoinOperator.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/MapJoinOperator.java?rev=910413&r1=910412&r2=910413&view=diff
==============================================================================
--- hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/MapJoinOperator.java (original)
+++ hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/MapJoinOperator.java Tue Feb 16 07:07:40 2010
@@ -282,7 +282,8 @@
 
         boolean needNewKey = true;
         if (o == null) {
-          res = getRowContainer(hconf, (byte) tag, order[tag], joinCacheSize);
+          int bucketSize = HiveConf.getIntVar(hconf, HiveConf.ConfVars.HIVEMAPJOINBUCKETCACHESIZE);
+          res = getRowContainer(hconf, (byte) tag, order[tag], bucketSize);
           res.add(value);
         } else {
           res = o.getObj();
@@ -320,6 +321,7 @@
           MapJoinObjectValue valueObj = new MapJoinObjectValue(
               metadataValueTag[tag], res);
           valueObj.setConf(hconf);
+          valueObj.setConf(hconf);
           // This may potentially increase the size of the hashmap on the mapper
           if (res.size() > mapJoinRowsKey) {
             if (res.size() % 100 == 0) {

Modified: hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/MapJoinObjectValue.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/MapJoinObjectValue.java?rev=910413&r1=910412&r2=910413&view=diff
==============================================================================
--- hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/MapJoinObjectValue.java (original)
+++ hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/MapJoinObjectValue.java Tue Feb 16 07:07:40 2010
@@ -25,6 +25,7 @@
 import java.util.ArrayList;
 
 import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hive.conf.HiveConf;
 import org.apache.hadoop.hive.ql.exec.MapJoinOperator;
 import org.apache.hadoop.hive.ql.exec.MapJoinOperator.MapJoinObjectCtx;
 import org.apache.hadoop.hive.ql.metadata.HiveException;
@@ -41,8 +42,10 @@
   protected transient int metadataTag;
   protected transient RowContainer obj;
   protected transient Configuration conf;
+  protected int bucketSize; // bucket size for RowContainer
 
   public MapJoinObjectValue() {
+    bucketSize = 100; // default bucket size
   }
 
   /**
@@ -87,8 +90,7 @@
       MapJoinObjectCtx ctx = MapJoinOperator.getMapMetadata().get(
           Integer.valueOf(metadataTag));
       int sz = in.readInt();
-
-      RowContainer res = new RowContainer(ctx.getConf());
+      RowContainer res = new RowContainer(bucketSize, ctx.getConf());
       res.setSerDe(ctx.getSerDe(), ctx.getStandardOI());
       res.setTableDesc(ctx.getTblDesc());
       for (int pos = 0; pos < sz; pos++) {
@@ -165,6 +167,7 @@
 
   public void setConf(Configuration conf) {
     this.conf = conf;
+    bucketSize = HiveConf.getIntVar(conf, HiveConf.ConfVars.HIVEMAPJOINBUCKETCACHESIZE);
   }
 
 }