You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by zs...@apache.org on 2010/02/16 08:07:40 UTC
svn commit: r910413 - in /hadoop/hive/trunk: ./
common/src/java/org/apache/hadoop/hive/conf/ conf/
ql/src/java/org/apache/hadoop/hive/ql/exec/
ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/
Author: zshao
Date: Tue Feb 16 07:07:40 2010
New Revision: 910413
URL: http://svn.apache.org/viewvc?rev=910413&view=rev
Log:
HIVE-1158. Introducing a new parameter for Map-side join bucket size. (Ning Zhang via zshao)
Modified:
hadoop/hive/trunk/CHANGES.txt
hadoop/hive/trunk/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
hadoop/hive/trunk/conf/hive-default.xml
hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/MapJoinOperator.java
hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/MapJoinObjectValue.java
Modified: hadoop/hive/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/CHANGES.txt?rev=910413&r1=910412&r2=910413&view=diff
==============================================================================
--- hadoop/hive/trunk/CHANGES.txt (original)
+++ hadoop/hive/trunk/CHANGES.txt Tue Feb 16 07:07:40 2010
@@ -78,6 +78,9 @@
HIVE-1122. Make ql/metadata/Table and Partition serializable
(Zheng Shao via He Yongqiang)
+ HIVE-1158. Introducing a new parameter for Map-side join bucket size.
+ (Ning Zhang via zshao)
+
OPTIMIZATIONS
BUG FIXES
Modified: hadoop/hive/trunk/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java?rev=910413&r1=910412&r2=910413&view=diff
==============================================================================
--- hadoop/hive/trunk/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java (original)
+++ hadoop/hive/trunk/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java Tue Feb 16 07:07:40 2010
@@ -133,6 +133,7 @@
HIVEGROUPBYSKEW("hive.groupby.skewindata", "false"),
HIVEJOINEMITINTERVAL("hive.join.emit.interval", 1000),
HIVEJOINCACHESIZE("hive.join.cache.size", 25000),
+ HIVEMAPJOINBUCKETCACHESIZE("hive.mapjoin.bucket.cache.size", 100),
HIVEMAPJOINROWSIZE("hive.mapjoin.size.key", 10000),
HIVEMAPJOINCACHEROWS("hive.mapjoin.cache.numrows", 25000),
HIVEGROUPBYMAPINTERVAL("hive.groupby.mapaggr.checkinterval", 100000),
Modified: hadoop/hive/trunk/conf/hive-default.xml
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/conf/hive-default.xml?rev=910413&r1=910412&r2=910413&view=diff
==============================================================================
--- hadoop/hive/trunk/conf/hive-default.xml (original)
+++ hadoop/hive/trunk/conf/hive-default.xml Tue Feb 16 07:07:40 2010
@@ -268,6 +268,18 @@
</property>
<property>
+ <name>hive.join.cache.size</name>
+ <value>25000</value>
+ <description>How many rows in the joining tables (except the streaming table) should be cached in memory. </description>
+</property>
+
+<property>
+ <name>hive.mapjoin.bucket.cache.size</name>
+ <value>100</value>
+ <description>How many values for each key in the map-joined table should be cached in memory. </description>
+</property>
+
+<property>
<name>hive.mapjoin.maxsize</name>
<value>100000</value>
<description>Maximum # of rows of the small table that can be handled by map-side join. If the size is reached and hive.task.progress is set, a fatal error counter is set and the job will be killed.</description>
Modified: hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/MapJoinOperator.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/MapJoinOperator.java?rev=910413&r1=910412&r2=910413&view=diff
==============================================================================
--- hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/MapJoinOperator.java (original)
+++ hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/MapJoinOperator.java Tue Feb 16 07:07:40 2010
@@ -282,7 +282,8 @@
boolean needNewKey = true;
if (o == null) {
- res = getRowContainer(hconf, (byte) tag, order[tag], joinCacheSize);
+ int bucketSize = HiveConf.getIntVar(hconf, HiveConf.ConfVars.HIVEMAPJOINBUCKETCACHESIZE);
+ res = getRowContainer(hconf, (byte) tag, order[tag], bucketSize);
res.add(value);
} else {
res = o.getObj();
@@ -320,6 +321,7 @@
MapJoinObjectValue valueObj = new MapJoinObjectValue(
metadataValueTag[tag], res);
valueObj.setConf(hconf);
+ valueObj.setConf(hconf);
// This may potentially increase the size of the hashmap on the mapper
if (res.size() > mapJoinRowsKey) {
if (res.size() % 100 == 0) {
Modified: hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/MapJoinObjectValue.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/MapJoinObjectValue.java?rev=910413&r1=910412&r2=910413&view=diff
==============================================================================
--- hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/MapJoinObjectValue.java (original)
+++ hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/MapJoinObjectValue.java Tue Feb 16 07:07:40 2010
@@ -25,6 +25,7 @@
import java.util.ArrayList;
import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.ql.exec.MapJoinOperator;
import org.apache.hadoop.hive.ql.exec.MapJoinOperator.MapJoinObjectCtx;
import org.apache.hadoop.hive.ql.metadata.HiveException;
@@ -41,8 +42,10 @@
protected transient int metadataTag;
protected transient RowContainer obj;
protected transient Configuration conf;
+ protected int bucketSize; // bucket size for RowContainer
public MapJoinObjectValue() {
+ bucketSize = 100; // default bucket size
}
/**
@@ -87,8 +90,7 @@
MapJoinObjectCtx ctx = MapJoinOperator.getMapMetadata().get(
Integer.valueOf(metadataTag));
int sz = in.readInt();
-
- RowContainer res = new RowContainer(ctx.getConf());
+ RowContainer res = new RowContainer(bucketSize, ctx.getConf());
res.setSerDe(ctx.getSerDe(), ctx.getStandardOI());
res.setTableDesc(ctx.getTblDesc());
for (int pos = 0; pos < sz; pos++) {
@@ -165,6 +167,7 @@
public void setConf(Configuration conf) {
this.conf = conf;
+ bucketSize = HiveConf.getIntVar(conf, HiveConf.ConfVars.HIVEMAPJOINBUCKETCACHESIZE);
}
}