You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by na...@apache.org on 2009/06/16 02:35:31 UTC
svn commit: r785036 - in /hadoop/hive/trunk: CHANGES.txt
common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
conf/hive-default.xml
ql/src/java/org/apache/hadoop/hive/ql/exec/GroupByOperator.java
Author: namit
Date: Tue Jun 16 00:35:31 2009
New Revision: 785036
URL: http://svn.apache.org/viewvc?rev=785036&view=rev
Log:
HIVE-561. Make hash aggregation threshold configurable
(Zheng Shao via namit)
Modified:
hadoop/hive/trunk/CHANGES.txt
hadoop/hive/trunk/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
hadoop/hive/trunk/conf/hive-default.xml
hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/GroupByOperator.java
Modified: hadoop/hive/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/CHANGES.txt?rev=785036&r1=785035&r2=785036&view=diff
==============================================================================
--- hadoop/hive/trunk/CHANGES.txt (original)
+++ hadoop/hive/trunk/CHANGES.txt Tue Jun 16 00:35:31 2009
@@ -236,6 +236,9 @@
HIVE-547. Better logging in ExecDriver
(Zheng Shao via namit)
+ HIVE-561. Make hash aggregation threshold configurable
+ (Zheng Shao via namit)
+
Release 0.3.1 - Unreleased
INCOMPATIBLE CHANGES
Modified: hadoop/hive/trunk/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java?rev=785036&r1=785035&r2=785036&view=diff
==============================================================================
--- hadoop/hive/trunk/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java (original)
+++ hadoop/hive/trunk/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java Tue Jun 16 00:35:31 2009
@@ -117,6 +117,7 @@
HIVEMAPJOINCACHEROWS("hive.mapjoin.cache.numrows", 10000),
HIVEGROUPBYMAPINTERVAL("hive.groupby.mapaggr.checkinterval", 100000),
HIVEMAPAGGRHASHMEMORY("hive.map.aggr.hash.percentmemory", (float)0.5),
+ HIVEMAPAGGRHASHMINREDUCTION("hive.map.aggr.hash.min.reduction", (float)0.5),
// Default file format for CREATE TABLE statement
// Options: TextFile, SequenceFile
Modified: hadoop/hive/trunk/conf/hive-default.xml
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/conf/hive-default.xml?rev=785036&r1=785035&r2=785036&view=diff
==============================================================================
--- hadoop/hive/trunk/conf/hive-default.xml (original)
+++ hadoop/hive/trunk/conf/hive-default.xml Tue Jun 16 00:35:31 2009
@@ -145,6 +145,14 @@
</property>
<property>
+ <name>hive.map.aggr.hash.min.reduction</name>
+ <value>0.5</value>
+ <description>Hash aggregation will be turned off if the ratio between the number
+ of hash table entries and the number of input rows is bigger than this number.
+ Set to 1 to make sure hash aggregation is never turned off.</description>
+</property>
+
+<property>
<name>hive.optimize.ppd</name>
<value>false</value>
<description>Whether to enable predicate pushdown</description>
Modified: hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/GroupByOperator.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/GroupByOperator.java?rev=785036&r1=785035&r2=785036&view=diff
==============================================================================
--- hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/GroupByOperator.java (original)
+++ hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/GroupByOperator.java Tue Jun 16 00:35:31 2009
@@ -97,6 +97,7 @@
transient long numRowsHashTbl;
transient int groupbyMapAggrInterval;
transient long numRowsCompareHashAggr;
+ transient float minReductionHashAggr;
/**
@@ -130,7 +131,7 @@
transient int totalVariableSize;
transient int numEntriesVarSize;
transient int numEntriesHashTable;
-
+
public void initializeOp(Configuration hconf, Reporter reporter, ObjectInspector[] inputObjInspector) throws HiveException {
totalMemory = Runtime.getRuntime().totalMemory();
@@ -227,6 +228,7 @@
// compare every groupbyMapAggrInterval rows
numRowsCompareHashAggr = groupbyMapAggrInterval;
+ minReductionHashAggr = HiveConf.getFloatVar(hconf, HiveConf.ConfVars.HIVEMAPAGGRHASHMINREDUCTION);
}
// init objectInspectors
@@ -475,13 +477,16 @@
if (numRowsInput == numRowsCompareHashAggr) {
numRowsCompareHashAggr += groupbyMapAggrInterval;
- // map-side aggregation should reduce the entries by at-least half
- if ((numRowsHashTbl * 2) > numRowsInput) {
- LOG.warn("Disable Hash Aggr: #hash table = " + numRowsHashTbl + " #total = " + numRowsInput);
+ // turn off map-side hash aggregation when #hash table / #total exceeds minReductionHashAggr
+ if (numRowsHashTbl > numRowsInput * minReductionHashAggr) {
+ LOG.warn("Disable Hash Aggr: #hash table = " + numRowsHashTbl + " #total = " + numRowsInput
+ + " reduction = " + 1.0*numRowsHashTbl/numRowsInput + " minReduction = " + minReductionHashAggr);
flush(true);
hashAggr = false;
}
- else
- LOG.trace("Hash Aggr Enabled: #hash table = " + numRowsHashTbl + " #total = " + numRowsInput);
+ else {
+ LOG.trace("Hash Aggr Enabled: #hash table = " + numRowsHashTbl + " #total = " + numRowsInput
+ + " reduction = " + 1.0*numRowsHashTbl/numRowsInput + " minReduction = " + minReductionHashAggr);
+ }
}
}