You are viewing a plain text version of this content. (The link to the canonical version was not preserved in this plain-text copy.)
Posted to commits@hive.apache.org by na...@apache.org on 2009/04/07 03:40:35 UTC
svn commit: r762591 - in /hadoop/hive/branches/branch-0.3: ./ data/scripts/
ql/src/java/org/apache/hadoop/hive/ql/exec/
ql/src/test/queries/clientpositive/ ql/src/test/results/clientpositive/
Author: namit
Date: Tue Apr 7 01:40:34 2009
New Revision: 762591
URL: http://svn.apache.org/viewvc?rev=762591&view=rev
Log:
HIVE-382. fixed problems in map-side aggr - backported from trunk.
(namit)
Added:
hadoop/hive/branches/branch-0.3/data/scripts/dumpdata_script.py
hadoop/hive/branches/branch-0.3/ql/src/test/queries/clientpositive/groupby_bigdata.q
hadoop/hive/branches/branch-0.3/ql/src/test/results/clientpositive/groupby_bigdata.q.out
Modified:
hadoop/hive/branches/branch-0.3/CHANGES.txt
hadoop/hive/branches/branch-0.3/ql/src/java/org/apache/hadoop/hive/ql/exec/GroupByOperator.java
Modified: hadoop/hive/branches/branch-0.3/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/hive/branches/branch-0.3/CHANGES.txt?rev=762591&r1=762590&r2=762591&view=diff
==============================================================================
--- hadoop/hive/branches/branch-0.3/CHANGES.txt (original)
+++ hadoop/hive/branches/branch-0.3/CHANGES.txt Tue Apr 7 01:40:34 2009
@@ -102,6 +102,9 @@
HIVE-373. 1 reducer should be used if no grouping key is present in all
scenarios. (Namit Jain via zshao)
+ HIVE-382. fixed problems in map-side aggr - backported from trunk.
+ (namit)
+
Release 0.2.0 - Unreleased
INCOMPATIBLE CHANGES
Added: hadoop/hive/branches/branch-0.3/data/scripts/dumpdata_script.py
URL: http://svn.apache.org/viewvc/hadoop/hive/branches/branch-0.3/data/scripts/dumpdata_script.py?rev=762591&view=auto
==============================================================================
--- hadoop/hive/branches/branch-0.3/data/scripts/dumpdata_script.py (added)
+++ hadoop/hive/branches/branch-0.3/data/scripts/dumpdata_script.py Tue Apr 7 01:40:34 2009
@@ -0,0 +1,11 @@
+for i in xrange(100):
+ for j in xrange(10):
+ for k in xrange(42022):
+ print 42000 * i + k
+
+
+for i in xrange(100):
+ for j in xrange(10):
+ for k in xrange(42022):
+ print 5000000 + (42000 * i) + k
+
Modified: hadoop/hive/branches/branch-0.3/ql/src/java/org/apache/hadoop/hive/ql/exec/GroupByOperator.java
URL: http://svn.apache.org/viewvc/hadoop/hive/branches/branch-0.3/ql/src/java/org/apache/hadoop/hive/ql/exec/GroupByOperator.java?rev=762591&r1=762590&r2=762591&view=diff
==============================================================================
--- hadoop/hive/branches/branch-0.3/ql/src/java/org/apache/hadoop/hive/ql/exec/GroupByOperator.java (original)
+++ hadoop/hive/branches/branch-0.3/ql/src/java/org/apache/hadoop/hive/ql/exec/GroupByOperator.java Tue Apr 7 01:40:34 2009
@@ -240,7 +240,7 @@
estimateRowSize();
}
- private static final int javaObjectOverHead = 16;
+ private static final int javaObjectOverHead = 64;
private static final int javaHashEntryOverHead = 64;
private static final int javaSizePrimitiveType = 16;
private static final int javaSizeUnknownType = 256;
@@ -579,11 +579,12 @@
}
hashAggregations.clear();
hashAggregations = null;
+ LOG.warn("Hash Table completed flushed");
return;
}
int oldSize = hashAggregations.size();
- LOG.trace("Hash Tbl flush: #hash table = " + oldSize);
+ LOG.warn("Hash Tbl flush: #hash table = " + oldSize);
Iterator iter = hashAggregations.entrySet().iterator();
int numDel = 0;
while (iter.hasNext()) {
@@ -591,8 +592,10 @@
forward(m.getKey(), m.getValue());
iter.remove();
numDel++;
- if (numDel * 10 >= oldSize)
+ if (numDel * 10 >= oldSize) {
+ LOG.warn("Hash Table flushed: new size = " + hashAggregations.size());
return;
+ }
}
}
@@ -646,9 +649,12 @@
}
else {
if (hashAggregations != null) {
- // hash-based aggregations
- for (ArrayList<Object> key: hashAggregations.keySet()) {
- forward(key, hashAggregations.get(key));
+ LOG.warn("Begin Hash Table flush at close: size = " + hashAggregations.size());
+ Iterator iter = hashAggregations.entrySet().iterator();
+ while (iter.hasNext()) {
+ Map.Entry<ArrayList<Object>, UDAFEvaluator[]> m = (Map.Entry)iter.next();
+ forward(m.getKey(), m.getValue());
+ iter.remove();
}
hashAggregations.clear();
}
Added: hadoop/hive/branches/branch-0.3/ql/src/test/queries/clientpositive/groupby_bigdata.q
URL: http://svn.apache.org/viewvc/hadoop/hive/branches/branch-0.3/ql/src/test/queries/clientpositive/groupby_bigdata.q?rev=762591&view=auto
==============================================================================
--- hadoop/hive/branches/branch-0.3/ql/src/test/queries/clientpositive/groupby_bigdata.q (added)
+++ hadoop/hive/branches/branch-0.3/ql/src/test/queries/clientpositive/groupby_bigdata.q Tue Apr 7 01:40:34 2009
@@ -0,0 +1,4 @@
+set hive.map.aggr.hash.percentmemory = 0.4;
+
+select count(distinct subq.key) from
+(FROM src MAP src.key USING 'python ../data/scripts/dumpdata_script.py' AS key WHERE src.key = 10) subq;
Added: hadoop/hive/branches/branch-0.3/ql/src/test/results/clientpositive/groupby_bigdata.q.out
URL: http://svn.apache.org/viewvc/hadoop/hive/branches/branch-0.3/ql/src/test/results/clientpositive/groupby_bigdata.q.out?rev=762591&view=auto
==============================================================================
--- hadoop/hive/branches/branch-0.3/ql/src/test/results/clientpositive/groupby_bigdata.q.out (added)
+++ hadoop/hive/branches/branch-0.3/ql/src/test/results/clientpositive/groupby_bigdata.q.out Tue Apr 7 01:40:34 2009
@@ -0,0 +1 @@
+8400044