You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by at...@apache.org on 2009/01/29 04:54:27 UTC

svn commit: r738747 - in /hadoop/hive/trunk: CHANGES.txt ql/src/java/org/apache/hadoop/hive/ql/exec/GroupByOperator.java

Author: athusoo
Date: Thu Jan 29 03:54:26 2009
New Revision: 738747

URL: http://svn.apache.org/viewvc?rev=738747&view=rev
Log:
HIVE-256. Fix in map side aggregation wherein we were not including
private members of the records in calculating the avg size of the
rows and we were also not estimating variable length columns 
properly. (Namit Jain via athusoo)


Modified:
    hadoop/hive/trunk/CHANGES.txt
    hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/GroupByOperator.java

Modified: hadoop/hive/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/CHANGES.txt?rev=738747&r1=738746&r2=738747&view=diff
==============================================================================
--- hadoop/hive/trunk/CHANGES.txt (original)
+++ hadoop/hive/trunk/CHANGES.txt Thu Jan 29 03:54:26 2009
@@ -255,3 +255,8 @@
 
     HIVE-257. Set hive history log location to build for tests.
     (Suresh Anthony via athusoo)
+
+    HIVE-256. Fix in map side aggregation wherein we were not including
+    private members of the records in calculating the avg size of the
+    rows and we were also not estimating variable length columns 
+    properly. (Namit Jain via athusoo)

Modified: hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/GroupByOperator.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/GroupByOperator.java?rev=738747&r1=738746&r2=738747&view=diff
==============================================================================
--- hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/GroupByOperator.java (original)
+++ hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/GroupByOperator.java Thu Jan 29 03:54:26 2009
@@ -192,6 +192,7 @@
       hashAggregations = new HashMap<ArrayList<Object>, UDAFEvaluator[]>();
       hashAggr = true;
       keyPositionsSize = new ArrayList<Integer>();
+      aggrPositions = new ArrayList<varLenFields>();
     }
 
     // init objectInspectors
@@ -326,11 +327,13 @@
    // Go over all the aggregation classes and get the size of the fields of fixed length. Keep track of the variable length
     // fields in these aggregation classes.
     for(int i=0; i < aggregationClasses.length; i++) {
+
       fixedRowSize += javaObjectOverHead;
       Class<? extends UDAFEvaluator> agg = aggregationClasses[i];
-      Field[] fArr = agg.getFields();
-      for (Field f : fArr) 
-        fixedRowSize += getSize(i, agg, f);
+      Field[] fArr = agg.getDeclaredFields();
+      for (Field f : fArr) {
+        fixedRowSize += getSize(i, f.getType(), f);
+      }
     }
   }
 
@@ -486,7 +489,7 @@
     int numEntries = hashAggregations.size();
 
     // The fixed size for the aggregation class is already known. Get the variable portion of the size every NUMROWSESTIMATESIZE rows.
-    if ((numEntries % NUMROWSESTIMATESIZE) == 0) {
+    if ((numEntriesHashTable == 0) || ((numEntries % NUMROWSESTIMATESIZE) == 0)) {
       for (Integer pos : keyPositionsSize) {
         Object key = newKeys.get(pos.intValue());
         totalVariableSize += ((String)key).length();
@@ -511,6 +514,7 @@
       }
 
       numEntriesVarSize++;
+
       // Update the number of entries that can fit in the hash table
       numEntriesHashTable = (int)(maxHashTblMemory / (fixedRowSize + ((int)totalVariableSize/numEntriesVarSize)));
       LOG.trace("Hash Aggr: #hash table = " + numEntries + " #max in hash table = " + numEntriesHashTable);