You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by na...@apache.org on 2009/10/15 22:50:48 UTC

svn commit: r825647 - in /hadoop/hive/branches/branch-0.4: CHANGES.txt ql/src/java/org/apache/hadoop/hive/ql/exec/GroupByOperator.java

Author: namit
Date: Thu Oct 15 20:50:48 2009
New Revision: 825647

URL: http://svn.apache.org/viewvc?rev=825647&view=rev
Log:
HIVE-878. Update the hash table entry before flushing in Group By
hash aggregation (Zheng Shao via namit)


Modified:
    hadoop/hive/branches/branch-0.4/CHANGES.txt
    hadoop/hive/branches/branch-0.4/ql/src/java/org/apache/hadoop/hive/ql/exec/GroupByOperator.java

Modified: hadoop/hive/branches/branch-0.4/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/hive/branches/branch-0.4/CHANGES.txt?rev=825647&r1=825646&r2=825647&view=diff
==============================================================================
--- hadoop/hive/branches/branch-0.4/CHANGES.txt (original)
+++ hadoop/hive/branches/branch-0.4/CHANGES.txt Thu Oct 15 20:50:48 2009
@@ -567,6 +567,9 @@
     HIVE-864. Fix map-join memory-leak.
     (Namit Jain via zshao)
 
+    HIVE-878. Update the hash table entry before flushing in Group By
+    hash aggregation (Zheng Shao via namit)
+
 Release 0.3.1 - Unreleased
 
   INCOMPATIBLE CHANGES

Modified: hadoop/hive/branches/branch-0.4/ql/src/java/org/apache/hadoop/hive/ql/exec/GroupByOperator.java
URL: http://svn.apache.org/viewvc/hadoop/hive/branches/branch-0.4/ql/src/java/org/apache/hadoop/hive/ql/exec/GroupByOperator.java?rev=825647&r1=825646&r2=825647&view=diff
==============================================================================
--- hadoop/hive/branches/branch-0.4/ql/src/java/org/apache/hadoop/hive/ql/exec/GroupByOperator.java (original)
+++ hadoop/hive/branches/branch-0.4/ql/src/java/org/apache/hadoop/hive/ql/exec/GroupByOperator.java Thu Oct 15 20:50:48 2009
@@ -541,14 +541,17 @@
       newEntryForHashAggr = keysCurrentGroup.add(newDefaultKeys);
     }
 
-    // based on used-specified parameters, check if the hash table needs to be flushed
+    // Update the aggs
+    updateAggregations(aggs, row, rowInspector, true, newEntryForHashAggr, null);
+
+    // We can only flush after updateAggregations is done; otherwise the potentially new
+    // entry "aggs" could be flushed out of the hash table before it has been updated.
+    
+    // Based on user-specified parameters, check if the hash table needs to be flushed.
     // If the grouping key is not the same as reduction key, flushing can only happen at boundaries
     if ((!groupKeyIsNotReduceKey || firstRowInGroup) && shouldBeFlushed(newKeys)) {
       flush(false);
     }
-
-    // Update the aggs
-    updateAggregations(aggs, row, rowInspector, true, newEntryForHashAggr, null);
   }
 
   // Non-hash aggregation