You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by na...@apache.org on 2009/11/27 18:48:18 UTC

svn commit: r884922 - in /hadoop/hive/trunk: ./ ql/src/java/org/apache/hadoop/hive/ql/exec/ serde/src/java/org/apache/hadoop/hive/serde2/lazy/ serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/

Author: namit
Date: Fri Nov 27 17:48:18 2009
New Revision: 884922

URL: http://svn.apache.org/viewvc?rev=884922&view=rev
Log:
HIVE-949. avoid deep copy for group by hash table lookup
(He Yongqiang via namit)


Modified:
    hadoop/hive/trunk/CHANGES.txt
    hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/GroupByOperator.java
    hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyNonPrimitive.java
    hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyObject.java
    hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyPrimitive.java
    hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyUtils.java
    hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryNonPrimitive.java
    hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryObject.java
    hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryPrimitive.java

Modified: hadoop/hive/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/CHANGES.txt?rev=884922&r1=884921&r2=884922&view=diff
==============================================================================
--- hadoop/hive/trunk/CHANGES.txt (original)
+++ hadoop/hive/trunk/CHANGES.txt Fri Nov 27 17:48:18 2009
@@ -123,6 +123,9 @@
 
     HIVE-947. runlength encding for RCFILE (He Yongqiang via namit)
 
+    HIVE-949. avoid deep copy for group by hash table lookup
+    (He Yongqiang via namit)
+
   OPTIMIZATIONS
 
   BUG FIXES

Modified: hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/GroupByOperator.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/GroupByOperator.java?rev=884922&r1=884921&r2=884922&view=diff
==============================================================================
--- hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/GroupByOperator.java (original)
+++ hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/GroupByOperator.java Fri Nov 27 17:48:18 2009
@@ -85,7 +85,7 @@
   transient protected Object[][] aggregationsParametersLastInvoke;
 
   // Used by hash-based GroupBy: Mode = HASH, PARTIALS
-  transient protected HashMap<ArrayList<Object>, AggregationBuffer[]> hashAggregations;
+  transient protected HashMap<KeyWrapper, AggregationBuffer[]> hashAggregations;
   
   // Used by hash distinct aggregations when hashGrpKeyNotRedKey is true
   transient protected HashSet<ArrayList<Object>> keysCurrentGroup;
@@ -214,7 +214,7 @@
       aggregations = newAggregations();
       hashAggr = false;
     } else {
-      hashAggregations = new HashMap<ArrayList<Object>, AggregationBuffer[]>();
+      hashAggregations = new HashMap<KeyWrapper, AggregationBuffer[]>();
       aggregations = newAggregations();
       hashAggr = true;
       keyPositionsSize = new ArrayList<Integer>();
@@ -521,24 +521,81 @@
     }
   }
   
+  /**
+   * Key type of the hashAggregations map: a list of group-by key objects
+   * together with a hash code supplied by whoever builds the wrapper.
+   *
+   * processHashAggr probes the map with a wrapper around the incoming key
+   * objects and replaces them with a deep copy only when it has to insert a
+   * new entry, so the lookup itself does not copy the keys.
+   */
+  class KeyWrapper{
+    // Hash code chosen by the caller; hashCode() returns it unchanged.
+    int hashcode;
+    // Keys being wrapped: the probe keys during lookup, their deep copy once stored.
+    ArrayList<Object> copiedKeys;
+    
+    KeyWrapper() {}
+    
+    public KeyWrapper(int hashcode, ArrayList<Object> copiedKeys) {
+      this.hashcode = hashcode;
+      this.copiedKeys = copiedKeys;
+    }
+    
+    @Override
+    public int hashCode(){
+      return hashcode;
+    }
+    
+    /**
+     * Compares the wrapped key lists with ObjectInspectorUtils.compare: the
+     * argument's keys are read through currentKeyObjectInspector and this
+     * wrapper's keys through newKeyObjectInspector.
+     * NOTE(review): presumably this relies on the map invoking equals on the
+     * probe wrapper with the stored wrapper as the argument -- confirm.
+     *
+     * @param obj the object to compare against
+     * @return true if obj is a KeyWrapper whose keys compare equal to this
+     *         wrapper's keys; false otherwise, including when obj is null
+     */
+    @Override
+    public boolean equals(Object obj) {
+      // Follow the Object.equals contract: no exception for null or foreign types.
+      if (!(obj instanceof KeyWrapper)) {
+        return false;
+      }
+      ArrayList<Object> toBeCopied = ((KeyWrapper) obj).copiedKeys;
+      return ObjectInspectorUtils.compare(toBeCopied, currentKeyObjectInspector, copiedKeys, newKeyObjectInspector) == 0;
+    }
+  }
+  
   private void processHashAggr(Object row, ObjectInspector rowInspector, ArrayList<Object> newKeys) throws HiveException {
     // Prepare aggs for updating
     AggregationBuffer[] aggs = null;
     boolean newEntryForHashAggr = false;
-
+    
+    KeyWrapper keyProber = new KeyWrapper();
+    keyProber.hashcode = newKeys.hashCode();
+    //use this to probe the hashmap
+    keyProber.copiedKeys = newKeys;
+    
     // hash-based aggregations
-    ArrayList<Object> newDefaultKeys = deepCopyElements(keyObjects, keyObjectInspectors, ObjectInspectorCopyOption.WRITABLE);
-    aggs = hashAggregations.get(newDefaultKeys);
-    if (aggs == null) {
+    aggs = hashAggregations.get(keyProber);
+    ArrayList<Object> newDefaultKeys = null;
+    if(aggs == null) {
+      newDefaultKeys = deepCopyElements(keyObjects, keyObjectInspectors, ObjectInspectorCopyOption.WRITABLE);
+      keyProber.copiedKeys = newDefaultKeys;
       aggs = newAggregations();
-      hashAggregations.put(newDefaultKeys, aggs);
+      hashAggregations.put(keyProber, aggs);
       newEntryForHashAggr = true;
       numRowsHashTbl++;      // new entry in the hash table
     }
-
+    
     // If the grouping key and the reduction key are different, a set of grouping keys for the current reduction key are maintained in keysCurrentGroup
     // Peek into the set to find out if a new grouping key is seen for the given reduction key
     if (groupKeyIsNotReduceKey) {
+      if(newDefaultKeys == null)
+        newDefaultKeys = deepCopyElements(keyObjects, keyObjectInspectors, ObjectInspectorCopyOption.WRITABLE);
       newEntryForHashAggr = keysCurrentGroup.add(newDefaultKeys);
     }
 
@@ -649,11 +680,11 @@
     // changed in the future
 
     if (complete) {
-      Iterator<Map.Entry<ArrayList<Object>, AggregationBuffer[]>> 
+      Iterator<Map.Entry<KeyWrapper, AggregationBuffer[]>> 
           iter = hashAggregations.entrySet().iterator();
       while (iter.hasNext()) {
-        Map.Entry<ArrayList<Object>, AggregationBuffer[]> m = iter.next();
-        forward(m.getKey(), m.getValue());
+        Map.Entry<KeyWrapper, AggregationBuffer[]> m = iter.next();
+        forward(m.getKey().copiedKeys, m.getValue());
       }
       hashAggregations.clear();
       hashAggregations = null;
@@ -663,12 +694,12 @@
 
     int oldSize = hashAggregations.size();
     LOG.warn("Hash Tbl flush: #hash table = " + oldSize);
-    Iterator<Map.Entry<ArrayList<Object>, AggregationBuffer[]>>
+    Iterator<Map.Entry<KeyWrapper, AggregationBuffer[]>>
         iter = hashAggregations.entrySet().iterator();
     int numDel = 0;
     while (iter.hasNext()) {
-      Map.Entry<ArrayList<Object>, AggregationBuffer[]> m = iter.next();
-      forward(m.getKey(), m.getValue());
+      Map.Entry<KeyWrapper, AggregationBuffer[]> m = iter.next();
+      forward(m.getKey().copiedKeys, m.getValue());
       iter.remove();
       numDel++;
       if (numDel * 10 >= oldSize) {
@@ -731,8 +762,8 @@
             LOG.warn("Begin Hash Table flush at close: size = " + hashAggregations.size());
             Iterator iter = hashAggregations.entrySet().iterator();
             while (iter.hasNext()) {
-              Map.Entry<ArrayList<Object>, AggregationBuffer[]> m = (Map.Entry)iter.next();
-              forward(m.getKey(), m.getValue());
+              Map.Entry<KeyWrapper, AggregationBuffer[]> m = (Map.Entry)iter.next();
+              forward(m.getKey().copiedKeys, m.getValue());
               iter.remove();
             }
             hashAggregations.clear();

Modified: hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyNonPrimitive.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyNonPrimitive.java?rev=884922&r1=884921&r2=884922&view=diff
==============================================================================
--- hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyNonPrimitive.java (original)
+++ hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyNonPrimitive.java Fri Nov 27 17:48:18 2009
@@ -18,6 +18,7 @@
 package org.apache.hadoop.hive.serde2.lazy;
 
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.io.WritableComparator;
 
 /**
  * LazyPrimitive stores a primitive Object in a LazyObject.
@@ -60,4 +61,8 @@
   public Object getObject() {
     return this; 
   }
+  
+  public int hashCode() { // hash is computed from the backing byte array, passing this object's start and length
+    return LazyUtils.hashBytes(bytes.getData(), start, length);
+  }
 }

Modified: hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyObject.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyObject.java?rev=884922&r1=884921&r2=884922&view=diff
==============================================================================
--- hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyObject.java (original)
+++ hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyObject.java Fri Nov 27 17:48:18 2009
@@ -56,4 +56,6 @@
    * Otherwise (array, map, struct), return this. 
    */
   public abstract Object getObject();
+  
+  public abstract int hashCode(); // redeclared abstract: each concrete subclass has to provide its own hashCode
 }

Modified: hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyPrimitive.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyPrimitive.java?rev=884922&r1=884921&r2=884922&view=diff
==============================================================================
--- hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyPrimitive.java (original)
+++ hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyPrimitive.java Fri Nov 27 17:48:18 2009
@@ -53,4 +53,9 @@
   public String toString() {
     return isNull ? null : data.toString();
   }
+  
+  public int hashCode(){ // 0 when isNull is set, otherwise the hash code of the wrapped data object
+    return isNull ? 0 : data.hashCode();
+  }
+  
 }

Modified: hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyUtils.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyUtils.java?rev=884922&r1=884921&r2=884922&view=diff
==============================================================================
--- hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyUtils.java (original)
+++ hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyUtils.java Fri Nov 27 17:48:18 2009
@@ -186,4 +186,24 @@
     }
   }
   
+  /**
+   * Computes a hash code over a slice of a byte array.
+   * The hash starts at 1 and each byte is folded in as 31 * hash + b, so an
+   * empty slice hashes to 1.
+   *
+   * @param data  array holding the bytes to hash
+   * @param start index of the first byte of the slice
+   * @param len   number of bytes in the slice
+   * @return hash of the bytes data[start] .. data[start + len - 1]
+   */
+  public static int hashBytes(byte[] data, int start, int len) {
+    int hash = 1;
+    // len is a byte count, not an end index, so the loop must stop at start + len.
+    int end = start + len;
+    for (int i = start; i < end; i++) {
+      hash = (31 * hash) + (int)data[i];
+    }
+    return hash;
+  }
+  
 }

Modified: hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryNonPrimitive.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryNonPrimitive.java?rev=884922&r1=884921&r2=884922&view=diff
==============================================================================
--- hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryNonPrimitive.java (original)
+++ hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryNonPrimitive.java Fri Nov 27 17:48:18 2009
@@ -18,6 +18,7 @@
 package org.apache.hadoop.hive.serde2.lazybinary;
 
 import org.apache.hadoop.hive.serde2.lazy.ByteArrayRef;
+import org.apache.hadoop.hive.serde2.lazy.LazyUtils;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
 
 public abstract class LazyBinaryNonPrimitive<OI extends ObjectInspector>  extends LazyBinaryObject<OI> {
@@ -50,4 +51,8 @@
     this.start = start;
     this.length = length;
   }
+  
+  public int hashCode() { // hash is computed from the backing byte array, passing this object's start and length
+    return LazyUtils.hashBytes(bytes.getData(), start, length);
+  }
 }

Modified: hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryObject.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryObject.java?rev=884922&r1=884921&r2=884922&view=diff
==============================================================================
--- hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryObject.java (original)
+++ hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryObject.java Fri Nov 27 17:48:18 2009
@@ -61,4 +61,6 @@
    * Otherwise (string, list, map, struct), return this. 
    */
   public abstract Object getObject();
+  
+  public abstract int hashCode(); // redeclared abstract: each concrete subclass has to provide its own hashCode
 }

Modified: hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryPrimitive.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryPrimitive.java?rev=884922&r1=884921&r2=884922&view=diff
==============================================================================
--- hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryPrimitive.java (original)
+++ hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryPrimitive.java Fri Nov 27 17:48:18 2009
@@ -58,4 +58,8 @@
   public String toString() {
     return data.toString();
   }
+  
+  public int hashCode(){ // 0 when data is null, otherwise the hash code of the wrapped data object
+    return data == null ? 0 : data.hashCode();
+  }
 }