You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by na...@apache.org on 2009/11/27 18:48:18 UTC
svn commit: r884922 - in /hadoop/hive/trunk: ./
ql/src/java/org/apache/hadoop/hive/ql/exec/
serde/src/java/org/apache/hadoop/hive/serde2/lazy/
serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/
Author: namit
Date: Fri Nov 27 17:48:18 2009
New Revision: 884922
URL: http://svn.apache.org/viewvc?rev=884922&view=rev
Log:
HIVE-949. avoid deep copy for group by hash table lookup
(He Yongqiang via namit)
Modified:
hadoop/hive/trunk/CHANGES.txt
hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/GroupByOperator.java
hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyNonPrimitive.java
hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyObject.java
hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyPrimitive.java
hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyUtils.java
hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryNonPrimitive.java
hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryObject.java
hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryPrimitive.java
Modified: hadoop/hive/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/CHANGES.txt?rev=884922&r1=884921&r2=884922&view=diff
==============================================================================
--- hadoop/hive/trunk/CHANGES.txt (original)
+++ hadoop/hive/trunk/CHANGES.txt Fri Nov 27 17:48:18 2009
@@ -123,6 +123,9 @@
HIVE-947. runlength encding for RCFILE (He Yongqiang via namit)
+ HIVE-949. avoid deep copy for group by hash table lookup
+ (He Yongqiang via namit)
+
OPTIMIZATIONS
BUG FIXES
Modified: hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/GroupByOperator.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/GroupByOperator.java?rev=884922&r1=884921&r2=884922&view=diff
==============================================================================
--- hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/GroupByOperator.java (original)
+++ hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/GroupByOperator.java Fri Nov 27 17:48:18 2009
@@ -85,7 +85,7 @@
transient protected Object[][] aggregationsParametersLastInvoke;
// Used by hash-based GroupBy: Mode = HASH, PARTIALS
- transient protected HashMap<ArrayList<Object>, AggregationBuffer[]> hashAggregations;
+ transient protected HashMap<KeyWrapper, AggregationBuffer[]> hashAggregations;
// Used by hash distinct aggregations when hashGrpKeyNotRedKey is true
transient protected HashSet<ArrayList<Object>> keysCurrentGroup;
@@ -214,7 +214,7 @@
aggregations = newAggregations();
hashAggr = false;
} else {
- hashAggregations = new HashMap<ArrayList<Object>, AggregationBuffer[]>();
+ hashAggregations = new HashMap<KeyWrapper, AggregationBuffer[]>();
aggregations = newAggregations();
hashAggr = true;
keyPositionsSize = new ArrayList<Integer>();
@@ -521,24 +521,55 @@
}
}
+ /**
+  * Hash-table key used by hash-based group-by. It pairs a caller-supplied
+  * hash code with a list of key objects, so the table can be probed with the
+  * current row's keys without deep-copying them first.
+  */
+ class KeyWrapper{
+   // Hash code supplied by the creator; hashCode() returns it unchanged.
+   int hashcode;
+   // Key objects this wrapper stands for. The probing code first points this
+   // at the current row's keys and swaps in a deep copy before inserting.
+   ArrayList<Object> copiedKeys;
+
+   KeyWrapper() {}
+
+   public KeyWrapper(int hashcode, ArrayList<Object> copiedKeys) {
+     this.hashcode = hashcode;
+     this.copiedKeys = copiedKeys;
+   }
+
+   /** Returns the hash code stored in this wrapper. */
+   @Override
+   public int hashCode(){
+     return hashcode;
+   }
+
+   /**
+    * Two wrappers are equal when their key lists compare equal under the
+    * operator's key object inspectors. Anything that is not a KeyWrapper
+    * (including null) is reported as unequal, as Object.equals requires,
+    * instead of failing with a ClassCastException or NullPointerException.
+    */
+   @Override
+   public boolean equals(Object obj) {
+     if (!(obj instanceof KeyWrapper)) {
+       return false;
+     }
+     ArrayList<Object> otherKeys = ((KeyWrapper) obj).copiedKeys;
+     return ObjectInspectorUtils.compare(otherKeys, currentKeyObjectInspector, copiedKeys, newKeyObjectInspector) == 0;
+   }
+ }
+
private void processHashAggr(Object row, ObjectInspector rowInspector, ArrayList<Object> newKeys) throws HiveException {
// Prepare aggs for updating
AggregationBuffer[] aggs = null;
boolean newEntryForHashAggr = false;
-
+
+ KeyWrapper keyProber = new KeyWrapper();
+ keyProber.hashcode = newKeys.hashCode();
+ //use this to probe the hashmap
+ keyProber.copiedKeys = newKeys;
+
// hash-based aggregations
- ArrayList<Object> newDefaultKeys = deepCopyElements(keyObjects, keyObjectInspectors, ObjectInspectorCopyOption.WRITABLE);
- aggs = hashAggregations.get(newDefaultKeys);
- if (aggs == null) {
+ aggs = hashAggregations.get(keyProber);
+ ArrayList<Object> newDefaultKeys = null;
+ if(aggs == null) {
+ newDefaultKeys = deepCopyElements(keyObjects, keyObjectInspectors, ObjectInspectorCopyOption.WRITABLE);
+ keyProber.copiedKeys = newDefaultKeys;
aggs = newAggregations();
- hashAggregations.put(newDefaultKeys, aggs);
+ hashAggregations.put(keyProber, aggs);
newEntryForHashAggr = true;
numRowsHashTbl++; // new entry in the hash table
}
-
+
// If the grouping key and the reduction key are different, a set of grouping keys for the current reduction key are maintained in keysCurrentGroup
// Peek into the set to find out if a new grouping key is seen for the given reduction key
if (groupKeyIsNotReduceKey) {
+ if(newDefaultKeys == null)
+ newDefaultKeys = deepCopyElements(keyObjects, keyObjectInspectors, ObjectInspectorCopyOption.WRITABLE);
newEntryForHashAggr = keysCurrentGroup.add(newDefaultKeys);
}
@@ -649,11 +680,11 @@
// changed in the future
if (complete) {
- Iterator<Map.Entry<ArrayList<Object>, AggregationBuffer[]>>
+ Iterator<Map.Entry<KeyWrapper, AggregationBuffer[]>>
iter = hashAggregations.entrySet().iterator();
while (iter.hasNext()) {
- Map.Entry<ArrayList<Object>, AggregationBuffer[]> m = iter.next();
- forward(m.getKey(), m.getValue());
+ Map.Entry<KeyWrapper, AggregationBuffer[]> m = iter.next();
+ forward(m.getKey().copiedKeys, m.getValue());
}
hashAggregations.clear();
hashAggregations = null;
@@ -663,12 +694,12 @@
int oldSize = hashAggregations.size();
LOG.warn("Hash Tbl flush: #hash table = " + oldSize);
- Iterator<Map.Entry<ArrayList<Object>, AggregationBuffer[]>>
+ Iterator<Map.Entry<KeyWrapper, AggregationBuffer[]>>
iter = hashAggregations.entrySet().iterator();
int numDel = 0;
while (iter.hasNext()) {
- Map.Entry<ArrayList<Object>, AggregationBuffer[]> m = iter.next();
- forward(m.getKey(), m.getValue());
+ Map.Entry<KeyWrapper, AggregationBuffer[]> m = iter.next();
+ forward(m.getKey().copiedKeys, m.getValue());
iter.remove();
numDel++;
if (numDel * 10 >= oldSize) {
@@ -731,8 +762,8 @@
LOG.warn("Begin Hash Table flush at close: size = " + hashAggregations.size());
Iterator iter = hashAggregations.entrySet().iterator();
while (iter.hasNext()) {
- Map.Entry<ArrayList<Object>, AggregationBuffer[]> m = (Map.Entry)iter.next();
- forward(m.getKey(), m.getValue());
+ Map.Entry<KeyWrapper, AggregationBuffer[]> m = (Map.Entry)iter.next();
+ forward(m.getKey().copiedKeys, m.getValue());
iter.remove();
}
hashAggregations.clear();
Modified: hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyNonPrimitive.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyNonPrimitive.java?rev=884922&r1=884921&r2=884922&view=diff
==============================================================================
--- hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyNonPrimitive.java (original)
+++ hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyNonPrimitive.java Fri Nov 27 17:48:18 2009
@@ -18,6 +18,7 @@
package org.apache.hadoop.hive.serde2.lazy;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.io.WritableComparator;
/**
* LazyPrimitive stores a primitive Object in a LazyObject.
@@ -60,4 +61,8 @@
public Object getObject() {
return this;
}
+
+ /**
+  * Returns a hash of this object's backing bytes, obtained by passing
+  * bytes.getData(), start and length to LazyUtils.hashBytes.
+  */
+ @Override
+ public int hashCode() {
+   return LazyUtils.hashBytes(bytes.getData(), start, length);
+ }
}
Modified: hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyObject.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyObject.java?rev=884922&r1=884921&r2=884922&view=diff
==============================================================================
--- hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyObject.java (original)
+++ hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyObject.java Fri Nov 27 17:48:18 2009
@@ -56,4 +56,6 @@
* Otherwise (array, map, struct), return this.
*/
public abstract Object getObject();
+
+ /**
+  * Redeclared as abstract so that every concrete subclass has to provide its
+  * own hash code instead of inheriting Object's identity-based one.
+  */
+ public abstract int hashCode();
}
Modified: hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyPrimitive.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyPrimitive.java?rev=884922&r1=884921&r2=884922&view=diff
==============================================================================
--- hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyPrimitive.java (original)
+++ hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyPrimitive.java Fri Nov 27 17:48:18 2009
@@ -53,4 +53,9 @@
public String toString() {
return isNull ? null : data.toString();
}
+
+ /**
+  * Returns 0 when isNull is set; otherwise the hash code of the wrapped data.
+  */
+ @Override
+ public int hashCode(){
+   return isNull ? 0 : data.hashCode();
+ }
+
}
Modified: hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyUtils.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyUtils.java?rev=884922&r1=884921&r2=884922&view=diff
==============================================================================
--- hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyUtils.java (original)
+++ hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyUtils.java Fri Nov 27 17:48:18 2009
@@ -186,4 +186,11 @@
}
}
+ /**
+  * Computes a hash code over a slice of a byte array, using the same
+  * polynomial (seed 1, multiplier 31) as java.util.Arrays.hashCode(byte[]).
+  *
+  * @param data  the array holding the bytes
+  * @param start offset of the first byte to hash
+  * @param len   number of bytes to hash, starting at start
+  * @return the hash of data[start .. start + len - 1]; 1 when len is 0
+  */
+ public static int hashBytes(byte[] data, int start, int len) {
+   int hash = 1;
+   // len is a byte count, not an end offset, so the slice ends at start + len.
+   final int end = start + len;
+   for (int i = start; i < end; i++) {
+     hash = (31 * hash) + data[i];
+   }
+   return hash;
+ }
+
}
Modified: hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryNonPrimitive.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryNonPrimitive.java?rev=884922&r1=884921&r2=884922&view=diff
==============================================================================
--- hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryNonPrimitive.java (original)
+++ hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryNonPrimitive.java Fri Nov 27 17:48:18 2009
@@ -18,6 +18,7 @@
package org.apache.hadoop.hive.serde2.lazybinary;
import org.apache.hadoop.hive.serde2.lazy.ByteArrayRef;
+import org.apache.hadoop.hive.serde2.lazy.LazyUtils;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
public abstract class LazyBinaryNonPrimitive<OI extends ObjectInspector> extends LazyBinaryObject<OI> {
@@ -50,4 +51,8 @@
this.start = start;
this.length = length;
}
+
+ /**
+  * Returns a hash of this object's backing bytes, obtained by passing
+  * bytes.getData(), start and length to LazyUtils.hashBytes.
+  */
+ @Override
+ public int hashCode() {
+   return LazyUtils.hashBytes(bytes.getData(), start, length);
+ }
}
Modified: hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryObject.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryObject.java?rev=884922&r1=884921&r2=884922&view=diff
==============================================================================
--- hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryObject.java (original)
+++ hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryObject.java Fri Nov 27 17:48:18 2009
@@ -61,4 +61,6 @@
* Otherwise (string, list, map, struct), return this.
*/
public abstract Object getObject();
+
+ /**
+  * Redeclared as abstract so that every concrete subclass has to provide its
+  * own hash code instead of inheriting Object's identity-based one.
+  */
+ public abstract int hashCode();
}
Modified: hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryPrimitive.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryPrimitive.java?rev=884922&r1=884921&r2=884922&view=diff
==============================================================================
--- hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryPrimitive.java (original)
+++ hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryPrimitive.java Fri Nov 27 17:48:18 2009
@@ -58,4 +58,8 @@
public String toString() {
return data.toString();
}
+
+ /**
+  * Returns 0 when data is null; otherwise the hash code of the wrapped data.
+  */
+ @Override
+ public int hashCode(){
+   return data == null ? 0 : data.hashCode();
+ }
}