You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by na...@apache.org on 2010/10/22 06:57:08 UTC
svn commit: r1026210 - in /hadoop/hive/trunk: CHANGES.txt
ql/src/java/org/apache/hadoop/hive/ql/exec/GroupByOperator.java
serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/ListObjectsEqualComparer.java
Author: namit
Date: Fri Oct 22 04:57:08 2010
New Revision: 1026210
URL: http://svn.apache.org/viewvc?rev=1026210&view=rev
Log:
HIVE-1738. Optimize key comparison in groupby
(Siying Dong via namit)
Added:
hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/ListObjectsEqualComparer.java
Modified:
hadoop/hive/trunk/CHANGES.txt
hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/GroupByOperator.java
Modified: hadoop/hive/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/CHANGES.txt?rev=1026210&r1=1026209&r2=1026210&view=diff
==============================================================================
--- hadoop/hive/trunk/CHANGES.txt (original)
+++ hadoop/hive/trunk/CHANGES.txt Fri Oct 22 04:57:08 2010
@@ -199,6 +199,9 @@ Trunk - Unreleased
HIVE-1660. Change get_partitions_ps to pass partition filter to
database (Paul Yang via namit)
+ HIVE-1738. Optimize key comparison in groupby
+ (Siying Dong via namit)
+
OPTIMIZATIONS
BUG FIXES
Modified: hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/GroupByOperator.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/GroupByOperator.java?rev=1026210&r1=1026209&r2=1026210&view=diff
==============================================================================
--- hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/GroupByOperator.java (original)
+++ hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/GroupByOperator.java Fri Oct 22 04:57:08 2010
@@ -40,14 +40,15 @@ import org.apache.hadoop.hive.ql.plan.Gr
import org.apache.hadoop.hive.ql.plan.api.OperatorType;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator.AggregationBuffer;
+import org.apache.hadoop.hive.serde2.objectinspector.ListObjectsEqualComparer;
import org.apache.hadoop.hive.serde2.lazy.LazyPrimitive;
import org.apache.hadoop.hive.serde2.lazy.objectinspector.primitive.LazyStringObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils;
+import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils.ObjectInspectorCopyOption;
import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory;
-import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.io.Text;
@@ -117,6 +118,8 @@ public class GroupByOperator extends Ope
// new Key ObjectInspectors are objectInspectors from the parent
transient StructObjectInspector newKeyObjectInspector;
transient StructObjectInspector currentKeyObjectInspector;
+ transient ListObjectsEqualComparer currentStructEqualComparer;
+ transient ListObjectsEqualComparer newKeyStructEqualComparer;
/**
* This is used to store the position and field names for variable length
@@ -245,7 +248,7 @@ public class GroupByOperator extends Ope
aggregations = newAggregations();
hashAggr = false;
} else {
- hashAggregations = new HashMap<KeyWrapper, AggregationBuffer[]>();
+ hashAggregations = new HashMap<KeyWrapper, AggregationBuffer[]>(256);
aggregations = newAggregations();
hashAggr = true;
keyPositionsSize = new ArrayList<Integer>();
@@ -280,6 +283,8 @@ public class GroupByOperator extends Ope
currentKeyObjectInspector = ObjectInspectorFactory
.getStandardStructObjectInspector(keyNames, Arrays
.asList(currentKeyObjectInspectors));
+ currentStructEqualComparer = new ListObjectsEqualComparer(currentKeyObjectInspectors, currentKeyObjectInspectors);
+ newKeyStructEqualComparer = new ListObjectsEqualComparer(currentKeyObjectInspectors, keyObjectInspectors);
outputObjInspector = ObjectInspectorFactory
.getStandardStructObjectInspector(fieldNames, objectInspectors);
@@ -634,15 +639,15 @@ public class GroupByOperator extends Ope
public boolean equals(Object obj) {
ArrayList<Object> copied_in_hashmap = ((KeyWrapper) obj).keys;
if (!copy) {
- return ObjectInspectorUtils.compare(copied_in_hashmap,
- currentKeyObjectInspector, keys, newKeyObjectInspector) == 0;
+ return newKeyStructEqualComparer.areEqual(copied_in_hashmap, keys);
} else {
- return ObjectInspectorUtils.compare(copied_in_hashmap,
- currentKeyObjectInspector, keys, currentKeyObjectInspector) == 0;
+ return currentStructEqualComparer.areEqual(copied_in_hashmap, keys);
}
}
}
+
+
KeyWrapper keyProber = new KeyWrapper();
private void processHashAggr(Object row, ObjectInspector rowInspector,
@@ -705,8 +710,9 @@ public class GroupByOperator extends Ope
// Prepare aggs for updating
AggregationBuffer[] aggs = null;
Object[][] lastInvoke = null;
- boolean keysAreEqual = ObjectInspectorUtils.compare(newKeys,
- newKeyObjectInspector, currentKeys, currentKeyObjectInspector) == 0;
+ boolean keysAreEqual = (currentKeys != null && newKeys != null)?
+ newKeyStructEqualComparer.areEqual(currentKeys, newKeys) : false;
+
// Forward the current keys if needed for sort-based aggregation
if (currentKeys != null && !keysAreEqual) {
Added: hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/ListObjectsEqualComparer.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/ListObjectsEqualComparer.java?rev=1026210&view=auto
==============================================================================
--- hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/ListObjectsEqualComparer.java (added)
+++ hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/ListObjectsEqualComparer.java Fri Oct 22 04:57:08 2010
@@ -0,0 +1,180 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.serde2.objectinspector;
+
+import java.util.ArrayList;
+
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters.Converter;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.BooleanObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.ByteObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.IntObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.LongObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.StringObjectInspector;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
+
+/**
+ * Compare two list of objects.
+ * Two lists are expected to have same types. Type information for every object is
+ * passed when calling Constructor to avoid the step of figuring out types from
+ * ObjectInspetor and determine how to compare the types when comparing.
+ * Also, for string and text elements, it performs slightly better than
+ * using ObjectInspectorUtils.compare() == 0, which instead of calling .compare()
+ * calls .equalTo(), which compares size before byte by byte comparison.
+ *
+ */
+public class ListObjectsEqualComparer {
+ enum CompareType {
+ // Now only string, text, int, long, byte and boolean comparisons
+ // are treated as special cases.
+ // For other types, we reuse ObjectInspectorUtils.compare()
+ COMPARE_STRING, COMPARE_TEXT, COMPARE_INT, COMPARE_LONG, COMPARE_BYTE,
+ COMPARE_BOOL, OTHER
+ }
+
+ class FieldComparer {
+ protected ObjectInspector oi0, oi1;
+ protected ObjectInspector compareOI;
+ protected CompareType compareType;
+ protected Converter converter0, converter1;
+ protected StringObjectInspector soi0, soi1;
+ protected IntObjectInspector ioi0, ioi1;
+ protected LongObjectInspector loi0, loi1;
+ protected ByteObjectInspector byoi0, byoi1;
+ protected BooleanObjectInspector boi0, boi1;
+
+ public FieldComparer(ObjectInspector oi0, ObjectInspector oi1) {
+ this.oi0 = oi0;
+ this.oi1 = oi1;
+
+ TypeInfo type0 = TypeInfoUtils.getTypeInfoFromObjectInspector(oi0);
+ TypeInfo type1 = TypeInfoUtils.getTypeInfoFromObjectInspector(oi1);
+ if (type0.equals(TypeInfoFactory.stringTypeInfo) &&
+ type1.equals(TypeInfoFactory.stringTypeInfo)) {
+ soi0 = (StringObjectInspector) oi0;
+ soi1 = (StringObjectInspector) oi1;
+ if (soi0.preferWritable() || soi1.preferWritable()) {
+ compareType = CompareType.COMPARE_TEXT;
+ } else {
+ compareType = CompareType.COMPARE_STRING;
+ }
+ } else if (type0.equals(TypeInfoFactory.intTypeInfo) &&
+ type1.equals(TypeInfoFactory.intTypeInfo)) {
+ compareType = CompareType.COMPARE_INT;
+ ioi0 = (IntObjectInspector) oi0;
+ ioi1 = (IntObjectInspector) oi1;
+ } else if (type0.equals(TypeInfoFactory.longTypeInfo) &&
+ type1.equals(TypeInfoFactory.longTypeInfo)) {
+ compareType = CompareType.COMPARE_LONG;
+ loi0 = (LongObjectInspector) oi0;
+ loi1 = (LongObjectInspector) oi1;
+ } else if (type0.equals(TypeInfoFactory.byteTypeInfo) &&
+ type1.equals(TypeInfoFactory.byteTypeInfo)) {
+ compareType = CompareType.COMPARE_BYTE;
+ byoi0 = (ByteObjectInspector) oi0;
+ byoi1 = (ByteObjectInspector) oi1;
+ } else if (type0.equals(TypeInfoFactory.booleanTypeInfo) &&
+ type1.equals(TypeInfoFactory.booleanTypeInfo)) {
+ compareType = CompareType.COMPARE_BOOL;
+ boi0 = (BooleanObjectInspector) oi0;
+ boi1 = (BooleanObjectInspector) oi1;
+ } else {
+ // We don't check compatibility of two object inspectors, but directly
+ // pass them into ObjectInspectorUtils.compare(), users of this class
+ // should make sure ObjectInspectorUtils.compare() doesn't throw exceptions
+ // and returns correct results.
+ compareType = CompareType.OTHER;
+ }
+ }
+
+ public boolean areEqual(Object o0, Object o1) {
+ if (o0 == null && o1 == null) {
+ return true;
+ } else if (o0 == null || o1 == null) {
+ return false;
+ }
+ switch (compareType) {
+ case COMPARE_TEXT:
+ return (soi0.getPrimitiveWritableObject(o0).equals(
+ soi1.getPrimitiveWritableObject(o1)));
+ case COMPARE_INT:
+ return (ioi0.get(o0) == ioi1.get(o1));
+ case COMPARE_LONG:
+ return (loi0.get(o0) == loi1.get(o1));
+ case COMPARE_BYTE:
+ return (byoi0.get(o0) == byoi1.get(o1));
+ case COMPARE_BOOL:
+ return (boi0.get(o0) == boi1.get(o1));
+ case COMPARE_STRING:
+ return (soi0.getPrimitiveJavaObject(o0).equals(
+ soi1.getPrimitiveJavaObject(o1)));
+ default:
+ return (ObjectInspectorUtils.compare(
+ o0, oi0, o1, oi1) == 0);
+ }
+ }
+ }
+
+ FieldComparer[] fieldComparers;
+ int numFields;
+
+ public ListObjectsEqualComparer(ObjectInspector[] oi0, ObjectInspector[] oi1) {
+ if (oi0.length != oi1.length) {
+ throw new RuntimeException("Sizes of two lists of object inspectors don't match.");
+ }
+ numFields = oi0.length;
+ fieldComparers = new FieldComparer[numFields];
+ for (int i = 0; i < oi0.length; i++) {
+ fieldComparers[i] = new FieldComparer(oi0[i], oi1[i]);
+ }
+ }
+
+
+ /**
+ * ol0, ol1 should have equal or less number of elements than objectinspectors
+ * passed in constructor.
+ *
+ * @param ol0
+ * @param ol1
+ * @return True if object in ol0 and ol1 are all identical
+ */
+ public boolean areEqual(ArrayList<Object> ol0, ArrayList<Object> ol1) {
+ if (ol0.size() != numFields || ol1.size() != numFields) {
+ if (ol0.size() != ol1.size()) {
+ return false;
+ }
+ assert (ol0.size() <= numFields);
+ assert (ol1.size() <= numFields);
+ for (int i = 0; i < Math.min(ol0.size(), ol1.size()); i++) {
+ if (!fieldComparers[i].areEqual(ol0.get(i), ol1.get(i))) {
+ return false;
+ }
+ }
+ return true;
+ }
+
+ for (int i = 0; i < numFields; i++) {
+ if (!fieldComparers[i].areEqual(ol0.get(i), ol1.get(i))) {
+ return false;
+ }
+ }
+ return true;
+ }
+}