You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by br...@apache.org on 2013/09/18 01:46:48 UTC
svn commit: r1524254 - in /hive/trunk/ql/src:
java/org/apache/hadoop/hive/ql/exec/
java/org/apache/hadoop/hive/ql/udf/generic/ test/queries/clientpositive/
test/results/clientpositive/
Author: brock
Date: Tue Sep 17 23:46:48 2013
New Revision: 1524254
URL: http://svn.apache.org/r1524254
Log:
HIVE-5294 - Create collect UDF and make evaluator reusable (Edward Capriolo via Brock Noland)
Modified:
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFCollectSet.java
hive/trunk/ql/src/test/queries/clientpositive/udaf_collect_set.q
hive/trunk/ql/src/test/results/clientpositive/show_functions.q.out
hive/trunk/ql/src/test/results/clientpositive/udaf_collect_set.q.out
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java?rev=1524254&r1=1524253&r2=1524254&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java Tue Sep 17 23:46:48 2013
@@ -383,6 +383,7 @@ public final class FunctionRegistry {
registerGenericUDAF("histogram_numeric", new GenericUDAFHistogramNumeric());
registerGenericUDAF("percentile_approx", new GenericUDAFPercentileApprox());
registerGenericUDAF("collect_set", new GenericUDAFCollectSet());
+ registerGenericUDAF("collect_list", new GenericUDAFCollectList());
registerGenericUDAF("ngrams", new GenericUDAFnGrams());
registerGenericUDAF("context_ngrams", new GenericUDAFContextNGrams());
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFCollectSet.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFCollectSet.java?rev=1524254&r1=1524253&r2=1524254&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFCollectSet.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFCollectSet.java Tue Sep 17 23:46:48 2013
@@ -17,21 +17,13 @@
*/
package org.apache.hadoop.hive.ql.udf.generic;
-import java.util.ArrayList;
-import java.util.HashSet;
-import java.util.Set;
-
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.hive.ql.exec.Description;
import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException;
-import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.parse.SemanticException;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFMkCollectionEvaluator.BufferType;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
-import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils;
-import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.StandardListObjectInspector;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
/**
@@ -41,126 +33,23 @@ import org.apache.hadoop.hive.serde2.typ
public class GenericUDAFCollectSet extends AbstractGenericUDAFResolver {
static final Log LOG = LogFactory.getLog(GenericUDAFCollectSet.class.getName());
-
+
public GenericUDAFCollectSet() {
}
@Override
public GenericUDAFEvaluator getEvaluator(TypeInfo[] parameters)
throws SemanticException {
-
if (parameters.length != 1) {
throw new UDFArgumentTypeException(parameters.length - 1,
"Exactly one argument is expected.");
}
-
if (parameters[0].getCategory() != ObjectInspector.Category.PRIMITIVE) {
throw new UDFArgumentTypeException(0,
"Only primitive type arguments are accepted but "
+ parameters[0].getTypeName() + " was passed as parameter 1.");
}
-
- return new GenericUDAFMkSetEvaluator();
+ return new GenericUDAFMkCollectionEvaluator(BufferType.SET);
}
- public static class GenericUDAFMkSetEvaluator extends GenericUDAFEvaluator {
-
- // For PARTIAL1 and COMPLETE: ObjectInspectors for original data
- private PrimitiveObjectInspector inputOI;
- // For PARTIAL2 and FINAL: ObjectInspectors for partial aggregations (list
- // of objs)
- private transient StandardListObjectInspector loi;
-
- private transient StandardListObjectInspector internalMergeOI;
-
- @Override
- public ObjectInspector init(Mode m, ObjectInspector[] parameters)
- throws HiveException {
- super.init(m, parameters);
- // init output object inspectors
- // The output of a partial aggregation is a list
- if (m == Mode.PARTIAL1) {
- inputOI = (PrimitiveObjectInspector) parameters[0];
- return ObjectInspectorFactory
- .getStandardListObjectInspector((PrimitiveObjectInspector) ObjectInspectorUtils
- .getStandardObjectInspector(inputOI));
- } else {
- if (!(parameters[0] instanceof StandardListObjectInspector)) {
- //no map aggregation.
- inputOI = (PrimitiveObjectInspector) ObjectInspectorUtils
- .getStandardObjectInspector(parameters[0]);
- return (StandardListObjectInspector) ObjectInspectorFactory
- .getStandardListObjectInspector(inputOI);
- } else {
- internalMergeOI = (StandardListObjectInspector) parameters[0];
- inputOI = (PrimitiveObjectInspector) internalMergeOI.getListElementObjectInspector();
- loi = (StandardListObjectInspector) ObjectInspectorUtils.getStandardObjectInspector(internalMergeOI);
- return loi;
- }
- }
- }
-
- static class MkArrayAggregationBuffer extends AbstractAggregationBuffer {
- Set<Object> container;
- }
-
- @Override
- public void reset(AggregationBuffer agg) throws HiveException {
- ((MkArrayAggregationBuffer) agg).container = new HashSet<Object>();
- }
-
- @Override
- public AggregationBuffer getNewAggregationBuffer() throws HiveException {
- MkArrayAggregationBuffer ret = new MkArrayAggregationBuffer();
- reset(ret);
- return ret;
- }
-
- //mapside
- @Override
- public void iterate(AggregationBuffer agg, Object[] parameters)
- throws HiveException {
- assert (parameters.length == 1);
- Object p = parameters[0];
-
- if (p != null) {
- MkArrayAggregationBuffer myagg = (MkArrayAggregationBuffer) agg;
- putIntoSet(p, myagg);
- }
- }
-
- //mapside
- @Override
- public Object terminatePartial(AggregationBuffer agg) throws HiveException {
- MkArrayAggregationBuffer myagg = (MkArrayAggregationBuffer) agg;
- ArrayList<Object> ret = new ArrayList<Object>(myagg.container.size());
- ret.addAll(myagg.container);
- return ret;
- }
-
- @Override
- public void merge(AggregationBuffer agg, Object partial)
- throws HiveException {
- MkArrayAggregationBuffer myagg = (MkArrayAggregationBuffer) agg;
- ArrayList<Object> partialResult = (ArrayList<Object>) internalMergeOI.getList(partial);
- for(Object i : partialResult) {
- putIntoSet(i, myagg);
- }
- }
-
- @Override
- public Object terminate(AggregationBuffer agg) throws HiveException {
- MkArrayAggregationBuffer myagg = (MkArrayAggregationBuffer) agg;
- ArrayList<Object> ret = new ArrayList<Object>(myagg.container.size());
- ret.addAll(myagg.container);
- return ret;
- }
-
- private void putIntoSet(Object p, MkArrayAggregationBuffer myagg) {
- Object pCopy = ObjectInspectorUtils.copyToStandardObject(p,
- this.inputOI);
- myagg.container.add(pCopy);
- }
- }
-
}
Modified: hive/trunk/ql/src/test/queries/clientpositive/udaf_collect_set.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/udaf_collect_set.q?rev=1524254&r1=1524253&r2=1524254&view=diff
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/udaf_collect_set.q (original)
+++ hive/trunk/ql/src/test/queries/clientpositive/udaf_collect_set.q Tue Sep 17 23:46:48 2013
@@ -1,6 +1,9 @@
DESCRIBE FUNCTION collect_set;
DESCRIBE FUNCTION EXTENDED collect_set;
+DESCRIBE FUNCTION collect_list;
+DESCRIBE FUNCTION EXTENDED collect_list;
+
set hive.map.aggr = false;
set hive.groupby.skewindata = false;
@@ -8,6 +11,10 @@ SELECT key, collect_set(value)
FROM src
GROUP BY key ORDER BY key limit 20;
+SELECT key, collect_list(value)
+FROM src
+GROUP BY key ORDER by key limit 20;
+
set hive.map.aggr = true;
set hive.groupby.skewindata = false;
@@ -15,6 +22,10 @@ SELECT key, collect_set(value)
FROM src
GROUP BY key ORDER BY key limit 20;
+SELECT key, collect_list(value)
+FROM src
+GROUP BY key ORDER BY key limit 20;
+
set hive.map.aggr = false;
set hive.groupby.skewindata = true;
Modified: hive/trunk/ql/src/test/results/clientpositive/show_functions.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/show_functions.q.out?rev=1524254&r1=1524253&r2=1524254&view=diff
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/show_functions.q.out (original)
+++ hive/trunk/ql/src/test/results/clientpositive/show_functions.q.out Tue Sep 17 23:46:48 2013
@@ -36,6 +36,7 @@ case
ceil
ceiling
coalesce
+collect_list
collect_set
compute_stats
concat
@@ -202,6 +203,7 @@ case
ceil
ceiling
coalesce
+collect_list
collect_set
compute_stats
concat
Modified: hive/trunk/ql/src/test/results/clientpositive/udaf_collect_set.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/udaf_collect_set.q.out?rev=1524254&r1=1524253&r2=1524254&view=diff
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/udaf_collect_set.q.out (original)
+++ hive/trunk/ql/src/test/results/clientpositive/udaf_collect_set.q.out Tue Sep 17 23:46:48 2013
@@ -8,6 +8,16 @@ PREHOOK: type: DESCFUNCTION
POSTHOOK: query: DESCRIBE FUNCTION EXTENDED collect_set
POSTHOOK: type: DESCFUNCTION
collect_set(x) - Returns a set of objects with duplicate elements eliminated
+PREHOOK: query: DESCRIBE FUNCTION collect_list
+PREHOOK: type: DESCFUNCTION
+POSTHOOK: query: DESCRIBE FUNCTION collect_list
+POSTHOOK: type: DESCFUNCTION
+collect_list(x) - Returns a list of objects with duplicates
+PREHOOK: query: DESCRIBE FUNCTION EXTENDED collect_list
+PREHOOK: type: DESCFUNCTION
+POSTHOOK: query: DESCRIBE FUNCTION EXTENDED collect_list
+POSTHOOK: type: DESCFUNCTION
+collect_list(x) - Returns a list of objects with duplicates
PREHOOK: query: SELECT key, collect_set(value)
FROM src
GROUP BY key ORDER BY key limit 20
@@ -40,6 +50,38 @@ POSTHOOK: Input: default@src
128 ["val_128"]
129 ["val_129"]
131 ["val_131"]
+PREHOOK: query: SELECT key, collect_list(value)
+FROM src
+GROUP BY key ORDER by key limit 20
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT key, collect_list(value)
+FROM src
+GROUP BY key ORDER by key limit 20
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+0 ["val_0","val_0","val_0"]
+10 ["val_10"]
+100 ["val_100","val_100"]
+103 ["val_103","val_103"]
+104 ["val_104","val_104"]
+105 ["val_105"]
+11 ["val_11"]
+111 ["val_111"]
+113 ["val_113","val_113"]
+114 ["val_114"]
+116 ["val_116"]
+118 ["val_118","val_118"]
+119 ["val_119","val_119","val_119"]
+12 ["val_12","val_12"]
+120 ["val_120","val_120"]
+125 ["val_125","val_125"]
+126 ["val_126"]
+128 ["val_128","val_128","val_128"]
+129 ["val_129","val_129"]
+131 ["val_131"]
PREHOOK: query: SELECT key, collect_set(value)
FROM src
GROUP BY key ORDER BY key limit 20
@@ -72,6 +114,38 @@ POSTHOOK: Input: default@src
128 ["val_128"]
129 ["val_129"]
131 ["val_131"]
+PREHOOK: query: SELECT key, collect_list(value)
+FROM src
+GROUP BY key ORDER BY key limit 20
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT key, collect_list(value)
+FROM src
+GROUP BY key ORDER BY key limit 20
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+0 ["val_0","val_0","val_0"]
+10 ["val_10"]
+100 ["val_100","val_100"]
+103 ["val_103","val_103"]
+104 ["val_104","val_104"]
+105 ["val_105"]
+11 ["val_11"]
+111 ["val_111"]
+113 ["val_113","val_113"]
+114 ["val_114"]
+116 ["val_116"]
+118 ["val_118","val_118"]
+119 ["val_119","val_119","val_119"]
+12 ["val_12","val_12"]
+120 ["val_120","val_120"]
+125 ["val_125","val_125"]
+126 ["val_126"]
+128 ["val_128","val_128","val_128"]
+129 ["val_129","val_129"]
+131 ["val_131"]
PREHOOK: query: SELECT key, collect_set(value)
FROM src
GROUP BY key ORDER BY key limit 20