You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by br...@apache.org on 2013/09/18 01:46:48 UTC

svn commit: r1524254 - in /hive/trunk/ql/src: java/org/apache/hadoop/hive/ql/exec/ java/org/apache/hadoop/hive/ql/udf/generic/ test/queries/clientpositive/ test/results/clientpositive/

Author: brock
Date: Tue Sep 17 23:46:48 2013
New Revision: 1524254

URL: http://svn.apache.org/r1524254
Log:
HIVE-5294 - Create collect UDF and make evaluator reusable (Edward Capriolo via Brock Noland)

Modified:
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFCollectSet.java
    hive/trunk/ql/src/test/queries/clientpositive/udaf_collect_set.q
    hive/trunk/ql/src/test/results/clientpositive/show_functions.q.out
    hive/trunk/ql/src/test/results/clientpositive/udaf_collect_set.q.out

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java?rev=1524254&r1=1524253&r2=1524254&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java Tue Sep 17 23:46:48 2013
@@ -383,6 +383,7 @@ public final class FunctionRegistry {
     registerGenericUDAF("histogram_numeric", new GenericUDAFHistogramNumeric());
     registerGenericUDAF("percentile_approx", new GenericUDAFPercentileApprox());
     registerGenericUDAF("collect_set", new GenericUDAFCollectSet());
+    registerGenericUDAF("collect_list", new GenericUDAFCollectList());
 
     registerGenericUDAF("ngrams", new GenericUDAFnGrams());
     registerGenericUDAF("context_ngrams", new GenericUDAFContextNGrams());

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFCollectSet.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFCollectSet.java?rev=1524254&r1=1524253&r2=1524254&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFCollectSet.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFCollectSet.java Tue Sep 17 23:46:48 2013
@@ -17,21 +17,13 @@
  */
 package org.apache.hadoop.hive.ql.udf.generic;
 
-import java.util.ArrayList;
-import java.util.HashSet;
-import java.util.Set;
-
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.hive.ql.exec.Description;
 import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException;
-import org.apache.hadoop.hive.ql.metadata.HiveException;
 import org.apache.hadoop.hive.ql.parse.SemanticException;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFMkCollectionEvaluator.BufferType;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
-import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils;
-import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.StandardListObjectInspector;
 import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
 
 /**
@@ -41,126 +33,23 @@ import org.apache.hadoop.hive.serde2.typ
 public class GenericUDAFCollectSet extends AbstractGenericUDAFResolver {
 
   static final Log LOG = LogFactory.getLog(GenericUDAFCollectSet.class.getName());
-  
+
   public GenericUDAFCollectSet() {
   }
 
   @Override
   public GenericUDAFEvaluator getEvaluator(TypeInfo[] parameters)
       throws SemanticException {
-
     if (parameters.length != 1) {
       throw new UDFArgumentTypeException(parameters.length - 1,
           "Exactly one argument is expected.");
     }
-
     if (parameters[0].getCategory() != ObjectInspector.Category.PRIMITIVE) {
       throw new UDFArgumentTypeException(0,
           "Only primitive type arguments are accepted but "
           + parameters[0].getTypeName() + " was passed as parameter 1.");
     }
-
-    return new GenericUDAFMkSetEvaluator();
+    return new GenericUDAFMkCollectionEvaluator(BufferType.SET);
   }
 
-  public static class GenericUDAFMkSetEvaluator extends GenericUDAFEvaluator {
-    
-    // For PARTIAL1 and COMPLETE: ObjectInspectors for original data
-    private PrimitiveObjectInspector inputOI;
-    // For PARTIAL2 and FINAL: ObjectInspectors for partial aggregations (list
-    // of objs)
-    private transient StandardListObjectInspector loi;
-    
-    private transient StandardListObjectInspector internalMergeOI;
-    
-    @Override
-    public ObjectInspector init(Mode m, ObjectInspector[] parameters)
-        throws HiveException {
-      super.init(m, parameters);
-      // init output object inspectors
-      // The output of a partial aggregation is a list
-      if (m == Mode.PARTIAL1) {
-        inputOI = (PrimitiveObjectInspector) parameters[0];
-        return ObjectInspectorFactory
-            .getStandardListObjectInspector((PrimitiveObjectInspector) ObjectInspectorUtils
-                .getStandardObjectInspector(inputOI));
-      } else {
-        if (!(parameters[0] instanceof StandardListObjectInspector)) {
-          //no map aggregation.
-          inputOI = (PrimitiveObjectInspector)  ObjectInspectorUtils
-          .getStandardObjectInspector(parameters[0]);
-          return (StandardListObjectInspector) ObjectInspectorFactory
-              .getStandardListObjectInspector(inputOI);
-        } else {
-          internalMergeOI = (StandardListObjectInspector) parameters[0];
-          inputOI = (PrimitiveObjectInspector) internalMergeOI.getListElementObjectInspector();
-          loi = (StandardListObjectInspector) ObjectInspectorUtils.getStandardObjectInspector(internalMergeOI);          
-          return loi;
-        }
-      }
-    }
-    
-    static class MkArrayAggregationBuffer extends AbstractAggregationBuffer {
-      Set<Object> container;
-    }
-    
-    @Override
-    public void reset(AggregationBuffer agg) throws HiveException {
-      ((MkArrayAggregationBuffer) agg).container = new HashSet<Object>();
-    }
-    
-    @Override
-    public AggregationBuffer getNewAggregationBuffer() throws HiveException {
-      MkArrayAggregationBuffer ret = new MkArrayAggregationBuffer();
-      reset(ret);
-      return ret;
-    }
-
-    //mapside
-    @Override
-    public void iterate(AggregationBuffer agg, Object[] parameters)
-        throws HiveException {
-      assert (parameters.length == 1);
-      Object p = parameters[0];
-
-      if (p != null) {
-        MkArrayAggregationBuffer myagg = (MkArrayAggregationBuffer) agg;
-        putIntoSet(p, myagg);
-      }
-    }
-
-    //mapside
-    @Override
-    public Object terminatePartial(AggregationBuffer agg) throws HiveException {
-      MkArrayAggregationBuffer myagg = (MkArrayAggregationBuffer) agg;
-      ArrayList<Object> ret = new ArrayList<Object>(myagg.container.size());
-      ret.addAll(myagg.container);
-      return ret;
-    }
-
-    @Override
-    public void merge(AggregationBuffer agg, Object partial)
-        throws HiveException {
-      MkArrayAggregationBuffer myagg = (MkArrayAggregationBuffer) agg;
-      ArrayList<Object> partialResult = (ArrayList<Object>) internalMergeOI.getList(partial);
-      for(Object i : partialResult) {
-        putIntoSet(i, myagg);
-      }
-    }
-    
-    @Override
-    public Object terminate(AggregationBuffer agg) throws HiveException {
-      MkArrayAggregationBuffer myagg = (MkArrayAggregationBuffer) agg;
-      ArrayList<Object> ret = new ArrayList<Object>(myagg.container.size());
-      ret.addAll(myagg.container);
-      return ret;
-    }
-    
-    private void putIntoSet(Object p, MkArrayAggregationBuffer myagg) {
-      Object pCopy = ObjectInspectorUtils.copyToStandardObject(p,
-          this.inputOI);
-      myagg.container.add(pCopy);
-    }
-  }
-  
 }

Modified: hive/trunk/ql/src/test/queries/clientpositive/udaf_collect_set.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/udaf_collect_set.q?rev=1524254&r1=1524253&r2=1524254&view=diff
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/udaf_collect_set.q (original)
+++ hive/trunk/ql/src/test/queries/clientpositive/udaf_collect_set.q Tue Sep 17 23:46:48 2013
@@ -1,6 +1,9 @@
 DESCRIBE FUNCTION collect_set;
 DESCRIBE FUNCTION EXTENDED collect_set;
 
+DESCRIBE FUNCTION collect_list;
+DESCRIBE FUNCTION EXTENDED collect_list;
+
 set hive.map.aggr = false;
 set hive.groupby.skewindata = false;
 
@@ -8,6 +11,10 @@ SELECT key, collect_set(value)
 FROM src
 GROUP BY key ORDER BY key limit 20;
 
+SELECT key, collect_list(value)
+FROM src
+GROUP BY key ORDER by key limit 20;
+
 set hive.map.aggr = true;
 set hive.groupby.skewindata = false;
 
@@ -15,6 +22,10 @@ SELECT key, collect_set(value)
 FROM src
 GROUP BY key ORDER BY key limit 20;
 
+SELECT key, collect_list(value)
+FROM src
+GROUP BY key ORDER BY key limit 20;
+
 set hive.map.aggr = false;
 set hive.groupby.skewindata = true;
 

Modified: hive/trunk/ql/src/test/results/clientpositive/show_functions.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/show_functions.q.out?rev=1524254&r1=1524253&r2=1524254&view=diff
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/show_functions.q.out (original)
+++ hive/trunk/ql/src/test/results/clientpositive/show_functions.q.out Tue Sep 17 23:46:48 2013
@@ -36,6 +36,7 @@ case
 ceil
 ceiling
 coalesce
+collect_list
 collect_set
 compute_stats
 concat
@@ -202,6 +203,7 @@ case
 ceil
 ceiling
 coalesce
+collect_list
 collect_set
 compute_stats
 concat

Modified: hive/trunk/ql/src/test/results/clientpositive/udaf_collect_set.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/udaf_collect_set.q.out?rev=1524254&r1=1524253&r2=1524254&view=diff
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/udaf_collect_set.q.out (original)
+++ hive/trunk/ql/src/test/results/clientpositive/udaf_collect_set.q.out Tue Sep 17 23:46:48 2013
@@ -8,6 +8,16 @@ PREHOOK: type: DESCFUNCTION
 POSTHOOK: query: DESCRIBE FUNCTION EXTENDED collect_set
 POSTHOOK: type: DESCFUNCTION
 collect_set(x) - Returns a set of objects with duplicate elements eliminated
+PREHOOK: query: DESCRIBE FUNCTION collect_list
+PREHOOK: type: DESCFUNCTION
+POSTHOOK: query: DESCRIBE FUNCTION collect_list
+POSTHOOK: type: DESCFUNCTION
+collect_list(x) - Returns a list of objects with duplicates
+PREHOOK: query: DESCRIBE FUNCTION EXTENDED collect_list
+PREHOOK: type: DESCFUNCTION
+POSTHOOK: query: DESCRIBE FUNCTION EXTENDED collect_list
+POSTHOOK: type: DESCFUNCTION
+collect_list(x) - Returns a list of objects with duplicates
 PREHOOK: query: SELECT key, collect_set(value)
 FROM src
 GROUP BY key ORDER BY key limit 20
@@ -40,6 +50,38 @@ POSTHOOK: Input: default@src
 128	["val_128"]
 129	["val_129"]
 131	["val_131"]
+PREHOOK: query: SELECT key, collect_list(value)
+FROM src
+GROUP BY key ORDER by key limit 20
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT key, collect_list(value)
+FROM src
+GROUP BY key ORDER by key limit 20
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+0	["val_0","val_0","val_0"]
+10	["val_10"]
+100	["val_100","val_100"]
+103	["val_103","val_103"]
+104	["val_104","val_104"]
+105	["val_105"]
+11	["val_11"]
+111	["val_111"]
+113	["val_113","val_113"]
+114	["val_114"]
+116	["val_116"]
+118	["val_118","val_118"]
+119	["val_119","val_119","val_119"]
+12	["val_12","val_12"]
+120	["val_120","val_120"]
+125	["val_125","val_125"]
+126	["val_126"]
+128	["val_128","val_128","val_128"]
+129	["val_129","val_129"]
+131	["val_131"]
 PREHOOK: query: SELECT key, collect_set(value)
 FROM src
 GROUP BY key ORDER BY key limit 20
@@ -72,6 +114,38 @@ POSTHOOK: Input: default@src
 128	["val_128"]
 129	["val_129"]
 131	["val_131"]
+PREHOOK: query: SELECT key, collect_list(value)
+FROM src
+GROUP BY key ORDER BY key limit 20
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT key, collect_list(value)
+FROM src
+GROUP BY key ORDER BY key limit 20
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+0	["val_0","val_0","val_0"]
+10	["val_10"]
+100	["val_100","val_100"]
+103	["val_103","val_103"]
+104	["val_104","val_104"]
+105	["val_105"]
+11	["val_11"]
+111	["val_111"]
+113	["val_113","val_113"]
+114	["val_114"]
+116	["val_116"]
+118	["val_118","val_118"]
+119	["val_119","val_119","val_119"]
+12	["val_12","val_12"]
+120	["val_120","val_120"]
+125	["val_125","val_125"]
+126	["val_126"]
+128	["val_128","val_128","val_128"]
+129	["val_129","val_129"]
+131	["val_131"]
 PREHOOK: query: SELECT key, collect_set(value)
 FROM src
 GROUP BY key ORDER BY key limit 20