You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by cw...@apache.org on 2012/01/20 22:19:08 UTC

svn commit: r1234146 - in /hive/trunk/ql/src: java/org/apache/hadoop/hive/ql/exec/ java/org/apache/hadoop/hive/ql/udf/generic/ test/queries/clientnegative/ test/queries/clientpositive/ test/results/clientnegative/ test/results/clientpositive/

Author: cws
Date: Fri Jan 20 21:19:07 2012
New Revision: 1234146

URL: http://svn.apache.org/viewvc?rev=1234146&view=rev
Log:
HIVE-2279. Implement sort(array) UDF (Zhenxiao Luo via cws)

Added:
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFSortArray.java   (with props)
    hive/trunk/ql/src/test/queries/clientnegative/udf_sort_array_wrong1.q
    hive/trunk/ql/src/test/queries/clientnegative/udf_sort_array_wrong2.q
    hive/trunk/ql/src/test/queries/clientnegative/udf_sort_array_wrong3.q
    hive/trunk/ql/src/test/queries/clientpositive/udf_sort_array.q
    hive/trunk/ql/src/test/results/clientnegative/udf_sort_array_wrong1.q.out
    hive/trunk/ql/src/test/results/clientnegative/udf_sort_array_wrong2.q.out
    hive/trunk/ql/src/test/results/clientnegative/udf_sort_array_wrong3.q.out
    hive/trunk/ql/src/test/results/clientpositive/udf_sort_array.q.out
Modified:
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java
    hive/trunk/ql/src/test/results/clientpositive/show_functions.q.out

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java?rev=1234146&r1=1234145&r2=1234146&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java Fri Jan 20 21:19:07 2012
@@ -157,6 +157,7 @@ import org.apache.hadoop.hive.ql.udf.gen
 import org.apache.hadoop.hive.ql.udf.generic.GenericUDFCase;
 import org.apache.hadoop.hive.ql.udf.generic.GenericUDFCoalesce;
 import org.apache.hadoop.hive.ql.udf.generic.GenericUDFConcatWS;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDFSortArray;
 import org.apache.hadoop.hive.ql.udf.generic.GenericUDFEWAHBitmapAnd;
 import org.apache.hadoop.hive.ql.udf.generic.GenericUDFEWAHBitmapEmpty;
 import org.apache.hadoop.hive.ql.udf.generic.GenericUDFEWAHBitmapOr;
@@ -446,6 +447,7 @@ public final class FunctionRegistry {
     registerGenericUDF("locate", GenericUDFLocate.class);
     registerGenericUDF("elt", GenericUDFElt.class);
     registerGenericUDF("concat_ws", GenericUDFConcatWS.class);
+    registerGenericUDF("sort_array", GenericUDFSortArray.class);
     registerGenericUDF("array_contains", GenericUDFArrayContains.class);
     registerGenericUDF("sentences", GenericUDFSentences.class);
     registerGenericUDF("map_keys", GenericUDFMapKeys.class);

Added: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFSortArray.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFSortArray.java?rev=1234146&view=auto
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFSortArray.java (added)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFSortArray.java Fri Jan 20 21:19:07 2012
@@ -0,0 +1,116 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.udf.generic;
+
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.Comparator;
+import java.util.List;
+
+import org.apache.hadoop.hive.ql.exec.Description;
+import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
+import org.apache.hadoop.hive.ql.exec.UDFArgumentLengthException;
+import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.serde.Constants;
+import org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils;
+import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters.Converter;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.StringObjectInspector;
+
+/**
+ * Generic UDF for array sort
+ * <code>SORT_ARRAY(array(obj1, obj2, obj3...))</code>.
+ *
+ * @see org.apache.hadoop.hive.ql.udf.generic.GenericUDF
+ */
+@Description(name = "sort_array",
+    value = "_FUNC_(array(obj1, obj2,...)) - "
+    + "Sorts the input array in ascending order according to the natural ordering"
+    + " of the array elements.",
+    extended = "Example:\n"
+    + "  > SELECT _FUNC_(array('b', 'd', 'c', 'a')) FROM src LIMIT 1;\n"
+    + "  'a', 'b', 'c', 'd'")
+public class GenericUDFSortArray extends GenericUDF {
+  private Converter[] converters;
+  private ArrayList<Object> ret = new ArrayList<Object>();
+  private ObjectInspector[] argumentOIs;
+
+  @Override
+  public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumentException {
+    GenericUDFUtils.ReturnObjectInspectorResolver returnOIResolver;
+    returnOIResolver = new GenericUDFUtils.ReturnObjectInspectorResolver(true);
+
+    if (arguments.length != 1) {
+      throw new UDFArgumentLengthException(
+        "The function SORT_ARRAY(array(obj1, obj2,...)) needs one argument.");
+    }
+
+    switch(arguments[0].getCategory()) {
+      case LIST:
+        if(((ListObjectInspector)(arguments[0])).getListElementObjectInspector()
+          .getCategory().equals(Category.PRIMITIVE))
+          break;
+      default:
+        throw new UDFArgumentTypeException(0, "Argument 1"
+          + " of function SORT_ARRAY must be " + Constants.LIST_TYPE_NAME
+          + "<" + Category.PRIMITIVE + ">, but " + arguments[0].getTypeName()
+          + " was found.");
+    }
+
+    ObjectInspector elementObjectInspector =
+      ((ListObjectInspector)(arguments[0])).getListElementObjectInspector();
+    argumentOIs = arguments;
+    converters = new Converter[arguments.length];
+    ObjectInspector returnOI = returnOIResolver.get();
+    if (returnOI == null) {
+      returnOI = elementObjectInspector;
+    }
+    converters[0] = ObjectInspectorConverters.getConverter(elementObjectInspector, returnOI);
+
+    return ObjectInspectorFactory.getStandardListObjectInspector(returnOI);
+  }
+
+  @Override
+  public Object evaluate(DeferredObject[] arguments) throws HiveException {
+    if (arguments[0].get() == null) {
+      return null;
+    }
+
+    Object array = arguments[0].get();
+    ListObjectInspector arrayOI = (ListObjectInspector) argumentOIs[0];
+    ArrayList retArray = (ArrayList) arrayOI.getList(array);
+    Collections.sort(retArray);
+
+    ret.clear();
+    for (int i = 0; i < retArray.size(); i++) {
+      ret.add(converters[0].convert(retArray.get(i)));
+    }
+    return ret;
+  }
+
+  @Override
+  public String getDisplayString(String[] children) {
+    assert (children.length == 1);
+    return "sort_array(" + children[0] + ")";
+ }
+}

Propchange: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFSortArray.java
------------------------------------------------------------------------------
    svn:eol-style = native

Added: hive/trunk/ql/src/test/queries/clientnegative/udf_sort_array_wrong1.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientnegative/udf_sort_array_wrong1.q?rev=1234146&view=auto
==============================================================================
--- hive/trunk/ql/src/test/queries/clientnegative/udf_sort_array_wrong1.q (added)
+++ hive/trunk/ql/src/test/queries/clientnegative/udf_sort_array_wrong1.q Fri Jan 20 21:19:07 2012
@@ -0,0 +1,2 @@
+-- invalid argument number
+SELECT sort_array(array(2, 5, 4), 3) FROM src LIMIT 1;

Added: hive/trunk/ql/src/test/queries/clientnegative/udf_sort_array_wrong2.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientnegative/udf_sort_array_wrong2.q?rev=1234146&view=auto
==============================================================================
--- hive/trunk/ql/src/test/queries/clientnegative/udf_sort_array_wrong2.q (added)
+++ hive/trunk/ql/src/test/queries/clientnegative/udf_sort_array_wrong2.q Fri Jan 20 21:19:07 2012
@@ -0,0 +1,2 @@
+-- invalid argument type
+SELECT sort_array("Invalid") FROM src LIMIT 1;

Added: hive/trunk/ql/src/test/queries/clientnegative/udf_sort_array_wrong3.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientnegative/udf_sort_array_wrong3.q?rev=1234146&view=auto
==============================================================================
--- hive/trunk/ql/src/test/queries/clientnegative/udf_sort_array_wrong3.q (added)
+++ hive/trunk/ql/src/test/queries/clientnegative/udf_sort_array_wrong3.q Fri Jan 20 21:19:07 2012
@@ -0,0 +1,2 @@
+-- invalid argument type
+SELECT sort_array(array(array(10, 20), array(5, 15), array(3, 13))) FROM src LIMIT 1;

Added: hive/trunk/ql/src/test/queries/clientpositive/udf_sort_array.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/udf_sort_array.q?rev=1234146&view=auto
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/udf_sort_array.q (added)
+++ hive/trunk/ql/src/test/queries/clientpositive/udf_sort_array.q Fri Jan 20 21:19:07 2012
@@ -0,0 +1,19 @@
+use default;
+-- Test sort_array() UDF
+
+DESCRIBE FUNCTION sort_array;
+DESCRIBE FUNCTION EXTENDED sort_array;
+
+-- Evaluate function against STRING valued keys
+EXPLAIN
+SELECT sort_array(array("b", "d", "c", "a")) FROM src LIMIT 1;
+
+SELECT sort_array(array("f", "a", "g", "c", "b", "d", "e")) FROM src LIMIT 1;
+SELECT sort_array(sort_array(array("hadoop distributed file system", "enterprise databases", "hadoop map-reduce"))) FROM src LIMIT 1;
+
+-- Evaluate function against INT valued keys
+SELECT sort_array(array(2, 9, 7, 3, 5, 4, 1, 6, 8)) FROM src LIMIT 1;
+
+-- Evaluate function against FLOAT valued keys
+SELECT sort_array(sort_array(array(2.333, 9, 1.325, 2.003, 0.777, -3.445, 1))) FROM src LIMIT 1;
+

Added: hive/trunk/ql/src/test/results/clientnegative/udf_sort_array_wrong1.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientnegative/udf_sort_array_wrong1.q.out?rev=1234146&view=auto
==============================================================================
--- hive/trunk/ql/src/test/results/clientnegative/udf_sort_array_wrong1.q.out (added)
+++ hive/trunk/ql/src/test/results/clientnegative/udf_sort_array_wrong1.q.out Fri Jan 20 21:19:07 2012
@@ -0,0 +1 @@
+FAILED: Error in semantic analysis: Line 2:7 Arguments length mismatch '3': The function SORT_ARRAY(array(obj1, obj2,...)) needs one argument.

Added: hive/trunk/ql/src/test/results/clientnegative/udf_sort_array_wrong2.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientnegative/udf_sort_array_wrong2.q.out?rev=1234146&view=auto
==============================================================================
--- hive/trunk/ql/src/test/results/clientnegative/udf_sort_array_wrong2.q.out (added)
+++ hive/trunk/ql/src/test/results/clientnegative/udf_sort_array_wrong2.q.out Fri Jan 20 21:19:07 2012
@@ -0,0 +1 @@
+FAILED: Error in semantic analysis: Line 2:18 Argument type mismatch '"Invalid"': Argument 1 of function SORT_ARRAY must be array<PRIMITIVE>, but string was found.

Added: hive/trunk/ql/src/test/results/clientnegative/udf_sort_array_wrong3.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientnegative/udf_sort_array_wrong3.q.out?rev=1234146&view=auto
==============================================================================
--- hive/trunk/ql/src/test/results/clientnegative/udf_sort_array_wrong3.q.out (added)
+++ hive/trunk/ql/src/test/results/clientnegative/udf_sort_array_wrong3.q.out Fri Jan 20 21:19:07 2012
@@ -0,0 +1 @@
+FAILED: Error in semantic analysis: Line 2:18 Argument type mismatch '13': Argument 1 of function SORT_ARRAY must be array<PRIMITIVE>, but array<array<int>> was found.

Modified: hive/trunk/ql/src/test/results/clientpositive/show_functions.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/show_functions.q.out?rev=1234146&r1=1234145&r2=1234146&view=diff
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/show_functions.q.out (original)
+++ hive/trunk/ql/src/test/results/clientpositive/show_functions.q.out Fri Jan 20 21:19:07 2012
@@ -129,6 +129,7 @@ sentences
 sign
 sin
 size
+sort_array
 space
 split
 sqrt

Added: hive/trunk/ql/src/test/results/clientpositive/udf_sort_array.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/udf_sort_array.q.out?rev=1234146&view=auto
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/udf_sort_array.q.out (added)
+++ hive/trunk/ql/src/test/results/clientpositive/udf_sort_array.q.out Fri Jan 20 21:19:07 2012
@@ -0,0 +1,101 @@
+PREHOOK: query: use default
+PREHOOK: type: SWITCHDATABASE
+POSTHOOK: query: use default
+POSTHOOK: type: SWITCHDATABASE
+PREHOOK: query: -- Test sort_array() UDF
+
+DESCRIBE FUNCTION sort_array
+PREHOOK: type: DESCFUNCTION
+POSTHOOK: query: -- Test sort_array() UDF
+
+DESCRIBE FUNCTION sort_array
+POSTHOOK: type: DESCFUNCTION
+sort_array(array(obj1, obj2,...)) - Sorts the input array in ascending order according to the natural ordering of the array elements.
+PREHOOK: query: DESCRIBE FUNCTION EXTENDED sort_array
+PREHOOK: type: DESCFUNCTION
+POSTHOOK: query: DESCRIBE FUNCTION EXTENDED sort_array
+POSTHOOK: type: DESCFUNCTION
+sort_array(array(obj1, obj2,...)) - Sorts the input array in ascending order according to the natural ordering of the array elements.
+Example:
+  > SELECT sort_array(array('b', 'd', 'c', 'a')) FROM src LIMIT 1;
+  'a', 'b', 'c', 'd'
+PREHOOK: query: -- Evaluate function against STRING valued keys
+EXPLAIN
+SELECT sort_array(array("b", "d", "c", "a")) FROM src LIMIT 1
+PREHOOK: type: QUERY
+POSTHOOK: query: -- Evaluate function against STRING valued keys
+EXPLAIN
+SELECT sort_array(array("b", "d", "c", "a")) FROM src LIMIT 1
+POSTHOOK: type: QUERY
+ABSTRACT SYNTAX TREE:
+  (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION sort_array (TOK_FUNCTION array "b" "d" "c" "a")))) (TOK_LIMIT 1)))
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 is a root stage
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Alias -> Map Operator Tree:
+        src 
+          TableScan
+            alias: src
+            Select Operator
+              expressions:
+                    expr: sort_array(array('b','d','c','a'))
+                    type: array<string>
+              outputColumnNames: _col0
+              Limit
+                File Output Operator
+                  compressed: false
+                  GlobalTableId: 0
+                  table:
+                      input format: org.apache.hadoop.mapred.TextInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: 1
+
+
+PREHOOK: query: SELECT sort_array(array("f", "a", "g", "c", "b", "d", "e")) FROM src LIMIT 1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT sort_array(array("f", "a", "g", "c", "b", "d", "e")) FROM src LIMIT 1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+["a","b","c","d","e","f","g"]
+PREHOOK: query: SELECT sort_array(sort_array(array("hadoop distributed file system", "enterprise databases", "hadoop map-reduce"))) FROM src LIMIT 1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT sort_array(sort_array(array("hadoop distributed file system", "enterprise databases", "hadoop map-reduce"))) FROM src LIMIT 1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+["enterprise databases","hadoop distributed file system","hadoop map-reduce"]
+PREHOOK: query: -- Evaluate function against INT valued keys
+SELECT sort_array(array(2, 9, 7, 3, 5, 4, 1, 6, 8)) FROM src LIMIT 1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: -- Evaluate function against INT valued keys
+SELECT sort_array(array(2, 9, 7, 3, 5, 4, 1, 6, 8)) FROM src LIMIT 1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+[1,2,3,4,5,6,7,8,9]
+PREHOOK: query: -- Evaluate function against FLOAT valued keys
+SELECT sort_array(sort_array(array(2.333, 9, 1.325, 2.003, 0.777, -3.445, 1))) FROM src LIMIT 1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: -- Evaluate function against FLOAT valued keys
+SELECT sort_array(sort_array(array(2.333, 9, 1.325, 2.003, 0.777, -3.445, 1))) FROM src LIMIT 1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+[-3.445,0.777,1.0,1.325,2.003,2.333,9.0]