You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by cw...@apache.org on 2012/01/20 22:19:08 UTC
svn commit: r1234146 - in /hive/trunk/ql/src:
java/org/apache/hadoop/hive/ql/exec/
java/org/apache/hadoop/hive/ql/udf/generic/ test/queries/clientnegative/
test/queries/clientpositive/ test/results/clientnegative/
test/results/clientpositive/
Author: cws
Date: Fri Jan 20 21:19:07 2012
New Revision: 1234146
URL: http://svn.apache.org/viewvc?rev=1234146&view=rev
Log:
HIVE-2279. Implement sort(array) UDF (Zhenxiao Luo via cws)
Added:
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFSortArray.java (with props)
hive/trunk/ql/src/test/queries/clientnegative/udf_sort_array_wrong1.q
hive/trunk/ql/src/test/queries/clientnegative/udf_sort_array_wrong2.q
hive/trunk/ql/src/test/queries/clientnegative/udf_sort_array_wrong3.q
hive/trunk/ql/src/test/queries/clientpositive/udf_sort_array.q
hive/trunk/ql/src/test/results/clientnegative/udf_sort_array_wrong1.q.out
hive/trunk/ql/src/test/results/clientnegative/udf_sort_array_wrong2.q.out
hive/trunk/ql/src/test/results/clientnegative/udf_sort_array_wrong3.q.out
hive/trunk/ql/src/test/results/clientpositive/udf_sort_array.q.out
Modified:
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java
hive/trunk/ql/src/test/results/clientpositive/show_functions.q.out
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java?rev=1234146&r1=1234145&r2=1234146&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java Fri Jan 20 21:19:07 2012
@@ -157,6 +157,7 @@ import org.apache.hadoop.hive.ql.udf.gen
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFCase;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFCoalesce;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFConcatWS;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDFSortArray;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFEWAHBitmapAnd;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFEWAHBitmapEmpty;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFEWAHBitmapOr;
@@ -446,6 +447,7 @@ public final class FunctionRegistry {
registerGenericUDF("locate", GenericUDFLocate.class);
registerGenericUDF("elt", GenericUDFElt.class);
registerGenericUDF("concat_ws", GenericUDFConcatWS.class);
+ registerGenericUDF("sort_array", GenericUDFSortArray.class);
registerGenericUDF("array_contains", GenericUDFArrayContains.class);
registerGenericUDF("sentences", GenericUDFSentences.class);
registerGenericUDF("map_keys", GenericUDFMapKeys.class);
Added: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFSortArray.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFSortArray.java?rev=1234146&view=auto
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFSortArray.java (added)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFSortArray.java Fri Jan 20 21:19:07 2012
@@ -0,0 +1,116 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.udf.generic;
+
+import java.util.ArrayList;
+import java.util.Collections;
+
+import org.apache.hadoop.hive.ql.exec.Description;
+import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
+import org.apache.hadoop.hive.ql.exec.UDFArgumentLengthException;
+import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.serde.Constants;
+import org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
+import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters.Converter;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.StringObjectInspector;
+
+/**
+ * Generic UDF for array sort
+ * <code>SORT_ARRAY(array(obj1, obj2, obj3...))</code>.
+ *
+ * @see org.apache.hadoop.hive.ql.udf.generic.GenericUDF
+ */
+@Description(name = "sort_array",
+ value = "_FUNC_(array(obj1, obj2,...)) - "
+ + "Sorts the input array in ascending order according to the natural ordering"
+ + " of the array elements.",
+ extended = "Example:\n"
+ + " > SELECT _FUNC_(array('b', 'd', 'c', 'a')) FROM src LIMIT 1;\n"
+ + " 'a', 'b', 'c', 'd'")
+public class GenericUDFSortArray extends GenericUDF {
+ private Converter[] converters;
+ private ArrayList<Object> ret = new ArrayList<Object>();
+ private ObjectInspector[] argumentOIs;
+
+ @Override
+ public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumentException {
+ GenericUDFUtils.ReturnObjectInspectorResolver returnOIResolver;
+ returnOIResolver = new GenericUDFUtils.ReturnObjectInspectorResolver(true);
+
+ if (arguments.length != 1) {
+ throw new UDFArgumentLengthException(
+ "The function SORT_ARRAY(array(obj1, obj2,...)) needs one argument.");
+ }
+
+ switch(arguments[0].getCategory()) {
+ case LIST:
+ if(((ListObjectInspector)(arguments[0])).getListElementObjectInspector()
+ .getCategory().equals(Category.PRIMITIVE))
+ break;
+ default:
+ throw new UDFArgumentTypeException(0, "Argument 1"
+ + " of function SORT_ARRAY must be " + Constants.LIST_TYPE_NAME
+ + "<" + Category.PRIMITIVE + ">, but " + arguments[0].getTypeName()
+ + " was found.");
+ }
+
+ ObjectInspector elementObjectInspector =
+ ((ListObjectInspector)(arguments[0])).getListElementObjectInspector();
+ argumentOIs = arguments;
+ converters = new Converter[arguments.length];
+ ObjectInspector returnOI = returnOIResolver.get();
+ if (returnOI == null) {
+ returnOI = elementObjectInspector;
+ }
+ converters[0] = ObjectInspectorConverters.getConverter(elementObjectInspector, returnOI);
+
+ return ObjectInspectorFactory.getStandardListObjectInspector(returnOI);
+ }
+
+ @Override
+ public Object evaluate(DeferredObject[] arguments) throws HiveException {
+ if (arguments[0].get() == null) {
+ return null;
+ }
+
+ Object array = arguments[0].get();
+ ListObjectInspector arrayOI = (ListObjectInspector) argumentOIs[0];
+ ArrayList retArray = (ArrayList) arrayOI.getList(array);
+ Collections.sort(retArray);
+
+ ret.clear();
+ for (int i = 0; i < retArray.size(); i++) {
+ ret.add(converters[0].convert(retArray.get(i)));
+ }
+ return ret;
+ }
+
+ @Override
+ public String getDisplayString(String[] children) {
+ assert (children.length == 1);
+ return "sort_array(" + children[0] + ")";
+ }
+}
Propchange: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFSortArray.java
------------------------------------------------------------------------------
svn:eol-style = native
Added: hive/trunk/ql/src/test/queries/clientnegative/udf_sort_array_wrong1.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientnegative/udf_sort_array_wrong1.q?rev=1234146&view=auto
==============================================================================
--- hive/trunk/ql/src/test/queries/clientnegative/udf_sort_array_wrong1.q (added)
+++ hive/trunk/ql/src/test/queries/clientnegative/udf_sort_array_wrong1.q Fri Jan 20 21:19:07 2012
@@ -0,0 +1,2 @@
+-- invalid argument number
+SELECT sort_array(array(2, 5, 4), 3) FROM src LIMIT 1;
Added: hive/trunk/ql/src/test/queries/clientnegative/udf_sort_array_wrong2.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientnegative/udf_sort_array_wrong2.q?rev=1234146&view=auto
==============================================================================
--- hive/trunk/ql/src/test/queries/clientnegative/udf_sort_array_wrong2.q (added)
+++ hive/trunk/ql/src/test/queries/clientnegative/udf_sort_array_wrong2.q Fri Jan 20 21:19:07 2012
@@ -0,0 +1,2 @@
+-- invalid argument type
+SELECT sort_array("Invalid") FROM src LIMIT 1;
Added: hive/trunk/ql/src/test/queries/clientnegative/udf_sort_array_wrong3.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientnegative/udf_sort_array_wrong3.q?rev=1234146&view=auto
==============================================================================
--- hive/trunk/ql/src/test/queries/clientnegative/udf_sort_array_wrong3.q (added)
+++ hive/trunk/ql/src/test/queries/clientnegative/udf_sort_array_wrong3.q Fri Jan 20 21:19:07 2012
@@ -0,0 +1,2 @@
+-- invalid argument type
+SELECT sort_array(array(array(10, 20), array(5, 15), array(3, 13))) FROM src LIMIT 1;
Added: hive/trunk/ql/src/test/queries/clientpositive/udf_sort_array.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/udf_sort_array.q?rev=1234146&view=auto
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/udf_sort_array.q (added)
+++ hive/trunk/ql/src/test/queries/clientpositive/udf_sort_array.q Fri Jan 20 21:19:07 2012
@@ -0,0 +1,19 @@
+use default;
+-- Test sort_array() UDF
+
+DESCRIBE FUNCTION sort_array;
+DESCRIBE FUNCTION EXTENDED sort_array;
+
+-- Evaluate function against STRING valued keys
+EXPLAIN
+SELECT sort_array(array("b", "d", "c", "a")) FROM src LIMIT 1;
+
+SELECT sort_array(array("f", "a", "g", "c", "b", "d", "e")) FROM src LIMIT 1;
+SELECT sort_array(sort_array(array("hadoop distributed file system", "enterprise databases", "hadoop map-reduce"))) FROM src LIMIT 1;
+
+-- Evaluate function against INT valued keys
+SELECT sort_array(array(2, 9, 7, 3, 5, 4, 1, 6, 8)) FROM src LIMIT 1;
+
+-- Evaluate function against FLOAT valued keys
+SELECT sort_array(sort_array(array(2.333, 9, 1.325, 2.003, 0.777, -3.445, 1))) FROM src LIMIT 1;
+
Added: hive/trunk/ql/src/test/results/clientnegative/udf_sort_array_wrong1.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientnegative/udf_sort_array_wrong1.q.out?rev=1234146&view=auto
==============================================================================
--- hive/trunk/ql/src/test/results/clientnegative/udf_sort_array_wrong1.q.out (added)
+++ hive/trunk/ql/src/test/results/clientnegative/udf_sort_array_wrong1.q.out Fri Jan 20 21:19:07 2012
@@ -0,0 +1 @@
+FAILED: Error in semantic analysis: Line 2:7 Arguments length mismatch '3': The function SORT_ARRAY(array(obj1, obj2,...)) needs one argument.
Added: hive/trunk/ql/src/test/results/clientnegative/udf_sort_array_wrong2.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientnegative/udf_sort_array_wrong2.q.out?rev=1234146&view=auto
==============================================================================
--- hive/trunk/ql/src/test/results/clientnegative/udf_sort_array_wrong2.q.out (added)
+++ hive/trunk/ql/src/test/results/clientnegative/udf_sort_array_wrong2.q.out Fri Jan 20 21:19:07 2012
@@ -0,0 +1 @@
+FAILED: Error in semantic analysis: Line 2:18 Argument type mismatch '"Invalid"': Argument 1 of function SORT_ARRAY must be array<PRIMITIVE>, but string was found.
Added: hive/trunk/ql/src/test/results/clientnegative/udf_sort_array_wrong3.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientnegative/udf_sort_array_wrong3.q.out?rev=1234146&view=auto
==============================================================================
--- hive/trunk/ql/src/test/results/clientnegative/udf_sort_array_wrong3.q.out (added)
+++ hive/trunk/ql/src/test/results/clientnegative/udf_sort_array_wrong3.q.out Fri Jan 20 21:19:07 2012
@@ -0,0 +1 @@
+FAILED: Error in semantic analysis: Line 2:18 Argument type mismatch '13': Argument 1 of function SORT_ARRAY must be array<PRIMITIVE>, but array<array<int>> was found.
Modified: hive/trunk/ql/src/test/results/clientpositive/show_functions.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/show_functions.q.out?rev=1234146&r1=1234145&r2=1234146&view=diff
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/show_functions.q.out (original)
+++ hive/trunk/ql/src/test/results/clientpositive/show_functions.q.out Fri Jan 20 21:19:07 2012
@@ -129,6 +129,7 @@ sentences
sign
sin
size
+sort_array
space
split
sqrt
Added: hive/trunk/ql/src/test/results/clientpositive/udf_sort_array.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/udf_sort_array.q.out?rev=1234146&view=auto
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/udf_sort_array.q.out (added)
+++ hive/trunk/ql/src/test/results/clientpositive/udf_sort_array.q.out Fri Jan 20 21:19:07 2012
@@ -0,0 +1,101 @@
+PREHOOK: query: use default
+PREHOOK: type: SWITCHDATABASE
+POSTHOOK: query: use default
+POSTHOOK: type: SWITCHDATABASE
+PREHOOK: query: -- Test sort_array() UDF
+
+DESCRIBE FUNCTION sort_array
+PREHOOK: type: DESCFUNCTION
+POSTHOOK: query: -- Test sort_array() UDF
+
+DESCRIBE FUNCTION sort_array
+POSTHOOK: type: DESCFUNCTION
+sort_array(array(obj1, obj2,...)) - Sorts the input array in ascending order according to the natural ordering of the array elements.
+PREHOOK: query: DESCRIBE FUNCTION EXTENDED sort_array
+PREHOOK: type: DESCFUNCTION
+POSTHOOK: query: DESCRIBE FUNCTION EXTENDED sort_array
+POSTHOOK: type: DESCFUNCTION
+sort_array(array(obj1, obj2,...)) - Sorts the input array in ascending order according to the natural ordering of the array elements.
+Example:
+ > SELECT sort_array(array('b', 'd', 'c', 'a')) FROM src LIMIT 1;
+ 'a', 'b', 'c', 'd'
+PREHOOK: query: -- Evaluate function against STRING valued keys
+EXPLAIN
+SELECT sort_array(array("b", "d", "c", "a")) FROM src LIMIT 1
+PREHOOK: type: QUERY
+POSTHOOK: query: -- Evaluate function against STRING valued keys
+EXPLAIN
+SELECT sort_array(array("b", "d", "c", "a")) FROM src LIMIT 1
+POSTHOOK: type: QUERY
+ABSTRACT SYNTAX TREE:
+ (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION sort_array (TOK_FUNCTION array "b" "d" "c" "a")))) (TOK_LIMIT 1)))
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 is a root stage
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Alias -> Map Operator Tree:
+ src
+ TableScan
+ alias: src
+ Select Operator
+ expressions:
+ expr: sort_array(array('b','d','c','a'))
+ type: array<string>
+ outputColumnNames: _col0
+ Limit
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: 1
+
+
+PREHOOK: query: SELECT sort_array(array("f", "a", "g", "c", "b", "d", "e")) FROM src LIMIT 1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT sort_array(array("f", "a", "g", "c", "b", "d", "e")) FROM src LIMIT 1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+["a","b","c","d","e","f","g"]
+PREHOOK: query: SELECT sort_array(sort_array(array("hadoop distributed file system", "enterprise databases", "hadoop map-reduce"))) FROM src LIMIT 1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT sort_array(sort_array(array("hadoop distributed file system", "enterprise databases", "hadoop map-reduce"))) FROM src LIMIT 1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+["enterprise databases","hadoop distributed file system","hadoop map-reduce"]
+PREHOOK: query: -- Evaluate function against INT valued keys
+SELECT sort_array(array(2, 9, 7, 3, 5, 4, 1, 6, 8)) FROM src LIMIT 1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: -- Evaluate function against INT valued keys
+SELECT sort_array(array(2, 9, 7, 3, 5, 4, 1, 6, 8)) FROM src LIMIT 1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+[1,2,3,4,5,6,7,8,9]
+PREHOOK: query: -- Evaluate function against FLOAT valued keys
+SELECT sort_array(sort_array(array(2.333, 9, 1.325, 2.003, 0.777, -3.445, 1))) FROM src LIMIT 1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: -- Evaluate function against FLOAT valued keys
+SELECT sort_array(sort_array(array(2.333, 9, 1.325, 2.003, 0.777, -3.445, 1))) FROM src LIMIT 1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+[-3.445,0.777,1.0,1.325,2.003,2.333,9.0]