You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by su...@apache.org on 2015/05/30 18:30:14 UTC
hive git commit: HIVE-10788 - Change sort_array to support non-primitive types (Chao Sun, reviewed by Alexander Pivovarov)
Repository: hive
Updated Branches:
refs/heads/master 83cc691c5 -> ae4790eaa
HIVE-10788 - Change sort_array to support non-primitive types (Chao Sun, reviewed by Alexander Pivovarov)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/ae4790ea
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/ae4790ea
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/ae4790ea
Branch: refs/heads/master
Commit: ae4790eaa1dca2605da59a662a3c0d6c80ee55b3
Parents: 83cc691
Author: Chao Sun <su...@apache.org>
Authored: Sat May 30 09:29:38 2015 -0700
Committer: Chao Sun <su...@apache.org>
Committed: Sat May 30 09:29:38 2015 -0700
----------------------------------------------------------------------
.../ql/udf/generic/GenericUDFSortArray.java | 17 +-
.../ql/udf/generic/TestGenericUDFSortArray.java | 158 +++++++++++++++++++
.../clientnegative/udf_sort_array_wrong3.q | 2 +-
.../queries/clientpositive/udf_sort_array.q | 10 ++
.../clientnegative/udf_sort_array_wrong1.q.out | 2 +-
.../clientnegative/udf_sort_array_wrong2.q.out | 2 +-
.../clientnegative/udf_sort_array_wrong3.q.out | 2 +-
.../results/clientpositive/udf_sort_array.q.out | 33 ++++
8 files changed, 211 insertions(+), 15 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/ae4790ea/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFSortArray.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFSortArray.java b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFSortArray.java
index 2d6d58c..edc75ec 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFSortArray.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFSortArray.java
@@ -25,13 +25,11 @@ import java.util.List;
import org.apache.hadoop.hive.ql.exec.Description;
import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
-import org.apache.hadoop.hive.ql.exec.UDFArgumentLengthException;
import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.serde.serdeConstants;
import org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters.Converter;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
@@ -60,22 +58,19 @@ public class GenericUDFSortArray extends GenericUDF {
GenericUDFUtils.ReturnObjectInspectorResolver returnOIResolver;
returnOIResolver = new GenericUDFUtils.ReturnObjectInspectorResolver(true);
- if (arguments.length != 1) {
- throw new UDFArgumentLengthException(
- "The function SORT_ARRAY(array(obj1, obj2,...)) needs one argument.");
- }
+ checkArgsSize(arguments, 1, 1);
switch(arguments[0].getCategory()) {
case LIST:
- if(((ListObjectInspector)(arguments[0])).getListElementObjectInspector()
- .getCategory().equals(Category.PRIMITIVE)) {
+ if(!((ListObjectInspector)(arguments[0])).getListElementObjectInspector()
+ .getCategory().equals(ObjectInspector.Category.UNION)) {
break;
}
default:
throw new UDFArgumentTypeException(0, "Argument 1"
- + " of function SORT_ARRAY must be " + serdeConstants.LIST_TYPE_NAME
- + "<" + Category.PRIMITIVE + ">, but " + arguments[0].getTypeName()
- + " was found.");
+ + " of function SORT_ARRAY must be " + serdeConstants.LIST_TYPE_NAME
+ + ", and element type should be either primitive, list, struct, or map, " +
+ "but " + arguments[0].getTypeName() + " was found.");
}
ObjectInspector elementObjectInspector =
http://git-wip-us.apache.org/repos/asf/hive/blob/ae4790ea/ql/src/test/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDFSortArray.java
----------------------------------------------------------------------
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDFSortArray.java b/ql/src/test/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDFSortArray.java
new file mode 100644
index 0000000..6dabd83
--- /dev/null
+++ b/ql/src/test/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDFSortArray.java
@@ -0,0 +1,158 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.udf.generic;
+
+import java.sql.Date;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import static java.util.Arrays.asList;
+
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.serde2.io.DateWritable;
+import org.apache.hadoop.hive.serde2.io.DoubleWritable;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
+import org.apache.hadoop.io.IntWritable;
+import org.apache.hadoop.io.Text;
+import org.junit.Assert;
+import org.junit.Test;
+
+public class TestGenericUDFSortArray {
+ private final GenericUDFSortArray udf = new GenericUDFSortArray();
+
+ @Test
+ public void testSortPrimitive() throws HiveException {
+ ObjectInspector[] inputOIs = {
+ ObjectInspectorFactory.getStandardListObjectInspector(
+ PrimitiveObjectInspectorFactory.writableIntObjectInspector)
+ };
+ udf.initialize(inputOIs);
+
+ Object i1 = new IntWritable(3);
+ Object i2 = new IntWritable(4);
+ Object i3 = new IntWritable(2);
+ Object i4 = new IntWritable(1);
+
+ runAndVerify(asList(i1,i2,i3,i4), asList(i4,i3,i1,i2));
+ }
+
+ @Test
+ public void testSortList() throws HiveException {
+ ObjectInspector[] inputOIs = {
+ ObjectInspectorFactory.getStandardListObjectInspector(
+ ObjectInspectorFactory.getStandardListObjectInspector(
+ PrimitiveObjectInspectorFactory.writableStringObjectInspector
+ )
+ )
+ };
+ udf.initialize(inputOIs);
+
+ Object i1 = asList(new Text("aa"),new Text("dd"),new Text("cc"),new Text("bb"));
+ Object i2 = asList(new Text("aa"),new Text("cc"),new Text("ba"),new Text("dd"));
+ Object i3 = asList(new Text("aa"),new Text("cc"),new Text("dd"),new Text("ee"), new Text("bb"));
+ Object i4 = asList(new Text("aa"),new Text("cc"),new Text("ddd"),new Text("bb"));
+
+ runAndVerify(asList(i1,i2,i3,i4), asList(i2,i3,i4,i1));
+ }
+
+ @Test
+ public void testSortStruct() throws HiveException {
+ ObjectInspector[] inputOIs = {
+ ObjectInspectorFactory.getStandardListObjectInspector(
+ ObjectInspectorFactory.getStandardStructObjectInspector(
+ asList("f1", "f2", "f3", "f4"),
+ asList(
+ PrimitiveObjectInspectorFactory.writableStringObjectInspector,
+ PrimitiveObjectInspectorFactory.writableDoubleObjectInspector,
+ PrimitiveObjectInspectorFactory.writableDateObjectInspector,
+ ObjectInspectorFactory.getStandardListObjectInspector(
+ PrimitiveObjectInspectorFactory.writableIntObjectInspector
+ )
+ )
+ )
+ )
+ };
+ udf.initialize(inputOIs);
+
+ Object i1 = asList(new Text("a"), new DoubleWritable(3.1415),
+ new DateWritable(new Date(2015, 5, 26)),
+ asList(new IntWritable(1), new IntWritable(3),
+ new IntWritable(2), new IntWritable(4)));
+
+ Object i2 = asList(new Text("b"), new DoubleWritable(3.14),
+ new DateWritable(new Date(2015, 5, 26)),
+ asList(new IntWritable(1), new IntWritable(3),
+ new IntWritable(2), new IntWritable(4)));
+
+ Object i3 = asList(new Text("a"), new DoubleWritable(3.1415),
+ new DateWritable(new Date(2015, 5, 25)),
+ asList(new IntWritable(1), new IntWritable(3),
+ new IntWritable(2), new IntWritable(5)));
+
+ Object i4 = asList(new Text("a"), new DoubleWritable(3.1415),
+ new DateWritable(new Date(2015, 5, 25)),
+ asList(new IntWritable(1), new IntWritable(3),
+ new IntWritable(2), new IntWritable(4)));
+
+ runAndVerify(asList(i1,i2,i3,i4), asList(i4,i3,i1,i2));
+ }
+
+ @Test
+ public void testSortMap() throws HiveException {
+ ObjectInspector[] inputOIs = {
+ ObjectInspectorFactory.getStandardListObjectInspector(
+ ObjectInspectorFactory.getStandardMapObjectInspector(
+ PrimitiveObjectInspectorFactory.writableStringObjectInspector,
+ PrimitiveObjectInspectorFactory.writableIntObjectInspector
+ )
+ )
+ };
+ udf.initialize(inputOIs);
+
+ Map<Text, IntWritable> m1 = new HashMap<Text, IntWritable>();
+ m1.put(new Text("a"), new IntWritable(4));
+ m1.put(new Text("b"), new IntWritable(3));
+ m1.put(new Text("c"), new IntWritable(1));
+ m1.put(new Text("d"), new IntWritable(2));
+
+ Map<Text, IntWritable> m2 = new HashMap<Text, IntWritable>();
+ m2.put(new Text("d"), new IntWritable(4));
+ m2.put(new Text("b"), new IntWritable(3));
+ m2.put(new Text("a"), new IntWritable(1));
+ m2.put(new Text("c"), new IntWritable(2));
+
+ Map<Text, IntWritable> m3 = new HashMap<Text, IntWritable>();
+ m3.put(new Text("d"), new IntWritable(4));
+ m3.put(new Text("b"), new IntWritable(3));
+ m3.put(new Text("a"), new IntWritable(1));
+
+ runAndVerify(asList((Object)m1, m2, m3), asList((Object)m3, m2, m1));
+ }
+
+ private void runAndVerify(List<Object> actual, List<Object> expected)
+ throws HiveException {
+ GenericUDF.DeferredJavaObject[] args = { new GenericUDF.DeferredJavaObject(actual) };
+ List<Object> result = (List<Object>) udf.evaluate(args);
+
+ Assert.assertEquals("Check size", expected.size(), result.size());
+ Assert.assertArrayEquals("Check content", expected.toArray(), result.toArray());
+ }
+}
http://git-wip-us.apache.org/repos/asf/hive/blob/ae4790ea/ql/src/test/queries/clientnegative/udf_sort_array_wrong3.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientnegative/udf_sort_array_wrong3.q b/ql/src/test/queries/clientnegative/udf_sort_array_wrong3.q
index 034de06..49856ae 100644
--- a/ql/src/test/queries/clientnegative/udf_sort_array_wrong3.q
+++ b/ql/src/test/queries/clientnegative/udf_sort_array_wrong3.q
@@ -1,2 +1,2 @@
-- invalid argument type
-SELECT sort_array(array(array(10, 20), array(5, 15), array(3, 13))) FROM src LIMIT 1;
+SELECT sort_array(array(create_union(0,"a"))) FROM src LIMIT 1;
http://git-wip-us.apache.org/repos/asf/hive/blob/ae4790ea/ql/src/test/queries/clientpositive/udf_sort_array.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/udf_sort_array.q b/ql/src/test/queries/clientpositive/udf_sort_array.q
index 313bcf8..d53e2c8 100644
--- a/ql/src/test/queries/clientpositive/udf_sort_array.q
+++ b/ql/src/test/queries/clientpositive/udf_sort_array.q
@@ -19,6 +19,16 @@ SELECT sort_array(array(2, 9, 7, 3, 5, 4, 1, 6, 8)) FROM src tablesample (1 rows
-- Evaluate function against FLOAT valued keys
SELECT sort_array(sort_array(array(2.333, 9, 1.325, 2.003, 0.777, -3.445, 1))) FROM src tablesample (1 rows);
+-- Evaluate function against LIST valued keys
+SELECT sort_array(array(array(2, 9, 7), array(3, 5, 4), array(1, 6, 8))) FROM src tablesample (1 rows);
+
+-- Evaluate function against STRUCT valued keys
+SELECT sort_array(array(struct(2, 9, 7), struct(3, 5, 4), struct(1, 6, 8))) FROM src tablesample (1 rows);
+
+-- Evaluate function against MAP valued keys
+SELECT sort_array(array(map("b", 2, "a", 9, "c", 7), map("c", 3, "b", 5, "a", 1), map("a", 1, "c", 6, "b", 8))) FROM src tablesample (1 rows);
+
+
-- Test it against data in a table.
CREATE TABLE dest1 (
tinyints ARRAY<TINYINT>,
http://git-wip-us.apache.org/repos/asf/hive/blob/ae4790ea/ql/src/test/results/clientnegative/udf_sort_array_wrong1.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientnegative/udf_sort_array_wrong1.q.out b/ql/src/test/results/clientnegative/udf_sort_array_wrong1.q.out
index 075f49b..6d4b2ab 100644
--- a/ql/src/test/results/clientnegative/udf_sort_array_wrong1.q.out
+++ b/ql/src/test/results/clientnegative/udf_sort_array_wrong1.q.out
@@ -1 +1 @@
-FAILED: SemanticException [Error 10015]: Line 2:7 Arguments length mismatch '3': The function SORT_ARRAY(array(obj1, obj2,...)) needs one argument.
+FAILED: SemanticException [Error 10015]: Line 2:7 Arguments length mismatch '3': sortarray requires 1 argument(s), got 2
http://git-wip-us.apache.org/repos/asf/hive/blob/ae4790ea/ql/src/test/results/clientnegative/udf_sort_array_wrong2.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientnegative/udf_sort_array_wrong2.q.out b/ql/src/test/results/clientnegative/udf_sort_array_wrong2.q.out
index c068ecd..2123e2e 100644
--- a/ql/src/test/results/clientnegative/udf_sort_array_wrong2.q.out
+++ b/ql/src/test/results/clientnegative/udf_sort_array_wrong2.q.out
@@ -1 +1 @@
-FAILED: SemanticException [Error 10016]: Line 2:18 Argument type mismatch '"Invalid"': Argument 1 of function SORT_ARRAY must be array<PRIMITIVE>, but string was found.
+FAILED: SemanticException [Error 10016]: Line 2:18 Argument type mismatch '"Invalid"': Argument 1 of function SORT_ARRAY must be array, and element type should be either primitive, list, struct, or map, but string was found.
http://git-wip-us.apache.org/repos/asf/hive/blob/ae4790ea/ql/src/test/results/clientnegative/udf_sort_array_wrong3.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientnegative/udf_sort_array_wrong3.q.out b/ql/src/test/results/clientnegative/udf_sort_array_wrong3.q.out
index abf7124..6745f4f 100644
--- a/ql/src/test/results/clientnegative/udf_sort_array_wrong3.q.out
+++ b/ql/src/test/results/clientnegative/udf_sort_array_wrong3.q.out
@@ -1 +1 @@
-FAILED: SemanticException [Error 10016]: Line 2:18 Argument type mismatch '13': Argument 1 of function SORT_ARRAY must be array<PRIMITIVE>, but array<array<int>> was found.
+FAILED: SemanticException [Error 10016]: Line 2:18 Argument type mismatch '"a"': Argument 1 of function SORT_ARRAY must be array, and element type should be either primitive, list, struct, or map, but array<uniontype<string>> was found.
http://git-wip-us.apache.org/repos/asf/hive/blob/ae4790ea/ql/src/test/results/clientpositive/udf_sort_array.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/udf_sort_array.q.out b/ql/src/test/results/clientpositive/udf_sort_array.q.out
index 9631c2d..523b297 100644
--- a/ql/src/test/results/clientpositive/udf_sort_array.q.out
+++ b/ql/src/test/results/clientpositive/udf_sort_array.q.out
@@ -87,6 +87,39 @@ POSTHOOK: type: QUERY
POSTHOOK: Input: default@src
#### A masked pattern was here ####
[-3.445,0.777,1.0,1.325,2.003,2.333,9.0]
+PREHOOK: query: -- Evaluate function against LIST valued keys
+SELECT sort_array(array(array(2, 9, 7), array(3, 5, 4), array(1, 6, 8))) FROM src tablesample (1 rows)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: -- Evaluate function against LIST valued keys
+SELECT sort_array(array(array(2, 9, 7), array(3, 5, 4), array(1, 6, 8))) FROM src tablesample (1 rows)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+[[1,6,8],[2,9,7],[3,5,4]]
+PREHOOK: query: -- Evaluate function against STRUCT valued keys
+SELECT sort_array(array(struct(2, 9, 7), struct(3, 5, 4), struct(1, 6, 8))) FROM src tablesample (1 rows)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: -- Evaluate function against STRUCT valued keys
+SELECT sort_array(array(struct(2, 9, 7), struct(3, 5, 4), struct(1, 6, 8))) FROM src tablesample (1 rows)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+[{"col1":1,"col2":6,"col3":8},{"col1":2,"col2":9,"col3":7},{"col1":3,"col2":5,"col3":4}]
+PREHOOK: query: -- Evaluate function against MAP valued keys
+SELECT sort_array(array(map("b", 2, "a", 9, "c", 7), map("c", 3, "b", 5, "a", 1), map("a", 1, "c", 6, "b", 8))) FROM src tablesample (1 rows)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: -- Evaluate function against MAP valued keys
+SELECT sort_array(array(map("b", 2, "a", 9, "c", 7), map("c", 3, "b", 5, "a", 1), map("a", 1, "c", 6, "b", 8))) FROM src tablesample (1 rows)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+[{"b":5,"a":1,"c":3},{"b":8,"a":1,"c":6},{"b":2,"a":9,"c":7}]
PREHOOK: query: -- Test it against data in a table.
CREATE TABLE dest1 (
tinyints ARRAY<TINYINT>,