You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by su...@apache.org on 2015/05/30 18:30:14 UTC

hive git commit: HIVE-10788 - Change sort_array to support non-primitive types (Chao Sun, reviewed by Alexander Pivovarov)

Repository: hive
Updated Branches:
  refs/heads/master 83cc691c5 -> ae4790eaa


HIVE-10788 - Change sort_array to support non-primitive types (Chao Sun, reviewed by Alexander Pivovarov)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/ae4790ea
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/ae4790ea
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/ae4790ea

Branch: refs/heads/master
Commit: ae4790eaa1dca2605da59a662a3c0d6c80ee55b3
Parents: 83cc691
Author: Chao Sun <su...@apache.org>
Authored: Sat May 30 09:29:38 2015 -0700
Committer: Chao Sun <su...@apache.org>
Committed: Sat May 30 09:29:38 2015 -0700

----------------------------------------------------------------------
 .../ql/udf/generic/GenericUDFSortArray.java     |  17 +-
 .../ql/udf/generic/TestGenericUDFSortArray.java | 158 +++++++++++++++++++
 .../clientnegative/udf_sort_array_wrong3.q      |   2 +-
 .../queries/clientpositive/udf_sort_array.q     |  10 ++
 .../clientnegative/udf_sort_array_wrong1.q.out  |   2 +-
 .../clientnegative/udf_sort_array_wrong2.q.out  |   2 +-
 .../clientnegative/udf_sort_array_wrong3.q.out  |   2 +-
 .../results/clientpositive/udf_sort_array.q.out |  33 ++++
 8 files changed, 211 insertions(+), 15 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/ae4790ea/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFSortArray.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFSortArray.java b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFSortArray.java
index 2d6d58c..edc75ec 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFSortArray.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFSortArray.java
@@ -25,13 +25,11 @@ import java.util.List;
 
 import org.apache.hadoop.hive.ql.exec.Description;
 import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
-import org.apache.hadoop.hive.ql.exec.UDFArgumentLengthException;
 import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException;
 import org.apache.hadoop.hive.ql.metadata.HiveException;
 import org.apache.hadoop.hive.serde.serdeConstants;
 import org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters.Converter;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
@@ -60,22 +58,19 @@ public class GenericUDFSortArray extends GenericUDF {
     GenericUDFUtils.ReturnObjectInspectorResolver returnOIResolver;
     returnOIResolver = new GenericUDFUtils.ReturnObjectInspectorResolver(true);
 
-    if (arguments.length != 1) {
-      throw new UDFArgumentLengthException(
-        "The function SORT_ARRAY(array(obj1, obj2,...)) needs one argument.");
-    }
+    checkArgsSize(arguments, 1, 1);
 
     switch(arguments[0].getCategory()) {
       case LIST:
-        if(((ListObjectInspector)(arguments[0])).getListElementObjectInspector()
-          .getCategory().equals(Category.PRIMITIVE)) {
+        if(!((ListObjectInspector)(arguments[0])).getListElementObjectInspector()
+            .getCategory().equals(ObjectInspector.Category.UNION)) {
           break;
         }
       default:
         throw new UDFArgumentTypeException(0, "Argument 1"
-          + " of function SORT_ARRAY must be " + serdeConstants.LIST_TYPE_NAME
-          + "<" + Category.PRIMITIVE + ">, but " + arguments[0].getTypeName()
-          + " was found.");
+            + " of function SORT_ARRAY must be " + serdeConstants.LIST_TYPE_NAME
+            + ", and element type should be either primitive, list, struct, or map, " +
+            "but " + arguments[0].getTypeName() + " was found.");
     }
 
     ObjectInspector elementObjectInspector =

http://git-wip-us.apache.org/repos/asf/hive/blob/ae4790ea/ql/src/test/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDFSortArray.java
----------------------------------------------------------------------
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDFSortArray.java b/ql/src/test/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDFSortArray.java
new file mode 100644
index 0000000..6dabd83
--- /dev/null
+++ b/ql/src/test/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDFSortArray.java
@@ -0,0 +1,158 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.udf.generic;
+
+import java.sql.Date;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import static java.util.Arrays.asList;
+
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.serde2.io.DateWritable;
+import org.apache.hadoop.hive.serde2.io.DoubleWritable;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
+import org.apache.hadoop.io.IntWritable;
+import org.apache.hadoop.io.Text;
+import org.junit.Assert;
+import org.junit.Test;
+
+public class TestGenericUDFSortArray { // Unit tests for GenericUDFSortArray over primitive, list, struct and map element types.
+  private final GenericUDFSortArray udf = new GenericUDFSortArray(); // each test below re-initializes this with its own input inspectors
+
+  @Test
+  public void testSortPrimitive() throws HiveException { // array<int> input
+    ObjectInspector[] inputOIs = {
+        ObjectInspectorFactory.getStandardListObjectInspector(
+            PrimitiveObjectInspectorFactory.writableIntObjectInspector)
+    };
+    udf.initialize(inputOIs);
+
+    Object i1 = new IntWritable(3);
+    Object i2 = new IntWritable(4);
+    Object i3 = new IntWritable(2);
+    Object i4 = new IntWritable(1);
+
+    runAndVerify(asList(i1,i2,i3,i4), asList(i4,i3,i1,i2)); // input 3,4,2,1 -> expected ascending 1,2,3,4
+  }
+
+  @Test
+  public void testSortList() throws HiveException { // array<array<string>> input
+    ObjectInspector[] inputOIs = {
+        ObjectInspectorFactory.getStandardListObjectInspector(
+            ObjectInspectorFactory.getStandardListObjectInspector(
+                PrimitiveObjectInspectorFactory.writableStringObjectInspector
+            )
+        )
+    };
+    udf.initialize(inputOIs);
+
+    Object i1 = asList(new Text("aa"),new Text("dd"),new Text("cc"),new Text("bb"));
+    Object i2 = asList(new Text("aa"),new Text("cc"),new Text("ba"),new Text("dd"));
+    Object i3 = asList(new Text("aa"),new Text("cc"),new Text("dd"),new Text("ee"), new Text("bb"));
+    Object i4 = asList(new Text("aa"),new Text("cc"),new Text("ddd"),new Text("bb"));
+
+    runAndVerify(asList(i1,i2,i3,i4), asList(i2,i3,i4,i1)); // i2,i3,i4 share "aa","cc" and differ at the third element (ba < dd < ddd); i1 has "dd" second, so it is last
+  }
+
+  @Test
+  public void testSortStruct() throws HiveException { // array<struct<f1:string,f2:double,f3:date,f4:array<int>>> input
+    ObjectInspector[] inputOIs = {
+        ObjectInspectorFactory.getStandardListObjectInspector(
+            ObjectInspectorFactory.getStandardStructObjectInspector(
+                asList("f1", "f2", "f3", "f4"),
+                asList(
+                    PrimitiveObjectInspectorFactory.writableStringObjectInspector,
+                    PrimitiveObjectInspectorFactory.writableDoubleObjectInspector,
+                    PrimitiveObjectInspectorFactory.writableDateObjectInspector,
+                    ObjectInspectorFactory.getStandardListObjectInspector(
+                        PrimitiveObjectInspectorFactory.writableIntObjectInspector
+                    )
+                )
+            )
+        )
+    };
+    udf.initialize(inputOIs);
+
+    Object i1 = asList(new Text("a"), new DoubleWritable(3.1415),
+        new DateWritable(new Date(2015, 5, 26)), // NOTE(review): deprecated java.sql.Date(int,int,int) - year is 1900-based, month is 0-based; only the relative order of the dates matters in this test
+        asList(new IntWritable(1), new IntWritable(3),
+            new IntWritable(2), new IntWritable(4)));
+
+    Object i2 = asList(new Text("b"), new DoubleWritable(3.14),
+        new DateWritable(new Date(2015, 5, 26)),
+        asList(new IntWritable(1), new IntWritable(3),
+            new IntWritable(2), new IntWritable(4)));
+
+    Object i3 = asList(new Text("a"), new DoubleWritable(3.1415),
+        new DateWritable(new Date(2015, 5, 25)),
+        asList(new IntWritable(1), new IntWritable(3),
+            new IntWritable(2), new IntWritable(5)));
+
+    Object i4 = asList(new Text("a"), new DoubleWritable(3.1415),
+        new DateWritable(new Date(2015, 5, 25)),
+        asList(new IntWritable(1), new IntWritable(3),
+            new IntWritable(2), new IntWritable(4)));
+
+    runAndVerify(asList(i1,i2,i3,i4), asList(i4,i3,i1,i2)); // i3/i4 share f1,f2 and the earlier date; i4's last list element (4) < i3's (5); i1 has the later date; i2 has f1 "b", so it is last
+  }
+
+  @Test
+  public void testSortMap() throws HiveException { // array<map<string,int>> input
+    ObjectInspector[] inputOIs = {
+        ObjectInspectorFactory.getStandardListObjectInspector(
+            ObjectInspectorFactory.getStandardMapObjectInspector(
+                PrimitiveObjectInspectorFactory.writableStringObjectInspector,
+                PrimitiveObjectInspectorFactory.writableIntObjectInspector
+            )
+        )
+    };
+    udf.initialize(inputOIs);
+
+    Map<Text, IntWritable> m1 = new HashMap<Text, IntWritable>();
+    m1.put(new Text("a"), new IntWritable(4));
+    m1.put(new Text("b"), new IntWritable(3));
+    m1.put(new Text("c"), new IntWritable(1));
+    m1.put(new Text("d"), new IntWritable(2));
+
+    Map<Text, IntWritable> m2 = new HashMap<Text, IntWritable>();
+    m2.put(new Text("d"), new IntWritable(4));
+    m2.put(new Text("b"), new IntWritable(3));
+    m2.put(new Text("a"), new IntWritable(1));
+    m2.put(new Text("c"), new IntWritable(2));
+
+    Map<Text, IntWritable> m3 = new HashMap<Text, IntWritable>();
+    m3.put(new Text("d"), new IntWritable(4));
+    m3.put(new Text("b"), new IntWritable(3));
+    m3.put(new Text("a"), new IntWritable(1));
+
+    runAndVerify(asList((Object)m1, m2, m3), asList((Object)m3, m2, m1)); // m3 has 3 entries, m1 and m2 have 4; presumably map size is compared first - TODO confirm against the comparator the UDF uses
+  }
+
+  private void runAndVerify(List<Object> actual, List<Object> expected) // despite its name, 'actual' is the unsorted input handed to the UDF; 'expected' is the sorted result
+      throws HiveException {
+    GenericUDF.DeferredJavaObject[] args = { new GenericUDF.DeferredJavaObject(actual) };
+    List<Object> result = (List<Object>) udf.evaluate(args); // unchecked cast of the UDF result to a list
+
+    Assert.assertEquals("Check size", expected.size(), result.size());
+    Assert.assertArrayEquals("Check content", expected.toArray(), result.toArray());
+  }
+}

http://git-wip-us.apache.org/repos/asf/hive/blob/ae4790ea/ql/src/test/queries/clientnegative/udf_sort_array_wrong3.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientnegative/udf_sort_array_wrong3.q b/ql/src/test/queries/clientnegative/udf_sort_array_wrong3.q
index 034de06..49856ae 100644
--- a/ql/src/test/queries/clientnegative/udf_sort_array_wrong3.q
+++ b/ql/src/test/queries/clientnegative/udf_sort_array_wrong3.q
@@ -1,2 +1,2 @@
 -- invalid argument type
-SELECT sort_array(array(array(10, 20), array(5, 15), array(3, 13))) FROM src LIMIT 1;
+SELECT sort_array(array(create_union(0,"a"))) FROM src LIMIT 1;

http://git-wip-us.apache.org/repos/asf/hive/blob/ae4790ea/ql/src/test/queries/clientpositive/udf_sort_array.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/udf_sort_array.q b/ql/src/test/queries/clientpositive/udf_sort_array.q
index 313bcf8..d53e2c8 100644
--- a/ql/src/test/queries/clientpositive/udf_sort_array.q
+++ b/ql/src/test/queries/clientpositive/udf_sort_array.q
@@ -19,6 +19,16 @@ SELECT sort_array(array(2, 9, 7, 3, 5, 4, 1, 6, 8)) FROM src tablesample (1 rows
 -- Evaluate function against FLOAT valued keys
 SELECT sort_array(sort_array(array(2.333, 9, 1.325, 2.003, 0.777, -3.445, 1))) FROM src tablesample (1 rows);
 
+-- Evaluate function against LIST valued keys
+SELECT sort_array(array(array(2, 9, 7), array(3, 5, 4), array(1, 6, 8))) FROM src tablesample (1 rows);
+
+-- Evaluate function against STRUCT valued keys
+SELECT sort_array(array(struct(2, 9, 7), struct(3, 5, 4), struct(1, 6, 8))) FROM src tablesample (1 rows);
+
+-- Evaluate function against MAP valued keys
+SELECT sort_array(array(map("b", 2, "a", 9, "c", 7), map("c", 3, "b", 5, "a", 1), map("a", 1, "c", 6, "b", 8))) FROM src tablesample (1 rows);
+
+
 -- Test it against data in a table.
 CREATE TABLE dest1 (
 	tinyints ARRAY<TINYINT>,

http://git-wip-us.apache.org/repos/asf/hive/blob/ae4790ea/ql/src/test/results/clientnegative/udf_sort_array_wrong1.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientnegative/udf_sort_array_wrong1.q.out b/ql/src/test/results/clientnegative/udf_sort_array_wrong1.q.out
index 075f49b..6d4b2ab 100644
--- a/ql/src/test/results/clientnegative/udf_sort_array_wrong1.q.out
+++ b/ql/src/test/results/clientnegative/udf_sort_array_wrong1.q.out
@@ -1 +1 @@
-FAILED: SemanticException [Error 10015]: Line 2:7 Arguments length mismatch '3': The function SORT_ARRAY(array(obj1, obj2,...)) needs one argument.
+FAILED: SemanticException [Error 10015]: Line 2:7 Arguments length mismatch '3': sortarray requires 1 argument(s), got 2

http://git-wip-us.apache.org/repos/asf/hive/blob/ae4790ea/ql/src/test/results/clientnegative/udf_sort_array_wrong2.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientnegative/udf_sort_array_wrong2.q.out b/ql/src/test/results/clientnegative/udf_sort_array_wrong2.q.out
index c068ecd..2123e2e 100644
--- a/ql/src/test/results/clientnegative/udf_sort_array_wrong2.q.out
+++ b/ql/src/test/results/clientnegative/udf_sort_array_wrong2.q.out
@@ -1 +1 @@
-FAILED: SemanticException [Error 10016]: Line 2:18 Argument type mismatch '"Invalid"': Argument 1 of function SORT_ARRAY must be array<PRIMITIVE>, but string was found.
+FAILED: SemanticException [Error 10016]: Line 2:18 Argument type mismatch '"Invalid"': Argument 1 of function SORT_ARRAY must be array, and element type should be either primitive, list, struct, or map, but string was found.

http://git-wip-us.apache.org/repos/asf/hive/blob/ae4790ea/ql/src/test/results/clientnegative/udf_sort_array_wrong3.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientnegative/udf_sort_array_wrong3.q.out b/ql/src/test/results/clientnegative/udf_sort_array_wrong3.q.out
index abf7124..6745f4f 100644
--- a/ql/src/test/results/clientnegative/udf_sort_array_wrong3.q.out
+++ b/ql/src/test/results/clientnegative/udf_sort_array_wrong3.q.out
@@ -1 +1 @@
-FAILED: SemanticException [Error 10016]: Line 2:18 Argument type mismatch '13': Argument 1 of function SORT_ARRAY must be array<PRIMITIVE>, but array<array<int>> was found.
+FAILED: SemanticException [Error 10016]: Line 2:18 Argument type mismatch '"a"': Argument 1 of function SORT_ARRAY must be array, and element type should be either primitive, list, struct, or map, but array<uniontype<string>> was found.

http://git-wip-us.apache.org/repos/asf/hive/blob/ae4790ea/ql/src/test/results/clientpositive/udf_sort_array.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/udf_sort_array.q.out b/ql/src/test/results/clientpositive/udf_sort_array.q.out
index 9631c2d..523b297 100644
--- a/ql/src/test/results/clientpositive/udf_sort_array.q.out
+++ b/ql/src/test/results/clientpositive/udf_sort_array.q.out
@@ -87,6 +87,39 @@ POSTHOOK: type: QUERY
 POSTHOOK: Input: default@src
 #### A masked pattern was here ####
 [-3.445,0.777,1.0,1.325,2.003,2.333,9.0]
+PREHOOK: query: -- Evaluate function against LIST valued keys
+SELECT sort_array(array(array(2, 9, 7), array(3, 5, 4), array(1, 6, 8))) FROM src tablesample (1 rows)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: -- Evaluate function against LIST valued keys
+SELECT sort_array(array(array(2, 9, 7), array(3, 5, 4), array(1, 6, 8))) FROM src tablesample (1 rows)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+[[1,6,8],[2,9,7],[3,5,4]]
+PREHOOK: query: -- Evaluate function against STRUCT valued keys
+SELECT sort_array(array(struct(2, 9, 7), struct(3, 5, 4), struct(1, 6, 8))) FROM src tablesample (1 rows)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: -- Evaluate function against STRUCT valued keys
+SELECT sort_array(array(struct(2, 9, 7), struct(3, 5, 4), struct(1, 6, 8))) FROM src tablesample (1 rows)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+[{"col1":1,"col2":6,"col3":8},{"col1":2,"col2":9,"col3":7},{"col1":3,"col2":5,"col3":4}]
+PREHOOK: query: -- Evaluate function against MAP valued keys
+SELECT sort_array(array(map("b", 2, "a", 9, "c", 7), map("c", 3, "b", 5, "a", 1), map("a", 1, "c", 6, "b", 8))) FROM src tablesample (1 rows)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: -- Evaluate function against MAP valued keys
+SELECT sort_array(array(map("b", 2, "a", 9, "c", 7), map("c", 3, "b", 5, "a", 1), map("a", 1, "c", 6, "b", 8))) FROM src tablesample (1 rows)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+[{"b":5,"a":1,"c":3},{"b":8,"a":1,"c":6},{"b":2,"a":9,"c":7}]
 PREHOOK: query: -- Test it against data in a table.
 CREATE TABLE dest1 (
 	tinyints ARRAY<TINYINT>,