You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by ma...@apache.org on 2015/07/07 04:44:44 UTC

spark git commit: [SPARK-6747] [SQL] Throw an AnalysisException when unsupported Java list types used in Hive UDF

Repository: spark
Updated Branches:
  refs/heads/master 929dfa24b -> 1821fc165


[SPARK-6747] [SQL] Throw an AnalysisException when unsupported Java list types used in Hive UDF

The current implementation can't handle List<> as a return type in Hive UDF and
throws meaningless Match Error.
We assume an UDF below;
public class UDFToListString extends UDF {
public List<String> evaluate(Object o)
{ return Arrays.asList("xxx", "yyy", "zzz"); }
}
An exception of scala.MatchError is thrown as follows when the UDF used;
scala.MatchError: interface java.util.List (of class java.lang.Class)
at org.apache.spark.sql.hive.HiveInspectors$class.javaClassToDataType(HiveInspectors.scala:174)
at org.apache.spark.sql.hive.HiveSimpleUdf.javaClassToDataType(hiveUdfs.scala:76)
at org.apache.spark.sql.hive.HiveSimpleUdf.dataType$lzycompute(hiveUdfs.scala:106)
at org.apache.spark.sql.hive.HiveSimpleUdf.dataType(hiveUdfs.scala:106)
at org.apache.spark.sql.catalyst.expressions.Alias.toAttribute(namedExpressions.scala:131)
at org.apache.spark.sql.catalyst.planning.PhysicalOperation$$anonfun$collectAliases$1.applyOrElse(patterns.scala:95)
at org.apache.spark.sql.catalyst.planning.PhysicalOperation$$anonfun$collectAliases$1.applyOrElse(patterns.scala:94)
at scala.runtime.AbstractPartialFunction.apply(AbstractPartialFunction.scala:33)
at scala.collection.TraversableLike$$anonfun$collect$1.apply(TraversableLike.scala:278)
...
To make udf developers more understood, we need to throw a more suitable exception.

Author: Takeshi YAMAMURO <li...@gmail.com>

Closes #7248 from maropu/FixBugInHiveInspectors and squashes the following commits:

1c3df2a [Takeshi YAMAMURO] Fix comments
56305de [Takeshi YAMAMURO] Fix conflicts
92ed7a6 [Takeshi YAMAMURO] Throw an exception when java list type used
2844a8e [Takeshi YAMAMURO] Apply comments
7114a47 [Takeshi YAMAMURO] Add TODO comments in UDFToListString of HiveUdfSuite
fdb2ae4 [Takeshi YAMAMURO] Add StringToUtf8 to comvert String into UTF8String
af61f2e [Takeshi YAMAMURO] Remove a new type
7f812fd [Takeshi YAMAMURO] Fix code-style errors
6984bf4 [Takeshi YAMAMURO] Apply review comments
93e3d4e [Takeshi YAMAMURO] Add a blank line at the end of UDFToListString
ee232db [Takeshi YAMAMURO] Support List as a return type in Hive UDF
1e82316 [Takeshi YAMAMURO] Apply comments
21e8763 [Takeshi YAMAMURO] Add TODO comments in UDFToListString of HiveUdfSuite
a488712 [Takeshi YAMAMURO] Add StringToUtf8 to comvert String into UTF8String
1c7b9d1 [Takeshi YAMAMURO] Remove a new type
f965c34 [Takeshi YAMAMURO] Fix code-style errors
9406416 [Takeshi YAMAMURO] Apply review comments
e21ce7e [Takeshi YAMAMURO] Add a blank line at the end of UDFToListString
e553f10 [Takeshi YAMAMURO] Support List as a return type in Hive UDF


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/1821fc16
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/1821fc16
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/1821fc16

Branch: refs/heads/master
Commit: 1821fc165808143e98b3d9626141b1a55bde90ac
Parents: 929dfa2
Author: Takeshi YAMAMURO <li...@gmail.com>
Authored: Mon Jul 6 19:44:31 2015 -0700
Committer: Michael Armbrust <mi...@databricks.com>
Committed: Mon Jul 6 19:44:31 2015 -0700

----------------------------------------------------------------------
 .../apache/spark/sql/hive/HiveInspectors.scala  | 10 +++++-
 .../spark/sql/hive/execution/UDFToListInt.java  | 29 ++++++++++++++++++
 .../sql/hive/execution/UDFToListString.java     | 29 ++++++++++++++++++
 .../spark/sql/hive/execution/HiveUDFSuite.scala | 32 +++++++++++++++++++-
 4 files changed, 98 insertions(+), 2 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/1821fc16/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveInspectors.scala
----------------------------------------------------------------------
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveInspectors.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveInspectors.scala
index a6b8ead..7423d80 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveInspectors.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveInspectors.scala
@@ -26,8 +26,8 @@ import org.apache.hadoop.{io => hadoopIo}
 
 import org.apache.spark.sql.catalyst.expressions._
 import org.apache.spark.sql.catalyst.util.DateTimeUtils
-import org.apache.spark.sql.types
 import org.apache.spark.sql.types._
+import org.apache.spark.sql.{AnalysisException, types}
 import org.apache.spark.unsafe.types.UTF8String
 
 /* Implicit conversions */
@@ -218,6 +218,14 @@ private[hive] trait HiveInspectors {
 
     // Hive seems to return this for struct types?
     case c: Class[_] if c == classOf[java.lang.Object] => NullType
+
+    // java list type unsupported
+    case c: Class[_] if c == classOf[java.util.List[_]] =>
+      throw new AnalysisException(
+        "List type in java is unsupported because " +
+        "JVM type erasure makes spark fail to catch a component type in List<>")
+
+    case c => throw new AnalysisException(s"Unsupported java type $c")
   }
 
   /**

http://git-wip-us.apache.org/repos/asf/spark/blob/1821fc16/sql/hive/src/test/java/org/apache/spark/sql/hive/execution/UDFToListInt.java
----------------------------------------------------------------------
diff --git a/sql/hive/src/test/java/org/apache/spark/sql/hive/execution/UDFToListInt.java b/sql/hive/src/test/java/org/apache/spark/sql/hive/execution/UDFToListInt.java
new file mode 100644
index 0000000..67576a7
--- /dev/null
+++ b/sql/hive/src/test/java/org/apache/spark/sql/hive/execution/UDFToListInt.java
@@ -0,0 +1,29 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.hive.execution;
+
+import org.apache.hadoop.hive.ql.exec.UDF;
+
+import java.util.Arrays;
+import java.util.List;
+
+public class UDFToListInt extends UDF {
+    public List<Integer> evaluate(Object o) {
+        return Arrays.asList(1, 2, 3);
+    }
+}

http://git-wip-us.apache.org/repos/asf/spark/blob/1821fc16/sql/hive/src/test/java/org/apache/spark/sql/hive/execution/UDFToListString.java
----------------------------------------------------------------------
diff --git a/sql/hive/src/test/java/org/apache/spark/sql/hive/execution/UDFToListString.java b/sql/hive/src/test/java/org/apache/spark/sql/hive/execution/UDFToListString.java
new file mode 100644
index 0000000..f02395c
--- /dev/null
+++ b/sql/hive/src/test/java/org/apache/spark/sql/hive/execution/UDFToListString.java
@@ -0,0 +1,29 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.hive.execution;
+
+import org.apache.hadoop.hive.ql.exec.UDF;
+
+import java.util.Arrays;
+import java.util.List;
+
+public class UDFToListString extends UDF {
+    public List<String> evaluate(Object o) {
+        return Arrays.asList("data1", "data2", "data3");
+    }
+}

http://git-wip-us.apache.org/repos/asf/spark/blob/1821fc16/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveUDFSuite.scala
----------------------------------------------------------------------
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveUDFSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveUDFSuite.scala
index 56b0bef..4468620 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveUDFSuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveUDFSuite.scala
@@ -28,7 +28,7 @@ import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectIn
 import org.apache.hadoop.hive.serde2.objectinspector.{ObjectInspector, ObjectInspectorFactory}
 import org.apache.hadoop.hive.serde2.{AbstractSerDe, SerDeStats}
 import org.apache.hadoop.io.Writable
-import org.apache.spark.sql.{QueryTest, Row}
+import org.apache.spark.sql.{AnalysisException, QueryTest, Row}
 import org.apache.spark.sql.hive.test.TestHive
 
 import org.apache.spark.util.Utils
@@ -133,6 +133,36 @@ class HiveUDFSuite extends QueryTest {
     TestHive.reset()
   }
 
+  test("UDFToListString") {
+    val testData = TestHive.sparkContext.parallelize(StringCaseClass("") :: Nil).toDF()
+    testData.registerTempTable("inputTable")
+
+    sql(s"CREATE TEMPORARY FUNCTION testUDFToListString AS '${classOf[UDFToListString].getName}'")
+    val errMsg = intercept[AnalysisException] {
+      sql("SELECT testUDFToListString(s) FROM inputTable")
+    }
+    assert(errMsg.getMessage === "List type in java is unsupported because " +
+      "JVM type erasure makes spark fail to catch a component type in List<>;")
+
+    sql("DROP TEMPORARY FUNCTION IF EXISTS testUDFToListString")
+    TestHive.reset()
+  }
+
+  test("UDFToListInt") {
+    val testData = TestHive.sparkContext.parallelize(StringCaseClass("") :: Nil).toDF()
+    testData.registerTempTable("inputTable")
+
+    sql(s"CREATE TEMPORARY FUNCTION testUDFToListInt AS '${classOf[UDFToListInt].getName}'")
+    val errMsg = intercept[AnalysisException] {
+      sql("SELECT testUDFToListInt(s) FROM inputTable")
+    }
+    assert(errMsg.getMessage === "List type in java is unsupported because " +
+      "JVM type erasure makes spark fail to catch a component type in List<>;")
+
+    sql("DROP TEMPORARY FUNCTION IF EXISTS testUDFToListInt")
+    TestHive.reset()
+  }
+
   test("UDFListListInt") {
     val testData = TestHive.sparkContext.parallelize(
       ListListIntCaseClass(Nil) ::


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org