You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by li...@apache.org on 2018/02/01 04:45:59 UTC
spark git commit: [SPARK-21396][SQL] Fixes MatchError when UDTs are
passed through Hive Thriftserver
Repository: spark
Updated Branches:
refs/heads/master 56ae32657 -> b2e7677f4
[SPARK-21396][SQL] Fixes MatchError when UDTs are passed through Hive Thriftserver
Signed-off-by: Atallah Hezbor <atallahhezbor@gmail.com>
## What changes were proposed in this pull request?
This PR proposes modifying the match statement that gets the columns of a row in HiveThriftServer. There was previously no case for `UserDefinedType`, so querying a table that contained them would throw a match error. The changes catch that case and return the string representation.
## How was this patch tested?
While I would have liked to add a unit test, I couldn't easily incorporate UDTs into the ``HiveThriftServer2Suites`` pipeline. With some guidance I would be happy to push a commit with tests.
Instead I did a manual test by loading a `DataFrame` with a Point UDT in a spark shell with a HiveThriftServer running. Then, in beeline, I connected to the server and queried that table.
Here is the result before the change
```
0: jdbc:hive2://localhost:10000> select * from chicago;
Error: scala.MatchError: org.apache.spark.sql.PointUDT2d980dc3 (of class org.apache.spark.sql.PointUDT) (state=,code=0)
```
And after the change:
```
0: jdbc:hive2://localhost:10000> select * from chicago;
+---------------------------------------+--------------+------------------------+---------------------+--+
| __fid__ | case_number | dtg | geom |
+---------------------------------------+--------------+------------------------+---------------------+--+
| 109602f9-54f8-414b-8c6f-42b1a337643e | 2 | 2016-01-01 19:00:00.0 | POINT (-77 38) |
| 709602f9-fcff-4429-8027-55649b6fd7ed | 1 | 2015-12-31 19:00:00.0 | POINT (-76.5 38.5) |
| 009602f9-fcb5-45b1-a867-eb8ba10cab40 | 3 | 2016-01-02 19:00:00.0 | POINT (-78 39) |
+---------------------------------------+--------------+------------------------+---------------------+--+
```
Author: Atallah Hezbor <at...@gmail.com>
Closes #20385 from atallahhezbor/udts_over_hive.
Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/b2e7677f
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/b2e7677f
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/b2e7677f
Branch: refs/heads/master
Commit: b2e7677f4d3d8f47f5f148680af39d38f2b558f0
Parents: 56ae326
Author: Atallah Hezbor <at...@gmail.com>
Authored: Wed Jan 31 20:45:55 2018 -0800
Committer: gatorsmile <ga...@gmail.com>
Committed: Wed Jan 31 20:45:55 2018 -0800
----------------------------------------------------------------------
.../hive/thriftserver/SparkExecuteStatementOperation.scala | 2 +-
.../src/main/scala/org/apache/spark/sql/hive/HiveUtils.scala | 1 +
.../scala/org/apache/spark/sql/hive/HiveUtilsSuite.scala | 8 +++++++-
3 files changed, 9 insertions(+), 2 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/spark/blob/b2e7677f/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkExecuteStatementOperation.scala
----------------------------------------------------------------------
diff --git a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkExecuteStatementOperation.scala b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkExecuteStatementOperation.scala
index 664bc20..3cfc81b 100644
--- a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkExecuteStatementOperation.scala
+++ b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkExecuteStatementOperation.scala
@@ -102,7 +102,7 @@ private[hive] class SparkExecuteStatementOperation(
to += from.getAs[Timestamp](ordinal)
case BinaryType =>
to += from.getAs[Array[Byte]](ordinal)
- case _: ArrayType | _: StructType | _: MapType =>
+ case _: ArrayType | _: StructType | _: MapType | _: UserDefinedType[_] =>
val hiveString = HiveUtils.toHiveString((from.get(ordinal), dataTypes(ordinal)))
to += hiveString
}
http://git-wip-us.apache.org/repos/asf/spark/blob/b2e7677f/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveUtils.scala
----------------------------------------------------------------------
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveUtils.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveUtils.scala
index c7717d7..d9627eb 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveUtils.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveUtils.scala
@@ -460,6 +460,7 @@ private[spark] object HiveUtils extends Logging {
case (decimal: java.math.BigDecimal, DecimalType()) =>
// Hive strips trailing zeros so use its toString
HiveDecimal.create(decimal).toString
+ case (other, _ : UserDefinedType[_]) => other.toString
case (other, tpe) if primitiveTypes contains tpe => other.toString
}
http://git-wip-us.apache.org/repos/asf/spark/blob/b2e7677f/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveUtilsSuite.scala
----------------------------------------------------------------------
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveUtilsSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveUtilsSuite.scala
index 8697d47..f2b75e4 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveUtilsSuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveUtilsSuite.scala
@@ -25,7 +25,7 @@ import org.apache.spark.SparkConf
import org.apache.spark.deploy.SparkHadoopUtil
import org.apache.spark.sql.QueryTest
import org.apache.spark.sql.hive.test.TestHiveSingleton
-import org.apache.spark.sql.test.SQLTestUtils
+import org.apache.spark.sql.test.{ExamplePoint, ExamplePointUDT, SQLTestUtils}
import org.apache.spark.util.{ChildFirstURLClassLoader, MutableURLClassLoader}
class HiveUtilsSuite extends QueryTest with SQLTestUtils with TestHiveSingleton {
@@ -62,4 +62,10 @@ class HiveUtilsSuite extends QueryTest with SQLTestUtils with TestHiveSingleton
Thread.currentThread().setContextClassLoader(contextClassLoader)
}
}
+
+ test("toHiveString correctly handles UDTs") {
+ val point = new ExamplePoint(50.0, 50.0)
+ val tpe = new ExamplePointUDT()
+ assert(HiveUtils.toHiveString((point, tpe)) === "(50.0, 50.0)")
+ }
}
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org