You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by gu...@apache.org on 2019/04/30 23:28:22 UTC
[spark] branch master updated: [SPARK-27591][SQL] Fix
UnivocityParser for UserDefinedType
This is an automated email from the ASF dual-hosted git repository.
gurwls223 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
new a35043c [SPARK-27591][SQL] Fix UnivocityParser for UserDefinedType
a35043c is described below
commit a35043c9e22a9bd9e372246c8d337e016736536c
Author: Artem Kalchenko <ar...@gmail.com>
AuthorDate: Wed May 1 08:27:51 2019 +0900
[SPARK-27591][SQL] Fix UnivocityParser for UserDefinedType
## What changes were proposed in this pull request?
Fix a bug in UnivocityParser: the makeConverter method didn't work correctly for UserDefinedType
## How was this patch tested?
A test suite for UnivocityParser has been extended.
Closes #24496 from kalkolab/spark-27591.
Authored-by: Artem Kalchenko <ar...@gmail.com>
Signed-off-by: HyukjinKwon <gu...@apache.org>
---
.../spark/sql/catalyst/csv/UnivocityParser.scala | 2 +-
.../sql/catalyst/csv/UnivocityParserSuite.scala | 35 ++++++++++++++++++++++
2 files changed, 36 insertions(+), 1 deletion(-)
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/csv/UnivocityParser.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/csv/UnivocityParser.scala
index b26044e..8456b7d 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/csv/UnivocityParser.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/csv/UnivocityParser.scala
@@ -166,7 +166,7 @@ class UnivocityParser(
case _: StringType => (d: String) =>
nullSafeDatum(d, name, nullable, options)(UTF8String.fromString)
- case udt: UserDefinedType[_] => (datum: String) =>
+ case udt: UserDefinedType[_] =>
makeConverter(name, udt.sqlType, nullable)
// We don't actually hit this exception though, we keep it for understandability
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/csv/UnivocityParserSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/csv/UnivocityParserSuite.scala
index 986de12..933c576 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/csv/UnivocityParserSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/csv/UnivocityParserSuite.scala
@@ -231,4 +231,39 @@ class UnivocityParserSuite extends SparkFunSuite with SQLHelper {
Seq("en-US", "ko-KR", "ru-RU", "de-DE").foreach(checkDecimalParsing)
}
+
+ test("SPARK-27591 UserDefinedType can be read") {
+
+ @SQLUserDefinedType(udt = classOf[StringBasedUDT])
+ case class NameId(name: String, id: Int)
+
+ class StringBasedUDT extends UserDefinedType[NameId] {
+ override def sqlType: DataType = StringType
+
+ override def serialize(obj: NameId): Any = s"${obj.name}\t${obj.id}"
+
+ override def deserialize(datum: Any): NameId = datum match {
+ case s: String =>
+ val split = s.split("\t")
+ if (split.length != 2) throw new RuntimeException(s"Can't parse $s into NameId");
+ NameId(split(0), Integer.parseInt(split(1)))
+ case _ => throw new RuntimeException(s"Can't parse $datum into NameId");
+ }
+
+ override def userClass: Class[NameId] = classOf[NameId]
+ }
+
+ object StringBasedUDT extends StringBasedUDT
+
+ val input = "name\t42"
+ val expected = UTF8String.fromString(input)
+
+ val options = new CSVOptions(Map.empty[String, String], false, "GMT")
+ val parser = new UnivocityParser(StructType(Seq.empty), options)
+
+ val convertedValue = parser.makeConverter("_1", StringBasedUDT, nullable = false).apply(input)
+
+ assert(convertedValue.isInstanceOf[UTF8String])
+ assert(convertedValue == expected)
+ }
}
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org