You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by li...@apache.org on 2015/02/13 07:18:49 UTC
spark git commit: [SPARK-3365][SQL]Wrong schema generated for List
type
Repository: spark
Updated Branches:
refs/heads/master 2aea892eb -> 1c8633f3f
[SPARK-3365][SQL]Wrong schema generated for List type
This PR fixes the issue SPARK-3365.
The reason is that Spark generated a wrong schema for the `List` type in `ScalaReflection.scala`.
for example:
the generated schema for type `Seq[String]` is:
```
{"name":"x","type":{"type":"array","elementType":"string","containsNull":true},"nullable":true,"metadata":{}}
```
the generated schema for type `List[String]` is:
```
{"name":"x","type":{"type":"struct","fields":[]},"nullable":true,"metadata":{}}
```
Author: tianyi <ti...@gmail.com>
Closes #4581 from tianyi/SPARK-3365 and squashes the following commits:
a097e86 [tianyi] change the order of resolution in ScalaReflection.scala
Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/1c8633f3
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/1c8633f3
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/1c8633f3
Branch: refs/heads/master
Commit: 1c8633f3fe9d814c83384e339b958740c250c00c
Parents: 2aea892
Author: tianyi <ti...@gmail.com>
Authored: Thu Feb 12 22:18:39 2015 -0800
Committer: Cheng Lian <li...@databricks.com>
Committed: Thu Feb 12 22:18:39 2015 -0800
----------------------------------------------------------------------
.../spark/sql/catalyst/ScalaReflection.scala | 30 ++++++++++----------
.../sql/catalyst/ScalaReflectionSuite.scala | 5 ++++
2 files changed, 20 insertions(+), 15 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/spark/blob/1c8633f3/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/ScalaReflection.scala
----------------------------------------------------------------------
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/ScalaReflection.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/ScalaReflection.scala
index 5d9c331..11fd443 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/ScalaReflection.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/ScalaReflection.scala
@@ -122,6 +122,21 @@ trait ScalaReflection {
case t if t <:< typeOf[Option[_]] =>
val TypeRef(_, _, Seq(optType)) = t
Schema(schemaFor(optType).dataType, nullable = true)
+ // Need to decide if we actually need a special type here.
+ case t if t <:< typeOf[Array[Byte]] => Schema(BinaryType, nullable = true)
+ case t if t <:< typeOf[Array[_]] =>
+ val TypeRef(_, _, Seq(elementType)) = t
+ val Schema(dataType, nullable) = schemaFor(elementType)
+ Schema(ArrayType(dataType, containsNull = nullable), nullable = true)
+ case t if t <:< typeOf[Seq[_]] =>
+ val TypeRef(_, _, Seq(elementType)) = t
+ val Schema(dataType, nullable) = schemaFor(elementType)
+ Schema(ArrayType(dataType, containsNull = nullable), nullable = true)
+ case t if t <:< typeOf[Map[_, _]] =>
+ val TypeRef(_, _, Seq(keyType, valueType)) = t
+ val Schema(valueDataType, valueNullable) = schemaFor(valueType)
+ Schema(MapType(schemaFor(keyType).dataType,
+ valueDataType, valueContainsNull = valueNullable), nullable = true)
case t if t <:< typeOf[Product] =>
val formalTypeArgs = t.typeSymbol.asClass.typeParams
val TypeRef(_, _, actualTypeArgs) = t
@@ -144,21 +159,6 @@ trait ScalaReflection {
schemaFor(p.typeSignature.substituteTypes(formalTypeArgs, actualTypeArgs))
StructField(p.name.toString, dataType, nullable)
}), nullable = true)
- // Need to decide if we actually need a special type here.
- case t if t <:< typeOf[Array[Byte]] => Schema(BinaryType, nullable = true)
- case t if t <:< typeOf[Array[_]] =>
- val TypeRef(_, _, Seq(elementType)) = t
- val Schema(dataType, nullable) = schemaFor(elementType)
- Schema(ArrayType(dataType, containsNull = nullable), nullable = true)
- case t if t <:< typeOf[Seq[_]] =>
- val TypeRef(_, _, Seq(elementType)) = t
- val Schema(dataType, nullable) = schemaFor(elementType)
- Schema(ArrayType(dataType, containsNull = nullable), nullable = true)
- case t if t <:< typeOf[Map[_, _]] =>
- val TypeRef(_, _, Seq(keyType, valueType)) = t
- val Schema(valueDataType, valueNullable) = schemaFor(valueType)
- Schema(MapType(schemaFor(keyType).dataType,
- valueDataType, valueContainsNull = valueNullable), nullable = true)
case t if t <:< typeOf[String] => Schema(StringType, nullable = true)
case t if t <:< typeOf[Timestamp] => Schema(TimestampType, nullable = true)
case t if t <:< typeOf[java.sql.Date] => Schema(DateType, nullable = true)
http://git-wip-us.apache.org/repos/asf/spark/blob/1c8633f3/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/ScalaReflectionSuite.scala
----------------------------------------------------------------------
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/ScalaReflectionSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/ScalaReflectionSuite.scala
index d0f547d..eee00e3 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/ScalaReflectionSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/ScalaReflectionSuite.scala
@@ -61,6 +61,7 @@ case class OptionalData(
case class ComplexData(
arrayField: Seq[Int],
arrayField1: Array[Int],
+ arrayField2: List[Int],
arrayFieldContainsNull: Seq[java.lang.Integer],
mapField: Map[Int, Long],
mapFieldValueContainsNull: Map[Int, java.lang.Long],
@@ -138,6 +139,10 @@ class ScalaReflectionSuite extends FunSuite {
ArrayType(IntegerType, containsNull = false),
nullable = true),
StructField(
+ "arrayField2",
+ ArrayType(IntegerType, containsNull = false),
+ nullable = true),
+ StructField(
"arrayFieldContainsNull",
ArrayType(IntegerType, containsNull = true),
nullable = true),
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org