You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by do...@apache.org on 2019/05/13 19:42:02 UTC
[spark] branch branch-2.4 updated: [SPARK-27671][SQL] Fix error
when casting from a nested null in a struct
This is an automated email from the ASF dual-hosted git repository.
dongjoon pushed a commit to branch branch-2.4
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/branch-2.4 by this push:
new aae03ef [SPARK-27671][SQL] Fix error when casting from a nested null in a struct
aae03ef is described below
commit aae03ef5d9098dcce2cf590198774f507de72012
Author: Liang-Chi Hsieh <vi...@gmail.com>
AuthorDate: Mon May 13 12:40:46 2019 -0700
[SPARK-27671][SQL] Fix error when casting from a nested null in a struct
Currently, when a struct contains a null in a nested field, casting from that struct throws an error.
```scala
scala> sql("select cast(struct(1, null) as struct<a:int,b:int>)").show
scala.MatchError: NullType (of class org.apache.spark.sql.types.NullType$)
at org.apache.spark.sql.catalyst.expressions.Cast.castToInt(Cast.scala:447)
at org.apache.spark.sql.catalyst.expressions.Cast.cast(Cast.scala:635)
at org.apache.spark.sql.catalyst.expressions.Cast.$anonfun$castStruct$1(Cast.scala:603)
```
Similarly, an inline table, which casts a null in a nested field under the hood, also throws an error.
```scala
scala> sql("select * FROM VALUES (('a', (10, null))), (('b', (10, 50))), (('c', null)) AS tab(x, y)").show
org.apache.spark.sql.AnalysisException: failed to evaluate expression named_struct('col1', 10, 'col2', NULL): NullType (of class org.apache.spark.sql.types.NullType$); line 1 pos 14
at org.apache.spark.sql.catalyst.analysis.package$AnalysisErrorAt.failAnalysis(package.scala:47)
at org.apache.spark.sql.catalyst.analysis.ResolveInlineTables.$anonfun$convert$6(ResolveInlineTables.scala:106)
```
This fixes the issue.
Added tests.
Closes #24576 from viirya/cast-null.
Authored-by: Liang-Chi Hsieh <vi...@gmail.com>
Signed-off-by: Dongjoon Hyun <dh...@apple.com>
(cherry picked from commit 8b0bdaa8e018607f1c4e790d1c0eb8cd480dee24)
Signed-off-by: Dongjoon Hyun <dh...@apple.com>
---
.../org/apache/spark/sql/catalyst/expressions/Cast.scala | 6 ++++++
.../apache/spark/sql/catalyst/expressions/CastSuite.scala | 15 +++++++++++++++
.../test/scala/org/apache/spark/sql/DataFrameSuite.scala | 9 +++++++++
3 files changed, 30 insertions(+)
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala
index ac02dac..780db65 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala
@@ -607,6 +607,12 @@ case class Cast(child: Expression, dataType: DataType, timeZoneId: Option[String
// We can return what the children return. Same thing should happen in the codegen path.
if (DataType.equalsStructurally(from, to)) {
identity
+ } else if (from == NullType) {
+ // According to `canCast`, NullType can be casted to any type.
+ // For primitive types, we don't reach here because the guard of `nullSafeEval`.
+ // But for nested types like struct, we might reach here for nested null type field.
+ // We won't call the returned function actually, but returns a placeholder.
+ _ => throw new SparkException(s"should not directly cast from NullType to $to.")
} else {
to match {
case dt if dt == from => identity[Any]
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuite.scala
index b1531ba..c9a8c29 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuite.scala
@@ -989,4 +989,19 @@ class CastSuite extends SparkFunSuite with ExpressionEvalHelper {
}
}
}
+
+ test("SPARK-27671: cast from nested null type in struct") {
+ import DataTypeTestUtils._
+
+ atomicTypes.foreach { atomicType =>
+ val struct = Literal.create(
+ InternalRow(null),
+ StructType(Seq(StructField("a", NullType, nullable = true))))
+
+ val ret = cast(struct, StructType(Seq(
+ StructField("a", atomicType, nullable = true))))
+ assert(ret.resolved)
+ checkEvaluation(ret, InternalRow(null))
+ }
+ }
}
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala
index 5075209..3d74206 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala
@@ -2622,4 +2622,13 @@ class DataFrameSuite extends QueryTest with SharedSQLContext {
checkAnswer(res, Row("1-1", 6, 6))
}
}
+
+ test("SPARK-27671: Fix analysis exception when casting null in nested field in struct") {
+ val df = sql("SELECT * FROM VALUES (('a', (10, null))), (('b', (10, 50))), " +
+ "(('c', null)) AS tab(x, y)")
+ checkAnswer(df, Row("a", Row(10, null)) :: Row("b", Row(10, 50)) :: Row("c", null) :: Nil)
+
+ val cast = sql("SELECT cast(struct(1, null) AS struct<a:int,b:int>)")
+ checkAnswer(cast, Row(Row(1, null)) :: Nil)
+ }
}
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org