You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by do...@apache.org on 2020/07/08 20:35:46 UTC
[spark] branch branch-2.4 updated: [SPARK-32167][2.4][SQL] Fix GetArrayStructFields to respect inner field's nullability together
This is an automated email from the ASF dual-hosted git repository.
dongjoon pushed a commit to branch branch-2.4
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/branch-2.4 by this push:
new 146062d [SPARK-32167][2.4][SQL] Fix GetArrayStructFields to respect inner field's nullability together
146062d is described below
commit 146062da380eebe4620414df90392b5315bcb145
Author: Wenchen Fan <we...@databricks.com>
AuthorDate: Wed Jul 8 13:31:35 2020 -0700
[SPARK-32167][2.4][SQL] Fix GetArrayStructFields to respect inner field's nullability together
### What changes were proposed in this pull request?
Backport https://github.com/apache/spark/pull/28992 to 2.4
Fix nullability of `GetArrayStructFields`. It should consider both the original array's `containsNull` and the inner field's nullability.
### Why are the changes needed?
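Fix a correctness issue: `GetArrayStructFields` previously took its `containsNull` only from the original array, so extracting a nullable field from an array with `containsNull = false` was reported as containing no nulls even though the extracted values can be null.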
### Does this PR introduce _any_ user-facing change?
Yes. See the added test.
### How was this patch tested?
A new unit test (in `ComplexTypeSuite`) and a new end-to-end test (in `ComplexTypesSuite`).
Closes #29019 from cloud-fan/port.
Authored-by: Wenchen Fan <we...@databricks.com>
Signed-off-by: Dongjoon Hyun <do...@apache.org>
---
.../expressions/complexTypeExtractors.scala | 2 +-
.../catalyst/expressions/ComplexTypeSuite.scala | 27 ++++++++++++++++++++++
.../org/apache/spark/sql/ComplexTypesSuite.scala | 11 +++++++++
3 files changed, 39 insertions(+), 1 deletion(-)
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeExtractors.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeExtractors.scala
index 8994eef..a3dd983 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeExtractors.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeExtractors.scala
@@ -57,7 +57,7 @@ object ExtractValue {
val fieldName = v.toString
val ordinal = findField(fields, fieldName, resolver)
GetArrayStructFields(child, fields(ordinal).copy(name = fieldName),
- ordinal, fields.length, containsNull)
+ ordinal, fields.length, containsNull || fields(ordinal).nullable)
case (_: ArrayType, _) => GetArrayItem(child, extraction)
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ComplexTypeSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ComplexTypeSuite.scala
index 77aaf55..c50191d 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ComplexTypeSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ComplexTypeSuite.scala
@@ -18,9 +18,11 @@
package org.apache.spark.sql.catalyst.expressions
import org.apache.spark.SparkFunSuite
+import org.apache.spark.sql.Row
import org.apache.spark.sql.catalyst.analysis.UnresolvedExtractValue
import org.apache.spark.sql.catalyst.dsl.expressions._
import org.apache.spark.sql.catalyst.expressions.codegen.CodegenContext
+import org.apache.spark.sql.internal.SQLConf
import org.apache.spark.sql.types._
import org.apache.spark.unsafe.types.UTF8String
@@ -125,6 +127,31 @@ class ComplexTypeSuite extends SparkFunSuite with ExpressionEvalHelper {
checkEvaluation(getArrayStructFields(nullArrayStruct, "a"), null)
}
+ test("SPARK-32167: nullability of GetArrayStructFields") {
+ val resolver = SQLConf.get.resolver
+
+ val array1 = ArrayType(
+ new StructType().add("a", "int", nullable = true),
+ containsNull = false)
+ val data1 = Literal.create(Seq(Row(null)), array1)
+ val get1 = ExtractValue(data1, Literal("a"), resolver).asInstanceOf[GetArrayStructFields]
+ assert(get1.containsNull)
+
+ val array2 = ArrayType(
+ new StructType().add("a", "int", nullable = false),
+ containsNull = true)
+ val data2 = Literal.create(Seq(null), array2)
+ val get2 = ExtractValue(data2, Literal("a"), resolver).asInstanceOf[GetArrayStructFields]
+ assert(get2.containsNull)
+
+ val array3 = ArrayType(
+ new StructType().add("a", "int", nullable = false),
+ containsNull = false)
+ val data3 = Literal.create(Seq(Row(1)), array3)
+ val get3 = ExtractValue(data3, Literal("a"), resolver).asInstanceOf[GetArrayStructFields]
+ assert(!get3.containsNull)
+ }
+
test("CreateArray") {
val intSeq = Seq(5, 10, 15, 20, 25)
val longSeq = intSeq.map(_.toLong)
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/ComplexTypesSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/ComplexTypesSuite.scala
index b74fe2f..30ce1c7 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/ComplexTypesSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/ComplexTypesSuite.scala
@@ -17,11 +17,15 @@
package org.apache.spark.sql
+import scala.collection.JavaConverters._
+
import org.apache.spark.sql.catalyst.expressions.CreateNamedStruct
import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
import org.apache.spark.sql.test.SharedSQLContext
+import org.apache.spark.sql.types.{ArrayType, StructType}
class ComplexTypesSuite extends QueryTest with SharedSQLContext {
+ import testImplicits._
override def beforeAll() {
super.beforeAll()
@@ -106,4 +110,11 @@ class ComplexTypesSuite extends QueryTest with SharedSQLContext {
checkAnswer(df1, Row(10, 12) :: Row(11, 13) :: Nil)
checkNamedStruct(df.queryExecution.optimizedPlan, expectedCount = 0)
}
+
+ test("SPARK-32167: get field from an array of struct") {
+ val innerStruct = new StructType().add("i", "int", nullable = true)
+ val schema = new StructType().add("arr", ArrayType(innerStruct, containsNull = false))
+ val df = spark.createDataFrame(List(Row(Seq(Row(1), Row(null)))).asJava, schema)
+ checkAnswer(df.select($"arr".getField("i")), Row(Seq(1, null)))
+ }
}
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org