You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by gu...@apache.org on 2023/02/15 11:38:54 UTC
[spark] branch branch-3.4 updated: [SPARK-42401][SQL][FOLLOWUP] Always set `containsNull=true` for `array_insert`
This is an automated email from the ASF dual-hosted git repository.
gurwls223 pushed a commit to branch branch-3.4
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/branch-3.4 by this push:
new 9e85162926a [SPARK-42401][SQL][FOLLOWUP] Always set `containsNull=true` for `array_insert`
9e85162926a is described below
commit 9e85162926a852b19aa1338e4ef3c8c33d02be0e
Author: Bruce Robbins <be...@gmail.com>
AuthorDate: Wed Feb 15 20:38:30 2023 +0900
[SPARK-42401][SQL][FOLLOWUP] Always set `containsNull=true` for `array_insert`
### What changes were proposed in this pull request?
Always set `containsNull=true` in the data type returned by `ArrayInsert#dataType`.
### Why are the changes needed?
PR #39970 fixed an issue where the data type for `array_insert` did not always have `containsNull=true` when the user was explicitly inserting a nullable value into the array. However, that fix does not handle the case where `array_insert` implicitly inserts null values into the array (e.g., when the insertion position is out of range and the gap between the existing elements and the new element is padded with nulls):
```
spark-sql> select array_insert(array('1', '2', '3', '4'), -6, '5');
23/02/14 16:10:19 ERROR Executor: Exception in task 0.0 in stage 0.0 (TID 0)
java.lang.NullPointerException
at org.apache.spark.sql.catalyst.expressions.codegen.UnsafeWriter.write(UnsafeWriter.java:110)
at org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage1.project_doConsume_0$(Unknown Source)
at org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage1.processNext(Unknown Source)
at org.apache.spark.sql.execution.BufferedRowIterator.hasNext(BufferedRowIterator.java:43)
```
Because we can't know at planning time whether the insertion position will be out of range, we should always set `containsNull=true` on the data type for `array_insert`.
### Does this PR introduce _any_ user-facing change?
No.
### How was this patch tested?
New unit tests.
Closes #40026 from bersprockets/array_insert_null_anytime.
Authored-by: Bruce Robbins <be...@gmail.com>
Signed-off-by: Hyukjin Kwon <gu...@apache.org>
(cherry picked from commit fb5b44f46b35562330e5e89133a0bca8e0bee36b)
Signed-off-by: Hyukjin Kwon <gu...@apache.org>
---
.../spark/sql/catalyst/expressions/collectionOperations.scala | 2 +-
.../sql/catalyst/expressions/CollectionExpressionsSuite.scala | 9 ++++++++-
.../scala/org/apache/spark/sql/DataFrameFunctionsSuite.scala | 9 ++++++++-
3 files changed, 17 insertions(+), 3 deletions(-)
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala
index 53d8ff160c0..28c4a9eba68 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala
@@ -4840,7 +4840,7 @@ case class ArrayInsert(srcArrayExpr: Expression, posExpr: Expression, itemExpr:
override def third: Expression = itemExpr
override def prettyName: String = "array_insert"
- override def dataType: DataType = if (third.nullable) first.dataType.asNullable else first.dataType
+ override def dataType: DataType = first.dataType.asNullable // out of range pos will add nulls
override def nullable: Boolean = first.nullable | second.nullable
@transient private lazy val elementType: DataType =
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CollectionExpressionsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CollectionExpressionsSuite.scala
index 64b9c18605d..60300ba62f2 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CollectionExpressionsSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CollectionExpressionsSuite.scala
@@ -2753,13 +2753,20 @@ class CollectionExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper
}
- test("SPARK-42401: Array insert of null value") {
+ test("SPARK-42401: Array insert of null value (explicit)") {
val a = Literal.create(Seq("b", "a", "c"), ArrayType(StringType, false))
checkEvaluation(ArrayInsert(
a, Literal(2), Literal.create(null, StringType)), Seq("b", null, "a", "c")
)
}
+ test("SPARK-42401: Array insert of null value (implicit)") {
+ val a = Literal.create(Seq("b", "a", "c"), ArrayType(StringType, false))
+ checkEvaluation(ArrayInsert(
+ a, Literal(5), Literal.create("q", StringType)), Seq("b", "a", "c", null, "q")
+ )
+ }
+
test("SPARK-42401: Array append of null value") {
val a = Literal.create(Seq("b", "a", "c"), ArrayType(StringType, false))
checkEvaluation(ArrayAppend(
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameFunctionsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameFunctionsSuite.scala
index 94f813a2c6b..bd03d292820 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameFunctionsSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameFunctionsSuite.scala
@@ -5432,13 +5432,20 @@ class DataFrameFunctionsSuite extends QueryTest with SharedSparkSession {
)
}
- test("SPARK-42401: array_insert - insert null") {
+ test("SPARK-42401: array_insert - explicitly insert null") {
checkAnswer(
sql("select array_insert(array('b', 'a', 'c'), 2, cast(null as string))"),
Seq(Row(Seq("b", null, "a", "c")))
)
}
+ test("SPARK-42401: array_insert - implicitly insert null") {
+ checkAnswer(
+ sql("select array_insert(array('b', 'a', 'c'), 5, 'q')"),
+ Seq(Row(Seq("b", "a", "c", null, "q")))
+ )
+ }
+
test("SPARK-42401: array_append - append null") {
checkAnswer(
sql("select array_append(array('b', 'a', 'c'), cast(null as string))"),
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org