You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by ma...@apache.org on 2021/10/13 16:41:33 UTC
[spark] branch branch-3.1 updated: [SPARK-36993][SQL] Fix
json_tuple throw NPE if fields exist no foldable null value
This is an automated email from the ASF dual-hosted git repository.
maxgekk pushed a commit to branch branch-3.1
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/branch-3.1 by this push:
new fe2f646 [SPARK-36993][SQL] Fix json_tuple throw NPE if fields exist no foldable null value
fe2f646 is described below
commit fe2f646a7ee2d9c456d9aa1a6916cc478b73dd12
Author: ulysses-you <ul...@gmail.com>
AuthorDate: Wed Oct 13 19:36:16 2021 +0300
[SPARK-36993][SQL] Fix json_tuple throw NPE if fields exist no foldable null value
### What changes were proposed in this pull request?
Wrap `expr.eval(input)` in an `Option` in `json_tuple`, so that a field name that evaluates to null is passed on as null instead of being dereferenced.
### Why are the changes needed?
If a field-name argument of `json_tuple` is a non-foldable expression that evaluates to null, Spark throws an NPE when it calls `toString` on the evaluated field name.
e.g. the query will fail with:
```SQL
SELECT json_tuple('{"a":"1"}', if(c1 < 1, null, 'a')) FROM ( SELECT rand() AS c1 );
```
```
Caused by: java.lang.NullPointerException
at org.apache.spark.sql.catalyst.expressions.JsonTuple.$anonfun$parseRow$2(jsonExpressions.scala:435)
at scala.collection.TraversableLike.$anonfun$map$1(TraversableLike.scala:286)
at scala.collection.mutable.ResizableArray.foreach(ResizableArray.scala:62)
at scala.collection.mutable.ResizableArray.foreach$(ResizableArray.scala:55)
at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:49)
at scala.collection.TraversableLike.map(TraversableLike.scala:286)
at scala.collection.TraversableLike.map$(TraversableLike.scala:279)
at scala.collection.AbstractTraversable.map(Traversable.scala:108)
at org.apache.spark.sql.catalyst.expressions.JsonTuple.parseRow(jsonExpressions.scala:435)
at org.apache.spark.sql.catalyst.expressions.JsonTuple.$anonfun$eval$6(jsonExpressions.scala:413)
```
### Does this PR introduce _any_ user-facing change?
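Yes, it is a bug fix: `json_tuple` now returns NULL for a field whose name evaluates to null instead of failing with an NPE.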
### How was this patch tested?
Added tests in `json-functions.sql`.
Closes #34268 from ulysses-you/SPARK-36993.
Authored-by: ulysses-you <ul...@gmail.com>
Signed-off-by: Max Gekk <ma...@gmail.com>
(cherry picked from commit 7aedce44b73d9b0c56863f970257abf52ce551ce)
Signed-off-by: Max Gekk <ma...@gmail.com>
---
.../sql/catalyst/expressions/jsonExpressions.scala | 7 +++++--
.../test/resources/sql-tests/inputs/json-functions.sql | 4 ++++
.../resources/sql-tests/results/json-functions.sql.out | 18 +++++++++++++++++-
3 files changed, 26 insertions(+), 3 deletions(-)
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/jsonExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/jsonExpressions.scala
index a363615..5abac01 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/jsonExpressions.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/jsonExpressions.scala
@@ -426,12 +426,15 @@ case class JsonTuple(children: Seq[Expression])
foldableFieldNames.map(_.orNull)
} else if (constantFields == 0) {
// none are foldable so all field names need to be evaluated from the input row
- fieldExpressions.map(_.eval(input).asInstanceOf[UTF8String].toString)
+ fieldExpressions.map { expr =>
+ Option(expr.eval(input)).map(_.asInstanceOf[UTF8String].toString).orNull
+ }
} else {
// if there is a mix of constant and non-constant expressions
// prefer the cached copy when available
foldableFieldNames.zip(fieldExpressions).map {
- case (null, expr) => expr.eval(input).asInstanceOf[UTF8String].toString
+ case (null, expr) =>
+ Option(expr.eval(input)).map(_.asInstanceOf[UTF8String].toString).orNull
case (fieldName, _) => fieldName.orNull
}
}
diff --git a/sql/core/src/test/resources/sql-tests/inputs/json-functions.sql b/sql/core/src/test/resources/sql-tests/inputs/json-functions.sql
index f6fa441..245a6a6 100644
--- a/sql/core/src/test/resources/sql-tests/inputs/json-functions.sql
+++ b/sql/core/src/test/resources/sql-tests/inputs/json-functions.sql
@@ -25,6 +25,10 @@ select from_json();
SELECT json_tuple('{"a" : 1, "b" : 2}', CAST(NULL AS STRING), 'b', CAST(NULL AS STRING), 'a');
CREATE TEMPORARY VIEW jsonTable(jsonField, a) AS SELECT * FROM VALUES ('{"a": 1, "b": 2}', 'a');
SELECT json_tuple(jsonField, 'b', CAST(NULL AS STRING), a) FROM jsonTable;
+-- json_tuple with a non-foldable field name that evaluates to null
+SELECT json_tuple('{"a":"1"}', if(c1 < 1, null, 'a')) FROM ( SELECT rand() AS c1 );
+SELECT json_tuple('{"a":"1"}', if(c1 < 1, null, 'a'), if(c2 < 1, null, 'a')) FROM ( SELECT 0 AS c1, rand() AS c2 );
+
-- Clean up
DROP VIEW IF EXISTS jsonTable;
diff --git a/sql/core/src/test/resources/sql-tests/results/json-functions.sql.out b/sql/core/src/test/resources/sql-tests/results/json-functions.sql.out
index b14e3e1..138e70c 100644
--- a/sql/core/src/test/resources/sql-tests/results/json-functions.sql.out
+++ b/sql/core/src/test/resources/sql-tests/results/json-functions.sql.out
@@ -1,5 +1,5 @@
-- Automatically generated by SQLQueryTestSuite
--- Number of queries: 71
+-- Number of queries: 73
-- !query
@@ -192,6 +192,22 @@ struct<c0:string,c1:string,c2:string>
-- !query
+SELECT json_tuple('{"a":"1"}', if(c1 < 1, null, 'a')) FROM ( SELECT rand() AS c1 )
+-- !query schema
+struct<c0:string>
+-- !query output
+NULL
+
+
+-- !query
+SELECT json_tuple('{"a":"1"}', if(c1 < 1, null, 'a'), if(c2 < 1, null, 'a')) FROM ( SELECT 0 AS c1, rand() AS c2 )
+-- !query schema
+struct<c0:string,c1:string>
+-- !query output
+NULL NULL
+
+
+-- !query
DROP VIEW IF EXISTS jsonTable
-- !query schema
struct<>
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org