You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by li...@apache.org on 2017/08/17 22:59:48 UTC

spark git commit: [SPARK-21677][SQL] json_tuple throws NullPointerException when column is null as string type

Repository: spark
Updated Branches:
  refs/heads/master bfdc361ed -> 7ab951885


[SPARK-21677][SQL] json_tuple throws NullPointerException when column is null as string type

## What changes were proposed in this pull request?
``` scala
scala> Seq(("""{"Hyukjin": 224, "John": 1225}""")).toDS.selectExpr("json_tuple(value, trim(null))").show()
...
java.lang.NullPointerException
	at ...
```

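Currently, a `null` field name causes `json_tuple` to throw a NullPointerException. Since a null field name cannot match any field name in the JSON, we simply output null as that column's value. This PR was described as achieving this by returning a very unlikely column name `__NullFieldName` when evaluating the field names. Note that the committed diff below does not use such a placeholder: it wraps each evaluated foldable field name in an `Option` and converts it back with `orNull`.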

## How was this patch tested?
Added unit test.

Author: Jen-Ming Chung <je...@gmail.com>

Closes #18930 from jmchung/SPARK-21677.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/7ab95188
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/7ab95188
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/7ab95188

Branch: refs/heads/master
Commit: 7ab951885fd34aa8184b70a3a39b865a239e5052
Parents: bfdc361
Author: Jen-Ming Chung <je...@gmail.com>
Authored: Thu Aug 17 15:59:45 2017 -0700
Committer: gatorsmile <ga...@gmail.com>
Committed: Thu Aug 17 15:59:45 2017 -0700

----------------------------------------------------------------------
 .../catalyst/expressions/jsonExpressions.scala  |  8 ++---
 .../expressions/JsonExpressionsSuite.scala      | 10 ++++++
 .../sql-tests/inputs/json-functions.sql         |  6 ++++
 .../sql-tests/results/json-functions.sql.out    | 34 +++++++++++++++++++-
 4 files changed, 53 insertions(+), 5 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/7ab95188/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/jsonExpressions.scala
----------------------------------------------------------------------
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/jsonExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/jsonExpressions.scala
index 17b6054..c375737 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/jsonExpressions.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/jsonExpressions.scala
@@ -362,9 +362,9 @@ case class JsonTuple(children: Seq[Expression])
   @transient private lazy val fieldExpressions: Seq[Expression] = children.tail
 
   // eagerly evaluate any foldable the field names
-  @transient private lazy val foldableFieldNames: IndexedSeq[String] = {
+  @transient private lazy val foldableFieldNames: IndexedSeq[Option[String]] = {
     fieldExpressions.map {
-      case expr if expr.foldable => expr.eval().asInstanceOf[UTF8String].toString
+      case expr if expr.foldable => Option(expr.eval()).map(_.asInstanceOf[UTF8String].toString)
       case _ => null
     }.toIndexedSeq
   }
@@ -417,7 +417,7 @@ case class JsonTuple(children: Seq[Expression])
     val fieldNames = if (constantFields == fieldExpressions.length) {
       // typically the user will provide the field names as foldable expressions
       // so we can use the cached copy
-      foldableFieldNames
+      foldableFieldNames.map(_.orNull)
     } else if (constantFields == 0) {
       // none are foldable so all field names need to be evaluated from the input row
       fieldExpressions.map(_.eval(input).asInstanceOf[UTF8String].toString)
@@ -426,7 +426,7 @@ case class JsonTuple(children: Seq[Expression])
       // prefer the cached copy when available
       foldableFieldNames.zip(fieldExpressions).map {
         case (null, expr) => expr.eval(input).asInstanceOf[UTF8String].toString
-        case (fieldName, _) => fieldName
+        case (fieldName, _) => fieldName.orNull
       }
     }
 

http://git-wip-us.apache.org/repos/asf/spark/blob/7ab95188/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/JsonExpressionsSuite.scala
----------------------------------------------------------------------
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/JsonExpressionsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/JsonExpressionsSuite.scala
index f892e80..1cd2b4f 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/JsonExpressionsSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/JsonExpressionsSuite.scala
@@ -363,6 +363,16 @@ class JsonExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper {
       InternalRow(UTF8String.fromString("b\nc")))
   }
 
+  test("SPARK-21677: json_tuple throws NullPointException when column is null as string type") {
+    checkJsonTuple(
+      JsonTuple(Literal("""{"f1": 1, "f2": 2}""") ::
+        NonFoldableLiteral("f1") ::
+        NonFoldableLiteral("cast(NULL AS STRING)") ::
+        NonFoldableLiteral("f2") ::
+        Nil),
+      InternalRow(UTF8String.fromString("1"), null, UTF8String.fromString("2")))
+  }
+
   val gmtId = Option(DateTimeUtils.TimeZoneGMT.getID)
 
   test("from_json") {

http://git-wip-us.apache.org/repos/asf/spark/blob/7ab95188/sql/core/src/test/resources/sql-tests/inputs/json-functions.sql
----------------------------------------------------------------------
diff --git a/sql/core/src/test/resources/sql-tests/inputs/json-functions.sql b/sql/core/src/test/resources/sql-tests/inputs/json-functions.sql
index b3cc2ce..5a46fb4 100644
--- a/sql/core/src/test/resources/sql-tests/inputs/json-functions.sql
+++ b/sql/core/src/test/resources/sql-tests/inputs/json-functions.sql
@@ -20,3 +20,9 @@ select from_json('{"a":1}', 'a InvalidType');
 select from_json('{"a":1}', 'a INT', named_struct('mode', 'PERMISSIVE'));
 select from_json('{"a":1}', 'a INT', map('mode', 1));
 select from_json();
+-- json_tuple
+SELECT json_tuple('{"a" : 1, "b" : 2}', CAST(NULL AS STRING), 'b', CAST(NULL AS STRING), 'a');
+CREATE TEMPORARY VIEW jsonTable(jsonField, a) AS SELECT * FROM VALUES ('{"a": 1, "b": 2}', 'a');
+SELECT json_tuple(jsonField, 'b', CAST(NULL AS STRING), a) FROM jsonTable;
+-- Clean up
+DROP VIEW IF EXISTS jsonTable;

http://git-wip-us.apache.org/repos/asf/spark/blob/7ab95188/sql/core/src/test/resources/sql-tests/results/json-functions.sql.out
----------------------------------------------------------------------
diff --git a/sql/core/src/test/resources/sql-tests/results/json-functions.sql.out b/sql/core/src/test/resources/sql-tests/results/json-functions.sql.out
index 22da20d..ae21d00 100644
--- a/sql/core/src/test/resources/sql-tests/results/json-functions.sql.out
+++ b/sql/core/src/test/resources/sql-tests/results/json-functions.sql.out
@@ -1,5 +1,5 @@
 -- Automatically generated by SQLQueryTestSuite
--- Number of queries: 17
+-- Number of queries: 21
 
 
 -- !query 0
@@ -178,3 +178,35 @@ struct<>
 -- !query 16 output
 org.apache.spark.sql.AnalysisException
 Invalid number of arguments for function from_json; line 1 pos 7
+
+
+-- !query 17
+SELECT json_tuple('{"a" : 1, "b" : 2}', CAST(NULL AS STRING), 'b', CAST(NULL AS STRING), 'a')
+-- !query 17 schema
+struct<c0:string,c1:string,c2:string,c3:string>
+-- !query 17 output
+NULL	2	NULL	1
+
+
+-- !query 18
+CREATE TEMPORARY VIEW jsonTable(jsonField, a) AS SELECT * FROM VALUES ('{"a": 1, "b": 2}', 'a')
+-- !query 18 schema
+struct<>
+-- !query 18 output
+
+
+
+-- !query 19
+SELECT json_tuple(jsonField, 'b', CAST(NULL AS STRING), a) FROM jsonTable
+-- !query 19 schema
+struct<c0:string,c1:string,c2:string>
+-- !query 19 output
+2	NULL	1
+
+
+-- !query 20
+DROP VIEW IF EXISTS jsonTable
+-- !query 20 schema
+struct<>
+-- !query 20 output
+


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org