You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by do...@apache.org on 2023/02/11 05:54:58 UTC

[spark] branch branch-3.4 updated: [SPARK-42403][CORE] JsonProtocol should handle null JSON strings

This is an automated email from the ASF dual-hosted git repository.

dongjoon pushed a commit to branch branch-3.4
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/branch-3.4 by this push:
     new 42e7d66b663 [SPARK-42403][CORE] JsonProtocol should handle null JSON strings
42e7d66b663 is described below

commit 42e7d66b66337539317bea399540792def45292c
Author: Josh Rosen <jo...@databricks.com>
AuthorDate: Fri Feb 10 21:54:28 2023 -0800

    [SPARK-42403][CORE] JsonProtocol should handle null JSON strings
    
    ### What changes were proposed in this pull request?
    
    This PR fixes a regression introduced by #36885 which broke JsonProtocol's ability to parse `null` string values: the old Json4S-based parser would correctly parse null literals, whereas the new code rejects them via an overly strict type check.
    
    This PR solves this problem by relaxing the type checking in `extractString` so that `null` literals in JSON can be parsed as `null` strings.
    
    ### Why are the changes needed?
    
    Fix a regression which prevents the history server from parsing certain types of event logs which contain null strings, including stacktraces containing generated code frames and ExceptionFailure messages where the exception message is `null`.
    
    ### Does this PR introduce _any_ user-facing change?
    
    No.
    
    ### How was this patch tested?
    
    Added a new unit test in JsonProtocolSuite.
    
    Closes #39973 from JoshRosen/SPARK-42403-handle-null-strings-in-json-protocol-read-path.
    
    Authored-by: Josh Rosen <jo...@databricks.com>
    Signed-off-by: Dongjoon Hyun <do...@apache.org>
    (cherry picked from commit 84ddd409c11e4da769c5b1f496f2b61c3d928c07)
    Signed-off-by: Dongjoon Hyun <do...@apache.org>
---
 .../scala/org/apache/spark/util/JsonProtocol.scala |  2 +-
 .../org/apache/spark/util/JsonProtocolSuite.scala  | 32 ++++++++++++++++++++++
 2 files changed, 33 insertions(+), 1 deletion(-)

diff --git a/core/src/main/scala/org/apache/spark/util/JsonProtocol.scala b/core/src/main/scala/org/apache/spark/util/JsonProtocol.scala
index 75dab8dc535..6b75971fc25 100644
--- a/core/src/main/scala/org/apache/spark/util/JsonProtocol.scala
+++ b/core/src/main/scala/org/apache/spark/util/JsonProtocol.scala
@@ -1611,7 +1611,7 @@ private[spark] object JsonProtocol {
     }
 
     def extractString: String = {
-      require(json.isTextual, s"Expected string, got ${json.getNodeType}")
+      require(json.isTextual || json.isNull, s"Expected string or NULL, got ${json.getNodeType}")
       json.textValue
     }
   }
diff --git a/core/src/test/scala/org/apache/spark/util/JsonProtocolSuite.scala b/core/src/test/scala/org/apache/spark/util/JsonProtocolSuite.scala
index be8a165d2d2..ea71a4b3f1b 100644
--- a/core/src/test/scala/org/apache/spark/util/JsonProtocolSuite.scala
+++ b/core/src/test/scala/org/apache/spark/util/JsonProtocolSuite.scala
@@ -778,6 +778,38 @@ class JsonProtocolSuite extends SparkFunSuite {
         |}""".stripMargin
     assert(JsonProtocol.sparkEventFromJson(unknownFieldsJson) === expected)
   }
+
+  test("SPARK-42403: properly handle null string values") {
+    // Null string values can appear in a few different event types,
+    // so we test multiple known cases here:
+    val stackTraceJson =
+      """
+        |[
+        |  {
+        |    "Declaring Class": "someClass",
+        |    "Method Name": "someMethod",
+        |    "File Name": null,
+        |    "Line Number": -1
+        |  }
+        |]
+        |""".stripMargin
+    val stackTrace = JsonProtocol.stackTraceFromJson(stackTraceJson)
+    assert(stackTrace === Array(new StackTraceElement("someClass", "someMethod", null, -1)))
+
+    val exceptionFailureJson =
+      """
+        |{
+        |  "Reason": "ExceptionFailure",
+        |  "Class Name": "java.lang.Exception",
+        |  "Description": null,
+        |  "Stack Trace": [],
+        |  "Accumulator Updates": []
+        |}
+        |""".stripMargin
+    val exceptionFailure =
+      JsonProtocol.taskEndReasonFromJson(exceptionFailureJson).asInstanceOf[ExceptionFailure]
+    assert(exceptionFailure.description == null)
+  }
 }
 
 


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org