You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by gu...@apache.org on 2018/09/26 01:52:24 UTC

spark git commit: [SPARK-25514][SQL] Generating pretty JSON by to_json

Repository: spark
Updated Branches:
  refs/heads/master cb77a6689 -> 473d0d862


[SPARK-25514][SQL] Generating pretty JSON by to_json

## What changes were proposed in this pull request?

The PR introduces a new JSON option, `pretty`, which turns on the `DefaultPrettyPrinter` of `Jackson`'s JSON generator. The new option is useful for exploring deeply nested columns and for converting JSON columns into a more readable representation (see the added test).

## How was this patch tested?

Added a round-trip test which converts a JSON string to its pretty representation via `from_json()` and `to_json()`.

Closes #22534 from MaxGekk/pretty-json.

Lead-authored-by: Maxim Gekk <ma...@databricks.com>
Co-authored-by: Maxim Gekk <ma...@gmail.com>
Signed-off-by: hyukjinkwon <gu...@apache.org>


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/473d0d86
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/473d0d86
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/473d0d86

Branch: refs/heads/master
Commit: 473d0d862de54ec1c7a8f0354fa5e06f3d66e455
Parents: cb77a66
Author: Maxim Gekk <ma...@databricks.com>
Authored: Wed Sep 26 09:52:15 2018 +0800
Committer: hyukjinkwon <gu...@apache.org>
Committed: Wed Sep 26 09:52:15 2018 +0800

----------------------------------------------------------------------
 R/pkg/R/functions.R                             |  5 +++--
 python/pyspark/sql/functions.py                 |  4 +++-
 .../spark/sql/catalyst/json/JSONOptions.scala   |  5 +++++
 .../sql/catalyst/json/JacksonGenerator.scala    |  5 ++++-
 .../scala/org/apache/spark/sql/functions.scala  |  4 ++++
 .../apache/spark/sql/JsonFunctionsSuite.scala   | 21 ++++++++++++++++++++
 6 files changed, 40 insertions(+), 4 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/473d0d86/R/pkg/R/functions.R
----------------------------------------------------------------------
diff --git a/R/pkg/R/functions.R b/R/pkg/R/functions.R
index 572dee5..6425c9d 100644
--- a/R/pkg/R/functions.R
+++ b/R/pkg/R/functions.R
@@ -198,8 +198,9 @@ NULL
 #'          }
 #' @param ... additional argument(s). In \code{to_json} and \code{from_json}, this contains
 #'            additional named properties to control how it is converted, accepts the same
-#'            options as the JSON data source.  In \code{arrays_zip}, this contains additional
-#'            Columns of arrays to be merged.
+#'            options as the JSON data source. Additionally \code{to_json} supports the "pretty"
+#'            option which enables pretty JSON generation. In \code{arrays_zip}, this contains
+#'            additional Columns of arrays to be merged.
 #' @name column_collection_functions
 #' @rdname column_collection_functions
 #' @family collection functions

http://git-wip-us.apache.org/repos/asf/spark/blob/473d0d86/python/pyspark/sql/functions.py
----------------------------------------------------------------------
diff --git a/python/pyspark/sql/functions.py b/python/pyspark/sql/functions.py
index 6da5237..1c3d972 100644
--- a/python/pyspark/sql/functions.py
+++ b/python/pyspark/sql/functions.py
@@ -2295,7 +2295,9 @@ def to_json(col, options={}):
     into a JSON string. Throws an exception, in the case of an unsupported type.
 
     :param col: name of column containing a struct, an array or a map.
-    :param options: options to control converting. accepts the same options as the JSON datasource
+    :param options: options to control converting. accepts the same options as the JSON datasource.
+                    Additionally the function supports the `pretty` option which enables
+                    pretty JSON generation.
 
     >>> from pyspark.sql import Row
     >>> from pyspark.sql.types import *

http://git-wip-us.apache.org/repos/asf/spark/blob/473d0d86/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JSONOptions.scala
----------------------------------------------------------------------
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JSONOptions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JSONOptions.scala
index 47eeb70..64152e0 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JSONOptions.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JSONOptions.scala
@@ -113,6 +113,11 @@ private[sql] class JSONOptions(
   }
   val lineSeparatorInWrite: String = lineSeparator.getOrElse("\n")
 
+  /**
+   * Generating JSON strings in pretty representation if the parameter is enabled.
+   */
+  val pretty: Boolean = parameters.get("pretty").map(_.toBoolean).getOrElse(false)
+
   /** Sets config options on a Jackson [[JsonFactory]]. */
   def setJacksonOptions(factory: JsonFactory): Unit = {
     factory.configure(JsonParser.Feature.ALLOW_COMMENTS, allowComments)

http://git-wip-us.apache.org/repos/asf/spark/blob/473d0d86/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JacksonGenerator.scala
----------------------------------------------------------------------
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JacksonGenerator.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JacksonGenerator.scala
index 9b86d86..d02a2be 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JacksonGenerator.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JacksonGenerator.scala
@@ -70,7 +70,10 @@ private[sql] class JacksonGenerator(
       s"Initial type ${dataType.catalogString} must be a ${MapType.simpleString}")
   }
 
-  private val gen = new JsonFactory().createGenerator(writer).setRootValueSeparator(null)
+  private val gen = {
+    val generator = new JsonFactory().createGenerator(writer).setRootValueSeparator(null)
+    if (options.pretty) generator.useDefaultPrettyPrinter() else generator
+  }
 
   private val lineSeparator: String = options.lineSeparatorInWrite
 

http://git-wip-us.apache.org/repos/asf/spark/blob/473d0d86/sql/core/src/main/scala/org/apache/spark/sql/functions.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/functions.scala b/sql/core/src/main/scala/org/apache/spark/sql/functions.scala
index 10b67d7..4c58e77 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/functions.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/functions.scala
@@ -3619,6 +3619,8 @@ object functions {
    * @param e a column containing a struct, an array or a map.
    * @param options options to control how the struct column is converted into a json string.
    *                accepts the same options and the json data source.
+   *                Additionally the function supports the `pretty` option which enables
+   *                pretty JSON generation.
    *
    * @group collection_funcs
    * @since 2.1.0
@@ -3635,6 +3637,8 @@ object functions {
    * @param e a column containing a struct, an array or a map.
    * @param options options to control how the struct column is converted into a json string.
    *                accepts the same options and the json data source.
+   *                Additionally the function supports the `pretty` option which enables
+   *                pretty JSON generation.
    *
    * @group collection_funcs
    * @since 2.1.0

http://git-wip-us.apache.org/repos/asf/spark/blob/473d0d86/sql/core/src/test/scala/org/apache/spark/sql/JsonFunctionsSuite.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/JsonFunctionsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/JsonFunctionsSuite.scala
index fe4bf15..853bc18 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/JsonFunctionsSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/JsonFunctionsSuite.scala
@@ -518,4 +518,25 @@ class JsonFunctionsSuite extends QueryTest with SharedSQLContext {
       jsonDF.select(to_json(from_json($"a", schema))),
       Seq(Row(json)))
   }
+
+  test("pretty print - roundtrip from_json -> to_json") {
+    val json = """[{"book":{"publisher":[{"country":"NL","year":[1981,1986,1999]}]}}]"""
+    val jsonDF = Seq(json).toDF("root")
+    val expected =
+      """[ {
+        |  "book" : {
+        |    "publisher" : [ {
+        |      "country" : "NL",
+        |      "year" : [ 1981, 1986, 1999 ]
+        |    } ]
+        |  }
+        |} ]""".stripMargin
+
+    checkAnswer(
+      jsonDF.select(
+        to_json(
+          from_json($"root", schema_of_json(lit(json))),
+          Map("pretty" -> "true"))),
+      Seq(Row(expected)))
+  }
 }


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org