You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by gu...@apache.org on 2018/09/26 01:52:24 UTC
spark git commit: [SPARK-25514][SQL] Generating pretty JSON by to_json
Repository: spark
Updated Branches:
refs/heads/master cb77a6689 -> 473d0d862
[SPARK-25514][SQL] Generating pretty JSON by to_json
## What changes were proposed in this pull request?
The PR introduces a new JSON option `pretty` which allows turning on the `DefaultPrettyPrinter` of `Jackson`'s JSON generator. The new option is useful for exploring deeply nested columns and for converting JSON columns into a more readable representation (see the added test).
## How was this patch tested?
Added a round-trip test which converts a JSON string to its pretty representation via `from_json()` and `to_json()`.
Closes #22534 from MaxGekk/pretty-json.
Lead-authored-by: Maxim Gekk <ma...@databricks.com>
Co-authored-by: Maxim Gekk <ma...@gmail.com>
Signed-off-by: hyukjinkwon <gu...@apache.org>
Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/473d0d86
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/473d0d86
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/473d0d86
Branch: refs/heads/master
Commit: 473d0d862de54ec1c7a8f0354fa5e06f3d66e455
Parents: cb77a66
Author: Maxim Gekk <ma...@databricks.com>
Authored: Wed Sep 26 09:52:15 2018 +0800
Committer: hyukjinkwon <gu...@apache.org>
Committed: Wed Sep 26 09:52:15 2018 +0800
----------------------------------------------------------------------
R/pkg/R/functions.R | 5 +++--
python/pyspark/sql/functions.py | 4 +++-
.../spark/sql/catalyst/json/JSONOptions.scala | 5 +++++
.../sql/catalyst/json/JacksonGenerator.scala | 5 ++++-
.../scala/org/apache/spark/sql/functions.scala | 4 ++++
.../apache/spark/sql/JsonFunctionsSuite.scala | 21 ++++++++++++++++++++
6 files changed, 40 insertions(+), 4 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/spark/blob/473d0d86/R/pkg/R/functions.R
----------------------------------------------------------------------
diff --git a/R/pkg/R/functions.R b/R/pkg/R/functions.R
index 572dee5..6425c9d 100644
--- a/R/pkg/R/functions.R
+++ b/R/pkg/R/functions.R
@@ -198,8 +198,9 @@ NULL
#' }
#' @param ... additional argument(s). In \code{to_json} and \code{from_json}, this contains
#' additional named properties to control how it is converted, accepts the same
-#' options as the JSON data source. In \code{arrays_zip}, this contains additional
-#' Columns of arrays to be merged.
+#' options as the JSON data source. Additionally \code{to_json} supports the "pretty"
+#' option which enables pretty JSON generation. In \code{arrays_zip}, this contains
+#' additional Columns of arrays to be merged.
#' @name column_collection_functions
#' @rdname column_collection_functions
#' @family collection functions
http://git-wip-us.apache.org/repos/asf/spark/blob/473d0d86/python/pyspark/sql/functions.py
----------------------------------------------------------------------
diff --git a/python/pyspark/sql/functions.py b/python/pyspark/sql/functions.py
index 6da5237..1c3d972 100644
--- a/python/pyspark/sql/functions.py
+++ b/python/pyspark/sql/functions.py
@@ -2295,7 +2295,9 @@ def to_json(col, options={}):
into a JSON string. Throws an exception, in the case of an unsupported type.
:param col: name of column containing a struct, an array or a map.
- :param options: options to control converting. accepts the same options as the JSON datasource
+ :param options: options to control converting. accepts the same options as the JSON datasource.
+ Additionally the function supports the `pretty` option which enables
+ pretty JSON generation.
>>> from pyspark.sql import Row
>>> from pyspark.sql.types import *
http://git-wip-us.apache.org/repos/asf/spark/blob/473d0d86/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JSONOptions.scala
----------------------------------------------------------------------
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JSONOptions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JSONOptions.scala
index 47eeb70..64152e0 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JSONOptions.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JSONOptions.scala
@@ -113,6 +113,11 @@ private[sql] class JSONOptions(
}
val lineSeparatorInWrite: String = lineSeparator.getOrElse("\n")
+ /**
+ * Generating JSON strings in pretty representation if the parameter is enabled.
+ */
+ val pretty: Boolean = parameters.get("pretty").map(_.toBoolean).getOrElse(false)
+
/** Sets config options on a Jackson [[JsonFactory]]. */
def setJacksonOptions(factory: JsonFactory): Unit = {
factory.configure(JsonParser.Feature.ALLOW_COMMENTS, allowComments)
http://git-wip-us.apache.org/repos/asf/spark/blob/473d0d86/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JacksonGenerator.scala
----------------------------------------------------------------------
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JacksonGenerator.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JacksonGenerator.scala
index 9b86d86..d02a2be 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JacksonGenerator.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JacksonGenerator.scala
@@ -70,7 +70,10 @@ private[sql] class JacksonGenerator(
s"Initial type ${dataType.catalogString} must be a ${MapType.simpleString}")
}
- private val gen = new JsonFactory().createGenerator(writer).setRootValueSeparator(null)
+ private val gen = {
+ val generator = new JsonFactory().createGenerator(writer).setRootValueSeparator(null)
+ if (options.pretty) generator.useDefaultPrettyPrinter() else generator
+ }
private val lineSeparator: String = options.lineSeparatorInWrite
http://git-wip-us.apache.org/repos/asf/spark/blob/473d0d86/sql/core/src/main/scala/org/apache/spark/sql/functions.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/functions.scala b/sql/core/src/main/scala/org/apache/spark/sql/functions.scala
index 10b67d7..4c58e77 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/functions.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/functions.scala
@@ -3619,6 +3619,8 @@ object functions {
* @param e a column containing a struct, an array or a map.
* @param options options to control how the struct column is converted into a json string.
* accepts the same options and the json data source.
+ * Additionally the function supports the `pretty` option which enables
+ * pretty JSON generation.
*
* @group collection_funcs
* @since 2.1.0
@@ -3635,6 +3637,8 @@ object functions {
* @param e a column containing a struct, an array or a map.
* @param options options to control how the struct column is converted into a json string.
* accepts the same options and the json data source.
+ * Additionally the function supports the `pretty` option which enables
+ * pretty JSON generation.
*
* @group collection_funcs
* @since 2.1.0
http://git-wip-us.apache.org/repos/asf/spark/blob/473d0d86/sql/core/src/test/scala/org/apache/spark/sql/JsonFunctionsSuite.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/JsonFunctionsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/JsonFunctionsSuite.scala
index fe4bf15..853bc18 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/JsonFunctionsSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/JsonFunctionsSuite.scala
@@ -518,4 +518,25 @@ class JsonFunctionsSuite extends QueryTest with SharedSQLContext {
jsonDF.select(to_json(from_json($"a", schema))),
Seq(Row(json)))
}
+
+ test("pretty print - roundtrip from_json -> to_json") {
+ val json = """[{"book":{"publisher":[{"country":"NL","year":[1981,1986,1999]}]}}]"""
+ val jsonDF = Seq(json).toDF("root")
+ val expected =
+ """[ {
+ | "book" : {
+ | "publisher" : [ {
+ | "country" : "NL",
+ | "year" : [ 1981, 1986, 1999 ]
+ | } ]
+ | }
+ |} ]""".stripMargin
+
+ checkAnswer(
+ jsonDF.select(
+ to_json(
+ from_json($"root", schema_of_json(lit(json))),
+ Map("pretty" -> "true"))),
+ Seq(Row(expected)))
+ }
}
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org