You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by li...@apache.org on 2018/02/15 07:56:05 UTC

spark git commit: [SPARK-23094] Revert [] Fix invalid character handling in JsonDataSource

Repository: spark
Updated Branches:
  refs/heads/master a77ebb092 -> 95e4b4916


[SPARK-23094] Revert [] Fix invalid character handling in JsonDataSource

## What changes were proposed in this pull request?
This PR reverts https://github.com/apache/spark/pull/20302 because it causes a regression.

## How was this patch tested?
N/A

Author: gatorsmile <ga...@gmail.com>

Closes #20614 from gatorsmile/revertJsonFix.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/95e4b491
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/95e4b491
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/95e4b491

Branch: refs/heads/master
Commit: 95e4b4916065e66a4f8dba57e98e725796f75e04
Parents: a77ebb0
Author: gatorsmile <ga...@gmail.com>
Authored: Wed Feb 14 23:56:02 2018 -0800
Committer: gatorsmile <ga...@gmail.com>
Committed: Wed Feb 14 23:56:02 2018 -0800

----------------------------------------------------------------------
 .../sql/catalyst/json/CreateJacksonParser.scala |  5 ++-
 .../sql/sources/JsonHadoopFsRelationSuite.scala | 34 --------------------
 2 files changed, 2 insertions(+), 37 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/95e4b491/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/CreateJacksonParser.scala
----------------------------------------------------------------------
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/CreateJacksonParser.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/CreateJacksonParser.scala
index b1672e7..025a388 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/CreateJacksonParser.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/CreateJacksonParser.scala
@@ -40,11 +40,10 @@ private[sql] object CreateJacksonParser extends Serializable {
   }
 
   def text(jsonFactory: JsonFactory, record: Text): JsonParser = {
-    val bain = new ByteArrayInputStream(record.getBytes, 0, record.getLength)
-    jsonFactory.createParser(new InputStreamReader(bain, "UTF-8"))
+    jsonFactory.createParser(record.getBytes, 0, record.getLength)
   }
 
   def inputStream(jsonFactory: JsonFactory, record: InputStream): JsonParser = {
-    jsonFactory.createParser(new InputStreamReader(record, "UTF-8"))
+    jsonFactory.createParser(record)
   }
 }

http://git-wip-us.apache.org/repos/asf/spark/blob/95e4b491/sql/hive/src/test/scala/org/apache/spark/sql/sources/JsonHadoopFsRelationSuite.scala
----------------------------------------------------------------------
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/sources/JsonHadoopFsRelationSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/sources/JsonHadoopFsRelationSuite.scala
index 27f398e..49be304 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/sources/JsonHadoopFsRelationSuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/sources/JsonHadoopFsRelationSuite.scala
@@ -28,8 +28,6 @@ import org.apache.spark.sql.types._
 class JsonHadoopFsRelationSuite extends HadoopFsRelationTest {
   override val dataSourceName: String = "json"
 
-  private val badJson = "\u0000\u0000\u0000A\u0001AAA"
-
   // JSON does not write data of NullType and does not play well with BinaryType.
   override protected def supportsDataType(dataType: DataType): Boolean = dataType match {
     case _: NullType => false
@@ -107,36 +105,4 @@ class JsonHadoopFsRelationSuite extends HadoopFsRelationTest {
       )
     }
   }
-
-  test("invalid json with leading nulls - from file (multiLine=true)") {
-    import testImplicits._
-    withTempDir { tempDir =>
-      val path = tempDir.getAbsolutePath
-      Seq(badJson, """{"a":1}""").toDS().write.mode("overwrite").text(path)
-      val expected = s"""$badJson\n{"a":1}\n"""
-      val schema = new StructType().add("a", IntegerType).add("_corrupt_record", StringType)
-      val df =
-        spark.read.format(dataSourceName).option("multiLine", true).schema(schema).load(path)
-      checkAnswer(df, Row(null, expected))
-    }
-  }
-
-  test("invalid json with leading nulls - from file (multiLine=false)") {
-    import testImplicits._
-    withTempDir { tempDir =>
-      val path = tempDir.getAbsolutePath
-      Seq(badJson, """{"a":1}""").toDS().write.mode("overwrite").text(path)
-      val schema = new StructType().add("a", IntegerType).add("_corrupt_record", StringType)
-      val df =
-        spark.read.format(dataSourceName).option("multiLine", false).schema(schema).load(path)
-      checkAnswer(df, Seq(Row(1, null), Row(null, badJson)))
-    }
-  }
-
-  test("invalid json with leading nulls - from dataset") {
-    import testImplicits._
-    checkAnswer(
-      spark.read.json(Seq(badJson).toDS()),
-      Row(badJson))
-  }
 }


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org