You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by ma...@apache.org on 2023/06/12 20:54:19 UTC
[spark] branch master updated: [SPARK-42298][SQL] Assign name to _LEGACY_ERROR_TEMP_2132
This is an automated email from the ASF dual-hosted git repository.
maxgekk pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
new c41be4ec0ad [SPARK-42298][SQL] Assign name to _LEGACY_ERROR_TEMP_2132
c41be4ec0ad is described below
commit c41be4ec0ad97f587a0581d5583b2ca9975b2a0f
Author: Hisoka <fa...@qq.com>
AuthorDate: Mon Jun 12 23:54:02 2023 +0300
[SPARK-42298][SQL] Assign name to _LEGACY_ERROR_TEMP_2132
### What changes were proposed in this pull request?
This PR proposes to assign name to _LEGACY_ERROR_TEMP_2132, "CANNOT_PARSE_JSON_ARRAYS_AS_STRUCTS".
### Why are the changes needed?
Assign a proper name to the legacy error class _LEGACY_ERROR_TEMP_2132.
### Does this PR introduce _any_ user-facing change?
No
### How was this patch tested?
./build/sbt "testOnly org.apache.spark.sql.errors.QueryExecutionErrorsSuite"
Closes #40632 from Hisoka-X/_LEGACY_ERROR_TEMP_2132.
Lead-authored-by: Hisoka <fa...@qq.com>
Co-authored-by: Jia Fan <fa...@qq.com>
Signed-off-by: Max Gekk <ma...@gmail.com>
---
core/src/main/resources/error/error-classes.json | 20 ++++++++++++++------
.../spark/sql/catalyst/json/JacksonParser.scala | 2 +-
.../spark/sql/catalyst/util/BadRecordException.scala | 5 +++++
.../spark/sql/catalyst/util/FailureSafeParser.scala | 10 ++++++++--
.../spark/sql/errors/QueryExecutionErrors.scala | 10 ++++++----
.../catalyst/expressions/JsonExpressionsSuite.scala | 2 +-
.../org/apache/spark/sql/CsvFunctionsSuite.scala | 2 +-
.../org/apache/spark/sql/JsonFunctionsSuite.scala | 12 ++++++------
.../spark/sql/errors/QueryExecutionErrorsSuite.scala | 15 +++++++++++++++
.../sql/execution/datasources/csv/CSVSuite.scala | 2 +-
.../sql/execution/datasources/json/JsonSuite.scala | 4 ++--
.../spark/sql/hive/thriftserver/CliSuite.scala | 4 ++--
.../ThriftServerWithSparkContextSuite.scala | 4 ++--
13 files changed, 64 insertions(+), 28 deletions(-)
diff --git a/core/src/main/resources/error/error-classes.json b/core/src/main/resources/error/error-classes.json
index a12a8000870..183ea31a7cb 100644
--- a/core/src/main/resources/error/error-classes.json
+++ b/core/src/main/resources/error/error-classes.json
@@ -1542,7 +1542,20 @@
"message" : [
"Malformed records are detected in record parsing: <badRecord>.",
"Parse Mode: <failFastMode>. To process malformed records as null result, try setting the option 'mode' as 'PERMISSIVE'."
- ]
+ ],
+ "subClass" : {
+ "CANNOT_PARSE_JSON_ARRAYS_AS_STRUCTS" : {
+ "message" : [
+ "Parsing JSON arrays as structs is forbidden."
+ ]
+ },
+ "WITHOUT_SUGGESTION" : {
+ "message" : [
+ ""
+ ]
+ }
+ },
+ "sqlState" : "22023"
},
"MISSING_AGGREGATION" : {
"message" : [
@@ -4692,11 +4705,6 @@
"Exception when registering StreamingQueryListener."
]
},
- "_LEGACY_ERROR_TEMP_2132" : {
- "message" : [
- "Parsing JSON arrays as structs is forbidden."
- ]
- },
"_LEGACY_ERROR_TEMP_2133" : {
"message" : [
"Cannot parse field name <fieldName>, field value <fieldValue>, [<token>] as target spark data type [<dataType>]."
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JacksonParser.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JacksonParser.scala
index bf07d65caa0..48ee50938cd 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JacksonParser.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JacksonParser.scala
@@ -144,7 +144,7 @@ class JacksonParser(
array.toArray[InternalRow](schema)
}
case START_ARRAY =>
- throw QueryExecutionErrors.cannotParseJsonArraysAsStructsError()
+ throw JsonArraysAsStructsException()
}
}
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/BadRecordException.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/BadRecordException.scala
index 67defe78a6c..cfbe9da6ec5 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/BadRecordException.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/BadRecordException.scala
@@ -41,3 +41,8 @@ case class BadRecordException(
@transient record: () => UTF8String,
@transient partialResult: () => Option[InternalRow],
cause: Throwable) extends Exception(cause)
+
+/**
+ * Exception thrown when the underlying parser parses a JSON array as a struct.
+ */
+case class JsonArraysAsStructsException() extends RuntimeException()
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/FailureSafeParser.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/FailureSafeParser.scala
index fcdcd21b6dc..84fad1bb477 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/FailureSafeParser.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/FailureSafeParser.scala
@@ -65,8 +65,14 @@ class FailureSafeParser[IN](
case DropMalformedMode =>
Iterator.empty
case FailFastMode =>
- throw QueryExecutionErrors.malformedRecordsDetectedInRecordParsingError(
- toResultRow(e.partialResult(), e.record).toString, e)
+ e.getCause match {
+ case _: JsonArraysAsStructsException =>
+ // SPARK-42298: we recreate the exception here to make sure the error message
+ has the record content.
+ throw QueryExecutionErrors.cannotParseJsonArraysAsStructsError(e.record().toString)
+ case _ => throw QueryExecutionErrors.malformedRecordsDetectedInRecordParsingError(
+ toResultRow(e.partialResult(), e.record).toString, e)
+ }
}
}
}
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala
index 68243233216..498723c1491 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala
@@ -1416,10 +1416,12 @@ private[sql] object QueryExecutionErrors extends QueryErrorsBase {
cause = null)
}
- def cannotParseJsonArraysAsStructsError(): SparkRuntimeException = {
+ def cannotParseJsonArraysAsStructsError(recordStr: String): SparkRuntimeException = {
new SparkRuntimeException(
- errorClass = "_LEGACY_ERROR_TEMP_2132",
- messageParameters = Map.empty)
+ errorClass = "MALFORMED_RECORD_IN_PARSING.CANNOT_PARSE_JSON_ARRAYS_AS_STRUCTS",
+ messageParameters = Map(
+ "badRecord" -> recordStr,
+ "failFastMode" -> FailFastMode.name))
}
def cannotParseStringAsDataTypeError(parser: JsonParser, token: JsonToken, dataType: DataType)
@@ -1771,7 +1773,7 @@ private[sql] object QueryExecutionErrors extends QueryErrorsBase {
def malformedRecordsDetectedInRecordParsingError(
badRecord: String, e: BadRecordException): Throwable = {
new SparkException(
- errorClass = "MALFORMED_RECORD_IN_PARSING",
+ errorClass = "MALFORMED_RECORD_IN_PARSING.WITHOUT_SUGGESTION",
messageParameters = Map(
"badRecord" -> badRecord,
"failFastMode" -> FailFastMode.name),
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/JsonExpressionsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/JsonExpressionsSuite.scala
index a1db7e4c3ab..94e40b98065 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/JsonExpressionsSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/JsonExpressionsSuite.scala
@@ -448,7 +448,7 @@ class JsonExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper with
}.getCause
checkError(
exception = exception.asInstanceOf[SparkException],
- errorClass = "MALFORMED_RECORD_IN_PARSING",
+ errorClass = "MALFORMED_RECORD_IN_PARSING.WITHOUT_SUGGESTION",
parameters = Map("badRecord" -> "[null]", "failFastMode" -> "FAILFAST")
)
}
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/CsvFunctionsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/CsvFunctionsSuite.scala
index 67ba5511263..77b9b380852 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/CsvFunctionsSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/CsvFunctionsSuite.scala
@@ -307,7 +307,7 @@ class CsvFunctionsSuite extends QueryTest with SharedSparkSession {
}.getCause
checkError(
exception = exception1.asInstanceOf[SparkException],
- errorClass = "MALFORMED_RECORD_IN_PARSING",
+ errorClass = "MALFORMED_RECORD_IN_PARSING.WITHOUT_SUGGESTION",
parameters = Map("badRecord" -> "[null,null,\"]", "failFastMode" -> "FAILFAST")
)
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/JsonFunctionsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/JsonFunctionsSuite.scala
index 34f9ea2c731..d2ffea07921 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/JsonFunctionsSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/JsonFunctionsSuite.scala
@@ -837,7 +837,7 @@ class JsonFunctionsSuite extends QueryTest with SharedSparkSession {
}.getCause
checkError(
exception = exception1.asInstanceOf[SparkException],
- errorClass = "MALFORMED_RECORD_IN_PARSING",
+ errorClass = "MALFORMED_RECORD_IN_PARSING.WITHOUT_SUGGESTION",
parameters = Map(
"badRecord" -> "[null,null,{\"a\" 1, \"b\": 11}]",
"failFastMode" -> "FAILFAST")
@@ -872,7 +872,7 @@ class JsonFunctionsSuite extends QueryTest with SharedSparkSession {
checkError(
exception = exception.asInstanceOf[SparkException],
- errorClass = "MALFORMED_RECORD_IN_PARSING",
+ errorClass = "MALFORMED_RECORD_IN_PARSING.WITHOUT_SUGGESTION",
parameters = Map(
"badRecord" -> "[null,11,{\"a\": \"1\", \"b\": 11}]",
"failFastMode" -> "FAILFAST")
@@ -1205,7 +1205,7 @@ class JsonFunctionsSuite extends QueryTest with SharedSparkSession {
}.getCause
checkError(
exception = exception1.asInstanceOf[SparkException],
- errorClass = "MALFORMED_RECORD_IN_PARSING",
+ errorClass = "MALFORMED_RECORD_IN_PARSING.WITHOUT_SUGGESTION",
parameters = Map(
"badRecord" -> "[null,null]",
"failFastMode" -> "FAILFAST")
@@ -1216,7 +1216,7 @@ class JsonFunctionsSuite extends QueryTest with SharedSparkSession {
}.getCause
checkError(
exception = exception2.asInstanceOf[SparkException],
- errorClass = "MALFORMED_RECORD_IN_PARSING",
+ errorClass = "MALFORMED_RECORD_IN_PARSING.WITHOUT_SUGGESTION",
parameters = Map(
"badRecord" -> "[null,null]",
"failFastMode" -> "FAILFAST")
@@ -1239,7 +1239,7 @@ class JsonFunctionsSuite extends QueryTest with SharedSparkSession {
}.getCause
checkError(
exception = exception1.asInstanceOf[SparkException],
- errorClass = "MALFORMED_RECORD_IN_PARSING",
+ errorClass = "MALFORMED_RECORD_IN_PARSING.WITHOUT_SUGGESTION",
parameters = Map(
"badRecord" -> "[null]",
"failFastMode" -> "FAILFAST")
@@ -1250,7 +1250,7 @@ class JsonFunctionsSuite extends QueryTest with SharedSparkSession {
}.getCause
checkError(
exception = exception2.asInstanceOf[SparkException],
- errorClass = "MALFORMED_RECORD_IN_PARSING",
+ errorClass = "MALFORMED_RECORD_IN_PARSING.WITHOUT_SUGGESTION",
parameters = Map(
"badRecord" -> "[null]",
"failFastMode" -> "FAILFAST")
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryExecutionErrorsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryExecutionErrorsSuite.scala
index 069fce237f2..73a3e088894 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryExecutionErrorsSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryExecutionErrorsSuite.scala
@@ -381,6 +381,21 @@ class QueryExecutionErrorsSuite
sqlState = "22018")
}
+ test("CANNOT_PARSE_JSON_ARRAYS_AS_STRUCTS: parse json arrays as structs") {
+ val jsonStr = """[{"a":1, "b":0.8}]"""
+ checkError(
+ exception = intercept[SparkRuntimeException] {
+ sql(s"SELECT from_json('$jsonStr', 'a INT, b DOUBLE', map('mode','FAILFAST') )")
+ .collect()
+ },
+ errorClass = "MALFORMED_RECORD_IN_PARSING.CANNOT_PARSE_JSON_ARRAYS_AS_STRUCTS",
+ parameters = Map(
+ "badRecord" -> jsonStr,
+ "failFastMode" -> "FAILFAST"
+ ),
+ sqlState = "22023")
+ }
+
test("FAILED_EXECUTE_UDF: execute user defined function") {
val luckyCharOfWord = udf { (word: String, index: Int) => {
word.substring(index, index + 1)
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVSuite.scala
index 814805a7272..aba94d903eb 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVSuite.scala
@@ -3218,7 +3218,7 @@ class CSVv1Suite extends CSVSuite {
checkError(
exception = exception.getCause.asInstanceOf[SparkException],
- errorClass = "MALFORMED_RECORD_IN_PARSING",
+ errorClass = "MALFORMED_RECORD_IN_PARSING.WITHOUT_SUGGESTION",
parameters = Map(
"badRecord" -> "[2015,Chevy,Volt,null,null]",
"failFastMode" -> "FAILFAST")
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/json/JsonSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/json/JsonSuite.scala
index 1f9a2da5dd7..cb7bab2ddea 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/json/JsonSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/json/JsonSuite.scala
@@ -1076,7 +1076,7 @@ abstract class JsonSuite
}.getCause
checkError(
exception = exceptionTwo.asInstanceOf[SparkException],
- errorClass = "MALFORMED_RECORD_IN_PARSING",
+ errorClass = "MALFORMED_RECORD_IN_PARSING.WITHOUT_SUGGESTION",
parameters = Map(
"badRecord" -> "[null]",
"failFastMode" -> "FAILFAST")
@@ -1997,7 +1997,7 @@ abstract class JsonSuite
}.getCause
checkError(
exception = exceptionTwo.asInstanceOf[SparkException],
- errorClass = "MALFORMED_RECORD_IN_PARSING",
+ errorClass = "MALFORMED_RECORD_IN_PARSING.WITHOUT_SUGGESTION",
parameters = Map(
"badRecord" -> "[null]",
"failFastMode" -> "FAILFAST")
diff --git a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala
index 651c6b7aafb..90f371c7ec7 100644
--- a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala
+++ b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala
@@ -563,7 +563,7 @@ class CliSuite extends SparkFunSuite {
extraArgs = Seq("--hiveconf", "hive.session.silent=false",
"-e", "select from_json('a', 'a INT', map('mode', 'FAILFAST'));"),
errorResponses = Seq("JsonParseException"))(
- ("", "SparkException: [MALFORMED_RECORD_IN_PARSING]"),
+ ("", "SparkException: [MALFORMED_RECORD_IN_PARSING.WITHOUT_SUGGESTION]"),
("", "JsonParseException: Unrecognized token 'a'"))
// If it is in silent mode, will print the error message only
runCliWithin(
@@ -571,7 +571,7 @@ class CliSuite extends SparkFunSuite {
extraArgs = Seq("--conf", "spark.hive.session.silent=true",
"-e", "select from_json('a', 'a INT', map('mode', 'FAILFAST'));"),
errorResponses = Seq("SparkException"))(
- ("", "SparkException: [MALFORMED_RECORD_IN_PARSING]"))
+ ("", "SparkException: [MALFORMED_RECORD_IN_PARSING.WITHOUT_SUGGESTION]"))
}
test("SPARK-30808: use Java 8 time API in Thrift SQL CLI by default") {
diff --git a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/ThriftServerWithSparkContextSuite.scala b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/ThriftServerWithSparkContextSuite.scala
index aef9dc69656..e59fcb46dc9 100644
--- a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/ThriftServerWithSparkContextSuite.scala
+++ b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/ThriftServerWithSparkContextSuite.scala
@@ -69,7 +69,7 @@ trait ThriftServerWithSparkContextSuite extends SharedThriftServer {
}
assert(e.getMessage.contains("JsonParseException: Unrecognized token 'a'"))
assert(!e.getMessage.contains(
- "SparkException: [MALFORMED_RECORD_IN_PARSING]"))
+ "SparkException: [MALFORMED_RECORD_IN_PARSING.WITHOUT_SUGGESTION]"))
}
withJdbcStatement() { statement =>
@@ -78,7 +78,7 @@ trait ThriftServerWithSparkContextSuite extends SharedThriftServer {
}
assert(e.getMessage.contains("JsonParseException: Unrecognized token 'a'"))
assert(e.getMessage.contains(
- "SparkException: [MALFORMED_RECORD_IN_PARSING]"))
+ "SparkException: [MALFORMED_RECORD_IN_PARSING.WITHOUT_SUGGESTION]"))
}
}
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org