You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by ma...@apache.org on 2022/11/07 11:13:34 UTC
[spark] branch master updated: [SPARK-40948][SQL] Introduce new error class: PATH_NOT_FOUND
This is an automated email from the ASF dual-hosted git repository.
maxgekk pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
new 57d49255676 [SPARK-40948][SQL] Introduce new error class: PATH_NOT_FOUND
57d49255676 is described below
commit 57d492556768eb341f525ce7eb5c934089fa9e7e
Author: itholic <ha...@databricks.com>
AuthorDate: Mon Nov 7 14:13:13 2022 +0300
[SPARK-40948][SQL] Introduce new error class: PATH_NOT_FOUND
### What changes were proposed in this pull request?
This PR proposes to introduce new error class `PATH_NOT_FOUND`, by updating the existing legacy temp error class `_LEGACY_ERROR_TEMP_1130`.
### Why are the changes needed?
We should use appropriate error class name that matches the error message.
### Does this PR introduce _any_ user-facing change?
No.
### How was this patch tested?
The existing CI should pass.
Closes #38422 from itholic/LEGACY_MIGRATE.
Authored-by: itholic <ha...@databricks.com>
Signed-off-by: Max Gekk <ma...@gmail.com>
---
R/pkg/tests/fulltests/test_sparkSQL.R | 19 +++++++---
core/src/main/resources/error/error-classes.json | 10 ++---
.../spark/sql/errors/QueryCompilationErrors.scala | 2 +-
.../org/apache/spark/sql/DataFrameSuite.scala | 44 ++++++++++++++--------
.../execution/datasources/DataSourceSuite.scala | 28 ++++++++------
5 files changed, 64 insertions(+), 39 deletions(-)
diff --git a/R/pkg/tests/fulltests/test_sparkSQL.R b/R/pkg/tests/fulltests/test_sparkSQL.R
index 534ec07abac..91a2c51660b 100644
--- a/R/pkg/tests/fulltests/test_sparkSQL.R
+++ b/R/pkg/tests/fulltests/test_sparkSQL.R
@@ -3990,12 +3990,21 @@ test_that("Call DataFrameWriter.load() API in Java without path and check argume
expect_error(read.df(source = "json"),
paste("Error in load : analysis error - Unable to infer schema for JSON.",
"It must be specified manually"))
- expect_error(read.df("arbitrary_path"), "Error in load : analysis error - Path does not exist")
- expect_error(read.json("arbitrary_path"), "Error in json : analysis error - Path does not exist")
- expect_error(read.text("arbitrary_path"), "Error in text : analysis error - Path does not exist")
- expect_error(read.orc("arbitrary_path"), "Error in orc : analysis error - Path does not exist")
+ expect_error(read.df("arbitrary_path"),
+ paste("Error in load : analysis error - [PATH_NOT_FOUND] Path does not exist:",
+ "file:/__w/spark/spark/arbitrary_path."), fixed = TRUE)
+ expect_error(read.json("arbitrary_path"),
+ paste("Error in json : analysis error - [PATH_NOT_FOUND] Path does not exist:",
+ "file:/__w/spark/spark/arbitrary_path."), fixed = TRUE)
+ expect_error(read.text("arbitrary_path"),
+ paste("Error in text : analysis error - [PATH_NOT_FOUND] Path does not exist:",
+ "file:/__w/spark/spark/arbitrary_path."), fixed = TRUE)
+ expect_error(read.orc("arbitrary_path"),
+ paste("Error in orc : analysis error - [PATH_NOT_FOUND] Path does not exist:",
+ "file:/__w/spark/spark/arbitrary_path."), fixed = TRUE)
expect_error(read.parquet("arbitrary_path"),
- "Error in parquet : analysis error - Path does not exist")
+ paste("Error in parquet : analysis error - [PATH_NOT_FOUND] Path does not exist:",
+ "file:/__w/spark/spark/arbitrary_path."), fixed = TRUE)
# Arguments checking in R side.
expect_error(read.df(path = c(3)),
diff --git a/core/src/main/resources/error/error-classes.json b/core/src/main/resources/error/error-classes.json
index ceb3e4ed5b1..73652a1ca78 100644
--- a/core/src/main/resources/error/error-classes.json
+++ b/core/src/main/resources/error/error-classes.json
@@ -806,6 +806,11 @@
],
"sqlState" : "42000"
},
+ "PATH_NOT_FOUND" : {
+ "message" : [
+ "Path does not exist: <path>."
+ ]
+ },
"PIVOT_VALUE_DATA_TYPE_MISMATCH" : {
"message" : [
"Invalid pivot value '<value>': value data type <valueType> does not match pivot column data type <pivotType>"
@@ -2226,11 +2231,6 @@
"Unable to infer schema for <format>. It must be specified manually."
]
},
- "_LEGACY_ERROR_TEMP_1130" : {
- "message" : [
- "Path does not exist: <path>."
- ]
- },
"_LEGACY_ERROR_TEMP_1131" : {
"message" : [
"Data source <className> does not support <outputMode> output mode."
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala
index b56e1957f77..4056052c81e 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala
@@ -1388,7 +1388,7 @@ private[sql] object QueryCompilationErrors extends QueryErrorsBase {
def dataPathNotExistError(path: String): Throwable = {
new AnalysisException(
- errorClass = "_LEGACY_ERROR_TEMP_1130",
+ errorClass = "PATH_NOT_FOUND",
messageParameters = Map("path" -> path))
}
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala
index fabd0a4e1a9..d11e86b7d63 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala
@@ -2322,15 +2322,21 @@ class DataFrameSuite extends QueryTest
test("SPARK-13774: Check error message for non existent path without globbed paths") {
val uuid = UUID.randomUUID().toString
val baseDir = Utils.createTempDir()
+ val filePath1 = new File(baseDir, "file").getAbsolutePath
+ val filePath2 = new File(baseDir, "file2").getAbsolutePath
+ val filePath3 = new File(uuid, "file3").getAbsolutePath
try {
- val e = intercept[AnalysisException] {
- spark.read.format("csv").load(
- new File(baseDir, "file").getAbsolutePath,
- new File(baseDir, "file2").getAbsolutePath,
- new File(uuid, "file3").getAbsolutePath,
- uuid).rdd
- }
- assert(e.getMessage.startsWith("Path does not exist"))
+ checkError(
+ exception = intercept[AnalysisException] {
+ spark.read.format("csv").load(
+ filePath1,
+ filePath2,
+ filePath3,
+ uuid).rdd
+ },
+ errorClass = "PATH_NOT_FOUND",
+ parameters = Map("path" -> s"file:$filePath1")
+ )
} finally {
}
@@ -2341,20 +2347,26 @@ class DataFrameSuite extends QueryTest
// Non-existent initial path component:
val nonExistentBasePath = "/" + UUID.randomUUID().toString
assert(!new File(nonExistentBasePath).exists())
- val e = intercept[AnalysisException] {
- spark.read.format("text").load(s"$nonExistentBasePath/*")
- }
- assert(e.getMessage.startsWith("Path does not exist"))
+ checkError(
+ exception = intercept[AnalysisException] {
+ spark.read.format("text").load(s"$nonExistentBasePath/*")
+ },
+ errorClass = "PATH_NOT_FOUND",
+ parameters = Map("path" -> s"file:$nonExistentBasePath/*")
+ )
// Existent initial path component, but no matching files:
val baseDir = Utils.createTempDir()
val childDir = Utils.createTempDir(baseDir.getAbsolutePath)
assert(childDir.exists())
try {
- val e1 = intercept[AnalysisException] {
- spark.read.json(s"${baseDir.getAbsolutePath}/*/*-xyz.json").rdd
- }
- assert(e1.getMessage.startsWith("Path does not exist"))
+ checkError(
+ exception = intercept[AnalysisException] {
+ spark.read.json(s"${baseDir.getAbsolutePath}/*/*-xyz.json").rdd
+ },
+ errorClass = "PATH_NOT_FOUND",
+ parameters = Map("path" -> s"file:${baseDir.getAbsolutePath}/*/*-xyz.json")
+ )
} finally {
Utils.deleteRecursively(baseDir)
}
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/DataSourceSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/DataSourceSuite.scala
index 3034d4fe67c..2832114d506 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/DataSourceSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/DataSourceSuite.scala
@@ -109,18 +109,22 @@ class DataSourceSuite extends SharedSparkSession with PrivateMethodTester {
}
test("test non existent paths") {
- assertThrows[AnalysisException](
- DataSource.checkAndGlobPathIfNecessary(
- Seq(
- path1.toString,
- path2.toString,
- nonExistentPath.toString
- ),
- hadoopConf,
- checkEmptyGlobPath = true,
- checkFilesExist = true,
- enableGlobbing = true
- )
+ checkError(
+ exception = intercept[AnalysisException](
+ DataSource.checkAndGlobPathIfNecessary(
+ Seq(
+ path1.toString,
+ path2.toString,
+ nonExistentPath.toString
+ ),
+ hadoopConf,
+ checkEmptyGlobPath = true,
+ checkFilesExist = true,
+ enableGlobbing = true
+ )
+ ),
+ errorClass = "PATH_NOT_FOUND",
+ parameters = Map("path" -> "mockFs://mockFs/nonexistentpath")
)
}
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org