You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by ma...@apache.org on 2022/11/23 05:38:39 UTC
[spark] branch master updated: [SPARK-40948][SQL][FOLLOWUP] Restore PATH_NOT_FOUND
This is an automated email from the ASF dual-hosted git repository.
maxgekk pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
new 17816170316 [SPARK-40948][SQL][FOLLOWUP] Restore PATH_NOT_FOUND
17816170316 is described below
commit 178161703161ccf49b37baf9a667630865367950
Author: itholic <ha...@databricks.com>
AuthorDate: Wed Nov 23 08:38:20 2022 +0300
[SPARK-40948][SQL][FOLLOWUP] Restore PATH_NOT_FOUND
### What changes were proposed in this pull request?
The original PR to introduce the error class `PATH_NOT_FOUND` was reverted since it broke the tests in a different test environment.
This PR proposes to restore it.
### Why are the changes needed?
Restoring the reverted changes with a proper fix.
### Does this PR introduce _any_ user-facing change?
No
### How was this patch tested?
The existing CI should pass.
Closes #38575 from itholic/SPARK-40948-followup.
Authored-by: itholic <ha...@databricks.com>
Signed-off-by: Max Gekk <ma...@gmail.com>
---
R/pkg/tests/fulltests/test_sparkSQL.R | 14 +++++---
core/src/main/resources/error/error-classes.json | 10 +++---
.../spark/sql/errors/QueryCompilationErrors.scala | 2 +-
.../org/apache/spark/sql/DataFrameSuite.scala | 37 ++++++++++++----------
.../execution/datasources/DataSourceSuite.scala | 28 +++++++++-------
5 files changed, 52 insertions(+), 39 deletions(-)
diff --git a/R/pkg/tests/fulltests/test_sparkSQL.R b/R/pkg/tests/fulltests/test_sparkSQL.R
index 534ec07abac..d2b6220b2e7 100644
--- a/R/pkg/tests/fulltests/test_sparkSQL.R
+++ b/R/pkg/tests/fulltests/test_sparkSQL.R
@@ -3990,12 +3990,16 @@ test_that("Call DataFrameWriter.load() API in Java without path and check argume
expect_error(read.df(source = "json"),
paste("Error in load : analysis error - Unable to infer schema for JSON.",
"It must be specified manually"))
- expect_error(read.df("arbitrary_path"), "Error in load : analysis error - Path does not exist")
- expect_error(read.json("arbitrary_path"), "Error in json : analysis error - Path does not exist")
- expect_error(read.text("arbitrary_path"), "Error in text : analysis error - Path does not exist")
- expect_error(read.orc("arbitrary_path"), "Error in orc : analysis error - Path does not exist")
+ expect_error(read.df("arbitrary_path"),
+ "Error in load : analysis error - \\[PATH_NOT_FOUND\\].*")
+ expect_error(read.json("arbitrary_path"),
+ "Error in json : analysis error - \\[PATH_NOT_FOUND\\].*")
+ expect_error(read.text("arbitrary_path"),
+ "Error in text : analysis error - \\[PATH_NOT_FOUND\\].*")
+ expect_error(read.orc("arbitrary_path"),
+ "Error in orc : analysis error - \\[PATH_NOT_FOUND\\].*")
expect_error(read.parquet("arbitrary_path"),
- "Error in parquet : analysis error - Path does not exist")
+ "Error in parquet : analysis error - \\[PATH_NOT_FOUND\\].*")
# Arguments checking in R side.
expect_error(read.df(path = c(3)),
diff --git a/core/src/main/resources/error/error-classes.json b/core/src/main/resources/error/error-classes.json
index 77d155bfc21..12c97c2108a 100644
--- a/core/src/main/resources/error/error-classes.json
+++ b/core/src/main/resources/error/error-classes.json
@@ -912,6 +912,11 @@
],
"sqlState" : "42000"
},
+ "PATH_NOT_FOUND" : {
+ "message" : [
+ "Path does not exist: <path>."
+ ]
+ },
"PIVOT_VALUE_DATA_TYPE_MISMATCH" : {
"message" : [
"Invalid pivot value '<value>': value data type <valueType> does not match pivot column data type <pivotType>"
@@ -2332,11 +2337,6 @@
"Unable to infer schema for <format>. It must be specified manually."
]
},
- "_LEGACY_ERROR_TEMP_1130" : {
- "message" : [
- "Path does not exist: <path>."
- ]
- },
"_LEGACY_ERROR_TEMP_1131" : {
"message" : [
"Data source <className> does not support <outputMode> output mode."
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala
index 63c912c15a1..0f245597efd 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala
@@ -1378,7 +1378,7 @@ private[sql] object QueryCompilationErrors extends QueryErrorsBase {
def dataPathNotExistError(path: String): Throwable = {
new AnalysisException(
- errorClass = "_LEGACY_ERROR_TEMP_1130",
+ errorClass = "PATH_NOT_FOUND",
messageParameters = Map("path" -> path))
}
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala
index aab68065319..589ee1bea27 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala
@@ -2327,39 +2327,44 @@ class DataFrameSuite extends QueryTest
test("SPARK-13774: Check error message for non existent path without globbed paths") {
val uuid = UUID.randomUUID().toString
val baseDir = Utils.createTempDir()
- try {
- val e = intercept[AnalysisException] {
+ checkError(
+ exception = intercept[AnalysisException] {
spark.read.format("csv").load(
new File(baseDir, "file").getAbsolutePath,
new File(baseDir, "file2").getAbsolutePath,
new File(uuid, "file3").getAbsolutePath,
uuid).rdd
- }
- assert(e.getMessage.startsWith("Path does not exist"))
- } finally {
-
- }
-
+ },
+ errorClass = "PATH_NOT_FOUND",
+ parameters = Map("path" -> "file:.*"),
+ matchPVals = true
+ )
}
test("SPARK-13774: Check error message for not existent globbed paths") {
// Non-existent initial path component:
val nonExistentBasePath = "/" + UUID.randomUUID().toString
assert(!new File(nonExistentBasePath).exists())
- val e = intercept[AnalysisException] {
- spark.read.format("text").load(s"$nonExistentBasePath/*")
- }
- assert(e.getMessage.startsWith("Path does not exist"))
+ checkError(
+ exception = intercept[AnalysisException] {
+ spark.read.format("text").load(s"$nonExistentBasePath/*")
+ },
+ errorClass = "PATH_NOT_FOUND",
+ parameters = Map("path" -> s"file:$nonExistentBasePath/*")
+ )
// Existent initial path component, but no matching files:
val baseDir = Utils.createTempDir()
val childDir = Utils.createTempDir(baseDir.getAbsolutePath)
assert(childDir.exists())
try {
- val e1 = intercept[AnalysisException] {
- spark.read.json(s"${baseDir.getAbsolutePath}/*/*-xyz.json").rdd
- }
- assert(e1.getMessage.startsWith("Path does not exist"))
+ checkError(
+ exception = intercept[AnalysisException] {
+ spark.read.json(s"${baseDir.getAbsolutePath}/*/*-xyz.json").rdd
+ },
+ errorClass = "PATH_NOT_FOUND",
+ parameters = Map("path" -> s"file:${baseDir.getAbsolutePath}/*/*-xyz.json")
+ )
} finally {
Utils.deleteRecursively(baseDir)
}
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/DataSourceSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/DataSourceSuite.scala
index 3034d4fe67c..06e570cb016 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/DataSourceSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/DataSourceSuite.scala
@@ -109,18 +109,22 @@ class DataSourceSuite extends SharedSparkSession with PrivateMethodTester {
}
test("test non existent paths") {
- assertThrows[AnalysisException](
- DataSource.checkAndGlobPathIfNecessary(
- Seq(
- path1.toString,
- path2.toString,
- nonExistentPath.toString
- ),
- hadoopConf,
- checkEmptyGlobPath = true,
- checkFilesExist = true,
- enableGlobbing = true
- )
+ checkError(
+ exception = intercept[AnalysisException](
+ DataSource.checkAndGlobPathIfNecessary(
+ Seq(
+ path1.toString,
+ path2.toString,
+ nonExistentPath.toString
+ ),
+ hadoopConf,
+ checkEmptyGlobPath = true,
+ checkFilesExist = true,
+ enableGlobbing = true
+ )
+ ),
+ errorClass = "PATH_NOT_FOUND",
+ parameters = Map("path" -> nonExistentPath.toString)
)
}
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org