You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by ma...@apache.org on 2022/11/09 09:33:29 UTC
[spark] branch master updated: [SPARK-41009][SQL] Rename the error class `_LEGACY_ERROR_TEMP_1070` to `LOCATION_ALREADY_EXISTS`
This is an automated email from the ASF dual-hosted git repository.
maxgekk pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
new 3e8191f7267 [SPARK-41009][SQL] Rename the error class `_LEGACY_ERROR_TEMP_1070` to `LOCATION_ALREADY_EXISTS`
3e8191f7267 is described below
commit 3e8191f726721bf74c8dbcb3ea73a216f6bf0517
Author: Max Gekk <ma...@gmail.com>
AuthorDate: Wed Nov 9 12:33:13 2022 +0300
[SPARK-41009][SQL] Rename the error class `_LEGACY_ERROR_TEMP_1070` to `LOCATION_ALREADY_EXISTS`
### What changes were proposed in this pull request?
In the PR, I propose to assign the proper name `LOCATION_ALREADY_EXISTS` to the legacy error class `_LEGACY_ERROR_TEMP_1070`, and modify the test suites to use `checkError()`, which checks the error class name, context, etc.
### Why are the changes needed?
Proper name improves user experience w/ Spark SQL.
### Does this PR introduce _any_ user-facing change?
Yes, the PR changes a user-facing error message.
### How was this patch tested?
By running the modified test suites:
```
$ build/sbt "core/testOnly *SparkThrowableSuite"
$ build/sbt -Phive-2.3 -Phive-thriftserver "test:testOnly *AlterTableRenameSuite"
$ build/sbt -Phive-2.3 -Phive-thriftserver "test:testOnly *HiveCatalogedDDLSuite"
```
Closes #38490 from MaxGekk/location-already-exists.
Authored-by: Max Gekk <ma...@gmail.com>
Signed-off-by: Max Gekk <ma...@gmail.com>
---
core/src/main/resources/error/error-classes.json | 10 +++---
.../sql/catalyst/catalog/SessionCatalog.scala | 8 ++---
.../spark/sql/errors/QueryCompilationErrors.scala | 10 ------
.../spark/sql/errors/QueryExecutionErrors.scala | 8 +++++
.../spark/sql/execution/command/DDLSuite.scala | 42 ++++++++++++----------
.../command/v1/AlterTableRenameSuite.scala | 17 +++++----
6 files changed, 51 insertions(+), 44 deletions(-)
diff --git a/core/src/main/resources/error/error-classes.json b/core/src/main/resources/error/error-classes.json
index 71703e7efd9..9c914b86bb1 100644
--- a/core/src/main/resources/error/error-classes.json
+++ b/core/src/main/resources/error/error-classes.json
@@ -669,6 +669,11 @@
}
}
},
+ "LOCATION_ALREADY_EXISTS" : {
+ "message" : [
+ "Cannot name the managed table as <identifier>, as its associated location <location> already exists. Please pick a different table name, or remove the existing location first."
+ ]
+ },
"MALFORMED_PROTOBUF_MESSAGE" : {
"message" : [
"Malformed Protobuf messages are detected in message deserialization. Parse Mode: <failFastMode>. To process malformed protobuf message as null result, try setting the option 'mode' as 'PERMISSIVE'."
@@ -1949,11 +1954,6 @@
"CREATE EXTERNAL TABLE must be accompanied by LOCATION."
]
},
- "_LEGACY_ERROR_TEMP_1070" : {
- "message" : [
- "Can not <methodName> the managed table('<tableIdentifier>'). The associated location('<tableLocation>') already exists."
- ]
- },
"_LEGACY_ERROR_TEMP_1071" : {
"message" : [
"Some existing schema fields (<nonExistentColumnNames>) are not present in the new schema. We don't support dropping columns yet."
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala
index bf712f9681e..06214613299 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala
@@ -40,7 +40,7 @@ import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, Project, Subque
import org.apache.spark.sql.catalyst.trees.{CurrentOrigin, Origin}
import org.apache.spark.sql.catalyst.util.{CharVarcharUtils, StringUtils}
import org.apache.spark.sql.connector.catalog.CatalogManager
-import org.apache.spark.sql.errors.QueryCompilationErrors
+import org.apache.spark.sql.errors.{QueryCompilationErrors, QueryExecutionErrors}
import org.apache.spark.sql.internal.SQLConf
import org.apache.spark.sql.internal.StaticSQLConf.GLOBAL_TEMP_DATABASE
import org.apache.spark.sql.types.StructType
@@ -411,8 +411,7 @@ class SessionCatalog(
val fs = tableLocation.getFileSystem(hadoopConf)
if (fs.exists(tableLocation) && fs.listStatus(tableLocation).nonEmpty) {
- throw QueryCompilationErrors.cannotOperateManagedTableWithExistingLocationError(
- "create", table.identifier, tableLocation)
+ throw QueryExecutionErrors.locationAlreadyExists(table.identifier, tableLocation)
}
}
}
@@ -1912,8 +1911,7 @@ class SessionCatalog(
val newTableLocation = new Path(new Path(databaseLocation), format(newName.table))
val fs = newTableLocation.getFileSystem(hadoopConf)
if (fs.exists(newTableLocation)) {
- throw QueryCompilationErrors.cannotOperateManagedTableWithExistingLocationError(
- "rename", oldName, newTableLocation)
+ throw QueryExecutionErrors.locationAlreadyExists(newName, newTableLocation)
}
}
}
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala
index 67ceafbf03d..76fe951c0ec 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala
@@ -851,16 +851,6 @@ private[sql] object QueryCompilationErrors extends QueryErrorsBase {
messageParameters = Map.empty)
}
- def cannotOperateManagedTableWithExistingLocationError(
- methodName: String, tableIdentifier: TableIdentifier, tableLocation: Path): Throwable = {
- new AnalysisException(
- errorClass = "_LEGACY_ERROR_TEMP_1070",
- messageParameters = Map(
- "methodName" -> methodName,
- "tableIdentifier" -> tableIdentifier.toString,
- "tableLocation" -> tableLocation.toString))
- }
-
def dropNonExistentColumnsNotSupportedError(
nonExistentColumnNames: Seq[String]): Throwable = {
new AnalysisException(
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala
index aad9a9ba51e..73664e64c22 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala
@@ -2812,4 +2812,12 @@ private[sql] object QueryExecutionErrors extends QueryErrorsBase {
"failFastMode" -> FailFastMode.name),
cause = e)
}
+
+ def locationAlreadyExists(tableId: TableIdentifier, location: Path): Throwable = {
+ new SparkRuntimeException(
+ errorClass = "LOCATION_ALREADY_EXISTS",
+ messageParameters = Map(
+ "location" -> toSQLValue(location.toString, StringType),
+ "identifier" -> toSQLId(tableId.nameParts)))
+ }
}
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala
index 883672cc112..5aa36471770 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala
@@ -24,7 +24,7 @@ import java.util.Locale
import org.apache.hadoop.fs.{Path, RawLocalFileSystem}
import org.apache.hadoop.fs.permission.{AclEntry, AclStatus}
-import org.apache.spark.{SparkException, SparkFiles}
+import org.apache.spark.{SparkException, SparkFiles, SparkRuntimeException}
import org.apache.spark.internal.config
import org.apache.spark.sql.{AnalysisException, QueryTest, Row, SaveMode}
import org.apache.spark.sql.catalyst.{FunctionIdentifier, QualifiedTableName, TableIdentifier}
@@ -366,26 +366,32 @@ abstract class DDLSuite extends QueryTest with DDLSuiteBase {
withEmptyDirInTablePath("tab1") { tableLoc =>
val hiddenGarbageFile = new File(tableLoc.getCanonicalPath, ".garbage")
hiddenGarbageFile.createNewFile()
- val exMsgWithDefaultDB =
- s"Can not create the managed table('`$SESSION_CATALOG_NAME`.`default`.`tab1`'). " +
- "The associated location"
- var ex = intercept[AnalysisException] {
- sql(s"CREATE TABLE tab1 USING ${dataSource} AS SELECT 1, 'a'")
- }.getMessage
- assert(ex.contains(exMsgWithDefaultDB))
-
- ex = intercept[AnalysisException] {
- sql(s"CREATE TABLE tab1 (col1 int, col2 string) USING ${dataSource}")
- }.getMessage
- assert(ex.contains(exMsgWithDefaultDB))
+ val expectedLoc = s"'${hiddenGarbageFile.getParentFile.toURI.toString.stripSuffix("/")}'"
+ Seq(
+ s"CREATE TABLE tab1 USING $dataSource AS SELECT 1, 'a'",
+ s"CREATE TABLE tab1 (col1 int, col2 string) USING $dataSource"
+ ).foreach { createStmt =>
+ checkError(
+ exception = intercept[SparkRuntimeException] {
+ sql(createStmt)
+ },
+ errorClass = "LOCATION_ALREADY_EXISTS",
+ parameters = Map(
+ "location" -> expectedLoc,
+ "identifier" -> s"`$SESSION_CATALOG_NAME`.`default`.`tab1`"))
+ }
// Always check location of managed table, with or without (IF NOT EXISTS)
withTable("tab2") {
- sql(s"CREATE TABLE tab2 (col1 int, col2 string) USING ${dataSource}")
- ex = intercept[AnalysisException] {
- sql(s"CREATE TABLE IF NOT EXISTS tab1 LIKE tab2")
- }.getMessage
- assert(ex.contains(exMsgWithDefaultDB))
+ sql(s"CREATE TABLE tab2 (col1 int, col2 string) USING $dataSource")
+ checkError(
+ exception = intercept[SparkRuntimeException] {
+ sql(s"CREATE TABLE IF NOT EXISTS tab1 LIKE tab2")
+ },
+ errorClass = "LOCATION_ALREADY_EXISTS",
+ parameters = Map(
+ "location" -> expectedLoc,
+ "identifier" -> s"`$SESSION_CATALOG_NAME`.`default`.`tab1`"))
}
}
}
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/AlterTableRenameSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/AlterTableRenameSuite.scala
index abc99db441d..3efd6d8a957 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/AlterTableRenameSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/AlterTableRenameSuite.scala
@@ -17,7 +17,9 @@
package org.apache.spark.sql.execution.command.v1
+import org.apache.spark.SparkRuntimeException
import org.apache.spark.sql.AnalysisException
+import org.apache.spark.sql.errors.QueryErrorsBase
import org.apache.spark.sql.execution.command
/**
@@ -28,7 +30,7 @@ import org.apache.spark.sql.execution.command
* - V1 In-Memory catalog: `org.apache.spark.sql.execution.command.v1.AlterTableRenameSuite`
* - V1 Hive External catalog: `org.apache.spark.sql.hive.execution.command.AlterTableRenameSuite`
*/
-trait AlterTableRenameSuiteBase extends command.AlterTableRenameSuiteBase {
+trait AlterTableRenameSuiteBase extends command.AlterTableRenameSuiteBase with QueryErrorsBase {
test("destination database is different") {
withNamespaceAndTable("dst_ns", "dst_tbl") { dst =>
withNamespace("src_ns") {
@@ -66,11 +68,14 @@ trait AlterTableRenameSuiteBase extends command.AlterTableRenameSuiteBase {
withTableDir(dst) { (fs, dst_dir) =>
sql(s"DROP TABLE $dst")
fs.mkdirs(dst_dir)
- val errMsg = intercept[AnalysisException] {
- sql(s"ALTER TABLE $src RENAME TO ns.dst_tbl")
- }.getMessage
- assert(errMsg.matches("Can not rename the managed table(.+). " +
- "The associated location(.+) already exists."))
+ checkError(
+ exception = intercept[SparkRuntimeException] {
+ sql(s"ALTER TABLE $src RENAME TO ns.dst_tbl")
+ },
+ errorClass = "LOCATION_ALREADY_EXISTS",
+ parameters = Map(
+ "location" -> s"'$dst_dir'",
+ "identifier" -> toSQLId(dst)))
}
}
}
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org