Posted to commits@spark.apache.org by gu...@apache.org on 2023/02/12 00:11:20 UTC

[spark] branch master updated: [SPARK-42310][SQL] Assign name to _LEGACY_ERROR_TEMP_1289

This is an automated email from the ASF dual-hosted git repository.

gurwls223 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/master by this push:
     new 81111159976 [SPARK-42310][SQL] Assign name to _LEGACY_ERROR_TEMP_1289
81111159976 is described below

commit 8111115997659ad7b8854c2af77d4515a06b7407
Author: itholic <ha...@databricks.com>
AuthorDate: Sun Feb 12 09:11:07 2023 +0900

    [SPARK-42310][SQL] Assign name to _LEGACY_ERROR_TEMP_1289
    
    ### What changes were proposed in this pull request?
    
    This PR proposes to assign the name INVALID_COLUMN_NAME_AS_PATH to _LEGACY_ERROR_TEMP_1289.
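
    A minimal sketch of the user-visible change, based on the AvroV1Suite test updated in this patch (the output path below is a placeholder, not part of the change):

    ```scala
    spark.range(1).createTempView("v")
    // The generated column name (IF((ID = 1), 1, 0)) contains characters that are
    // not allowed in file paths, so field-name validation rejects the write.
    spark.sql("SELECT ID, IF(ID=1,1,0) FROM v")
      .write.format("avro").save("/tmp/out")  // placeholder path
    // Before: _LEGACY_ERROR_TEMP_1289 ("Column name ... contains invalid character(s).")
    // After:  INVALID_COLUMN_NAME_AS_PATH with parameters
    //         datasource = "AvroFileFormat", columnName = "`(IF((ID = 1), 1, 0))`"
    ```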
    
    ### Why are the changes needed?
    
    We should assign a proper name to each _LEGACY_ERROR_TEMP_* error class.
    
    ### Does this PR introduce _any_ user-facing change?
    
    No
    
    ### How was this patch tested?
    
    `./build/sbt "sql/testOnly org.apache.spark.sql.SQLQueryTestSuite*"`
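
    The suites touched by this patch can presumably be run directly as well; the sbt project names "avro" and "hive" here are assumptions about the build layout:

    `./build/sbt "avro/testOnly org.apache.spark.sql.avro.*"`
    `./build/sbt "hive/testOnly org.apache.spark.sql.hive.execution.HiveDDLSuite"`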
    
    Closes #39946 from itholic/LEGACY_1289.
    
    Authored-by: itholic <ha...@databricks.com>
    Signed-off-by: Hyukjin Kwon <gu...@apache.org>
---
 .../org/apache/spark/sql/avro/AvroSuite.scala      | 50 ++++++++++++++--------
 core/src/main/resources/error/error-classes.json   | 11 ++---
 .../spark/sql/errors/QueryCompilationErrors.scala  | 10 +++--
 .../execution/datasources/DataSourceUtils.scala    |  3 +-
 .../spark/sql/hive/execution/HiveDDLSuite.scala    | 41 ++++++++++--------
 5 files changed, 70 insertions(+), 45 deletions(-)

diff --git a/connector/avro/src/test/scala/org/apache/spark/sql/avro/AvroSuite.scala b/connector/avro/src/test/scala/org/apache/spark/sql/avro/AvroSuite.scala
index a913da7a172..debdf9b45cf 100644
--- a/connector/avro/src/test/scala/org/apache/spark/sql/avro/AvroSuite.scala
+++ b/connector/avro/src/test/scala/org/apache/spark/sql/avro/AvroSuite.scala
@@ -2219,14 +2219,18 @@ abstract class AvroSuite
       withView("v") {
         spark.range(1).createTempView("v")
         withTempDir { dir =>
-          val e = intercept[AnalysisException] {
-            sql(
-              s"""
-                 |CREATE TABLE test_ddl USING AVRO
-                 |LOCATION '${dir}'
-                 |AS SELECT ID, IF(ID=1,1,0) FROM v""".stripMargin)
-          }.getMessage
-          assert(e.contains("Column name \"(IF((ID = 1), 1, 0))\" contains invalid character(s)."))
+          checkError(
+            exception = intercept[AnalysisException] {
+              sql(
+                s"""
+                   |CREATE TABLE test_ddl USING AVRO
+                   |LOCATION '${dir}'
+                   |AS SELECT ID, IF(ID=1,1,0) FROM v""".stripMargin)
+            },
+            errorClass = "INVALID_COLUMN_NAME_AS_PATH",
+            parameters = Map(
+              "datasource" -> "AvroFileFormat", "columnName" -> "`(IF((ID = 1), 1, 0))`")
+          )
         }
 
         withTempDir { dir =>
@@ -2298,20 +2302,28 @@ class AvroV1Suite extends AvroSuite {
     withView("v") {
       spark.range(1).createTempView("v")
       withTempDir { dir =>
-        val e = intercept[AnalysisException] {
-          sql("SELECT ID, IF(ID=1,1,0) FROM v").write.mode(SaveMode.Overwrite)
-            .format("avro").save(dir.getCanonicalPath)
-        }.getMessage
-        assert(e.contains("Column name \"(IF((ID = 1), 1, 0))\" contains invalid character(s)."))
+        checkError(
+          exception = intercept[AnalysisException] {
+            sql("SELECT ID, IF(ID=1,1,0) FROM v").write.mode(SaveMode.Overwrite)
+              .format("avro").save(dir.getCanonicalPath)
+          },
+          errorClass = "INVALID_COLUMN_NAME_AS_PATH",
+          parameters = Map(
+            "datasource" -> "AvroFileFormat", "columnName" -> "`(IF((ID = 1), 1, 0))`")
+        )
       }
 
       withTempDir { dir =>
-        val e = intercept[AnalysisException] {
-          sql("SELECT NAMED_STRUCT('(IF((ID = 1), 1, 0))', IF(ID=1,ID,0)) AS col1 FROM v")
-            .write.mode(SaveMode.Overwrite)
-            .format("avro").save(dir.getCanonicalPath)
-        }.getMessage
-        assert(e.contains("Column name \"(IF((ID = 1), 1, 0))\" contains invalid character(s)."))
+        checkError(
+          exception = intercept[AnalysisException] {
+            sql("SELECT NAMED_STRUCT('(IF((ID = 1), 1, 0))', IF(ID=1,ID,0)) AS col1 FROM v")
+              .write.mode(SaveMode.Overwrite)
+              .format("avro").save(dir.getCanonicalPath)
+          },
+          errorClass = "INVALID_COLUMN_NAME_AS_PATH",
+          parameters = Map(
+            "datasource" -> "AvroFileFormat", "columnName" -> "`(IF((ID = 1), 1, 0))`")
+        )
       }
     }
   }
diff --git a/core/src/main/resources/error/error-classes.json b/core/src/main/resources/error/error-classes.json
index b5f6da1e45d..20685622bc5 100644
--- a/core/src/main/resources/error/error-classes.json
+++ b/core/src/main/resources/error/error-classes.json
@@ -786,6 +786,12 @@
       "The expected format is ByteString, but was <unsupported> (<class>)."
     ]
   },
+  "INVALID_COLUMN_NAME_AS_PATH" : {
+    "message" : [
+      "The datasource <datasource> cannot save the column <columnName> because its name contains some characters that are not allowed in file paths. Please, use an alias to rename it."
+    ],
+    "sqlState" : "46121"
+  },
   "INVALID_COLUMN_OR_FIELD_DATA_TYPE" : {
     "message" : [
       "Column or field <name> is of type <type> while it's required to be <expectedType>."
@@ -3359,11 +3365,6 @@
       "Table or view '<name>' already exists. SaveMode: ErrorIfExists."
     ]
   },
-  "_LEGACY_ERROR_TEMP_1289" : {
-    "message" : [
-      "Column name \"<name>\" contains invalid character(s). Please use alias to rename it."
-    ]
-  },
   "_LEGACY_ERROR_TEMP_1290" : {
     "message" : [
       "Text data source supports only a single column, and you have <schemaSize> columns."
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala
index 4b0e914f279..74dd5879061 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala
@@ -2737,10 +2737,14 @@ private[sql] object QueryCompilationErrors extends QueryErrorsBase {
       messageParameters = Map("name" -> name))
   }
 
-  def columnNameContainsInvalidCharactersError(name: String): Throwable = {
+  def invalidColumnNameAsPathError(datasource: String, columnName: String): Throwable = {
     new AnalysisException(
-      errorClass = "_LEGACY_ERROR_TEMP_1289",
-      messageParameters = Map("name" -> name))
+      errorClass = "INVALID_COLUMN_NAME_AS_PATH",
+      messageParameters = Map(
+        "datasource" -> datasource,
+        "columnName" -> toSQLId(columnName)
+      )
+    )
   }
 
   def textDataSourceWithMultiColumnsError(schema: StructType): Throwable = {
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSourceUtils.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSourceUtils.scala
index 5eb422f80e2..cedca062074 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSourceUtils.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSourceUtils.scala
@@ -73,7 +73,8 @@ object DataSourceUtils extends PredicateHelper {
   def checkFieldNames(format: FileFormat, schema: StructType): Unit = {
     schema.foreach { field =>
       if (!format.supportFieldName(field.name)) {
-        throw QueryCompilationErrors.columnNameContainsInvalidCharactersError(field.name)
+        throw QueryCompilationErrors.invalidColumnNameAsPathError(
+          format.getClass.getSimpleName, field.name)
       }
       field.dataType match {
         case s: StructType => checkFieldNames(format, s)
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala
index e09b923ee51..96408790259 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala
@@ -2965,12 +2965,17 @@ class HiveDDLSuite
         spark.range(1).createTempView("v")
         withTempPath { path =>
           Seq("PARQUET", "ORC").foreach { format =>
-            val e = intercept[SparkException] {
-              spark.sql(s"INSERT OVERWRITE LOCAL DIRECTORY '${path.getCanonicalPath}' " +
-                s"STORED AS $format SELECT ID, if(1=1, 1, 0), abs(id), '^-' FROM v")
-            }.getCause.getMessage
-            assert(e.contains("Column name \"(IF((1 = 1), 1, 0))\" contains" +
-              " invalid character(s). Please use alias to rename it."))
+            checkError(
+              exception = intercept[SparkException] {
+                spark.sql(s"INSERT OVERWRITE LOCAL DIRECTORY '${path.getCanonicalPath}' " +
+                  s"STORED AS $format SELECT ID, if(1=1, 1, 0), abs(id), '^-' FROM v")
+              }.getCause.asInstanceOf[AnalysisException],
+              errorClass = "INVALID_COLUMN_NAME_AS_PATH",
+              parameters = Map(
+                "datasource" -> "HiveFileFormat",
+                "columnName" -> "`(IF((1 = 1), 1, 0))`"
+              )
+            )
           }
         }
       }
@@ -2982,18 +2987,20 @@ class HiveDDLSuite
       withView("v") {
         spark.range(1).createTempView("v")
         withTempPath { path =>
-          val e = intercept[SparkException] {
-            spark.sql(
-              s"""
-                 |INSERT OVERWRITE LOCAL DIRECTORY '${path.getCanonicalPath}'
-                 |STORED AS PARQUET
-                 |SELECT
-                 |NAMED_STRUCT('ID', ID, 'IF(ID=1,ID,0)', IF(ID=1,ID,0), 'B', ABS(ID)) AS col1
-                 |FROM v
+          checkError(
+            exception = intercept[SparkException] {
+              spark.sql(
+                s"""
+                   |INSERT OVERWRITE LOCAL DIRECTORY '${path.getCanonicalPath}'
+                   |STORED AS PARQUET
+                   |SELECT
+                   |NAMED_STRUCT('ID', ID, 'IF(ID=1,ID,0)', IF(ID=1,ID,0), 'B', ABS(ID)) AS col1
+                   |FROM v
                """.stripMargin)
-          }.getCause.getMessage
-          assert(e.contains("Column name \"IF(ID=1,ID,0)\" contains invalid character(s). " +
-            "Please use alias to rename it."))
+            }.getCause.asInstanceOf[AnalysisException],
+            errorClass = "INVALID_COLUMN_NAME_AS_PATH",
+            parameters = Map("datasource" -> "HiveFileFormat", "columnName" -> "`IF(ID=1,ID,0)`")
+          )
         }
       }
     }

