You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by ge...@apache.org on 2023/03/08 04:05:06 UTC

[spark] branch master updated: [SPARK-42681][SQL] Relax ordering constraint for ALTER TABLE ADD|REPLACE column descriptor

This is an automated email from the ASF dual-hosted git repository.

gengliang pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/master by this push:
     new c39486083e4 [SPARK-42681][SQL] Relax ordering constraint for ALTER TABLE ADD|REPLACE column descriptor
c39486083e4 is described below

commit c39486083e4ea0e10011571762ed33c30467c44f
Author: Vitalii Li <vi...@databricks.com>
AuthorDate: Tue Mar 7 20:04:46 2023 -0800

    [SPARK-42681][SQL] Relax ordering constraint for ALTER TABLE ADD|REPLACE column descriptor
    
    ### What changes were proposed in this pull request?
    
    Currently the grammar for ALTER TABLE ADD|REPLACE column is:
    
    ```
    qualifiedColTypeWithPosition
    : name=multipartIdentifier dataType (NOT NULL)? defaultExpression? commentSpec? colPosition?
    ;
    ```
    
    This enforces a constraint on the order of: (NOT NULL, DEFAULT value, COMMENT value FIRST|AFTER value). We can update the grammar to allow these options in any order instead, to improve usability.
    
    ### Why are the changes needed?
    
    This helps make the SQL syntax more usable.
    
    ### Does this PR introduce _any_ user-facing change?
    
    Yes, the SQL syntax updates slightly.
    
    ### How was this patch tested?
    
    Existing and new unit tests
    
    Closes #40295 from vitaliili-db/SPARK-42681.
    
    Authored-by: Vitalii Li <vi...@databricks.com>
    Signed-off-by: Gengliang Wang <ge...@apache.org>
---
 core/src/main/resources/error/error-classes.json   |  10 +-
 .../spark/sql/catalyst/parser/SqlBaseParser.g4     |   9 +-
 .../spark/sql/catalyst/parser/AstBuilder.scala     | 104 ++++++++++++++++-----
 .../spark/sql/errors/QueryParsingErrors.scala      |  32 +++++--
 .../spark/sql/catalyst/parser/DDLParserSuite.scala |  91 +++++++++++++++++-
 5 files changed, 208 insertions(+), 38 deletions(-)

diff --git a/core/src/main/resources/error/error-classes.json b/core/src/main/resources/error/error-classes.json
index 1832f87598b..061074cd47f 100644
--- a/core/src/main/resources/error/error-classes.json
+++ b/core/src/main/resources/error/error-classes.json
@@ -1,4 +1,10 @@
 {
+  "ALTER_TABLE_COLUMN_DESCRIPTOR_DUPLICATE" : {
+    "message" : [
+      "ALTER TABLE <type> column <columnName> specifies descriptor \"<optionName>\" more than once, which is invalid."
+    ],
+    "sqlState" : "42710"
+  },
   "AMBIGUOUS_COLUMN_OR_FIELD" : {
     "message" : [
       "Column or field <name> is ambiguous and has <n> matches."
@@ -207,9 +213,9 @@
     ],
     "sqlState" : "22018"
   },
-  "CREATE_TABLE_COLUMN_OPTION_DUPLICATE" : {
+  "CREATE_TABLE_COLUMN_DESCRIPTOR_DUPLICATE" : {
     "message" : [
-      "CREATE TABLE column <columnName> specifies option \"<optionName>\" more than once, which is invalid."
+      "CREATE TABLE column <columnName> specifies descriptor \"<optionName>\" more than once, which is invalid."
     ],
     "sqlState" : "42710"
   },
diff --git a/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseParser.g4 b/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseParser.g4
index 6142700f095..93c1da11dca 100644
--- a/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseParser.g4
+++ b/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseParser.g4
@@ -1009,7 +1009,14 @@ qualifiedColTypeWithPositionList
     ;
 
 qualifiedColTypeWithPosition
-    : name=multipartIdentifier dataType (NOT NULL)? defaultExpression? commentSpec? colPosition?
+    : name=multipartIdentifier dataType colDefinitionDescriptorWithPosition*
+    ;
+
+colDefinitionDescriptorWithPosition
+    : NOT NULL
+    | defaultExpression
+    | commentSpec
+    | colPosition
     ;
 
 defaultExpression
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala
index 6ebfa853edc..cdb6d6a06f2 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala
@@ -3006,28 +3006,28 @@ class AstBuilder extends SqlBaseParserBaseVisitor[AnyRef] with SQLConfHelper wit
     ctx.colDefinitionOption().asScala.foreach { option =>
       if (option.NULL != null) {
         if (!nullable) {
-          throw QueryParsingErrors.duplicateCreateTableColumnOption(
+          throw QueryParsingErrors.duplicateTableColumnDescriptor(
             option, colName.getText, "NOT NULL")
         }
         nullable = false
       }
       Option(option.defaultExpression()).foreach { expr =>
         if (defaultExpression.isDefined) {
-          throw QueryParsingErrors.duplicateCreateTableColumnOption(
+          throw QueryParsingErrors.duplicateTableColumnDescriptor(
             option, colName.getText, "DEFAULT")
         }
         defaultExpression = Some(expr)
       }
       Option(option.generationExpression()).foreach { expr =>
         if (generationExpression.isDefined) {
-          throw QueryParsingErrors.duplicateCreateTableColumnOption(
+          throw QueryParsingErrors.duplicateTableColumnDescriptor(
             option, colName.getText, "GENERATED ALWAYS AS")
         }
         generationExpression = Some(expr)
       }
       Option(option.commentSpec()).foreach { spec =>
         if (commentSpec.isDefined) {
-          throw QueryParsingErrors.duplicateCreateTableColumnOption(
+          throw QueryParsingErrors.duplicateTableColumnDescriptor(
             option, colName.getText, "COMMENT")
         }
         commentSpec = Some(spec)
@@ -4006,18 +4006,59 @@ class AstBuilder extends SqlBaseParserBaseVisitor[AnyRef] with SQLConfHelper wit
   override def visitQualifiedColTypeWithPosition(
       ctx: QualifiedColTypeWithPositionContext): QualifiedColType = withOrigin(ctx) {
     val name = typedVisit[Seq[String]](ctx.name)
+    // Check that no duplicates exist among any ALTER TABLE ADD|REPLACE column options specified.
+    var nullable = true
+    var defaultExpression: Option[DefaultExpressionContext] = None
+    var commentSpec: Option[CommentSpecContext] = None
+    var colPosition: Option[ColPositionContext] = None
+    val columnName = name.last
+    ctx.colDefinitionDescriptorWithPosition.asScala.foreach { option =>
+      if (option.NULL != null) {
+        if (!nullable) {
+          throw QueryParsingErrors.duplicateTableColumnDescriptor(
+            option, columnName, "NOT NULL", isCreate = false)
+        }
+        nullable = false
+      }
+      Option(option.defaultExpression()).foreach { expr =>
+        if (defaultExpression.isDefined) {
+          throw QueryParsingErrors.duplicateTableColumnDescriptor(
+            option, columnName, "DEFAULT", isCreate = false)
+        }
+        defaultExpression = Some(expr)
+      }
+      Option(option.commentSpec()).foreach { spec =>
+        if (commentSpec.isDefined) {
+          throw QueryParsingErrors.duplicateTableColumnDescriptor(
+            option, columnName, "COMMENT", isCreate = false)
+        }
+        commentSpec = Some(spec)
+      }
+      Option(option.colPosition()).foreach { spec =>
+        if (colPosition.isDefined) {
+          throw QueryParsingErrors.duplicateTableColumnDescriptor(
+            option, columnName, "FIRST|AFTER", isCreate = false)
+        }
+        colPosition = Some(spec)
+      }
+    }
+
     // Add the 'DEFAULT expression' clause in the column definition, if any, to the column metadata.
-    val defaultExpr = Option(ctx.defaultExpression()).map(visitDefaultExpression)
-    if (defaultExpr.isDefined && !conf.getConf(SQLConf.ENABLE_DEFAULT_COLUMNS)) {
-      throw QueryParsingErrors.defaultColumnNotEnabledError(ctx)
+    val defaultExpr = defaultExpression.map(visitDefaultExpression).map { field =>
+      if (conf.getConf(SQLConf.ENABLE_DEFAULT_COLUMNS)) {
+        field
+      } else {
+        throw QueryParsingErrors.defaultColumnNotEnabledError(ctx)
+      }
     }
+
     QualifiedColType(
       path = if (name.length > 1) Some(UnresolvedFieldName(name.init)) else None,
       colName = name.last,
       dataType = typedVisit[DataType](ctx.dataType),
-      nullable = ctx.NULL == null,
-      comment = Option(ctx.commentSpec()).map(visitCommentSpec),
-      position = Option(ctx.colPosition).map( pos =>
+      nullable = nullable,
+      comment = commentSpec.map(visitCommentSpec),
+      position = colPosition.map( pos =>
         UnresolvedFieldPosition(typedVisit[ColumnPosition](pos))),
       default = defaultExpr)
   }
@@ -4169,23 +4210,40 @@ class AstBuilder extends SqlBaseParserBaseVisitor[AnyRef] with SQLConfHelper wit
     ReplaceColumns(
       createUnresolvedTable(ctx.multipartIdentifier, "ALTER TABLE ... REPLACE COLUMNS"),
       ctx.columns.qualifiedColTypeWithPosition.asScala.map { colType =>
-        if (colType.NULL != null) {
-          throw QueryParsingErrors.operationInHiveStyleCommandUnsupportedError(
-            "NOT NULL", "REPLACE COLUMNS", ctx)
-        }
-        if (colType.colPosition != null) {
-          throw QueryParsingErrors.operationInHiveStyleCommandUnsupportedError(
-            "Column position", "REPLACE COLUMNS", ctx)
-        }
-        val col = typedVisit[QualifiedColType](colType)
-        if (col.path.isDefined) {
+        val name = typedVisit[Seq[String]](colType.name)
+        if (name.length > 1) {
           throw QueryParsingErrors.operationInHiveStyleCommandUnsupportedError(
             "Replacing with a nested column", "REPLACE COLUMNS", ctx)
         }
-        if (Option(colType.defaultExpression()).map(visitDefaultExpression).isDefined) {
-          throw QueryParsingErrors.defaultColumnNotImplementedYetError(ctx)
+        var commentSpec: Option[CommentSpecContext] = None
+        colType.colDefinitionDescriptorWithPosition.asScala.foreach { opt =>
+          if (opt.NULL != null) {
+            throw QueryParsingErrors.operationInHiveStyleCommandUnsupportedError(
+              "NOT NULL", "REPLACE COLUMNS", ctx)
+          }
+          if (opt.colPosition != null) {
+            throw QueryParsingErrors.operationInHiveStyleCommandUnsupportedError(
+              "Column position", "REPLACE COLUMNS", ctx)
+          }
+          if (Option(opt.defaultExpression()).map(visitDefaultExpression).isDefined) {
+            throw QueryParsingErrors.defaultColumnNotImplementedYetError(ctx)
+          }
+          Option(opt.commentSpec()).foreach { spec =>
+            if (commentSpec.isDefined) {
+              throw QueryParsingErrors.duplicateTableColumnDescriptor(
+                opt, name.last, "COMMENT", isCreate = false, alterType = "REPLACE")
+            }
+            commentSpec = Some(spec)
+          }
         }
-        col
+        QualifiedColType(
+          path = None,
+          colName = name.last,
+          dataType = typedVisit[DataType](colType.dataType),
+          nullable = true,
+          comment = commentSpec.map(visitCommentSpec),
+          position = None,
+          default = None)
       }.toSeq
     )
   }
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryParsingErrors.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryParsingErrors.scala
index 7aad1127c8c..4a8ec2ec1db 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryParsingErrors.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryParsingErrors.scala
@@ -642,15 +642,31 @@ private[sql] object QueryParsingErrors extends QueryErrorsBase {
     new ParseException(errorClass = "_LEGACY_ERROR_TEMP_0059", ctx)
   }
 
-  def duplicateCreateTableColumnOption(
+  def duplicateTableColumnDescriptor(
       ctx: ParserRuleContext,
       columnName: String,
-      optionName: String): Throwable = {
-    new ParseException(
-      errorClass = "CREATE_TABLE_COLUMN_OPTION_DUPLICATE",
-      messageParameters = Map(
-        "columnName" -> columnName,
-        "optionName" -> optionName),
-      ctx)
+      optionName: String,
+      isCreate: Boolean = true,
+      alterType: String = "ADD"): Throwable = {
+    val errorClass =
+      if (isCreate) {
+        "CREATE_TABLE_COLUMN_DESCRIPTOR_DUPLICATE"
+      } else {
+        "ALTER_TABLE_COLUMN_DESCRIPTOR_DUPLICATE"
+      }
+    val alterTypeMap: Map[String, String] =
+      if (isCreate) {
+        Map.empty
+      } else {
+        Map("type" -> alterType)
+      }
+    new ParseException(
+      errorClass = errorClass,
+      messageParameters = alterTypeMap ++ Map(
+          "columnName" -> columnName,
+          "optionName" -> optionName
+        ),
+      ctx
+    )
   }
 }
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala
index 5196d19ffcd..a16fa28b7bf 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala
@@ -1464,6 +1464,19 @@ class DDLParserSuite extends AnalysisTest {
         fragment = sql8,
         start = 0,
         stop = 52))
+
+    val sql9 = "ALTER TABLE table_name REPLACE COLUMNS (a STRING COMMENT 'x' COMMENT 'y')"
+    checkError(
+      exception = parseException(sql9),
+      errorClass = "ALTER_TABLE_COLUMN_DESCRIPTOR_DUPLICATE",
+      parameters = Map(
+        "type" -> "REPLACE",
+        "columnName" -> "a",
+        "optionName" -> "COMMENT"),
+      context = ExpectedContext(
+        fragment = sql9,
+        start = 0,
+        stop = 72))
   }
 
   test("alter view: rename view") {
@@ -2690,7 +2703,7 @@ class DDLParserSuite extends AnalysisTest {
       exception = intercept[ParseException](
         parsePlan(
           "CREATE TABLE my_tab(a INT, b STRING NOT NULL DEFAULT \"abc\" NOT NULL)")),
-      errorClass = "CREATE_TABLE_COLUMN_OPTION_DUPLICATE",
+      errorClass = "CREATE_TABLE_COLUMN_DESCRIPTOR_DUPLICATE",
       parameters = Map(
         "columnName" -> "b",
         "optionName" -> "NOT NULL"),
@@ -2700,7 +2713,7 @@ class DDLParserSuite extends AnalysisTest {
       exception = intercept[ParseException](
         parsePlan(
           "CREATE TABLE my_tab(a INT, b STRING DEFAULT \"123\" NOT NULL DEFAULT \"abc\")")),
-      errorClass = "CREATE_TABLE_COLUMN_OPTION_DUPLICATE",
+      errorClass = "CREATE_TABLE_COLUMN_DESCRIPTOR_DUPLICATE",
       parameters = Map(
         "columnName" -> "b",
         "optionName" -> "DEFAULT"),
@@ -2710,7 +2723,7 @@ class DDLParserSuite extends AnalysisTest {
       exception = intercept[ParseException](
         parsePlan(
           "CREATE TABLE my_tab(a INT, b STRING COMMENT \"abc\" NOT NULL COMMENT \"abc\")")),
-      errorClass = "CREATE_TABLE_COLUMN_OPTION_DUPLICATE",
+      errorClass = "CREATE_TABLE_COLUMN_DESCRIPTOR_DUPLICATE",
       parameters = Map(
         "columnName" -> "b",
         "optionName" -> "COMMENT"),
@@ -2739,7 +2752,7 @@ class DDLParserSuite extends AnalysisTest {
     checkError(
       exception = parseException("CREATE TABLE my_tab(a INT, " +
           "b INT GENERATED ALWAYS AS (a + 1) GENERATED ALWAYS AS (a + 2)) USING PARQUET"),
-      errorClass = "CREATE_TABLE_COLUMN_OPTION_DUPLICATE",
+      errorClass = "CREATE_TABLE_COLUMN_DESCRIPTOR_DUPLICATE",
       parameters = Map("columnName" -> "b", "optionName" -> "GENERATED ALWAYS AS"),
       context = ExpectedContext(
         fragment = "b INT GENERATED ALWAYS AS (a + 1) GENERATED ALWAYS AS (a + 2)",
@@ -2762,4 +2775,74 @@ class DDLParserSuite extends AnalysisTest {
       parameters = Map("error" -> "'a'", "hint" -> ": missing '('")
     )
   }
+
+  test("SPARK-42681: Relax ordering constraint for ALTER TABLE ADD COLUMN options") {
+    // Positive test cases to verify that column definition options could be applied in any order.
+    val expectedPlan = AddColumns(
+      UnresolvedTable(Seq("my_tab"), "ALTER TABLE ... ADD COLUMN", None),
+      Seq(
+        QualifiedColType(
+          path = None,
+          colName = "x",
+          dataType = StringType,
+          nullable = false,
+          comment = Some("a"),
+          position = Some(UnresolvedFieldPosition(first())),
+          default = Some("'abc'")
+        )
+      )
+    )
+    Seq("NOT NULL", "COMMENT \"a\"", "FIRST", "DEFAULT 'abc'").permutations
+      .map(_.mkString(" "))
+      .foreach { element =>
+        parseCompare(s"ALTER TABLE my_tab ADD COLUMN x STRING $element", expectedPlan)
+      }
+
+    // These are negative test cases exercising error cases.
+    checkError(
+      exception = intercept[ParseException](
+        parsePlan("ALTER TABLE my_tab ADD COLUMN b STRING NOT NULL DEFAULT \"abc\" NOT NULL")
+      ),
+      errorClass = "ALTER_TABLE_COLUMN_DESCRIPTOR_DUPLICATE",
+      parameters = Map("type" -> "ADD", "columnName" -> "b", "optionName" -> "NOT NULL"),
+      context = ExpectedContext(
+        fragment = "b STRING NOT NULL DEFAULT \"abc\" NOT NULL",
+        start = 30,
+        stop = 69
+      )
+    )
+    checkError(
+      exception = intercept[ParseException](
+        parsePlan("ALTER TABLE my_tab ADD COLUMN b STRING DEFAULT \"123\" NOT NULL DEFAULT \"abc\"")
+      ),
+      errorClass = "ALTER_TABLE_COLUMN_DESCRIPTOR_DUPLICATE",
+      parameters = Map("type" -> "ADD", "columnName" -> "b", "optionName" -> "DEFAULT"),
+      context = ExpectedContext(
+        fragment = "b STRING DEFAULT \"123\" NOT NULL DEFAULT \"abc\"",
+        start = 30,
+        stop = 74
+      )
+    )
+    checkError(
+      exception = intercept[ParseException](
+        parsePlan("ALTER TABLE my_tab ADD COLUMN b STRING COMMENT \"abc\" NOT NULL COMMENT \"abc\"")
+      ),
+      errorClass = "ALTER_TABLE_COLUMN_DESCRIPTOR_DUPLICATE",
+      parameters = Map("type" -> "ADD", "columnName" -> "b", "optionName" -> "COMMENT"),
+      context = ExpectedContext(
+        fragment = "b STRING COMMENT \"abc\" NOT NULL COMMENT \"abc\"",
+        start = 30,
+        stop = 74
+      )
+    )
+    checkError(
+      exception = intercept[ParseException](
+        parsePlan("ALTER TABLE my_tab ADD COLUMN b STRING FIRST COMMENT \"abc\" AFTER y")
+      ),
+      errorClass = "ALTER_TABLE_COLUMN_DESCRIPTOR_DUPLICATE",
+      parameters = Map("type" -> "ADD", "columnName" -> "b", "optionName" -> "FIRST|AFTER"),
+      context =
+        ExpectedContext(fragment = "b STRING FIRST COMMENT \"abc\" AFTER y", start = 30, stop = 65)
+    )
+  }
 }


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org