Posted to commits@spark.apache.org by ma...@apache.org on 2023/03/30 14:44:13 UTC

[spark] branch master updated: [SPARK-42979][SQL] Define literal constructors as keywords

This is an automated email from the ASF dual-hosted git repository.

maxgekk pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/master by this push:
     new 5a17537aa4a [SPARK-42979][SQL] Define literal constructors as keywords
5a17537aa4a is described below

commit 5a17537aa4a777429431542cfa6184591476e54a
Author: Max Gekk <ma...@gmail.com>
AuthorDate: Thu Mar 30 17:43:54 2023 +0300

    [SPARK-42979][SQL] Define literal constructors as keywords
    
    ### What changes were proposed in this pull request?
    In the PR, I propose to define literal constructors `DATE`, `TIMESTAMP`, `TIMESTAMP_NTZ`, `TIMESTAMP_LTZ`, `INTERVAL`, and `X` as Spark SQL keywords.
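    
    For illustration, a minimal Scala sketch of the constructors involved,
    assuming a SparkSession named `spark` as in spark-shell (the example
    values are arbitrary and not part of this patch):
    ```
    // After this change the constructor names below are lexed as keywords
    // rather than as generic identifiers.
    spark.sql("SELECT DATE'2023-03-30'")                    // DateType
    spark.sql("SELECT TIMESTAMP'2023-03-30 17:43:54'")      // session default
    spark.sql("SELECT TIMESTAMP_NTZ'2023-03-30 17:43:54'")  // no time zone
    spark.sql("SELECT TIMESTAMP_LTZ'2023-03-30 17:43:54'")  // local time zone
    spark.sql("SELECT INTERVAL '3 years 1 month'")          // interval
    spark.sql("SELECT X'4D61'")                             // binary from hex
    ```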
    
    ### Why are the changes needed?
    Literal constructors that are not keywords cause some inconvenience when analysing/transforming the lexer tree, for example when forming stable column aliases (see https://github.com/apache/spark/pull/40126).
    
    ### Does this PR introduce _any_ user-facing change?
    No.
    
    ### How was this patch tested?
    By running the affected test suites:
    ```
    $ build/sbt "test:testOnly *SQLKeywordSuite"
    $ build/sbt "test:testOnly *.ResolveAliasesSuite"
    ```
    
    Closes #40593 from MaxGekk/typed-literal-keywords.
    
    Authored-by: Max Gekk <ma...@gmail.com>
    Signed-off-by: Max Gekk <ma...@gmail.com>
---
 docs/sql-ref-ansi-compliance.md                    |  1 +
 .../spark/sql/catalyst/parser/SqlBaseLexer.g4      |  1 +
 .../spark/sql/catalyst/parser/SqlBaseParser.g4     | 12 ++++++++++-
 .../spark/sql/catalyst/parser/AstBuilder.scala     | 23 +++++++++++-----------
 .../catalyst/analysis/ResolveAliasesSuite.scala    |  4 ++--
 5 files changed, 27 insertions(+), 14 deletions(-)

diff --git a/docs/sql-ref-ansi-compliance.md b/docs/sql-ref-ansi-compliance.md
index 36d1f8f73eb..d4bb0e93bee 100644
--- a/docs/sql-ref-ansi-compliance.md
+++ b/docs/sql-ref-ansi-compliance.md
@@ -672,6 +672,7 @@ Below is a list of all the keywords in Spark SQL.
 |WINDOW|non-reserved|non-reserved|reserved|
 |WITH|reserved|non-reserved|reserved|
 |WITHIN|reserved|non-reserved|reserved|
+|X|non-reserved|non-reserved|non-reserved|
 |YEAR|non-reserved|non-reserved|non-reserved|
 |YEARS|non-reserved|non-reserved|non-reserved|
 |ZONE|non-reserved|non-reserved|non-reserved|
diff --git a/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseLexer.g4 b/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseLexer.g4
index 4d446b494f7..c9930fa0986 100644
--- a/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseLexer.g4
+++ b/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseLexer.g4
@@ -208,6 +208,7 @@ GRANT: 'GRANT';
 GROUP: 'GROUP';
 GROUPING: 'GROUPING';
 HAVING: 'HAVING';
+BINARY_HEX: 'X';
 HOUR: 'HOUR';
 HOURS: 'HOURS';
 IF: 'IF';
diff --git a/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseParser.g4 b/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseParser.g4
index ab54aef35df..a112b6e31fe 100644
--- a/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseParser.g4
+++ b/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseParser.g4
@@ -928,11 +928,19 @@ primaryExpression
         (FILTER LEFT_PAREN WHERE where=booleanExpression RIGHT_PAREN)? ( OVER windowSpec)?     #percentile
     ;
 
+literalType
+    : DATE
+    | TIMESTAMP | TIMESTAMP_LTZ | TIMESTAMP_NTZ
+    | INTERVAL
+    | BINARY_HEX
+    | unsupportedType=identifier
+    ;
+
 constant
     : NULL                                                                                     #nullLiteral
     | COLON identifier                                                                         #parameterLiteral
     | interval                                                                                 #intervalLiteral
-    | identifier stringLit                                                                     #typeConstructor
+    | literalType stringLit                                                                    #typeConstructor
     | number                                                                                   #numericLiteral
     | booleanValue                                                                             #booleanLiteral
     | stringLit+                                                                               #stringLiteral
@@ -1227,6 +1235,7 @@ ansiNonReserved
     | BETWEEN
     | BIGINT
     | BINARY
+    | BINARY_HEX
     | BOOLEAN
     | BUCKET
     | BUCKETS
@@ -1514,6 +1523,7 @@ nonReserved
     | BETWEEN
     | BIGINT
     | BINARY
+    | BINARY_HEX
     | BOOLEAN
     | BOTH
     | BUCKET
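
Note: because BINARY_HEX ('X') is added to both ansiNonReserved and nonReserved
above, `x` remains usable as an ordinary identifier. A hedged Scala sketch,
assuming a SparkSession named `spark`:
```
// `x` still parses as a regular alias or column name,
// while X immediately followed by a string is a binary literal.
spark.sql("SELECT 1 AS x")   // alias named x: still legal
spark.sql("SELECT X'00'")    // typed binary literal
```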
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala
index 86880a82bb3..cb06fc31f0e 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala
@@ -2398,11 +2398,11 @@ class AstBuilder extends SqlBaseParserBaseVisitor[AnyRef] with SQLConfHelper wit
    */
   override def visitTypeConstructor(ctx: TypeConstructorContext): Literal = withOrigin(ctx) {
     val value = string(visitStringLit(ctx.stringLit))
-    val valueType = ctx.identifier.getText.toUpperCase(Locale.ROOT)
+    val valueType = ctx.literalType.start.getType
 
     def toLiteral[T](f: UTF8String => Option[T], t: DataType): Literal = {
       f(UTF8String.fromString(value)).map(Literal(_, t)).getOrElse {
-        throw QueryParsingErrors.cannotParseValueTypeError(valueType, value, ctx)
+        throw QueryParsingErrors.cannotParseValueTypeError(ctx.literalType.getText, value, ctx)
       }
     }
 
@@ -2413,17 +2413,17 @@ class AstBuilder extends SqlBaseParserBaseVisitor[AnyRef] with SQLConfHelper wit
     }
 
     valueType match {
-      case "DATE" =>
+      case DATE =>
         val zoneId = getZoneId(conf.sessionLocalTimeZone)
         val specialDate = convertSpecialDate(value, zoneId).map(Literal(_, DateType))
         specialDate.getOrElse(toLiteral(stringToDate, DateType))
-      case "TIMESTAMP_NTZ" =>
+      case TIMESTAMP_NTZ =>
         convertSpecialTimestampNTZ(value, getZoneId(conf.sessionLocalTimeZone))
           .map(Literal(_, TimestampNTZType))
           .getOrElse(toLiteral(stringToTimestampWithoutTimeZone, TimestampNTZType))
-      case "TIMESTAMP_LTZ" =>
+      case TIMESTAMP_LTZ =>
         constructTimestampLTZLiteral(value)
-      case "TIMESTAMP" =>
+      case TIMESTAMP =>
         SQLConf.get.timestampType match {
           case TimestampNTZType =>
             convertSpecialTimestampNTZ(value, getZoneId(conf.sessionLocalTimeZone))
@@ -2444,12 +2444,13 @@ class AstBuilder extends SqlBaseParserBaseVisitor[AnyRef] with SQLConfHelper wit
             constructTimestampLTZLiteral(value)
         }
 
-      case "INTERVAL" =>
+      case INTERVAL =>
         val interval = try {
           IntervalUtils.stringToInterval(UTF8String.fromString(value))
         } catch {
           case e: IllegalArgumentException =>
-            val ex = QueryParsingErrors.cannotParseValueTypeError(valueType, value, ctx)
+            val ex = QueryParsingErrors.cannotParseValueTypeError(
+              ctx.literalType.getText, value, ctx)
             ex.setStackTrace(e.getStackTrace)
             throw ex
         }
@@ -2462,7 +2463,7 @@ class AstBuilder extends SqlBaseParserBaseVisitor[AnyRef] with SQLConfHelper wit
         } else {
           Literal(interval, CalendarIntervalType)
         }
-      case "X" =>
+      case BINARY_HEX =>
         val padding = if (value.length % 2 != 0) "0" else ""
         try {
           Literal(Hex.decodeHex(padding + value))
@@ -2472,9 +2473,9 @@ class AstBuilder extends SqlBaseParserBaseVisitor[AnyRef] with SQLConfHelper wit
             ex.setStackTrace(e.getStackTrace)
             throw ex
         }
-      case other =>
+      case _ =>
         throw QueryParsingErrors.literalValueTypeUnsupportedError(
-          unsupportedType = other,
+          unsupportedType = ctx.literalType.getText,
           supportedTypes =
             Seq("DATE", "TIMESTAMP_NTZ", "TIMESTAMP_LTZ", "TIMESTAMP", "INTERVAL", "X"),
           ctx)
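
Note: the match above now dispatches on the ANTLR token type of the rule's
first token (`ctx.literalType.start.getType`) instead of comparing upper-cased
identifier text. The BINARY_HEX branch left-pads odd-length input before
decoding; a standalone sketch of just that step, using the same commons-codec
Hex as the file (the helper name here is ours):
```
import org.apache.commons.codec.binary.Hex

// An odd number of hex digits is left-padded with '0' so that
// Hex.decodeHex sees whole bytes; non-hex input throws DecoderException.
def decodeBinaryHexLiteral(value: String): Array[Byte] = {
  val padding = if (value.length % 2 != 0) "0" else ""
  Hex.decodeHex(padding + value)
}

// decodeBinaryHexLiteral("F") == Array(0x0F.toByte)
```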
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/ResolveAliasesSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/ResolveAliasesSuite.scala
index 071304d5762..6513db43639 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/ResolveAliasesSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/ResolveAliasesSuite.scala
@@ -103,9 +103,9 @@ class ResolveAliasesSuite extends AnalysisTest {
         """"abc"""" -> """"abc"""",
         """'\t\n xyz \t\r'""" -> """'\t\n xyz \t\r'""",
         "1l" -> "1L", "1S" -> "1S",
-        "date'-0001-1-28'" -> "date'-0001-1-28'",
+        "date'-0001-1-28'" -> "DATE'-0001-1-28'",
         "interval 3 year 1 month" -> "INTERVAL3YEAR1MONTH",
-        "x'00'" -> "x'00'",
+        "x'00'" -> "X'00'",
         // Preserve case
         "CAST(1 as tinyint)" -> "CAST(1ASTINYINT)",
         // Brackets
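
Note: the updated expectations reflect that typed-literal aliases generated by
the analyzer now render the constructor keyword canonically. A hedged Scala
sketch of a query exercising those literals, assuming a SparkSession named
`spark` (the exact alias text is per the suite above and the stable-alias work
in the PR linked earlier):
```
// Per the updated suite, the generated alias text is DATE'-0001-1-28'
// and X'00' (canonical keyword case) for these literals.
val df = spark.sql("SELECT date'-0001-1-28', x'00'")
df.columns.foreach(println)
```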

