Posted to commits@spark.apache.org by ma...@apache.org on 2023/03/30 14:44:13 UTC
[spark] branch master updated: [SPARK-42979][SQL] Define literal constructors as keywords
This is an automated email from the ASF dual-hosted git repository.
maxgekk pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
new 5a17537aa4a [SPARK-42979][SQL] Define literal constructors as keywords
5a17537aa4a is described below
commit 5a17537aa4a777429431542cfa6184591476e54a
Author: Max Gekk <ma...@gmail.com>
AuthorDate: Thu Mar 30 17:43:54 2023 +0300
[SPARK-42979][SQL] Define literal constructors as keywords
### What changes were proposed in this pull request?
In the PR, I propose to define literal constructors `DATE`, `TIMESTAMP`, `TIMESTAMP_NTZ`, `TIMESTAMP_LTZ`, `INTERVAL`, and `X` as Spark SQL keywords.
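These constructors build typed literals from a string value. A minimal sketch of the affected syntax (spark-shell; the result type of `TIMESTAMP'...'` depends on `spark.sql.timestampType`, and printed values depend on the session time zone):
```
// The typed literal constructors that become keywords in this PR.
spark.sql("SELECT DATE'2023-03-30'").show()
spark.sql("SELECT TIMESTAMP'2023-03-30 17:43:54'").show()
spark.sql("SELECT TIMESTAMP_NTZ'2023-03-30 17:43:54'").show()
spark.sql("SELECT TIMESTAMP_LTZ'2023-03-30 17:43:54'").show()
spark.sql("SELECT INTERVAL '1 year 2 months'").show()
spark.sql("SELECT X'1C'").show()   // binary literal from a hex string
```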
### Why are the changes needed?
Literal constructors that are not keywords cause inconveniences while analysing/transforming the parse tree. For example, they complicate forming stable column aliases; see https://github.com/apache/spark/pull/40126.
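Concretely, the auto-generated alias of a typed literal reflects how the constructor is tokenized, so making it a keyword lets the alias use the canonical upper-case form. A hypothetical spark-shell sketch matching the `ResolveAliasesSuite` expectations updated below:
```
// Before this PR the alias preserved the input case; now the
// constructor keyword is rendered in its canonical upper case.
spark.sql("SELECT date'2023-03-30'").columns   // was: date'2023-03-30', now: DATE'2023-03-30'
spark.sql("SELECT x'00'").columns              // was: x'00', now: X'00'
```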
### Does this PR introduce _any_ user-facing change?
No.
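In particular, an unrecognized constructor still parses through the `unsupportedType=identifier` alternative of `literalType` and fails in `AstBuilder` as before. A hedged sketch (`GEO` is a made-up constructor; the exact error message may differ):
```
// Unsupported constructors reach AstBuilder and raise
// literalValueTypeUnsupportedError with the list of supported types.
spark.sql("SELECT GEO'POINT(1 2)'")
// => ParseException: literals of the type "GEO" are not supported;
//    supported types: DATE, TIMESTAMP_NTZ, TIMESTAMP_LTZ, TIMESTAMP, INTERVAL, X
```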
### How was this patch tested?
By running the affected test suites:
```
$ build/sbt "test:testOnly *SQLKeywordSuite"
$ build/sbt "test:testOnly *.ResolveAliasesSuite"
```
Closes #40593 from MaxGekk/typed-literal-keywords.
Authored-by: Max Gekk <ma...@gmail.com>
Signed-off-by: Max Gekk <ma...@gmail.com>
---
docs/sql-ref-ansi-compliance.md | 1 +
.../spark/sql/catalyst/parser/SqlBaseLexer.g4 | 1 +
.../spark/sql/catalyst/parser/SqlBaseParser.g4 | 12 ++++++++++-
.../spark/sql/catalyst/parser/AstBuilder.scala | 23 +++++++++++-----------
.../catalyst/analysis/ResolveAliasesSuite.scala | 4 ++--
5 files changed, 27 insertions(+), 14 deletions(-)
diff --git a/docs/sql-ref-ansi-compliance.md b/docs/sql-ref-ansi-compliance.md
index 36d1f8f73eb..d4bb0e93bee 100644
--- a/docs/sql-ref-ansi-compliance.md
+++ b/docs/sql-ref-ansi-compliance.md
@@ -672,6 +672,7 @@ Below is a list of all the keywords in Spark SQL.
|WINDOW|non-reserved|non-reserved|reserved|
|WITH|reserved|non-reserved|reserved|
|WITHIN|reserved|non-reserved|reserved|
+|X|non-reserved|non-reserved|non-reserved|
|YEAR|non-reserved|non-reserved|non-reserved|
|YEARS|non-reserved|non-reserved|non-reserved|
|ZONE|non-reserved|non-reserved|non-reserved|
diff --git a/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseLexer.g4 b/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseLexer.g4
index 4d446b494f7..c9930fa0986 100644
--- a/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseLexer.g4
+++ b/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseLexer.g4
@@ -208,6 +208,7 @@ GRANT: 'GRANT';
GROUP: 'GROUP';
GROUPING: 'GROUPING';
HAVING: 'HAVING';
+BINARY_HEX: 'X';
HOUR: 'HOUR';
HOURS: 'HOURS';
IF: 'IF';
diff --git a/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseParser.g4 b/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseParser.g4
index ab54aef35df..a112b6e31fe 100644
--- a/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseParser.g4
+++ b/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseParser.g4
@@ -928,11 +928,19 @@ primaryExpression
(FILTER LEFT_PAREN WHERE where=booleanExpression RIGHT_PAREN)? ( OVER windowSpec)? #percentile
;
+literalType
+ : DATE
+ | TIMESTAMP | TIMESTAMP_LTZ | TIMESTAMP_NTZ
+ | INTERVAL
+ | BINARY_HEX
+ | unsupportedType=identifier
+ ;
+
constant
: NULL #nullLiteral
| COLON identifier #parameterLiteral
| interval #intervalLiteral
- | identifier stringLit #typeConstructor
+ | literalType stringLit #typeConstructor
| number #numericLiteral
| booleanValue #booleanLiteral
| stringLit+ #stringLiteral
@@ -1227,6 +1235,7 @@ ansiNonReserved
| BETWEEN
| BIGINT
| BINARY
+ | BINARY_HEX
| BOOLEAN
| BUCKET
| BUCKETS
@@ -1514,6 +1523,7 @@ nonReserved
| BETWEEN
| BIGINT
| BINARY
+ | BINARY_HEX
| BOOLEAN
| BOTH
| BUCKET
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala
index 86880a82bb3..cb06fc31f0e 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala
@@ -2398,11 +2398,11 @@ class AstBuilder extends SqlBaseParserBaseVisitor[AnyRef] with SQLConfHelper wit
*/
override def visitTypeConstructor(ctx: TypeConstructorContext): Literal = withOrigin(ctx) {
val value = string(visitStringLit(ctx.stringLit))
- val valueType = ctx.identifier.getText.toUpperCase(Locale.ROOT)
+ val valueType = ctx.literalType.start.getType
def toLiteral[T](f: UTF8String => Option[T], t: DataType): Literal = {
f(UTF8String.fromString(value)).map(Literal(_, t)).getOrElse {
- throw QueryParsingErrors.cannotParseValueTypeError(valueType, value, ctx)
+ throw QueryParsingErrors.cannotParseValueTypeError(ctx.literalType.getText, value, ctx)
}
}
@@ -2413,17 +2413,17 @@ class AstBuilder extends SqlBaseParserBaseVisitor[AnyRef] with SQLConfHelper wit
}
valueType match {
- case "DATE" =>
+ case DATE =>
val zoneId = getZoneId(conf.sessionLocalTimeZone)
val specialDate = convertSpecialDate(value, zoneId).map(Literal(_, DateType))
specialDate.getOrElse(toLiteral(stringToDate, DateType))
- case "TIMESTAMP_NTZ" =>
+ case TIMESTAMP_NTZ =>
convertSpecialTimestampNTZ(value, getZoneId(conf.sessionLocalTimeZone))
.map(Literal(_, TimestampNTZType))
.getOrElse(toLiteral(stringToTimestampWithoutTimeZone, TimestampNTZType))
- case "TIMESTAMP_LTZ" =>
+ case TIMESTAMP_LTZ =>
constructTimestampLTZLiteral(value)
- case "TIMESTAMP" =>
+ case TIMESTAMP =>
SQLConf.get.timestampType match {
case TimestampNTZType =>
convertSpecialTimestampNTZ(value, getZoneId(conf.sessionLocalTimeZone))
@@ -2444,12 +2444,13 @@ class AstBuilder extends SqlBaseParserBaseVisitor[AnyRef] with SQLConfHelper wit
constructTimestampLTZLiteral(value)
}
- case "INTERVAL" =>
+ case INTERVAL =>
val interval = try {
IntervalUtils.stringToInterval(UTF8String.fromString(value))
} catch {
case e: IllegalArgumentException =>
- val ex = QueryParsingErrors.cannotParseValueTypeError(valueType, value, ctx)
+ val ex = QueryParsingErrors.cannotParseValueTypeError(
+ ctx.literalType.getText, value, ctx)
ex.setStackTrace(e.getStackTrace)
throw ex
}
@@ -2462,7 +2463,7 @@ class AstBuilder extends SqlBaseParserBaseVisitor[AnyRef] with SQLConfHelper wit
} else {
Literal(interval, CalendarIntervalType)
}
- case "X" =>
+ case BINARY_HEX =>
val padding = if (value.length % 2 != 0) "0" else ""
try {
Literal(Hex.decodeHex(padding + value))
@@ -2472,9 +2473,9 @@ class AstBuilder extends SqlBaseParserBaseVisitor[AnyRef] with SQLConfHelper wit
ex.setStackTrace(e.getStackTrace)
throw ex
}
- case other =>
+ case _ =>
throw QueryParsingErrors.literalValueTypeUnsupportedError(
- unsupportedType = other,
+ unsupportedType = ctx.literalType.getText,
supportedTypes =
Seq("DATE", "TIMESTAMP_NTZ", "TIMESTAMP_LTZ", "TIMESTAMP", "INTERVAL", "X"),
ctx)
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/ResolveAliasesSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/ResolveAliasesSuite.scala
index 071304d5762..6513db43639 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/ResolveAliasesSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/ResolveAliasesSuite.scala
@@ -103,9 +103,9 @@ class ResolveAliasesSuite extends AnalysisTest {
""""abc"""" -> """"abc"""",
"""'\t\n xyz \t\r'""" -> """'\t\n xyz \t\r'""",
"1l" -> "1L", "1S" -> "1S",
- "date'-0001-1-28'" -> "date'-0001-1-28'",
+ "date'-0001-1-28'" -> "DATE'-0001-1-28'",
"interval 3 year 1 month" -> "INTERVAL3YEAR1MONTH",
- "x'00'" -> "x'00'",
+ "x'00'" -> "X'00'",
// Preserve case
"CAST(1 as tinyint)" -> "CAST(1ASTINYINT)",
// Brackets