You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by ya...@apache.org on 2020/06/15 15:47:45 UTC
[spark] 02/02: [SPARK-26905][SQL] Follow the SQL:2016 reserved
keywords
This is an automated email from the ASF dual-hosted git repository.
yamamuro pushed a commit to branch branch-3.0
in repository https://gitbox.apache.org/repos/asf/spark.git
commit ed69190ce0762f3b741b8d175ef8d02da45f3183
Author: Takeshi Yamamuro <ya...@apache.org>
AuthorDate: Tue Jun 16 00:27:45 2020 +0900
[SPARK-26905][SQL] Follow the SQL:2016 reserved keywords
### What changes were proposed in this pull request?
This PR intends to move keywords `ANTI`, `SEMI`, and `MINUS` from reserved to non-reserved.
### Why are the changes needed?
To comply with the ANSI/SQL standard.
### Does this PR introduce _any_ user-facing change?
No.
### How was this patch tested?
Added tests.
Closes #28807 from maropu/SPARK-26905-2.
Authored-by: Takeshi Yamamuro <ya...@apache.org>
Signed-off-by: Takeshi Yamamuro <ya...@apache.org>
---
docs/sql-ref-ansi-compliance.md | 6 +-
.../apache/spark/sql/catalyst/parser/SqlBase.g4 | 3 +
.../resources/ansi-sql-2016-reserved-keywords.txt | 401 +++++++++++++++++++++
.../parser/TableIdentifierParserSuite.scala | 24 +-
4 files changed, 429 insertions(+), 5 deletions(-)
diff --git a/docs/sql-ref-ansi-compliance.md b/docs/sql-ref-ansi-compliance.md
index eab194c..e5ca7e9d 100644
--- a/docs/sql-ref-ansi-compliance.md
+++ b/docs/sql-ref-ansi-compliance.md
@@ -135,7 +135,7 @@ Below is a list of all the keywords in Spark SQL.
|ALTER|non-reserved|non-reserved|reserved|
|ANALYZE|non-reserved|non-reserved|non-reserved|
|AND|reserved|non-reserved|reserved|
-|ANTI|reserved|strict-non-reserved|non-reserved|
+|ANTI|non-reserved|strict-non-reserved|non-reserved|
|ANY|reserved|non-reserved|reserved|
|ARCHIVE|non-reserved|non-reserved|non-reserved|
|ARRAY|non-reserved|non-reserved|reserved|
@@ -264,7 +264,7 @@ Below is a list of all the keywords in Spark SQL.
|MAP|non-reserved|non-reserved|non-reserved|
|MATCHED|non-reserved|non-reserved|non-reserved|
|MERGE|non-reserved|non-reserved|non-reserved|
-|MINUS|reserved|strict-non-reserved|non-reserved|
+|MINUS|non-reserved|strict-non-reserved|non-reserved|
|MINUTE|reserved|non-reserved|reserved|
|MONTH|reserved|non-reserved|reserved|
|MSCK|non-reserved|non-reserved|non-reserved|
@@ -325,7 +325,7 @@ Below is a list of all the keywords in Spark SQL.
|SCHEMA|non-reserved|non-reserved|non-reserved|
|SECOND|reserved|non-reserved|reserved|
|SELECT|reserved|non-reserved|reserved|
-|SEMI|reserved|strict-non-reserved|non-reserved|
+|SEMI|non-reserved|strict-non-reserved|non-reserved|
|SEPARATED|non-reserved|non-reserved|non-reserved|
|SERDE|non-reserved|non-reserved|non-reserved|
|SERDEPROPERTIES|non-reserved|non-reserved|non-reserved|
diff --git a/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 b/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4
index 14a6687..5821a74 100644
--- a/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4
+++ b/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4
@@ -994,6 +994,7 @@ ansiNonReserved
| AFTER
| ALTER
| ANALYZE
+ | ANTI
| ARCHIVE
| ARRAY
| ASC
@@ -1126,10 +1127,12 @@ ansiNonReserved
| ROW
| ROWS
| SCHEMA
+ | SEMI
| SEPARATED
| SERDE
| SERDEPROPERTIES
| SET
+ | SETMINUS
| SETS
| SHOW
| SKEWED
diff --git a/sql/catalyst/src/test/resources/ansi-sql-2016-reserved-keywords.txt b/sql/catalyst/src/test/resources/ansi-sql-2016-reserved-keywords.txt
new file mode 100644
index 0000000..921491a
--- /dev/null
+++ b/sql/catalyst/src/test/resources/ansi-sql-2016-reserved-keywords.txt
@@ -0,0 +1,401 @@
+-- This file comes from: https://github.com/postgres/postgres/tree/master/doc/src/sgml/keywords
+ABS
+ACOS
+ALL
+ALLOCATE
+ALTER
+AND
+ANY
+ARE
+ARRAY
+ARRAY_AGG
+ARRAY_MAX_CARDINALITY
+AS
+ASENSITIVE
+ASIN
+ASYMMETRIC
+AT
+ATAN
+ATOMIC
+AUTHORIZATION
+AVG
+BEGIN
+BEGIN_FRAME
+BEGIN_PARTITION
+BETWEEN
+BIGINT
+BINARY
+BLOB
+BOOLEAN
+BOTH
+BY
+CALL
+CALLED
+CARDINALITY
+CASCADED
+CASE
+CAST
+CEIL
+CEILING
+CHAR
+CHAR_LENGTH
+CHARACTER
+CHARACTER_LENGTH
+CHECK
+CLASSIFIER
+CLOB
+CLOSE
+COALESCE
+COLLATE
+COLLECT
+COLUMN
+COMMIT
+CONDITION
+CONNECT
+CONSTRAINT
+CONTAINS
+CONVERT
+COPY
+CORR
+CORRESPONDING
+COS
+COSH
+COUNT
+COVAR_POP
+COVAR_SAMP
+CREATE
+CROSS
+CUBE
+CUME_DIST
+CURRENT
+CURRENT_CATALOG
+CURRENT_DATE
+CURRENT_DEFAULT_TRANSFORM_GROUP
+CURRENT_PATH
+CURRENT_ROLE
+CURRENT_ROW
+CURRENT_SCHEMA
+CURRENT_TIME
+CURRENT_TIMESTAMP
+CURRENT_TRANSFORM_GROUP_FOR_TYPE
+CURRENT_USER
+CURSOR
+CYCLE
+DATE
+DAY
+DEALLOCATE
+DEC
+DECIMAL
+DECFLOAT
+DECLARE
+DEFAULT
+DEFINE
+DELETE
+DENSE_RANK
+DEREF
+DESCRIBE
+DETERMINISTIC
+DISCONNECT
+DISTINCT
+DOUBLE
+DROP
+DYNAMIC
+EACH
+ELEMENT
+ELSE
+EMPTY
+END
+END_FRAME
+END_PARTITION
+END-EXEC
+EQUALS
+ESCAPE
+EVERY
+EXCEPT
+EXEC
+EXECUTE
+EXISTS
+EXP
+EXTERNAL
+EXTRACT
+FALSE
+FETCH
+FILTER
+FIRST_VALUE
+FLOAT
+FLOOR
+FOR
+FOREIGN
+FRAME_ROW
+FREE
+FROM
+FULL
+FUNCTION
+FUSION
+GET
+GLOBAL
+GRANT
+GROUP
+GROUPING
+GROUPS
+HAVING
+HOLD
+HOUR
+IDENTITY
+IN
+INDICATOR
+INITIAL
+INNER
+INOUT
+INSENSITIVE
+INSERT
+INT
+INTEGER
+INTERSECT
+INTERSECTION
+INTERVAL
+INTO
+IS
+JOIN
+JSON_ARRAY
+JSON_ARRAYAGG
+JSON_EXISTS
+JSON_OBJECT
+JSON_OBJECTAGG
+JSON_QUERY
+JSON_TABLE
+JSON_TABLE_PRIMITIVE
+JSON_VALUE
+LAG
+LANGUAGE
+LARGE
+LAST_VALUE
+LATERAL
+LEAD
+LEADING
+LEFT
+LIKE
+LIKE_REGEX
+LISTAGG
+LN
+LOCAL
+LOCALTIME
+LOCALTIMESTAMP
+LOG
+LOG10
+LOWER
+MATCH
+MATCH_NUMBER
+MATCH_RECOGNIZE
+MATCHES
+MAX
+MEASURES
+MEMBER
+MERGE
+METHOD
+MIN
+MINUTE
+MOD
+MODIFIES
+MODULE
+MONTH
+MULTISET
+NATIONAL
+NATURAL
+NCHAR
+NCLOB
+NEW
+NO
+NONE
+NORMALIZE
+NOT
+NTH_VALUE
+NTILE
+NULL
+NULLIF
+NUMERIC
+OCTET_LENGTH
+OCCURRENCES_REGEX
+OF
+OFFSET
+OLD
+OMIT
+ON
+ONE
+ONLY
+OPEN
+OR
+ORDER
+OUT
+OUTER
+OVER
+OVERLAPS
+OVERLAY
+PARAMETER
+PARTITION
+PATTERN
+PER
+PERCENT
+PERCENT_RANK
+PERCENTILE_CONT
+PERCENTILE_DISC
+PERIOD
+PERMUTE
+PORTION
+POSITION
+POSITION_REGEX
+POWER
+PRECEDES
+PRECISION
+PREPARE
+PRIMARY
+PROCEDURE
+PTF
+RANGE
+RANK
+READS
+REAL
+RECURSIVE
+REF
+REFERENCES
+REFERENCING
+REGR_AVGX
+REGR_AVGY
+REGR_COUNT
+REGR_INTERCEPT
+REGR_R2
+REGR_SLOPE
+REGR_SXX
+REGR_SXY
+REGR_SYY
+RELEASE
+RESULT
+RETURN
+RETURNS
+REVOKE
+RIGHT
+ROLLBACK
+ROLLUP
+ROW
+ROW_NUMBER
+ROWS
+RUNNING
+SAVEPOINT
+SCOPE
+SCROLL
+SEARCH
+SECOND
+SEEK
+SELECT
+SENSITIVE
+SESSION_USER
+SET
+SHOW
+SIMILAR
+SIN
+SINH
+SKIP
+SMALLINT
+SOME
+SPECIFIC
+SPECIFICTYPE
+SQL
+SQLEXCEPTION
+SQLSTATE
+SQLWARNING
+SQRT
+START
+STATIC
+STDDEV_POP
+STDDEV_SAMP
+SUBMULTISET
+SUBSET
+SUBSTRING
+SUBSTRING_REGEX
+SUCCEEDS
+SUM
+SYMMETRIC
+SYSTEM
+SYSTEM_TIME
+SYSTEM_USER
+TABLE
+TABLESAMPLE
+TAN
+TANH
+THEN
+TIME
+TIMESTAMP
+TIMEZONE_HOUR
+TIMEZONE_MINUTE
+TO
+TRAILING
+TRANSLATE
+TRANSLATE_REGEX
+TRANSLATION
+TREAT
+TRIGGER
+TRIM
+TRIM_ARRAY
+TRUE
+TRUNCATE
+UESCAPE
+UNION
+UNIQUE
+UNKNOWN
+UNMATCHED
+UNNEST
+UPDATE
+UPPER
+USER
+USING
+VALUE
+VALUES
+VALUE_OF
+VAR_POP
+VAR_SAMP
+VARBINARY
+VARCHAR
+VARYING
+VERSIONING
+WHEN
+WHENEVER
+WHERE
+WIDTH_BUCKET
+WINDOW
+WITH
+WITHIN
+WITHOUT
+YEAR
+DATALINK
+DLNEWCOPY
+DLPREVIOUSCOPY
+DLURLCOMPLETE
+DLURLCOMPLETEWRITE
+DLURLCOMPLETEONLY
+DLURLPATH
+DLURLPATHWRITE
+DLURLPATHONLY
+DLURLSCHEME
+DLURLSERVER
+DLVALUE
+IMPORT
+XML
+XMLAGG
+XMLATTRIBUTES
+XMLBINARY
+XMLCAST
+XMLCOMMENT
+XMLCONCAT
+XMLDOCUMENT
+XMLELEMENT
+XMLEXISTS
+XMLFOREST
+XMLITERATE
+XMLNAMESPACES
+XMLPARSE
+XMLPI
+XMLQUERY
+XMLSERIALIZE
+XMLTABLE
+XMLTEXT
+XMLVALIDATE
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/TableIdentifierParserSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/TableIdentifierParserSuite.scala
index 04969e3..04c427d 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/TableIdentifierParserSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/TableIdentifierParserSuite.scala
@@ -16,8 +16,11 @@
*/
package org.apache.spark.sql.catalyst.parser
+import java.io.File
+import java.nio.file.Files
import java.util.Locale
+import scala.collection.JavaConverters._
import scala.collection.mutable
import org.apache.spark.SparkFunSuite
@@ -340,7 +343,12 @@ class TableIdentifierParserSuite extends SparkFunSuite with SQLHelper {
// The case where a symbol has multiple literal definitions,
// e.g., `DATABASES: 'DATABASES' | 'SCHEMAS';`.
if (hasMultipleLiterals) {
- val literals = splitDefs.map(_.replaceAll("'", "").trim).toSeq
+ // Filters out inappropriate entries, e.g., `!` in `NOT: 'NOT' | '!';`
+ val litDef = """([A-Z_]+)""".r
+ val literals = splitDefs.map(_.replaceAll("'", "").trim).toSeq.flatMap {
+ case litDef(lit) => Some(lit)
+ case _ => None
+ }
(symbol, literals) :: Nil
} else {
val literal = literalDef.replaceAll("'", "").trim
@@ -388,12 +396,24 @@ class TableIdentifierParserSuite extends SparkFunSuite with SQLHelper {
val reservedKeywordsInAnsiMode = allCandidateKeywords -- nonReservedKeywordsInAnsiMode
test("check # of reserved keywords") {
- val numReservedKeywords = 78
+ val numReservedKeywords = 74
assert(reservedKeywordsInAnsiMode.size == numReservedKeywords,
s"The expected number of reserved keywords is $numReservedKeywords, but " +
s"${reservedKeywordsInAnsiMode.size} found.")
}
+ test("reserved keywords in Spark are also reserved in SQL 2016") {
+ withTempDir { dir =>
+ val tmpFile = new File(dir, "tmp")
+ val is = Thread.currentThread().getContextClassLoader
+ .getResourceAsStream("ansi-sql-2016-reserved-keywords.txt")
+ Files.copy(is, tmpFile.toPath)
+ val reservedKeywordsInSql2016 = Files.readAllLines(tmpFile.toPath)
+ .asScala.filterNot(_.startsWith("--")).map(_.trim).toSet
+ assert((reservedKeywordsInAnsiMode -- reservedKeywordsInSql2016).isEmpty)
+ }
+ }
+
test("table identifier") {
// Regular names.
assert(TableIdentifier("q") === parseTableIdentifier("q"))
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org