You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by ya...@apache.org on 2019/03/13 02:21:03 UTC
[spark] branch master updated: [SPARK-26976][SQL] Forbid reserved
keywords as identifiers when ANSI mode is on
This is an automated email from the ASF dual-hosted git repository.
yamamuro pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
new 1e9469b [SPARK-26976][SQL] Forbid reserved keywords as identifiers when ANSI mode is on
1e9469b is described below
commit 1e9469bb7a71b06d610edaaebca933f4219a6eb3
Author: Takeshi Yamamuro <ya...@apache.org>
AuthorDate: Wed Mar 13 11:20:27 2019 +0900
[SPARK-26976][SQL] Forbid reserved keywords as identifiers when ANSI mode is on
## What changes were proposed in this pull request?
This PR adds code to forbid reserved keywords as identifiers when ANSI mode is on.
This is a follow-up of SPARK-26215 (#23259).
## How was this patch tested?
Added tests in `TableIdentifierParserSuite`.
Closes #23880 from maropu/SPARK-26976.
Authored-by: Takeshi Yamamuro <ya...@apache.org>
Signed-off-by: Takeshi Yamamuro <ya...@apache.org>
---
.../apache/spark/sql/catalyst/parser/SqlBase.g4 | 84 ---
.../parser/TableIdentifierParserSuite.scala | 650 ++++++++++++++++++++-
2 files changed, 649 insertions(+), 85 deletions(-)
diff --git a/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 b/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4
index c61cda8..d11c28c 100644
--- a/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4
+++ b/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4
@@ -736,7 +736,6 @@ qualifiedName
identifier
: strictIdentifier
- | {ansi}? ansiReserved
| {!ansi}? defaultReserved
;
@@ -761,89 +760,6 @@ number
| MINUS? BIGDECIMAL_LITERAL #bigDecimalLiteral
;
-// NOTE: You must follow a rule below when you add a new ANTLR token in this file:
-// - All the ANTLR tokens = UNION(`ansiReserved`, `ansiNonReserved`) = UNION(`defaultReserved`, `nonReserved`)
-//
-// Let's say you add a new token `NEWTOKEN` and this is not reserved regardless of a `spark.sql.parser.ansi.enabled`
-// value. In this case, you must add a token `NEWTOKEN` in both `ansiNonReserved` and `nonReserved`.
-//
-// It is recommended to list them in alphabetical order.
-
-// The list of the reserved keywords when `spark.sql.parser.ansi.enabled` is true. Currently, we only reserve
-// the ANSI keywords that almost all the ANSI SQL standards (SQL-92, SQL-99, SQL-2003, SQL-2008, SQL-2011,
-// and SQL-2016) and PostgreSQL reserve.
-ansiReserved
- : ALL
- | AND
- | ANTI
- | ANY
- | AS
- | AUTHORIZATION
- | BOTH
- | CASE
- | CAST
- | CHECK
- | COLLATE
- | COLUMN
- | CONSTRAINT
- | CREATE
- | CROSS
- | CURRENT_DATE
- | CURRENT_TIME
- | CURRENT_TIMESTAMP
- | CURRENT_USER
- | DISTINCT
- | ELSE
- | END
- | EXCEPT
- | FALSE
- | FETCH
- | FOR
- | FOREIGN
- | FROM
- | FULL
- | GRANT
- | GROUP
- | HAVING
- | IN
- | INNER
- | INTERSECT
- | INTO
- | IS
- | JOIN
- | LEADING
- | LEFT
- | NATURAL
- | NOT
- | NULL
- | ON
- | ONLY
- | OR
- | ORDER
- | OUTER
- | OVERLAPS
- | PRIMARY
- | REFERENCES
- | RIGHT
- | SELECT
- | SEMI
- | SESSION_USER
- | SETMINUS
- | SOME
- | TABLE
- | THEN
- | TO
- | TRAILING
- | UNION
- | UNIQUE
- | USER
- | USING
- | WHEN
- | WHERE
- | WITH
- ;
-
-
// The list of the non-reserved keywords when `spark.sql.parser.ansi.enabled` is true.
ansiNonReserved
: ADD
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/TableIdentifierParserSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/TableIdentifierParserSuite.scala
index 3d41c27..2725deb 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/TableIdentifierParserSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/TableIdentifierParserSuite.scala
@@ -18,8 +18,10 @@ package org.apache.spark.sql.catalyst.parser
import org.apache.spark.SparkFunSuite
import org.apache.spark.sql.catalyst.TableIdentifier
+import org.apache.spark.sql.catalyst.plans.SQLHelper
+import org.apache.spark.sql.internal.SQLConf
-class TableIdentifierParserSuite extends SparkFunSuite {
+class TableIdentifierParserSuite extends SparkFunSuite with SQLHelper {
import CatalystSqlParser._
// Add "$elem$", "$value$" & "$key$"
@@ -281,6 +283,635 @@ class TableIdentifierParserSuite extends SparkFunSuite {
"where",
"with")
+ // All the keywords in `docs/sql-reserved-and-non-reserved-key-words.md` are listed below:
+ val allCandidateKeywords = Set(
+ "abs",
+ "absolute",
+ "acos",
+ "action",
+ "add",
+ "after",
+ "all",
+ "allocate",
+ "alter",
+ "analyze",
+ "and",
+ "anti",
+ "any",
+ "archive",
+ "are",
+ "array",
+ "array_agg",
+ "array_max_cardinality",
+ "as",
+ "asc",
+ "asensitive",
+ "asin",
+ "assertion",
+ "asymmetric",
+ "at",
+ "atan",
+ "atomic",
+ "authorization",
+ "avg",
+ "before",
+ "begin",
+ "begin_frame",
+ "begin_partition",
+ "between",
+ "bigint",
+ "binary",
+ "bit",
+ "bit_length",
+ "blob",
+ "boolean",
+ "both",
+ "breadth",
+ "bucket",
+ "buckets",
+ "by",
+ "cache",
+ "call",
+ "called",
+ "cardinality",
+ "cascade",
+ "cascaded",
+ "case",
+ "cast",
+ "catalog",
+ "ceil",
+ "ceiling",
+ "change",
+ "char",
+ "char_length",
+ "character",
+ "character_length",
+ "check",
+ "classifier",
+ "clear",
+ "clob",
+ "close",
+ "cluster",
+ "clustered",
+ "coalesce",
+ "codegen",
+ "collate",
+ "collation",
+ "collect",
+ "collection",
+ "column",
+ "columns",
+ "comment",
+ "commit",
+ "compact",
+ "compactions",
+ "compute",
+ "concatenate",
+ "condition",
+ "connect",
+ "connection",
+ "constraint",
+ "constraints",
+ "constructor",
+ "contains",
+ "continue",
+ "convert",
+ "copy",
+ "corr",
+ "corresponding",
+ "cos",
+ "cosh",
+ "cost",
+ "count",
+ "covar_pop",
+ "covar_samp",
+ "create",
+ "cross",
+ "cube",
+ "cume_dist",
+ "current",
+ "current_catalog",
+ "current_date",
+ "current_default_transform_group",
+ "current_path",
+ "current_role",
+ "current_row",
+ "current_schema",
+ "current_time",
+ "current_timestamp",
+ "current_transform_group_for_type",
+ "current_user",
+ "cursor",
+ "cycle",
+ "data",
+ "database",
+ "databases",
+ "date",
+ "day",
+ "dbproperties",
+ "deallocate",
+ "dec",
+ "decfloat",
+ "decimal",
+ "declare",
+ "default",
+ "deferrable",
+ "deferred",
+ "define",
+ "defined",
+ "delete",
+ "delimited",
+ "dense_rank",
+ "depth",
+ "deref",
+ "desc",
+ "describe",
+ "descriptor",
+ "deterministic",
+ "dfs",
+ "diagnostics",
+ "directories",
+ "directory",
+ "disconnect",
+ "distinct",
+ "distribute",
+ "div",
+ "do",
+ "domain",
+ "double",
+ "drop",
+ "dynamic",
+ "each",
+ "element",
+ "else",
+ "elseif",
+ "empty",
+ "end",
+ "end_frame",
+ "end_partition",
+ "equals",
+ "escape",
+ "escaped",
+ "every",
+ "except",
+ "exception",
+ "exchange",
+ "exec",
+ "execute",
+ "exists",
+ "exit",
+ "exp",
+ "explain",
+ "export",
+ "extended",
+ "external",
+ "extract",
+ "false",
+ "fetch",
+ "fields",
+ "fileformat",
+ "filter",
+ "first",
+ "first_value",
+ "float",
+ "following",
+ "for",
+ "foreign",
+ "format",
+ "formatted",
+ "found",
+ "frame_row",
+ "free",
+ "from",
+ "full",
+ "function",
+ "functions",
+ "fusion",
+ "general",
+ "get",
+ "global",
+ "go",
+ "goto",
+ "grant",
+ "group",
+ "grouping",
+ "groups",
+ "handler",
+ "having",
+ "hold",
+ "hour",
+ "identity",
+ "if",
+ "ignore",
+ "immediate",
+ "import",
+ "in",
+ "index",
+ "indexes",
+ "indicator",
+ "initial",
+ "initially",
+ "inner",
+ "inout",
+ "inpath",
+ "input",
+ "inputformat",
+ "insensitive",
+ "insert",
+ "int",
+ "integer",
+ "intersect",
+ "intersection",
+ "interval",
+ "into",
+ "is",
+ "isolation",
+ "items",
+ "iterate",
+ "join",
+ "json_array",
+ "json_arrayagg",
+ "json_exists",
+ "json_object",
+ "json_objectagg",
+ "json_query",
+ "json_table",
+ "json_table_primitive",
+ "json_value",
+ "key",
+ "keys",
+ "lag",
+ "language",
+ "large",
+ "last",
+ "last_value",
+ "lateral",
+ "lazy",
+ "lead",
+ "leading",
+ "leave",
+ "left",
+ "level",
+ "like",
+ "like_regex",
+ "limit",
+ "lines",
+ "list",
+ "listagg",
+ "ln",
+ "load",
+ "local",
+ "localtime",
+ "localtimestamp",
+ "location",
+ "locator",
+ "lock",
+ "locks",
+ "log",
+ "log10",
+ "logical",
+ "loop",
+ "lower",
+ "macro",
+ "map",
+ "match",
+ "match_number",
+ "match_recognize",
+ "matches",
+ "max",
+ "member",
+ "merge",
+ "method",
+ "min",
+ "minus",
+ "minute",
+ "mod",
+ "modifies",
+ "module",
+ "month",
+ "msck",
+ "multiset",
+ "names",
+ "national",
+ "natural",
+ "nchar",
+ "nclob",
+ "new",
+ "next",
+ "no",
+ "none",
+ "normalize",
+ "not",
+ "nth_value",
+ "ntile",
+ "null",
+ "nullif",
+ "nulls",
+ "numeric",
+ "object",
+ "occurrences_regex",
+ "octet_length",
+ "of",
+ "offset",
+ "old",
+ "omit",
+ "on",
+ "one",
+ "only",
+ "open",
+ "option",
+ "options",
+ "or",
+ "order",
+ "ordinality",
+ "out",
+ "outer",
+ "output",
+ "outputformat",
+ "over",
+ "overlaps",
+ "overlay",
+ "overwrite",
+ "pad",
+ "parameter",
+ "partial",
+ "partition",
+ "partitioned",
+ "partitions",
+ "path",
+ "pattern",
+ "per",
+ "percent",
+ "percent_rank",
+ "percentile_cont",
+ "percentile_disc",
+ "percentlit",
+ "period",
+ "pivot",
+ "portion",
+ "power",
+ "precedes",
+ "preceding",
+ "precision",
+ "prepare",
+ "preserve",
+ "primary",
+ "principals",
+ "prior",
+ "privileges",
+ "procedure",
+ "ptf",
+ "public",
+ "purge",
+ "range",
+ "rank",
+ "read",
+ "reads",
+ "real",
+ "recordreader",
+ "recordwriter",
+ "recover",
+ "recursive",
+ "reduce",
+ "ref",
+ "references",
+ "referencing",
+ "refresh",
+ "regr_avgx",
+ "regr_avgy",
+ "regr_count",
+ "regr_intercept",
+ "regr_r2",
+ "regr_slope",
+ "regr_sxx",
+ "regr_sxy",
+ "regr_syy",
+ "relative",
+ "release",
+ "rename",
+ "repair",
+ "repeat",
+ "replace",
+ "reset",
+ "resignal",
+ "restrict",
+ "result",
+ "return",
+ "returns",
+ "revoke",
+ "right",
+ "rlike",
+ "role",
+ "roles",
+ "rollback",
+ "rollup",
+ "routine",
+ "row",
+ "row_number",
+ "rows",
+ "running",
+ "savepoint",
+ "schema",
+ "scope",
+ "scroll",
+ "search",
+ "second",
+ "section",
+ "seek",
+ "select",
+ "semi",
+ "sensitive",
+ "separated",
+ "serde",
+ "serdeproperties",
+ "session",
+ "session_user",
+ "set",
+ "sets",
+ "show",
+ "signal",
+ "similar",
+ "sin",
+ "sinh",
+ "size",
+ "skewed",
+ "skip",
+ "smallint",
+ "some",
+ "sort",
+ "sorted",
+ "space",
+ "specific",
+ "specifictype",
+ "sql",
+ "sqlcode",
+ "sqlerror",
+ "sqlexception",
+ "sqlstate",
+ "sqlwarning",
+ "sqrt",
+ "start",
+ "state",
+ "static",
+ "statistics",
+ "stddev_pop",
+ "stddev_samp",
+ "stored",
+ "stratify",
+ "struct",
+ "submultiset",
+ "subset",
+ "substring",
+ "substring_regex",
+ "succeeds",
+ "sum",
+ "symmetric",
+ "system",
+ "system_time",
+ "system_user",
+ "table",
+ "tables",
+ "tablesample",
+ "tan",
+ "tanh",
+ "tblproperties",
+ "temporary",
+ "terminated",
+ "then",
+ "time",
+ "timestamp",
+ "timezone_hour",
+ "timezone_minute",
+ "to",
+ "touch",
+ "trailing",
+ "transaction",
+ "transactions",
+ "transform",
+ "translate",
+ "translate_regex",
+ "translation",
+ "treat",
+ "trigger",
+ "trim",
+ "trim_array",
+ "true",
+ "truncate",
+ "uescape",
+ "unarchive",
+ "unbounded",
+ "uncache",
+ "under",
+ "undo",
+ "union",
+ "unique",
+ "unknown",
+ "unlock",
+ "unnest",
+ "unset",
+ "until",
+ "update",
+ "upper",
+ "usage",
+ "use",
+ "user",
+ "using",
+ "value",
+ "value_of",
+ "values",
+ "var_pop",
+ "var_samp",
+ "varbinary",
+ "varchar",
+ "varying",
+ "versioning",
+ "view",
+ "when",
+ "whenever",
+ "where",
+ "while",
+ "width_bucket",
+ "window",
+ "with",
+ "within",
+ "without",
+ "work",
+ "write",
+ "year",
+ "zone")
+
+ val reservedKeywordsInAnsiMode = Set(
+ "all",
+ "and",
+ "anti",
+ "any",
+ "as",
+ "authorization",
+ "both",
+ "case",
+ "cast",
+ "check",
+ "collate",
+ "column",
+ "constraint",
+ "create",
+ "cross",
+ "current_date",
+ "current_time",
+ "current_timestamp",
+ "current_user",
+ "distinct",
+ "else",
+ "end",
+ "except",
+ "false",
+ "fetch",
+ "for",
+ "foreign",
+ "from",
+ "full",
+ "grant",
+ "group",
+ "having",
+ "in",
+ "inner",
+ "intersect",
+ "into",
+ "join",
+ "is",
+ "leading",
+ "left",
+ "natural",
+ "not",
+ "null",
+ "on",
+ "only",
+ "or",
+ "order",
+ "outer",
+ "overlaps",
+ "primary",
+ "references",
+ "right",
+ "select",
+ "semi",
+ "session_user",
+ "minus",
+ "some",
+ "table",
+ "then",
+ "to",
+ "trailing",
+ "union",
+ "unique",
+ "user",
+ "using",
+ "when",
+ "where",
+ "with")
+
+ val nonReservedKeywordsInAnsiMode = allCandidateKeywords -- reservedKeywordsInAnsiMode
+
test("table identifier") {
// Regular names.
assert(TableIdentifier("q") === parseTableIdentifier("q"))
@@ -300,6 +931,23 @@ class TableIdentifierParserSuite extends SparkFunSuite {
assert(TableIdentifier("x.y.z", None) === parseTableIdentifier("`x.y.z`"))
}
+ test("table identifier - reserved/non-reserved keywords if ANSI mode enabled") {
+ withSQLConf(SQLConf.ANSI_SQL_PARSER.key -> "true") {
+ reservedKeywordsInAnsiMode.foreach { keyword =>
+ val errMsg = intercept[ParseException] {
+ parseTableIdentifier(keyword)
+ }.getMessage
+ assert(errMsg.contains("no viable alternative at input"))
+ assert(TableIdentifier(keyword) === parseTableIdentifier(s"`$keyword`"))
+ assert(TableIdentifier(keyword, Option("db")) === parseTableIdentifier(s"db.`$keyword`"))
+ }
+ nonReservedKeywordsInAnsiMode.foreach { keyword =>
+ assert(TableIdentifier(keyword) === parseTableIdentifier(s"$keyword"))
+ assert(TableIdentifier(keyword, Option("db")) === parseTableIdentifier(s"db.$keyword"))
+ }
+ }
+ }
+
test("table identifier - strict keywords") {
// SQL Keywords.
hiveStrictNonReservedKeyword.foreach { keyword =>
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org