You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by li...@apache.org on 2017/10/04 20:13:54 UTC
spark git commit: [SPARK-22169][SQL] support byte length literal as
identifier
Repository: spark
Updated Branches:
refs/heads/master 4a779bdac -> bb035f1ee
[SPARK-22169][SQL] support byte length literal as identifier
## What changes were proposed in this pull request?
By definition the table name in Spark can be something like `123x`, `25a`, etc., with exceptions for literals like `12L`, `23BD`, etc. However, Spark SQL has a special byte length literal, which stops users from using digits followed by `b`, `k`, `m`, `g` as identifiers.
Byte length literal is not a standard SQL literal and is only used in the `tableSample` parser rule. This PR moves the parsing of byte length literal from the lexer to the parser, so that users can use it as an identifier.
## How was this patch tested?
regression test
Author: Wenchen Fan <we...@databricks.com>
Closes #19392 from cloud-fan/parser-bug.
Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/bb035f1e
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/bb035f1e
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/bb035f1e
Branch: refs/heads/master
Commit: bb035f1ee5cdf88e476b7ed83d59140d669fbe12
Parents: 4a779bd
Author: Wenchen Fan <we...@databricks.com>
Authored: Wed Oct 4 13:13:51 2017 -0700
Committer: gatorsmile <ga...@gmail.com>
Committed: Wed Oct 4 13:13:51 2017 -0700
----------------------------------------------------------------------
.../apache/spark/sql/catalyst/parser/SqlBase.g4 | 25 ++++++++-----------
.../sql/catalyst/catalog/SessionCatalog.scala | 2 +-
.../spark/sql/catalyst/parser/AstBuilder.scala | 26 ++++++++++++++------
.../sql/catalyst/parser/PlanParserSuite.scala | 1 +
.../sql/execution/command/DDLParserSuite.scala | 19 ++++++++++++++
5 files changed, 49 insertions(+), 24 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/spark/blob/bb035f1e/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4
----------------------------------------------------------------------
diff --git a/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 b/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4
index d0a5428..17c8404 100644
--- a/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4
+++ b/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4
@@ -25,7 +25,7 @@ grammar SqlBase;
* For char stream "2.3", "2." is not a valid decimal token, because it is followed by digit '3'.
* For char stream "2.3_", "2.3" is not a valid decimal token, because it is followed by '_'.
* For char stream "2.3W", "2.3" is not a valid decimal token, because it is followed by 'W'.
- * For char stream "12.0D 34.E2+0.12 " 12.0D is a valid decimal token because it is folllowed
+ * For char stream "12.0D 34.E2+0.12 " 12.0D is a valid decimal token because it is followed
* by a space. 34.E2 is a valid decimal token because it is followed by symbol '+'
* which is not a digit or letter or underscore.
*/
@@ -40,10 +40,6 @@ grammar SqlBase;
}
}
-tokens {
- DELIMITER
-}
-
singleStatement
: statement EOF
;
@@ -447,12 +443,15 @@ joinCriteria
;
sample
- : TABLESAMPLE '('
- ( (negativeSign=MINUS? percentage=(INTEGER_VALUE | DECIMAL_VALUE) sampleType=PERCENTLIT)
- | (expression sampleType=ROWS)
- | sampleType=BYTELENGTH_LITERAL
- | (sampleType=BUCKET numerator=INTEGER_VALUE OUT OF denominator=INTEGER_VALUE (ON (identifier | qualifiedName '(' ')'))?))
- ')'
+ : TABLESAMPLE '(' sampleMethod? ')'
+ ;
+
+sampleMethod
+ : negativeSign=MINUS? percentage=(INTEGER_VALUE | DECIMAL_VALUE) PERCENTLIT #sampleByPercentile
+ | expression ROWS #sampleByRows
+ | sampleType=BUCKET numerator=INTEGER_VALUE OUT OF denominator=INTEGER_VALUE
+ (ON (identifier | qualifiedName '(' ')'))? #sampleByBucket
+ | bytes=expression #sampleByBytes
;
identifierList
@@ -1004,10 +1003,6 @@ TINYINT_LITERAL
: DIGIT+ 'Y'
;
-BYTELENGTH_LITERAL
- : DIGIT+ ('B' | 'K' | 'M' | 'G')
- ;
-
INTEGER_VALUE
: DIGIT+
;
http://git-wip-us.apache.org/repos/asf/spark/blob/bb035f1e/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala
----------------------------------------------------------------------
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala
index 6ba9ee5..95bc3d6 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala
@@ -99,7 +99,7 @@ class SessionCatalog(
protected var currentDb: String = formatDatabaseName(DEFAULT_DATABASE)
/**
- * Checks if the given name conforms the Hive standard ("[a-zA-z_0-9]+"),
+ * Checks if the given name conforms the Hive standard ("[a-zA-Z_0-9]+"),
* i.e. if this name only contains characters, numbers, and _.
*
* This method is intended to have the same behavior of
http://git-wip-us.apache.org/repos/asf/spark/blob/bb035f1e/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala
----------------------------------------------------------------------
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala
index 85b492e..ce36714 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala
@@ -699,20 +699,30 @@ class AstBuilder(conf: SQLConf) extends SqlBaseBaseVisitor[AnyRef] with Logging
Sample(0.0, fraction, withReplacement = false, (math.random * 1000).toInt, query)
}
- ctx.sampleType.getType match {
- case SqlBaseParser.ROWS =>
+ if (ctx.sampleMethod() == null) {
+ throw new ParseException("TABLESAMPLE does not accept empty inputs.", ctx)
+ }
+
+ ctx.sampleMethod() match {
+ case ctx: SampleByRowsContext =>
Limit(expression(ctx.expression), query)
- case SqlBaseParser.PERCENTLIT =>
+ case ctx: SampleByPercentileContext =>
val fraction = ctx.percentage.getText.toDouble
val sign = if (ctx.negativeSign == null) 1 else -1
sample(sign * fraction / 100.0d)
- case SqlBaseParser.BYTELENGTH_LITERAL =>
- throw new ParseException(
- "TABLESAMPLE(byteLengthLiteral) is not supported", ctx)
+ case ctx: SampleByBytesContext =>
+ val bytesStr = ctx.bytes.getText
+ if (bytesStr.matches("[0-9]+[bBkKmMgG]")) {
+ throw new ParseException("TABLESAMPLE(byteLengthLiteral) is not supported", ctx)
+ } else {
+ throw new ParseException(
+ bytesStr + " is not a valid byte length literal, " +
+ "expected syntax: DIGIT+ ('B' | 'K' | 'M' | 'G')", ctx)
+ }
- case SqlBaseParser.BUCKET if ctx.ON != null =>
+ case ctx: SampleByBucketContext if ctx.ON() != null =>
if (ctx.identifier != null) {
throw new ParseException(
"TABLESAMPLE(BUCKET x OUT OF y ON colname) is not supported", ctx)
@@ -721,7 +731,7 @@ class AstBuilder(conf: SQLConf) extends SqlBaseBaseVisitor[AnyRef] with Logging
"TABLESAMPLE(BUCKET x OUT OF y ON function) is not supported", ctx)
}
- case SqlBaseParser.BUCKET =>
+ case ctx: SampleByBucketContext =>
sample(ctx.numerator.getText.toDouble / ctx.denominator.getText.toDouble)
}
}
http://git-wip-us.apache.org/repos/asf/spark/blob/bb035f1e/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/PlanParserSuite.scala
----------------------------------------------------------------------
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/PlanParserSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/PlanParserSuite.scala
index 306e6f2..d34a83c 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/PlanParserSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/PlanParserSuite.scala
@@ -110,6 +110,7 @@ class PlanParserSuite extends AnalysisTest {
assertEqual("select distinct a, b from db.c", Distinct(table("db", "c").select('a, 'b)))
assertEqual("select all a, b from db.c", table("db", "c").select('a, 'b))
assertEqual("select from tbl", OneRowRelation().select('from.as("tbl")))
+ assertEqual("select a from 1k.2m", table("1k", "2m").select('a))
}
test("reverse select query") {
http://git-wip-us.apache.org/repos/asf/spark/blob/bb035f1e/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLParserSuite.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLParserSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLParserSuite.scala
index fa5172c..eb7c335 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLParserSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLParserSuite.scala
@@ -525,6 +525,25 @@ class DDLParserSuite extends PlanTest with SharedSQLContext {
assert(e.message.contains("you can only specify one of them."))
}
+ test("create table - byte length literal table name") {
+ val sql = "CREATE TABLE 1m.2g(a INT) USING parquet"
+
+ val expectedTableDesc = CatalogTable(
+ identifier = TableIdentifier("2g", Some("1m")),
+ tableType = CatalogTableType.MANAGED,
+ storage = CatalogStorageFormat.empty,
+ schema = new StructType().add("a", IntegerType),
+ provider = Some("parquet"))
+
+ parser.parsePlan(sql) match {
+ case CreateTable(tableDesc, _, None) =>
+ assert(tableDesc == expectedTableDesc.copy(createTime = tableDesc.createTime))
+ case other =>
+ fail(s"Expected to parse ${classOf[CreateTableCommand].getClass.getName} from query," +
+ s"got ${other.getClass.getName}: $sql")
+ }
+ }
+
test("insert overwrite directory") {
val v1 = "INSERT OVERWRITE DIRECTORY '/tmp/file' USING parquet SELECT 1 as a"
parser.parsePlan(v1) match {
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org