Posted to commits@spark.apache.org by we...@apache.org on 2019/03/26 00:43:19 UTC
[spark] branch master updated: [SPARK-27209][SQL] Split parsing of SELECT and INSERT into two top-level rules in the grammar file.
This is an automated email from the ASF dual-hosted git repository.
wenchen pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
new 9cc925c [SPARK-27209][SQL] Split parsing of SELECT and INSERT into two top-level rules in the grammar file.
9cc925c is described below
commit 9cc925cda2c415deca8cf142a2a1774e81fda3fb
Author: Dilip Biswal <db...@us.ibm.com>
AuthorDate: Mon Mar 25 17:43:03 2019 -0700
[SPARK-27209][SQL] Split parsing of SELECT and INSERT into two top-level rules in the grammar file.
## What changes were proposed in this pull request?
Currently, the rule `query` in the grammar file is responsible for parsing both SELECT and INSERT statements. As a result, we need extra semantic checks in the code to guard against invalid insert constructs inside a query; a couple of examples are in the `visitCreateView` and `visitAlterView` functions. Another issue is that we don't catch these invalid insert constructs everywhere until checkAnalysis, and the errors raised there can be confusing as well. Here are a couple of examples:
```SQL
select * from (insert into bar values (2));
```
```
Error in query: unresolved operator 'Project [*];
'Project [*]
+- SubqueryAlias `__auto_generated_subquery_name`
+- InsertIntoHiveTable `default`.`bar`, org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe, false, false, [c1]
+- Project [cast(col1#18 as int) AS c1#20]
+- LocalRelation [col1#18]
```
```SQL
select * from foo where c1 in (insert into bar values (2))
```
```
Error in query: cannot resolve '(default.foo.`c1` IN (listquery()))' due to data type mismatch:
The number of columns in the left hand side of an IN subquery does not match the
number of columns in the output of subquery.
#columns in left hand side: 1.
#columns in right hand side: 0.
Left side columns:
[default.foo.`c1`].
Right side columns:
[].;;
'Project [*]
+- 'Filter c1#6 IN (list#5 [])
: +- InsertIntoHiveTable `default`.`bar`, org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe, false, false, [c1]
: +- Project [cast(col1#7 as int) AS c1#9]
: +- LocalRelation [col1#7]
+- SubqueryAlias `default`.`foo`
+- HiveTableRelation `default`.`foo`, org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe, [c1#6]
```
For both cases above, we should reject the syntax at the parser level.
In this PR, we create two top-level parser rules to parse `SELECT` and `INSERT` respectively.
I will create a small follow-up PR to allow CTEs in DESCRIBE QUERY after this PR is merged.
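For a concrete sense of the new behavior, here is a minimal sketch (not part of this commit) that drives the Catalyst parser directly, the same way the PlanParserSuite tests added below do; the table and column names are placeholders:
```scala
import org.apache.spark.sql.catalyst.parser.{CatalystSqlParser, ParseException}

// Both invalid constructs shown above are now rejected while parsing,
// before any analysis runs. Tables bar/foo and column c1 are placeholders.
Seq(
  "SELECT * FROM (INSERT INTO bar VALUES (2))",
  "SELECT * FROM foo WHERE c1 IN (INSERT INTO bar VALUES (2))"
).foreach { sql =>
  try {
    CatalystSqlParser.parsePlan(sql)
  } catch {
    // The parser raises a syntax error such as "mismatched input ... expecting ..."
    case e: ParseException => println(s"rejected at parse time: ${e.getMessage}")
  }
}
```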
## How was this patch tested?
Added tests to PlanParserSuite and removed the semantic check tests from SparkSqlParserSuite.
Closes #24150 from dilipbiswal/split-query-insert.
Authored-by: Dilip Biswal <db...@us.ibm.com>
Signed-off-by: Wenchen Fan <we...@databricks.com>
---
.../apache/spark/sql/catalyst/parser/SqlBase.g4 | 23 +++++--
.../spark/sql/catalyst/parser/AstBuilder.scala | 77 ++++++++++++++++------
.../sql/catalyst/parser/PlanParserSuite.scala | 49 +++++++++++++-
.../spark/sql/execution/SparkSqlParser.scala | 17 -----
.../spark/sql/execution/SparkSqlParserSuite.scala | 22 -------
5 files changed, 124 insertions(+), 64 deletions(-)
diff --git a/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 b/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4
index 1f7da19..78dc60c 100644
--- a/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4
+++ b/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4
@@ -81,6 +81,8 @@ singleTableSchema
statement
: query #statementDefault
+ | insertStatement #insertStatementDefault
+ | multiSelectStatement #multiSelectStatementDefault
| USE db=identifier #use
| CREATE database (IF NOT EXISTS)? identifier
(COMMENT comment=STRING)? locationSpec?
@@ -358,9 +360,14 @@ resource
: identifier STRING
;
+insertStatement
+ : (ctes)? insertInto queryTerm queryOrganization #singleInsertQuery
+ | (ctes)? fromClause multiInsertQueryBody+ #multiInsertQuery
+ ;
+
queryNoWith
- : insertInto? queryTerm queryOrganization #singleInsertQuery
- | fromClause multiInsertQueryBody+ #multiInsertQuery
+ : queryTerm queryOrganization #noWithQuery
+ | fromClause selectStatement #queryWithFrom
;
queryOrganization
@@ -373,9 +380,15 @@ queryOrganization
;
multiInsertQueryBody
- : insertInto?
- querySpecification
- queryOrganization
+ : insertInto selectStatement
+ ;
+
+multiSelectStatement
+ : (ctes)? fromClause selectStatement+ #multiSelect
+ ;
+
+selectStatement
+ : querySpecification queryOrganization
;
queryTerm
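As an illustration of where each statement shape lands under the grammar hunk above (a hedged sketch, not from the commit; all table and column names are invented):
```scala
import org.apache.spark.sql.catalyst.parser.CatalystSqlParser

// insertStatement / #singleInsertQuery (note that CTEs now attach to the INSERT itself):
CatalystSqlParser.parsePlan(
  "WITH cte AS (SELECT 1 AS c) INSERT INTO t SELECT * FROM cte")

// insertStatement / #multiInsertQuery (FROM-first, one INSERT per branch):
CatalystSqlParser.parsePlan(
  "FROM src INSERT INTO a SELECT * WHERE k < 5 INSERT INTO b SELECT * WHERE k >= 5")

// multiSelectStatement / #multiSelect (FROM-first with several SELECT blocks);
// a FROM-first query with a single SELECT should instead match #queryWithFrom:
CatalystSqlParser.parsePlan(
  "FROM src SELECT * WHERE k < 5 SELECT * WHERE k >= 5")
```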
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala
index 52a5d2c..7164ad2 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala
@@ -112,21 +112,36 @@ class AstBuilder(conf: SQLConf) extends SqlBaseBaseVisitor[AnyRef] with Logging
val query = plan(ctx.queryNoWith)
// Apply CTEs
- query.optional(ctx.ctes) {
- val ctes = ctx.ctes.namedQuery.asScala.map { nCtx =>
- val namedQuery = visitNamedQuery(nCtx)
- (namedQuery.alias, namedQuery)
- }
- // Check for duplicate names.
- checkDuplicateKeys(ctes, ctx)
- With(query, ctes)
+ query.optionalMap(ctx.ctes)(withCTE)
+ }
+
+ private def withCTE(ctx: CtesContext, plan: LogicalPlan): LogicalPlan = {
+ val ctes = ctx.namedQuery.asScala.map { nCtx =>
+ val namedQuery = visitNamedQuery(nCtx)
+ (namedQuery.alias, namedQuery)
}
+ // Check for duplicate names.
+ checkDuplicateKeys(ctes, ctx)
+ With(plan, ctes)
}
override def visitQueryToDesc(ctx: QueryToDescContext): LogicalPlan = withOrigin(ctx) {
plan(ctx.queryTerm).optionalMap(ctx.queryOrganization)(withQueryResultClauses)
}
+ override def visitQueryWithFrom(ctx: QueryWithFromContext): LogicalPlan = withOrigin(ctx) {
+ val from = visitFromClause(ctx.fromClause)
+ validate(ctx.selectStatement.querySpecification.fromClause == null,
+ "Individual select statement can not have FROM cause as its already specified in the" +
+ " outer query block", ctx)
+ withQuerySpecification(ctx.selectStatement.querySpecification, from).
+ optionalMap(ctx.selectStatement.queryOrganization)(withQueryResultClauses)
+ }
+
+ override def visitNoWithQuery(ctx: NoWithQueryContext): LogicalPlan = withOrigin(ctx) {
+ plan(ctx.queryTerm).optionalMap(ctx.queryOrganization)(withQueryResultClauses)
+ }
+
/**
* Create a named logical plan.
*
@@ -156,24 +171,49 @@ class AstBuilder(conf: SQLConf) extends SqlBaseBaseVisitor[AnyRef] with Logging
val from = visitFromClause(ctx.fromClause)
// Build the insert clauses.
- val inserts = ctx.multiInsertQueryBody.asScala.map {
+ val inserts = ctx.multiInsertQueryBody().asScala.map {
body =>
- validate(body.querySpecification.fromClause == null,
+ validate(body.selectStatement.querySpecification.fromClause == null,
"Multi-Insert queries cannot have a FROM clause in their individual SELECT statements",
body)
+ withInsertInto(body.insertInto,
+ withQuerySpecification(body.selectStatement.querySpecification, from).
+ // Add organization statements.
+ optionalMap(body.selectStatement.queryOrganization)(withQueryResultClauses))
+ }
+
+ // If there are multiple INSERTS just UNION them together into one query.
+ val insertPlan = inserts match {
+ case Seq(query) => query
+ case queries => Union(queries)
+ }
+ // Apply CTEs
+ insertPlan.optionalMap(ctx.ctes)(withCTE)
+ }
+
+ override def visitMultiSelect(ctx: MultiSelectContext): LogicalPlan = withOrigin(ctx) {
+ val from = visitFromClause(ctx.fromClause)
+
+ // Build the insert clauses.
+ val selects = ctx.selectStatement.asScala.map {
+ body =>
+ validate(body.querySpecification.fromClause == null,
+ "Multi-select queries cannot have a FROM clause in their individual SELECT statements",
+ body)
+
withQuerySpecification(body.querySpecification, from).
// Add organization statements.
- optionalMap(body.queryOrganization)(withQueryResultClauses).
- // Add insert.
- optionalMap(body.insertInto())(withInsertInto)
+ optionalMap(body.queryOrganization)(withQueryResultClauses)
}
// If there are multiple INSERTS just UNION them together into one query.
- inserts match {
+ val selectUnionPlan = selects match {
case Seq(query) => query
case queries => Union(queries)
}
+ // Apply CTEs
+ selectUnionPlan.optionalMap(ctx.ctes)(withCTE)
}
/**
@@ -181,11 +221,10 @@ class AstBuilder(conf: SQLConf) extends SqlBaseBaseVisitor[AnyRef] with Logging
*/
override def visitSingleInsertQuery(
ctx: SingleInsertQueryContext): LogicalPlan = withOrigin(ctx) {
- plan(ctx.queryTerm).
- // Add organization statements.
- optionalMap(ctx.queryOrganization)(withQueryResultClauses).
- // Add insert.
- optionalMap(ctx.insertInto())(withInsertInto)
+ val insertPlan = withInsertInto(ctx.insertInto(),
+ plan(ctx.queryTerm).optionalMap(ctx.queryOrganization)(withQueryResultClauses))
+ // Apply CTEs
+ insertPlan.optionalMap(ctx.ctes)(withCTE)
}
/**
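To visualize the effect of the extracted withCTE helper in the hunks above, here is a rough sketch (assuming invented table and CTE names) that parses a CTE-bearing multi-insert and prints the resulting plan tree:
```scala
import org.apache.spark.sql.catalyst.parser.CatalystSqlParser

// Per visitMultiInsertQuery above, the two INSERT branches are unioned and the
// CTE is applied last via withCTE, so the expected shape is
// With -> Union -> (two unresolved inserts). Names cte/a/b/k are placeholders.
val plan = CatalystSqlParser.parsePlan(
  """WITH cte AS (SELECT 1 AS c, 2 AS k)
    |FROM cte
    |INSERT INTO a SELECT * WHERE k < 5
    |INSERT INTO b SELECT * WHERE k >= 5""".stripMargin)
println(plan.treeString)
```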
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/PlanParserSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/PlanParserSuite.scala
index d628150..9208d93 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/PlanParserSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/PlanParserSuite.scala
@@ -132,7 +132,11 @@ class PlanParserSuite extends AnalysisTest {
table("a").select(star()).union(table("a").where('s < 10).select(star())))
intercept(
"from a select * select * from x where a.s < 10",
- "Multi-Insert queries cannot have a FROM clause in their individual SELECT statements")
+ "Multi-select queries cannot have a FROM clause in their individual SELECT statements")
+ intercept(
+ "from a select * from b",
+ "Individual select statement can not have FROM cause as its already specified in " +
+ "the outer query block")
assertEqual(
"from a insert into tbl1 select * insert into tbl2 select * where s < 10",
table("a").select(star()).insertInto("tbl1").union(
@@ -754,4 +758,47 @@ class PlanParserSuite extends AnalysisTest {
assertEqual(query2, Distinct(a.union(b)).except(c.intersect(d, isAll = true), isAll = true))
}
}
+
+ test("create/alter view as insert into table") {
+ val m1 = intercept[ParseException] {
+ parsePlan("CREATE VIEW testView AS INSERT INTO jt VALUES(1, 1)")
+ }.getMessage
+ assert(m1.contains("mismatched input 'INSERT' expecting"))
+ // Multi insert query
+ val m2 = intercept[ParseException] {
+ parsePlan(
+ """
+ |CREATE VIEW testView AS FROM jt
+ |INSERT INTO tbl1 SELECT * WHERE jt.id < 5
+ |INSERT INTO tbl2 SELECT * WHERE jt.id > 4
+ """.stripMargin)
+ }.getMessage
+ assert(m2.contains("mismatched input 'INSERT' expecting"))
+ val m3 = intercept[ParseException] {
+ parsePlan("ALTER VIEW testView AS INSERT INTO jt VALUES(1, 1)")
+ }.getMessage
+ assert(m3.contains("mismatched input 'INSERT' expecting"))
+ // Multi insert query
+ val m4 = intercept[ParseException] {
+ parsePlan(
+ """
+ |ALTER VIEW testView AS FROM jt
+ |INSERT INTO tbl1 SELECT * WHERE jt.id < 5
+ |INSERT INTO tbl2 SELECT * WHERE jt.id > 4
+ """.stripMargin
+ )
+ }.getMessage
+ assert(m4.contains("mismatched input 'INSERT' expecting"))
+ }
+
+ test("Invalid insert constructs in the query") {
+ val m1 = intercept[ParseException] {
+ parsePlan("SELECT * FROM (INSERT INTO BAR VALUES (2))")
+ }.getMessage
+ assert(m1.contains("mismatched input 'FROM' expecting"))
+ val m2 = intercept[ParseException] {
+ parsePlan("SELECT * FROM S WHERE C1 IN (INSERT INTO T VALUES (2))")
+ }.getMessage
+ assert(m2.contains("mismatched input 'FROM' expecting"))
+ }
}
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala
index 735c0a5..8c7b2cb 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala
@@ -1257,15 +1257,6 @@ class SparkSqlAstBuilder(conf: SQLConf) extends AstBuilder(conf) {
if (ctx.identifierList != null) {
operationNotAllowed("CREATE VIEW ... PARTITIONED ON", ctx)
} else {
- // CREATE VIEW ... AS INSERT INTO is not allowed.
- ctx.query.queryNoWith match {
- case s: SingleInsertQueryContext if s.insertInto != null =>
- operationNotAllowed("CREATE VIEW ... AS INSERT INTO", ctx)
- case _: MultiInsertQueryContext =>
- operationNotAllowed("CREATE VIEW ... AS FROM ... [INSERT INTO ...]+", ctx)
- case _ => // OK
- }
-
val userSpecifiedColumns = Option(ctx.identifierCommentList).toSeq.flatMap { icl =>
icl.identifierComment.asScala.map { ic =>
ic.identifier.getText -> Option(ic.STRING).map(string)
@@ -1302,14 +1293,6 @@ class SparkSqlAstBuilder(conf: SQLConf) extends AstBuilder(conf) {
* }}}
*/
override def visitAlterViewQuery(ctx: AlterViewQueryContext): LogicalPlan = withOrigin(ctx) {
- // ALTER VIEW ... AS INSERT INTO is not allowed.
- ctx.query.queryNoWith match {
- case s: SingleInsertQueryContext if s.insertInto != null =>
- operationNotAllowed("ALTER VIEW ... AS INSERT INTO", ctx)
- case _: MultiInsertQueryContext =>
- operationNotAllowed("ALTER VIEW ... AS FROM ... [INSERT INTO ...]+", ctx)
- case _ => // OK
- }
AlterViewAsCommand(
name = visitTableIdentifier(ctx.tableIdentifier),
originalText = source(ctx.query),
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/SparkSqlParserSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/SparkSqlParserSuite.scala
index be3d079..fd60779 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/SparkSqlParserSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/SparkSqlParserSuite.scala
@@ -215,17 +215,6 @@ class SparkSqlParserSuite extends AnalysisTest {
"no viable alternative at input")
}
- test("create view as insert into table") {
- // Single insert query
- intercept("CREATE VIEW testView AS INSERT INTO jt VALUES(1, 1)",
- "Operation not allowed: CREATE VIEW ... AS INSERT INTO")
-
- // Multi insert query
- intercept("CREATE VIEW testView AS FROM jt INSERT INTO tbl1 SELECT * WHERE jt.id < 5 " +
- "INSERT INTO tbl2 SELECT * WHERE jt.id > 4",
- "Operation not allowed: CREATE VIEW ... AS FROM ... [INSERT INTO ...]+")
- }
-
test("SPARK-17328 Fix NPE with EXPLAIN DESCRIBE TABLE") {
assertEqual("describe t",
DescribeTableCommand(TableIdentifier("t"), Map.empty, isExtended = false))
@@ -369,17 +358,6 @@ class SparkSqlParserSuite extends AnalysisTest {
Project(UnresolvedAlias(concat) :: Nil, UnresolvedRelation(TableIdentifier("t"))))
}
- test("SPARK-25046 Fix Alter View ... As Insert Into Table") {
- // Single insert query
- intercept("ALTER VIEW testView AS INSERT INTO jt VALUES(1, 1)",
- "Operation not allowed: ALTER VIEW ... AS INSERT INTO")
-
- // Multi insert query
- intercept("ALTER VIEW testView AS FROM jt INSERT INTO tbl1 SELECT * WHERE jt.id < 5 " +
- "INSERT INTO tbl2 SELECT * WHERE jt.id > 4",
- "Operation not allowed: ALTER VIEW ... AS FROM ... [INSERT INTO ...]+")
- }
-
test("database and schema tokens are interchangeable") {
assertEqual("CREATE DATABASE foo", parser.parsePlan("CREATE SCHEMA foo"))
assertEqual("DROP DATABASE foo", parser.parsePlan("DROP SCHEMA foo"))