Posted to commits@spark.apache.org by we...@apache.org on 2019/03/26 00:43:19 UTC

[spark] branch master updated: [SPARK-27209][SQL] Split parsing of SELECT and INSERT into two top-level rules in the grammar file.

This is an automated email from the ASF dual-hosted git repository.

wenchen pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/master by this push:
     new 9cc925c  [SPARK-27209][SQL] Split parsing of SELECT and INSERT into two top-level rules in the grammar file.
9cc925c is described below

commit 9cc925cda2c415deca8cf142a2a1774e81fda3fb
Author: Dilip Biswal <db...@us.ibm.com>
AuthorDate: Mon Mar 25 17:43:03 2019 -0700

    [SPARK-27209][SQL] Split parsing of SELECT and INSERT into two top-level rules in the grammar file.
    
    ## What changes were proposed in this pull request?
    Currently, the rule `query` in the grammar file is responsible for parsing both SELECT and INSERT statements. As a result, we need extra semantic checks in the code to guard against invalid insert constructs in a query; a couple of examples are the checks in the `visitCreateView` and `visitAlterView` functions. Another issue is that we don't catch invalid insert constructs everywhere until checkAnalysis, and the errors raised there can be confusing as well. Here are a couple of examples:
    
    ```SQL
    select * from (insert into bar values (2));
    ```
    ```
    Error in query: unresolved operator 'Project [*];
    'Project [*]
    +- SubqueryAlias `__auto_generated_subquery_name`
       +- InsertIntoHiveTable `default`.`bar`, org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe, false, false, [c1]
          +- Project [cast(col1#18 as int) AS c1#20]
             +- LocalRelation [col1#18]
    ```
    
    ```SQL
    select * from foo where c1 in (insert into bar values (2))
    ```
    ```
    Error in query: cannot resolve '(default.foo.`c1` IN (listquery()))' due to data type mismatch:
    The number of columns in the left hand side of an IN subquery does not match the
    number of columns in the output of subquery.
    #columns in left hand side: 1.
    #columns in right hand side: 0.
    
    Left side columns:
    [default.foo.`c1`].
    Right side columns:
    [].;;
    'Project [*]
    +- 'Filter c1#6 IN (list#5 [])
       :  +- InsertIntoHiveTable `default`.`bar`, org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe, false, false, [c1]
       :     +- Project [cast(col1#7 as int) AS c1#9]
       :        +- LocalRelation [col1#7]
       +- SubqueryAlias `default`.`foo`
          +- HiveTableRelation `default`.`foo`, org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe, [c1#6]
    ```
    
    In both cases above, the syntax should be rejected at the parser level.
    
    In this PR, we create two top-level parser rules to parse `SELECT` and `INSERT` respectively.
    I will create a small PR to allow CTEs in DESCRIBE QUERY after this PR is in.
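
    As a quick illustration, both invalid constructs above are now rejected by the parser itself. The expected error text below is taken from the tests added to PlanParserSuite in this patch (the list of expected tokens is elided):

    ```SQL
    select * from (insert into bar values (2));
    -- ParseException: mismatched input 'FROM' expecting ...

    select * from foo where c1 in (insert into bar values (2));
    -- ParseException: mismatched input 'FROM' expecting ...
    ```
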
    ## How was this patch tested?
    Added tests to PlanParserSuite and removed the corresponding semantic-check tests from SparkSqlParserSuite.
    
    Closes #24150 from dilipbiswal/split-query-insert.
    
    Authored-by: Dilip Biswal <db...@us.ibm.com>
    Signed-off-by: Wenchen Fan <we...@databricks.com>
---
 .../apache/spark/sql/catalyst/parser/SqlBase.g4    | 23 +++++--
 .../spark/sql/catalyst/parser/AstBuilder.scala     | 77 ++++++++++++++++------
 .../sql/catalyst/parser/PlanParserSuite.scala      | 49 +++++++++++++-
 .../spark/sql/execution/SparkSqlParser.scala       | 17 -----
 .../spark/sql/execution/SparkSqlParserSuite.scala  | 22 -------
 5 files changed, 124 insertions(+), 64 deletions(-)

diff --git a/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 b/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4
index 1f7da19..78dc60c 100644
--- a/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4
+++ b/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4
@@ -81,6 +81,8 @@ singleTableSchema
 
 statement
     : query                                                            #statementDefault
+    | insertStatement                                                  #insertStatementDefault
+    | multiSelectStatement                                             #multiSelectStatementDefault
     | USE db=identifier                                                #use
     | CREATE database (IF NOT EXISTS)? identifier
         (COMMENT comment=STRING)? locationSpec?
@@ -358,9 +360,14 @@ resource
     : identifier STRING
     ;
 
+insertStatement
+    : (ctes)? insertInto queryTerm queryOrganization                                       #singleInsertQuery
+    | (ctes)? fromClause multiInsertQueryBody+                                             #multiInsertQuery
+    ;
+
 queryNoWith
-    : insertInto? queryTerm queryOrganization                                              #singleInsertQuery
-    | fromClause multiInsertQueryBody+                                                     #multiInsertQuery
+    : queryTerm queryOrganization                                               #noWithQuery
+    | fromClause selectStatement                                                #queryWithFrom
     ;
 
 queryOrganization
@@ -373,9 +380,15 @@ queryOrganization
     ;
 
 multiInsertQueryBody
-    : insertInto?
-      querySpecification
-      queryOrganization
+    : insertInto selectStatement
+    ;
+
+multiSelectStatement
+    : (ctes)? fromClause selectStatement+                                                #multiSelect
+    ;
+
+selectStatement
+    : querySpecification queryOrganization
     ;
 
 queryTerm
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala
index 52a5d2c..7164ad2 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala
@@ -112,21 +112,36 @@ class AstBuilder(conf: SQLConf) extends SqlBaseBaseVisitor[AnyRef] with Logging
     val query = plan(ctx.queryNoWith)
 
     // Apply CTEs
-    query.optional(ctx.ctes) {
-      val ctes = ctx.ctes.namedQuery.asScala.map { nCtx =>
-        val namedQuery = visitNamedQuery(nCtx)
-        (namedQuery.alias, namedQuery)
-      }
-      // Check for duplicate names.
-      checkDuplicateKeys(ctes, ctx)
-      With(query, ctes)
+    query.optionalMap(ctx.ctes)(withCTE)
+  }
+
+  private def withCTE(ctx: CtesContext, plan: LogicalPlan): LogicalPlan = {
+    val ctes = ctx.namedQuery.asScala.map { nCtx =>
+      val namedQuery = visitNamedQuery(nCtx)
+      (namedQuery.alias, namedQuery)
     }
+    // Check for duplicate names.
+    checkDuplicateKeys(ctes, ctx)
+    With(plan, ctes)
   }
 
   override def visitQueryToDesc(ctx: QueryToDescContext): LogicalPlan = withOrigin(ctx) {
     plan(ctx.queryTerm).optionalMap(ctx.queryOrganization)(withQueryResultClauses)
   }
 
+  override def visitQueryWithFrom(ctx: QueryWithFromContext): LogicalPlan = withOrigin(ctx) {
+    val from = visitFromClause(ctx.fromClause)
+    validate(ctx.selectStatement.querySpecification.fromClause == null,
+      "Individual select statement can not have FROM cause as its already specified in the" +
+        " outer query block", ctx)
+    withQuerySpecification(ctx.selectStatement.querySpecification, from).
+      optionalMap(ctx.selectStatement.queryOrganization)(withQueryResultClauses)
+  }
+
+  override def visitNoWithQuery(ctx: NoWithQueryContext): LogicalPlan = withOrigin(ctx) {
+    plan(ctx.queryTerm).optionalMap(ctx.queryOrganization)(withQueryResultClauses)
+  }
+
   /**
    * Create a named logical plan.
    *
@@ -156,24 +171,49 @@ class AstBuilder(conf: SQLConf) extends SqlBaseBaseVisitor[AnyRef] with Logging
     val from = visitFromClause(ctx.fromClause)
 
     // Build the insert clauses.
-    val inserts = ctx.multiInsertQueryBody.asScala.map {
+    val inserts = ctx.multiInsertQueryBody().asScala.map {
       body =>
-        validate(body.querySpecification.fromClause == null,
+        validate(body.selectStatement.querySpecification.fromClause == null,
           "Multi-Insert queries cannot have a FROM clause in their individual SELECT statements",
           body)
 
+        withInsertInto(body.insertInto,
+          withQuerySpecification(body.selectStatement.querySpecification, from).
+            // Add organization statements.
+            optionalMap(body.selectStatement.queryOrganization)(withQueryResultClauses))
+    }
+
+    // If there are multiple INSERTS just UNION them together into one query.
+    val insertPlan = inserts match {
+      case Seq(query) => query
+      case queries => Union(queries)
+    }
+    // Apply CTEs
+    insertPlan.optionalMap(ctx.ctes)(withCTE)
+  }
+
+  override def visitMultiSelect(ctx: MultiSelectContext): LogicalPlan = withOrigin(ctx) {
+    val from = visitFromClause(ctx.fromClause)
+
+    // Build the select clauses.
+    val selects = ctx.selectStatement.asScala.map {
+      body =>
+        validate(body.querySpecification.fromClause == null,
+          "Multi-select queries cannot have a FROM clause in their individual SELECT statements",
+          body)
+
         withQuerySpecification(body.querySpecification, from).
           // Add organization statements.
-          optionalMap(body.queryOrganization)(withQueryResultClauses).
-          // Add insert.
-          optionalMap(body.insertInto())(withInsertInto)
+          optionalMap(body.queryOrganization)(withQueryResultClauses)
     }
 
-    // If there are multiple INSERTS just UNION them together into one query.
-    inserts match {
+    // If there are multiple SELECTS just UNION them together into one query.
+    val selectUnionPlan = selects match {
       case Seq(query) => query
       case queries => Union(queries)
     }
+    // Apply CTEs
+    selectUnionPlan.optionalMap(ctx.ctes)(withCTE)
   }
 
   /**
@@ -181,11 +221,10 @@ class AstBuilder(conf: SQLConf) extends SqlBaseBaseVisitor[AnyRef] with Logging
    */
   override def visitSingleInsertQuery(
       ctx: SingleInsertQueryContext): LogicalPlan = withOrigin(ctx) {
-    plan(ctx.queryTerm).
-      // Add organization statements.
-      optionalMap(ctx.queryOrganization)(withQueryResultClauses).
-      // Add insert.
-      optionalMap(ctx.insertInto())(withInsertInto)
+    val insertPlan = withInsertInto(ctx.insertInto(),
+        plan(ctx.queryTerm).optionalMap(ctx.queryOrganization)(withQueryResultClauses))
+    // Apply CTEs
+    insertPlan.optionalMap(ctx.ctes)(withCTE)
   }
 
   /**
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/PlanParserSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/PlanParserSuite.scala
index d628150..9208d93 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/PlanParserSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/PlanParserSuite.scala
@@ -132,7 +132,11 @@ class PlanParserSuite extends AnalysisTest {
       table("a").select(star()).union(table("a").where('s < 10).select(star())))
     intercept(
       "from a select * select * from x where a.s < 10",
-      "Multi-Insert queries cannot have a FROM clause in their individual SELECT statements")
+      "Multi-select queries cannot have a FROM clause in their individual SELECT statements")
+    intercept(
+      "from a select * from b",
+      "Individual select statement can not have FROM cause as its already specified in " +
+        "the outer query block")
     assertEqual(
       "from a insert into tbl1 select * insert into tbl2 select * where s < 10",
       table("a").select(star()).insertInto("tbl1").union(
@@ -754,4 +758,47 @@ class PlanParserSuite extends AnalysisTest {
       assertEqual(query2, Distinct(a.union(b)).except(c.intersect(d, isAll = true), isAll = true))
     }
   }
+
+  test("create/alter view as insert into table") {
+    val m1 = intercept[ParseException] {
+      parsePlan("CREATE VIEW testView AS INSERT INTO jt VALUES(1, 1)")
+    }.getMessage
+    assert(m1.contains("mismatched input 'INSERT' expecting"))
+    // Multi insert query
+    val m2 = intercept[ParseException] {
+      parsePlan(
+        """
+          |CREATE VIEW testView AS FROM jt
+          |INSERT INTO tbl1 SELECT * WHERE jt.id < 5
+          |INSERT INTO tbl2 SELECT * WHERE jt.id > 4
+        """.stripMargin)
+    }.getMessage
+    assert(m2.contains("mismatched input 'INSERT' expecting"))
+    val m3 = intercept[ParseException] {
+      parsePlan("ALTER VIEW testView AS INSERT INTO jt VALUES(1, 1)")
+    }.getMessage
+    assert(m3.contains("mismatched input 'INSERT' expecting"))
+    // Multi insert query
+    val m4 = intercept[ParseException] {
+      parsePlan(
+        """
+          |ALTER VIEW testView AS FROM jt
+          |INSERT INTO tbl1 SELECT * WHERE jt.id < 5
+          |INSERT INTO tbl2 SELECT * WHERE jt.id > 4
+        """.stripMargin
+      )
+    }.getMessage
+    assert(m4.contains("mismatched input 'INSERT' expecting"))
+  }
+
+  test("Invalid insert constructs in the query") {
+    val m1 = intercept[ParseException] {
+      parsePlan("SELECT * FROM (INSERT INTO BAR VALUES (2))")
+    }.getMessage
+    assert(m1.contains("mismatched input 'FROM' expecting"))
+    val m2 = intercept[ParseException] {
+      parsePlan("SELECT * FROM S WHERE C1 IN (INSERT INTO T VALUES (2))")
+    }.getMessage
+    assert(m2.contains("mismatched input 'FROM' expecting"))
+  }
 }
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala
index 735c0a5..8c7b2cb 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala
@@ -1257,15 +1257,6 @@ class SparkSqlAstBuilder(conf: SQLConf) extends AstBuilder(conf) {
     if (ctx.identifierList != null) {
       operationNotAllowed("CREATE VIEW ... PARTITIONED ON", ctx)
     } else {
-      // CREATE VIEW ... AS INSERT INTO is not allowed.
-      ctx.query.queryNoWith match {
-        case s: SingleInsertQueryContext if s.insertInto != null =>
-          operationNotAllowed("CREATE VIEW ... AS INSERT INTO", ctx)
-        case _: MultiInsertQueryContext =>
-          operationNotAllowed("CREATE VIEW ... AS FROM ... [INSERT INTO ...]+", ctx)
-        case _ => // OK
-      }
-
       val userSpecifiedColumns = Option(ctx.identifierCommentList).toSeq.flatMap { icl =>
         icl.identifierComment.asScala.map { ic =>
           ic.identifier.getText -> Option(ic.STRING).map(string)
@@ -1302,14 +1293,6 @@ class SparkSqlAstBuilder(conf: SQLConf) extends AstBuilder(conf) {
    * }}}
    */
   override def visitAlterViewQuery(ctx: AlterViewQueryContext): LogicalPlan = withOrigin(ctx) {
-    // ALTER VIEW ... AS INSERT INTO is not allowed.
-    ctx.query.queryNoWith match {
-      case s: SingleInsertQueryContext if s.insertInto != null =>
-        operationNotAllowed("ALTER VIEW ... AS INSERT INTO", ctx)
-      case _: MultiInsertQueryContext =>
-        operationNotAllowed("ALTER VIEW ... AS FROM ... [INSERT INTO ...]+", ctx)
-      case _ => // OK
-    }
     AlterViewAsCommand(
       name = visitTableIdentifier(ctx.tableIdentifier),
       originalText = source(ctx.query),
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/SparkSqlParserSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/SparkSqlParserSuite.scala
index be3d079..fd60779 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/SparkSqlParserSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/SparkSqlParserSuite.scala
@@ -215,17 +215,6 @@ class SparkSqlParserSuite extends AnalysisTest {
       "no viable alternative at input")
   }
 
-  test("create view as insert into table") {
-    // Single insert query
-    intercept("CREATE VIEW testView AS INSERT INTO jt VALUES(1, 1)",
-      "Operation not allowed: CREATE VIEW ... AS INSERT INTO")
-
-    // Multi insert query
-    intercept("CREATE VIEW testView AS FROM jt INSERT INTO tbl1 SELECT * WHERE jt.id < 5 " +
-      "INSERT INTO tbl2 SELECT * WHERE jt.id > 4",
-      "Operation not allowed: CREATE VIEW ... AS FROM ... [INSERT INTO ...]+")
-  }
-
   test("SPARK-17328 Fix NPE with EXPLAIN DESCRIBE TABLE") {
     assertEqual("describe t",
       DescribeTableCommand(TableIdentifier("t"), Map.empty, isExtended = false))
@@ -369,17 +358,6 @@ class SparkSqlParserSuite extends AnalysisTest {
       Project(UnresolvedAlias(concat) :: Nil, UnresolvedRelation(TableIdentifier("t"))))
   }
 
-  test("SPARK-25046 Fix Alter View ... As Insert Into Table") {
-    // Single insert query
-    intercept("ALTER VIEW testView AS INSERT INTO jt VALUES(1, 1)",
-      "Operation not allowed: ALTER VIEW ... AS INSERT INTO")
-
-    // Multi insert query
-    intercept("ALTER VIEW testView AS FROM jt INSERT INTO tbl1 SELECT * WHERE jt.id < 5 " +
-      "INSERT INTO tbl2 SELECT * WHERE jt.id > 4",
-      "Operation not allowed: ALTER VIEW ... AS FROM ... [INSERT INTO ...]+")
-  }
-
   test("database and schema tokens are interchangeable") {
     assertEqual("CREATE DATABASE foo", parser.parsePlan("CREATE SCHEMA foo"))
     assertEqual("DROP DATABASE foo", parser.parsePlan("DROP SCHEMA foo"))

