You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by we...@apache.org on 2021/11/25 07:29:09 UTC

[spark] branch branch-3.1 updated: [SPARK-37389][SQL][3.1] Check unclosed bracketed comments

This is an automated email from the ASF dual-hosted git repository.

wenchen pushed a commit to branch branch-3.1
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/branch-3.1 by this push:
     new 61074aa  [SPARK-37389][SQL][3.1] Check unclosed bracketed comments
61074aa is described below

commit 61074aa1c17ffca1294215c5516a76799c9eeed5
Author: Jiaan Geng <be...@163.com>
AuthorDate: Thu Nov 25 15:28:14 2021 +0800

    [SPARK-37389][SQL][3.1] Check unclosed bracketed comments
    
    ### What changes were proposed in this pull request?
    This PR used to backport https://github.com/apache/spark/pull/34668 to branch 3.1
    
    ### Why are the changes needed?
    The execute plan is not expected, if we don't check unclosed bracketed comments.
    
    ### Does this PR introduce _any_ user-facing change?
    'Yes'. The behavior of bracketed comments will more correctly.
    
    ### How was this patch tested?
    New tests.
    
    Closes #34696 from beliefer/SPARK-37389-backport-3.1.
    
    Authored-by: Jiaan Geng <be...@163.com>
    Signed-off-by: Wenchen Fan <we...@databricks.com>
---
 .../apache/spark/sql/catalyst/parser/SqlBase.g4    | 17 +++++-
 .../spark/sql/catalyst/parser/ParseDriver.scala    | 55 +++++++++++++++++++
 .../sql/catalyst/parser/PlanParserSuite.scala      | 50 +++++++++++++++++
 .../test/resources/sql-tests/inputs/comments.sql   | 29 ++++++++++
 .../resources/sql-tests/results/comments.sql.out   | 64 +++++++++++++++++++++-
 5 files changed, 213 insertions(+), 2 deletions(-)

diff --git a/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 b/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4
index a23994f..7463ce2 100644
--- a/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4
+++ b/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4
@@ -37,6 +37,11 @@ grammar SqlBase;
 
 @lexer::members {
   /**
+    * When true, parser should throw ParseExcetion for unclosed bracketed comment.
+    */
+   public boolean has_unclosed_bracketed_comment = false;
+
+  /**
    * Verify whether current token is a valid decimal token (which contains dot).
    * Returns true if the character that follows the token is not a digit or letter or underscore.
    *
@@ -73,6 +78,16 @@ grammar SqlBase;
       return false;
     }
   }
+
+  /**
+    * This method will be called when the character stream ends and try to find out the
+    * unclosed bracketed comment.
+    * If the method be called, it means the end of the entire character stream match,
+    * and we set the flag and fail later.
+    */
+   public void markUnclosedComment() {
+     has_unclosed_bracketed_comment = true;
+   }
 }
 
 singleStatement
@@ -1821,7 +1836,7 @@ SIMPLE_COMMENT
     ;
 
 BRACKETED_COMMENT
-    : '/*' {!isHint()}? (BRACKETED_COMMENT|.)*? '*/' -> channel(HIDDEN)
+    : '/*' {!isHint()}? ( BRACKETED_COMMENT | . )*? ('*/' | {markUnclosedComment();} EOF) -> channel(HIDDEN)
     ;
 
 WS
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ParseDriver.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ParseDriver.scala
index d08be46..dc3c0cd 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ParseDriver.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ParseDriver.scala
@@ -94,6 +94,7 @@ abstract class AbstractSqlParser extends ParserInterface with SQLConfHelper with
     val tokenStream = new CommonTokenStream(lexer)
     val parser = new SqlBaseParser(tokenStream)
     parser.addParseListener(PostProcessor)
+    parser.addParseListener(UnclosedCommentProcessor(command, tokenStream))
     parser.removeErrorListeners()
     parser.addErrorListener(ParseErrorListener)
     parser.legacy_setops_precedence_enbled = conf.setOpsPrecedenceEnforced
@@ -299,3 +300,57 @@ case object PostProcessor extends SqlBaseBaseListener {
     parent.addChild(new TerminalNodeImpl(f(newToken)))
   }
 }
+
+/**
+ * The post-processor checks the unclosed bracketed comment.
+ */
+case class UnclosedCommentProcessor(
+    command: String, tokenStream: CommonTokenStream) extends SqlBaseBaseListener {
+
+  override def exitSingleDataType(ctx: SqlBaseParser.SingleDataTypeContext): Unit = {
+    checkUnclosedComment(tokenStream, command)
+  }
+
+  override def exitSingleExpression(ctx: SqlBaseParser.SingleExpressionContext): Unit = {
+    checkUnclosedComment(tokenStream, command)
+  }
+
+  override def exitSingleTableIdentifier(ctx: SqlBaseParser.SingleTableIdentifierContext): Unit = {
+    checkUnclosedComment(tokenStream, command)
+  }
+
+  override def exitSingleFunctionIdentifier(
+      ctx: SqlBaseParser.SingleFunctionIdentifierContext): Unit = {
+    checkUnclosedComment(tokenStream, command)
+  }
+
+  override def exitSingleMultipartIdentifier(
+      ctx: SqlBaseParser.SingleMultipartIdentifierContext): Unit = {
+    checkUnclosedComment(tokenStream, command)
+  }
+
+  override def exitSingleTableSchema(ctx: SqlBaseParser.SingleTableSchemaContext): Unit = {
+    checkUnclosedComment(tokenStream, command)
+  }
+
+  override def exitQuery(ctx: SqlBaseParser.QueryContext): Unit = {
+    checkUnclosedComment(tokenStream, command)
+  }
+
+  override def exitSingleStatement(ctx: SqlBaseParser.SingleStatementContext): Unit = {
+    checkUnclosedComment(tokenStream, command)
+  }
+
+  /** check `has_unclosed_bracketed_comment` to find out the unclosed bracketed comment. */
+  private def checkUnclosedComment(tokenStream: CommonTokenStream, command: String) = {
+    assert(tokenStream.getTokenSource.isInstanceOf[SqlBaseLexer])
+    val lexer = tokenStream.getTokenSource.asInstanceOf[SqlBaseLexer]
+    if (lexer.has_unclosed_bracketed_comment) {
+      // The last token is 'EOF' and the penultimate is unclosed bracketed comment
+      val failedToken = tokenStream.get(tokenStream.size() - 2)
+      assert(failedToken.getType() == SqlBaseParser.BRACKETED_COMMENT)
+      val position = Origin(Option(failedToken.getLine), Option(failedToken.getCharPositionInLine))
+      throw new ParseException(Some(command), "Unclosed bracketed comment", position, position)
+    }
+  }
+}
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/PlanParserSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/PlanParserSuite.scala
index 6fef18b..f152c15 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/PlanParserSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/PlanParserSuite.scala
@@ -158,6 +158,56 @@ class PlanParserSuite extends AnalysisTest {
       """.stripMargin, plan)
   }
 
+  test("nested bracketed comment case seven") {
+    val plan = OneRowRelation().select(Literal(1).as("a"))
+    assertEqual(
+      """
+        |/*abc*/
+        |select 1 as a
+        |/*
+        |
+        |2 as b
+        |/*abc */
+        |, 3 as c
+        |
+        |/**/
+        |*/
+      """.stripMargin, plan)
+  }
+
+  test("unclosed bracketed comment one") {
+    val query = """
+                  |/*abc*/
+                  |select 1 as a
+                  |/*
+                  |
+                  |2 as b
+                  |/*abc */
+                  |, 3 as c
+                  |
+                  |/**/
+                  |""".stripMargin
+    val e = intercept[ParseException](parsePlan(query))
+    assert(e.getMessage.contains(s"Unclosed bracketed comment"))
+  }
+
+  test("unclosed bracketed comment two") {
+    val query = """
+                  |/*abc*/
+                  |select 1 as a
+                  |/*
+                  |
+                  |2 as b
+                  |/*abc */
+                  |, 3 as c
+                  |
+                  |/**/
+                  |select 4 as d
+                  |""".stripMargin
+    val e = intercept[ParseException](parsePlan(query))
+    assert(e.getMessage.contains(s"Unclosed bracketed comment"))
+  }
+
   test("case insensitive") {
     val plan = table("a").select(star())
     assertEqual("sELEct * FroM a", plan)
diff --git a/sql/core/src/test/resources/sql-tests/inputs/comments.sql b/sql/core/src/test/resources/sql-tests/inputs/comments.sql
index 19f11de..da5e57a 100644
--- a/sql/core/src/test/resources/sql-tests/inputs/comments.sql
+++ b/sql/core/src/test/resources/sql-tests/inputs/comments.sql
@@ -88,3 +88,32 @@ Other information of first level.
 /*/**/*/
 SELECT 'selected content' AS tenth;
 --QUERY-DELIMITER-END
+
+-- the first case of unclosed bracketed comment
+--QUERY-DELIMITER-START
+/*abc*/
+select 1 as a
+/*
+
+2 as b
+/*abc*/
+, 3 as c
+
+/**/
+;
+--QUERY-DELIMITER-END
+
+-- the second case of unclosed bracketed comment
+--QUERY-DELIMITER-START
+/*abc*/
+select 1 as a
+/*
+
+2 as b
+/*abc*/
+, 3 as c
+
+/**/
+select 4 as d
+;
+--QUERY-DELIMITER-END
diff --git a/sql/core/src/test/resources/sql-tests/results/comments.sql.out b/sql/core/src/test/resources/sql-tests/results/comments.sql.out
index fd58a33..da9dbd5 100644
--- a/sql/core/src/test/resources/sql-tests/results/comments.sql.out
+++ b/sql/core/src/test/resources/sql-tests/results/comments.sql.out
@@ -1,5 +1,5 @@
 -- Automatically generated by SQLQueryTestSuite
--- Number of queries: 10
+-- Number of queries: 12
 
 
 -- !query
@@ -119,3 +119,65 @@ SELECT 'selected content' AS tenth
 struct<tenth:string>
 -- !query output
 selected content
+
+
+-- !query
+/*abc*/
+select 1 as a
+/*
+
+2 as b
+/*abc*/
+, 3 as c
+
+/**/
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.catalyst.parser.ParseException
+
+Unclosed bracketed comment(line 3, pos 0)
+
+== SQL ==
+/*abc*/
+select 1 as a
+/*
+^^^
+
+2 as b
+/*abc*/
+, 3 as c
+
+/**/
+
+
+-- !query
+/*abc*/
+select 1 as a
+/*
+
+2 as b
+/*abc*/
+, 3 as c
+
+/**/
+select 4 as d
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.catalyst.parser.ParseException
+
+Unclosed bracketed comment(line 3, pos 0)
+
+== SQL ==
+/*abc*/
+select 1 as a
+/*
+^^^
+
+2 as b
+/*abc*/
+, 3 as c
+
+/**/
+select 4 as d

---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org