You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by we...@apache.org on 2021/11/25 07:29:09 UTC
[spark] branch branch-3.1 updated: [SPARK-37389][SQL][3.1] Check unclosed bracketed comments
This is an automated email from the ASF dual-hosted git repository.
wenchen pushed a commit to branch branch-3.1
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/branch-3.1 by this push:
new 61074aa [SPARK-37389][SQL][3.1] Check unclosed bracketed comments
61074aa is described below
commit 61074aa1c17ffca1294215c5516a76799c9eeed5
Author: Jiaan Geng <be...@163.com>
AuthorDate: Thu Nov 25 15:28:14 2021 +0800
[SPARK-37389][SQL][3.1] Check unclosed bracketed comments
### What changes were proposed in this pull request?
This PR used to backport https://github.com/apache/spark/pull/34668 to branch 3.1
### Why are the changes needed?
The execute plan is not expected, if we don't check unclosed bracketed comments.
### Does this PR introduce _any_ user-facing change?
'Yes'. The behavior of bracketed comments will more correctly.
### How was this patch tested?
New tests.
Closes #34696 from beliefer/SPARK-37389-backport-3.1.
Authored-by: Jiaan Geng <be...@163.com>
Signed-off-by: Wenchen Fan <we...@databricks.com>
---
.../apache/spark/sql/catalyst/parser/SqlBase.g4 | 17 +++++-
.../spark/sql/catalyst/parser/ParseDriver.scala | 55 +++++++++++++++++++
.../sql/catalyst/parser/PlanParserSuite.scala | 50 +++++++++++++++++
.../test/resources/sql-tests/inputs/comments.sql | 29 ++++++++++
.../resources/sql-tests/results/comments.sql.out | 64 +++++++++++++++++++++-
5 files changed, 213 insertions(+), 2 deletions(-)
diff --git a/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 b/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4
index a23994f..7463ce2 100644
--- a/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4
+++ b/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4
@@ -37,6 +37,11 @@ grammar SqlBase;
@lexer::members {
/**
+ * When true, parser should throw ParseExcetion for unclosed bracketed comment.
+ */
+ public boolean has_unclosed_bracketed_comment = false;
+
+ /**
* Verify whether current token is a valid decimal token (which contains dot).
* Returns true if the character that follows the token is not a digit or letter or underscore.
*
@@ -73,6 +78,16 @@ grammar SqlBase;
return false;
}
}
+
+ /**
+ * This method will be called when the character stream ends and try to find out the
+ * unclosed bracketed comment.
+ * If the method be called, it means the end of the entire character stream match,
+ * and we set the flag and fail later.
+ */
+ public void markUnclosedComment() {
+ has_unclosed_bracketed_comment = true;
+ }
}
singleStatement
@@ -1821,7 +1836,7 @@ SIMPLE_COMMENT
;
BRACKETED_COMMENT
- : '/*' {!isHint()}? (BRACKETED_COMMENT|.)*? '*/' -> channel(HIDDEN)
+ : '/*' {!isHint()}? ( BRACKETED_COMMENT | . )*? ('*/' | {markUnclosedComment();} EOF) -> channel(HIDDEN)
;
WS
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ParseDriver.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ParseDriver.scala
index d08be46..dc3c0cd 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ParseDriver.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ParseDriver.scala
@@ -94,6 +94,7 @@ abstract class AbstractSqlParser extends ParserInterface with SQLConfHelper with
val tokenStream = new CommonTokenStream(lexer)
val parser = new SqlBaseParser(tokenStream)
parser.addParseListener(PostProcessor)
+ parser.addParseListener(UnclosedCommentProcessor(command, tokenStream))
parser.removeErrorListeners()
parser.addErrorListener(ParseErrorListener)
parser.legacy_setops_precedence_enbled = conf.setOpsPrecedenceEnforced
@@ -299,3 +300,57 @@ case object PostProcessor extends SqlBaseBaseListener {
parent.addChild(new TerminalNodeImpl(f(newToken)))
}
}
+
+/**
+ * The post-processor checks the unclosed bracketed comment.
+ */
+case class UnclosedCommentProcessor(
+ command: String, tokenStream: CommonTokenStream) extends SqlBaseBaseListener {
+
+ override def exitSingleDataType(ctx: SqlBaseParser.SingleDataTypeContext): Unit = {
+ checkUnclosedComment(tokenStream, command)
+ }
+
+ override def exitSingleExpression(ctx: SqlBaseParser.SingleExpressionContext): Unit = {
+ checkUnclosedComment(tokenStream, command)
+ }
+
+ override def exitSingleTableIdentifier(ctx: SqlBaseParser.SingleTableIdentifierContext): Unit = {
+ checkUnclosedComment(tokenStream, command)
+ }
+
+ override def exitSingleFunctionIdentifier(
+ ctx: SqlBaseParser.SingleFunctionIdentifierContext): Unit = {
+ checkUnclosedComment(tokenStream, command)
+ }
+
+ override def exitSingleMultipartIdentifier(
+ ctx: SqlBaseParser.SingleMultipartIdentifierContext): Unit = {
+ checkUnclosedComment(tokenStream, command)
+ }
+
+ override def exitSingleTableSchema(ctx: SqlBaseParser.SingleTableSchemaContext): Unit = {
+ checkUnclosedComment(tokenStream, command)
+ }
+
+ override def exitQuery(ctx: SqlBaseParser.QueryContext): Unit = {
+ checkUnclosedComment(tokenStream, command)
+ }
+
+ override def exitSingleStatement(ctx: SqlBaseParser.SingleStatementContext): Unit = {
+ checkUnclosedComment(tokenStream, command)
+ }
+
+ /** check `has_unclosed_bracketed_comment` to find out the unclosed bracketed comment. */
+ private def checkUnclosedComment(tokenStream: CommonTokenStream, command: String) = {
+ assert(tokenStream.getTokenSource.isInstanceOf[SqlBaseLexer])
+ val lexer = tokenStream.getTokenSource.asInstanceOf[SqlBaseLexer]
+ if (lexer.has_unclosed_bracketed_comment) {
+ // The last token is 'EOF' and the penultimate is unclosed bracketed comment
+ val failedToken = tokenStream.get(tokenStream.size() - 2)
+ assert(failedToken.getType() == SqlBaseParser.BRACKETED_COMMENT)
+ val position = Origin(Option(failedToken.getLine), Option(failedToken.getCharPositionInLine))
+ throw new ParseException(Some(command), "Unclosed bracketed comment", position, position)
+ }
+ }
+}
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/PlanParserSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/PlanParserSuite.scala
index 6fef18b..f152c15 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/PlanParserSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/PlanParserSuite.scala
@@ -158,6 +158,56 @@ class PlanParserSuite extends AnalysisTest {
""".stripMargin, plan)
}
+ test("nested bracketed comment case seven") {
+ val plan = OneRowRelation().select(Literal(1).as("a"))
+ assertEqual(
+ """
+ |/*abc*/
+ |select 1 as a
+ |/*
+ |
+ |2 as b
+ |/*abc */
+ |, 3 as c
+ |
+ |/**/
+ |*/
+ """.stripMargin, plan)
+ }
+
+ test("unclosed bracketed comment one") {
+ val query = """
+ |/*abc*/
+ |select 1 as a
+ |/*
+ |
+ |2 as b
+ |/*abc */
+ |, 3 as c
+ |
+ |/**/
+ |""".stripMargin
+ val e = intercept[ParseException](parsePlan(query))
+ assert(e.getMessage.contains(s"Unclosed bracketed comment"))
+ }
+
+ test("unclosed bracketed comment two") {
+ val query = """
+ |/*abc*/
+ |select 1 as a
+ |/*
+ |
+ |2 as b
+ |/*abc */
+ |, 3 as c
+ |
+ |/**/
+ |select 4 as d
+ |""".stripMargin
+ val e = intercept[ParseException](parsePlan(query))
+ assert(e.getMessage.contains(s"Unclosed bracketed comment"))
+ }
+
test("case insensitive") {
val plan = table("a").select(star())
assertEqual("sELEct * FroM a", plan)
diff --git a/sql/core/src/test/resources/sql-tests/inputs/comments.sql b/sql/core/src/test/resources/sql-tests/inputs/comments.sql
index 19f11de..da5e57a 100644
--- a/sql/core/src/test/resources/sql-tests/inputs/comments.sql
+++ b/sql/core/src/test/resources/sql-tests/inputs/comments.sql
@@ -88,3 +88,32 @@ Other information of first level.
/*/**/*/
SELECT 'selected content' AS tenth;
--QUERY-DELIMITER-END
+
+-- the first case of unclosed bracketed comment
+--QUERY-DELIMITER-START
+/*abc*/
+select 1 as a
+/*
+
+2 as b
+/*abc*/
+, 3 as c
+
+/**/
+;
+--QUERY-DELIMITER-END
+
+-- the second case of unclosed bracketed comment
+--QUERY-DELIMITER-START
+/*abc*/
+select 1 as a
+/*
+
+2 as b
+/*abc*/
+, 3 as c
+
+/**/
+select 4 as d
+;
+--QUERY-DELIMITER-END
diff --git a/sql/core/src/test/resources/sql-tests/results/comments.sql.out b/sql/core/src/test/resources/sql-tests/results/comments.sql.out
index fd58a33..da9dbd5 100644
--- a/sql/core/src/test/resources/sql-tests/results/comments.sql.out
+++ b/sql/core/src/test/resources/sql-tests/results/comments.sql.out
@@ -1,5 +1,5 @@
-- Automatically generated by SQLQueryTestSuite
--- Number of queries: 10
+-- Number of queries: 12
-- !query
@@ -119,3 +119,65 @@ SELECT 'selected content' AS tenth
struct<tenth:string>
-- !query output
selected content
+
+
+-- !query
+/*abc*/
+select 1 as a
+/*
+
+2 as b
+/*abc*/
+, 3 as c
+
+/**/
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.catalyst.parser.ParseException
+
+Unclosed bracketed comment(line 3, pos 0)
+
+== SQL ==
+/*abc*/
+select 1 as a
+/*
+^^^
+
+2 as b
+/*abc*/
+, 3 as c
+
+/**/
+
+
+-- !query
+/*abc*/
+select 1 as a
+/*
+
+2 as b
+/*abc*/
+, 3 as c
+
+/**/
+select 4 as d
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.catalyst.parser.ParseException
+
+Unclosed bracketed comment(line 3, pos 0)
+
+== SQL ==
+/*abc*/
+select 1 as a
+/*
+^^^
+
+2 as b
+/*abc*/
+, 3 as c
+
+/**/
+select 4 as d
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org