You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by we...@apache.org on 2020/02/13 14:20:58 UTC
[spark] branch branch-3.0 updated: [SPARK-30758][SQL][TESTS]
Improve bracketed comments tests
This is an automated email from the ASF dual-hosted git repository.
wenchen pushed a commit to branch branch-3.0
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/branch-3.0 by this push:
new f041aaa [SPARK-30758][SQL][TESTS] Improve bracketed comments tests
f041aaa is described below
commit f041aaaf55fb1e907e0e5b0876927ef328664664
Author: beliefer <be...@163.com>
AuthorDate: Thu Feb 13 22:06:24 2020 +0800
[SPARK-30758][SQL][TESTS] Improve bracketed comments tests
### What changes were proposed in this pull request?
Although Spark SQL supports bracketed comments, `SQLQueryTestSuite` can't handle bracketed comments well, which leads to generated golden files that can't display bracketed comments correctly.
This PR improves the treatment of bracketed comments and adds three test cases in `PlanParserSuite`.
Spark SQL does not yet support nested bracketed comments; https://github.com/apache/spark/pull/27495 is intended to add that support.
### Why are the changes needed?
Without this change, the generated golden files do not display bracketed comments correctly.
### Does this PR introduce any user-facing change?
No
### How was this patch tested?
New UT.
Closes #27481 from beliefer/ansi-brancket-comments.
Authored-by: beliefer <be...@163.com>
Signed-off-by: Wenchen Fan <we...@databricks.com>
(cherry picked from commit 04604b9899cc43a9726d671061ff305912fdb85f)
Signed-off-by: Wenchen Fan <we...@databricks.com>
---
.../sql-tests/inputs/postgreSQL/comments.sql | 6 +-
.../sql-tests/results/postgreSQL/comments.sql.out | 137 +++++----------------
.../org/apache/spark/sql/SQLQueryTestSuite.scala | 51 +++++++-
3 files changed, 78 insertions(+), 116 deletions(-)
diff --git a/sql/core/src/test/resources/sql-tests/inputs/postgreSQL/comments.sql b/sql/core/src/test/resources/sql-tests/inputs/postgreSQL/comments.sql
index 6725ce4..1a45417 100644
--- a/sql/core/src/test/resources/sql-tests/inputs/postgreSQL/comments.sql
+++ b/sql/core/src/test/resources/sql-tests/inputs/postgreSQL/comments.sql
@@ -11,17 +11,19 @@ SELECT /* embedded single line */ 'embedded' AS `second`;
SELECT /* both embedded and trailing single line */ 'both' AS third; -- trailing single line
SELECT 'before multi-line' AS fourth;
+--QUERY-DELIMITER-START
-- [SPARK-28880] ANSI SQL: Bracketed comments
/* This is an example of SQL which should not execute:
* select 'multi-line';
*/
SELECT 'after multi-line' AS fifth;
+--QUERY-DELIMITER-END
-- [SPARK-28880] ANSI SQL: Bracketed comments
--
-- Nested comments
--
-
+--QUERY-DELIMITER-START
/*
SELECT 'trailing' as x1; -- inside block comment
*/
@@ -44,5 +46,5 @@ Hoo boy. Still two deep...
Now just one deep...
*/
'deeply nested example' AS sixth;
-
+--QUERY-DELIMITER-END
/* and this is the end of the file */
diff --git a/sql/core/src/test/resources/sql-tests/results/postgreSQL/comments.sql.out b/sql/core/src/test/resources/sql-tests/results/postgreSQL/comments.sql.out
index 4ea4901..637c556 100644
--- a/sql/core/src/test/resources/sql-tests/results/postgreSQL/comments.sql.out
+++ b/sql/core/src/test/resources/sql-tests/results/postgreSQL/comments.sql.out
@@ -1,5 +1,5 @@
-- Automatically generated by SQLQueryTestSuite
--- Number of queries: 13
+-- Number of queries: 7
-- !query
@@ -36,129 +36,32 @@ before multi-line
-- !query
/* This is an example of SQL which should not execute:
- * select 'multi-line'
--- !query schema
-struct<>
--- !query output
-org.apache.spark.sql.catalyst.parser.ParseException
-
-mismatched input '/' expecting {'(', 'ADD', 'ALTER', 'ANALYZE', 'CACHE', 'CLEAR', 'COMMENT', 'COMMIT', 'CREATE', 'DELETE', 'DESC', 'DESCRIBE', 'DFS', 'DROP', 'EXPLAIN', 'EXPORT', 'FROM', 'GRANT', 'IMPORT', 'INSERT', 'LIST', 'LOAD', 'LOCK', 'MAP', 'MERGE', 'MSCK', 'REDUCE', 'REFRESH', 'REPLACE', 'RESET', 'REVOKE', 'ROLLBACK', 'SELECT', 'SET', 'SHOW', 'START', 'TABLE', 'TRUNCATE', 'UNCACHE', 'UNLOCK', 'UPDATE', 'USE', 'VALUES', 'WITH'}(line 1, pos 0)
-
-== SQL ==
-/* This is an example of SQL which should not execute:
-^^^
- * select 'multi-line'
-
-
--- !query
-*/
+ * select 'multi-line';
+ */
SELECT 'after multi-line' AS fifth
-- !query schema
-struct<>
+struct<fifth:string>
-- !query output
-org.apache.spark.sql.catalyst.parser.ParseException
-
-extraneous input '*/' expecting {'(', 'ADD', 'ALTER', 'ANALYZE', 'CACHE', 'CLEAR', 'COMMENT', 'COMMIT', 'CREATE', 'DELETE', 'DESC', 'DESCRIBE', 'DFS', 'DROP', 'EXPLAIN', 'EXPORT', 'FROM', 'GRANT', 'IMPORT', 'INSERT', 'LIST', 'LOAD', 'LOCK', 'MAP', 'MERGE', 'MSCK', 'REDUCE', 'REFRESH', 'REPLACE', 'RESET', 'REVOKE', 'ROLLBACK', 'SELECT', 'SET', 'SHOW', 'START', 'TABLE', 'TRUNCATE', 'UNCACHE', 'UNLOCK', 'UPDATE', 'USE', 'VALUES', 'WITH'}(line 1, pos 0)
-
-== SQL ==
-*/
-^^^
-SELECT 'after multi-line' AS fifth
+after multi-line
-- !query
/*
-SELECT 'trailing' as x1
--- !query schema
-struct<>
--- !query output
-org.apache.spark.sql.catalyst.parser.ParseException
-
-mismatched input '/' expecting {'(', 'ADD', 'ALTER', 'ANALYZE', 'CACHE', 'CLEAR', 'COMMENT', 'COMMIT', 'CREATE', 'DELETE', 'DESC', 'DESCRIBE', 'DFS', 'DROP', 'EXPLAIN', 'EXPORT', 'FROM', 'GRANT', 'IMPORT', 'INSERT', 'LIST', 'LOAD', 'LOCK', 'MAP', 'MERGE', 'MSCK', 'REDUCE', 'REFRESH', 'REPLACE', 'RESET', 'REVOKE', 'ROLLBACK', 'SELECT', 'SET', 'SHOW', 'START', 'TABLE', 'TRUNCATE', 'UNCACHE', 'UNLOCK', 'UPDATE', 'USE', 'VALUES', 'WITH'}(line 1, pos 0)
-
-== SQL ==
-/*
-^^^
-SELECT 'trailing' as x1
-
-
--- !query
+SELECT 'trailing' as x1; -- inside block comment
*/
/* This block comment surrounds a query which itself has a block comment...
-SELECT /* embedded single line */ 'embedded' AS x2
--- !query schema
-struct<>
--- !query output
-org.apache.spark.sql.catalyst.parser.ParseException
-
-mismatched input '*/' expecting {'(', 'ADD', 'ALTER', 'ANALYZE', 'CACHE', 'CLEAR', 'COMMENT', 'COMMIT', 'CREATE', 'DELETE', 'DESC', 'DESCRIBE', 'DFS', 'DROP', 'EXPLAIN', 'EXPORT', 'FROM', 'GRANT', 'IMPORT', 'INSERT', 'LIST', 'LOAD', 'LOCK', 'MAP', 'MERGE', 'MSCK', 'REDUCE', 'REFRESH', 'REPLACE', 'RESET', 'REVOKE', 'ROLLBACK', 'SELECT', 'SET', 'SHOW', 'START', 'TABLE', 'TRUNCATE', 'UNCACHE', 'UNLOCK', 'UPDATE', 'USE', 'VALUES', 'WITH'}(line 1, pos 0)
-
-== SQL ==
-*/
-^^^
-
-/* This block comment surrounds a query which itself has a block comment...
-SELECT /* embedded single line */ 'embedded' AS x2
-
-
--- !query
+SELECT /* embedded single line */ 'embedded' AS x2;
*/
SELECT -- continued after the following block comments...
/* Deeply nested comment.
This includes a single apostrophe to make sure we aren't decoding this part as a string.
-SELECT 'deep nest' AS n1
--- !query schema
-struct<>
--- !query output
-org.apache.spark.sql.catalyst.parser.ParseException
-
-extraneous input '*/' expecting {'(', 'ADD', 'ALTER', 'ANALYZE', 'CACHE', 'CLEAR', 'COMMENT', 'COMMIT', 'CREATE', 'DELETE', 'DESC', 'DESCRIBE', 'DFS', 'DROP', 'EXPLAIN', 'EXPORT', 'FROM', 'GRANT', 'IMPORT', 'INSERT', 'LIST', 'LOAD', 'LOCK', 'MAP', 'MERGE', 'MSCK', 'REDUCE', 'REFRESH', 'REPLACE', 'RESET', 'REVOKE', 'ROLLBACK', 'SELECT', 'SET', 'SHOW', 'START', 'TABLE', 'TRUNCATE', 'UNCACHE', 'UNLOCK', 'UPDATE', 'USE', 'VALUES', 'WITH'}(line 1, pos 0)
-
-== SQL ==
-*/
-^^^
-
-SELECT -- continued after the following block comments...
-/* Deeply nested comment.
- This includes a single apostrophe to make sure we aren't decoding this part as a string.
-SELECT 'deep nest' AS n1
-
-
--- !query
+SELECT 'deep nest' AS n1;
/* Second level of nesting...
-SELECT 'deeper nest' as n2
--- !query schema
-struct<>
--- !query output
-org.apache.spark.sql.catalyst.parser.ParseException
-
-mismatched input '/' expecting {'(', 'ADD', 'ALTER', 'ANALYZE', 'CACHE', 'CLEAR', 'COMMENT', 'COMMIT', 'CREATE', 'DELETE', 'DESC', 'DESCRIBE', 'DFS', 'DROP', 'EXPLAIN', 'EXPORT', 'FROM', 'GRANT', 'IMPORT', 'INSERT', 'LIST', 'LOAD', 'LOCK', 'MAP', 'MERGE', 'MSCK', 'REDUCE', 'REFRESH', 'REPLACE', 'RESET', 'REVOKE', 'ROLLBACK', 'SELECT', 'SET', 'SHOW', 'START', 'TABLE', 'TRUNCATE', 'UNCACHE', 'UNLOCK', 'UPDATE', 'USE', 'VALUES', 'WITH'}(line 1, pos 0)
-
-== SQL ==
-/* Second level of nesting...
-^^^
-SELECT 'deeper nest' as n2
-
-
--- !query
+SELECT 'deeper nest' as n2;
/* Third level of nesting...
-SELECT 'deepest nest' as n3
--- !query schema
-struct<>
--- !query output
-org.apache.spark.sql.catalyst.parser.ParseException
-
-mismatched input '/' expecting {'(', 'ADD', 'ALTER', 'ANALYZE', 'CACHE', 'CLEAR', 'COMMENT', 'COMMIT', 'CREATE', 'DELETE', 'DESC', 'DESCRIBE', 'DFS', 'DROP', 'EXPLAIN', 'EXPORT', 'FROM', 'GRANT', 'IMPORT', 'INSERT', 'LIST', 'LOAD', 'LOCK', 'MAP', 'MERGE', 'MSCK', 'REDUCE', 'REFRESH', 'REPLACE', 'RESET', 'REVOKE', 'ROLLBACK', 'SELECT', 'SET', 'SHOW', 'START', 'TABLE', 'TRUNCATE', 'UNCACHE', 'UNLOCK', 'UPDATE', 'USE', 'VALUES', 'WITH'}(line 1, pos 0)
-
-== SQL ==
-/* Third level of nesting...
-^^^
-SELECT 'deepest nest' as n3
-
-
--- !query
+SELECT 'deepest nest' as n3;
*/
Hoo boy. Still two deep...
*/
@@ -170,11 +73,27 @@ struct<>
-- !query output
org.apache.spark.sql.catalyst.parser.ParseException
-mismatched input '*/' expecting {'(', 'ADD', 'ALTER', 'ANALYZE', 'CACHE', 'CLEAR', 'COMMENT', 'COMMIT', 'CREATE', 'DELETE', 'DESC', 'DESCRIBE', 'DFS', 'DROP', 'EXPLAIN', 'EXPORT', 'FROM', 'GRANT', 'IMPORT', 'INSERT', 'LIST', 'LOAD', 'LOCK', 'MAP', 'MERGE', 'MSCK', 'REDUCE', 'REFRESH', 'REPLACE', 'RESET', 'REVOKE', 'ROLLBACK', 'SELECT', 'SET', 'SHOW', 'START', 'TABLE', 'TRUNCATE', 'UNCACHE', 'UNLOCK', 'UPDATE', 'USE', 'VALUES', 'WITH'}(line 1, pos 0)
+mismatched input ''embedded'' expecting {'(', 'ADD', 'ALTER', 'ANALYZE', 'CACHE', 'CLEAR', 'COMMENT', 'COMMIT', 'CREATE', 'DELETE', 'DESC', 'DESCRIBE', 'DFS', 'DROP', 'EXPLAIN', 'EXPORT', 'FROM', 'GRANT', 'IMPORT', 'INSERT', 'LIST', 'LOAD', 'LOCK', 'MAP', 'MERGE', 'MSCK', 'REDUCE', 'REFRESH', 'REPLACE', 'RESET', 'REVOKE', 'ROLLBACK', 'SELECT', 'SET', 'SHOW', 'START', 'TABLE', 'TRUNCATE', 'UNCACHE', 'UNLOCK', 'UPDATE', 'USE', 'VALUES', 'WITH'}(line 6, pos 34)
== SQL ==
+/*
+SELECT 'trailing' as x1; -- inside block comment
+*/
+
+/* This block comment surrounds a query which itself has a block comment...
+SELECT /* embedded single line */ 'embedded' AS x2;
+----------------------------------^^^
+*/
+
+SELECT -- continued after the following block comments...
+/* Deeply nested comment.
+ This includes a single apostrophe to make sure we aren't decoding this part as a string.
+SELECT 'deep nest' AS n1;
+/* Second level of nesting...
+SELECT 'deeper nest' as n2;
+/* Third level of nesting...
+SELECT 'deepest nest' as n3;
*/
-^^^
Hoo boy. Still two deep...
*/
Now just one deep...
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala
index 6b9e5bb..da4727f 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala
@@ -19,7 +19,9 @@ package org.apache.spark.sql
import java.io.File
import java.util.{Locale, TimeZone}
+import java.util.regex.Pattern
+import scala.collection.mutable.{ArrayBuffer, HashMap}
import scala.util.control.NonFatal
import org.apache.spark.{SparkConf, SparkException}
@@ -62,7 +64,12 @@ import org.apache.spark.tags.ExtendedSQLTest
* }}}
*
* The format for input files is simple:
- * 1. A list of SQL queries separated by semicolon.
+ * 1. A list of SQL queries separated by semicolons by default. If the semicolon cannot effectively
+ * separate the SQL queries in the test file(e.g. bracketed comments), please use
+ * --QUERY-DELIMITER-START and --QUERY-DELIMITER-END. Lines starting with
+ * --QUERY-DELIMITER-START and --QUERY-DELIMITER-END represent the beginning and end of a query,
+ * respectively. Code that is not surrounded by lines that begin with --QUERY-DELIMITER-START
+ * and --QUERY-DELIMITER-END is still separated by semicolons.
* 2. Lines starting with -- are treated as comments and ignored.
* 3. Lines starting with --SET are used to specify the configs when running this testing file. You
* can set multiple configs in one --SET, using comma to separate them. Or you can use multiple
@@ -246,9 +253,15 @@ class SQLQueryTestSuite extends QueryTest with SharedSparkSession {
/** Run a test case. */
protected def runTest(testCase: TestCase): Unit = {
+ def splitWithSemicolon(seq: Seq[String]) = {
+ seq.mkString("\n").split("(?<=[^\\\\]);")
+ }
val input = fileToString(new File(testCase.inputFile))
- val (comments, code) = input.split("\n").partition(_.trim.startsWith("--"))
+ val (comments, code) = input.split("\n").partition { line =>
+ val newLine = line.trim
+ newLine.startsWith("--") && !newLine.startsWith("--QUERY-DELIMITER")
+ }
// If `--IMPORT` found, load code from another test case file, then insert them
// into the head in this test.
@@ -261,10 +274,38 @@ class SQLQueryTestSuite extends QueryTest with SharedSparkSession {
}
}.flatten
+ val allCode = importedCode ++ code
+ val tempQueries = if (allCode.exists(_.trim.startsWith("--QUERY-DELIMITER"))) {
+ // Although the loop is heavy, only used for bracketed comments test.
+ val querys = new ArrayBuffer[String]
+ val otherCodes = new ArrayBuffer[String]
+ var tempStr = ""
+ var start = false
+ for (c <- allCode) {
+ if (c.trim.startsWith("--QUERY-DELIMITER-START")) {
+ start = true
+ querys ++= splitWithSemicolon(otherCodes.toSeq)
+ otherCodes.clear()
+ } else if (c.trim.startsWith("--QUERY-DELIMITER-END")) {
+ start = false
+ querys += s"\n${tempStr.stripSuffix(";")}"
+ tempStr = ""
+ } else if (start) {
+ tempStr += s"\n$c"
+ } else {
+ otherCodes += c
+ }
+ }
+ if (otherCodes.nonEmpty) {
+ querys ++= splitWithSemicolon(otherCodes.toSeq)
+ }
+ querys.toSeq
+ } else {
+ splitWithSemicolon(allCode).toSeq
+ }
+
// List of SQL queries to run
- // note: this is not a robust way to split queries using semicolon, but works for now.
- val queries = (importedCode ++ code).mkString("\n").split("(?<=[^\\\\]);")
- .map(_.trim).filter(_ != "").toSeq
+ val queries = tempQueries.map(_.trim).filter(_ != "").toSeq
// Fix misplacement when comment is at the end of the query.
.map(_.split("\n").filterNot(_.startsWith("--")).mkString("\n")).map(_.trim).filter(_ != "")
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org