Posted to commits@spark.apache.org by we...@apache.org on 2020/02/13 14:20:58 UTC

[spark] branch branch-3.0 updated: [SPARK-30758][SQL][TESTS] Improve bracketed comments tests

This is an automated email from the ASF dual-hosted git repository.

wenchen pushed a commit to branch branch-3.0
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/branch-3.0 by this push:
     new f041aaa  [SPARK-30758][SQL][TESTS] Improve bracketed comments tests
f041aaa is described below

commit f041aaaf55fb1e907e0e5b0876927ef328664664
Author: beliefer <be...@163.com>
AuthorDate: Thu Feb 13 22:06:24 2020 +0800

    [SPARK-30758][SQL][TESTS] Improve bracketed comments tests
    
    ### What changes were proposed in this pull request?
    Although Spark SQL supports bracketed comments, `SQLQueryTestSuite` does not
    handle them well, so the generated golden files cannot display bracketed
    comments correctly.
    This PR improves the treatment of bracketed comments and adds three test cases
    to `PlanParserSuite`.
    Spark SQL does not yet support nested bracketed comments;
    https://github.com/apache/spark/pull/27495 is intended to add that support.
    
    ### Why are the changes needed?
    The golden files do not display bracketed comments correctly.
    
    ### Does this PR introduce any user-facing change?
    No
    
    ### How was this patch tested?
    New UT.
    
    Closes #27481 from beliefer/ansi-brancket-comments.
    
    Authored-by: beliefer <be...@163.com>
    Signed-off-by: Wenchen Fan <we...@databricks.com>
    (cherry picked from commit 04604b9899cc43a9726d671061ff305912fdb85f)
    Signed-off-by: Wenchen Fan <we...@databricks.com>
---
 .../sql-tests/inputs/postgreSQL/comments.sql       |   6 +-
 .../sql-tests/results/postgreSQL/comments.sql.out  | 137 +++++----------------
 .../org/apache/spark/sql/SQLQueryTestSuite.scala   |  51 +++++++-
 3 files changed, 78 insertions(+), 116 deletions(-)

diff --git a/sql/core/src/test/resources/sql-tests/inputs/postgreSQL/comments.sql b/sql/core/src/test/resources/sql-tests/inputs/postgreSQL/comments.sql
index 6725ce4..1a45417 100644
--- a/sql/core/src/test/resources/sql-tests/inputs/postgreSQL/comments.sql
+++ b/sql/core/src/test/resources/sql-tests/inputs/postgreSQL/comments.sql
@@ -11,17 +11,19 @@ SELECT /* embedded single line */ 'embedded' AS `second`;
 SELECT /* both embedded and trailing single line */ 'both' AS third; -- trailing single line
 
 SELECT 'before multi-line' AS fourth;
+--QUERY-DELIMITER-START
 -- [SPARK-28880] ANSI SQL: Bracketed comments
 /* This is an example of SQL which should not execute:
  * select 'multi-line';
  */
 SELECT 'after multi-line' AS fifth;
+--QUERY-DELIMITER-END
 
 -- [SPARK-28880] ANSI SQL: Bracketed comments
 --
 -- Nested comments
 --
-
+--QUERY-DELIMITER-START
 /*
 SELECT 'trailing' as x1; -- inside block comment
 */
@@ -44,5 +46,5 @@ Hoo boy. Still two deep...
 Now just one deep...
 */
 'deeply nested example' AS sixth;
-
+--QUERY-DELIMITER-END
 /* and this is the end of the file */
diff --git a/sql/core/src/test/resources/sql-tests/results/postgreSQL/comments.sql.out b/sql/core/src/test/resources/sql-tests/results/postgreSQL/comments.sql.out
index 4ea4901..637c556 100644
--- a/sql/core/src/test/resources/sql-tests/results/postgreSQL/comments.sql.out
+++ b/sql/core/src/test/resources/sql-tests/results/postgreSQL/comments.sql.out
@@ -1,5 +1,5 @@
 -- Automatically generated by SQLQueryTestSuite
--- Number of queries: 13
+-- Number of queries: 7
 
 
 -- !query
@@ -36,129 +36,32 @@ before multi-line
 
 -- !query
 /* This is an example of SQL which should not execute:
- * select 'multi-line'
--- !query schema
-struct<>
--- !query output
-org.apache.spark.sql.catalyst.parser.ParseException
-
-mismatched input '/' expecting {'(', 'ADD', 'ALTER', 'ANALYZE', 'CACHE', 'CLEAR', 'COMMENT', 'COMMIT', 'CREATE', 'DELETE', 'DESC', 'DESCRIBE', 'DFS', 'DROP', 'EXPLAIN', 'EXPORT', 'FROM', 'GRANT', 'IMPORT', 'INSERT', 'LIST', 'LOAD', 'LOCK', 'MAP', 'MERGE', 'MSCK', 'REDUCE', 'REFRESH', 'REPLACE', 'RESET', 'REVOKE', 'ROLLBACK', 'SELECT', 'SET', 'SHOW', 'START', 'TABLE', 'TRUNCATE', 'UNCACHE', 'UNLOCK', 'UPDATE', 'USE', 'VALUES', 'WITH'}(line 1, pos 0)
-
-== SQL ==
-/* This is an example of SQL which should not execute:
-^^^
- * select 'multi-line'
-
-
--- !query
-*/
+ * select 'multi-line';
+ */
 SELECT 'after multi-line' AS fifth
 -- !query schema
-struct<>
+struct<fifth:string>
 -- !query output
-org.apache.spark.sql.catalyst.parser.ParseException
-
-extraneous input '*/' expecting {'(', 'ADD', 'ALTER', 'ANALYZE', 'CACHE', 'CLEAR', 'COMMENT', 'COMMIT', 'CREATE', 'DELETE', 'DESC', 'DESCRIBE', 'DFS', 'DROP', 'EXPLAIN', 'EXPORT', 'FROM', 'GRANT', 'IMPORT', 'INSERT', 'LIST', 'LOAD', 'LOCK', 'MAP', 'MERGE', 'MSCK', 'REDUCE', 'REFRESH', 'REPLACE', 'RESET', 'REVOKE', 'ROLLBACK', 'SELECT', 'SET', 'SHOW', 'START', 'TABLE', 'TRUNCATE', 'UNCACHE', 'UNLOCK', 'UPDATE', 'USE', 'VALUES', 'WITH'}(line 1, pos 0)
-
-== SQL ==
-*/
-^^^
-SELECT 'after multi-line' AS fifth
+after multi-line
 
 
 -- !query
 /*
-SELECT 'trailing' as x1
--- !query schema
-struct<>
--- !query output
-org.apache.spark.sql.catalyst.parser.ParseException
-
-mismatched input '/' expecting {'(', 'ADD', 'ALTER', 'ANALYZE', 'CACHE', 'CLEAR', 'COMMENT', 'COMMIT', 'CREATE', 'DELETE', 'DESC', 'DESCRIBE', 'DFS', 'DROP', 'EXPLAIN', 'EXPORT', 'FROM', 'GRANT', 'IMPORT', 'INSERT', 'LIST', 'LOAD', 'LOCK', 'MAP', 'MERGE', 'MSCK', 'REDUCE', 'REFRESH', 'REPLACE', 'RESET', 'REVOKE', 'ROLLBACK', 'SELECT', 'SET', 'SHOW', 'START', 'TABLE', 'TRUNCATE', 'UNCACHE', 'UNLOCK', 'UPDATE', 'USE', 'VALUES', 'WITH'}(line 1, pos 0)
-
-== SQL ==
-/*
-^^^
-SELECT 'trailing' as x1
-
-
--- !query
+SELECT 'trailing' as x1; -- inside block comment
 */
 
 /* This block comment surrounds a query which itself has a block comment...
-SELECT /* embedded single line */ 'embedded' AS x2
--- !query schema
-struct<>
--- !query output
-org.apache.spark.sql.catalyst.parser.ParseException
-
-mismatched input '*/' expecting {'(', 'ADD', 'ALTER', 'ANALYZE', 'CACHE', 'CLEAR', 'COMMENT', 'COMMIT', 'CREATE', 'DELETE', 'DESC', 'DESCRIBE', 'DFS', 'DROP', 'EXPLAIN', 'EXPORT', 'FROM', 'GRANT', 'IMPORT', 'INSERT', 'LIST', 'LOAD', 'LOCK', 'MAP', 'MERGE', 'MSCK', 'REDUCE', 'REFRESH', 'REPLACE', 'RESET', 'REVOKE', 'ROLLBACK', 'SELECT', 'SET', 'SHOW', 'START', 'TABLE', 'TRUNCATE', 'UNCACHE', 'UNLOCK', 'UPDATE', 'USE', 'VALUES', 'WITH'}(line 1, pos 0)
-
-== SQL ==
-*/
-^^^
-
-/* This block comment surrounds a query which itself has a block comment...
-SELECT /* embedded single line */ 'embedded' AS x2
-
-
--- !query
+SELECT /* embedded single line */ 'embedded' AS x2;
 */
 
 SELECT -- continued after the following block comments...
 /* Deeply nested comment.
    This includes a single apostrophe to make sure we aren't decoding this part as a string.
-SELECT 'deep nest' AS n1
--- !query schema
-struct<>
--- !query output
-org.apache.spark.sql.catalyst.parser.ParseException
-
-extraneous input '*/' expecting {'(', 'ADD', 'ALTER', 'ANALYZE', 'CACHE', 'CLEAR', 'COMMENT', 'COMMIT', 'CREATE', 'DELETE', 'DESC', 'DESCRIBE', 'DFS', 'DROP', 'EXPLAIN', 'EXPORT', 'FROM', 'GRANT', 'IMPORT', 'INSERT', 'LIST', 'LOAD', 'LOCK', 'MAP', 'MERGE', 'MSCK', 'REDUCE', 'REFRESH', 'REPLACE', 'RESET', 'REVOKE', 'ROLLBACK', 'SELECT', 'SET', 'SHOW', 'START', 'TABLE', 'TRUNCATE', 'UNCACHE', 'UNLOCK', 'UPDATE', 'USE', 'VALUES', 'WITH'}(line 1, pos 0)
-
-== SQL ==
-*/
-^^^
-
-SELECT -- continued after the following block comments...
-/* Deeply nested comment.
-   This includes a single apostrophe to make sure we aren't decoding this part as a string.
-SELECT 'deep nest' AS n1
-
-
--- !query
+SELECT 'deep nest' AS n1;
 /* Second level of nesting...
-SELECT 'deeper nest' as n2
--- !query schema
-struct<>
--- !query output
-org.apache.spark.sql.catalyst.parser.ParseException
-
-mismatched input '/' expecting {'(', 'ADD', 'ALTER', 'ANALYZE', 'CACHE', 'CLEAR', 'COMMENT', 'COMMIT', 'CREATE', 'DELETE', 'DESC', 'DESCRIBE', 'DFS', 'DROP', 'EXPLAIN', 'EXPORT', 'FROM', 'GRANT', 'IMPORT', 'INSERT', 'LIST', 'LOAD', 'LOCK', 'MAP', 'MERGE', 'MSCK', 'REDUCE', 'REFRESH', 'REPLACE', 'RESET', 'REVOKE', 'ROLLBACK', 'SELECT', 'SET', 'SHOW', 'START', 'TABLE', 'TRUNCATE', 'UNCACHE', 'UNLOCK', 'UPDATE', 'USE', 'VALUES', 'WITH'}(line 1, pos 0)
-
-== SQL ==
-/* Second level of nesting...
-^^^
-SELECT 'deeper nest' as n2
-
-
--- !query
+SELECT 'deeper nest' as n2;
 /* Third level of nesting...
-SELECT 'deepest nest' as n3
--- !query schema
-struct<>
--- !query output
-org.apache.spark.sql.catalyst.parser.ParseException
-
-mismatched input '/' expecting {'(', 'ADD', 'ALTER', 'ANALYZE', 'CACHE', 'CLEAR', 'COMMENT', 'COMMIT', 'CREATE', 'DELETE', 'DESC', 'DESCRIBE', 'DFS', 'DROP', 'EXPLAIN', 'EXPORT', 'FROM', 'GRANT', 'IMPORT', 'INSERT', 'LIST', 'LOAD', 'LOCK', 'MAP', 'MERGE', 'MSCK', 'REDUCE', 'REFRESH', 'REPLACE', 'RESET', 'REVOKE', 'ROLLBACK', 'SELECT', 'SET', 'SHOW', 'START', 'TABLE', 'TRUNCATE', 'UNCACHE', 'UNLOCK', 'UPDATE', 'USE', 'VALUES', 'WITH'}(line 1, pos 0)
-
-== SQL ==
-/* Third level of nesting...
-^^^
-SELECT 'deepest nest' as n3
-
-
--- !query
+SELECT 'deepest nest' as n3;
 */
 Hoo boy. Still two deep...
 */
@@ -170,11 +73,27 @@ struct<>
 -- !query output
 org.apache.spark.sql.catalyst.parser.ParseException
 
-mismatched input '*/' expecting {'(', 'ADD', 'ALTER', 'ANALYZE', 'CACHE', 'CLEAR', 'COMMENT', 'COMMIT', 'CREATE', 'DELETE', 'DESC', 'DESCRIBE', 'DFS', 'DROP', 'EXPLAIN', 'EXPORT', 'FROM', 'GRANT', 'IMPORT', 'INSERT', 'LIST', 'LOAD', 'LOCK', 'MAP', 'MERGE', 'MSCK', 'REDUCE', 'REFRESH', 'REPLACE', 'RESET', 'REVOKE', 'ROLLBACK', 'SELECT', 'SET', 'SHOW', 'START', 'TABLE', 'TRUNCATE', 'UNCACHE', 'UNLOCK', 'UPDATE', 'USE', 'VALUES', 'WITH'}(line 1, pos 0)
+mismatched input ''embedded'' expecting {'(', 'ADD', 'ALTER', 'ANALYZE', 'CACHE', 'CLEAR', 'COMMENT', 'COMMIT', 'CREATE', 'DELETE', 'DESC', 'DESCRIBE', 'DFS', 'DROP', 'EXPLAIN', 'EXPORT', 'FROM', 'GRANT', 'IMPORT', 'INSERT', 'LIST', 'LOAD', 'LOCK', 'MAP', 'MERGE', 'MSCK', 'REDUCE', 'REFRESH', 'REPLACE', 'RESET', 'REVOKE', 'ROLLBACK', 'SELECT', 'SET', 'SHOW', 'START', 'TABLE', 'TRUNCATE', 'UNCACHE', 'UNLOCK', 'UPDATE', 'USE', 'VALUES', 'WITH'}(line 6, pos 34)
 
 == SQL ==
+/*
+SELECT 'trailing' as x1; -- inside block comment
+*/
+
+/* This block comment surrounds a query which itself has a block comment...
+SELECT /* embedded single line */ 'embedded' AS x2;
+----------------------------------^^^
+*/
+
+SELECT -- continued after the following block comments...
+/* Deeply nested comment.
+   This includes a single apostrophe to make sure we aren't decoding this part as a string.
+SELECT 'deep nest' AS n1;
+/* Second level of nesting...
+SELECT 'deeper nest' as n2;
+/* Third level of nesting...
+SELECT 'deepest nest' as n3;
 */
-^^^
 Hoo boy. Still two deep...
 */
 Now just one deep...
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala
index 6b9e5bb..da4727f 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala
@@ -19,7 +19,9 @@ package org.apache.spark.sql
 
 import java.io.File
 import java.util.{Locale, TimeZone}
+import java.util.regex.Pattern
 
+import scala.collection.mutable.{ArrayBuffer, HashMap}
 import scala.util.control.NonFatal
 
 import org.apache.spark.{SparkConf, SparkException}
@@ -62,7 +64,12 @@ import org.apache.spark.tags.ExtendedSQLTest
  * }}}
  *
  * The format for input files is simple:
- *  1. A list of SQL queries separated by semicolon.
+ *  1. A list of SQL queries separated by semicolons by default. If the semicolon cannot effectively
+ *     separate the SQL queries in the test file (e.g. bracketed comments), please use
+ *     --QUERY-DELIMITER-START and --QUERY-DELIMITER-END. Lines starting with
+ *     --QUERY-DELIMITER-START and --QUERY-DELIMITER-END represent the beginning and end of a query,
+ *     respectively. Code that is not surrounded by lines that begin with --QUERY-DELIMITER-START
+ *     and --QUERY-DELIMITER-END is still separated by semicolons.
  *  2. Lines starting with -- are treated as comments and ignored.
  *  3. Lines starting with --SET are used to specify the configs when running this testing file. You
  *     can set multiple configs in one --SET, using comma to separate them. Or you can use multiple
@@ -246,9 +253,15 @@ class SQLQueryTestSuite extends QueryTest with SharedSparkSession {
 
   /** Run a test case. */
   protected def runTest(testCase: TestCase): Unit = {
+    def splitWithSemicolon(seq: Seq[String]) = {
+      seq.mkString("\n").split("(?<=[^\\\\]);")
+    }
     val input = fileToString(new File(testCase.inputFile))
 
-    val (comments, code) = input.split("\n").partition(_.trim.startsWith("--"))
+    val (comments, code) = input.split("\n").partition { line =>
+      val newLine = line.trim
+      newLine.startsWith("--") && !newLine.startsWith("--QUERY-DELIMITER")
+    }
 
     // If `--IMPORT` found, load code from another test case file, then insert them
     // into the head in this test.
@@ -261,10 +274,38 @@ class SQLQueryTestSuite extends QueryTest with SharedSparkSession {
       }
     }.flatten
 
+    val allCode = importedCode ++ code
+    val tempQueries = if (allCode.exists(_.trim.startsWith("--QUERY-DELIMITER"))) {
+      // Although this loop is heavy, it is only used for the bracketed comments tests.
+      val queries = new ArrayBuffer[String]
+      val otherCodes = new ArrayBuffer[String]
+      var tempStr = ""
+      var start = false
+      for (c <- allCode) {
+        if (c.trim.startsWith("--QUERY-DELIMITER-START")) {
+          start = true
+          queries ++= splitWithSemicolon(otherCodes.toSeq)
+          otherCodes.clear()
+        } else if (c.trim.startsWith("--QUERY-DELIMITER-END")) {
+          start = false
+          queries += s"\n${tempStr.stripSuffix(";")}"
+          tempStr = ""
+        } else if (start) {
+          tempStr += s"\n$c"
+        } else {
+          otherCodes += c
+        }
+      }
+      if (otherCodes.nonEmpty) {
+        queries ++= splitWithSemicolon(otherCodes.toSeq)
+      }
+      queries.toSeq
+    } else {
+      splitWithSemicolon(allCode).toSeq
+    }
+
     // List of SQL queries to run
-    // note: this is not a robust way to split queries using semicolon, but works for now.
-    val queries = (importedCode ++ code).mkString("\n").split("(?<=[^\\\\]);")
-      .map(_.trim).filter(_ != "").toSeq
+    val queries = tempQueries.map(_.trim).filter(_ != "").toSeq
       // Fix misplacement when comment is at the end of the query.
       .map(_.split("\n").filterNot(_.startsWith("--")).mkString("\n")).map(_.trim).filter(_ != "")
 
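For context on the golden-file changes above: Spark SQL accepts bracketed comments, but on this branch nested bracketed comments still fail to parse (SPARK-28880; https://github.com/apache/spark/pull/27495 aims to add support). A minimal spark-shell sketch of that behavior, assuming a Spark 3.0 session bound to `spark`; the sample queries are illustrative, not taken from the test files:

    // A plain bracketed comment parses; the query returns one row.
    spark.sql("SELECT /* a bracketed comment */ 1 AS col").show()

    // A nested bracketed comment does not: the comment is closed at the
    // first '*/', so the remainder of the statement is not valid SQL and
    // the call fails with org.apache.spark.sql.catalyst.parser.ParseException,
    // which is what the golden file above now records.
    spark.sql("SELECT /* outer /* inner */ text */ 1 AS col").show()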

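The `splitWithSemicolon` helper introduced in the patch splits the joined input on the regex `(?<=[^\\]);`, i.e. on any semicolon whose preceding character is not a backslash, so an escaped `\;` inside a query is preserved. A self-contained sketch of that split, with illustrative sample queries:

    object SplitWithSemicolonDemo {
      def main(args: Array[String]): Unit = {
        // Same regex as the suite: split on ';' unless it is preceded
        // by a backslash.
        val lines = Seq(
          "SELECT 'a\\;b' AS escaped;",
          "SELECT 1;",
          "SELECT 2;")
        val queries = lines.mkString("\n")
          .split("(?<=[^\\\\]);")
          .map(_.trim)
          .filter(_.nonEmpty)
        // Prints exactly three queries; the escaped semicolon inside
        // the first string literal is not a split point.
        queries.foreach(q => println(s"[$q]"))
      }
    }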

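The delimiter handling itself works as follows: lines between --QUERY-DELIMITER-START and --QUERY-DELIMITER-END are accumulated into a single query (with the trailing semicolon stripped), while everything outside the markers falls back to semicolon splitting; this is also why the comment filter in the patch keeps --QUERY-DELIMITER lines out of the comment bucket, so they reach the loop. A simplified standalone sketch of the same algorithm, with illustrative names and input:

    import scala.collection.mutable.ArrayBuffer

    object QueryDelimiterDemo {
      // Split on ';' unless preceded by a backslash, as in the suite.
      private def splitWithSemicolon(seq: Seq[String]): Seq[String] =
        seq.mkString("\n").split("(?<=[^\\\\]);").toSeq

      def splitQueries(code: Seq[String]): Seq[String] = {
        val queries = new ArrayBuffer[String]
        val otherCodes = new ArrayBuffer[String]
        var tempStr = ""
        var inDelimited = false
        for (line <- code) {
          if (line.trim.startsWith("--QUERY-DELIMITER-START")) {
            // Flush everything seen so far using plain semicolon splitting.
            inDelimited = true
            queries ++= splitWithSemicolon(otherCodes.toSeq)
            otherCodes.clear()
          } else if (line.trim.startsWith("--QUERY-DELIMITER-END")) {
            // The whole delimited region becomes one query.
            inDelimited = false
            queries += tempStr.stripSuffix(";")
            tempStr = ""
          } else if (inDelimited) {
            tempStr += s"\n$line"
          } else {
            otherCodes += line
          }
        }
        if (otherCodes.nonEmpty) {
          queries ++= splitWithSemicolon(otherCodes.toSeq)
        }
        queries.toSeq.map(_.trim).filter(_.nonEmpty)
      }

      def main(args: Array[String]): Unit = {
        val input = Seq(
          "SELECT 1;",
          "--QUERY-DELIMITER-START",
          "/* a bracketed comment; with a semicolon inside */",
          "SELECT 2;",
          "--QUERY-DELIMITER-END",
          "SELECT 3;")
        // Yields three queries; the ';' inside the bracketed comment
        // does not split the delimited query.
        splitQueries(input).foreach(q => println(s"---\n$q"))
      }
    }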
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org