You are viewing a plain text version of this content. The canonical link for it is here.
Posted to reviews@spark.apache.org by GitBox <gi...@apache.org> on 2020/10/13 02:15:40 UTC

[GitHub] [spark] maropu commented on a change in pull request #29982: [SPARK-33100][SQL] Ignore the content inside bracketed comment and ignore the comment without content

maropu commented on a change in pull request #29982:
URL: https://github.com/apache/spark/pull/29982#discussion_r503624175



##########
File path: sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLCLIDriver.scala
##########
@@ -519,15 +519,29 @@ private[hive] class SparkSQLCLIDriver extends CliDriver with Logging {
   // Note: [SPARK-31595] if there is a `'` in a double quoted string, or a `"` in a single quoted
   // string, the origin implementation from Hive will not drop the trailing semicolon as expected,
   // hence we refined this function a little bit.
+  // Note: [SPARK-33100] Ignore the content inside bracketed comment and ignore the comment without
+  // content.
   private def splitSemiColon(line: String): JList[String] = {
     var insideSingleQuote = false
     var insideDoubleQuote = false
-    var insideComment = false
+    var insideDashComment = false

Review comment:
       `Dash` -> `Simple`; see https://github.com/apache/spark/blob/master/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4#L1816

##########
File path: sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLCLIDriver.scala
##########
@@ -519,15 +519,29 @@ private[hive] class SparkSQLCLIDriver extends CliDriver with Logging {
   // Note: [SPARK-31595] if there is a `'` in a double quoted string, or a `"` in a single quoted
   // string, the origin implementation from Hive will not drop the trailing semicolon as expected,
   // hence we refined this function a little bit.
+  // Note: [SPARK-33100] Ignore the content inside bracketed comment and ignore the comment without

Review comment:
       This statement looks ambiguous, so how about saying `Ignore a semicolon inside a bracketed comment in spark-sql`?

##########
File path: sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLCLIDriver.scala
##########
@@ -519,15 +519,29 @@ private[hive] class SparkSQLCLIDriver extends CliDriver with Logging {
   // Note: [SPARK-31595] if there is a `'` in a double quoted string, or a `"` in a single quoted
   // string, the origin implementation from Hive will not drop the trailing semicolon as expected,
   // hence we refined this function a little bit.
+  // Note: [SPARK-33100] Ignore the content inside bracketed comment and ignore the comment without
+  // content.
   private def splitSemiColon(line: String): JList[String] = {
     var insideSingleQuote = false
     var insideDoubleQuote = false
-    var insideComment = false
+    var insideDashComment = false
+    var insideBracketedComment = false
+    var bracketedCommentRightBound = -1
     var escape = false
     var beginIndex = 0
+    var contentBegin = false

Review comment:
       Why do we need to track this state? Doesn't `!insideComment` already mean that the cursor is inside the statement's content?

##########
File path: sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLCLIDriver.scala
##########
@@ -519,15 +519,29 @@ private[hive] class SparkSQLCLIDriver extends CliDriver with Logging {
   // Note: [SPARK-31595] if there is a `'` in a double quoted string, or a `"` in a single quoted
   // string, the origin implementation from Hive will not drop the trailing semicolon as expected,
   // hence we refined this function a little bit.
+  // Note: [SPARK-33100] Ignore the content inside bracketed comment and ignore the comment without
+  // content.
   private def splitSemiColon(line: String): JList[String] = {
     var insideSingleQuote = false
     var insideDoubleQuote = false
-    var insideComment = false
+    var insideDashComment = false
+    var insideBracketedComment = false
+    var bracketedCommentRightBound = -1
     var escape = false
     var beginIndex = 0
+    var contentBegin = false

Review comment:
       nit: btw, `content` -> `statement`?

##########
File path: sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLCLIDriver.scala
##########
@@ -550,21 +564,37 @@ private[hive] class SparkSQLCLIDriver extends CliDriver with Logging {
           // Sample query: select "quoted value --"
           //                                    ^^ avoids starting a comment if it's inside quotes.
         } else if (hasNext && line.charAt(index + 1) == '-') {
-          // ignore quotes and ;
-          insideComment = true
+          // ignore quotes and ; in dash-comment
+          insideDashComment = true
         }
       } else if (line.charAt(index) == ';') {
         if (insideSingleQuote || insideDoubleQuote || insideComment) {
           // do not split
         } else {
-          // split, do not include ; itself
-          ret.add(line.substring(beginIndex, index))
+          if (contentBegin) {
+            // split, do not include ; itself
+            ret.add(line.substring(beginIndex, index))
+          }
           beginIndex = index + 1
+          contentBegin = false
         }
       } else if (line.charAt(index) == '\n') {
-        // with a new line the inline comment should end.
+        // with a new line the inline dash-comment should end.
         if (!escape) {
-          insideComment = false
+          insideDashComment = false
+        }
+      } else if (line.charAt(index) == '/' && !insideComment) {
+        val hasNext = index + 1 < line.length
+        if (insideSingleQuote || insideDoubleQuote) {
+          // Ignores '/' in any case of quotes
+        } else if (hasNext && line.charAt(index + 1) == '*') {
+          // ignore quotes and ; in bracketed comment
+          insideBracketedComment = true
+        }
+      } else if (line.charAt(index) == '/' && insideBracketedComment) {
+        if (line.charAt(index - 1) == '*') {
+          // record the right bound of bracketed comment
+          bracketedCommentRightBound = index

Review comment:
       Do we need the variable `bracketedCommentRightBound` to track a bracketed comment? I think we can handle this without it.

##########
File path: sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLCLIDriver.scala
##########
@@ -550,21 +564,37 @@ private[hive] class SparkSQLCLIDriver extends CliDriver with Logging {
           // Sample query: select "quoted value --"
           //                                    ^^ avoids starting a comment if it's inside quotes.
         } else if (hasNext && line.charAt(index + 1) == '-') {
-          // ignore quotes and ;
-          insideComment = true
+          // ignore quotes and ; in dash-comment

Review comment:
       nit: `// ignore quotes and semicolons in a simple comment`




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org



---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org
For additional commands, e-mail: reviews-help@spark.apache.org