You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by we...@apache.org on 2020/05/06 04:37:50 UTC
[spark] branch branch-3.0 updated: [SPARK-31595][SQL] Spark sql should allow unescaped quote mark in quoted string

This is an automated email from the ASF dual-hosted git repository.

wenchen pushed a commit to branch branch-3.0
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/branch-3.0 by this push:
     new fcd566e  [SPARK-31595][SQL] Spark sql should allow unescaped quote mark in quoted string
fcd566e is described below

commit fcd566ea15ac1a58efe152570b933f4d7a3d7c4e
Author: Daoyuan Wang <me...@daoyuan.wang>
AuthorDate: Wed May 6 04:34:43 2020 +0000

    [SPARK-31595][SQL] Spark sql should allow unescaped quote mark in quoted string
    
    ### What changes were proposed in this pull request?
    `def splitSemiColon` cannot correctly handle an unescaped quote mark of the other kind inside a quoted string, such as "'" or '"'. When a string contains such an unmatched quote, `splitSemiColon` does not drop the trailing semicolon as expected.
    
    ### Why are the changes needed?
    Some regular expressions contain quote marks inside strings. We should process semicolons correctly in such cases.
    
    ### Does this PR introduce any user-facing change?
    No
    
    ### How was this patch tested?
    Added a unit test and also tested manually.
    
    Closes #28393 from adrian-wang/unescaped.
    
    Authored-by: Daoyuan Wang <me...@daoyuan.wang>
    Signed-off-by: Wenchen Fan <we...@databricks.com>
    (cherry picked from commit 53a9bf8fece7322312cbe93c9224c04f645a0f5e)
    Signed-off-by: Wenchen Fan <we...@databricks.com>
---
 .../apache/spark/sql/hive/thriftserver/SparkSQLCLIDriver.scala   | 9 +++++++--
 .../scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala  | 9 +++++++++
 2 files changed, 16 insertions(+), 2 deletions(-)

diff --git a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLCLIDriver.scala b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLCLIDriver.scala
index 5ed0cb0..bffa24c 100644
--- a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLCLIDriver.scala
+++ b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLCLIDriver.scala
@@ -507,6 +507,9 @@ private[hive] class SparkSQLCLIDriver extends CliDriver with Logging {
   }
 
   // Adapted splitSemiColon from Hive 2.3's CliDriver.splitSemiColon.
+  // Note: [SPARK-31595] if there is a `'` in a double quoted string, or a `"` in a single quoted
+  // string, the original implementation from Hive will not drop the trailing semicolon as
+  // expected, hence we refined this function a little bit.
   private def splitSemiColon(line: String): JList[String] = {
     var insideSingleQuote = false
     var insideDoubleQuote = false
@@ -519,13 +522,15 @@ private[hive] class SparkSQLCLIDriver extends CliDriver with Logging {
     for (index <- 0 until line.length) {
       if (line.charAt(index) == '\'' && !insideComment) {
         // take a look to see if it is escaped
-        if (!escape) {
+        // See the comment above about SPARK-31595
+        if (!escape && !insideDoubleQuote) {
           // flip the boolean variable
           insideSingleQuote = !insideSingleQuote
         }
       } else if (line.charAt(index) == '\"' && !insideComment) {
         // take a look to see if it is escaped
-        if (!escape) {
+        // See the comment above about SPARK-31595
+        if (!escape && !insideSingleQuote) {
           // flip the boolean variable
           insideDoubleQuote = !insideDoubleQuote
         }
diff --git a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala
index abefb46..265e777 100644
--- a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala
+++ b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala
@@ -500,4 +500,13 @@ class CliSuite extends SparkFunSuite with BeforeAndAfterAll with BeforeAndAfterE
         |   ;""".stripMargin -> "testcomment"
     )
   }
+
+  test("SPARK-31595 Should allow unescaped quote mark in quoted string") {
+    runCliWithin(1.minute)(
+      "SELECT '\"legal string a';select 1 + 234;".stripMargin -> "235"
+    )
+    runCliWithin(1.minute)(
+      "SELECT \"legal 'string b\";select 22222 + 1;".stripMargin -> "22223"
+    )
+  }
 }


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org