You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by we...@apache.org on 2020/05/06 04:37:50 UTC
[spark] branch branch-3.0 updated: [SPARK-31595][SQL] Spark sql
should allow unescaped quote mark in quoted string
This is an automated email from the ASF dual-hosted git repository.
wenchen pushed a commit to branch branch-3.0
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/branch-3.0 by this push:
new fcd566e [SPARK-31595][SQL] Spark sql should allow unescaped quote mark in quoted string
fcd566e is described below
commit fcd566ea15ac1a58efe152570b933f4d7a3d7c4e
Author: Daoyuan Wang <me...@daoyuan.wang>
AuthorDate: Wed May 6 04:34:43 2020 +0000
[SPARK-31595][SQL] Spark sql should allow unescaped quote mark in quoted string
### What changes were proposed in this pull request?
`splitSemiColon` cannot correctly handle an unescaped quote mark of the other kind inside a quoted string, such as "'" or '"'. When a string contains such an unmatched quote, `splitSemiColon` does not drop the trailing semicolon as expected.
### Why are the changes needed?
Some regular expressions contain quote marks inside string literals. Semicolons should still be processed correctly in that case.
### Does this PR introduce any user-facing change?
No
### How was this patch tested?
Added a unit test and also tested manually.
Closes #28393 from adrian-wang/unescaped.
Authored-by: Daoyuan Wang <me...@daoyuan.wang>
Signed-off-by: Wenchen Fan <we...@databricks.com>
(cherry picked from commit 53a9bf8fece7322312cbe93c9224c04f645a0f5e)
Signed-off-by: Wenchen Fan <we...@databricks.com>
---
.../apache/spark/sql/hive/thriftserver/SparkSQLCLIDriver.scala | 9 +++++++--
.../scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala | 9 +++++++++
2 files changed, 16 insertions(+), 2 deletions(-)
diff --git a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLCLIDriver.scala b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLCLIDriver.scala
index 5ed0cb0..bffa24c 100644
--- a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLCLIDriver.scala
+++ b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLCLIDriver.scala
@@ -507,6 +507,9 @@ private[hive] class SparkSQLCLIDriver extends CliDriver with Logging {
}
// Adapted splitSemiColon from Hive 2.3's CliDriver.splitSemiColon.
+ // Note: [SPARK-31595] if there is a `'` in a double quoted string, or a `"` in a single quoted
+ // string, the origin implementation from Hive will not drop the trailing semicolon as expected,
+ // hence we refined this function a little bit.
private def splitSemiColon(line: String): JList[String] = {
var insideSingleQuote = false
var insideDoubleQuote = false
@@ -519,13 +522,15 @@ private[hive] class SparkSQLCLIDriver extends CliDriver with Logging {
for (index <- 0 until line.length) {
if (line.charAt(index) == '\'' && !insideComment) {
// take a look to see if it is escaped
- if (!escape) {
+ // See the comment above about SPARK-31595
+ if (!escape && !insideDoubleQuote) {
// flip the boolean variable
insideSingleQuote = !insideSingleQuote
}
} else if (line.charAt(index) == '\"' && !insideComment) {
// take a look to see if it is escaped
- if (!escape) {
+ // See the comment above about SPARK-31595
+ if (!escape && !insideSingleQuote) {
// flip the boolean variable
insideDoubleQuote = !insideDoubleQuote
}
diff --git a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala
index abefb46..265e777 100644
--- a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala
+++ b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala
@@ -500,4 +500,13 @@ class CliSuite extends SparkFunSuite with BeforeAndAfterAll with BeforeAndAfterE
| ;""".stripMargin -> "testcomment"
)
}
+
+ test("SPARK-31595 Should allow unescaped quote mark in quoted string") {
+ runCliWithin(1.minute)(
+ "SELECT '\"legal string a';select 1 + 234;".stripMargin -> "235"
+ )
+ runCliWithin(1.minute)(
+ "SELECT \"legal 'string b\";select 22222 + 1;".stripMargin -> "22223"
+ )
+ }
}
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org