You are viewing a plain text version of this content. The canonical link for it is here.
Posted to reviews@spark.apache.org by GitBox <gi...@apache.org> on 2020/07/29 07:42:00 UTC

[GitHub] [spark] maropu commented on a change in pull request #29146: [SPARK-32257][SQL] Reports explicit errors for invalid usage of SET/RESET command

maropu commented on a change in pull request #29146:
URL: https://github.com/apache/spark/pull/29146#discussion_r461230902



##########
File path: sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala
##########
@@ -66,17 +69,21 @@ class SparkSqlAstBuilder(conf: SQLConf) extends AstBuilder(conf) {
    * character in the raw string.
    */
   override def visitSetConfiguration(ctx: SetConfigurationContext): LogicalPlan = withOrigin(ctx) {
-    // Construct the command.
-    val raw = remainder(ctx.SET.getSymbol)
-    val keyValueSeparatorIndex = raw.indexOf('=')
-    if (keyValueSeparatorIndex >= 0) {
-      val key = raw.substring(0, keyValueSeparatorIndex).trim
-      val value = raw.substring(keyValueSeparatorIndex + 1).trim
-      SetCommand(Some(key -> Option(value)))
-    } else if (raw.nonEmpty) {
-      SetCommand(Some(raw.trim -> None))
+    if (ctx.configKey() != null) {
+      val keyStr = normalizeConfigString(ctx.configKey().getText)
+      if (ctx.configValue() != null) {
+        SetCommand(Some(keyStr -> Option(normalizeConfigString(ctx.configValue().getText))))
+      } else {
+        SetCommand(Some(keyStr -> None))
+      }
     } else {
-      SetCommand(None)
+      remainder(ctx.SET().getSymbol).trim match {
+        case "-v" => SetCommand(Some("-v" -> None))
+        case s if s.isEmpty() => SetCommand(None)
+        case _ => throw new ParseException("Expected format is 'SET', 'SET key', or " +
+          "'SET key=value'. If you want to include spaces in key and value, please use quotes, " +
+          "e.g., SET \"ke y\"=`va lu e`.", ctx)

Review comment:
       Updated.

##########
File path: sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala
##########
@@ -66,17 +69,21 @@ class SparkSqlAstBuilder(conf: SQLConf) extends AstBuilder(conf) {
    * character in the raw string.
    */
   override def visitSetConfiguration(ctx: SetConfigurationContext): LogicalPlan = withOrigin(ctx) {
-    // Construct the command.
-    val raw = remainder(ctx.SET.getSymbol)
-    val keyValueSeparatorIndex = raw.indexOf('=')
-    if (keyValueSeparatorIndex >= 0) {
-      val key = raw.substring(0, keyValueSeparatorIndex).trim
-      val value = raw.substring(keyValueSeparatorIndex + 1).trim
-      SetCommand(Some(key -> Option(value)))
-    } else if (raw.nonEmpty) {
-      SetCommand(Some(raw.trim -> None))
+    if (ctx.configKey() != null) {
+      val keyStr = normalizeConfigString(ctx.configKey().getText)

Review comment:
       No. At least, we don't have such an existing test case.

##########
File path: sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala
##########
@@ -66,17 +69,21 @@ class SparkSqlAstBuilder(conf: SQLConf) extends AstBuilder(conf) {
    * character in the raw string.
    */
   override def visitSetConfiguration(ctx: SetConfigurationContext): LogicalPlan = withOrigin(ctx) {
-    // Construct the command.
-    val raw = remainder(ctx.SET.getSymbol)
-    val keyValueSeparatorIndex = raw.indexOf('=')
-    if (keyValueSeparatorIndex >= 0) {
-      val key = raw.substring(0, keyValueSeparatorIndex).trim
-      val value = raw.substring(keyValueSeparatorIndex + 1).trim
-      SetCommand(Some(key -> Option(value)))
-    } else if (raw.nonEmpty) {
-      SetCommand(Some(raw.trim -> None))
+    if (ctx.configKey() != null) {
+      val keyStr = normalizeConfigString(ctx.configKey().getText)
+      if (ctx.configValue() != null) {
+        SetCommand(Some(keyStr -> Option(normalizeConfigString(ctx.configValue().getText))))
+      } else {
+        SetCommand(Some(keyStr -> None))
+      }
     } else {
-      SetCommand(None)
+      remainder(ctx.SET().getSymbol).trim match {
+        case "-v" => SetCommand(Some("-v" -> None))
+        case s if s.isEmpty() => SetCommand(None)
+        case _ => throw new ParseException("Expected format is 'SET', 'SET key', or " +
+          "'SET key=value'. If you want to include spaces in key and value, please use quotes, " +
+          "e.g., SET \"ke y\"=`va lu e`.", ctx)

Review comment:
       Updated.

##########
File path: sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala
##########
@@ -58,6 +58,9 @@ class SparkSqlParser(conf: SQLConf) extends AbstractSqlParser(conf) {
 class SparkSqlAstBuilder(conf: SQLConf) extends AstBuilder(conf) {
   import org.apache.spark.sql.catalyst.parser.ParserUtils._
 
+  private def normalizeConfigString(s: String) =

Review comment:
     I looked into `tableIdentifier`, but I couldn't find why back-quotes are removed in that case. I replaced `BACKQUOTED_IDENTIFIER` with `quotedIdentifier` in [this commit](https://github.com/apache/spark/pull/29146/commits/08feee31c5d21aa75f9981efe59f583d46042cb3), and then it seems the back-quotes are removed by the ANTLR parser. Do you know anything about that?

##########
File path: sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4
##########
@@ -244,11 +244,19 @@ statement
     | SET TIME ZONE interval                                           #setTimeZone
     | SET TIME ZONE timezone=(STRING | LOCAL)                          #setTimeZone
     | SET TIME ZONE .*?                                                #setTimeZone
+    | SET configKey (EQ value=.+)?                                     #setConfiguration
     | SET .*?                                                          #setConfiguration
+    | RESET configKey?                                                 #resetConfiguration
     | RESET .*?                                                        #resetConfiguration
     | unsupportedHiveNativeCommands .*?                                #failNativeCommand
     ;
 
+configKey
+    : IDENTIFIER (('.' | ':') IDENTIFIER)*

Review comment:
       Currently, this rule leads to some test failures;
   ```
   sbt.ForkMain$ForkError: org.apache.spark.sql.catalyst.parser.ParseException: 
   Expected format is 'SET', 'SET key', or 'SET key=value'. If you want to include special characters in key and value, please use quotes or string literal, e.g., SET "ke y"=value.(line 1, pos 0)
   
   == SQL ==
   set hive.fetch.task.conversion=more
   ^^^
   ```
   https://amplab.cs.berkeley.edu/jenkins/job/SparkPullRequestBuilder/126674/testReport/org.apache.spark.sql.hive.execution/HiveCompatibilitySuite/binary_constant/
   
   This is because `IDENTIFIER` seems to conflict with an existing ANTLR token, e.g., `fetch` in the example above. I'm currently looking for another smart rule to avoid it. Please let me know if anyone has a better idea.

##########
File path: sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4
##########
@@ -244,11 +244,19 @@ statement
     | SET TIME ZONE interval                                           #setTimeZone
     | SET TIME ZONE timezone=(STRING | LOCAL)                          #setTimeZone
     | SET TIME ZONE .*?                                                #setTimeZone
+    | SET configKey (EQ value=.+)?                                     #setConfiguration
     | SET .*?                                                          #setConfiguration
+    | RESET configKey?                                                 #resetConfiguration
     | RESET .*?                                                        #resetConfiguration
     | unsupportedHiveNativeCommands .*?                                #failNativeCommand
     ;
 
+configKey
+    : IDENTIFIER (('.' | ':') IDENTIFIER)*
+    | quotedIdentifier
+    | STRING

Review comment:
       ok, I also think the simpler rule looks better. I'll update it based on your suggestion.

##########
File path: sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4
##########
@@ -244,11 +244,19 @@ statement
     | SET TIME ZONE interval                                           #setTimeZone
     | SET TIME ZONE timezone=(STRING | LOCAL)                          #setTimeZone
     | SET TIME ZONE .*?                                                #setTimeZone
+    | SET configKey (EQ value=.+)?                                     #setConfiguration
     | SET .*?                                                          #setConfiguration
+    | RESET configKey?                                                 #resetConfiguration
     | RESET .*?                                                        #resetConfiguration
     | unsupportedHiveNativeCommands .*?                                #failNativeCommand
     ;
 
+configKey
+    : IDENTIFIER (('.' | ':') IDENTIFIER)*
+    | quotedIdentifier
+    | STRING

Review comment:
       ok, I think the simpler rule looks better. I'll update it based on your suggestion.

##########
File path: sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala
##########
@@ -58,6 +58,9 @@ class SparkSqlParser(conf: SQLConf) extends AbstractSqlParser(conf) {
 class SparkSqlAstBuilder(conf: SQLConf) extends AstBuilder(conf) {
   import org.apache.spark.sql.catalyst.parser.ParserUtils._
 
+  private def normalizeConfigString(s: String) =

Review comment:
       Oh, nice. I see...

##########
File path: sql/core/src/test/scala/org/apache/spark/sql/execution/SparkSqlParserSuite.scala
##########
@@ -61,6 +61,39 @@ class SparkSqlParserSuite extends AnalysisTest {
   private def intercept(sqlCommand: String, messages: String*): Unit =
     interceptParseException(parser.parsePlan)(sqlCommand, messages: _*)
 
+  test("Report Error for invalid usage of SET command") {
+    assertEqual("SET", SetCommand(None))
+    assertEqual("SET -v", SetCommand(Some("-v", None)))
+    assertEqual("SET spark.sql.key", SetCommand(Some("spark.sql.key" -> None)))
+    assertEqual("SET spark:sql:key=false", SetCommand(Some("spark:sql:key" -> Some("false"))))
+    assertEqual("SET spark.sql.    key=value",

Review comment:
     Hmm, I'll try. But it seems we already accept similar patterns, e.g.,
   ```
   scala> sql("select * from default.t").show()
   +---+                                                                           
   |  a|
   +---+
   |  4|
   |  5|
   |  1|
   |  2|
   |  3|
   +---+
   
   scala> sql("select * from default   .t").show()
   +---+
   |  a|
   +---+
   |  4|
   |  5|
   |  1|
   |  2|
   |  3|
   +---+
   
   scala> sql("select * from default   .    t").show()
   +---+
   |  a|
   +---+
   |  4|
   |  5|
   |  1|
   |  2|
   |  3|
   +---+
   ```
   Should these also be prohibited? (I think it would be hard to do so, though...)

##########
File path: sql/core/src/test/scala/org/apache/spark/sql/execution/SparkSqlParserSuite.scala
##########
@@ -61,6 +61,39 @@ class SparkSqlParserSuite extends AnalysisTest {
   private def intercept(sqlCommand: String, messages: String*): Unit =
     interceptParseException(parser.parsePlan)(sqlCommand, messages: _*)
 
+  test("Report Error for invalid usage of SET command") {
+    assertEqual("SET", SetCommand(None))
+    assertEqual("SET -v", SetCommand(Some("-v", None)))
+    assertEqual("SET spark.sql.key", SetCommand(Some("spark.sql.key" -> None)))
+    assertEqual("SET spark:sql:key=false", SetCommand(Some("spark:sql:key" -> Some("false"))))
+    assertEqual("SET spark.sql.    key=value",

Review comment:
     Hmm, I'll check it. But it seems we already accept similar patterns, e.g.,
   ```
   scala> sql("select * from default.t").show()
   +---+                                                                           
   |  a|
   +---+
   |  4|
   |  5|
   |  1|
   |  2|
   |  3|
   +---+
   
   scala> sql("select * from default   .t").show()
   +---+
   |  a|
   +---+
   |  4|
   |  5|
   |  1|
   |  2|
   |  3|
   +---+
   
   scala> sql("select * from default   .    t").show()
   +---+
   |  a|
   +---+
   |  4|
   |  5|
   |  1|
   |  2|
   |  3|
   +---+
   ```
   Should these also be prohibited? (I think it would be hard to do so, though...)




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org



---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org
For additional commands, e-mail: reviews-help@spark.apache.org