You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by ma...@apache.org on 2023/03/02 06:37:57 UTC

[spark] branch branch-3.4 updated: [SPARK-42553][SQL] Ensure at least one time unit after "interval"

This is an automated email from the ASF dual-hosted git repository.

maxgekk pushed a commit to branch branch-3.4
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/branch-3.4 by this push:
     new 39f220c5410 [SPARK-42553][SQL] Ensure at least one time unit after "interval"
39f220c5410 is described below

commit 39f220c54108e82e0915d32bd173cd7f67f08cbf
Author: jiangyzanze <ji...@alibaba-inc.com>
AuthorDate: Thu Mar 2 09:37:25 2023 +0300

    [SPARK-42553][SQL] Ensure at least one time unit after "interval"
    
    ### What changes were proposed in this pull request?
    
    THis PR aims to ensure "at least one time unit should be given for interval literal" by modifying SqlBaseParser
    
    ### Why are the changes needed?
    
    INTERVAL is a Non-Reserved keyword in spark. But when I run
    ```shell
    scala> spark.sql("select interval from mytable")
    ```
    I get
    ```
    org.apache.spark.sql.catalyst.parser.ParseException:
    at least one time unit should be given for interval literal(line 1, pos 7)== SQL ==
    select interval from mytable
    -------^^^  at org.apache.spark.sql.errors.QueryParsingErrors$.invalidIntervalLiteralError(QueryParsingErrors.scala:196)
    ......
    ```
    It is a bug because "Non-Reserved keywords" have a special meaning in particular contexts and can be used as identifiers in other contexts. So by design, INTERVAL can be used as a column name.
    
    Currently the interval's grammar is
    ```
    interval
        : INTERVAL (errorCapturingMultiUnitsInterval | errorCapturingUnitToUnitInterval)?
        ;
    ```
    There is no need to make the time unit nullable, we can ensure "at least one time unit should be given for interval literal" if the interval's grammar is
    ```
    interval
        : INTERVAL (errorCapturingMultiUnitsInterval | errorCapturingUnitToUnitInterval)
        ;
    ```
    ### Does this PR introduce _any_ user-facing change?
    
    No.
    
    ### How was this patch tested?
    
    local test
    ```shell
    scala> val myDF = spark.sparkContext.makeRDD(1 to 5).toDF("interval")
    myDF: org.apache.spark.sql.DataFrame = [interval: int]
    
    scala> myDF.createOrReplaceTempView("mytable")
    
    scala> spark.sql("select interval from mytable;").show()
    +--------+
    |interval|
    +--------+
    |       1|
    |       2|
    |       3|
    |       4|
    |       5|
    +--------+
    
    ```
    
    Closes #40195 from jiang13021/jiang13021_fix_42553.
    
    Authored-by: jiangyzanze <ji...@alibaba-inc.com>
    Signed-off-by: Max Gekk <ma...@gmail.com>
    (cherry picked from commit 738a81e3173bd6571e038196fc161737ca105f58)
    Signed-off-by: Max Gekk <ma...@gmail.com>
---
 core/src/main/resources/error/error-classes.json       |  5 -----
 .../apache/spark/sql/catalyst/parser/SqlBaseParser.g4  |  2 +-
 .../apache/spark/sql/catalyst/parser/AstBuilder.scala  |  5 ++---
 .../apache/spark/sql/errors/QueryParsingErrors.scala   |  4 ----
 .../sql/catalyst/parser/ExpressionParserSuite.scala    | 10 ----------
 .../spark/sql/catalyst/parser/PlanParserSuite.scala    |  9 +++++++++
 .../src/test/resources/sql-tests/inputs/interval.sql   |  1 -
 .../resources/sql-tests/results/ansi/interval.sql.out  | 18 ------------------
 .../test/resources/sql-tests/results/interval.sql.out  | 18 ------------------
 9 files changed, 12 insertions(+), 60 deletions(-)

diff --git a/core/src/main/resources/error/error-classes.json b/core/src/main/resources/error/error-classes.json
index 28d213db93c..2780c98bfc6 100644
--- a/core/src/main/resources/error/error-classes.json
+++ b/core/src/main/resources/error/error-classes.json
@@ -2017,11 +2017,6 @@
       "Can only have a single from-to unit in the interval literal syntax."
     ]
   },
-  "_LEGACY_ERROR_TEMP_0025" : {
-    "message" : [
-      "At least one time unit should be given for interval literal."
-    ]
-  },
   "_LEGACY_ERROR_TEMP_0026" : {
     "message" : [
       "Can only use numbers in the interval value part for multiple unit value pairs interval form, but got invalid value: <value>."
diff --git a/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseParser.g4 b/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseParser.g4
index aa5f538bbf6..6142700f095 100644
--- a/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseParser.g4
+++ b/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseParser.g4
@@ -955,7 +955,7 @@ booleanValue
     ;
 
 interval
-    : INTERVAL (errorCapturingMultiUnitsInterval | errorCapturingUnitToUnitInterval)?
+    : INTERVAL (errorCapturingMultiUnitsInterval | errorCapturingUnitToUnitInterval)
     ;
 
 errorCapturingMultiUnitsInterval
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala
index 1c5eac4ce17..6ebfa853edc 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala
@@ -2758,15 +2758,14 @@ class AstBuilder extends SqlBaseParserBaseVisitor[AnyRef] with SQLConfHelper wit
           innerCtx.unitToUnitInterval)
       }
       visitMultiUnitsInterval(innerCtx.multiUnitsInterval)
-    } else if (ctx.errorCapturingUnitToUnitInterval != null) {
+    } else {
+      assert(ctx.errorCapturingUnitToUnitInterval != null)
       val innerCtx = ctx.errorCapturingUnitToUnitInterval
       if (innerCtx.error1 != null || innerCtx.error2 != null) {
         val errorCtx = if (innerCtx.error1 != null) innerCtx.error1 else innerCtx.error2
         throw QueryParsingErrors.moreThanOneFromToUnitInIntervalLiteralError(errorCtx)
       }
       visitUnitToUnitInterval(innerCtx.body)
-    } else {
-      throw QueryParsingErrors.invalidIntervalLiteralError(ctx)
     }
   }
 
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryParsingErrors.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryParsingErrors.scala
index 57868020736..7aad1127c8c 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryParsingErrors.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryParsingErrors.scala
@@ -246,10 +246,6 @@ private[sql] object QueryParsingErrors extends QueryErrorsBase {
     new ParseException(errorClass = "_LEGACY_ERROR_TEMP_0024", ctx)
   }
 
-  def invalidIntervalLiteralError(ctx: IntervalContext): Throwable = {
-    new ParseException(errorClass = "_LEGACY_ERROR_TEMP_0025", ctx)
-  }
-
   def invalidIntervalFormError(value: String, ctx: MultiUnitsIntervalContext): Throwable = {
     new ParseException(
       errorClass = "_LEGACY_ERROR_TEMP_0026",
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/ExpressionParserSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/ExpressionParserSuite.scala
index d0fc7199378..8d08d07249e 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/ExpressionParserSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/ExpressionParserSuite.scala
@@ -970,16 +970,6 @@ class ExpressionParserSuite extends AnalysisTest {
       }
     }
 
-    // Empty interval statement
-    checkError(
-      exception = parseException("interval"),
-      errorClass = "_LEGACY_ERROR_TEMP_0025",
-      parameters = Map.empty,
-      context = ExpectedContext(
-        fragment = "interval",
-        start = 0,
-        stop = 7))
-
     // Single Intervals.
     val forms = Seq("", "s")
     val values = Seq("0", "10", "-7", "21")
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/PlanParserSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/PlanParserSuite.scala
index a079cc62440..6fc83d8c782 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/PlanParserSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/PlanParserSuite.scala
@@ -1597,4 +1597,13 @@ class PlanParserSuite extends AnalysisTest {
       errorClass = "PARSE_SYNTAX_ERROR",
       parameters = Map("error" -> "end of input", "hint" -> ""))
   }
+
+  test("SPARK-42553: NonReserved keyword 'interval' can be column name") {
+    comparePlans(
+      parsePlan("SELECT interval FROM VALUES ('abc') AS tbl(interval);"),
+      UnresolvedInlineTable(
+        Seq("interval"),
+        Seq(Literal("abc")) :: Nil).as("tbl").select($"interval")
+    )
+  }
 }
diff --git a/sql/core/src/test/resources/sql-tests/inputs/interval.sql b/sql/core/src/test/resources/sql-tests/inputs/interval.sql
index 1bd86c45afe..e4da28c2e75 100644
--- a/sql/core/src/test/resources/sql-tests/inputs/interval.sql
+++ b/sql/core/src/test/resources/sql-tests/inputs/interval.sql
@@ -164,7 +164,6 @@ SELECT interval 'interval 2 weeks 2 days 1 hour 3 minutes 2 seconds 100 millisec
 SELECT interval '2 weeks 2 days 1 hour 3 minutes 2 seconds 100 millisecond 200 microseconds';
 
 -- malformed interval literal
-select interval;
 select interval 1 fake_unit;
 select interval 1 year to month;
 select interval '1' year to second;
diff --git a/sql/core/src/test/resources/sql-tests/results/ansi/interval.sql.out b/sql/core/src/test/resources/sql-tests/results/ansi/interval.sql.out
index 3a8fee88f07..c5c73002a1a 100644
--- a/sql/core/src/test/resources/sql-tests/results/ansi/interval.sql.out
+++ b/sql/core/src/test/resources/sql-tests/results/ansi/interval.sql.out
@@ -1481,24 +1481,6 @@ struct<INTERVAL '16 01:03:02.1002' DAY TO SECOND:interval day to second>
 16 01:03:02.100200000
 
 
--- !query
-select interval
--- !query schema
-struct<>
--- !query output
-org.apache.spark.sql.catalyst.parser.ParseException
-{
-  "errorClass" : "_LEGACY_ERROR_TEMP_0025",
-  "queryContext" : [ {
-    "objectType" : "",
-    "objectName" : "",
-    "startIndex" : 8,
-    "stopIndex" : 15,
-    "fragment" : "interval"
-  } ]
-}
-
-
 -- !query
 select interval 1 fake_unit
 -- !query schema
diff --git a/sql/core/src/test/resources/sql-tests/results/interval.sql.out b/sql/core/src/test/resources/sql-tests/results/interval.sql.out
index a410ea0b32c..2f8185f78d8 100644
--- a/sql/core/src/test/resources/sql-tests/results/interval.sql.out
+++ b/sql/core/src/test/resources/sql-tests/results/interval.sql.out
@@ -1362,24 +1362,6 @@ struct<INTERVAL '16 01:03:02.1002' DAY TO SECOND:interval day to second>
 16 01:03:02.100200000
 
 
--- !query
-select interval
--- !query schema
-struct<>
--- !query output
-org.apache.spark.sql.catalyst.parser.ParseException
-{
-  "errorClass" : "_LEGACY_ERROR_TEMP_0025",
-  "queryContext" : [ {
-    "objectType" : "",
-    "objectName" : "",
-    "startIndex" : 8,
-    "stopIndex" : 15,
-    "fragment" : "interval"
-  } ]
-}
-
-
 -- !query
 select interval 1 fake_unit
 -- !query schema


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org