You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by ma...@apache.org on 2023/03/02 15:25:23 UTC

[spark] branch branch-3.3 updated: [SPARK-42553][SQL][3.3] Ensure at least one time unit after "interval"

This is an automated email from the ASF dual-hosted git repository.

maxgekk pushed a commit to branch branch-3.3
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/branch-3.3 by this push:
     new 7e90f4226dd [SPARK-42553][SQL][3.3] Ensure at least one time unit after "interval"
7e90f4226dd is described below

commit 7e90f4226dd5e4fae1af5e88a334d87b330019cb
Author: jiangyzanze <ji...@alibaba-inc.com>
AuthorDate: Thu Mar 2 18:25:02 2023 +0300

    [SPARK-42553][SQL][3.3] Ensure at least one time unit after "interval"
    
    ### What changes were proposed in this pull request?
    This PR aims to ensure "at least one time unit should be given for interval literal" by modifying SqlBaseParser.
    
    This is a backport of https://github.com/apache/spark/pull/40195
    
    ### Why are the changes needed?
    INTERVAL is a Non-Reserved keyword in spark. But when I run
    ```shell
    scala> spark.sql("select interval from mytable")
    ```
    I get
    ```
    org.apache.spark.sql.catalyst.parser.ParseException:
    at least one time unit should be given for interval literal(line 1, pos 7)== SQL ==
    select interval from mytable
    -------^^^  at org.apache.spark.sql.errors.QueryParsingErrors$.invalidIntervalLiteralError(QueryParsingErrors.scala:196)
    ......
    ```
    It is a bug because "Non-Reserved keywords" have a special meaning in particular contexts and can be used as identifiers in other contexts. So by design, INTERVAL can be used as a column name.
    
    Currently the interval's grammar is
    ```
    interval
        : INTERVAL (errorCapturingMultiUnitsInterval | errorCapturingUnitToUnitInterval)?
        ;
    ```
    There is no need to make the time unit nullable, we can ensure "at least one time unit should be given for interval literal" if the interval's grammar is
    ```
    interval
        : INTERVAL (errorCapturingMultiUnitsInterval | errorCapturingUnitToUnitInterval)
        ;
    ```
    ### Does this PR introduce _any_ user-facing change?
    No.
    
    ### How was this patch tested?
    Unit test: PlanParsserSuite."SPARK-42553: NonReserved keyword 'interval' can be column name"
    
    Local test
    ```shell
    scala> val myDF = spark.sparkContext.makeRDD(1 to 5).toDF("interval")
    myDF: org.apache.spark.sql.DataFrame = [interval: int]
    
    scala> myDF.createOrReplaceTempView("mytable")
    
    scala> spark.sql("select interval from mytable;").show()
    +--------+
    |interval|
    +--------+
    |       1|
    |       2|
    |       3|
    |       4|
    |       5|
    +--------+
    
    ```
    
    Closes #40253 from jiang13021/branch-3.3-42553.
    
    Authored-by: jiangyzanze <ji...@alibaba-inc.com>
    Signed-off-by: Max Gekk <ma...@gmail.com>
---
 .../apache/spark/sql/catalyst/parser/SqlBaseParser.g4    |  2 +-
 .../apache/spark/sql/catalyst/parser/AstBuilder.scala    |  5 ++---
 .../org/apache/spark/sql/errors/QueryParsingErrors.scala |  4 ----
 .../sql/catalyst/parser/ExpressionParserSuite.scala      |  3 ---
 .../spark/sql/catalyst/parser/PlanParserSuite.scala      |  9 +++++++++
 .../src/test/resources/sql-tests/inputs/interval.sql     |  1 -
 .../resources/sql-tests/results/ansi/interval.sql.out    | 16 +---------------
 .../test/resources/sql-tests/results/interval.sql.out    | 16 +---------------
 8 files changed, 14 insertions(+), 42 deletions(-)

diff --git a/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseParser.g4 b/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseParser.g4
index 701d4bc5aa7..3ec4b9a833d 100644
--- a/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseParser.g4
+++ b/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseParser.g4
@@ -875,7 +875,7 @@ booleanValue
     ;
 
 interval
-    : INTERVAL (errorCapturingMultiUnitsInterval | errorCapturingUnitToUnitInterval)?
+    : INTERVAL (errorCapturingMultiUnitsInterval | errorCapturingUnitToUnitInterval)
     ;
 
 errorCapturingMultiUnitsInterval
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala
index ecc5360a4f7..4152e24d3e9 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala
@@ -2551,15 +2551,14 @@ class AstBuilder extends SqlBaseParserBaseVisitor[AnyRef] with SQLConfHelper wit
           innerCtx.unitToUnitInterval)
       }
       visitMultiUnitsInterval(innerCtx.multiUnitsInterval)
-    } else if (ctx.errorCapturingUnitToUnitInterval != null) {
+    } else {
+      assert(ctx.errorCapturingUnitToUnitInterval != null)
       val innerCtx = ctx.errorCapturingUnitToUnitInterval
       if (innerCtx.error1 != null || innerCtx.error2 != null) {
         val errorCtx = if (innerCtx.error1 != null) innerCtx.error1 else innerCtx.error2
         throw QueryParsingErrors.moreThanOneFromToUnitInIntervalLiteralError(errorCtx)
       }
       visitUnitToUnitInterval(innerCtx.body)
-    } else {
-      throw QueryParsingErrors.invalidIntervalLiteralError(ctx)
     }
   }
 
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryParsingErrors.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryParsingErrors.scala
index e92ed3e3b07..b0b1ccba618 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryParsingErrors.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryParsingErrors.scala
@@ -213,10 +213,6 @@ private[sql] object QueryParsingErrors extends QueryErrorsBase {
     new ParseException("Can only have a single from-to unit in the interval literal syntax", ctx)
   }
 
-  def invalidIntervalLiteralError(ctx: IntervalContext): Throwable = {
-    new ParseException("at least one time unit should be given for interval literal", ctx)
-  }
-
   def invalidIntervalFormError(value: String, ctx: MultiUnitsIntervalContext): Throwable = {
     new ParseException("Can only use numbers in the interval value part for" +
       s" multiple unit value pairs interval form, but got invalid value: $value", ctx)
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/ExpressionParserSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/ExpressionParserSuite.scala
index 9e63c817e74..4635033bec6 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/ExpressionParserSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/ExpressionParserSuite.scala
@@ -744,9 +744,6 @@ class ExpressionParserSuite extends AnalysisTest {
       }
     }
 
-    // Empty interval statement
-    intercept("interval", "at least one time unit should be given for interval literal")
-
     // Single Intervals.
     val forms = Seq("", "s")
     val values = Seq("0", "10", "-7", "21")
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/PlanParserSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/PlanParserSuite.scala
index d1787cb7666..276dc569989 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/PlanParserSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/PlanParserSuite.scala
@@ -1326,4 +1326,13 @@ class PlanParserSuite extends AnalysisTest {
         Literal(Decimal(0.1), DecimalType(1, 1)), true).toAggregateExpression()
     )
   }
+
+  test("SPARK-42553: NonReserved keyword 'interval' can be column name") {
+    comparePlans(
+      parsePlan("SELECT interval FROM VALUES ('abc') AS tbl(interval);"),
+      UnresolvedInlineTable(
+        Seq("interval"),
+        Seq(Literal("abc")) :: Nil).as("tbl").select('interval)
+    )
+  }
 }
diff --git a/sql/core/src/test/resources/sql-tests/inputs/interval.sql b/sql/core/src/test/resources/sql-tests/inputs/interval.sql
index 1bd86c45afe..e4da28c2e75 100644
--- a/sql/core/src/test/resources/sql-tests/inputs/interval.sql
+++ b/sql/core/src/test/resources/sql-tests/inputs/interval.sql
@@ -164,7 +164,6 @@ SELECT interval 'interval 2 weeks 2 days 1 hour 3 minutes 2 seconds 100 millisec
 SELECT interval '2 weeks 2 days 1 hour 3 minutes 2 seconds 100 millisecond 200 microseconds';
 
 -- malformed interval literal
-select interval;
 select interval 1 fake_unit;
 select interval 1 year to month;
 select interval '1' year to second;
diff --git a/sql/core/src/test/resources/sql-tests/results/ansi/interval.sql.out b/sql/core/src/test/resources/sql-tests/results/ansi/interval.sql.out
index f4ec0afb0cc..6d8a4403d0a 100644
--- a/sql/core/src/test/resources/sql-tests/results/ansi/interval.sql.out
+++ b/sql/core/src/test/resources/sql-tests/results/ansi/interval.sql.out
@@ -1,5 +1,5 @@
 -- Automatically generated by SQLQueryTestSuite
--- Number of queries: 286
+-- Number of queries: 285
 
 
 -- !query
@@ -1191,20 +1191,6 @@ struct<INTERVAL '16 01:03:02.1002' DAY TO SECOND:interval day to second>
 16 01:03:02.100200000
 
 
--- !query
-select interval
--- !query schema
-struct<>
--- !query output
-org.apache.spark.sql.catalyst.parser.ParseException
-
-at least one time unit should be given for interval literal(line 1, pos 7)
-
-== SQL ==
-select interval
--------^^^
-
-
 -- !query
 select interval 1 fake_unit
 -- !query schema
diff --git a/sql/core/src/test/resources/sql-tests/results/interval.sql.out b/sql/core/src/test/resources/sql-tests/results/interval.sql.out
index 71fb0c0845d..829ec2adcd3 100644
--- a/sql/core/src/test/resources/sql-tests/results/interval.sql.out
+++ b/sql/core/src/test/resources/sql-tests/results/interval.sql.out
@@ -1,5 +1,5 @@
 -- Automatically generated by SQLQueryTestSuite
--- Number of queries: 286
+-- Number of queries: 285
 
 
 -- !query
@@ -1163,20 +1163,6 @@ struct<INTERVAL '16 01:03:02.1002' DAY TO SECOND:interval day to second>
 16 01:03:02.100200000
 
 
--- !query
-select interval
--- !query schema
-struct<>
--- !query output
-org.apache.spark.sql.catalyst.parser.ParseException
-
-at least one time unit should be given for interval literal(line 1, pos 7)
-
-== SQL ==
-select interval
--------^^^
-
-
 -- !query
 select interval 1 fake_unit
 -- !query schema


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org