You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by do...@apache.org on 2020/09/21 21:13:25 UTC

[spark] branch branch-3.0 updated: [SPARK-32718][SQL][3.0] Remove unnecessary keywords for interval units

This is an automated email from the ASF dual-hosted git repository.

dongjoon pushed a commit to branch branch-3.0
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/branch-3.0 by this push:
     new b27bbbb  [SPARK-32718][SQL][3.0] Remove unnecessary keywords for interval units
b27bbbb is described below

commit b27bbbbbe46572feebf549a2314b6c9ea0d39c8a
Author: Wenchen Fan <we...@databricks.com>
AuthorDate: Mon Sep 21 14:06:54 2020 -0700

    [SPARK-32718][SQL][3.0] Remove unnecessary keywords for interval units
    
    Backport https://github.com/apache/spark/pull/29560 to 3.0, as it's kind of a bug fix for ANSI mode: people can't use the `year`, `month`, etc. functions under ANSI mode.
    
    Closes #29823 from cloud-fan/backport.
    
    Authored-by: Wenchen Fan <we...@databricks.com>
    Signed-off-by: Dongjoon Hyun <dh...@apple.com>
---
 docs/sql-ref-ansi-compliance.md                    |  6 --
 .../apache/spark/sql/catalyst/parser/SqlBase.g4    | 26 +--------
 .../spark/sql/catalyst/parser/AstBuilder.scala     |  2 +-
 .../test/resources/sql-tests/inputs/interval.sql   |  4 ++
 .../sql-tests/results/ansi/datetime.sql.out        | 10 +---
 .../sql-tests/results/ansi/interval.sql.out        | 64 +++++++++++++++-------
 .../resources/sql-tests/results/interval.sql.out   | 42 +++++++++++++-
 .../org/apache/spark/sql/TPCDSQuerySuite.scala     |  6 ++
 8 files changed, 100 insertions(+), 60 deletions(-)

diff --git a/docs/sql-ref-ansi-compliance.md b/docs/sql-ref-ansi-compliance.md
index 1936161..948a36e 100644
--- a/docs/sql-ref-ansi-compliance.md
+++ b/docs/sql-ref-ansi-compliance.md
@@ -181,7 +181,6 @@ Below is a list of all the keywords in Spark SQL.
 |DATA|non-reserved|non-reserved|non-reserved|
 |DATABASE|non-reserved|non-reserved|non-reserved|
 |DATABASES|non-reserved|non-reserved|non-reserved|
-|DAY|reserved|non-reserved|reserved|
 |DBPROPERTIES|non-reserved|non-reserved|non-reserved|
 |DEFINED|non-reserved|non-reserved|non-reserved|
 |DELETE|non-reserved|non-reserved|reserved|
@@ -227,7 +226,6 @@ Below is a list of all the keywords in Spark SQL.
 |GROUP|reserved|non-reserved|reserved|
 |GROUPING|non-reserved|non-reserved|reserved|
 |HAVING|reserved|non-reserved|reserved|
-|HOUR|reserved|non-reserved|reserved|
 |IF|non-reserved|non-reserved|not a keyword|
 |IGNORE|non-reserved|non-reserved|non-reserved|
 |IMPORT|non-reserved|non-reserved|non-reserved|
@@ -265,8 +263,6 @@ Below is a list of all the keywords in Spark SQL.
 |MATCHED|non-reserved|non-reserved|non-reserved|
 |MERGE|non-reserved|non-reserved|non-reserved|
 |MINUS|non-reserved|strict-non-reserved|non-reserved|
-|MINUTE|reserved|non-reserved|reserved|
-|MONTH|reserved|non-reserved|reserved|
 |MSCK|non-reserved|non-reserved|non-reserved|
 |NAMESPACE|non-reserved|non-reserved|non-reserved|
 |NAMESPACES|non-reserved|non-reserved|non-reserved|
@@ -326,7 +322,6 @@ Below is a list of all the keywords in Spark SQL.
 |ROWS|non-reserved|non-reserved|reserved|
 |SCHEMA|non-reserved|non-reserved|non-reserved|
 |SCHEMAS|non-reserved|non-reserved|not a keyword|
-|SECOND|reserved|non-reserved|reserved|
 |SELECT|reserved|non-reserved|reserved|
 |SEMI|non-reserved|strict-non-reserved|non-reserved|
 |SEPARATED|non-reserved|non-reserved|non-reserved|
@@ -384,4 +379,3 @@ Below is a list of all the keywords in Spark SQL.
 |WHERE|reserved|non-reserved|reserved|
 |WINDOW|non-reserved|non-reserved|reserved|
 |WITH|reserved|non-reserved|reserved|
-|YEAR|reserved|non-reserved|reserved|
diff --git a/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 b/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4
index df6ff9f..922ff10 100644
--- a/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4
+++ b/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4
@@ -818,7 +818,7 @@ errorCapturingMultiUnitsInterval
     ;
 
 multiUnitsInterval
-    : (intervalValue intervalUnit)+
+    : (intervalValue unit+=identifier)+
     ;
 
 errorCapturingUnitToUnitInterval
@@ -826,7 +826,7 @@ errorCapturingUnitToUnitInterval
     ;
 
 unitToUnitInterval
-    : value=intervalValue from=intervalUnit TO to=intervalUnit
+    : value=intervalValue from=identifier TO to=identifier
     ;
 
 intervalValue
@@ -834,16 +834,6 @@ intervalValue
     | STRING
     ;
 
-intervalUnit
-    : DAY
-    | HOUR
-    | MINUTE
-    | MONTH
-    | SECOND
-    | YEAR
-    | identifier
-    ;
-
 colPosition
     : position=FIRST | position=AFTER afterCol=errorCapturingIdentifier
     ;
@@ -1251,7 +1241,6 @@ nonReserved
     | DATA
     | DATABASE
     | DATABASES
-    | DAY
     | DBPROPERTIES
     | DEFINED
     | DELETE
@@ -1295,7 +1284,6 @@ nonReserved
     | GROUP
     | GROUPING
     | HAVING
-    | HOUR
     | IF
     | IGNORE
     | IMPORT
@@ -1328,8 +1316,6 @@ nonReserved
     | MAP
     | MATCHED
     | MERGE
-    | MINUTE
-    | MONTH
     | MSCK
     | NAMESPACE
     | NAMESPACES
@@ -1384,7 +1370,6 @@ nonReserved
     | ROW
     | ROWS
     | SCHEMA
-    | SECOND
     | SELECT
     | SEPARATED
     | SERDE
@@ -1438,7 +1423,6 @@ nonReserved
     | WHERE
     | WINDOW
     | WITH
-    | YEAR
 //--DEFAULT-NON-RESERVED-END
     ;
 
@@ -1501,7 +1485,6 @@ CURRENT_USER: 'CURRENT_USER';
 DATA: 'DATA';
 DATABASE: 'DATABASE';
 DATABASES: 'DATABASES' | 'SCHEMAS';
-DAY: 'DAY';
 DBPROPERTIES: 'DBPROPERTIES';
 DEFINED: 'DEFINED';
 DELETE: 'DELETE';
@@ -1547,7 +1530,6 @@ GRANT: 'GRANT';
 GROUP: 'GROUP';
 GROUPING: 'GROUPING';
 HAVING: 'HAVING';
-HOUR: 'HOUR';
 IF: 'IF';
 IGNORE: 'IGNORE';
 IMPORT: 'IMPORT';
@@ -1584,8 +1566,6 @@ MACRO: 'MACRO';
 MAP: 'MAP';
 MATCHED: 'MATCHED';
 MERGE: 'MERGE';
-MINUTE: 'MINUTE';
-MONTH: 'MONTH';
 MSCK: 'MSCK';
 NAMESPACE: 'NAMESPACE';
 NAMESPACES: 'NAMESPACES';
@@ -1643,7 +1623,6 @@ ROLLUP: 'ROLLUP';
 ROW: 'ROW';
 ROWS: 'ROWS';
 SCHEMA: 'SCHEMA';
-SECOND: 'SECOND';
 SELECT: 'SELECT';
 SEMI: 'SEMI';
 SEPARATED: 'SEPARATED';
@@ -1701,7 +1680,6 @@ WHEN: 'WHEN';
 WHERE: 'WHERE';
 WINDOW: 'WINDOW';
 WITH: 'WITH';
-YEAR: 'YEAR';
 //--SPARK-KEYWORD-LIST-END
 //============================
 // End of the keywords list
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala
index 307a72e..938976e 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala
@@ -2100,7 +2100,7 @@ class AstBuilder(conf: SQLConf) extends SqlBaseBaseVisitor[AnyRef] with Logging
    */
   override def visitMultiUnitsInterval(ctx: MultiUnitsIntervalContext): CalendarInterval = {
     withOrigin(ctx) {
-      val units = ctx.intervalUnit().asScala
+      val units = ctx.unit.asScala
       val values = ctx.intervalValue().asScala
       try {
         assert(units.length == values.length)
diff --git a/sql/core/src/test/resources/sql-tests/inputs/interval.sql b/sql/core/src/test/resources/sql-tests/inputs/interval.sql
index 9ad968e..8f6cf05 100644
--- a/sql/core/src/test/resources/sql-tests/inputs/interval.sql
+++ b/sql/core/src/test/resources/sql-tests/inputs/interval.sql
@@ -55,6 +55,7 @@ select interval '15:40:32.99899999' hour to second;
 select interval '40:32.99899999' minute to second;
 select interval '40:32' minute to second;
 select interval 30 day day;
+select interval 30 days days;
 
 -- invalid day-time string intervals
 select interval '20 15:40:32.99899999' day to hour;
@@ -90,6 +91,9 @@ select interval '12:11:10' hour to second '1' year;
 select interval (-30) day;
 select interval (a + 1) day;
 select interval 30 day day day;
+select interval (-30) days;
+select interval (a + 1) days;
+select interval 30 days days days;
 
 -- Interval year-month arithmetic
 
diff --git a/sql/core/src/test/resources/sql-tests/results/ansi/datetime.sql.out b/sql/core/src/test/resources/sql-tests/results/ansi/datetime.sql.out
index ec007aa..7e5568d 100644
--- a/sql/core/src/test/resources/sql-tests/results/ansi/datetime.sql.out
+++ b/sql/core/src/test/resources/sql-tests/results/ansi/datetime.sql.out
@@ -114,15 +114,9 @@ struct<weekday(CAST(2007-02-03 AS DATE)):int,weekday(CAST(2009-07-30 AS DATE)):i
 -- !query
 select year('1500-01-01'), month('1500-01-01'), dayOfYear('1500-01-01')
 -- !query schema
-struct<>
+struct<year(CAST(1500-01-01 AS DATE)):int,month(CAST(1500-01-01 AS DATE)):int,dayofyear(CAST(1500-01-01 AS DATE)):int>
 -- !query output
-org.apache.spark.sql.catalyst.parser.ParseException
-
-no viable alternative at input 'year'(line 1, pos 7)
-
-== SQL ==
-select year('1500-01-01'), month('1500-01-01'), dayOfYear('1500-01-01')
--------^^^
+1500	1	1
 
 
 -- !query
diff --git a/sql/core/src/test/resources/sql-tests/results/ansi/interval.sql.out b/sql/core/src/test/resources/sql-tests/results/ansi/interval.sql.out
index 5a66db9..2cee0c8 100644
--- a/sql/core/src/test/resources/sql-tests/results/ansi/interval.sql.out
+++ b/sql/core/src/test/resources/sql-tests/results/ansi/interval.sql.out
@@ -1,5 +1,5 @@
 -- Automatically generated by SQLQueryTestSuite
--- Number of queries: 103
+-- Number of queries: 107
 
 
 -- !query
@@ -355,15 +355,17 @@ struct<INTERVAL '40 minutes 32 seconds':interval>
 -- !query
 select interval 30 day day
 -- !query schema
-struct<>
+struct<day:interval>
 -- !query output
-org.apache.spark.sql.catalyst.parser.ParseException
+30 days
 
-no viable alternative at input 'day'(line 1, pos 23)
 
-== SQL ==
-select interval 30 day day
------------------------^^^
+-- !query
+select interval 30 days days
+-- !query schema
+struct<days:interval>
+-- !query output
+30 days
 
 
 -- !query
@@ -655,41 +657,63 @@ select interval (-30) day
 -- !query schema
 struct<>
 -- !query output
-org.apache.spark.sql.catalyst.parser.ParseException
+org.apache.spark.sql.AnalysisException
+Undefined function: 'interval'. This function is neither a registered temporary function nor a permanent function registered in the database 'default'.; line 1 pos 7
 
-no viable alternative at input 'day'(line 1, pos 22)
 
-== SQL ==
-select interval (-30) day
-----------------------^^^
+-- !query
+select interval (a + 1) day
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.AnalysisException
+Undefined function: 'interval'. This function is neither a registered temporary function nor a permanent function registered in the database 'default'.; line 1 pos 7
 
 
 -- !query
-select interval (a + 1) day
+select interval 30 day day day
 -- !query schema
 struct<>
 -- !query output
 org.apache.spark.sql.catalyst.parser.ParseException
 
-no viable alternative at input 'day'(line 1, pos 24)
+extraneous input 'day' expecting {<EOF>, ';'}(line 1, pos 27)
 
 == SQL ==
-select interval (a + 1) day
-------------------------^^^
+select interval 30 day day day
+---------------------------^^^
 
 
 -- !query
-select interval 30 day day day
+select interval (-30) days
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.AnalysisException
+Undefined function: 'interval'. This function is neither a registered temporary function nor a permanent function registered in the database 'default'.; line 1 pos 7
+
+
+-- !query
+select interval (a + 1) days
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.AnalysisException
+Undefined function: 'interval'. This function is neither a registered temporary function nor a permanent function registered in the database 'default'.; line 1 pos 7
+
+
+-- !query
+select interval 30 days days days
 -- !query schema
 struct<>
 -- !query output
 org.apache.spark.sql.catalyst.parser.ParseException
 
-no viable alternative at input 'day'(line 1, pos 23)
+extraneous input 'days' expecting {<EOF>, ';'}(line 1, pos 29)
 
 == SQL ==
-select interval 30 day day day
------------------------^^^
+select interval 30 days days days
+-----------------------------^^^
 
 
 -- !query
diff --git a/sql/core/src/test/resources/sql-tests/results/interval.sql.out b/sql/core/src/test/resources/sql-tests/results/interval.sql.out
index baf7f16..3068e8e 100644
--- a/sql/core/src/test/resources/sql-tests/results/interval.sql.out
+++ b/sql/core/src/test/resources/sql-tests/results/interval.sql.out
@@ -1,5 +1,5 @@
 -- Automatically generated by SQLQueryTestSuite
--- Number of queries: 103
+-- Number of queries: 107
 
 
 -- !query
@@ -355,6 +355,14 @@ struct<day:interval>
 
 
 -- !query
+select interval 30 days days
+-- !query schema
+struct<days:interval>
+-- !query output
+30 days
+
+
+-- !query
 select interval '20 15:40:32.99899999' day to hour
 -- !query schema
 struct<>
@@ -671,6 +679,38 @@ select interval 30 day day day
 
 
 -- !query
+select interval (-30) days
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.AnalysisException
+Undefined function: 'interval'. This function is neither a registered temporary function nor a permanent function registered in the database 'default'.; line 1 pos 7
+
+
+-- !query
+select interval (a + 1) days
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.AnalysisException
+Undefined function: 'interval'. This function is neither a registered temporary function nor a permanent function registered in the database 'default'.; line 1 pos 7
+
+
+-- !query
+select interval 30 days days days
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.catalyst.parser.ParseException
+
+extraneous input 'days' expecting {<EOF>, ';'}(line 1, pos 29)
+
+== SQL ==
+select interval 30 days days days
+-----------------------------^^^
+
+
+-- !query
 create temporary view interval_arithmetic as
   select CAST(dateval AS date), CAST(tsval AS timestamp) from values
     ('2012-01-01', '2012-01-01')
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/TPCDSQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/TPCDSQuerySuite.scala
index e72c31f..f4c6fb9b 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/TPCDSQuerySuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/TPCDSQuerySuite.scala
@@ -17,6 +17,7 @@
 
 package org.apache.spark.sql
 
+import org.apache.spark.SparkConf
 import org.apache.spark.sql.catalyst.TableIdentifier
 import org.apache.spark.sql.catalyst.util.resourceToString
 import org.apache.spark.sql.internal.SQLConf
@@ -121,3 +122,8 @@ class TPCDSQueryWithStatsSuite extends TPCDSQuerySuite {
     SQLConf.JOIN_REORDER_ENABLED.key -> "true"
   )
 }
+
+class TPCDSQueryANSISuite extends TPCDSQuerySuite {
+  override protected def sparkConf: SparkConf =
+    super.sparkConf.set(SQLConf.ANSI_ENABLED, true)
+}


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org