You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by do...@apache.org on 2020/08/29 21:10:35 UTC

[spark] branch master updated: [SPARK-32718][SQL] Remove unnecessary keywords for interval units

This is an automated email from the ASF dual-hosted git repository.

dongjoon pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/master by this push:
     new ccc0250  [SPARK-32718][SQL] Remove unnecessary keywords for interval units
ccc0250 is described below

commit ccc0250a08fa9519a6dc3c1153e51c1e110f1d7d
Author: Wenchen Fan <we...@databricks.com>
AuthorDate: Sat Aug 29 14:06:01 2020 -0700

    [SPARK-32718][SQL] Remove unnecessary keywords for interval units
    
    ### What changes were proposed in this pull request?
    
    Remove the YEAR, MONTH, DAY, HOUR, MINUTE, SECOND keywords. They are not useful in the parser, as we need to support plural forms like YEARS, so the parser has to accept a general identifier as the interval unit anyway.
    
    ### Why are the changes needed?
    
    These keywords are reserved in the ANSI SQL standard. If Spark defines these keywords, they become reserved under ANSI mode, which prevents Spark from running TPCDS queries, as they use YEAR as an alias name.
    
    ### Does this PR introduce _any_ user-facing change?
    
    No
    
    ### How was this patch tested?
    
    Added `TPCDSQueryANSISuite` to make sure Spark can run TPCDS queries with ANSI mode enabled.
    
    Closes #29560 from cloud-fan/keyword.
    
    Authored-by: Wenchen Fan <we...@databricks.com>
    Signed-off-by: Dongjoon Hyun <do...@apache.org>
---
 docs/sql-ref-ansi-compliance.md                    |  6 --
 .../apache/spark/sql/catalyst/parser/SqlBase.g4    | 26 +--------
 .../spark/sql/catalyst/parser/AstBuilder.scala     |  2 +-
 .../test/resources/sql-tests/inputs/interval.sql   |  4 ++
 .../sql-tests/results/ansi/datetime.sql.out        | 10 +---
 .../sql-tests/results/ansi/interval.sql.out        | 64 +++++++++++++++-------
 .../resources/sql-tests/results/interval.sql.out   | 42 +++++++++++++-
 .../org/apache/spark/sql/TPCDSQuerySuite.scala     |  7 +++
 8 files changed, 101 insertions(+), 60 deletions(-)

diff --git a/docs/sql-ref-ansi-compliance.md b/docs/sql-ref-ansi-compliance.md
index e786c0b..d6e9931 100644
--- a/docs/sql-ref-ansi-compliance.md
+++ b/docs/sql-ref-ansi-compliance.md
@@ -181,7 +181,6 @@ Below is a list of all the keywords in Spark SQL.
 |DATA|non-reserved|non-reserved|non-reserved|
 |DATABASE|non-reserved|non-reserved|non-reserved|
 |DATABASES|non-reserved|non-reserved|non-reserved|
-|DAY|reserved|non-reserved|reserved|
 |DBPROPERTIES|non-reserved|non-reserved|non-reserved|
 |DEFINED|non-reserved|non-reserved|non-reserved|
 |DELETE|non-reserved|non-reserved|reserved|
@@ -227,7 +226,6 @@ Below is a list of all the keywords in Spark SQL.
 |GROUP|reserved|non-reserved|reserved|
 |GROUPING|non-reserved|non-reserved|reserved|
 |HAVING|reserved|non-reserved|reserved|
-|HOUR|reserved|non-reserved|reserved|
 |IF|non-reserved|non-reserved|not a keyword|
 |IGNORE|non-reserved|non-reserved|non-reserved|
 |IMPORT|non-reserved|non-reserved|non-reserved|
@@ -265,8 +263,6 @@ Below is a list of all the keywords in Spark SQL.
 |MATCHED|non-reserved|non-reserved|non-reserved|
 |MERGE|non-reserved|non-reserved|non-reserved|
 |MINUS|non-reserved|strict-non-reserved|non-reserved|
-|MINUTE|reserved|non-reserved|reserved|
-|MONTH|reserved|non-reserved|reserved|
 |MSCK|non-reserved|non-reserved|non-reserved|
 |NAMESPACE|non-reserved|non-reserved|non-reserved|
 |NAMESPACES|non-reserved|non-reserved|non-reserved|
@@ -326,7 +322,6 @@ Below is a list of all the keywords in Spark SQL.
 |ROWS|non-reserved|non-reserved|reserved|
 |SCHEMA|non-reserved|non-reserved|non-reserved|
 |SCHEMAS|non-reserved|non-reserved|not a keyword|
-|SECOND|reserved|non-reserved|reserved|
 |SELECT|reserved|non-reserved|reserved|
 |SEMI|non-reserved|strict-non-reserved|non-reserved|
 |SEPARATED|non-reserved|non-reserved|non-reserved|
@@ -385,5 +380,4 @@ Below is a list of all the keywords in Spark SQL.
 |WHERE|reserved|non-reserved|reserved|
 |WINDOW|non-reserved|non-reserved|reserved|
 |WITH|reserved|non-reserved|reserved|
-|YEAR|reserved|non-reserved|reserved|
 |ZONE|non-reserved|non-reserved|non-reserved|
diff --git a/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 b/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4
index 6fce781..ad0de52 100644
--- a/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4
+++ b/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4
@@ -850,7 +850,7 @@ errorCapturingMultiUnitsInterval
     ;
 
 multiUnitsInterval
-    : (intervalValue intervalUnit)+
+    : (intervalValue unit+=identifier)+
     ;
 
 errorCapturingUnitToUnitInterval
@@ -858,7 +858,7 @@ errorCapturingUnitToUnitInterval
     ;
 
 unitToUnitInterval
-    : value=intervalValue from=intervalUnit TO to=intervalUnit
+    : value=intervalValue from=identifier TO to=identifier
     ;
 
 intervalValue
@@ -866,16 +866,6 @@ intervalValue
     | STRING
     ;
 
-intervalUnit
-    : DAY
-    | HOUR
-    | MINUTE
-    | MONTH
-    | SECOND
-    | YEAR
-    | identifier
-    ;
-
 colPosition
     : position=FIRST | position=AFTER afterCol=errorCapturingIdentifier
     ;
@@ -1285,7 +1275,6 @@ nonReserved
     | DATA
     | DATABASE
     | DATABASES
-    | DAY
     | DBPROPERTIES
     | DEFINED
     | DELETE
@@ -1329,7 +1318,6 @@ nonReserved
     | GROUP
     | GROUPING
     | HAVING
-    | HOUR
     | IF
     | IGNORE
     | IMPORT
@@ -1362,8 +1350,6 @@ nonReserved
     | MAP
     | MATCHED
     | MERGE
-    | MINUTE
-    | MONTH
     | MSCK
     | NAMESPACE
     | NAMESPACES
@@ -1418,7 +1404,6 @@ nonReserved
     | ROW
     | ROWS
     | SCHEMA
-    | SECOND
     | SELECT
     | SEPARATED
     | SERDE
@@ -1473,7 +1458,6 @@ nonReserved
     | WHERE
     | WINDOW
     | WITH
-    | YEAR
     | ZONE
 //--DEFAULT-NON-RESERVED-END
     ;
@@ -1537,7 +1521,6 @@ CURRENT_USER: 'CURRENT_USER';
 DATA: 'DATA';
 DATABASE: 'DATABASE';
 DATABASES: 'DATABASES' | 'SCHEMAS';
-DAY: 'DAY';
 DBPROPERTIES: 'DBPROPERTIES';
 DEFINED: 'DEFINED';
 DELETE: 'DELETE';
@@ -1583,7 +1566,6 @@ GRANT: 'GRANT';
 GROUP: 'GROUP';
 GROUPING: 'GROUPING';
 HAVING: 'HAVING';
-HOUR: 'HOUR';
 IF: 'IF';
 IGNORE: 'IGNORE';
 IMPORT: 'IMPORT';
@@ -1620,8 +1602,6 @@ MACRO: 'MACRO';
 MAP: 'MAP';
 MATCHED: 'MATCHED';
 MERGE: 'MERGE';
-MINUTE: 'MINUTE';
-MONTH: 'MONTH';
 MSCK: 'MSCK';
 NAMESPACE: 'NAMESPACE';
 NAMESPACES: 'NAMESPACES';
@@ -1679,7 +1659,6 @@ ROLLUP: 'ROLLUP';
 ROW: 'ROW';
 ROWS: 'ROWS';
 SCHEMA: 'SCHEMA';
-SECOND: 'SECOND';
 SELECT: 'SELECT';
 SEMI: 'SEMI';
 SEPARATED: 'SEPARATED';
@@ -1738,7 +1717,6 @@ WHEN: 'WHEN';
 WHERE: 'WHERE';
 WINDOW: 'WINDOW';
 WITH: 'WITH';
-YEAR: 'YEAR';
 ZONE: 'ZONE';
 //--SPARK-KEYWORD-LIST-END
 //============================
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala
index 716ceb4..d90c990 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala
@@ -2125,7 +2125,7 @@ class AstBuilder(conf: SQLConf) extends SqlBaseBaseVisitor[AnyRef] with Logging
    */
   override def visitMultiUnitsInterval(ctx: MultiUnitsIntervalContext): CalendarInterval = {
     withOrigin(ctx) {
-      val units = ctx.intervalUnit().asScala
+      val units = ctx.unit.asScala
       val values = ctx.intervalValue().asScala
       try {
         assert(units.length == values.length)
diff --git a/sql/core/src/test/resources/sql-tests/inputs/interval.sql b/sql/core/src/test/resources/sql-tests/inputs/interval.sql
index 7173863..a5bc43f 100644
--- a/sql/core/src/test/resources/sql-tests/inputs/interval.sql
+++ b/sql/core/src/test/resources/sql-tests/inputs/interval.sql
@@ -55,6 +55,7 @@ select interval '15:40:32.99899999' hour to second;
 select interval '40:32.99899999' minute to second;
 select interval '40:32' minute to second;
 select interval 30 day day;
+select interval 30 days days;
 
 -- invalid day-time string intervals
 select interval '20 15:40:32.99899999' day to hour;
@@ -90,6 +91,9 @@ select interval '12:11:10' hour to second '1' year;
 select interval (-30) day;
 select interval (a + 1) day;
 select interval 30 day day day;
+select interval (-30) days;
+select interval (a + 1) days;
+select interval 30 days days days;
 
 -- Interval year-month arithmetic
 
diff --git a/sql/core/src/test/resources/sql-tests/results/ansi/datetime.sql.out b/sql/core/src/test/resources/sql-tests/results/ansi/datetime.sql.out
index d2d6671..5fe0bd5 100644
--- a/sql/core/src/test/resources/sql-tests/results/ansi/datetime.sql.out
+++ b/sql/core/src/test/resources/sql-tests/results/ansi/datetime.sql.out
@@ -199,15 +199,9 @@ struct<weekday(CAST(2007-02-03 AS DATE)):int,weekday(CAST(2009-07-30 AS DATE)):i
 -- !query
 select year('1500-01-01'), month('1500-01-01'), dayOfYear('1500-01-01')
 -- !query schema
-struct<>
+struct<year(CAST(1500-01-01 AS DATE)):int,month(CAST(1500-01-01 AS DATE)):int,dayofyear(CAST(1500-01-01 AS DATE)):int>
 -- !query output
-org.apache.spark.sql.catalyst.parser.ParseException
-
-no viable alternative at input 'year'(line 1, pos 7)
-
-== SQL ==
-select year('1500-01-01'), month('1500-01-01'), dayOfYear('1500-01-01')
--------^^^
+1500	1	1
 
 
 -- !query
diff --git a/sql/core/src/test/resources/sql-tests/results/ansi/interval.sql.out b/sql/core/src/test/resources/sql-tests/results/ansi/interval.sql.out
index f80bea1..c1c8a70 100644
--- a/sql/core/src/test/resources/sql-tests/results/ansi/interval.sql.out
+++ b/sql/core/src/test/resources/sql-tests/results/ansi/interval.sql.out
@@ -1,5 +1,5 @@
 -- Automatically generated by SQLQueryTestSuite
--- Number of queries: 94
+-- Number of queries: 98
 
 
 -- !query
@@ -355,15 +355,17 @@ struct<INTERVAL '40 minutes 32 seconds':interval>
 -- !query
 select interval 30 day day
 -- !query schema
-struct<>
+struct<day:interval>
 -- !query output
-org.apache.spark.sql.catalyst.parser.ParseException
+30 days
 
-no viable alternative at input 'day'(line 1, pos 23)
 
-== SQL ==
-select interval 30 day day
------------------------^^^
+-- !query
+select interval 30 days days
+-- !query schema
+struct<days:interval>
+-- !query output
+30 days
 
 
 -- !query
@@ -655,41 +657,63 @@ select interval (-30) day
 -- !query schema
 struct<>
 -- !query output
-org.apache.spark.sql.catalyst.parser.ParseException
+org.apache.spark.sql.AnalysisException
+Undefined function: 'interval'. This function is neither a registered temporary function nor a permanent function registered in the database 'default'.; line 1 pos 7
 
-no viable alternative at input 'day'(line 1, pos 22)
 
-== SQL ==
-select interval (-30) day
-----------------------^^^
+-- !query
+select interval (a + 1) day
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.AnalysisException
+Undefined function: 'interval'. This function is neither a registered temporary function nor a permanent function registered in the database 'default'.; line 1 pos 7
 
 
 -- !query
-select interval (a + 1) day
+select interval 30 day day day
 -- !query schema
 struct<>
 -- !query output
 org.apache.spark.sql.catalyst.parser.ParseException
 
-no viable alternative at input 'day'(line 1, pos 24)
+extraneous input 'day' expecting {<EOF>, ';'}(line 1, pos 27)
 
 == SQL ==
-select interval (a + 1) day
-------------------------^^^
+select interval 30 day day day
+---------------------------^^^
 
 
 -- !query
-select interval 30 day day day
+select interval (-30) days
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.AnalysisException
+Undefined function: 'interval'. This function is neither a registered temporary function nor a permanent function registered in the database 'default'.; line 1 pos 7
+
+
+-- !query
+select interval (a + 1) days
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.AnalysisException
+Undefined function: 'interval'. This function is neither a registered temporary function nor a permanent function registered in the database 'default'.; line 1 pos 7
+
+
+-- !query
+select interval 30 days days days
 -- !query schema
 struct<>
 -- !query output
 org.apache.spark.sql.catalyst.parser.ParseException
 
-no viable alternative at input 'day'(line 1, pos 23)
+extraneous input 'days' expecting {<EOF>, ';'}(line 1, pos 29)
 
 == SQL ==
-select interval 30 day day day
------------------------^^^
+select interval 30 days days days
+-----------------------------^^^
 
 
 -- !query
diff --git a/sql/core/src/test/resources/sql-tests/results/interval.sql.out b/sql/core/src/test/resources/sql-tests/results/interval.sql.out
index 297c4fc..31294cc 100644
--- a/sql/core/src/test/resources/sql-tests/results/interval.sql.out
+++ b/sql/core/src/test/resources/sql-tests/results/interval.sql.out
@@ -1,5 +1,5 @@
 -- Automatically generated by SQLQueryTestSuite
--- Number of queries: 94
+-- Number of queries: 98
 
 
 -- !query
@@ -355,6 +355,14 @@ struct<day:interval>
 
 
 -- !query
+select interval 30 days days
+-- !query schema
+struct<days:interval>
+-- !query output
+30 days
+
+
+-- !query
 select interval '20 15:40:32.99899999' day to hour
 -- !query schema
 struct<>
@@ -671,6 +679,38 @@ select interval 30 day day day
 
 
 -- !query
+select interval (-30) days
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.AnalysisException
+Undefined function: 'interval'. This function is neither a registered temporary function nor a permanent function registered in the database 'default'.; line 1 pos 7
+
+
+-- !query
+select interval (a + 1) days
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.AnalysisException
+Undefined function: 'interval'. This function is neither a registered temporary function nor a permanent function registered in the database 'default'.; line 1 pos 7
+
+
+-- !query
+select interval 30 days days days
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.catalyst.parser.ParseException
+
+extraneous input 'days' expecting {<EOF>, ';'}(line 1, pos 29)
+
+== SQL ==
+select interval 30 days days days
+-----------------------------^^^
+
+
+-- !query
 create temporary view interval_arithmetic as
   select CAST(dateval AS date), CAST(tsval AS timestamp), dateval as strval from values
     ('2012-01-01', '2012-01-01')
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/TPCDSQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/TPCDSQuerySuite.scala
index 30751af..decd1d6 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/TPCDSQuerySuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/TPCDSQuerySuite.scala
@@ -17,7 +17,9 @@
 
 package org.apache.spark.sql
 
+import org.apache.spark.SparkConf
 import org.apache.spark.sql.catalyst.util.resourceToString
+import org.apache.spark.sql.internal.SQLConf
 
 /**
  * This test suite ensures all the TPC-DS queries can be successfully analyzed, optimized
@@ -65,3 +67,8 @@ class TPCDSQuerySuite extends BenchmarkQueryTest with TPCDSBase {
 class TPCDSQueryWithStatsSuite extends TPCDSQuerySuite {
   override def injectStats: Boolean = true
 }
+
+class TPCDSQueryANSISuite extends TPCDSQuerySuite {
+  override protected def sparkConf: SparkConf =
+    super.sparkConf.set(SQLConf.ANSI_ENABLED, true)
+}


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org