You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by ma...@apache.org on 2022/03/02 16:43:12 UTC

[spark] branch master updated: [SPARK-38389][SQL] Add the `DATEDIFF()` and `DATE_DIFF()` aliases for `TIMESTAMPDIFF()`

This is an automated email from the ASF dual-hosted git repository.

maxgekk pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/master by this push:
     new f960328  [SPARK-38389][SQL] Add the `DATEDIFF()` and `DATE_DIFF()` aliases for `TIMESTAMPDIFF()`
f960328 is described below

commit f9603287d4a74f4be0486750fdee86386291503e
Author: Max Gekk <ma...@gmail.com>
AuthorDate: Wed Mar 2 19:41:51 2022 +0300

    [SPARK-38389][SQL] Add the `DATEDIFF()` and `DATE_DIFF()` aliases for `TIMESTAMPDIFF()`
    
    ### What changes were proposed in this pull request?
    In the PR, I propose to add two aliases for the `TIMESTAMPDIFF()` function introduced by https://github.com/apache/spark/pull/35607:
    - `DATEDIFF()`
    - `DATE_DIFF()`
    
    ### Why are the changes needed?
    1. To make the migration process from other systems to Spark SQL easier.
    2. To achieve feature parity with other DBMSs.
    
    ### Does this PR introduce _any_ user-facing change?
    No. The new aliases just extend Spark SQL API.
    
    ### How was this patch tested?
    1. By running the existing test suites:
    ```
    $ build/sbt "test:testOnly *SQLKeywordSuite"
    ```
    3. and new checks:
    ```
    $ build/sbt "sql/testOnly org.apache.spark.sql.SQLQueryTestSuite -- -z date.sql"
    $ build/sbt "sql/testOnly org.apache.spark.sql.SQLQueryTestSuite -- -z datetime-legacy.sql"
    ```
    
    Closes #35709 from MaxGekk/datediff.
    
    Authored-by: Max Gekk <ma...@gmail.com>
    Signed-off-by: Max Gekk <ma...@gmail.com>
---
 docs/sql-ref-ansi-compliance.md                    |  2 +
 .../apache/spark/sql/catalyst/parser/SqlBase.g4    |  8 ++-
 .../src/test/resources/sql-tests/inputs/date.sql   | 12 ++++
 .../resources/sql-tests/results/ansi/date.sql.out  | 82 +++++++++++++++++++++-
 .../test/resources/sql-tests/results/date.sql.out  | 82 +++++++++++++++++++++-
 .../sql-tests/results/datetime-legacy.sql.out      | 82 +++++++++++++++++++++-
 6 files changed, 264 insertions(+), 4 deletions(-)

diff --git a/docs/sql-ref-ansi-compliance.md b/docs/sql-ref-ansi-compliance.md
index 46bf415..7646206 100644
--- a/docs/sql-ref-ansi-compliance.md
+++ b/docs/sql-ref-ansi-compliance.md
@@ -392,6 +392,8 @@ Below is a list of all the keywords in Spark SQL.
 |DATABASES|non-reserved|non-reserved|non-reserved|
 |DATEADD|non-reserved|non-reserved|non-reserved|
 |DATE_ADD|non-reserved|non-reserved|non-reserved|
+|DATEDIFF|non-reserved|non-reserved|non-reserved|
+|DATE_DIFF|non-reserved|non-reserved|non-reserved|
 |DAY|non-reserved|non-reserved|non-reserved|
 |DBPROPERTIES|non-reserved|non-reserved|non-reserved|
 |DEFINED|non-reserved|non-reserved|non-reserved|
diff --git a/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 b/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4
index ae57b42..3f5052d 100644
--- a/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4
+++ b/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4
@@ -861,7 +861,7 @@ valueExpression
 primaryExpression
     : name=(CURRENT_DATE | CURRENT_TIMESTAMP | CURRENT_USER)                                   #currentLike
     | name=(TIMESTAMPADD | DATEADD | DATE_ADD) '(' unit=identifier ',' unitsAmount=valueExpression ',' timestamp=valueExpression ')'  #timestampadd
-    | TIMESTAMPDIFF '(' unit=identifier ',' startTimestamp=valueExpression ',' endTimestamp=valueExpression ')'  #timestampdiff
+    | name=(TIMESTAMPDIFF | DATEDIFF | DATE_DIFF) '(' unit=identifier ',' startTimestamp=valueExpression ',' endTimestamp=valueExpression ')'  #timestampdiff
     | CASE whenClause+ (ELSE elseExpression=expression)? END                                   #searchedCase
     | CASE value=expression whenClause+ (ELSE elseExpression=expression)? END                  #simpleCase
     | name=(CAST | TRY_CAST) '(' expression AS dataType ')'                                    #cast
@@ -1133,6 +1133,8 @@ ansiNonReserved
     | DATABASES
     | DATEADD
     | DATE_ADD
+    | DATEDIFF
+    | DATE_DIFF
     | DAY
     | DBPROPERTIES
     | DEFINED
@@ -1383,6 +1385,8 @@ nonReserved
     | DATABASES
     | DATEADD
     | DATE_ADD
+    | DATEDIFF
+    | DATE_DIFF
     | DAY
     | DBPROPERTIES
     | DEFINED
@@ -1653,6 +1657,8 @@ DATABASE: 'DATABASE';
 DATABASES: 'DATABASES';
 DATEADD: 'DATEADD';
 DATE_ADD: 'DATE_ADD';
+DATEDIFF: 'DATEDIFF';
+DATE_DIFF: 'DATE_DIFF';
 DBPROPERTIES: 'DBPROPERTIES';
 DEFINED: 'DEFINED';
 DELETE: 'DELETE';
diff --git a/sql/core/src/test/resources/sql-tests/inputs/date.sql b/sql/core/src/test/resources/sql-tests/inputs/date.sql
index 6fcba1d..4c8d5a7 100644
--- a/sql/core/src/test/resources/sql-tests/inputs/date.sql
+++ b/sql/core/src/test/resources/sql-tests/inputs/date.sql
@@ -152,3 +152,15 @@ select dateadd(WEEK, -4, timestamp'2022-02-25 01:02:03');
 select date_add(MONTH, -1, timestamp'2022-02-25 01:02:03');
 select dateadd(QUARTER, 5, date'2022-02-25');
 select date_add(YEAR, 1, date'2022-02-25');
+
+-- Get the difference between timestamps or dates in the specified units
+select date_diff(MICROSECOND, timestamp'2022-02-25 01:02:03.123', timestamp'2022-02-25 01:02:03.124001');
+select datediff(MILLISECOND, timestamp'2022-02-25 01:02:03.456', timestamp'2022-02-25 01:02:03.455');
+select date_diff(SECOND, timestamp'2022-02-25 01:02:03', timestamp'2022-02-25 01:03:01');
+select datediff(MINUTE, date'2022-02-25', timestamp'2022-02-24 22:20:00');
+select date_diff(HOUR, timestamp'2022-02-25 01:02:03', timestamp'2022-02-25 00:02:03');
+select datediff(DAY, date'2022-02-25', timestamp'2023-02-27 00:00:00');
+select date_diff(WEEK, timestamp'2022-02-25 01:02:03', timestamp'2022-01-28 01:02:03');
+select datediff(MONTH, timestamp'2022-02-25 01:02:03', timestamp'2022-01-25 01:02:03');
+select date_diff(QUARTER, date'2022-02-25', date'2023-05-25');
+select datediff(YEAR, date'2022-02-25', date'2023-02-25');
diff --git a/sql/core/src/test/resources/sql-tests/results/ansi/date.sql.out b/sql/core/src/test/resources/sql-tests/results/ansi/date.sql.out
index 07989ae..a21512f 100644
--- a/sql/core/src/test/resources/sql-tests/results/ansi/date.sql.out
+++ b/sql/core/src/test/resources/sql-tests/results/ansi/date.sql.out
@@ -1,5 +1,5 @@
 -- Automatically generated by SQLQueryTestSuite
--- Number of queries: 87
+-- Number of queries: 97
 
 
 -- !query
@@ -740,3 +740,83 @@ select date_add(YEAR, 1, date'2022-02-25')
 struct<timestampadd(YEAR, 1, DATE '2022-02-25'):timestamp>
 -- !query output
 2023-02-25 00:00:00
+
+
+-- !query
+select date_diff(MICROSECOND, timestamp'2022-02-25 01:02:03.123', timestamp'2022-02-25 01:02:03.124001')
+-- !query schema
+struct<timestampdiff(MICROSECOND, TIMESTAMP '2022-02-25 01:02:03.123', TIMESTAMP '2022-02-25 01:02:03.124001'):bigint>
+-- !query output
+1001
+
+
+-- !query
+select datediff(MILLISECOND, timestamp'2022-02-25 01:02:03.456', timestamp'2022-02-25 01:02:03.455')
+-- !query schema
+struct<timestampdiff(MILLISECOND, TIMESTAMP '2022-02-25 01:02:03.456', TIMESTAMP '2022-02-25 01:02:03.455'):bigint>
+-- !query output
+-1
+
+
+-- !query
+select date_diff(SECOND, timestamp'2022-02-25 01:02:03', timestamp'2022-02-25 01:03:01')
+-- !query schema
+struct<timestampdiff(SECOND, TIMESTAMP '2022-02-25 01:02:03', TIMESTAMP '2022-02-25 01:03:01'):bigint>
+-- !query output
+58
+
+
+-- !query
+select datediff(MINUTE, date'2022-02-25', timestamp'2022-02-24 22:20:00')
+-- !query schema
+struct<timestampdiff(MINUTE, DATE '2022-02-25', TIMESTAMP '2022-02-24 22:20:00'):bigint>
+-- !query output
+-100
+
+
+-- !query
+select date_diff(HOUR, timestamp'2022-02-25 01:02:03', timestamp'2022-02-25 00:02:03')
+-- !query schema
+struct<timestampdiff(HOUR, TIMESTAMP '2022-02-25 01:02:03', TIMESTAMP '2022-02-25 00:02:03'):bigint>
+-- !query output
+-1
+
+
+-- !query
+select datediff(DAY, date'2022-02-25', timestamp'2023-02-27 00:00:00')
+-- !query schema
+struct<timestampdiff(DAY, DATE '2022-02-25', TIMESTAMP '2023-02-27 00:00:00'):bigint>
+-- !query output
+367
+
+
+-- !query
+select date_diff(WEEK, timestamp'2022-02-25 01:02:03', timestamp'2022-01-28 01:02:03')
+-- !query schema
+struct<timestampdiff(WEEK, TIMESTAMP '2022-02-25 01:02:03', TIMESTAMP '2022-01-28 01:02:03'):bigint>
+-- !query output
+-4
+
+
+-- !query
+select datediff(MONTH, timestamp'2022-02-25 01:02:03', timestamp'2022-01-25 01:02:03')
+-- !query schema
+struct<timestampdiff(MONTH, TIMESTAMP '2022-02-25 01:02:03', TIMESTAMP '2022-01-25 01:02:03'):bigint>
+-- !query output
+-1
+
+
+-- !query
+select date_diff(QUARTER, date'2022-02-25', date'2023-05-25')
+-- !query schema
+struct<timestampdiff(QUARTER, DATE '2022-02-25', DATE '2023-05-25'):bigint>
+-- !query output
+5
+
+
+-- !query
+select datediff(YEAR, date'2022-02-25', date'2023-02-25')
+-- !query schema
+struct<timestampdiff(YEAR, DATE '2022-02-25', DATE '2023-02-25'):bigint>
+-- !query output
+1
diff --git a/sql/core/src/test/resources/sql-tests/results/date.sql.out b/sql/core/src/test/resources/sql-tests/results/date.sql.out
index e3a2d7d..bd32361 100644
--- a/sql/core/src/test/resources/sql-tests/results/date.sql.out
+++ b/sql/core/src/test/resources/sql-tests/results/date.sql.out
@@ -1,5 +1,5 @@
 -- Automatically generated by SQLQueryTestSuite
--- Number of queries: 87
+-- Number of queries: 97
 
 
 -- !query
@@ -739,3 +739,83 @@ select date_add(YEAR, 1, date'2022-02-25')
 struct<timestampadd(YEAR, 1, DATE '2022-02-25'):timestamp>
 -- !query output
 2023-02-25 00:00:00
+
+
+-- !query
+select date_diff(MICROSECOND, timestamp'2022-02-25 01:02:03.123', timestamp'2022-02-25 01:02:03.124001')
+-- !query schema
+struct<timestampdiff(MICROSECOND, TIMESTAMP '2022-02-25 01:02:03.123', TIMESTAMP '2022-02-25 01:02:03.124001'):bigint>
+-- !query output
+1001
+
+
+-- !query
+select datediff(MILLISECOND, timestamp'2022-02-25 01:02:03.456', timestamp'2022-02-25 01:02:03.455')
+-- !query schema
+struct<timestampdiff(MILLISECOND, TIMESTAMP '2022-02-25 01:02:03.456', TIMESTAMP '2022-02-25 01:02:03.455'):bigint>
+-- !query output
+-1
+
+
+-- !query
+select date_diff(SECOND, timestamp'2022-02-25 01:02:03', timestamp'2022-02-25 01:03:01')
+-- !query schema
+struct<timestampdiff(SECOND, TIMESTAMP '2022-02-25 01:02:03', TIMESTAMP '2022-02-25 01:03:01'):bigint>
+-- !query output
+58
+
+
+-- !query
+select datediff(MINUTE, date'2022-02-25', timestamp'2022-02-24 22:20:00')
+-- !query schema
+struct<timestampdiff(MINUTE, DATE '2022-02-25', TIMESTAMP '2022-02-24 22:20:00'):bigint>
+-- !query output
+-100
+
+
+-- !query
+select date_diff(HOUR, timestamp'2022-02-25 01:02:03', timestamp'2022-02-25 00:02:03')
+-- !query schema
+struct<timestampdiff(HOUR, TIMESTAMP '2022-02-25 01:02:03', TIMESTAMP '2022-02-25 00:02:03'):bigint>
+-- !query output
+-1
+
+
+-- !query
+select datediff(DAY, date'2022-02-25', timestamp'2023-02-27 00:00:00')
+-- !query schema
+struct<timestampdiff(DAY, DATE '2022-02-25', TIMESTAMP '2023-02-27 00:00:00'):bigint>
+-- !query output
+367
+
+
+-- !query
+select date_diff(WEEK, timestamp'2022-02-25 01:02:03', timestamp'2022-01-28 01:02:03')
+-- !query schema
+struct<timestampdiff(WEEK, TIMESTAMP '2022-02-25 01:02:03', TIMESTAMP '2022-01-28 01:02:03'):bigint>
+-- !query output
+-4
+
+
+-- !query
+select datediff(MONTH, timestamp'2022-02-25 01:02:03', timestamp'2022-01-25 01:02:03')
+-- !query schema
+struct<timestampdiff(MONTH, TIMESTAMP '2022-02-25 01:02:03', TIMESTAMP '2022-01-25 01:02:03'):bigint>
+-- !query output
+-1
+
+
+-- !query
+select date_diff(QUARTER, date'2022-02-25', date'2023-05-25')
+-- !query schema
+struct<timestampdiff(QUARTER, DATE '2022-02-25', DATE '2023-05-25'):bigint>
+-- !query output
+5
+
+
+-- !query
+select datediff(YEAR, date'2022-02-25', date'2023-02-25')
+-- !query schema
+struct<timestampdiff(YEAR, DATE '2022-02-25', DATE '2023-02-25'):bigint>
+-- !query output
+1
diff --git a/sql/core/src/test/resources/sql-tests/results/datetime-legacy.sql.out b/sql/core/src/test/resources/sql-tests/results/datetime-legacy.sql.out
index 60752e3..8eeed14 100644
--- a/sql/core/src/test/resources/sql-tests/results/datetime-legacy.sql.out
+++ b/sql/core/src/test/resources/sql-tests/results/datetime-legacy.sql.out
@@ -1,5 +1,5 @@
 -- Automatically generated by SQLQueryTestSuite
--- Number of queries: 184
+-- Number of queries: 194
 
 
 -- !query
@@ -739,6 +739,86 @@ struct<timestampadd(YEAR, 1, DATE '2022-02-25'):timestamp>
 
 
 -- !query
+select date_diff(MICROSECOND, timestamp'2022-02-25 01:02:03.123', timestamp'2022-02-25 01:02:03.124001')
+-- !query schema
+struct<timestampdiff(MICROSECOND, TIMESTAMP '2022-02-25 01:02:03.123', TIMESTAMP '2022-02-25 01:02:03.124001'):bigint>
+-- !query output
+1001
+
+
+-- !query
+select datediff(MILLISECOND, timestamp'2022-02-25 01:02:03.456', timestamp'2022-02-25 01:02:03.455')
+-- !query schema
+struct<timestampdiff(MILLISECOND, TIMESTAMP '2022-02-25 01:02:03.456', TIMESTAMP '2022-02-25 01:02:03.455'):bigint>
+-- !query output
+-1
+
+
+-- !query
+select date_diff(SECOND, timestamp'2022-02-25 01:02:03', timestamp'2022-02-25 01:03:01')
+-- !query schema
+struct<timestampdiff(SECOND, TIMESTAMP '2022-02-25 01:02:03', TIMESTAMP '2022-02-25 01:03:01'):bigint>
+-- !query output
+58
+
+
+-- !query
+select datediff(MINUTE, date'2022-02-25', timestamp'2022-02-24 22:20:00')
+-- !query schema
+struct<timestampdiff(MINUTE, DATE '2022-02-25', TIMESTAMP '2022-02-24 22:20:00'):bigint>
+-- !query output
+-100
+
+
+-- !query
+select date_diff(HOUR, timestamp'2022-02-25 01:02:03', timestamp'2022-02-25 00:02:03')
+-- !query schema
+struct<timestampdiff(HOUR, TIMESTAMP '2022-02-25 01:02:03', TIMESTAMP '2022-02-25 00:02:03'):bigint>
+-- !query output
+-1
+
+
+-- !query
+select datediff(DAY, date'2022-02-25', timestamp'2023-02-27 00:00:00')
+-- !query schema
+struct<timestampdiff(DAY, DATE '2022-02-25', TIMESTAMP '2023-02-27 00:00:00'):bigint>
+-- !query output
+367
+
+
+-- !query
+select date_diff(WEEK, timestamp'2022-02-25 01:02:03', timestamp'2022-01-28 01:02:03')
+-- !query schema
+struct<timestampdiff(WEEK, TIMESTAMP '2022-02-25 01:02:03', TIMESTAMP '2022-01-28 01:02:03'):bigint>
+-- !query output
+-4
+
+
+-- !query
+select datediff(MONTH, timestamp'2022-02-25 01:02:03', timestamp'2022-01-25 01:02:03')
+-- !query schema
+struct<timestampdiff(MONTH, TIMESTAMP '2022-02-25 01:02:03', TIMESTAMP '2022-01-25 01:02:03'):bigint>
+-- !query output
+-1
+
+
+-- !query
+select date_diff(QUARTER, date'2022-02-25', date'2023-05-25')
+-- !query schema
+struct<timestampdiff(QUARTER, DATE '2022-02-25', DATE '2023-05-25'):bigint>
+-- !query output
+5
+
+
+-- !query
+select datediff(YEAR, date'2022-02-25', date'2023-02-25')
+-- !query schema
+struct<timestampdiff(YEAR, DATE '2022-02-25', DATE '2023-02-25'):bigint>
+-- !query output
+1
+
+
+-- !query
 select timestamp '2019-01-01\t'
 -- !query schema
 struct<TIMESTAMP '2019-01-01 00:00:00':timestamp>

---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org