You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by ma...@apache.org on 2022/03/02 16:43:12 UTC
[spark] branch master updated: [SPARK-38389][SQL] Add the `DATEDIFF()` and `DATE_DIFF()` aliases for `TIMESTAMPDIFF()`
This is an automated email from the ASF dual-hosted git repository.
maxgekk pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
new f960328 [SPARK-38389][SQL] Add the `DATEDIFF()` and `DATE_DIFF()` aliases for `TIMESTAMPDIFF()`
f960328 is described below
commit f9603287d4a74f4be0486750fdee86386291503e
Author: Max Gekk <ma...@gmail.com>
AuthorDate: Wed Mar 2 19:41:51 2022 +0300
[SPARK-38389][SQL] Add the `DATEDIFF()` and `DATE_DIFF()` aliases for `TIMESTAMPDIFF()`
### What changes were proposed in this pull request?
In the PR, I propose to add two aliases for the `TIMESTAMPDIFF()` function introduced by https://github.com/apache/spark/pull/35607:
- `DATEDIFF()`
- `DATE_DIFF()`
### Why are the changes needed?
1. To make the migration process from other systems to Spark SQL easier.
2. To achieve feature parity with other DBMSs.
### Does this PR introduce _any_ user-facing change?
No. The new aliases just extend Spark SQL API.
### How was this patch tested?
1. By running the existing test suites:
```
$ build/sbt "test:testOnly *SQLKeywordSuite"
```
3. and new checks:
```
$ build/sbt "sql/testOnly org.apache.spark.sql.SQLQueryTestSuite -- -z date.sql"
$ build/sbt "sql/testOnly org.apache.spark.sql.SQLQueryTestSuite -- -z datetime-legacy.sql"
```
Closes #35709 from MaxGekk/datediff.
Authored-by: Max Gekk <ma...@gmail.com>
Signed-off-by: Max Gekk <ma...@gmail.com>
---
docs/sql-ref-ansi-compliance.md | 2 +
.../apache/spark/sql/catalyst/parser/SqlBase.g4 | 8 ++-
.../src/test/resources/sql-tests/inputs/date.sql | 12 ++++
.../resources/sql-tests/results/ansi/date.sql.out | 82 +++++++++++++++++++++-
.../test/resources/sql-tests/results/date.sql.out | 82 +++++++++++++++++++++-
.../sql-tests/results/datetime-legacy.sql.out | 82 +++++++++++++++++++++-
6 files changed, 264 insertions(+), 4 deletions(-)
diff --git a/docs/sql-ref-ansi-compliance.md b/docs/sql-ref-ansi-compliance.md
index 46bf415..7646206 100644
--- a/docs/sql-ref-ansi-compliance.md
+++ b/docs/sql-ref-ansi-compliance.md
@@ -392,6 +392,8 @@ Below is a list of all the keywords in Spark SQL.
|DATABASES|non-reserved|non-reserved|non-reserved|
|DATEADD|non-reserved|non-reserved|non-reserved|
|DATE_ADD|non-reserved|non-reserved|non-reserved|
+|DATEDIFF|non-reserved|non-reserved|non-reserved|
+|DATE_DIFF|non-reserved|non-reserved|non-reserved|
|DAY|non-reserved|non-reserved|non-reserved|
|DBPROPERTIES|non-reserved|non-reserved|non-reserved|
|DEFINED|non-reserved|non-reserved|non-reserved|
diff --git a/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 b/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4
index ae57b42..3f5052d 100644
--- a/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4
+++ b/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4
@@ -861,7 +861,7 @@ valueExpression
primaryExpression
: name=(CURRENT_DATE | CURRENT_TIMESTAMP | CURRENT_USER) #currentLike
| name=(TIMESTAMPADD | DATEADD | DATE_ADD) '(' unit=identifier ',' unitsAmount=valueExpression ',' timestamp=valueExpression ')' #timestampadd
- | TIMESTAMPDIFF '(' unit=identifier ',' startTimestamp=valueExpression ',' endTimestamp=valueExpression ')' #timestampdiff
+ | name=(TIMESTAMPDIFF | DATEDIFF | DATE_DIFF) '(' unit=identifier ',' startTimestamp=valueExpression ',' endTimestamp=valueExpression ')' #timestampdiff
| CASE whenClause+ (ELSE elseExpression=expression)? END #searchedCase
| CASE value=expression whenClause+ (ELSE elseExpression=expression)? END #simpleCase
| name=(CAST | TRY_CAST) '(' expression AS dataType ')' #cast
@@ -1133,6 +1133,8 @@ ansiNonReserved
| DATABASES
| DATEADD
| DATE_ADD
+ | DATEDIFF
+ | DATE_DIFF
| DAY
| DBPROPERTIES
| DEFINED
@@ -1383,6 +1385,8 @@ nonReserved
| DATABASES
| DATEADD
| DATE_ADD
+ | DATEDIFF
+ | DATE_DIFF
| DAY
| DBPROPERTIES
| DEFINED
@@ -1653,6 +1657,8 @@ DATABASE: 'DATABASE';
DATABASES: 'DATABASES';
DATEADD: 'DATEADD';
DATE_ADD: 'DATE_ADD';
+DATEDIFF: 'DATEDIFF';
+DATE_DIFF: 'DATE_DIFF';
DBPROPERTIES: 'DBPROPERTIES';
DEFINED: 'DEFINED';
DELETE: 'DELETE';
diff --git a/sql/core/src/test/resources/sql-tests/inputs/date.sql b/sql/core/src/test/resources/sql-tests/inputs/date.sql
index 6fcba1d..4c8d5a7 100644
--- a/sql/core/src/test/resources/sql-tests/inputs/date.sql
+++ b/sql/core/src/test/resources/sql-tests/inputs/date.sql
@@ -152,3 +152,15 @@ select dateadd(WEEK, -4, timestamp'2022-02-25 01:02:03');
select date_add(MONTH, -1, timestamp'2022-02-25 01:02:03');
select dateadd(QUARTER, 5, date'2022-02-25');
select date_add(YEAR, 1, date'2022-02-25');
+
+-- Get the difference between timestamps or dates in the specified units
+select date_diff(MICROSECOND, timestamp'2022-02-25 01:02:03.123', timestamp'2022-02-25 01:02:03.124001');
+select datediff(MILLISECOND, timestamp'2022-02-25 01:02:03.456', timestamp'2022-02-25 01:02:03.455');
+select date_diff(SECOND, timestamp'2022-02-25 01:02:03', timestamp'2022-02-25 01:03:01');
+select datediff(MINUTE, date'2022-02-25', timestamp'2022-02-24 22:20:00');
+select date_diff(HOUR, timestamp'2022-02-25 01:02:03', timestamp'2022-02-25 00:02:03');
+select datediff(DAY, date'2022-02-25', timestamp'2023-02-27 00:00:00');
+select date_diff(WEEK, timestamp'2022-02-25 01:02:03', timestamp'2022-01-28 01:02:03');
+select datediff(MONTH, timestamp'2022-02-25 01:02:03', timestamp'2022-01-25 01:02:03');
+select date_diff(QUARTER, date'2022-02-25', date'2023-05-25');
+select datediff(YEAR, date'2022-02-25', date'2023-02-25');
diff --git a/sql/core/src/test/resources/sql-tests/results/ansi/date.sql.out b/sql/core/src/test/resources/sql-tests/results/ansi/date.sql.out
index 07989ae..a21512f 100644
--- a/sql/core/src/test/resources/sql-tests/results/ansi/date.sql.out
+++ b/sql/core/src/test/resources/sql-tests/results/ansi/date.sql.out
@@ -1,5 +1,5 @@
-- Automatically generated by SQLQueryTestSuite
--- Number of queries: 87
+-- Number of queries: 97
-- !query
@@ -740,3 +740,83 @@ select date_add(YEAR, 1, date'2022-02-25')
struct<timestampadd(YEAR, 1, DATE '2022-02-25'):timestamp>
-- !query output
2023-02-25 00:00:00
+
+
+-- !query
+select date_diff(MICROSECOND, timestamp'2022-02-25 01:02:03.123', timestamp'2022-02-25 01:02:03.124001')
+-- !query schema
+struct<timestampdiff(MICROSECOND, TIMESTAMP '2022-02-25 01:02:03.123', TIMESTAMP '2022-02-25 01:02:03.124001'):bigint>
+-- !query output
+1001
+
+
+-- !query
+select datediff(MILLISECOND, timestamp'2022-02-25 01:02:03.456', timestamp'2022-02-25 01:02:03.455')
+-- !query schema
+struct<timestampdiff(MILLISECOND, TIMESTAMP '2022-02-25 01:02:03.456', TIMESTAMP '2022-02-25 01:02:03.455'):bigint>
+-- !query output
+-1
+
+
+-- !query
+select date_diff(SECOND, timestamp'2022-02-25 01:02:03', timestamp'2022-02-25 01:03:01')
+-- !query schema
+struct<timestampdiff(SECOND, TIMESTAMP '2022-02-25 01:02:03', TIMESTAMP '2022-02-25 01:03:01'):bigint>
+-- !query output
+58
+
+
+-- !query
+select datediff(MINUTE, date'2022-02-25', timestamp'2022-02-24 22:20:00')
+-- !query schema
+struct<timestampdiff(MINUTE, DATE '2022-02-25', TIMESTAMP '2022-02-24 22:20:00'):bigint>
+-- !query output
+-100
+
+
+-- !query
+select date_diff(HOUR, timestamp'2022-02-25 01:02:03', timestamp'2022-02-25 00:02:03')
+-- !query schema
+struct<timestampdiff(HOUR, TIMESTAMP '2022-02-25 01:02:03', TIMESTAMP '2022-02-25 00:02:03'):bigint>
+-- !query output
+-1
+
+
+-- !query
+select datediff(DAY, date'2022-02-25', timestamp'2023-02-27 00:00:00')
+-- !query schema
+struct<timestampdiff(DAY, DATE '2022-02-25', TIMESTAMP '2023-02-27 00:00:00'):bigint>
+-- !query output
+367
+
+
+-- !query
+select date_diff(WEEK, timestamp'2022-02-25 01:02:03', timestamp'2022-01-28 01:02:03')
+-- !query schema
+struct<timestampdiff(WEEK, TIMESTAMP '2022-02-25 01:02:03', TIMESTAMP '2022-01-28 01:02:03'):bigint>
+-- !query output
+-4
+
+
+-- !query
+select datediff(MONTH, timestamp'2022-02-25 01:02:03', timestamp'2022-01-25 01:02:03')
+-- !query schema
+struct<timestampdiff(MONTH, TIMESTAMP '2022-02-25 01:02:03', TIMESTAMP '2022-01-25 01:02:03'):bigint>
+-- !query output
+-1
+
+
+-- !query
+select date_diff(QUARTER, date'2022-02-25', date'2023-05-25')
+-- !query schema
+struct<timestampdiff(QUARTER, DATE '2022-02-25', DATE '2023-05-25'):bigint>
+-- !query output
+5
+
+
+-- !query
+select datediff(YEAR, date'2022-02-25', date'2023-02-25')
+-- !query schema
+struct<timestampdiff(YEAR, DATE '2022-02-25', DATE '2023-02-25'):bigint>
+-- !query output
+1
diff --git a/sql/core/src/test/resources/sql-tests/results/date.sql.out b/sql/core/src/test/resources/sql-tests/results/date.sql.out
index e3a2d7d..bd32361 100644
--- a/sql/core/src/test/resources/sql-tests/results/date.sql.out
+++ b/sql/core/src/test/resources/sql-tests/results/date.sql.out
@@ -1,5 +1,5 @@
-- Automatically generated by SQLQueryTestSuite
--- Number of queries: 87
+-- Number of queries: 97
-- !query
@@ -739,3 +739,83 @@ select date_add(YEAR, 1, date'2022-02-25')
struct<timestampadd(YEAR, 1, DATE '2022-02-25'):timestamp>
-- !query output
2023-02-25 00:00:00
+
+
+-- !query
+select date_diff(MICROSECOND, timestamp'2022-02-25 01:02:03.123', timestamp'2022-02-25 01:02:03.124001')
+-- !query schema
+struct<timestampdiff(MICROSECOND, TIMESTAMP '2022-02-25 01:02:03.123', TIMESTAMP '2022-02-25 01:02:03.124001'):bigint>
+-- !query output
+1001
+
+
+-- !query
+select datediff(MILLISECOND, timestamp'2022-02-25 01:02:03.456', timestamp'2022-02-25 01:02:03.455')
+-- !query schema
+struct<timestampdiff(MILLISECOND, TIMESTAMP '2022-02-25 01:02:03.456', TIMESTAMP '2022-02-25 01:02:03.455'):bigint>
+-- !query output
+-1
+
+
+-- !query
+select date_diff(SECOND, timestamp'2022-02-25 01:02:03', timestamp'2022-02-25 01:03:01')
+-- !query schema
+struct<timestampdiff(SECOND, TIMESTAMP '2022-02-25 01:02:03', TIMESTAMP '2022-02-25 01:03:01'):bigint>
+-- !query output
+58
+
+
+-- !query
+select datediff(MINUTE, date'2022-02-25', timestamp'2022-02-24 22:20:00')
+-- !query schema
+struct<timestampdiff(MINUTE, DATE '2022-02-25', TIMESTAMP '2022-02-24 22:20:00'):bigint>
+-- !query output
+-100
+
+
+-- !query
+select date_diff(HOUR, timestamp'2022-02-25 01:02:03', timestamp'2022-02-25 00:02:03')
+-- !query schema
+struct<timestampdiff(HOUR, TIMESTAMP '2022-02-25 01:02:03', TIMESTAMP '2022-02-25 00:02:03'):bigint>
+-- !query output
+-1
+
+
+-- !query
+select datediff(DAY, date'2022-02-25', timestamp'2023-02-27 00:00:00')
+-- !query schema
+struct<timestampdiff(DAY, DATE '2022-02-25', TIMESTAMP '2023-02-27 00:00:00'):bigint>
+-- !query output
+367
+
+
+-- !query
+select date_diff(WEEK, timestamp'2022-02-25 01:02:03', timestamp'2022-01-28 01:02:03')
+-- !query schema
+struct<timestampdiff(WEEK, TIMESTAMP '2022-02-25 01:02:03', TIMESTAMP '2022-01-28 01:02:03'):bigint>
+-- !query output
+-4
+
+
+-- !query
+select datediff(MONTH, timestamp'2022-02-25 01:02:03', timestamp'2022-01-25 01:02:03')
+-- !query schema
+struct<timestampdiff(MONTH, TIMESTAMP '2022-02-25 01:02:03', TIMESTAMP '2022-01-25 01:02:03'):bigint>
+-- !query output
+-1
+
+
+-- !query
+select date_diff(QUARTER, date'2022-02-25', date'2023-05-25')
+-- !query schema
+struct<timestampdiff(QUARTER, DATE '2022-02-25', DATE '2023-05-25'):bigint>
+-- !query output
+5
+
+
+-- !query
+select datediff(YEAR, date'2022-02-25', date'2023-02-25')
+-- !query schema
+struct<timestampdiff(YEAR, DATE '2022-02-25', DATE '2023-02-25'):bigint>
+-- !query output
+1
diff --git a/sql/core/src/test/resources/sql-tests/results/datetime-legacy.sql.out b/sql/core/src/test/resources/sql-tests/results/datetime-legacy.sql.out
index 60752e3..8eeed14 100644
--- a/sql/core/src/test/resources/sql-tests/results/datetime-legacy.sql.out
+++ b/sql/core/src/test/resources/sql-tests/results/datetime-legacy.sql.out
@@ -1,5 +1,5 @@
-- Automatically generated by SQLQueryTestSuite
--- Number of queries: 184
+-- Number of queries: 194
-- !query
@@ -739,6 +739,86 @@ struct<timestampadd(YEAR, 1, DATE '2022-02-25'):timestamp>
-- !query
+select date_diff(MICROSECOND, timestamp'2022-02-25 01:02:03.123', timestamp'2022-02-25 01:02:03.124001')
+-- !query schema
+struct<timestampdiff(MICROSECOND, TIMESTAMP '2022-02-25 01:02:03.123', TIMESTAMP '2022-02-25 01:02:03.124001'):bigint>
+-- !query output
+1001
+
+
+-- !query
+select datediff(MILLISECOND, timestamp'2022-02-25 01:02:03.456', timestamp'2022-02-25 01:02:03.455')
+-- !query schema
+struct<timestampdiff(MILLISECOND, TIMESTAMP '2022-02-25 01:02:03.456', TIMESTAMP '2022-02-25 01:02:03.455'):bigint>
+-- !query output
+-1
+
+
+-- !query
+select date_diff(SECOND, timestamp'2022-02-25 01:02:03', timestamp'2022-02-25 01:03:01')
+-- !query schema
+struct<timestampdiff(SECOND, TIMESTAMP '2022-02-25 01:02:03', TIMESTAMP '2022-02-25 01:03:01'):bigint>
+-- !query output
+58
+
+
+-- !query
+select datediff(MINUTE, date'2022-02-25', timestamp'2022-02-24 22:20:00')
+-- !query schema
+struct<timestampdiff(MINUTE, DATE '2022-02-25', TIMESTAMP '2022-02-24 22:20:00'):bigint>
+-- !query output
+-100
+
+
+-- !query
+select date_diff(HOUR, timestamp'2022-02-25 01:02:03', timestamp'2022-02-25 00:02:03')
+-- !query schema
+struct<timestampdiff(HOUR, TIMESTAMP '2022-02-25 01:02:03', TIMESTAMP '2022-02-25 00:02:03'):bigint>
+-- !query output
+-1
+
+
+-- !query
+select datediff(DAY, date'2022-02-25', timestamp'2023-02-27 00:00:00')
+-- !query schema
+struct<timestampdiff(DAY, DATE '2022-02-25', TIMESTAMP '2023-02-27 00:00:00'):bigint>
+-- !query output
+367
+
+
+-- !query
+select date_diff(WEEK, timestamp'2022-02-25 01:02:03', timestamp'2022-01-28 01:02:03')
+-- !query schema
+struct<timestampdiff(WEEK, TIMESTAMP '2022-02-25 01:02:03', TIMESTAMP '2022-01-28 01:02:03'):bigint>
+-- !query output
+-4
+
+
+-- !query
+select datediff(MONTH, timestamp'2022-02-25 01:02:03', timestamp'2022-01-25 01:02:03')
+-- !query schema
+struct<timestampdiff(MONTH, TIMESTAMP '2022-02-25 01:02:03', TIMESTAMP '2022-01-25 01:02:03'):bigint>
+-- !query output
+-1
+
+
+-- !query
+select date_diff(QUARTER, date'2022-02-25', date'2023-05-25')
+-- !query schema
+struct<timestampdiff(QUARTER, DATE '2022-02-25', DATE '2023-05-25'):bigint>
+-- !query output
+5
+
+
+-- !query
+select datediff(YEAR, date'2022-02-25', date'2023-02-25')
+-- !query schema
+struct<timestampdiff(YEAR, DATE '2022-02-25', DATE '2023-02-25'):bigint>
+-- !query output
+1
+
+
+-- !query
select timestamp '2019-01-01\t'
-- !query schema
struct<TIMESTAMP '2019-01-01 00:00:00':timestamp>
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org