You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by ma...@apache.org on 2021/07/07 17:27:37 UTC
[spark] branch master updated: [SPARK-36015][SQL] Support
TimestampNTZType in the Window spec definition
This is an automated email from the ASF dual-hosted git repository.
maxgekk pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
new 62ff2ad [SPARK-36015][SQL] Support TimestampNTZType in the Window spec definition
62ff2ad is described below
commit 62ff2add9444fbd54802548b3daf7cde5820feef
Author: gengjiaan <ge...@360.cn>
AuthorDate: Wed Jul 7 20:27:05 2021 +0300
[SPARK-36015][SQL] Support TimestampNTZType in the Window spec definition
### What changes were proposed in this pull request?
The method `WindowSpecDefinition.isValidFrameType` doesn't consider `TimestampNTZType`. We should support it as for `TimestampType`.
### Why are the changes needed?
Support `TimestampNTZType` in the Window spec definition.
### Does this PR introduce _any_ user-facing change?
'Yes'. This PR allows users use `TimestampNTZType` as the sort spec in window spec definition.
### How was this patch tested?
New tests.
Closes #33246 from beliefer/SPARK-36015.
Authored-by: gengjiaan <ge...@360.cn>
Signed-off-by: Max Gekk <ma...@gmail.com>
---
.../catalyst/expressions/windowExpressions.scala | 6 +--
.../sql/execution/window/WindowExecBase.scala | 10 ++--
.../src/test/resources/sql-tests/inputs/window.sql | 9 ++++
.../resources/sql-tests/results/window.sql.out | 56 +++++++++++++++++++++-
4 files changed, 73 insertions(+), 8 deletions(-)
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/windowExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/windowExpressions.scala
index 2555c6a..fc2e449 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/windowExpressions.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/windowExpressions.scala
@@ -102,9 +102,9 @@ case class WindowSpecDefinition(
private def isValidFrameType(ft: DataType): Boolean = (orderSpec.head.dataType, ft) match {
case (DateType, IntegerType) => true
case (DateType, _: YearMonthIntervalType) => true
- case (TimestampType, CalendarIntervalType) => true
- case (TimestampType, _: YearMonthIntervalType) => true
- case (TimestampType, _: DayTimeIntervalType) => true
+ case (TimestampType | TimestampNTZType, CalendarIntervalType) => true
+ case (TimestampType | TimestampNTZType, _: YearMonthIntervalType) => true
+ case (TimestampType | TimestampNTZType, _: DayTimeIntervalType) => true
case (a, b) => a == b
}
}
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/window/WindowExecBase.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/window/WindowExecBase.scala
index 2aa0b02..f3b3b34 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/window/WindowExecBase.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/window/WindowExecBase.scala
@@ -24,7 +24,7 @@ import org.apache.spark.sql.catalyst.InternalRow
import org.apache.spark.sql.catalyst.expressions._
import org.apache.spark.sql.catalyst.expressions.aggregate.AggregateExpression
import org.apache.spark.sql.execution.UnaryExecNode
-import org.apache.spark.sql.types.{CalendarIntervalType, DateType, DayTimeIntervalType, IntegerType, TimestampType, YearMonthIntervalType}
+import org.apache.spark.sql.types._
trait WindowExecBase extends UnaryExecNode {
def windowExpression: Seq[NamedExpression]
@@ -96,10 +96,12 @@ trait WindowExecBase extends UnaryExecNode {
val boundExpr = (expr.dataType, boundOffset.dataType) match {
case (DateType, IntegerType) => DateAdd(expr, boundOffset)
case (DateType, _: YearMonthIntervalType) => DateAddYMInterval(expr, boundOffset)
- case (TimestampType, CalendarIntervalType) => TimeAdd(expr, boundOffset, Some(timeZone))
- case (TimestampType, _: YearMonthIntervalType) =>
+ case (TimestampType | TimestampNTZType, CalendarIntervalType) =>
+ TimeAdd(expr, boundOffset, Some(timeZone))
+ case (TimestampType | TimestampNTZType, _: YearMonthIntervalType) =>
TimestampAddYMInterval(expr, boundOffset, Some(timeZone))
- case (TimestampType, _: DayTimeIntervalType) => TimeAdd(expr, boundOffset, Some(timeZone))
+ case (TimestampType | TimestampNTZType, _: DayTimeIntervalType) =>
+ TimeAdd(expr, boundOffset, Some(timeZone))
case (a, b) if a == b => Add(expr, boundOffset)
}
val bound = MutableProjection.create(boundExpr :: Nil, child.output)
diff --git a/sql/core/src/test/resources/sql-tests/inputs/window.sql b/sql/core/src/test/resources/sql-tests/inputs/window.sql
index 46d3629..9766aaf 100644
--- a/sql/core/src/test/resources/sql-tests/inputs/window.sql
+++ b/sql/core/src/test/resources/sql-tests/inputs/window.sql
@@ -70,12 +70,21 @@ RANGE BETWEEN CURRENT ROW AND 2 FOLLOWING) FROM testData ORDER BY cate, val_date
SELECT val_timestamp, cate, avg(val_timestamp) OVER(PARTITION BY cate ORDER BY val_timestamp
RANGE BETWEEN CURRENT ROW AND interval 23 days 4 hours FOLLOWING) FROM testData
ORDER BY cate, val_timestamp;
+SELECT val_timestamp, cate, avg(val_timestamp) OVER(PARTITION BY cate ORDER BY to_timestamp_ntz(val_timestamp)
+RANGE BETWEEN CURRENT ROW AND interval 23 days 4 hours FOLLOWING) FROM testData
+ORDER BY cate, to_timestamp_ntz(val_timestamp);
SELECT val_timestamp, cate, avg(val_timestamp) OVER(PARTITION BY cate ORDER BY val_timestamp
RANGE BETWEEN CURRENT ROW AND interval '1-1' year to month FOLLOWING) FROM testData
ORDER BY cate, val_timestamp;
+SELECT val_timestamp, cate, avg(val_timestamp) OVER(PARTITION BY cate ORDER BY to_timestamp_ntz(val_timestamp)
+RANGE BETWEEN CURRENT ROW AND interval '1-1' year to month FOLLOWING) FROM testData
+ORDER BY cate, to_timestamp_ntz(val_timestamp);
SELECT val_timestamp, cate, avg(val_timestamp) OVER(PARTITION BY cate ORDER BY val_timestamp
RANGE BETWEEN CURRENT ROW AND interval '1 2:3:4.001' day to second FOLLOWING) FROM testData
ORDER BY cate, val_timestamp;
+SELECT val_timestamp, cate, avg(val_timestamp) OVER(PARTITION BY cate ORDER BY to_timestamp_ntz(val_timestamp)
+RANGE BETWEEN CURRENT ROW AND interval '1 2:3:4.001' day to second FOLLOWING) FROM testData
+ORDER BY cate, to_timestamp_ntz(val_timestamp);
SELECT val_date, cate, avg(val_timestamp) OVER(PARTITION BY cate ORDER BY val_date
RANGE BETWEEN CURRENT ROW AND interval '1-1' year to month FOLLOWING) FROM testData
ORDER BY cate, val_date;
diff --git a/sql/core/src/test/resources/sql-tests/results/window.sql.out b/sql/core/src/test/resources/sql-tests/results/window.sql.out
index b3f9e6c..455015b 100644
--- a/sql/core/src/test/resources/sql-tests/results/window.sql.out
+++ b/sql/core/src/test/resources/sql-tests/results/window.sql.out
@@ -1,5 +1,5 @@
-- Automatically generated by SQLQueryTestSuite
--- Number of queries: 50
+-- Number of queries: 53
-- !query
@@ -212,6 +212,24 @@ NULL NULL NULL
-- !query
+SELECT val_timestamp, cate, avg(val_timestamp) OVER(PARTITION BY cate ORDER BY to_timestamp_ntz(val_timestamp)
+RANGE BETWEEN CURRENT ROW AND interval 23 days 4 hours FOLLOWING) FROM testData
+ORDER BY cate, to_timestamp_ntz(val_timestamp)
+-- !query schema
+struct<val_timestamp:timestamp,cate:string,avg(val_timestamp) OVER (PARTITION BY cate ORDER BY to_timestamp_ntz(val_timestamp) ASC NULLS FIRST RANGE BETWEEN CURRENT ROW AND INTERVAL '23 days 4 hours' FOLLOWING):double>
+-- !query output
+NULL NULL NULL
+2017-07-31 17:00:00 NULL 1.5015456E9
+2017-07-31 17:00:00 a 1.5016970666666667E9
+2017-07-31 17:00:00 a 1.5016970666666667E9
+2017-08-05 23:13:20 a 1.502E9
+2020-12-30 16:00:00 a 1.6093728E9
+2017-07-31 17:00:00 b 1.5022728E9
+2017-08-17 13:00:00 b 1.503E9
+2020-12-30 16:00:00 b 1.6093728E9
+
+
+-- !query
SELECT val_timestamp, cate, avg(val_timestamp) OVER(PARTITION BY cate ORDER BY val_timestamp
RANGE BETWEEN CURRENT ROW AND interval '1-1' year to month FOLLOWING) FROM testData
ORDER BY cate, val_timestamp
@@ -230,6 +248,24 @@ NULL NULL NULL
-- !query
+SELECT val_timestamp, cate, avg(val_timestamp) OVER(PARTITION BY cate ORDER BY to_timestamp_ntz(val_timestamp)
+RANGE BETWEEN CURRENT ROW AND interval '1-1' year to month FOLLOWING) FROM testData
+ORDER BY cate, to_timestamp_ntz(val_timestamp)
+-- !query schema
+struct<val_timestamp:timestamp,cate:string,avg(val_timestamp) OVER (PARTITION BY cate ORDER BY to_timestamp_ntz(val_timestamp) ASC NULLS FIRST RANGE BETWEEN CURRENT ROW AND INTERVAL '1-1' YEAR TO MONTH FOLLOWING):double>
+-- !query output
+NULL NULL NULL
+2017-07-31 17:00:00 NULL 1.5015456E9
+2017-07-31 17:00:00 a 1.5016970666666667E9
+2017-07-31 17:00:00 a 1.5016970666666667E9
+2017-08-05 23:13:20 a 1.502E9
+2020-12-30 16:00:00 a 1.6093728E9
+2017-07-31 17:00:00 b 1.5022728E9
+2017-08-17 13:00:00 b 1.503E9
+2020-12-30 16:00:00 b 1.6093728E9
+
+
+-- !query
SELECT val_timestamp, cate, avg(val_timestamp) OVER(PARTITION BY cate ORDER BY val_timestamp
RANGE BETWEEN CURRENT ROW AND interval '1 2:3:4.001' day to second FOLLOWING) FROM testData
ORDER BY cate, val_timestamp
@@ -248,6 +284,24 @@ NULL NULL NULL
-- !query
+SELECT val_timestamp, cate, avg(val_timestamp) OVER(PARTITION BY cate ORDER BY to_timestamp_ntz(val_timestamp)
+RANGE BETWEEN CURRENT ROW AND interval '1 2:3:4.001' day to second FOLLOWING) FROM testData
+ORDER BY cate, to_timestamp_ntz(val_timestamp)
+-- !query schema
+struct<val_timestamp:timestamp,cate:string,avg(val_timestamp) OVER (PARTITION BY cate ORDER BY to_timestamp_ntz(val_timestamp) ASC NULLS FIRST RANGE BETWEEN CURRENT ROW AND INTERVAL '1 02:03:04.001' DAY TO SECOND FOLLOWING):double>
+-- !query output
+NULL NULL NULL
+2017-07-31 17:00:00 NULL 1.5015456E9
+2017-07-31 17:00:00 a 1.5015456E9
+2017-07-31 17:00:00 a 1.5015456E9
+2017-08-05 23:13:20 a 1.502E9
+2020-12-30 16:00:00 a 1.6093728E9
+2017-07-31 17:00:00 b 1.5015456E9
+2017-08-17 13:00:00 b 1.503E9
+2020-12-30 16:00:00 b 1.6093728E9
+
+
+-- !query
SELECT val_date, cate, avg(val_timestamp) OVER(PARTITION BY cate ORDER BY val_date
RANGE BETWEEN CURRENT ROW AND interval '1-1' year to month FOLLOWING) FROM testData
ORDER BY cate, val_date
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org