You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by ma...@apache.org on 2021/07/07 17:27:37 UTC

[spark] branch master updated: [SPARK-36015][SQL] Support TimestampNTZType in the Window spec definition

This is an automated email from the ASF dual-hosted git repository.

maxgekk pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/master by this push:
     new 62ff2ad  [SPARK-36015][SQL] Support TimestampNTZType in the Window spec definition
62ff2ad is described below

commit 62ff2add9444fbd54802548b3daf7cde5820feef
Author: gengjiaan <ge...@360.cn>
AuthorDate: Wed Jul 7 20:27:05 2021 +0300

    [SPARK-36015][SQL] Support TimestampNTZType in the Window spec definition
    
    ### What changes were proposed in this pull request?
    The method `WindowSpecDefinition.isValidFrameType` doesn't consider `TimestampNTZType`. We should support it as for `TimestampType`.
    
    ### Why are the changes needed?
    Support `TimestampNTZType` in the Window spec definition.
    
    ### Does this PR introduce _any_ user-facing change?
    'Yes'. This PR allows users use  `TimestampNTZType` as the sort spec in window spec definition.
    
    ### How was this patch tested?
    New tests.
    
    Closes #33246 from beliefer/SPARK-36015.
    
    Authored-by: gengjiaan <ge...@360.cn>
    Signed-off-by: Max Gekk <ma...@gmail.com>
---
 .../catalyst/expressions/windowExpressions.scala   |  6 +--
 .../sql/execution/window/WindowExecBase.scala      | 10 ++--
 .../src/test/resources/sql-tests/inputs/window.sql |  9 ++++
 .../resources/sql-tests/results/window.sql.out     | 56 +++++++++++++++++++++-
 4 files changed, 73 insertions(+), 8 deletions(-)

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/windowExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/windowExpressions.scala
index 2555c6a..fc2e449 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/windowExpressions.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/windowExpressions.scala
@@ -102,9 +102,9 @@ case class WindowSpecDefinition(
   private def isValidFrameType(ft: DataType): Boolean = (orderSpec.head.dataType, ft) match {
     case (DateType, IntegerType) => true
     case (DateType, _: YearMonthIntervalType) => true
-    case (TimestampType, CalendarIntervalType) => true
-    case (TimestampType, _: YearMonthIntervalType) => true
-    case (TimestampType, _: DayTimeIntervalType) => true
+    case (TimestampType | TimestampNTZType, CalendarIntervalType) => true
+    case (TimestampType | TimestampNTZType, _: YearMonthIntervalType) => true
+    case (TimestampType | TimestampNTZType, _: DayTimeIntervalType) => true
     case (a, b) => a == b
   }
 }
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/window/WindowExecBase.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/window/WindowExecBase.scala
index 2aa0b02..f3b3b34 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/window/WindowExecBase.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/window/WindowExecBase.scala
@@ -24,7 +24,7 @@ import org.apache.spark.sql.catalyst.InternalRow
 import org.apache.spark.sql.catalyst.expressions._
 import org.apache.spark.sql.catalyst.expressions.aggregate.AggregateExpression
 import org.apache.spark.sql.execution.UnaryExecNode
-import org.apache.spark.sql.types.{CalendarIntervalType, DateType, DayTimeIntervalType, IntegerType, TimestampType, YearMonthIntervalType}
+import org.apache.spark.sql.types._
 
 trait WindowExecBase extends UnaryExecNode {
   def windowExpression: Seq[NamedExpression]
@@ -96,10 +96,12 @@ trait WindowExecBase extends UnaryExecNode {
         val boundExpr = (expr.dataType, boundOffset.dataType) match {
           case (DateType, IntegerType) => DateAdd(expr, boundOffset)
           case (DateType, _: YearMonthIntervalType) => DateAddYMInterval(expr, boundOffset)
-          case (TimestampType, CalendarIntervalType) => TimeAdd(expr, boundOffset, Some(timeZone))
-          case (TimestampType, _: YearMonthIntervalType) =>
+          case (TimestampType | TimestampNTZType, CalendarIntervalType) =>
+            TimeAdd(expr, boundOffset, Some(timeZone))
+          case (TimestampType | TimestampNTZType, _: YearMonthIntervalType) =>
             TimestampAddYMInterval(expr, boundOffset, Some(timeZone))
-          case (TimestampType, _: DayTimeIntervalType) => TimeAdd(expr, boundOffset, Some(timeZone))
+          case (TimestampType | TimestampNTZType, _: DayTimeIntervalType) =>
+            TimeAdd(expr, boundOffset, Some(timeZone))
           case (a, b) if a == b => Add(expr, boundOffset)
         }
         val bound = MutableProjection.create(boundExpr :: Nil, child.output)
diff --git a/sql/core/src/test/resources/sql-tests/inputs/window.sql b/sql/core/src/test/resources/sql-tests/inputs/window.sql
index 46d3629..9766aaf 100644
--- a/sql/core/src/test/resources/sql-tests/inputs/window.sql
+++ b/sql/core/src/test/resources/sql-tests/inputs/window.sql
@@ -70,12 +70,21 @@ RANGE BETWEEN CURRENT ROW AND 2 FOLLOWING) FROM testData ORDER BY cate, val_date
 SELECT val_timestamp, cate, avg(val_timestamp) OVER(PARTITION BY cate ORDER BY val_timestamp
 RANGE BETWEEN CURRENT ROW AND interval 23 days 4 hours FOLLOWING) FROM testData
 ORDER BY cate, val_timestamp;
+SELECT val_timestamp, cate, avg(val_timestamp) OVER(PARTITION BY cate ORDER BY to_timestamp_ntz(val_timestamp)
+RANGE BETWEEN CURRENT ROW AND interval 23 days 4 hours FOLLOWING) FROM testData
+ORDER BY cate, to_timestamp_ntz(val_timestamp);
 SELECT val_timestamp, cate, avg(val_timestamp) OVER(PARTITION BY cate ORDER BY val_timestamp
 RANGE BETWEEN CURRENT ROW AND interval '1-1' year to month FOLLOWING) FROM testData
 ORDER BY cate, val_timestamp;
+SELECT val_timestamp, cate, avg(val_timestamp) OVER(PARTITION BY cate ORDER BY to_timestamp_ntz(val_timestamp)
+RANGE BETWEEN CURRENT ROW AND interval '1-1' year to month FOLLOWING) FROM testData
+ORDER BY cate, to_timestamp_ntz(val_timestamp);
 SELECT val_timestamp, cate, avg(val_timestamp) OVER(PARTITION BY cate ORDER BY val_timestamp
 RANGE BETWEEN CURRENT ROW AND interval '1 2:3:4.001' day to second FOLLOWING) FROM testData
 ORDER BY cate, val_timestamp;
+SELECT val_timestamp, cate, avg(val_timestamp) OVER(PARTITION BY cate ORDER BY to_timestamp_ntz(val_timestamp)
+RANGE BETWEEN CURRENT ROW AND interval '1 2:3:4.001' day to second FOLLOWING) FROM testData
+ORDER BY cate, to_timestamp_ntz(val_timestamp);
 SELECT val_date, cate, avg(val_timestamp) OVER(PARTITION BY cate ORDER BY val_date
 RANGE BETWEEN CURRENT ROW AND interval '1-1' year to month FOLLOWING) FROM testData
 ORDER BY cate, val_date;
diff --git a/sql/core/src/test/resources/sql-tests/results/window.sql.out b/sql/core/src/test/resources/sql-tests/results/window.sql.out
index b3f9e6c..455015b 100644
--- a/sql/core/src/test/resources/sql-tests/results/window.sql.out
+++ b/sql/core/src/test/resources/sql-tests/results/window.sql.out
@@ -1,5 +1,5 @@
 -- Automatically generated by SQLQueryTestSuite
--- Number of queries: 50
+-- Number of queries: 53
 
 
 -- !query
@@ -212,6 +212,24 @@ NULL	NULL	NULL
 
 
 -- !query
+SELECT val_timestamp, cate, avg(val_timestamp) OVER(PARTITION BY cate ORDER BY to_timestamp_ntz(val_timestamp)
+RANGE BETWEEN CURRENT ROW AND interval 23 days 4 hours FOLLOWING) FROM testData
+ORDER BY cate, to_timestamp_ntz(val_timestamp)
+-- !query schema
+struct<val_timestamp:timestamp,cate:string,avg(val_timestamp) OVER (PARTITION BY cate ORDER BY to_timestamp_ntz(val_timestamp) ASC NULLS FIRST RANGE BETWEEN CURRENT ROW AND INTERVAL '23 days 4 hours' FOLLOWING):double>
+-- !query output
+NULL	NULL	NULL
+2017-07-31 17:00:00	NULL	1.5015456E9
+2017-07-31 17:00:00	a	1.5016970666666667E9
+2017-07-31 17:00:00	a	1.5016970666666667E9
+2017-08-05 23:13:20	a	1.502E9
+2020-12-30 16:00:00	a	1.6093728E9
+2017-07-31 17:00:00	b	1.5022728E9
+2017-08-17 13:00:00	b	1.503E9
+2020-12-30 16:00:00	b	1.6093728E9
+
+
+-- !query
 SELECT val_timestamp, cate, avg(val_timestamp) OVER(PARTITION BY cate ORDER BY val_timestamp
 RANGE BETWEEN CURRENT ROW AND interval '1-1' year to month FOLLOWING) FROM testData
 ORDER BY cate, val_timestamp
@@ -230,6 +248,24 @@ NULL	NULL	NULL
 
 
 -- !query
+SELECT val_timestamp, cate, avg(val_timestamp) OVER(PARTITION BY cate ORDER BY to_timestamp_ntz(val_timestamp)
+RANGE BETWEEN CURRENT ROW AND interval '1-1' year to month FOLLOWING) FROM testData
+ORDER BY cate, to_timestamp_ntz(val_timestamp)
+-- !query schema
+struct<val_timestamp:timestamp,cate:string,avg(val_timestamp) OVER (PARTITION BY cate ORDER BY to_timestamp_ntz(val_timestamp) ASC NULLS FIRST RANGE BETWEEN CURRENT ROW AND INTERVAL '1-1' YEAR TO MONTH FOLLOWING):double>
+-- !query output
+NULL	NULL	NULL
+2017-07-31 17:00:00	NULL	1.5015456E9
+2017-07-31 17:00:00	a	1.5016970666666667E9
+2017-07-31 17:00:00	a	1.5016970666666667E9
+2017-08-05 23:13:20	a	1.502E9
+2020-12-30 16:00:00	a	1.6093728E9
+2017-07-31 17:00:00	b	1.5022728E9
+2017-08-17 13:00:00	b	1.503E9
+2020-12-30 16:00:00	b	1.6093728E9
+
+
+-- !query
 SELECT val_timestamp, cate, avg(val_timestamp) OVER(PARTITION BY cate ORDER BY val_timestamp
 RANGE BETWEEN CURRENT ROW AND interval '1 2:3:4.001' day to second FOLLOWING) FROM testData
 ORDER BY cate, val_timestamp
@@ -248,6 +284,24 @@ NULL	NULL	NULL
 
 
 -- !query
+SELECT val_timestamp, cate, avg(val_timestamp) OVER(PARTITION BY cate ORDER BY to_timestamp_ntz(val_timestamp)
+RANGE BETWEEN CURRENT ROW AND interval '1 2:3:4.001' day to second FOLLOWING) FROM testData
+ORDER BY cate, to_timestamp_ntz(val_timestamp)
+-- !query schema
+struct<val_timestamp:timestamp,cate:string,avg(val_timestamp) OVER (PARTITION BY cate ORDER BY to_timestamp_ntz(val_timestamp) ASC NULLS FIRST RANGE BETWEEN CURRENT ROW AND INTERVAL '1 02:03:04.001' DAY TO SECOND FOLLOWING):double>
+-- !query output
+NULL	NULL	NULL
+2017-07-31 17:00:00	NULL	1.5015456E9
+2017-07-31 17:00:00	a	1.5015456E9
+2017-07-31 17:00:00	a	1.5015456E9
+2017-08-05 23:13:20	a	1.502E9
+2020-12-30 16:00:00	a	1.6093728E9
+2017-07-31 17:00:00	b	1.5015456E9
+2017-08-17 13:00:00	b	1.503E9
+2020-12-30 16:00:00	b	1.6093728E9
+
+
+-- !query
 SELECT val_date, cate, avg(val_timestamp) OVER(PARTITION BY cate ORDER BY val_date
 RANGE BETWEEN CURRENT ROW AND interval '1-1' year to month FOLLOWING) FROM testData
 ORDER BY cate, val_date

---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org