You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by ge...@apache.org on 2021/07/13 09:38:49 UTC

[spark] branch branch-3.2 updated: [SPARK-36119][SQL] Add new SQL function to_timestamp_ltz

This is an automated email from the ASF dual-hosted git repository.

gengliang pushed a commit to branch branch-3.2
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/branch-3.2 by this push:
     new 2781bf0  [SPARK-36119][SQL] Add new SQL function to_timestamp_ltz
2781bf0 is described below

commit 2781bf07218eae280184dcd1bbe38c68424358e9
Author: Gengliang Wang <ge...@apache.org>
AuthorDate: Tue Jul 13 17:37:44 2021 +0800

    [SPARK-36119][SQL] Add new SQL function to_timestamp_ltz
    
    ### What changes were proposed in this pull request?
    
    Add new SQL function `to_timestamp_ltz`
    syntax:
    ```
    to_timestamp_ltz(timestamp_str_column[, fmt])
    to_timestamp_ltz(timestamp_column)
    to_timestamp_ltz(date_column)
    ```
    
    ### Why are the changes needed?
    
    As the result of to_timestamp become consistent with the SQL configuration spark.sql.timestmapType and there is already a SQL function to_timestmap_ntz, we need new function to_timestamp_ltz to construct timestamp with local time zone values.
    
    ### Does this PR introduce _any_ user-facing change?
    
    Yes, a new function for constructing timestamp with local time zone values
    
    ### How was this patch tested?
    
    Unit test
    
    Closes #33318 from gengliangwang/to_timestamp_ltz.
    
    Authored-by: Gengliang Wang <ge...@apache.org>
    Signed-off-by: Gengliang Wang <ge...@apache.org>
    (cherry picked from commit 01ddaf3918c770dcbfdf3103f80995902f650339)
    Signed-off-by: Gengliang Wang <ge...@apache.org>
---
 .../sql/catalyst/analysis/FunctionRegistry.scala   |  1 +
 .../catalyst/expressions/datetimeExpressions.scala | 47 ++++++++++++++++++++++
 .../sql-functions/sql-expression-schema.md         |  3 +-
 .../test/resources/sql-tests/inputs/datetime.sql   |  4 ++
 .../sql-tests/results/ansi/datetime.sql.out        | 26 +++++++++++-
 .../sql-tests/results/datetime-legacy.sql.out      | 26 +++++++++++-
 .../resources/sql-tests/results/datetime.sql.out   | 26 +++++++++++-
 .../results/timestampNTZ/datetime.sql.out          | 26 +++++++++++-
 8 files changed, 154 insertions(+), 5 deletions(-)

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala
index 4fd871d..d518bf3 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala
@@ -542,6 +542,7 @@ object FunctionRegistry {
     expression[ToUnixTimestamp]("to_unix_timestamp"),
     expression[ToUTCTimestamp]("to_utc_timestamp"),
     expression[ParseToTimestampNTZ]("to_timestamp_ntz"),
+    expression[ParseToTimestampLTZ]("to_timestamp_ltz"),
     expression[TruncDate]("trunc"),
     expression[TruncTimestamp]("date_trunc"),
     expression[UnixTimestamp]("unix_timestamp"),
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala
index 2840b18..bee3ae4 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala
@@ -1079,6 +1079,53 @@ case class ParseToTimestampNTZ(
     copy(child = newChild)
 }
 
+/**
+ * Parses a column to a timestamp with local time zone based on the supplied format.
+ */
+// scalastyle:off line.size.limit
+@ExpressionDescription(
+  usage = """
+    _FUNC_(timestamp_str[, fmt]) - Parses the `timestamp_str` expression with the `fmt` expression
+      to a timestamp with local time zone. Returns null with invalid input. By default, it follows casting rules to
+      a timestamp if the `fmt` is omitted.
+  """,
+  arguments = """
+    Arguments:
+      * timestamp_str - A string to be parsed to timestamp with local time zone.
+      * fmt - Timestamp format pattern to follow. See <a href="https://spark.apache.org/docs/latest/sql-ref-datetime-pattern.html">Datetime Patterns</a> for valid
+              date and time format patterns.
+  """,
+  examples = """
+    Examples:
+      > SELECT _FUNC_('2016-12-31 00:12:00');
+       2016-12-31 00:12:00
+      > SELECT _FUNC_('2016-12-31', 'yyyy-MM-dd');
+       2016-12-31 00:00:00
+  """,
+  group = "datetime_funcs",
+  since = "3.2.0")
+// scalastyle:on line.size.limit
+case class ParseToTimestampLTZ(
+    left: Expression,
+    format: Option[Expression],
+    child: Expression) extends RuntimeReplaceable {
+
+  def this(left: Expression, format: Expression) = {
+    this(left, Option(format), GetTimestamp(left, format, TimestampType))
+  }
+
+  def this(left: Expression) = this(left, None, Cast(left, TimestampType))
+
+  override def flatArguments: Iterator[Any] = Iterator(left, format)
+  override def exprsReplaced: Seq[Expression] = left +: format.toSeq
+
+  override def prettyName: String = "to_timestamp_ltz"
+  override def dataType: DataType = TimestampType
+
+  override protected def withNewChildInternal(newChild: Expression): ParseToTimestampLTZ =
+    copy(child = newChild)
+}
+
 abstract class ToTimestamp
   extends BinaryExpression with TimestampFormatterHelper with ExpectsInputTypes {
 
diff --git a/sql/core/src/test/resources/sql-functions/sql-expression-schema.md b/sql/core/src/test/resources/sql-functions/sql-expression-schema.md
index 76fc1b7..f71f3a8 100644
--- a/sql/core/src/test/resources/sql-functions/sql-expression-schema.md
+++ b/sql/core/src/test/resources/sql-functions/sql-expression-schema.md
@@ -1,6 +1,6 @@
 <!-- Automatically generated by ExpressionsSchemaSuite -->
 ## Summary
-  - Number of queries: 358
+  - Number of queries: 359
   - Number of expressions that missing example: 13
   - Expressions missing examples: bigint,binary,boolean,date,decimal,double,float,int,smallint,string,timestamp,tinyint,window
 ## Schema of Built-in Functions
@@ -208,6 +208,7 @@
 | org.apache.spark.sql.catalyst.expressions.Overlay | overlay | SELECT overlay('Spark SQL' PLACING '_' FROM 6) | struct<overlay(Spark SQL, _, 6, -1):string> |
 | org.apache.spark.sql.catalyst.expressions.ParseToDate | to_date | SELECT to_date('2009-07-30 04:17:52') | struct<to_date(2009-07-30 04:17:52):date> |
 | org.apache.spark.sql.catalyst.expressions.ParseToTimestamp | to_timestamp | SELECT to_timestamp('2016-12-31 00:12:00') | struct<to_timestamp(2016-12-31 00:12:00):timestamp> |
+| org.apache.spark.sql.catalyst.expressions.ParseToTimestampLTZ | to_timestamp_ltz | SELECT to_timestamp_ltz('2016-12-31 00:12:00') | struct<to_timestamp_ltz(2016-12-31 00:12:00):timestamp> |
 | org.apache.spark.sql.catalyst.expressions.ParseToTimestampNTZ | to_timestamp_ntz | SELECT to_timestamp_ntz('2016-12-31 00:12:00') | struct<to_timestamp_ntz(2016-12-31 00:12:00):timestamp_ntz> |
 | org.apache.spark.sql.catalyst.expressions.ParseUrl | parse_url | SELECT parse_url('http://spark.apache.org/path?query=1', 'HOST') | struct<parse_url(http://spark.apache.org/path?query=1, HOST):string> |
 | org.apache.spark.sql.catalyst.expressions.PercentRank | percent_rank | SELECT a, b, percent_rank(b) OVER (PARTITION BY a ORDER BY b) FROM VALUES ('A1', 2), ('A1', 1), ('A2', 3), ('A1', 1) tab(a, b) | struct<a:string,b:int,PERCENT_RANK() OVER (PARTITION BY a ORDER BY b ASC NULLS FIRST ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW):double> |
diff --git a/sql/core/src/test/resources/sql-tests/inputs/datetime.sql b/sql/core/src/test/resources/sql-tests/inputs/datetime.sql
index c4a89a6..db30c22 100644
--- a/sql/core/src/test/resources/sql-tests/inputs/datetime.sql
+++ b/sql/core/src/test/resources/sql-tests/inputs/datetime.sql
@@ -33,6 +33,10 @@ select to_timestamp_ntz(null), to_timestamp_ntz('2016-12-31 00:12:00'), to_times
 select to_timestamp_ntz(to_date(null)), to_timestamp_ntz(to_date('2016-12-31')), to_timestamp_ntz(to_date('2016-12-31', 'yyyy-MM-dd'));
 select to_timestamp_ntz(to_timestamp(null)), to_timestamp_ntz(to_timestamp('2016-12-31 00:12:00')), to_timestamp_ntz(to_timestamp('2016-12-31', 'yyyy-MM-dd'));
 
+select to_timestamp_ltz(null), to_timestamp_ltz('2016-12-31 00:12:00'), to_timestamp_ltz('2016-12-31', 'yyyy-MM-dd');
+select to_timestamp_ltz(to_date(null)), to_timestamp_ltz(to_date('2016-12-31')), to_timestamp_ltz(to_date('2016-12-31', 'yyyy-MM-dd'));
+select to_timestamp_ltz(to_timestamp(null)), to_timestamp_ltz(to_timestamp('2016-12-31 00:12:00')), to_timestamp_ltz(to_timestamp('2016-12-31', 'yyyy-MM-dd'));
+
 select dayofweek('2007-02-03'), dayofweek('2009-07-30'), dayofweek('2017-05-27'), dayofweek(null), dayofweek('1582-10-15 13:10:15');
 
 -- [SPARK-22333]: timeFunctionCall has conflicts with columnReference
diff --git a/sql/core/src/test/resources/sql-tests/results/ansi/datetime.sql.out b/sql/core/src/test/resources/sql-tests/results/ansi/datetime.sql.out
index 195bc69..761511d 100644
--- a/sql/core/src/test/resources/sql-tests/results/ansi/datetime.sql.out
+++ b/sql/core/src/test/resources/sql-tests/results/ansi/datetime.sql.out
@@ -1,5 +1,5 @@
 -- Automatically generated by SQLQueryTestSuite
--- Number of queries: 202
+-- Number of queries: 205
 
 
 -- !query
@@ -182,6 +182,30 @@ NULL	2016-12-31 00:12:00	2016-12-31 00:00:00
 
 
 -- !query
+select to_timestamp_ltz(null), to_timestamp_ltz('2016-12-31 00:12:00'), to_timestamp_ltz('2016-12-31', 'yyyy-MM-dd')
+-- !query schema
+struct<to_timestamp_ltz(NULL):timestamp,to_timestamp_ltz(2016-12-31 00:12:00):timestamp,to_timestamp_ltz(2016-12-31, yyyy-MM-dd):timestamp>
+-- !query output
+NULL	2016-12-31 00:12:00	2016-12-31 00:00:00
+
+
+-- !query
+select to_timestamp_ltz(to_date(null)), to_timestamp_ltz(to_date('2016-12-31')), to_timestamp_ltz(to_date('2016-12-31', 'yyyy-MM-dd'))
+-- !query schema
+struct<to_timestamp_ltz(to_date(NULL)):timestamp,to_timestamp_ltz(to_date(2016-12-31)):timestamp,to_timestamp_ltz(to_date(2016-12-31, yyyy-MM-dd)):timestamp>
+-- !query output
+NULL	2016-12-31 00:00:00	2016-12-31 00:00:00
+
+
+-- !query
+select to_timestamp_ltz(to_timestamp(null)), to_timestamp_ltz(to_timestamp('2016-12-31 00:12:00')), to_timestamp_ltz(to_timestamp('2016-12-31', 'yyyy-MM-dd'))
+-- !query schema
+struct<to_timestamp_ltz(to_timestamp(NULL)):timestamp,to_timestamp_ltz(to_timestamp(2016-12-31 00:12:00)):timestamp,to_timestamp_ltz(to_timestamp(2016-12-31, yyyy-MM-dd)):timestamp>
+-- !query output
+NULL	2016-12-31 00:12:00	2016-12-31 00:00:00
+
+
+-- !query
 select dayofweek('2007-02-03'), dayofweek('2009-07-30'), dayofweek('2017-05-27'), dayofweek(null), dayofweek('1582-10-15 13:10:15')
 -- !query schema
 struct<dayofweek(2007-02-03):int,dayofweek(2009-07-30):int,dayofweek(2017-05-27):int,dayofweek(NULL):int,dayofweek(1582-10-15 13:10:15):int>
diff --git a/sql/core/src/test/resources/sql-tests/results/datetime-legacy.sql.out b/sql/core/src/test/resources/sql-tests/results/datetime-legacy.sql.out
index 965f179..ebeb127 100644
--- a/sql/core/src/test/resources/sql-tests/results/datetime-legacy.sql.out
+++ b/sql/core/src/test/resources/sql-tests/results/datetime-legacy.sql.out
@@ -1,5 +1,5 @@
 -- Automatically generated by SQLQueryTestSuite
--- Number of queries: 202
+-- Number of queries: 205
 
 
 -- !query
@@ -176,6 +176,30 @@ NULL	2016-12-31 00:12:00	2016-12-31 00:00:00
 
 
 -- !query
+select to_timestamp_ltz(null), to_timestamp_ltz('2016-12-31 00:12:00'), to_timestamp_ltz('2016-12-31', 'yyyy-MM-dd')
+-- !query schema
+struct<to_timestamp_ltz(NULL):timestamp,to_timestamp_ltz(2016-12-31 00:12:00):timestamp,to_timestamp_ltz(2016-12-31, yyyy-MM-dd):timestamp>
+-- !query output
+NULL	2016-12-31 00:12:00	2016-12-31 00:00:00
+
+
+-- !query
+select to_timestamp_ltz(to_date(null)), to_timestamp_ltz(to_date('2016-12-31')), to_timestamp_ltz(to_date('2016-12-31', 'yyyy-MM-dd'))
+-- !query schema
+struct<to_timestamp_ltz(to_date(NULL)):timestamp,to_timestamp_ltz(to_date(2016-12-31)):timestamp,to_timestamp_ltz(to_date(2016-12-31, yyyy-MM-dd)):timestamp>
+-- !query output
+NULL	2016-12-31 00:00:00	2016-12-31 00:00:00
+
+
+-- !query
+select to_timestamp_ltz(to_timestamp(null)), to_timestamp_ltz(to_timestamp('2016-12-31 00:12:00')), to_timestamp_ltz(to_timestamp('2016-12-31', 'yyyy-MM-dd'))
+-- !query schema
+struct<to_timestamp_ltz(to_timestamp(NULL)):timestamp,to_timestamp_ltz(to_timestamp(2016-12-31 00:12:00)):timestamp,to_timestamp_ltz(to_timestamp(2016-12-31, yyyy-MM-dd)):timestamp>
+-- !query output
+NULL	2016-12-31 00:12:00	2016-12-31 00:00:00
+
+
+-- !query
 select dayofweek('2007-02-03'), dayofweek('2009-07-30'), dayofweek('2017-05-27'), dayofweek(null), dayofweek('1582-10-15 13:10:15')
 -- !query schema
 struct<dayofweek(2007-02-03):int,dayofweek(2009-07-30):int,dayofweek(2017-05-27):int,dayofweek(NULL):int,dayofweek(1582-10-15 13:10:15):int>
diff --git a/sql/core/src/test/resources/sql-tests/results/datetime.sql.out b/sql/core/src/test/resources/sql-tests/results/datetime.sql.out
index fdc9c03..e95cade 100755
--- a/sql/core/src/test/resources/sql-tests/results/datetime.sql.out
+++ b/sql/core/src/test/resources/sql-tests/results/datetime.sql.out
@@ -1,5 +1,5 @@
 -- Automatically generated by SQLQueryTestSuite
--- Number of queries: 202
+-- Number of queries: 205
 
 
 -- !query
@@ -176,6 +176,30 @@ NULL	2016-12-31 00:12:00	2016-12-31 00:00:00
 
 
 -- !query
+select to_timestamp_ltz(null), to_timestamp_ltz('2016-12-31 00:12:00'), to_timestamp_ltz('2016-12-31', 'yyyy-MM-dd')
+-- !query schema
+struct<to_timestamp_ltz(NULL):timestamp,to_timestamp_ltz(2016-12-31 00:12:00):timestamp,to_timestamp_ltz(2016-12-31, yyyy-MM-dd):timestamp>
+-- !query output
+NULL	2016-12-31 00:12:00	2016-12-31 00:00:00
+
+
+-- !query
+select to_timestamp_ltz(to_date(null)), to_timestamp_ltz(to_date('2016-12-31')), to_timestamp_ltz(to_date('2016-12-31', 'yyyy-MM-dd'))
+-- !query schema
+struct<to_timestamp_ltz(to_date(NULL)):timestamp,to_timestamp_ltz(to_date(2016-12-31)):timestamp,to_timestamp_ltz(to_date(2016-12-31, yyyy-MM-dd)):timestamp>
+-- !query output
+NULL	2016-12-31 00:00:00	2016-12-31 00:00:00
+
+
+-- !query
+select to_timestamp_ltz(to_timestamp(null)), to_timestamp_ltz(to_timestamp('2016-12-31 00:12:00')), to_timestamp_ltz(to_timestamp('2016-12-31', 'yyyy-MM-dd'))
+-- !query schema
+struct<to_timestamp_ltz(to_timestamp(NULL)):timestamp,to_timestamp_ltz(to_timestamp(2016-12-31 00:12:00)):timestamp,to_timestamp_ltz(to_timestamp(2016-12-31, yyyy-MM-dd)):timestamp>
+-- !query output
+NULL	2016-12-31 00:12:00	2016-12-31 00:00:00
+
+
+-- !query
 select dayofweek('2007-02-03'), dayofweek('2009-07-30'), dayofweek('2017-05-27'), dayofweek(null), dayofweek('1582-10-15 13:10:15')
 -- !query schema
 struct<dayofweek(2007-02-03):int,dayofweek(2009-07-30):int,dayofweek(2017-05-27):int,dayofweek(NULL):int,dayofweek(1582-10-15 13:10:15):int>
diff --git a/sql/core/src/test/resources/sql-tests/results/timestampNTZ/datetime.sql.out b/sql/core/src/test/resources/sql-tests/results/timestampNTZ/datetime.sql.out
index 4b148b0..ec384a5 100644
--- a/sql/core/src/test/resources/sql-tests/results/timestampNTZ/datetime.sql.out
+++ b/sql/core/src/test/resources/sql-tests/results/timestampNTZ/datetime.sql.out
@@ -1,5 +1,5 @@
 -- Automatically generated by SQLQueryTestSuite
--- Number of queries: 202
+-- Number of queries: 205
 
 
 -- !query
@@ -176,6 +176,30 @@ NULL	2016-12-31 00:12:00	2016-12-31 00:00:00
 
 
 -- !query
+select to_timestamp_ltz(null), to_timestamp_ltz('2016-12-31 00:12:00'), to_timestamp_ltz('2016-12-31', 'yyyy-MM-dd')
+-- !query schema
+struct<to_timestamp_ltz(NULL):timestamp,to_timestamp_ltz(2016-12-31 00:12:00):timestamp,to_timestamp_ltz(2016-12-31, yyyy-MM-dd):timestamp>
+-- !query output
+NULL	2016-12-31 00:12:00	2016-12-31 00:00:00
+
+
+-- !query
+select to_timestamp_ltz(to_date(null)), to_timestamp_ltz(to_date('2016-12-31')), to_timestamp_ltz(to_date('2016-12-31', 'yyyy-MM-dd'))
+-- !query schema
+struct<to_timestamp_ltz(to_date(NULL)):timestamp,to_timestamp_ltz(to_date(2016-12-31)):timestamp,to_timestamp_ltz(to_date(2016-12-31, yyyy-MM-dd)):timestamp>
+-- !query output
+NULL	2016-12-31 00:00:00	2016-12-31 00:00:00
+
+
+-- !query
+select to_timestamp_ltz(to_timestamp(null)), to_timestamp_ltz(to_timestamp('2016-12-31 00:12:00')), to_timestamp_ltz(to_timestamp('2016-12-31', 'yyyy-MM-dd'))
+-- !query schema
+struct<to_timestamp_ltz(to_timestamp(NULL)):timestamp,to_timestamp_ltz(to_timestamp(2016-12-31 00:12:00)):timestamp,to_timestamp_ltz(to_timestamp(2016-12-31, yyyy-MM-dd)):timestamp>
+-- !query output
+NULL	2016-12-31 00:12:00	2016-12-31 00:00:00
+
+
+-- !query
 select dayofweek('2007-02-03'), dayofweek('2009-07-30'), dayofweek('2017-05-27'), dayofweek(null), dayofweek('1582-10-15 13:10:15')
 -- !query schema
 struct<dayofweek(2007-02-03):int,dayofweek(2009-07-30):int,dayofweek(2017-05-27):int,dayofweek(NULL):int,dayofweek(1582-10-15 13:10:15):int>

---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org