You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by ge...@apache.org on 2021/07/13 09:38:16 UTC
[spark] branch master updated: [SPARK-36119][SQL] Add new SQL
function to_timestamp_ltz
This is an automated email from the ASF dual-hosted git repository.
gengliang pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
new 01ddaf3 [SPARK-36119][SQL] Add new SQL function to_timestamp_ltz
01ddaf3 is described below
commit 01ddaf3918c770dcbfdf3103f80995902f650339
Author: Gengliang Wang <ge...@apache.org>
AuthorDate: Tue Jul 13 17:37:44 2021 +0800
[SPARK-36119][SQL] Add new SQL function to_timestamp_ltz
### What changes were proposed in this pull request?
Add new SQL function `to_timestamp_ltz`
syntax:
```
to_timestamp_ltz(timestamp_str_column[, fmt])
to_timestamp_ltz(timestamp_column)
to_timestamp_ltz(date_column)
```
### Why are the changes needed?
As the result of to_timestamp become consistent with the SQL configuration spark.sql.timestmapType and there is already a SQL function to_timestmap_ntz, we need new function to_timestamp_ltz to construct timestamp with local time zone values.
### Does this PR introduce _any_ user-facing change?
Yes, a new function for constructing timestamp with local time zone values
### How was this patch tested?
Unit test
Closes #33318 from gengliangwang/to_timestamp_ltz.
Authored-by: Gengliang Wang <ge...@apache.org>
Signed-off-by: Gengliang Wang <ge...@apache.org>
---
.../sql/catalyst/analysis/FunctionRegistry.scala | 1 +
.../catalyst/expressions/datetimeExpressions.scala | 47 ++++++++++++++++++++++
.../sql-functions/sql-expression-schema.md | 3 +-
.../test/resources/sql-tests/inputs/datetime.sql | 4 ++
.../sql-tests/results/ansi/datetime.sql.out | 26 +++++++++++-
.../sql-tests/results/datetime-legacy.sql.out | 26 +++++++++++-
.../resources/sql-tests/results/datetime.sql.out | 26 +++++++++++-
.../results/timestampNTZ/datetime.sql.out | 26 +++++++++++-
8 files changed, 154 insertions(+), 5 deletions(-)
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala
index 4fd871d..d518bf3 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala
@@ -542,6 +542,7 @@ object FunctionRegistry {
expression[ToUnixTimestamp]("to_unix_timestamp"),
expression[ToUTCTimestamp]("to_utc_timestamp"),
expression[ParseToTimestampNTZ]("to_timestamp_ntz"),
+ expression[ParseToTimestampLTZ]("to_timestamp_ltz"),
expression[TruncDate]("trunc"),
expression[TruncTimestamp]("date_trunc"),
expression[UnixTimestamp]("unix_timestamp"),
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala
index 2840b18..bee3ae4 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala
@@ -1079,6 +1079,53 @@ case class ParseToTimestampNTZ(
copy(child = newChild)
}
+/**
+ * Parses a column to a timestamp with local time zone based on the supplied format.
+ */
+// scalastyle:off line.size.limit
+@ExpressionDescription(
+ usage = """
+ _FUNC_(timestamp_str[, fmt]) - Parses the `timestamp_str` expression with the `fmt` expression
+ to a timestamp with local time zone. Returns null with invalid input. By default, it follows casting rules to
+ a timestamp if the `fmt` is omitted.
+ """,
+ arguments = """
+ Arguments:
+ * timestamp_str - A string to be parsed to timestamp with local time zone.
+ * fmt - Timestamp format pattern to follow. See <a href="https://spark.apache.org/docs/latest/sql-ref-datetime-pattern.html">Datetime Patterns</a> for valid
+ date and time format patterns.
+ """,
+ examples = """
+ Examples:
+ > SELECT _FUNC_('2016-12-31 00:12:00');
+ 2016-12-31 00:12:00
+ > SELECT _FUNC_('2016-12-31', 'yyyy-MM-dd');
+ 2016-12-31 00:00:00
+ """,
+ group = "datetime_funcs",
+ since = "3.2.0")
+// scalastyle:on line.size.limit
+case class ParseToTimestampLTZ(
+ left: Expression,
+ format: Option[Expression],
+ child: Expression) extends RuntimeReplaceable {
+
+ def this(left: Expression, format: Expression) = {
+ this(left, Option(format), GetTimestamp(left, format, TimestampType))
+ }
+
+ def this(left: Expression) = this(left, None, Cast(left, TimestampType))
+
+ override def flatArguments: Iterator[Any] = Iterator(left, format)
+ override def exprsReplaced: Seq[Expression] = left +: format.toSeq
+
+ override def prettyName: String = "to_timestamp_ltz"
+ override def dataType: DataType = TimestampType
+
+ override protected def withNewChildInternal(newChild: Expression): ParseToTimestampLTZ =
+ copy(child = newChild)
+}
+
abstract class ToTimestamp
extends BinaryExpression with TimestampFormatterHelper with ExpectsInputTypes {
diff --git a/sql/core/src/test/resources/sql-functions/sql-expression-schema.md b/sql/core/src/test/resources/sql-functions/sql-expression-schema.md
index 76fc1b7..f71f3a8 100644
--- a/sql/core/src/test/resources/sql-functions/sql-expression-schema.md
+++ b/sql/core/src/test/resources/sql-functions/sql-expression-schema.md
@@ -1,6 +1,6 @@
<!-- Automatically generated by ExpressionsSchemaSuite -->
## Summary
- - Number of queries: 358
+ - Number of queries: 359
- Number of expressions that missing example: 13
- Expressions missing examples: bigint,binary,boolean,date,decimal,double,float,int,smallint,string,timestamp,tinyint,window
## Schema of Built-in Functions
@@ -208,6 +208,7 @@
| org.apache.spark.sql.catalyst.expressions.Overlay | overlay | SELECT overlay('Spark SQL' PLACING '_' FROM 6) | struct<overlay(Spark SQL, _, 6, -1):string> |
| org.apache.spark.sql.catalyst.expressions.ParseToDate | to_date | SELECT to_date('2009-07-30 04:17:52') | struct<to_date(2009-07-30 04:17:52):date> |
| org.apache.spark.sql.catalyst.expressions.ParseToTimestamp | to_timestamp | SELECT to_timestamp('2016-12-31 00:12:00') | struct<to_timestamp(2016-12-31 00:12:00):timestamp> |
+| org.apache.spark.sql.catalyst.expressions.ParseToTimestampLTZ | to_timestamp_ltz | SELECT to_timestamp_ltz('2016-12-31 00:12:00') | struct<to_timestamp_ltz(2016-12-31 00:12:00):timestamp> |
| org.apache.spark.sql.catalyst.expressions.ParseToTimestampNTZ | to_timestamp_ntz | SELECT to_timestamp_ntz('2016-12-31 00:12:00') | struct<to_timestamp_ntz(2016-12-31 00:12:00):timestamp_ntz> |
| org.apache.spark.sql.catalyst.expressions.ParseUrl | parse_url | SELECT parse_url('http://spark.apache.org/path?query=1', 'HOST') | struct<parse_url(http://spark.apache.org/path?query=1, HOST):string> |
| org.apache.spark.sql.catalyst.expressions.PercentRank | percent_rank | SELECT a, b, percent_rank(b) OVER (PARTITION BY a ORDER BY b) FROM VALUES ('A1', 2), ('A1', 1), ('A2', 3), ('A1', 1) tab(a, b) | struct<a:string,b:int,PERCENT_RANK() OVER (PARTITION BY a ORDER BY b ASC NULLS FIRST ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW):double> |
diff --git a/sql/core/src/test/resources/sql-tests/inputs/datetime.sql b/sql/core/src/test/resources/sql-tests/inputs/datetime.sql
index c4a89a6..db30c22 100644
--- a/sql/core/src/test/resources/sql-tests/inputs/datetime.sql
+++ b/sql/core/src/test/resources/sql-tests/inputs/datetime.sql
@@ -33,6 +33,10 @@ select to_timestamp_ntz(null), to_timestamp_ntz('2016-12-31 00:12:00'), to_times
select to_timestamp_ntz(to_date(null)), to_timestamp_ntz(to_date('2016-12-31')), to_timestamp_ntz(to_date('2016-12-31', 'yyyy-MM-dd'));
select to_timestamp_ntz(to_timestamp(null)), to_timestamp_ntz(to_timestamp('2016-12-31 00:12:00')), to_timestamp_ntz(to_timestamp('2016-12-31', 'yyyy-MM-dd'));
+select to_timestamp_ltz(null), to_timestamp_ltz('2016-12-31 00:12:00'), to_timestamp_ltz('2016-12-31', 'yyyy-MM-dd');
+select to_timestamp_ltz(to_date(null)), to_timestamp_ltz(to_date('2016-12-31')), to_timestamp_ltz(to_date('2016-12-31', 'yyyy-MM-dd'));
+select to_timestamp_ltz(to_timestamp(null)), to_timestamp_ltz(to_timestamp('2016-12-31 00:12:00')), to_timestamp_ltz(to_timestamp('2016-12-31', 'yyyy-MM-dd'));
+
select dayofweek('2007-02-03'), dayofweek('2009-07-30'), dayofweek('2017-05-27'), dayofweek(null), dayofweek('1582-10-15 13:10:15');
-- [SPARK-22333]: timeFunctionCall has conflicts with columnReference
diff --git a/sql/core/src/test/resources/sql-tests/results/ansi/datetime.sql.out b/sql/core/src/test/resources/sql-tests/results/ansi/datetime.sql.out
index 195bc69..761511d 100644
--- a/sql/core/src/test/resources/sql-tests/results/ansi/datetime.sql.out
+++ b/sql/core/src/test/resources/sql-tests/results/ansi/datetime.sql.out
@@ -1,5 +1,5 @@
-- Automatically generated by SQLQueryTestSuite
--- Number of queries: 202
+-- Number of queries: 205
-- !query
@@ -182,6 +182,30 @@ NULL 2016-12-31 00:12:00 2016-12-31 00:00:00
-- !query
+select to_timestamp_ltz(null), to_timestamp_ltz('2016-12-31 00:12:00'), to_timestamp_ltz('2016-12-31', 'yyyy-MM-dd')
+-- !query schema
+struct<to_timestamp_ltz(NULL):timestamp,to_timestamp_ltz(2016-12-31 00:12:00):timestamp,to_timestamp_ltz(2016-12-31, yyyy-MM-dd):timestamp>
+-- !query output
+NULL 2016-12-31 00:12:00 2016-12-31 00:00:00
+
+
+-- !query
+select to_timestamp_ltz(to_date(null)), to_timestamp_ltz(to_date('2016-12-31')), to_timestamp_ltz(to_date('2016-12-31', 'yyyy-MM-dd'))
+-- !query schema
+struct<to_timestamp_ltz(to_date(NULL)):timestamp,to_timestamp_ltz(to_date(2016-12-31)):timestamp,to_timestamp_ltz(to_date(2016-12-31, yyyy-MM-dd)):timestamp>
+-- !query output
+NULL 2016-12-31 00:00:00 2016-12-31 00:00:00
+
+
+-- !query
+select to_timestamp_ltz(to_timestamp(null)), to_timestamp_ltz(to_timestamp('2016-12-31 00:12:00')), to_timestamp_ltz(to_timestamp('2016-12-31', 'yyyy-MM-dd'))
+-- !query schema
+struct<to_timestamp_ltz(to_timestamp(NULL)):timestamp,to_timestamp_ltz(to_timestamp(2016-12-31 00:12:00)):timestamp,to_timestamp_ltz(to_timestamp(2016-12-31, yyyy-MM-dd)):timestamp>
+-- !query output
+NULL 2016-12-31 00:12:00 2016-12-31 00:00:00
+
+
+-- !query
select dayofweek('2007-02-03'), dayofweek('2009-07-30'), dayofweek('2017-05-27'), dayofweek(null), dayofweek('1582-10-15 13:10:15')
-- !query schema
struct<dayofweek(2007-02-03):int,dayofweek(2009-07-30):int,dayofweek(2017-05-27):int,dayofweek(NULL):int,dayofweek(1582-10-15 13:10:15):int>
diff --git a/sql/core/src/test/resources/sql-tests/results/datetime-legacy.sql.out b/sql/core/src/test/resources/sql-tests/results/datetime-legacy.sql.out
index 965f179..ebeb127 100644
--- a/sql/core/src/test/resources/sql-tests/results/datetime-legacy.sql.out
+++ b/sql/core/src/test/resources/sql-tests/results/datetime-legacy.sql.out
@@ -1,5 +1,5 @@
-- Automatically generated by SQLQueryTestSuite
--- Number of queries: 202
+-- Number of queries: 205
-- !query
@@ -176,6 +176,30 @@ NULL 2016-12-31 00:12:00 2016-12-31 00:00:00
-- !query
+select to_timestamp_ltz(null), to_timestamp_ltz('2016-12-31 00:12:00'), to_timestamp_ltz('2016-12-31', 'yyyy-MM-dd')
+-- !query schema
+struct<to_timestamp_ltz(NULL):timestamp,to_timestamp_ltz(2016-12-31 00:12:00):timestamp,to_timestamp_ltz(2016-12-31, yyyy-MM-dd):timestamp>
+-- !query output
+NULL 2016-12-31 00:12:00 2016-12-31 00:00:00
+
+
+-- !query
+select to_timestamp_ltz(to_date(null)), to_timestamp_ltz(to_date('2016-12-31')), to_timestamp_ltz(to_date('2016-12-31', 'yyyy-MM-dd'))
+-- !query schema
+struct<to_timestamp_ltz(to_date(NULL)):timestamp,to_timestamp_ltz(to_date(2016-12-31)):timestamp,to_timestamp_ltz(to_date(2016-12-31, yyyy-MM-dd)):timestamp>
+-- !query output
+NULL 2016-12-31 00:00:00 2016-12-31 00:00:00
+
+
+-- !query
+select to_timestamp_ltz(to_timestamp(null)), to_timestamp_ltz(to_timestamp('2016-12-31 00:12:00')), to_timestamp_ltz(to_timestamp('2016-12-31', 'yyyy-MM-dd'))
+-- !query schema
+struct<to_timestamp_ltz(to_timestamp(NULL)):timestamp,to_timestamp_ltz(to_timestamp(2016-12-31 00:12:00)):timestamp,to_timestamp_ltz(to_timestamp(2016-12-31, yyyy-MM-dd)):timestamp>
+-- !query output
+NULL 2016-12-31 00:12:00 2016-12-31 00:00:00
+
+
+-- !query
select dayofweek('2007-02-03'), dayofweek('2009-07-30'), dayofweek('2017-05-27'), dayofweek(null), dayofweek('1582-10-15 13:10:15')
-- !query schema
struct<dayofweek(2007-02-03):int,dayofweek(2009-07-30):int,dayofweek(2017-05-27):int,dayofweek(NULL):int,dayofweek(1582-10-15 13:10:15):int>
diff --git a/sql/core/src/test/resources/sql-tests/results/datetime.sql.out b/sql/core/src/test/resources/sql-tests/results/datetime.sql.out
index fdc9c03..e95cade 100755
--- a/sql/core/src/test/resources/sql-tests/results/datetime.sql.out
+++ b/sql/core/src/test/resources/sql-tests/results/datetime.sql.out
@@ -1,5 +1,5 @@
-- Automatically generated by SQLQueryTestSuite
--- Number of queries: 202
+-- Number of queries: 205
-- !query
@@ -176,6 +176,30 @@ NULL 2016-12-31 00:12:00 2016-12-31 00:00:00
-- !query
+select to_timestamp_ltz(null), to_timestamp_ltz('2016-12-31 00:12:00'), to_timestamp_ltz('2016-12-31', 'yyyy-MM-dd')
+-- !query schema
+struct<to_timestamp_ltz(NULL):timestamp,to_timestamp_ltz(2016-12-31 00:12:00):timestamp,to_timestamp_ltz(2016-12-31, yyyy-MM-dd):timestamp>
+-- !query output
+NULL 2016-12-31 00:12:00 2016-12-31 00:00:00
+
+
+-- !query
+select to_timestamp_ltz(to_date(null)), to_timestamp_ltz(to_date('2016-12-31')), to_timestamp_ltz(to_date('2016-12-31', 'yyyy-MM-dd'))
+-- !query schema
+struct<to_timestamp_ltz(to_date(NULL)):timestamp,to_timestamp_ltz(to_date(2016-12-31)):timestamp,to_timestamp_ltz(to_date(2016-12-31, yyyy-MM-dd)):timestamp>
+-- !query output
+NULL 2016-12-31 00:00:00 2016-12-31 00:00:00
+
+
+-- !query
+select to_timestamp_ltz(to_timestamp(null)), to_timestamp_ltz(to_timestamp('2016-12-31 00:12:00')), to_timestamp_ltz(to_timestamp('2016-12-31', 'yyyy-MM-dd'))
+-- !query schema
+struct<to_timestamp_ltz(to_timestamp(NULL)):timestamp,to_timestamp_ltz(to_timestamp(2016-12-31 00:12:00)):timestamp,to_timestamp_ltz(to_timestamp(2016-12-31, yyyy-MM-dd)):timestamp>
+-- !query output
+NULL 2016-12-31 00:12:00 2016-12-31 00:00:00
+
+
+-- !query
select dayofweek('2007-02-03'), dayofweek('2009-07-30'), dayofweek('2017-05-27'), dayofweek(null), dayofweek('1582-10-15 13:10:15')
-- !query schema
struct<dayofweek(2007-02-03):int,dayofweek(2009-07-30):int,dayofweek(2017-05-27):int,dayofweek(NULL):int,dayofweek(1582-10-15 13:10:15):int>
diff --git a/sql/core/src/test/resources/sql-tests/results/timestampNTZ/datetime.sql.out b/sql/core/src/test/resources/sql-tests/results/timestampNTZ/datetime.sql.out
index 4b148b0..ec384a5 100644
--- a/sql/core/src/test/resources/sql-tests/results/timestampNTZ/datetime.sql.out
+++ b/sql/core/src/test/resources/sql-tests/results/timestampNTZ/datetime.sql.out
@@ -1,5 +1,5 @@
-- Automatically generated by SQLQueryTestSuite
--- Number of queries: 202
+-- Number of queries: 205
-- !query
@@ -176,6 +176,30 @@ NULL 2016-12-31 00:12:00 2016-12-31 00:00:00
-- !query
+select to_timestamp_ltz(null), to_timestamp_ltz('2016-12-31 00:12:00'), to_timestamp_ltz('2016-12-31', 'yyyy-MM-dd')
+-- !query schema
+struct<to_timestamp_ltz(NULL):timestamp,to_timestamp_ltz(2016-12-31 00:12:00):timestamp,to_timestamp_ltz(2016-12-31, yyyy-MM-dd):timestamp>
+-- !query output
+NULL 2016-12-31 00:12:00 2016-12-31 00:00:00
+
+
+-- !query
+select to_timestamp_ltz(to_date(null)), to_timestamp_ltz(to_date('2016-12-31')), to_timestamp_ltz(to_date('2016-12-31', 'yyyy-MM-dd'))
+-- !query schema
+struct<to_timestamp_ltz(to_date(NULL)):timestamp,to_timestamp_ltz(to_date(2016-12-31)):timestamp,to_timestamp_ltz(to_date(2016-12-31, yyyy-MM-dd)):timestamp>
+-- !query output
+NULL 2016-12-31 00:00:00 2016-12-31 00:00:00
+
+
+-- !query
+select to_timestamp_ltz(to_timestamp(null)), to_timestamp_ltz(to_timestamp('2016-12-31 00:12:00')), to_timestamp_ltz(to_timestamp('2016-12-31', 'yyyy-MM-dd'))
+-- !query schema
+struct<to_timestamp_ltz(to_timestamp(NULL)):timestamp,to_timestamp_ltz(to_timestamp(2016-12-31 00:12:00)):timestamp,to_timestamp_ltz(to_timestamp(2016-12-31, yyyy-MM-dd)):timestamp>
+-- !query output
+NULL 2016-12-31 00:12:00 2016-12-31 00:00:00
+
+
+-- !query
select dayofweek('2007-02-03'), dayofweek('2009-07-30'), dayofweek('2017-05-27'), dayofweek(null), dayofweek('1582-10-15 13:10:15')
-- !query schema
struct<dayofweek(2007-02-03):int,dayofweek(2009-07-30):int,dayofweek(2017-05-27):int,dayofweek(NULL):int,dayofweek(1582-10-15 13:10:15):int>
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org