You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by we...@apache.org on 2020/03/05 12:50:41 UTC
[spark] branch master updated: [SPARK-31005][SQL] Support time zone
ids in casting strings to timestamps
This is an automated email from the ASF dual-hosted git repository.
wenchen pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
new 1fd9a91 [SPARK-31005][SQL] Support time zone ids in casting strings to timestamps
1fd9a91 is described below
commit 1fd9a91c662a368e348ef96604a79929f814041c
Author: Maxim Gekk <ma...@gmail.com>
AuthorDate: Thu Mar 5 20:49:43 2020 +0800
[SPARK-31005][SQL] Support time zone ids in casting strings to timestamps
### What changes were proposed in this pull request?
In the PR, I propose to change `DateTimeUtils.stringToTimestamp` to support any valid time zone id at the end of input string. After the changes, the function accepts zone ids in the formats:
- no zone id. In that case, the function uses the local session time zone from the SQL config `spark.sql.session.timeZone`
- -[h]h:[m]m
- +[h]h:[m]m
- Z
- Short zone id, see https://docs.oracle.com/javase/8/docs/api/java/time/ZoneId.html#SHORT_IDS
- Zone ID starts with 'UTC+', 'UTC-', 'GMT+', 'GMT-', 'UT+' or 'UT-'. The ID is split in two, with a two or three letter prefix and a suffix starting with the sign. The suffix must be in the formats:
- +|-h[h]
- +|-hh[:]mm
- +|-hh:mm:ss
- +|-hhmmss
- Region-based zone IDs in the form `{area}/{city}`, such as `Europe/Paris` or `America/New_York`. The default set of region ids is supplied by the IANA Time Zone Database (TZDB).
### Why are the changes needed?
- To use `stringToTimestamp` as a substitution of removed `stringToTime`, see https://github.com/apache/spark/pull/27710#discussion_r385020173
- Improve UX of Spark SQL by allowing flexible formats of zone ids. Currently, Spark accepts only `Z` and zone offsets that can be inconvenient when a time zone offset is shifted due to daylight saving rules. For instance:
```sql
spark-sql> select cast('2015-03-18T12:03:17.123456 Europe/Moscow' as timestamp);
NULL
```
### Does this PR introduce any user-facing change?
Yes. After the changes, casting strings to timestamps allows time zone id at the end of the strings:
```sql
spark-sql> select cast('2015-03-18T12:03:17.123456 Europe/Moscow' as timestamp);
2015-03-18 12:03:17.123456
```
### How was this patch tested?
- Added new test cases to the `string to timestamp` test in `DateTimeUtilsSuite`.
- Run `CastSuite` and `AnsiCastSuite`.
Closes #27753 from MaxGekk/stringToTimestamp-uni-zoneId.
Authored-by: Maxim Gekk <ma...@gmail.com>
Signed-off-by: Wenchen Fan <we...@databricks.com>
---
.../spark/sql/catalyst/util/DateTimeUtils.scala | 57 +++++++++++-----------
.../sql/catalyst/util/DateTimeUtilsSuite.scala | 23 +++++++--
2 files changed, 48 insertions(+), 32 deletions(-)
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala
index 6b252ec..3a038a4 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala
@@ -185,28 +185,28 @@ object DateTimeUtils {
* `yyyy-[m]m`
* `yyyy-[m]m-[d]d`
* `yyyy-[m]m-[d]d `
- * `yyyy-[m]m-[d]d [h]h:[m]m:[s]s.[ms][ms][ms][us][us][us]`
- * `yyyy-[m]m-[d]d [h]h:[m]m:[s]s.[ms][ms][ms][us][us][us]Z`
- * `yyyy-[m]m-[d]d [h]h:[m]m:[s]s.[ms][ms][ms][us][us][us]-[h]h:[m]m`
- * `yyyy-[m]m-[d]d [h]h:[m]m:[s]s.[ms][ms][ms][us][us][us]+[h]h:[m]m`
- * `yyyy-[m]m-[d]dT[h]h:[m]m:[s]s.[ms][ms][ms][us][us][us]`
- * `yyyy-[m]m-[d]dT[h]h:[m]m:[s]s.[ms][ms][ms][us][us][us]Z`
- * `yyyy-[m]m-[d]dT[h]h:[m]m:[s]s.[ms][ms][ms][us][us][us]-[h]h:[m]m`
- * `yyyy-[m]m-[d]dT[h]h:[m]m:[s]s.[ms][ms][ms][us][us][us]+[h]h:[m]m`
- * `[h]h:[m]m:[s]s.[ms][ms][ms][us][us][us]`
- * `[h]h:[m]m:[s]s.[ms][ms][ms][us][us][us]Z`
- * `[h]h:[m]m:[s]s.[ms][ms][ms][us][us][us]-[h]h:[m]m`
- * `[h]h:[m]m:[s]s.[ms][ms][ms][us][us][us]+[h]h:[m]m`
- * `T[h]h:[m]m:[s]s.[ms][ms][ms][us][us][us]`
- * `T[h]h:[m]m:[s]s.[ms][ms][ms][us][us][us]Z`
- * `T[h]h:[m]m:[s]s.[ms][ms][ms][us][us][us]-[h]h:[m]m`
- * `T[h]h:[m]m:[s]s.[ms][ms][ms][us][us][us]+[h]h:[m]m`
+ * `yyyy-[m]m-[d]d [h]h:[m]m:[s]s.[ms][ms][ms][us][us][us][zone_id]`
+ * `yyyy-[m]m-[d]dT[h]h:[m]m:[s]s.[ms][ms][ms][us][us][us][zone_id]`
+ * `[h]h:[m]m:[s]s.[ms][ms][ms][us][us][us][zone_id]`
+ * `T[h]h:[m]m:[s]s.[ms][ms][ms][us][us][us][zone_id]`
+ *
+ * where `zone_id` should have one of the forms:
+ * - Z - Zulu time zone UTC+0
+ * - +|-[h]h:[m]m
+ * - A short id, see https://docs.oracle.com/javase/8/docs/api/java/time/ZoneId.html#SHORT_IDS
+ * - An id with one of the prefixes UTC+, UTC-, GMT+, GMT-, UT+ or UT-,
+ * and a suffix in the formats:
+ * - +|-h[h]
+ * - +|-hh[:]mm
+ * - +|-hh:mm:ss
+ * - +|-hhmmss
+ * - Region-based zone IDs in the form `area/city`, such as `Europe/Paris`
*/
def stringToTimestamp(s: UTF8String, timeZoneId: ZoneId): Option[SQLTimestamp] = {
if (s == null) {
return None
}
- var tz: Option[Byte] = None
+ var tz: Option[String] = None
val segments: Array[Int] = Array[Int](1, 1, 1, 0, 0, 0, 0, 0, 0)
var i = 0
var currentSegmentValue = 0
@@ -257,22 +257,21 @@ object DateTimeUtils {
return None
}
} else if (i == 5 || i == 6) {
- if (b == 'Z') {
+ if (b == '-' || b == '+') {
segments(i) = currentSegmentValue
currentSegmentValue = 0
i += 1
- tz = Some(43)
- } else if (b == '-' || b == '+') {
+ tz = Some(new String(bytes, j, 1))
+ } else if (b == '.' && i == 5) {
segments(i) = currentSegmentValue
currentSegmentValue = 0
i += 1
- tz = Some(b)
- } else if (b == '.' && i == 5) {
+ } else {
segments(i) = currentSegmentValue
currentSegmentValue = 0
i += 1
- } else {
- return None
+ tz = Some(new String(bytes, j, bytes.length - j))
+ j = bytes.length - 1
}
if (i == 6 && b != '.') {
i += 1
@@ -312,11 +311,11 @@ object DateTimeUtils {
digitsMilli -= 1
}
try {
- val zoneId = if (tz.isEmpty) {
- timeZoneId
- } else {
- val sign = if (tz.get.toChar == '-') -1 else 1
- ZoneOffset.ofHoursMinutes(sign * segments(7), sign * segments(8))
+ val zoneId = tz match {
+ case None => timeZoneId
+ case Some("+") => ZoneOffset.ofHoursMinutes(segments(7), segments(8))
+ case Some("-") => ZoneOffset.ofHoursMinutes(-segments(7), -segments(8))
+ case Some(zoneName: String) => getZoneId(zoneName.trim)
}
val nanoseconds = MICROSECONDS.toNanos(segments(6))
val localTime = LocalTime.of(segments(3), segments(4), segments(5), nanoseconds.toInt)
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeUtilsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeUtilsSuite.scala
index 1465b06..6dde3d2 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeUtilsSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeUtilsSuite.scala
@@ -183,24 +183,29 @@ class DateTimeUtilsSuite extends SparkFunSuite with Matchers with SQLHelper {
var zoneId = getZoneId("GMT-13:53")
expected = Option(date(2015, 3, 18, 12, 3, 17, zid = zoneId))
checkStringToTimestamp("2015-03-18T12:03:17-13:53", expected)
+ checkStringToTimestamp("2015-03-18T12:03:17GMT-13:53", expected)
zoneId = getZoneId("UTC")
expected = Option(date(2015, 3, 18, 12, 3, 17, zid = zoneId))
checkStringToTimestamp("2015-03-18T12:03:17Z", expected)
checkStringToTimestamp("2015-03-18 12:03:17Z", expected)
+ checkStringToTimestamp("2015-03-18 12:03:17UTC", expected)
zoneId = getZoneId("GMT-01:00")
expected = Option(date(2015, 3, 18, 12, 3, 17, zid = zoneId))
checkStringToTimestamp("2015-03-18T12:03:17-1:0", expected)
checkStringToTimestamp("2015-03-18T12:03:17-01:00", expected)
+ checkStringToTimestamp("2015-03-18T12:03:17GMT-01:00", expected)
zoneId = getZoneId("GMT+07:30")
expected = Option(date(2015, 3, 18, 12, 3, 17, zid = zoneId))
checkStringToTimestamp("2015-03-18T12:03:17+07:30", expected)
+ checkStringToTimestamp("2015-03-18T12:03:17 GMT+07:30", expected)
zoneId = getZoneId("GMT+07:03")
expected = Option(date(2015, 3, 18, 12, 3, 17, zid = zoneId))
checkStringToTimestamp("2015-03-18T12:03:17+07:03", expected)
+ checkStringToTimestamp("2015-03-18T12:03:17GMT+07:03", expected)
// tests for the string including milliseconds.
expected = Option(date(2015, 3, 18, 12, 3, 17, 123000, zid = zid))
@@ -213,27 +218,32 @@ class DateTimeUtilsSuite extends SparkFunSuite with Matchers with SQLHelper {
expected = Option(date(2015, 3, 18, 12, 3, 17, 456000, zid = zoneId))
checkStringToTimestamp("2015-03-18T12:03:17.456Z", expected)
checkStringToTimestamp("2015-03-18 12:03:17.456Z", expected)
+ checkStringToTimestamp("2015-03-18 12:03:17.456 UTC", expected)
zoneId = getZoneId("GMT-01:00")
expected = Option(date(2015, 3, 18, 12, 3, 17, 123000, zid = zoneId))
checkStringToTimestamp("2015-03-18T12:03:17.123-1:0", expected)
checkStringToTimestamp("2015-03-18T12:03:17.123-01:00", expected)
+ checkStringToTimestamp("2015-03-18T12:03:17.123 GMT-01:00", expected)
zoneId = getZoneId("GMT+07:30")
expected = Option(date(2015, 3, 18, 12, 3, 17, 123000, zid = zoneId))
checkStringToTimestamp("2015-03-18T12:03:17.123+07:30", expected)
+ checkStringToTimestamp("2015-03-18T12:03:17.123 GMT+07:30", expected)
zoneId = getZoneId("GMT+07:30")
expected = Option(date(2015, 3, 18, 12, 3, 17, 123000, zid = zoneId))
checkStringToTimestamp("2015-03-18T12:03:17.123+07:30", expected)
+ checkStringToTimestamp("2015-03-18T12:03:17.123GMT+07:30", expected)
- zoneId = getZoneId("GMT+07:30")
expected = Option(date(2015, 3, 18, 12, 3, 17, 123121, zid = zoneId))
checkStringToTimestamp("2015-03-18T12:03:17.123121+7:30", expected)
+ checkStringToTimestamp("2015-03-18T12:03:17.123121 GMT+0730", expected)
zoneId = getZoneId("GMT+07:30")
expected = Option(date(2015, 3, 18, 12, 3, 17, 123120, zid = zoneId))
checkStringToTimestamp("2015-03-18T12:03:17.12312+7:30", expected)
+ checkStringToTimestamp("2015-03-18T12:03:17.12312 UT+07:30", expected)
expected = Option(time(18, 12, 15, zid = zid))
checkStringToTimestamp("18:12:15", expected)
@@ -241,10 +251,12 @@ class DateTimeUtilsSuite extends SparkFunSuite with Matchers with SQLHelper {
zoneId = getZoneId("GMT+07:30")
expected = Option(time(18, 12, 15, 123120, zid = zoneId))
checkStringToTimestamp("T18:12:15.12312+7:30", expected)
+ checkStringToTimestamp("T18:12:15.12312 UTC+07:30", expected)
zoneId = getZoneId("GMT+07:30")
expected = Option(time(18, 12, 15, 123120, zid = zoneId))
checkStringToTimestamp("18:12:15.12312+7:30", expected)
+ checkStringToTimestamp("18:12:15.12312 GMT+07:30", expected)
expected = Option(date(2011, 5, 6, 7, 8, 9, 100000, zid = zid))
checkStringToTimestamp("2011-05-06 07:08:09.1000", expected)
@@ -270,8 +282,13 @@ class DateTimeUtilsSuite extends SparkFunSuite with Matchers with SQLHelper {
// Truncating the fractional seconds
zoneId = getZoneId("GMT+00:00")
expected = Option(date(2015, 3, 18, 12, 3, 17, 123456, zid = zoneId))
- checkStringToTimestamp(
- "2015-03-18T12:03:17.123456789+0:00", expected)
+ checkStringToTimestamp("2015-03-18T12:03:17.123456789+0:00", expected)
+ checkStringToTimestamp("2015-03-18T12:03:17.123456789 UTC+0", expected)
+ checkStringToTimestamp("2015-03-18T12:03:17.123456789GMT+00:00", expected)
+
+ zoneId = getZoneId("Europe/Moscow")
+ expected = Option(date(2015, 3, 18, 12, 3, 17, 123456, zid = zoneId))
+ checkStringToTimestamp("2015-03-18T12:03:17.123456 Europe/Moscow", expected)
}
}
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org