You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by we...@apache.org on 2019/04/10 14:42:11 UTC
[spark] branch master updated: [SPARK-27423][SQL] Cast DATE <->
TIMESTAMP according to the SQL standard
This is an automated email from the ASF dual-hosted git repository.
wenchen pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
new ab8710b [SPARK-27423][SQL] Cast DATE <-> TIMESTAMP according to the SQL standard
ab8710b is described below
commit ab8710b57916a129fcb89464209361120d224535
Author: Maxim Gekk <ma...@databricks.com>
AuthorDate: Wed Apr 10 22:41:19 2019 +0800
[SPARK-27423][SQL] Cast DATE <-> TIMESTAMP according to the SQL standard
## What changes were proposed in this pull request?
According to the SQL standard, a value of the `DATE` type is a combination of year, month and day-of-month, and it is independent of any time zone. To convert it to Catalyst's `TIMESTAMP`, a `DATE` value should be "extended" by the time at midnight - `00:00:00`. The resulting local date+time should be considered as a timestamp in the session time zone, and cast to microseconds since the epoch in `UTC` accordingly.
The reverse casting from `TIMESTAMP` to `DATE` should be performed in a similar way. `TIMESTAMP` values should be represented as a local date+time in the session time zone, and the time component should simply be removed. For example, `TIMESTAMP 2019-04-10 00:10:12` -> `DATE 2019-04-10`. The resulting date is converted to days since the epoch `1970-01-01`.
## How was this patch tested?
The changes were tested by existing test suites - `DateFunctionsSuite`, `DateExpressionsSuite` and `CastSuite`.
Closes #24332 from MaxGekk/cast-timestamp-to-date2.
Lead-authored-by: Maxim Gekk <ma...@databricks.com>
Co-authored-by: Maxim Gekk <ma...@gmail.com>
Signed-off-by: Wenchen Fan <we...@databricks.com>
---
.../apache/spark/sql/catalyst/expressions/Cast.scala | 19 +++++++++++--------
.../spark/sql/catalyst/util/DateTimeUtils.scala | 11 +++++++++++
2 files changed, 22 insertions(+), 8 deletions(-)
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala
index 848195f..f7bc8b9 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala
@@ -18,6 +18,7 @@
package org.apache.spark.sql.catalyst.expressions
import java.math.{BigDecimal => JavaBigDecimal}
+import java.time.{LocalDate, LocalDateTime, LocalTime}
import java.util.concurrent.TimeUnit._
import org.apache.spark.SparkException
@@ -381,7 +382,7 @@ case class Cast(child: Expression, dataType: DataType, timeZoneId: Option[String
case ByteType =>
buildCast[Byte](_, b => longToTimestamp(b.toLong))
case DateType =>
- buildCast[Int](_, d => MILLISECONDS.toMicros(DateTimeUtils.daysToMillis(d, timeZone)))
+ buildCast[Int](_, d => epochDaysToMicros(d, zoneId))
// TimestampWritable.decimalToTimestamp
case DecimalType() =>
buildCast[Decimal](_, d => decimalToTimestamp(d))
@@ -418,7 +419,7 @@ case class Cast(child: Expression, dataType: DataType, timeZoneId: Option[String
case TimestampType =>
// throw valid precision more than seconds, according to Hive.
// Timestamp.nanos is in 0 to 999,999,999, no more than a second.
- buildCast[Long](_, t => DateTimeUtils.millisToDays(MICROSECONDS.toMillis(t), timeZone))
+ buildCast[Long](_, t => microsToEpochDays(t, zoneId))
}
// IntervalConverter
@@ -935,11 +936,12 @@ case class Cast(child: Expression, dataType: DataType, timeZoneId: Option[String
}
"""
case TimestampType =>
- val tz = JavaCode.global(ctx.addReferenceObj("timeZone", timeZone), timeZone.getClass)
+ val zid = JavaCode.global(
+ ctx.addReferenceObj("zoneId", zoneId, "java.time.ZoneId"),
+ zoneId.getClass)
(c, evPrim, evNull) =>
code"""$evPrim =
- org.apache.spark.sql.catalyst.util.DateTimeUtils.millisToDays(
- $c / $MICROS_PER_MILLIS, $tz);"""
+ org.apache.spark.sql.catalyst.util.DateTimeUtils.microsToEpochDays($c, $zid);"""
case _ =>
(c, evPrim, evNull) => code"$evNull = true;"
}
@@ -1043,11 +1045,12 @@ case class Cast(child: Expression, dataType: DataType, timeZoneId: Option[String
case _: IntegralType =>
(c, evPrim, evNull) => code"$evPrim = ${longToTimeStampCode(c)};"
case DateType =>
- val tz = JavaCode.global(ctx.addReferenceObj("timeZone", timeZone), timeZone.getClass)
+ val zid = JavaCode.global(
+ ctx.addReferenceObj("zoneId", zoneId, "java.time.ZoneId"),
+ zoneId.getClass)
(c, evPrim, evNull) =>
code"""$evPrim =
- org.apache.spark.sql.catalyst.util.DateTimeUtils.daysToMillis(
- $c, $tz) * $MICROS_PER_MILLIS;"""
+ org.apache.spark.sql.catalyst.util.DateTimeUtils.epochDaysToMicros($c, $zid);"""
case DecimalType() =>
(c, evPrim, evNull) => code"$evPrim = ${decimalToTimestampCode(c)};"
case DoubleType =>
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala
index 7f3bb83..50fa6fb 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala
@@ -170,6 +170,17 @@ object DateTimeUtils {
MILLISECONDS.toMicros(millis)
}
+ def microsToEpochDays(epochMicros: SQLTimestamp, zoneId: ZoneId): SQLDate = {
+ localDateToDays(microsToInstant(epochMicros).atZone(zoneId).toLocalDate)
+ }
+
+ def epochDaysToMicros(epochDays: SQLDate, zoneId: ZoneId): SQLTimestamp = {
+ val localDate = LocalDate.ofEpochDay(epochDays)
+ val zeroLocalTime = LocalTime.MIDNIGHT
+ val localDateTime = LocalDateTime.of(localDate, zeroLocalTime)
+ instantToMicros(localDateTime.atZone(zoneId).toInstant)
+ }
+
/**
* Trim and parse a given UTF8 date string to the corresponding a corresponding [[Long]] value.
* The return type is [[Option]] in order to distinguish between 0L and null. The following
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org