You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by we...@apache.org on 2020/05/07 15:03:30 UTC
[spark] branch branch-2.4 updated: [SPARK-26908][SQL][2.4] Fix
DateTimeUtils.toMillis and millisToDays
This is an automated email from the ASF dual-hosted git repository.
wenchen pushed a commit to branch branch-2.4
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/branch-2.4 by this push:
new 3936b14 [SPARK-26908][SQL][2.4] Fix DateTimeUtils.toMillis and millisToDays
3936b14 is described below
commit 3936b1442eb17709ff19d305b3da5ca520a79c56
Author: Max Gekk <ma...@gmail.com>
AuthorDate: Thu May 7 15:01:11 2020 +0000
[SPARK-26908][SQL][2.4] Fix DateTimeUtils.toMillis and millisToDays
## What changes were proposed in this pull request?
The `DateTimeUtils.toMillis` can produce inaccurate results for some negative values (timestamps before the epoch). The error can be around 1ms. In the PR, I propose to use `Math.floorDiv` when casting microseconds to milliseconds, and when casting milliseconds to days since the epoch.
## How was this patch tested?
Added new test to `DateTimeUtilsSuite`, and tested by `CastSuite` as well.
Closes #28475 from MaxGekk/micros-to-millis-2.4.
Lead-authored-by: Max Gekk <ma...@gmail.com>
Co-authored-by: Maxim Gekk <ma...@gmail.com>
Signed-off-by: Wenchen Fan <we...@databricks.com>
---
.../scala/org/apache/spark/sql/catalyst/expressions/Cast.scala | 6 ++++--
.../org/apache/spark/sql/catalyst/util/DateTimeUtils.scala | 10 +++++-----
.../apache/spark/sql/catalyst/util/DateTimeUtilsSuite.scala | 6 ++++++
3 files changed, 15 insertions(+), 7 deletions(-)
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala
index 780db65..8c2dc0a 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala
@@ -393,7 +393,9 @@ case class Cast(child: Expression, dataType: DataType, timeZoneId: Option[String
// converting seconds to us
private[this] def longToTimestamp(t: Long): Long = t * 1000000L
// converting us to seconds
- private[this] def timestampToLong(ts: Long): Long = math.floor(ts.toDouble / 1000000L).toLong
+ private[this] def timestampToLong(ts: Long): Long = {
+ Math.floorDiv(ts, DateTimeUtils.MICROS_PER_SECOND)
+ }
// converting us to seconds in double
private[this] def timestampToDouble(ts: Long): Double = {
ts / 1000000.0
@@ -1071,7 +1073,7 @@ case class Cast(child: Expression, dataType: DataType, timeZoneId: Option[String
}
private[this] def longToTimeStampCode(l: ExprValue): Block = code"$l * 1000000L"
private[this] def timestampToIntegerCode(ts: ExprValue): Block =
- code"java.lang.Math.floor((double) $ts / 1000000L)"
+ code"java.lang.Math.floorDiv($ts, 1000000L)"
private[this] def timestampToDoubleCode(ts: ExprValue): Block =
code"$ts / 1000000.0"
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala
index 8c0b8ce..1c9b661 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala
@@ -132,10 +132,10 @@ object DateTimeUtils {
}
def millisToDays(millisUtc: Long, timeZone: TimeZone): SQLDate = {
- // SPARK-6785: use Math.floor so negative number of days (dates before 1970)
+ // SPARK-6785: use Math.floorDiv so negative number of days (dates before 1970)
// will correctly work as input for function toJavaDate(Int)
val millisLocal = millisUtc + timeZone.getOffset(millisUtc)
- Math.floor(millisLocal.toDouble / MILLIS_PER_DAY).toInt
+ Math.floorDiv(millisLocal, MILLIS_PER_DAY).toInt
}
// reverse of millisToDays
@@ -268,14 +268,14 @@ object DateTimeUtils {
// When the timestamp is negative i.e before 1970, we need to adjust the millseconds portion.
// Example - 1965-01-01 10:11:12.123456 is represented as (-157700927876544) in micro precision.
// In millis precision the above needs to be represented as (-157700927877).
- Math.floor(us.toDouble / MILLIS_PER_SECOND).toLong
+ Math.floorDiv(us, MICROS_PER_MILLIS)
}
/*
- * Converts millseconds since epoch to SQLTimestamp.
+ * Converts milliseconds since epoch to SQLTimestamp.
*/
def fromMillis(millis: Long): SQLTimestamp = {
- millis * 1000L
+ millis * MICROS_PER_MILLIS
}
/**
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeUtilsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeUtilsSuite.scala
index a73aeaa..95f0fee 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeUtilsSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeUtilsSuite.scala
@@ -665,6 +665,7 @@ class DateTimeUtilsSuite extends SparkFunSuite {
c.set(2015, 11, 31, 16, 0, 0)
assert(millisToDays(c.getTimeInMillis, TimeZonePST) === 16800)
assert(millisToDays(c.getTimeInMillis, TimeZoneGMT) === 16801)
+ assert(millisToDays(-1 * MILLIS_PER_DAY + 1, TimeZoneGMT) == -1)
c.set(2015, 11, 31, 0, 0, 0)
c.set(Calendar.MILLISECOND, 0)
@@ -771,4 +772,9 @@ class DateTimeUtilsSuite extends SparkFunSuite {
"2019-10-14 09:39:07")
}
}
+
+ test("toMillis") {
+ assert(DateTimeUtils.toMillis(-9223372036844776001L) === -9223372036844777L)
+ assert(DateTimeUtils.toMillis(-157700927876544L) === -157700927877L)
+ }
}
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org