You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by ma...@apache.org on 2023/03/03 06:39:03 UTC
[spark] branch master updated: [SPARK-42635][SQL] Fix the TimestampAdd expression
This is an automated email from the ASF dual-hosted git repository.
maxgekk pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
new 392bdc15958 [SPARK-42635][SQL] Fix the TimestampAdd expression
392bdc15958 is described below
commit 392bdc1595879ea03c90e3f4b550aa5ce7b32bdf
Author: Chenhao Li <ch...@databricks.com>
AuthorDate: Fri Mar 3 09:38:44 2023 +0300
[SPARK-42635][SQL] Fix the TimestampAdd expression
### What changes were proposed in this pull request?
This PR fixed the counter-intuitive behaviors of the `TimestampAdd` expression mentioned in https://issues.apache.org/jira/browse/SPARK-42635. See the following *user-facing* changes for details.
### Does this PR introduce _any_ user-facing change?
Yes. This PR fixes the three problems mentioned in SPARK-42635:
1. When the time is close to daylight saving time transition, the result may be discontinuous and not monotonic.
2. Adding month, quarter, and year silently ignores `Int` overflow during unit conversion.
3. Adding sub-month units (week, day, hour, minute, second, millisecond, microsecond)silently ignores `Long` overflow during unit conversion.
Some examples of the result changes:
Old results:
```
// In America/Los_Angeles timezone:
timestampadd(DAY, 1, 2011-03-12 03:00:00) = 2011-03-13 03:00:00 (this is correct, put it here for comparison)
timestampadd(HOUR, 23, 2011-03-12 03:00:00) = 2011-03-13 03:00:00
timestampadd(HOUR, 24, 2011-03-12 03:00:00) = 2011-03-13 03:00:00
timestampadd(SECOND, 86400 - 1, 2011-03-12 03:00:00) = 2011-03-13 03:59:59
timestampadd(SECOND, 86400, 2011-03-12 03:00:00) = 2011-03-13 03:00:00
// In UTC timezone:
timestampadd(quarter, 1431655764, 1970-01-01 00:00:00) = 1969-09-01 00:00:00
timestampadd(day, 106751992, 1970-01-01 00:00:00) = -290308-12-22 15:58:10.448384
```
New results:
```
// In America/Los_Angeles timezone:
timestampadd(DAY, 1, 2011-03-12 03:00:00) = 2011-03-13 03:00:00
timestampadd(HOUR, 23, 2011-03-12 03:00:00) = 2011-03-13 03:00:00
timestampadd(HOUR, 24, 2011-03-12 03:00:00) = 2011-03-13 04:00:00
timestampadd(SECOND, 86400 - 1, 2011-03-12 03:00:00) = 2011-03-13 03:59:59
timestampadd(SECOND, 86400, 2011-03-12 03:00:00) = 2011-03-13 04:00:00
// In UTC timezone:
timestampadd(quarter, 1431655764, 1970-01-01 00:00:00) = throw overflow exception
timestampadd(day, 106751992, 1970-01-01 00:00:00) = throw overflow exception
```
### How was this patch tested?
Pass existing tests and some new tests.
Closes #40237 from chenhao-db/SPARK-42635.
Authored-by: Chenhao Li <ch...@databricks.com>
Signed-off-by: Max Gekk <ma...@gmail.com>
---
.../spark/sql/catalyst/util/DateTimeUtils.scala | 22 +++--
.../expressions/DateExpressionsSuite.scala | 98 +++++++++++++++++++++-
2 files changed, 110 insertions(+), 10 deletions(-)
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala
index af0666a98fa..2ae11f6568a 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala
@@ -1227,25 +1227,29 @@ object DateTimeUtils {
try {
unit.toUpperCase(Locale.ROOT) match {
case "MICROSECOND" =>
- timestampAddDayTime(micros, quantity, zoneId)
+ timestampAddInterval(micros, 0, 0, quantity, zoneId)
case "MILLISECOND" =>
- timestampAddDayTime(micros, quantity * MICROS_PER_MILLIS, zoneId)
+ timestampAddInterval(micros, 0, 0,
+ Math.multiplyExact(quantity.toLong, MICROS_PER_MILLIS), zoneId)
case "SECOND" =>
- timestampAddDayTime(micros, quantity * MICROS_PER_SECOND, zoneId)
+ timestampAddInterval(micros, 0, 0,
+ Math.multiplyExact(quantity.toLong, MICROS_PER_SECOND), zoneId)
case "MINUTE" =>
- timestampAddDayTime(micros, quantity * MICROS_PER_MINUTE, zoneId)
+ timestampAddInterval(micros, 0, 0,
+ Math.multiplyExact(quantity.toLong, MICROS_PER_MINUTE), zoneId)
case "HOUR" =>
- timestampAddDayTime(micros, quantity * MICROS_PER_HOUR, zoneId)
+ timestampAddInterval(micros, 0, 0,
+ Math.multiplyExact(quantity.toLong, MICROS_PER_HOUR), zoneId)
case "DAY" | "DAYOFYEAR" =>
- timestampAddDayTime(micros, quantity * MICROS_PER_DAY, zoneId)
+ timestampAddInterval(micros, 0, quantity, 0, zoneId)
case "WEEK" =>
- timestampAddDayTime(micros, quantity * MICROS_PER_DAY * DAYS_PER_WEEK, zoneId)
+ timestampAddInterval(micros, 0, Math.multiplyExact(quantity, DAYS_PER_WEEK), 0, zoneId)
case "MONTH" =>
timestampAddMonths(micros, quantity, zoneId)
case "QUARTER" =>
- timestampAddMonths(micros, quantity * 3, zoneId)
+ timestampAddMonths(micros, Math.multiplyExact(quantity, 3), zoneId)
case "YEAR" =>
- timestampAddMonths(micros, quantity * MONTHS_PER_YEAR, zoneId)
+ timestampAddMonths(micros, Math.multiplyExact(quantity, MONTHS_PER_YEAR), zoneId)
}
} catch {
case _: scala.MatchError =>
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/DateExpressionsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/DateExpressionsSuite.scala
index e39d1518990..d2010102690 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/DateExpressionsSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/DateExpressionsSuite.scala
@@ -28,7 +28,7 @@ import scala.language.postfixOps
import scala.reflect.ClassTag
import scala.util.Random
-import org.apache.spark.{SparkDateTimeException, SparkFunSuite, SparkUpgradeException}
+import org.apache.spark.{SparkArithmeticException, SparkDateTimeException, SparkFunSuite, SparkUpgradeException}
import org.apache.spark.sql.catalyst.{CatalystTypeConverters, InternalRow}
import org.apache.spark.sql.catalyst.expressions.codegen.GenerateUnsafeProjection
import org.apache.spark.sql.catalyst.util.{DateTimeUtils, IntervalUtils, TimestampFormatter}
@@ -1961,6 +1961,102 @@ class DateExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper {
}
}
+ test("SPARK-42635: timestampadd near daylight saving transition") {
+ // In America/Los_Angeles timezone, timestamp value `skippedTime` is 2011-03-13 03:00:00.
+ // The next second of 2011-03-13 01:59:59 jumps to 2011-03-13 03:00:00.
+ val skippedTime = 1300010400000000L
+ // In America/Los_Angeles timezone, both timestamp range `[repeatedTime - MICROS_PER_HOUR,
+ // repeatedTime)` and `[repeatedTime, repeatedTime + MICROS_PER_HOUR)` map to
+ // [2011-11-06 01:00:00, 2011-11-06 02:00:00).
+ // The next second of 2011-11-06 01:59:59 (pre-transition) jumps back to 2011-11-06 01:00:00.
+ val repeatedTime = 1320570000000000L
+ withSQLConf(SQLConf.SESSION_LOCAL_TIMEZONE.key -> LA.getId) {
+ // Adding one day is **not** equivalent to adding <unit>_PER_DAY time units, because not every
+ // day has 24 hours: 2011-03-13 has 23 hours, 2011-11-06 has 25 hours.
+
+ // timestampadd(DAY, 1, 2011-03-12 03:00:00) = 2011-03-13 03:00:00
+ checkEvaluation(
+ TimestampAdd("DAY", Literal(1), Literal(skippedTime - 23 * MICROS_PER_HOUR, TimestampType)),
+ skippedTime)
+ // timestampadd(HOUR, 24, 2011-03-12 03:00:00) = 2011-03-13 04:00:00
+ checkEvaluation(
+ TimestampAdd("HOUR", Literal(24),
+ Literal(skippedTime - 23 * MICROS_PER_HOUR, TimestampType)),
+ skippedTime + MICROS_PER_HOUR)
+ // timestampadd(HOUR, 23, 2011-03-12 03:00:00) = 2011-03-13 03:00:00
+ checkEvaluation(
+ TimestampAdd("HOUR", Literal(23),
+ Literal(skippedTime - 23 * MICROS_PER_HOUR, TimestampType)),
+ skippedTime)
+ // timestampadd(SECOND, SECONDS_PER_DAY, 2011-03-12 03:00:00) = 2011-03-13 04:00:00
+ checkEvaluation(
+ TimestampAdd(
+ "SECOND", Literal(SECONDS_PER_DAY.toInt),
+ Literal(skippedTime - 23 * MICROS_PER_HOUR, TimestampType)),
+ skippedTime + MICROS_PER_HOUR)
+ // timestampadd(SECOND, SECONDS_PER_DAY, 2011-03-12 03:00:00) = 2011-03-13 03:59:59
+ checkEvaluation(
+ TimestampAdd(
+ "SECOND", Literal(SECONDS_PER_DAY.toInt - 1),
+ Literal(skippedTime - 23 * MICROS_PER_HOUR, TimestampType)),
+ skippedTime + MICROS_PER_HOUR - MICROS_PER_SECOND)
+
+ // timestampadd(DAY, 1, 2011-11-05 02:00:00) = 2011-11-06 02:00:00
+ checkEvaluation(
+ TimestampAdd("DAY", Literal(1),
+ Literal(repeatedTime - 24 * MICROS_PER_HOUR, TimestampType)),
+ repeatedTime + MICROS_PER_HOUR)
+ // timestampadd(DAY, 1, 2011-11-05 01:00:00) = 2011-11-06 01:00:00 (pre-transition)
+ checkEvaluation(
+ TimestampAdd("DAY", Literal(1),
+ Literal(repeatedTime - 25 * MICROS_PER_HOUR, TimestampType)),
+ repeatedTime - MICROS_PER_HOUR)
+ // timestampadd(DAY, -1, 2011-11-07 01:00:00) = 2011-11-06 01:00:00 (post-transition)
+ checkEvaluation(
+ TimestampAdd("DAY", Literal(-1),
+ Literal(repeatedTime + 24 * MICROS_PER_HOUR, TimestampType)),
+ repeatedTime)
+ // timestampadd(MONTH, 1, 2011-10-06 01:00:00) = 2011-11-06 01:00:00 (pre-transition)
+ checkEvaluation(
+ TimestampAdd(
+ "MONTH", Literal(1),
+ Literal(repeatedTime - MICROS_PER_HOUR - 31 * MICROS_PER_DAY, TimestampType)),
+ repeatedTime - MICROS_PER_HOUR)
+ // timestampadd(MONTH, -1, 2011-12-06 01:00:00) = 2011-11-06 01:00:00 (post-transition)
+ checkEvaluation(
+ TimestampAdd(
+ "MONTH", Literal(-1),
+ Literal(repeatedTime + 30 * MICROS_PER_DAY, TimestampType)),
+ repeatedTime)
+ // timestampadd(HOUR, 23, 2011-11-05 02:00:00) = 2011-11-06 01:00:00 (pre-transition)
+ checkEvaluation(
+ TimestampAdd("HOUR", Literal(23),
+ Literal(repeatedTime - 24 * MICROS_PER_HOUR, TimestampType)),
+ repeatedTime - MICROS_PER_HOUR)
+ // timestampadd(HOUR, 24, 2011-11-05 02:00:00) = 2011-11-06 01:00:00 (post-transition)
+ checkEvaluation(
+ TimestampAdd("HOUR", Literal(24),
+ Literal(repeatedTime - 24 * MICROS_PER_HOUR, TimestampType)),
+ repeatedTime)
+ }
+ }
+
+ test("SPARK-42635: timestampadd unit conversion overflow") {
+ withSQLConf(SQLConf.SESSION_LOCAL_TIMEZONE.key -> "UTC") {
+ checkErrorInExpression[SparkArithmeticException](TimestampAdd("DAY",
+ Literal(106751992),
+ Literal(0L, TimestampType)),
+ errorClass = "DATETIME_OVERFLOW",
+ parameters = Map("operation" -> "add 106751992 DAY to TIMESTAMP '1970-01-01 00:00:00'"))
+ checkErrorInExpression[SparkArithmeticException](TimestampAdd("QUARTER",
+ Literal(1431655764),
+ Literal(0L, TimestampType)),
+ errorClass = "DATETIME_OVERFLOW",
+ parameters = Map("operation" ->
+ "add 1431655764 QUARTER to TIMESTAMP '1970-01-01 00:00:00'"))
+ }
+ }
+
test("SPARK-38284: difference between two timestamps in units") {
// Check case-insensitivity
checkEvaluation(
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org