Posted to commits@spark.apache.org by we...@apache.org on 2021/04/07 13:29:20 UTC
[spark] branch master updated: [SPARK-34668][SQL] Support casting of day-time intervals to strings
This is an automated email from the ASF dual-hosted git repository.
wenchen pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
new 3dfd456 [SPARK-34668][SQL] Support casting of day-time intervals to strings
3dfd456 is described below
commit 3dfd456b2c4133f751a67e4132196d2d1470af29
Author: Max Gekk <ma...@gmail.com>
AuthorDate: Wed Apr 7 13:28:55 2021 +0000
[SPARK-34668][SQL] Support casting of day-time intervals to strings
### What changes were proposed in this pull request?
1. Added new method `toDayTimeIntervalString()` to `IntervalUtils` which converts a day-time interval as a number of microseconds to a string in the form **"INTERVAL '[sign]days hours:minutes:secondsWithFraction' DAY TO SECOND"** (illustrated in the sketch after this list).
2. Extended the `Cast` expression to support casting of `DayTimeIntervalType` to `StringType`.
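For illustration, the sketch below shows the expected mapping from microseconds to strings; the expected outputs are copied from the `CastSuite` cases added in this commit:
```
import org.apache.spark.sql.catalyst.util.IntervalUtils

// Expected results, taken from the test expectations added below:
IntervalUtils.toDayTimeIntervalString(0L)
// "INTERVAL '0 00:00:00' DAY TO SECOND"
IntervalUtils.toDayTimeIntervalString(1L)
// "INTERVAL '0 00:00:00.000001' DAY TO SECOND"
IntervalUtils.toDayTimeIntervalString(-1000L)
// "INTERVAL '-0 00:00:00.001' DAY TO SECOND"
IntervalUtils.toDayTimeIntervalString(Long.MinValue)
// "INTERVAL '-106751991 04:00:54.775808' DAY TO SECOND"
```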
### Why are the changes needed?
To conform to the ANSI SQL standard, which requires support for such a cast.
### Does this PR introduce _any_ user-facing change?
It should not, because the new day-time interval type has not been released yet.
### How was this patch tested?
Added new tests for casting:
```
$ build/sbt "testOnly *CastSuite*"
```
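For reference, one of the added checks written out standalone (a sketch; the suite itself uses `checkEvaluation` from `ExpressionEvalHelper`):
```
import java.time.Duration
import java.time.temporal.ChronoUnit
import org.apache.spark.sql.catalyst.expressions.{Cast, Literal}
import org.apache.spark.sql.types.StringType

// Cast a day-time interval literal of 1 day 12.345678 seconds to a string.
val expr = Cast(Literal(Duration.ofDays(1).plus(12345678, ChronoUnit.MICROS)), StringType)
// expr.eval() should return the UTF8String "INTERVAL '1 00:00:12.345678' DAY TO SECOND"
```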
Closes #32070 from MaxGekk/cast-dt-interval-to-string.
Authored-by: Max Gekk <ma...@gmail.com>
Signed-off-by: Wenchen Fan <we...@databricks.com>
---
.../spark/sql/catalyst/expressions/Cast.scala | 6 ++++
.../spark/sql/catalyst/util/IntervalUtils.scala | 32 +++++++++++++++++++++
.../spark/sql/catalyst/expressions/CastSuite.scala | 33 +++++++++++++++++++---
3 files changed, 67 insertions(+), 4 deletions(-)
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala
index 1c37713..879b154 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala
@@ -408,6 +408,8 @@ abstract class CastBase extends UnaryExpression with TimeZoneAwareExpression wit
       buildCast[Any](_, o => UTF8String.fromString(udt.deserialize(o).toString))
     case YearMonthIntervalType =>
       buildCast[Int](_, i => UTF8String.fromString(IntervalUtils.toYearMonthIntervalString(i)))
+    case DayTimeIntervalType =>
+      buildCast[Long](_, i => UTF8String.fromString(IntervalUtils.toDayTimeIntervalString(i)))
     case _ => buildCast[Any](_, o => UTF8String.fromString(o.toString))
   }
@@ -1127,6 +1129,10 @@ abstract class CastBase extends UnaryExpression with TimeZoneAwareExpression wit
         val iu = IntervalUtils.getClass.getName.stripSuffix("$")
         (c, evPrim, _) =>
           code"""$evPrim = UTF8String.fromString($iu.toYearMonthIntervalString($c));"""
+      case DayTimeIntervalType =>
+        val iu = IntervalUtils.getClass.getName.stripSuffix("$")
+        (c, evPrim, _) =>
+          code"""$evPrim = UTF8String.fromString($iu.toDayTimeIntervalString($c));"""
       case _ =>
         (c, evPrim, evNull) => code"$evPrim = UTF8String.fromString(String.valueOf($c));"
     }
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/IntervalUtils.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/IntervalUtils.scala
index 8cd9d28..b96a7b9 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/IntervalUtils.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/IntervalUtils.scala
@@ -851,4 +851,36 @@ object IntervalUtils {
     }
     s"INTERVAL '$sign${absMonths / MONTHS_PER_YEAR}-${absMonths % MONTHS_PER_YEAR}' YEAR TO MONTH"
   }
+
+  /**
+   * Converts a day-time interval as a number of microseconds to its textual representation
+   * which conforms to the ANSI SQL standard.
+   *
+   * @param micros The number of microseconds, positive or negative
+   * @return Day-time interval string
+   */
+  def toDayTimeIntervalString(micros: Long): String = {
+    var sign = ""
+    var rest = micros
+    if (micros < 0) {
+      if (micros == Long.MinValue) {
+        // Special handling of the minimum `Long` value because the negate op overflows `Long`.
+        // seconds = 106751991 * (24 * 60 * 60) + 4 * 60 * 60 + 54 = 9223372036854
+        // microseconds = -9223372036854000000L - 775808 == Long.MinValue
+        return "INTERVAL '-106751991 04:00:54.775808' DAY TO SECOND"
+      } else {
+        sign = "-"
+        rest = -rest
+      }
+    }
+    val seconds = rest % MICROS_PER_MINUTE
+    rest /= MICROS_PER_MINUTE
+    val minutes = rest % MINUTES_PER_HOUR
+    rest /= MINUTES_PER_HOUR
+    val hours = rest % HOURS_PER_DAY
+    val days = rest / HOURS_PER_DAY
+    val leadSecZero = if (seconds < 10 * MICROS_PER_SECOND) "0" else ""
+    val secStr = java.math.BigDecimal.valueOf(seconds, 6).stripTrailingZeros().toPlainString()
+    f"INTERVAL '$sign$days $hours%02d:$minutes%02d:$leadSecZero$secStr' DAY TO SECOND"
+  }
 }
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuite.scala
index 547bf88..0554d07 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuite.scala
@@ -18,7 +18,8 @@
 package org.apache.spark.sql.catalyst.expressions
 
 import java.sql.{Date, Timestamp}
-import java.time.{DateTimeException, Period}
+import java.time.{DateTimeException, Duration, Period}
+import java.time.temporal.ChronoUnit
 import java.util.{Calendar, TimeZone}
 
 import scala.collection.parallel.immutable.ParVector
@@ -35,6 +36,7 @@ import org.apache.spark.sql.catalyst.util.DateTimeConstants._
 import org.apache.spark.sql.catalyst.util.DateTimeTestUtils
 import org.apache.spark.sql.catalyst.util.DateTimeTestUtils._
 import org.apache.spark.sql.catalyst.util.DateTimeUtils._
+import org.apache.spark.sql.catalyst.util.IntervalUtils.microsToDuration
 import org.apache.spark.sql.internal.SQLConf
 import org.apache.spark.sql.types._
 import org.apache.spark.unsafe.types.UTF8String
@@ -62,9 +64,7 @@ abstract class CastSuiteBase extends SparkFunSuite with ExpressionEvalHelper {
     }
 
     atomicTypes.foreach(dt => checkNullCast(NullType, dt))
-    (atomicTypes -- Set(
-      // TODO(SPARK-34668): Support casting of day-time intervals to strings
-      DayTimeIntervalType)).foreach(dt => checkNullCast(dt, StringType))
+    atomicTypes.foreach(dt => checkNullCast(dt, StringType))
     checkNullCast(StringType, BinaryType)
     checkNullCast(StringType, BooleanType)
     numericTypes.foreach(dt => checkNullCast(dt, BooleanType))
@@ -818,6 +818,31 @@ abstract class CastSuiteBase extends SparkFunSuite with ExpressionEvalHelper {
     checkConsistencyBetweenInterpretedAndCodegen(
       (child: Expression) => Cast(child, StringType), YearMonthIntervalType)
   }
+
+  test("SPARK-34668: cast day-time interval to string") {
+    Seq(
+      Duration.ZERO -> "0 00:00:00",
+      Duration.of(1, ChronoUnit.MICROS) -> "0 00:00:00.000001",
+      Duration.ofMillis(-1) -> "-0 00:00:00.001",
+      Duration.ofMillis(1234) -> "0 00:00:01.234",
+      Duration.ofSeconds(-9).minus(999999, ChronoUnit.MICROS) -> "-0 00:00:09.999999",
+      Duration.ofMinutes(30).plusMillis(59010) -> "0 00:30:59.01",
+      Duration.ofHours(-23).minusSeconds(59) -> "-0 23:00:59",
+      Duration.ofDays(1).plus(12345678, ChronoUnit.MICROS) -> "1 00:00:12.345678",
+      Duration.ofDays(-1234).minusHours(23).minusMinutes(59).minusSeconds(59).minusMillis(999) ->
+        "-1234 23:59:59.999",
+      microsToDuration(Long.MaxValue) -> "106751991 04:00:54.775807",
+      microsToDuration(Long.MinValue + 1) -> "-106751991 04:00:54.775807",
+      microsToDuration(Long.MinValue) -> "-106751991 04:00:54.775808"
+    ).foreach { case (period, intervalPayload) =>
+      checkEvaluation(
+        Cast(Literal(period), StringType),
+        s"INTERVAL '$intervalPayload' DAY TO SECOND")
+    }
+
+    checkConsistencyBetweenInterpretedAndCodegen(
+      (child: Expression) => Cast(child, StringType), DayTimeIntervalType)
+  }
 }
 
 abstract class AnsiCastSuiteBase extends CastSuiteBase {
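Note: the `Long.MinValue` special case in `toDayTimeIntervalString` above can be sanity-checked by multiplying the hardcoded components back into microseconds (a quick verification sketch, not part of the commit):
```
// 106751991 days, 4 hours, 54.775808 seconds, per the comment in the patch:
val totalSeconds = 106751991L * 24 * 60 * 60 + 4L * 60 * 60 + 54  // 9223372036854
val totalMicros = BigInt(totalSeconds) * 1000000 + 775808
assert(totalMicros == -BigInt(Long.MinValue))  // 9223372036854775808 == 2^63
```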