You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by we...@apache.org on 2021/04/07 13:29:20 UTC

[spark] branch master updated: [SPARK-34668][SQL] Support casting of day-time intervals to strings

This is an automated email from the ASF dual-hosted git repository.

wenchen pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/master by this push:
     new 3dfd456  [SPARK-34668][SQL] Support casting of day-time intervals to strings
3dfd456 is described below

commit 3dfd456b2c4133f751a67e4132196d2d1470af29
Author: Max Gekk <ma...@gmail.com>
AuthorDate: Wed Apr 7 13:28:55 2021 +0000

    [SPARK-34668][SQL] Support casting of day-time intervals to strings
    
    ### What changes were proposed in this pull request?
    1. Added new method `toDayTimeIntervalString()` to `IntervalUtils` which converts a day-time interval as a number of microseconds to a string in the form **"INTERVAL '[sign]days hours:minutes:secondsWithFraction' DAY TO SECOND"**.
    2. Extended the `Cast` expression to support casting of `DayTimeIntervalType` to `StringType`.
    
    ### Why are the changes needed?
    To conform to the ANSI SQL standard, which requires support for such casting.
    
    ### Does this PR introduce _any_ user-facing change?
    It should not, because the new day-time interval type has not been released yet.
    
    ### How was this patch tested?
    Added new tests for casting:
    ```
    $ build/sbt "testOnly *CastSuite*"
    ```
    
    Closes #32070 from MaxGekk/cast-dt-interval-to-string.
    
    Authored-by: Max Gekk <ma...@gmail.com>
    Signed-off-by: Wenchen Fan <we...@databricks.com>
---
 .../spark/sql/catalyst/expressions/Cast.scala      |  6 ++++
 .../spark/sql/catalyst/util/IntervalUtils.scala    | 32 +++++++++++++++++++++
 .../spark/sql/catalyst/expressions/CastSuite.scala | 33 +++++++++++++++++++---
 3 files changed, 67 insertions(+), 4 deletions(-)

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala
index 1c37713..879b154 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala
@@ -408,6 +408,8 @@ abstract class CastBase extends UnaryExpression with TimeZoneAwareExpression wit
       buildCast[Any](_, o => UTF8String.fromString(udt.deserialize(o).toString))
     case YearMonthIntervalType =>
       buildCast[Int](_, i => UTF8String.fromString(IntervalUtils.toYearMonthIntervalString(i)))
+    case DayTimeIntervalType =>
+      buildCast[Long](_, i => UTF8String.fromString(IntervalUtils.toDayTimeIntervalString(i)))
     case _ => buildCast[Any](_, o => UTF8String.fromString(o.toString))
   }
 
@@ -1127,6 +1129,10 @@ abstract class CastBase extends UnaryExpression with TimeZoneAwareExpression wit
         val iu = IntervalUtils.getClass.getName.stripSuffix("$")
         (c, evPrim, _) =>
           code"""$evPrim = UTF8String.fromString($iu.toYearMonthIntervalString($c));"""
+      case DayTimeIntervalType =>
+        val iu = IntervalUtils.getClass.getName.stripSuffix("$")
+        (c, evPrim, _) =>
+          code"""$evPrim = UTF8String.fromString($iu.toDayTimeIntervalString($c));"""
       case _ =>
         (c, evPrim, evNull) => code"$evPrim = UTF8String.fromString(String.valueOf($c));"
     }
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/IntervalUtils.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/IntervalUtils.scala
index 8cd9d28..b96a7b9 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/IntervalUtils.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/IntervalUtils.scala
@@ -851,4 +851,36 @@ object IntervalUtils {
     }
     s"INTERVAL '$sign${absMonths / MONTHS_PER_YEAR}-${absMonths % MONTHS_PER_YEAR}' YEAR TO MONTH"
   }
+
+  /**
+   * Converts a day-time interval, given as a number of microseconds, to its ANSI SQL textual
+   * form "INTERVAL '[sign]days hours:minutes:secondsWithFraction' DAY TO SECOND".
+   *
+   * @param micros The number of microseconds, positive or negative
+   * @return Day-time interval string, e.g. "INTERVAL '-0 00:00:00.001' DAY TO SECOND"
+   */
+  def toDayTimeIntervalString(micros: Long): String = {
+    var sign = ""
+    var rest = micros
+    if (micros < 0) {
+      if (micros == Long.MinValue) {
+        // Special handling of the minimum `Long` value because negating it overflows `Long`.
+        // seconds = 106751991 * (24 * 60 * 60) + 4 * 60 * 60 + 54 = 9223372036854
+        // microseconds = -9223372036854000000L-775808 == Long.MinValue
+        return "INTERVAL '-106751991 04:00:54.775808' DAY TO SECOND"
+      } else {
+        sign = "-"
+        rest = -rest
+      }
+    }
+    val seconds = rest % MICROS_PER_MINUTE  // sub-minute part, still in microseconds
+    rest /= MICROS_PER_MINUTE  // `rest` is now whole minutes
+    val minutes = rest % MINUTES_PER_HOUR
+    rest /= MINUTES_PER_HOUR  // `rest` is now whole hours
+    val hours = rest % HOURS_PER_DAY
+    val days = rest / HOURS_PER_DAY
+    val leadSecZero = if (seconds < 10 * MICROS_PER_SECOND) "0" else ""  // keep 2 integer digits
+    val secStr = java.math.BigDecimal.valueOf(seconds, 6).stripTrailingZeros().toPlainString()
+    f"INTERVAL '$sign$days $hours%02d:$minutes%02d:$leadSecZero$secStr' DAY TO SECOND"
+  }
 }
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuite.scala
index 547bf88..0554d07 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuite.scala
@@ -18,7 +18,8 @@
 package org.apache.spark.sql.catalyst.expressions
 
 import java.sql.{Date, Timestamp}
-import java.time.{DateTimeException, Period}
+import java.time.{DateTimeException, Duration, Period}
+import java.time.temporal.ChronoUnit
 import java.util.{Calendar, TimeZone}
 
 import scala.collection.parallel.immutable.ParVector
@@ -35,6 +36,7 @@ import org.apache.spark.sql.catalyst.util.DateTimeConstants._
 import org.apache.spark.sql.catalyst.util.DateTimeTestUtils
 import org.apache.spark.sql.catalyst.util.DateTimeTestUtils._
 import org.apache.spark.sql.catalyst.util.DateTimeUtils._
+import org.apache.spark.sql.catalyst.util.IntervalUtils.microsToDuration
 import org.apache.spark.sql.internal.SQLConf
 import org.apache.spark.sql.types._
 import org.apache.spark.unsafe.types.UTF8String
@@ -62,9 +64,7 @@ abstract class CastSuiteBase extends SparkFunSuite with ExpressionEvalHelper {
     }
 
     atomicTypes.foreach(dt => checkNullCast(NullType, dt))
-    (atomicTypes -- Set(
-      // TODO(SPARK-34668): Support casting of day-time intervals to strings
-      DayTimeIntervalType)).foreach(dt => checkNullCast(dt, StringType))
+    atomicTypes.foreach(dt => checkNullCast(dt, StringType))
     checkNullCast(StringType, BinaryType)
     checkNullCast(StringType, BooleanType)
     numericTypes.foreach(dt => checkNullCast(dt, BooleanType))
@@ -818,6 +818,31 @@ abstract class CastSuiteBase extends SparkFunSuite with ExpressionEvalHelper {
     checkConsistencyBetweenInterpretedAndCodegen(
       (child: Expression) => Cast(child, StringType), YearMonthIntervalType)
   }
+
+  test("SPARK-34668: cast day-time interval to string") {
+    Seq(
+      Duration.ZERO -> "0 00:00:00",  // no fractional part when there are no sub-second micros
+      Duration.of(1, ChronoUnit.MICROS) -> "0 00:00:00.000001",
+      Duration.ofMillis(-1) -> "-0 00:00:00.001",  // sign is kept even when days == 0
+      Duration.ofMillis(1234) -> "0 00:00:01.234",
+      Duration.ofSeconds(-9).minus(999999, ChronoUnit.MICROS) -> "-0 00:00:09.999999",
+      Duration.ofMinutes(30).plusMillis(59010) -> "0 00:30:59.01",  // trailing zeros dropped
+      Duration.ofHours(-23).minusSeconds(59) -> "-0 23:00:59",
+      Duration.ofDays(1).plus(12345678, ChronoUnit.MICROS) -> "1 00:00:12.345678",
+      Duration.ofDays(-1234).minusHours(23).minusMinutes(59).minusSeconds(59).minusMillis(999) ->
+        "-1234 23:59:59.999",
+      microsToDuration(Long.MaxValue) -> "106751991 04:00:54.775807",
+      microsToDuration(Long.MinValue + 1) -> "-106751991 04:00:54.775807",
+      microsToDuration(Long.MinValue) -> "-106751991 04:00:54.775808"  // not negatable as a Long
+    ).foreach { case (period, intervalPayload) =>  // NOTE(review): `period` holds a Duration
+      checkEvaluation(
+        Cast(Literal(period), StringType),
+        s"INTERVAL '$intervalPayload' DAY TO SECOND")
+    }
+
+    checkConsistencyBetweenInterpretedAndCodegen(
+      (child: Expression) => Cast(child, StringType), DayTimeIntervalType)
+  }
 }
 
 abstract class AnsiCastSuiteBase extends CastSuiteBase {

---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org