You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by we...@apache.org on 2020/02/25 14:34:31 UTC

[spark] branch branch-3.0 updated: [SPARK-30919][SQL] Make interval multiply and divide's overflow behavior consistent with other operations

This is an automated email from the ASF dual-hosted git repository.

wenchen pushed a commit to branch branch-3.0
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/branch-3.0 by this push:
     new 1d746eb  [SPARK-30919][SQL] Make interval multiply and divide's overflow behavior consistent with other operations
1d746eb is described below

commit 1d746eb0afddd3c2a4e1313dddf80ac0aec00a7a
Author: Kent Yao <ya...@hotmail.com>
AuthorDate: Tue Feb 25 22:19:24 2020 +0800

    [SPARK-30919][SQL] Make interval multiply and divide's overflow behavior consistent with other operations
    
    ### What changes were proposed in this pull request?
    
    The current behavior of interval multiply and divide follows the ANSI SQL standard when overflow, it is compatible with other operations when `spark.sql.ansi.enabled` is true, but not compatible when `spark.sql.ansi.enabled` is false.
    
    When `spark.sql.ansi.enabled` is false, as the factor is a double value, so it should use java's rounding or truncation behavior for casting double to integrals. when divided by zero, it returns `null`.  we also follow the natural rules for intervals as defined in the Gregorian calendar, so we do not add the month fraction to days but add days fraction to microseconds.
    
    ### Why are the changes needed?
    
    Make interval multiply and divide's overflow behavior consistent with other interval operations
    
    ### Does this PR introduce any user-facing change?
    
    no, these are new features in 3.0
    
    ### How was this patch tested?
    
    add uts
    
    Closes #27672 from yaooqinn/SPARK-30919.
    
    Authored-by: Kent Yao <ya...@hotmail.com>
    Signed-off-by: Wenchen Fan <we...@databricks.com>
    (cherry picked from commit 761209c1f2af513a9db2e08c5937531cff7aeeed)
    Signed-off-by: Wenchen Fan <we...@databricks.com>
---
 .../catalyst/expressions/intervalExpressions.scala | 34 +++++++++---
 .../spark/sql/catalyst/util/IntervalUtils.scala    | 45 +++++++++++++---
 .../expressions/IntervalExpressionsSuite.scala     | 37 ++++++++-----
 .../sql/catalyst/util/IntervalUtilsSuite.scala     | 61 ++++++++++++++--------
 .../test/resources/sql-tests/inputs/interval.sql   |  4 ++
 .../sql-tests/results/ansi/interval.sql.out        | 38 +++++++++++++-
 .../resources/sql-tests/results/interval.sql.out   | 49 +++++++++++++----
 7 files changed, 210 insertions(+), 58 deletions(-)

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/intervalExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/intervalExpressions.scala
index 831510e..c09350f 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/intervalExpressions.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/intervalExpressions.scala
@@ -22,6 +22,7 @@ import java.util.Locale
 import org.apache.spark.sql.catalyst.expressions.codegen.{CodegenContext, ExprCode}
 import org.apache.spark.sql.catalyst.util.IntervalUtils
 import org.apache.spark.sql.catalyst.util.IntervalUtils._
+import org.apache.spark.sql.internal.SQLConf
 import org.apache.spark.sql.types._
 import org.apache.spark.unsafe.types.CalendarInterval
 
@@ -112,13 +113,14 @@ object ExtractIntervalPart {
 
 abstract class IntervalNumOperation(
     interval: Expression,
-    num: Expression,
-    operation: (CalendarInterval, Double) => CalendarInterval,
-    operationName: String)
+    num: Expression)
   extends BinaryExpression with ImplicitCastInputTypes with Serializable {
   override def left: Expression = interval
   override def right: Expression = num
 
+  protected val operation: (CalendarInterval, Double) => CalendarInterval
+  protected def operationName: String
+
   override def inputTypes: Seq[AbstractDataType] = Seq(CalendarIntervalType, DoubleType)
   override def dataType: DataType = CalendarIntervalType
 
@@ -136,11 +138,29 @@ abstract class IntervalNumOperation(
   override def prettyName: String = operationName.stripSuffix("Exact") + "_interval"
 }
 
-case class MultiplyInterval(interval: Expression, num: Expression)
-  extends IntervalNumOperation(interval, num, multiplyExact, "multiplyExact")
+case class MultiplyInterval(
+    interval: Expression,
+    num: Expression,
+    checkOverflow: Boolean = SQLConf.get.ansiEnabled)
+  extends IntervalNumOperation(interval, num) {
+
+  override protected val operation: (CalendarInterval, Double) => CalendarInterval =
+    if (checkOverflow) multiplyExact else multiply
+
+  override protected def operationName: String = if (checkOverflow) "multiplyExact" else "multiply"
+}
+
+case class DivideInterval(
+    interval: Expression,
+    num: Expression,
+    checkOverflow: Boolean = SQLConf.get.ansiEnabled)
+  extends IntervalNumOperation(interval, num) {
+
+  override protected val operation: (CalendarInterval, Double) => CalendarInterval =
+    if (checkOverflow) divideExact else divide
 
-case class DivideInterval(interval: Expression, num: Expression)
-  extends IntervalNumOperation(interval, num, divideExact, "divideExact")
+  override protected def operationName: String = if (checkOverflow) "divideExact" else "divide"
+}
 
 // scalastyle:off line.size.limit
 @ExpressionDescription(
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/IntervalUtils.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/IntervalUtils.scala
index 2790f8e..0a13ec8 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/IntervalUtils.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/IntervalUtils.scala
@@ -405,23 +405,39 @@ object IntervalUtils {
   }
 
   /**
-   * Makes an interval from months, days and micros with the fractional part by
-   * adding the month fraction to days and the days fraction to micros.
-   *
-   * @throws ArithmeticException if the result overflows any field value
+   * Makes an interval from months, days and micros with the fractional part.
+   * The overflow style here follows the way of ansi sql standard and the natural rules for
+   * intervals as defined in the Gregorian calendar. Thus, the days fraction will be added
+   * to microseconds but the months fraction will not be added to days, and it will throw
+   * exception if any part overflows.
    */
   private def fromDoubles(
       monthsWithFraction: Double,
       daysWithFraction: Double,
       microsWithFraction: Double): CalendarInterval = {
     val truncatedMonths = Math.toIntExact(monthsWithFraction.toLong)
-    val days = daysWithFraction + DAYS_PER_MONTH * (monthsWithFraction - truncatedMonths)
-    val truncatedDays = Math.toIntExact(days.toLong)
-    val micros = microsWithFraction + MICROS_PER_DAY * (days - truncatedDays)
+    val truncatedDays = Math.toIntExact(daysWithFraction.toLong)
+    val micros = microsWithFraction + MICROS_PER_DAY * (daysWithFraction - truncatedDays)
     new CalendarInterval(truncatedMonths, truncatedDays, micros.round)
   }
 
   /**
+   * Makes an interval from months, days and micros with the fractional part.
+   * The overflow style here follows the way of casting [[java.lang.Double]] to integrals and the
+   * natural rules for intervals as defined in the Gregorian calendar. Thus, the days fraction
+   * will be added to microseconds but the months fraction will not be added to days, and there may
+   * be rounding or truncation in months(or day and microseconds) part.
+   */
+  private def safeFromDoubles(
+      monthsWithFraction: Double,
+      daysWithFraction: Double,
+      microsWithFraction: Double): CalendarInterval = {
+    val truncatedDays = daysWithFraction.toInt
+    val micros = microsWithFraction + MICROS_PER_DAY * (daysWithFraction - truncatedDays)
+    new CalendarInterval(monthsWithFraction.toInt, truncatedDays, micros.round)
+  }
+
+  /**
    * Unary minus, return the negated the calendar interval value.
    *
    * @throws ArithmeticException if the result overflows any field value
@@ -486,6 +502,13 @@ object IntervalUtils {
 
   /**
    * Return a new calendar interval instance of the left interval times a multiplier.
+   */
+  def multiply(interval: CalendarInterval, num: Double): CalendarInterval = {
+    safeFromDoubles(num * interval.months, num * interval.days, num * interval.microseconds)
+  }
+
+  /**
+   * Return a new calendar interval instance of the left interval times a multiplier.
    *
    * @throws ArithmeticException if the result overflows any field value
    */
@@ -495,6 +518,14 @@ object IntervalUtils {
 
   /**
    * Return a new calendar interval instance of the left interval divides by a dividend.
+   */
+  def divide(interval: CalendarInterval, num: Double): CalendarInterval = {
+    if (num == 0) return null
+    safeFromDoubles(interval.months / num, interval.days / num, interval.microseconds / num)
+  }
+
+  /**
+   * Return a new calendar interval instance of the left interval divides by a dividend.
    *
    * @throws ArithmeticException if the result overflows any field value or divided by zero
    */
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/IntervalExpressionsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/IntervalExpressionsSuite.scala
index bc57438..e591c49 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/IntervalExpressionsSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/IntervalExpressionsSuite.scala
@@ -199,11 +199,16 @@ class IntervalExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper {
   }
 
   test("multiply") {
-    def check(interval: String, num: Double, expected: String): Unit = {
-      val expr = MultiplyInterval(Literal(stringToInterval(interval)), Literal(num))
+    def check(
+        interval: String,
+        num: Double,
+        expected: String,
+        isAnsi: Option[Boolean] = None): Unit = {
       val expectedRes = safeStringToInterval(expected)
-      Seq("true", "false").foreach { v =>
+      val configs = if (isAnsi.isEmpty) Seq("true", "false") else isAnsi.map(_.toString).toSeq
+      configs.foreach { v =>
         withSQLConf(SQLConf.ANSI_ENABLED.key -> v) {
+          val expr = MultiplyInterval(Literal(stringToInterval(interval)), Literal(num))
           if (expectedRes == null) {
             checkExceptionInExpression[ArithmeticException](expr, expected)
           } else {
@@ -220,17 +225,23 @@ class IntervalExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper {
     check("1 year 1 second", 0.5, "6 months 500 milliseconds")
     check("-100 years -1 millisecond", 0.5, "-50 years -500 microseconds")
     check("2 months 4 seconds", -0.5, "-1 months -2 seconds")
-    check("1 month 2 microseconds", 1.5, "1 months 15 days 3 microseconds")
-    check("2 months", Int.MaxValue, "integer overflow")
+    check("1 month 2 microseconds", 1.5, "1 months 3 microseconds")
+    check("2 months", Int.MaxValue, "integer overflow", Some(true))
+    check("2 months", Int.MaxValue, Int.MaxValue + " months", Some(false))
   }
 
   test("divide") {
-    def check(interval: String, num: Double, expected: String): Unit = {
-      val expr = DivideInterval(Literal(stringToInterval(interval)), Literal(num))
+    def check(
+        interval: String,
+        num: Double,
+        expected: String,
+        isAnsi: Option[Boolean] = None): Unit = {
       val expectedRes = safeStringToInterval(expected)
-      Seq("true", "false").foreach { v =>
+      val configs = if (isAnsi.isEmpty) Seq("true", "false") else isAnsi.map(_.toString).toSeq
+      configs.foreach { v =>
         withSQLConf(SQLConf.ANSI_ENABLED.key -> v) {
-          if (expectedRes == null) {
+          val expr = DivideInterval(Literal(stringToInterval(interval)), Literal(num))
+          if (expected != null && expectedRes == null) {
             checkExceptionInExpression[ArithmeticException](expr, expected)
           } else {
             checkEvaluation(expr, expectedRes)
@@ -245,9 +256,11 @@ class IntervalExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper {
     check("6 years -7 seconds", 3, "2 years -2.333333 seconds")
     check("2 years -8 seconds", 0.5, "4 years -16 seconds")
     check("-1 month 2 microseconds", -0.25, "4 months -8 microseconds")
-    check("1 month 3 microsecond", 1.5, "20 days 2 microseconds")
-    check("1 second", 0, "divide by zero")
-    check(s"${Int.MaxValue} months", 0.9, "integer overflow")
+    check("1 month 3 microsecond", 1.5, "2 microseconds")
+    check("1 second", 0, "divide by zero", Some(true))
+    check("1 second", 0, null, Some(false))
+    check(s"${Int.MaxValue} months", 0.9, "integer overflow", Some(true))
+    check(s"${Int.MaxValue} months", 0.9, Int.MaxValue + " months", Some(false))
   }
 
   test("make interval") {
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/IntervalUtilsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/IntervalUtilsSuite.scala
index d0d79ac..e7c3163 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/IntervalUtilsSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/IntervalUtilsSuite.scala
@@ -268,19 +268,23 @@ class IntervalUtilsSuite extends SparkFunSuite with SQLHelper {
   }
 
   test("multiply by num") {
-    var interval = new CalendarInterval(0, 0, 0)
-    assert(interval === multiplyExact(interval, 0))
-    interval = new CalendarInterval(123, 456, 789)
-    assert(new CalendarInterval(123 * 42, 456 * 42, 789 * 42) === multiplyExact(interval, 42))
-    interval = new CalendarInterval(-123, -456, -789)
-    assert(new CalendarInterval(-123 * 42, -456 * 42, -789 * 42) === multiplyExact(interval, 42))
-    assert(new CalendarInterval(1, 22, 12 * MICROS_PER_HOUR) ===
-      multiplyExact(new CalendarInterval(1, 5, 0), 1.5))
-    assert(new CalendarInterval(2, 14, 12 * MICROS_PER_HOUR) ===
-      multiplyExact(new CalendarInterval(2, 2, 2 * MICROS_PER_HOUR), 1.2))
+    Seq[(CalendarInterval, Double) => CalendarInterval](multiply, multiplyExact).foreach { func =>
+      var interval = new CalendarInterval(0, 0, 0)
+      assert(interval === func(interval, 0))
+      interval = new CalendarInterval(123, 456, 789)
+      assert(new CalendarInterval(123 * 42, 456 * 42, 789 * 42) === func(interval, 42))
+      interval = new CalendarInterval(-123, -456, -789)
+      assert(new CalendarInterval(-123 * 42, -456 * 42, -789 * 42) === func(interval, 42))
+      interval = new CalendarInterval(1, 5, 0)
+      assert(new CalendarInterval(1, 7, 12 * MICROS_PER_HOUR) === func(interval, 1.5))
+      interval = new CalendarInterval(2, 2, 2 * MICROS_PER_HOUR)
+      assert(new CalendarInterval(2, 2, 12 * MICROS_PER_HOUR) === func(interval, 1.2))
+    }
 
+    val interval = new CalendarInterval(2, 0, 0)
+    assert(multiply(interval, Integer.MAX_VALUE) === new CalendarInterval(Int.MaxValue, 0, 0))
     try {
-      multiplyExact(new CalendarInterval(2, 0, 0), Integer.MAX_VALUE)
+      multiplyExact(interval, Integer.MAX_VALUE)
       fail("Expected to throw an exception on months overflow")
     } catch {
       case e: ArithmeticException => assert(e.getMessage.contains("overflow"))
@@ -288,17 +292,32 @@ class IntervalUtilsSuite extends SparkFunSuite with SQLHelper {
   }
 
   test("divide by num") {
-    var interval = new CalendarInterval(0, 0, 0)
-    assert(interval === divideExact(interval, 10))
-    interval = new CalendarInterval(1, 3, 30 * MICROS_PER_SECOND)
-    assert(new CalendarInterval(0, 16, 12 * MICROS_PER_HOUR + 15 * MICROS_PER_SECOND) ===
-      divideExact(interval, 2))
-    assert(new CalendarInterval(2, 6, MICROS_PER_MINUTE) === divideExact(interval, 0.5))
-    interval = new CalendarInterval(-1, 0, -30 * MICROS_PER_SECOND)
-    assert(new CalendarInterval(0, -15, -15 * MICROS_PER_SECOND) === divideExact(interval, 2))
-    assert(new CalendarInterval(-2, 0, -1 * MICROS_PER_MINUTE) === divideExact(interval, 0.5))
+    Seq[(CalendarInterval, Double) => CalendarInterval](divide, divideExact).foreach { func =>
+      var interval = new CalendarInterval(0, 0, 0)
+      assert(interval === func(interval, 10))
+      interval = new CalendarInterval(1, 3, 30 * MICROS_PER_SECOND)
+      assert(new CalendarInterval(0, 1, 12 * MICROS_PER_HOUR + 15 * MICROS_PER_SECOND) ===
+        func(interval, 2))
+      assert(new CalendarInterval(2, 6, MICROS_PER_MINUTE) === func(interval, 0.5))
+      interval = new CalendarInterval(-1, 0, -30 * MICROS_PER_SECOND)
+      assert(new CalendarInterval(0, 0, -15 * MICROS_PER_SECOND) === func(interval, 2))
+      assert(new CalendarInterval(-2, 0, -MICROS_PER_MINUTE) === func(interval, 0.5))
+    }
+
+    var interval = new CalendarInterval(Int.MaxValue, Int.MaxValue, 0)
+    assert(divide(interval, 0.9) === new CalendarInterval(Int.MaxValue, Int.MaxValue,
+      ((Int.MaxValue / 9.0) * MICROS_PER_DAY).round))
+    try {
+      divideExact(interval, 0.9)
+      fail("Expected to throw an exception on integer overflow")
+    } catch {
+      case e: ArithmeticException => assert(e.getMessage.contains("integer overflow"))
+    }
+
+    interval = new CalendarInterval(123, 456, 789)
+    assert(divide(interval, 0) === null)
     try {
-      divideExact(new CalendarInterval(123, 456, 789), 0)
+      divideExact(interval, 0)
       fail("Expected to throw an exception on divide by zero")
     } catch {
       case e: ArithmeticException => assert(e.getMessage.contains("divide by zero"))
diff --git a/sql/core/src/test/resources/sql-tests/inputs/interval.sql b/sql/core/src/test/resources/sql-tests/inputs/interval.sql
index facd632..4f26e75 100644
--- a/sql/core/src/test/resources/sql-tests/inputs/interval.sql
+++ b/sql/core/src/test/resources/sql-tests/inputs/interval.sql
@@ -4,6 +4,10 @@
 select 3 * (timestamp'2019-10-15 10:11:12.001002' - date'2019-10-15');
 select interval 4 month 2 weeks 3 microseconds * 1.5;
 select (timestamp'2019-10-15' - timestamp'2019-10-14') / 1.5;
+select interval 2147483647 month * 2;
+select interval 2147483647 month / 0.5;
+select interval 2147483647 day * 2;
+select interval 2147483647 day / 0.5;
 
 -- interval operation with null and zero case
 select interval '2 seconds' / 0;
diff --git a/sql/core/src/test/resources/sql-tests/results/ansi/interval.sql.out b/sql/core/src/test/resources/sql-tests/results/ansi/interval.sql.out
index 4b46540..4a41dd6 100644
--- a/sql/core/src/test/resources/sql-tests/results/ansi/interval.sql.out
+++ b/sql/core/src/test/resources/sql-tests/results/ansi/interval.sql.out
@@ -1,5 +1,5 @@
 -- Automatically generated by SQLQueryTestSuite
--- Number of queries: 81
+-- Number of queries: 85
 
 
 -- !query
@@ -27,6 +27,42 @@ struct<divide_interval(subtracttimestamps(TIMESTAMP '2019-10-15 00:00:00', TIMES
 
 
 -- !query
+select interval 2147483647 month * 2
+-- !query schema
+struct<>
+-- !query output
+java.lang.ArithmeticException
+integer overflow
+
+
+-- !query
+select interval 2147483647 month / 0.5
+-- !query schema
+struct<>
+-- !query output
+java.lang.ArithmeticException
+integer overflow
+
+
+-- !query
+select interval 2147483647 day * 2
+-- !query schema
+struct<>
+-- !query output
+java.lang.ArithmeticException
+integer overflow
+
+
+-- !query
+select interval 2147483647 day / 0.5
+-- !query schema
+struct<>
+-- !query output
+java.lang.ArithmeticException
+integer overflow
+
+
+-- !query
 select interval '2 seconds' / 0
 -- !query schema
 struct<>
diff --git a/sql/core/src/test/resources/sql-tests/results/interval.sql.out b/sql/core/src/test/resources/sql-tests/results/interval.sql.out
index 0509594..f1af335 100644
--- a/sql/core/src/test/resources/sql-tests/results/interval.sql.out
+++ b/sql/core/src/test/resources/sql-tests/results/interval.sql.out
@@ -1,5 +1,5 @@
 -- Automatically generated by SQLQueryTestSuite
--- Number of queries: 81
+-- Number of queries: 85
 
 
 -- !query
@@ -27,12 +27,43 @@ struct<divide_interval(subtracttimestamps(TIMESTAMP '2019-10-15 00:00:00', TIMES
 
 
 -- !query
+select interval 2147483647 month * 2
+-- !query schema
+struct<multiply_interval(INTERVAL '178956970 years 7 months', CAST(2 AS DOUBLE)):interval>
+-- !query output
+178956970 years 7 months
+
+
+-- !query
+select interval 2147483647 month / 0.5
+-- !query schema
+struct<divide_interval(INTERVAL '178956970 years 7 months', CAST(0.5 AS DOUBLE)):interval>
+-- !query output
+178956970 years 7 months
+
+
+-- !query
+select interval 2147483647 day * 2
+-- !query schema
+struct<multiply_interval(INTERVAL '2147483647 days', CAST(2 AS DOUBLE)):interval>
+-- !query output
+2147483647 days 2562047788 hours 54.775807 seconds
+
+
+-- !query
+select interval 2147483647 day / 0.5
+-- !query schema
+struct<divide_interval(INTERVAL '2147483647 days', CAST(0.5 AS DOUBLE)):interval>
+-- !query output
+2147483647 days 2562047788 hours 54.775807 seconds
+
+
+-- !query
 select interval '2 seconds' / 0
 -- !query schema
-struct<>
+struct<divide_interval(INTERVAL '2 seconds', CAST(0 AS DOUBLE)):interval>
 -- !query output
-java.lang.ArithmeticException
-divide by zero
+NULL
 
 
 -- !query
@@ -779,19 +810,17 @@ struct<(b + INTERVAL '1 months'):interval>
 -- !query
 select a * 1.1 from values (interval '-2147483648 months', interval '2147483647 months') t(a, b)
 -- !query schema
-struct<>
+struct<multiply_interval(a, CAST(1.1 AS DOUBLE)):interval>
 -- !query output
-java.lang.ArithmeticException
-integer overflow
+-178956970 years -8 months
 
 
 -- !query
 select a / 0.5 from values (interval '-2147483648 months', interval '2147483647 months') t(a, b)
 -- !query schema
-struct<>
+struct<divide_interval(a, CAST(0.5 AS DOUBLE)):interval>
 -- !query output
-java.lang.ArithmeticException
-integer overflow
+-178956970 years -8 months
 
 
 -- !query


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org