You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by we...@apache.org on 2020/02/25 14:34:31 UTC
[spark] branch branch-3.0 updated: [SPARK-30919][SQL] Make interval
multiply and divide's overflow behavior consistent with other operations
This is an automated email from the ASF dual-hosted git repository.
wenchen pushed a commit to branch branch-3.0
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/branch-3.0 by this push:
new 1d746eb [SPARK-30919][SQL] Make interval multiply and divide's overflow behavior consistent with other operations
1d746eb is described below
commit 1d746eb0afddd3c2a4e1313dddf80ac0aec00a7a
Author: Kent Yao <ya...@hotmail.com>
AuthorDate: Tue Feb 25 22:19:24 2020 +0800
[SPARK-30919][SQL] Make interval multiply and divide's overflow behavior consistent with other operations
### What changes were proposed in this pull request?
The current behavior of interval multiply and divide follows the ANSI SQL standard when overflow, it is compatible with other operations when `spark.sql.ansi.enabled` is true, but not compatible when `spark.sql.ansi.enabled` is false.
When `spark.sql.ansi.enabled` is false, as the factor is a double value, so it should use java's rounding or truncation behavior for casting double to integrals. when divided by zero, it returns `null`. we also follow the natural rules for intervals as defined in the Gregorian calendar, so we do not add the month fraction to days but add days fraction to microseconds.
### Why are the changes needed?
Make interval multiply and divide's overflow behavior consistent with other interval operations
### Does this PR introduce any user-facing change?
no, these are new features in 3.0
### How was this patch tested?
add uts
Closes #27672 from yaooqinn/SPARK-30919.
Authored-by: Kent Yao <ya...@hotmail.com>
Signed-off-by: Wenchen Fan <we...@databricks.com>
(cherry picked from commit 761209c1f2af513a9db2e08c5937531cff7aeeed)
Signed-off-by: Wenchen Fan <we...@databricks.com>
---
.../catalyst/expressions/intervalExpressions.scala | 34 +++++++++---
.../spark/sql/catalyst/util/IntervalUtils.scala | 45 +++++++++++++---
.../expressions/IntervalExpressionsSuite.scala | 37 ++++++++-----
.../sql/catalyst/util/IntervalUtilsSuite.scala | 61 ++++++++++++++--------
.../test/resources/sql-tests/inputs/interval.sql | 4 ++
.../sql-tests/results/ansi/interval.sql.out | 38 +++++++++++++-
.../resources/sql-tests/results/interval.sql.out | 49 +++++++++++++----
7 files changed, 210 insertions(+), 58 deletions(-)
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/intervalExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/intervalExpressions.scala
index 831510e..c09350f 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/intervalExpressions.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/intervalExpressions.scala
@@ -22,6 +22,7 @@ import java.util.Locale
import org.apache.spark.sql.catalyst.expressions.codegen.{CodegenContext, ExprCode}
import org.apache.spark.sql.catalyst.util.IntervalUtils
import org.apache.spark.sql.catalyst.util.IntervalUtils._
+import org.apache.spark.sql.internal.SQLConf
import org.apache.spark.sql.types._
import org.apache.spark.unsafe.types.CalendarInterval
@@ -112,13 +113,14 @@ object ExtractIntervalPart {
abstract class IntervalNumOperation(
interval: Expression,
- num: Expression,
- operation: (CalendarInterval, Double) => CalendarInterval,
- operationName: String)
+ num: Expression)
extends BinaryExpression with ImplicitCastInputTypes with Serializable {
override def left: Expression = interval
override def right: Expression = num
+ protected val operation: (CalendarInterval, Double) => CalendarInterval
+ protected def operationName: String
+
override def inputTypes: Seq[AbstractDataType] = Seq(CalendarIntervalType, DoubleType)
override def dataType: DataType = CalendarIntervalType
@@ -136,11 +138,29 @@ abstract class IntervalNumOperation(
override def prettyName: String = operationName.stripSuffix("Exact") + "_interval"
}
-case class MultiplyInterval(interval: Expression, num: Expression)
- extends IntervalNumOperation(interval, num, multiplyExact, "multiplyExact")
+case class MultiplyInterval(
+ interval: Expression,
+ num: Expression,
+ checkOverflow: Boolean = SQLConf.get.ansiEnabled)
+ extends IntervalNumOperation(interval, num) {
+
+ override protected val operation: (CalendarInterval, Double) => CalendarInterval =
+ if (checkOverflow) multiplyExact else multiply
+
+ override protected def operationName: String = if (checkOverflow) "multiplyExact" else "multiply"
+}
+
+case class DivideInterval(
+ interval: Expression,
+ num: Expression,
+ checkOverflow: Boolean = SQLConf.get.ansiEnabled)
+ extends IntervalNumOperation(interval, num) {
+
+ override protected val operation: (CalendarInterval, Double) => CalendarInterval =
+ if (checkOverflow) divideExact else divide
-case class DivideInterval(interval: Expression, num: Expression)
- extends IntervalNumOperation(interval, num, divideExact, "divideExact")
+ override protected def operationName: String = if (checkOverflow) "divideExact" else "divide"
+}
// scalastyle:off line.size.limit
@ExpressionDescription(
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/IntervalUtils.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/IntervalUtils.scala
index 2790f8e..0a13ec8 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/IntervalUtils.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/IntervalUtils.scala
@@ -405,23 +405,39 @@ object IntervalUtils {
}
/**
- * Makes an interval from months, days and micros with the fractional part by
- * adding the month fraction to days and the days fraction to micros.
- *
- * @throws ArithmeticException if the result overflows any field value
+ * Makes an interval from months, days and micros with the fractional part.
+ * The overflow style here follows the way of ansi sql standard and the natural rules for
+ * intervals as defined in the Gregorian calendar. Thus, the days fraction will be added
+ * to microseconds but the months fraction will not be added to days, and it will throw
+ * exception if any part overflows.
*/
private def fromDoubles(
monthsWithFraction: Double,
daysWithFraction: Double,
microsWithFraction: Double): CalendarInterval = {
val truncatedMonths = Math.toIntExact(monthsWithFraction.toLong)
- val days = daysWithFraction + DAYS_PER_MONTH * (monthsWithFraction - truncatedMonths)
- val truncatedDays = Math.toIntExact(days.toLong)
- val micros = microsWithFraction + MICROS_PER_DAY * (days - truncatedDays)
+ val truncatedDays = Math.toIntExact(daysWithFraction.toLong)
+ val micros = microsWithFraction + MICROS_PER_DAY * (daysWithFraction - truncatedDays)
new CalendarInterval(truncatedMonths, truncatedDays, micros.round)
}
/**
+ * Makes an interval from months, days and micros with the fractional part.
+ * The overflow style here follows the way of casting [[java.lang.Double]] to integrals and the
+ * natural rules for intervals as defined in the Gregorian calendar. Thus, the days fraction
+ * will be added to microseconds but the months fraction will not be added to days, and there may
+ * be rounding or truncation in months(or day and microseconds) part.
+ */
+ private def safeFromDoubles(
+ monthsWithFraction: Double,
+ daysWithFraction: Double,
+ microsWithFraction: Double): CalendarInterval = {
+ val truncatedDays = daysWithFraction.toInt
+ val micros = microsWithFraction + MICROS_PER_DAY * (daysWithFraction - truncatedDays)
+ new CalendarInterval(monthsWithFraction.toInt, truncatedDays, micros.round)
+ }
+
+ /**
* Unary minus, return the negated the calendar interval value.
*
* @throws ArithmeticException if the result overflows any field value
@@ -486,6 +502,13 @@ object IntervalUtils {
/**
* Return a new calendar interval instance of the left interval times a multiplier.
+ */
+ def multiply(interval: CalendarInterval, num: Double): CalendarInterval = {
+ safeFromDoubles(num * interval.months, num * interval.days, num * interval.microseconds)
+ }
+
+ /**
+ * Return a new calendar interval instance of the left interval times a multiplier.
*
* @throws ArithmeticException if the result overflows any field value
*/
@@ -495,6 +518,14 @@ object IntervalUtils {
/**
* Return a new calendar interval instance of the left interval divides by a dividend.
+ */
+ def divide(interval: CalendarInterval, num: Double): CalendarInterval = {
+ if (num == 0) return null
+ safeFromDoubles(interval.months / num, interval.days / num, interval.microseconds / num)
+ }
+
+ /**
+ * Return a new calendar interval instance of the left interval divides by a dividend.
*
* @throws ArithmeticException if the result overflows any field value or divided by zero
*/
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/IntervalExpressionsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/IntervalExpressionsSuite.scala
index bc57438..e591c49 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/IntervalExpressionsSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/IntervalExpressionsSuite.scala
@@ -199,11 +199,16 @@ class IntervalExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper {
}
test("multiply") {
- def check(interval: String, num: Double, expected: String): Unit = {
- val expr = MultiplyInterval(Literal(stringToInterval(interval)), Literal(num))
+ def check(
+ interval: String,
+ num: Double,
+ expected: String,
+ isAnsi: Option[Boolean] = None): Unit = {
val expectedRes = safeStringToInterval(expected)
- Seq("true", "false").foreach { v =>
+ val configs = if (isAnsi.isEmpty) Seq("true", "false") else isAnsi.map(_.toString).toSeq
+ configs.foreach { v =>
withSQLConf(SQLConf.ANSI_ENABLED.key -> v) {
+ val expr = MultiplyInterval(Literal(stringToInterval(interval)), Literal(num))
if (expectedRes == null) {
checkExceptionInExpression[ArithmeticException](expr, expected)
} else {
@@ -220,17 +225,23 @@ class IntervalExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper {
check("1 year 1 second", 0.5, "6 months 500 milliseconds")
check("-100 years -1 millisecond", 0.5, "-50 years -500 microseconds")
check("2 months 4 seconds", -0.5, "-1 months -2 seconds")
- check("1 month 2 microseconds", 1.5, "1 months 15 days 3 microseconds")
- check("2 months", Int.MaxValue, "integer overflow")
+ check("1 month 2 microseconds", 1.5, "1 months 3 microseconds")
+ check("2 months", Int.MaxValue, "integer overflow", Some(true))
+ check("2 months", Int.MaxValue, Int.MaxValue + " months", Some(false))
}
test("divide") {
- def check(interval: String, num: Double, expected: String): Unit = {
- val expr = DivideInterval(Literal(stringToInterval(interval)), Literal(num))
+ def check(
+ interval: String,
+ num: Double,
+ expected: String,
+ isAnsi: Option[Boolean] = None): Unit = {
val expectedRes = safeStringToInterval(expected)
- Seq("true", "false").foreach { v =>
+ val configs = if (isAnsi.isEmpty) Seq("true", "false") else isAnsi.map(_.toString).toSeq
+ configs.foreach { v =>
withSQLConf(SQLConf.ANSI_ENABLED.key -> v) {
- if (expectedRes == null) {
+ val expr = DivideInterval(Literal(stringToInterval(interval)), Literal(num))
+ if (expected != null && expectedRes == null) {
checkExceptionInExpression[ArithmeticException](expr, expected)
} else {
checkEvaluation(expr, expectedRes)
@@ -245,9 +256,11 @@ class IntervalExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper {
check("6 years -7 seconds", 3, "2 years -2.333333 seconds")
check("2 years -8 seconds", 0.5, "4 years -16 seconds")
check("-1 month 2 microseconds", -0.25, "4 months -8 microseconds")
- check("1 month 3 microsecond", 1.5, "20 days 2 microseconds")
- check("1 second", 0, "divide by zero")
- check(s"${Int.MaxValue} months", 0.9, "integer overflow")
+ check("1 month 3 microsecond", 1.5, "2 microseconds")
+ check("1 second", 0, "divide by zero", Some(true))
+ check("1 second", 0, null, Some(false))
+ check(s"${Int.MaxValue} months", 0.9, "integer overflow", Some(true))
+ check(s"${Int.MaxValue} months", 0.9, Int.MaxValue + " months", Some(false))
}
test("make interval") {
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/IntervalUtilsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/IntervalUtilsSuite.scala
index d0d79ac..e7c3163 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/IntervalUtilsSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/IntervalUtilsSuite.scala
@@ -268,19 +268,23 @@ class IntervalUtilsSuite extends SparkFunSuite with SQLHelper {
}
test("multiply by num") {
- var interval = new CalendarInterval(0, 0, 0)
- assert(interval === multiplyExact(interval, 0))
- interval = new CalendarInterval(123, 456, 789)
- assert(new CalendarInterval(123 * 42, 456 * 42, 789 * 42) === multiplyExact(interval, 42))
- interval = new CalendarInterval(-123, -456, -789)
- assert(new CalendarInterval(-123 * 42, -456 * 42, -789 * 42) === multiplyExact(interval, 42))
- assert(new CalendarInterval(1, 22, 12 * MICROS_PER_HOUR) ===
- multiplyExact(new CalendarInterval(1, 5, 0), 1.5))
- assert(new CalendarInterval(2, 14, 12 * MICROS_PER_HOUR) ===
- multiplyExact(new CalendarInterval(2, 2, 2 * MICROS_PER_HOUR), 1.2))
+ Seq[(CalendarInterval, Double) => CalendarInterval](multiply, multiplyExact).foreach { func =>
+ var interval = new CalendarInterval(0, 0, 0)
+ assert(interval === func(interval, 0))
+ interval = new CalendarInterval(123, 456, 789)
+ assert(new CalendarInterval(123 * 42, 456 * 42, 789 * 42) === func(interval, 42))
+ interval = new CalendarInterval(-123, -456, -789)
+ assert(new CalendarInterval(-123 * 42, -456 * 42, -789 * 42) === func(interval, 42))
+ interval = new CalendarInterval(1, 5, 0)
+ assert(new CalendarInterval(1, 7, 12 * MICROS_PER_HOUR) === func(interval, 1.5))
+ interval = new CalendarInterval(2, 2, 2 * MICROS_PER_HOUR)
+ assert(new CalendarInterval(2, 2, 12 * MICROS_PER_HOUR) === func(interval, 1.2))
+ }
+ val interval = new CalendarInterval(2, 0, 0)
+ assert(multiply(interval, Integer.MAX_VALUE) === new CalendarInterval(Int.MaxValue, 0, 0))
try {
- multiplyExact(new CalendarInterval(2, 0, 0), Integer.MAX_VALUE)
+ multiplyExact(interval, Integer.MAX_VALUE)
fail("Expected to throw an exception on months overflow")
} catch {
case e: ArithmeticException => assert(e.getMessage.contains("overflow"))
@@ -288,17 +292,32 @@ class IntervalUtilsSuite extends SparkFunSuite with SQLHelper {
}
test("divide by num") {
- var interval = new CalendarInterval(0, 0, 0)
- assert(interval === divideExact(interval, 10))
- interval = new CalendarInterval(1, 3, 30 * MICROS_PER_SECOND)
- assert(new CalendarInterval(0, 16, 12 * MICROS_PER_HOUR + 15 * MICROS_PER_SECOND) ===
- divideExact(interval, 2))
- assert(new CalendarInterval(2, 6, MICROS_PER_MINUTE) === divideExact(interval, 0.5))
- interval = new CalendarInterval(-1, 0, -30 * MICROS_PER_SECOND)
- assert(new CalendarInterval(0, -15, -15 * MICROS_PER_SECOND) === divideExact(interval, 2))
- assert(new CalendarInterval(-2, 0, -1 * MICROS_PER_MINUTE) === divideExact(interval, 0.5))
+ Seq[(CalendarInterval, Double) => CalendarInterval](divide, divideExact).foreach { func =>
+ var interval = new CalendarInterval(0, 0, 0)
+ assert(interval === func(interval, 10))
+ interval = new CalendarInterval(1, 3, 30 * MICROS_PER_SECOND)
+ assert(new CalendarInterval(0, 1, 12 * MICROS_PER_HOUR + 15 * MICROS_PER_SECOND) ===
+ func(interval, 2))
+ assert(new CalendarInterval(2, 6, MICROS_PER_MINUTE) === func(interval, 0.5))
+ interval = new CalendarInterval(-1, 0, -30 * MICROS_PER_SECOND)
+ assert(new CalendarInterval(0, 0, -15 * MICROS_PER_SECOND) === func(interval, 2))
+ assert(new CalendarInterval(-2, 0, -MICROS_PER_MINUTE) === func(interval, 0.5))
+ }
+
+ var interval = new CalendarInterval(Int.MaxValue, Int.MaxValue, 0)
+ assert(divide(interval, 0.9) === new CalendarInterval(Int.MaxValue, Int.MaxValue,
+ ((Int.MaxValue / 9.0) * MICROS_PER_DAY).round))
+ try {
+ divideExact(interval, 0.9)
+ fail("Expected to throw an exception on integer overflow")
+ } catch {
+ case e: ArithmeticException => assert(e.getMessage.contains("integer overflow"))
+ }
+
+ interval = new CalendarInterval(123, 456, 789)
+ assert(divide(interval, 0) === null)
try {
- divideExact(new CalendarInterval(123, 456, 789), 0)
+ divideExact(interval, 0)
fail("Expected to throw an exception on divide by zero")
} catch {
case e: ArithmeticException => assert(e.getMessage.contains("divide by zero"))
diff --git a/sql/core/src/test/resources/sql-tests/inputs/interval.sql b/sql/core/src/test/resources/sql-tests/inputs/interval.sql
index facd632..4f26e75 100644
--- a/sql/core/src/test/resources/sql-tests/inputs/interval.sql
+++ b/sql/core/src/test/resources/sql-tests/inputs/interval.sql
@@ -4,6 +4,10 @@
select 3 * (timestamp'2019-10-15 10:11:12.001002' - date'2019-10-15');
select interval 4 month 2 weeks 3 microseconds * 1.5;
select (timestamp'2019-10-15' - timestamp'2019-10-14') / 1.5;
+select interval 2147483647 month * 2;
+select interval 2147483647 month / 0.5;
+select interval 2147483647 day * 2;
+select interval 2147483647 day / 0.5;
-- interval operation with null and zero case
select interval '2 seconds' / 0;
diff --git a/sql/core/src/test/resources/sql-tests/results/ansi/interval.sql.out b/sql/core/src/test/resources/sql-tests/results/ansi/interval.sql.out
index 4b46540..4a41dd6 100644
--- a/sql/core/src/test/resources/sql-tests/results/ansi/interval.sql.out
+++ b/sql/core/src/test/resources/sql-tests/results/ansi/interval.sql.out
@@ -1,5 +1,5 @@
-- Automatically generated by SQLQueryTestSuite
--- Number of queries: 81
+-- Number of queries: 85
-- !query
@@ -27,6 +27,42 @@ struct<divide_interval(subtracttimestamps(TIMESTAMP '2019-10-15 00:00:00', TIMES
-- !query
+select interval 2147483647 month * 2
+-- !query schema
+struct<>
+-- !query output
+java.lang.ArithmeticException
+integer overflow
+
+
+-- !query
+select interval 2147483647 month / 0.5
+-- !query schema
+struct<>
+-- !query output
+java.lang.ArithmeticException
+integer overflow
+
+
+-- !query
+select interval 2147483647 day * 2
+-- !query schema
+struct<>
+-- !query output
+java.lang.ArithmeticException
+integer overflow
+
+
+-- !query
+select interval 2147483647 day / 0.5
+-- !query schema
+struct<>
+-- !query output
+java.lang.ArithmeticException
+integer overflow
+
+
+-- !query
select interval '2 seconds' / 0
-- !query schema
struct<>
diff --git a/sql/core/src/test/resources/sql-tests/results/interval.sql.out b/sql/core/src/test/resources/sql-tests/results/interval.sql.out
index 0509594..f1af335 100644
--- a/sql/core/src/test/resources/sql-tests/results/interval.sql.out
+++ b/sql/core/src/test/resources/sql-tests/results/interval.sql.out
@@ -1,5 +1,5 @@
-- Automatically generated by SQLQueryTestSuite
--- Number of queries: 81
+-- Number of queries: 85
-- !query
@@ -27,12 +27,43 @@ struct<divide_interval(subtracttimestamps(TIMESTAMP '2019-10-15 00:00:00', TIMES
-- !query
+select interval 2147483647 month * 2
+-- !query schema
+struct<multiply_interval(INTERVAL '178956970 years 7 months', CAST(2 AS DOUBLE)):interval>
+-- !query output
+178956970 years 7 months
+
+
+-- !query
+select interval 2147483647 month / 0.5
+-- !query schema
+struct<divide_interval(INTERVAL '178956970 years 7 months', CAST(0.5 AS DOUBLE)):interval>
+-- !query output
+178956970 years 7 months
+
+
+-- !query
+select interval 2147483647 day * 2
+-- !query schema
+struct<multiply_interval(INTERVAL '2147483647 days', CAST(2 AS DOUBLE)):interval>
+-- !query output
+2147483647 days 2562047788 hours 54.775807 seconds
+
+
+-- !query
+select interval 2147483647 day / 0.5
+-- !query schema
+struct<divide_interval(INTERVAL '2147483647 days', CAST(0.5 AS DOUBLE)):interval>
+-- !query output
+2147483647 days 2562047788 hours 54.775807 seconds
+
+
+-- !query
select interval '2 seconds' / 0
-- !query schema
-struct<>
+struct<divide_interval(INTERVAL '2 seconds', CAST(0 AS DOUBLE)):interval>
-- !query output
-java.lang.ArithmeticException
-divide by zero
+NULL
-- !query
@@ -779,19 +810,17 @@ struct<(b + INTERVAL '1 months'):interval>
-- !query
select a * 1.1 from values (interval '-2147483648 months', interval '2147483647 months') t(a, b)
-- !query schema
-struct<>
+struct<multiply_interval(a, CAST(1.1 AS DOUBLE)):interval>
-- !query output
-java.lang.ArithmeticException
-integer overflow
+-178956970 years -8 months
-- !query
select a / 0.5 from values (interval '-2147483648 months', interval '2147483647 months') t(a, b)
-- !query schema
-struct<>
+struct<divide_interval(a, CAST(0.5 AS DOUBLE)):interval>
-- !query output
-java.lang.ArithmeticException
-integer overflow
+-178956970 years -8 months
-- !query
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org