Posted to commits@spark.apache.org by ge...@apache.org on 2022/09/08 20:24:08 UTC
[spark] branch branch-3.3 updated: [SPARK-40389][SQL] Decimals can't upcast as integral types if the cast can overflow
This is an automated email from the ASF dual-hosted git repository.
gengliang pushed a commit to branch branch-3.3
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/branch-3.3 by this push:
new cd9f5642060 [SPARK-40389][SQL] Decimals can't upcast as integral types if the cast can overflow
cd9f5642060 is described below
commit cd9f5642060cea344b6c84dde19de3e96836da19
Author: Gengliang Wang <ge...@apache.org>
AuthorDate: Thu Sep 8 13:23:06 2022 -0700
[SPARK-40389][SQL] Decimals can't upcast as integral types if the cast can overflow
### What changes were proposed in this pull request?
In Spark SQL, the method `canUpCast` returns true iff we can safely up-cast the `from` type to the `to` type without truncation, precision loss, or possible runtime failures.
However, `DecimalType(10, 0)` is currently considered eligible for `canUpCast` to the Integer type. This is wrong, since casting `9000000000BD` to Integer overflows (see the sketch after the list below).
As a result:
* The optimizer rule `SimplifyCasts` relies on the method `canUpCast`, and it will mistakenly simplify `cast(cast(9000000000BD as int) as long)` to `cast(9000000000BD as long)`
* The STRICT store assignment policy relies on this method too. With the policy enabled, inserting `9000000000BD` into an integer column passes the compile-time check and inserts the unexpected value `410065408`.
* etc...
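For context, the wrap-around that produces `410065408` can be reproduced in plain Scala, without Spark (a minimal sketch of the narrowing behavior, not Spark's cast implementation):

```scala
// 9000000000 needs 10 decimal digits, the same precision DecimalType(10, 0)
// advertises, yet it exceeds Int.MaxValue (2147483647).
object OverflowDemo {
  def main(args: Array[String]): Unit = {
    val d = BigDecimal("9000000000")
    // Narrowing Long -> Int keeps only the low 32 bits:
    // 9000000000 - 2 * 4294967296 = 410065408.
    println(d.toLong)       // 9000000000
    println(d.toLong.toInt) // 410065408, the unexpected value noted above
  }
}
```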
### Why are the changes needed?
Bug fix for the method `Cast.canUpCast`.
### Does this PR introduce _any_ user-facing change?
Yes, it fixes a bug in the condition that checks whether a decimal can be safely cast to integral types.
### How was this patch tested?
New unit tests.
Closes #37832 from gengliangwang/SPARK-40389.
Authored-by: Gengliang Wang <ge...@apache.org>
Signed-off-by: Gengliang Wang <ge...@apache.org>
(cherry picked from commit 17982519a749bd4ca2aa7eca12fba00ccc1520aa)
Signed-off-by: Gengliang Wang <ge...@apache.org>
---
.../main/scala/org/apache/spark/sql/types/DecimalType.scala | 11 ++++++-----
.../spark/sql/catalyst/expressions/CastSuiteBase.scala | 9 +++++++++
.../src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala | 12 ++++++++++++
3 files changed, 27 insertions(+), 5 deletions(-)
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/DecimalType.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/DecimalType.scala
index 08ddd12ef7d..19e7d898d22 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/DecimalType.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/DecimalType.scala
@@ -92,14 +92,15 @@ case class DecimalType(precision: Int, scale: Int) extends FractionalType {
* Returns whether this DecimalType is tighter than `other`. If yes, it means `this`
* can be casted into `other` safely without losing any precision or range.
*/
- private[sql] def isTighterThan(other: DataType): Boolean = isTighterThanInternal(other)
-
- @tailrec
- private def isTighterThanInternal(other: DataType): Boolean = other match {
+ private[sql] def isTighterThan(other: DataType): Boolean = other match {
case dt: DecimalType =>
(precision - scale) <= (dt.precision - dt.scale) && scale <= dt.scale
case dt: IntegralType =>
- isTighterThanInternal(DecimalType.forType(dt))
+ val integerAsDecimal = DecimalType.forType(dt)
+ assert(integerAsDecimal.scale == 0)
+ // If the precision equals `integerAsDecimal.precision`, there can be integer overflow
+ // during casting.
+ precision < integerAsDecimal.precision && scale == 0
case _ => false
}
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuiteBase.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuiteBase.scala
index ba8ab708046..8c0467aedd1 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuiteBase.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuiteBase.scala
@@ -671,6 +671,15 @@ abstract class CastSuiteBase extends SparkFunSuite with ExpressionEvalHelper {
}
}
+ test("SPARK-40389: canUpCast: return false if casting decimal to integral types can cause" +
+ " overflow") {
+ Seq(ByteType, ShortType, IntegerType, LongType).foreach { integralType =>
+ val decimalType = DecimalType.forType(integralType)
+ assert(!Cast.canUpCast(decimalType, integralType))
+ assert(Cast.canUpCast(integralType, decimalType))
+ }
+ }
+
test("SPARK-27671: cast from nested null type in struct") {
import DataTypeTestUtils._
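For reference, the behavior this test locks in can also be checked interactively, e.g. from a spark-shell with the catalyst classes on the classpath (a sketch under that assumption):

```scala
import org.apache.spark.sql.catalyst.expressions.Cast
import org.apache.spark.sql.types._

// DecimalType.forType(IntegerType) == DecimalType(10, 0)
Cast.canUpCast(DecimalType(10, 0), IntegerType) // false after this fix: may overflow
Cast.canUpCast(IntegerType, DecimalType(10, 0)) // true: every Int fits in 10 digits
```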
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
index 6ed6a85b8d1..b23da9282b0 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
@@ -4551,6 +4551,18 @@ class SQLQuerySuite extends QueryTest with SharedSparkSession with AdaptiveSpark
sql("select * from test_temp_view"),
Row(1, 2, 3, 1, 2, 3, 1, 1))
}
+
+ test("SPARK-40389: Don't eliminate a cast which can cause overflow") {
+ withSQLConf(SQLConf.ANSI_ENABLED.key -> "true") {
+ withTable("dt") {
+ sql("create table dt using parquet as select 9000000000BD as d")
+ val msg = intercept[SparkException] {
+ sql("select cast(cast(d as int) as long) from dt").collect()
+ }.getCause.getMessage
+ assert(msg.contains("[CAST_OVERFLOW]"))
+ }
+ }
+ }
}
case class Foo(bar: Option[String])
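End to end, the fixed behavior exercised by the SQLQuerySuite test above can be reproduced in a spark-shell session along these lines (a sketch; the table name and session setup are illustrative):

```scala
spark.conf.set("spark.sql.ansi.enabled", "true")
spark.sql("create table dt using parquet as select 9000000000BD as d")
// Before the fix, SimplifyCasts dropped the inner cast and this returned
// 9000000000; with the fix the inner cast is kept and fails under ANSI mode
// with a CAST_OVERFLOW error.
spark.sql("select cast(cast(d as int) as long) from dt").collect()
```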
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org