Posted to commits@spark.apache.org by ge...@apache.org on 2022/09/08 20:24:08 UTC
[spark] branch branch-3.3 updated: [SPARK-40389][SQL] Decimals can't upcast as integral types if the cast can overflow
This is an automated email from the ASF dual-hosted git repository.
gengliang pushed a commit to branch branch-3.3
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/branch-3.3 by this push:
new cd9f5642060 [SPARK-40389][SQL] Decimals can't upcast as integral types if the cast can overflow
cd9f5642060 is described below
commit cd9f5642060cea344b6c84dde19de3e96836da19
Author: Gengliang Wang <ge...@apache.org>
AuthorDate: Thu Sep 8 13:23:06 2022 -0700
[SPARK-40389][SQL] Decimals can't upcast as integral types if the cast can overflow
### What changes were proposed in this pull request?
In Spark SQL, the method `canUpCast` returns true iff we can safely up-cast the `from` type to the `to` type without truncation, precision loss, or possible runtime failures.
However, `DecimalType(10, 0)` is currently considered eligible for `canUpCast` to the Integer type. This is wrong, since casting `9000000000BD` to Integer overflows (see the sketch after the list below).
As a result:
* The optimizer rule `SimplifyCasts` relies on the method `canUpCast`, and it will mistakenly simplify `cast(cast(9000000000BD as int) as long)` to `cast(9000000000BD as long)`
* The STRICT store assignment policy relies on this method too. With the policy enabled, inserting `9000000000BD` into an integer column passes the compile-time check and inserts the unexpected value `410065408`.
* etc...
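For context, the wrap-around that produces `410065408` can be reproduced in plain Scala, without Spark (a minimal sketch of the narrowing behavior, not Spark's cast implementation):

```scala
// 9000000000 needs 10 decimal digits, the same precision DecimalType(10, 0)
// advertises, yet it exceeds Int.MaxValue (2147483647).
object OverflowDemo {
  def main(args: Array[String]): Unit = {
    val d = BigDecimal("9000000000")
    // Narrowing Long -> Int keeps only the low 32 bits:
    // 9000000000 - 2 * 4294967296 = 410065408.
    println(d.toLong)       // 9000000000
    println(d.toLong.toInt) // 410065408, the unexpected value noted above
  }
}
```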
### Why are the changes needed?
Bug fix for the method `Cast.canUpCast`.
### Does this PR introduce _any_ user-facing change?
Yes, it fixes a bug in the condition that checks whether a decimal can be safely cast to integral types.
### How was this patch tested?
New unit tests.
Closes #37832 from gengliangwang/SPARK-40389.
Authored-by: Gengliang Wang <ge...@apache.org>
Signed-off-by: Gengliang Wang <ge...@apache.org>
(cherry picked from commit 17982519a749bd4ca2aa7eca12fba00ccc1520aa)
Signed-off-by: Gengliang Wang <ge...@apache.org>
---
.../main/scala/org/apache/spark/sql/types/DecimalType.scala | 11 ++++++-----
.../spark/sql/catalyst/expressions/CastSuiteBase.scala | 9 +++++++++
.../src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala | 12 ++++++++++++
3 files changed, 27 insertions(+), 5 deletions(-)
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/DecimalType.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/DecimalType.scala
index 08ddd12ef7d..19e7d898d22 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/DecimalType.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/DecimalType.scala
@@ -92,14 +92,15 @@ case class DecimalType(precision: Int, scale: Int) extends FractionalType {
* Returns whether this DecimalType is tighter than `other`. If yes, it means `this`
* can be casted into `other` safely without losing any precision or range.
*/
- private[sql] def isTighterThan(other: DataType): Boolean = isTighterThanInternal(other)
-
- @tailrec
- private def isTighterThanInternal(other: DataType): Boolean = other match {
+ private[sql] def isTighterThan(other: DataType): Boolean = other match {
case dt: DecimalType =>
(precision - scale) <= (dt.precision - dt.scale) && scale <= dt.scale
case dt: IntegralType =>
- isTighterThanInternal(DecimalType.forType(dt))
+ val integerAsDecimal = DecimalType.forType(dt)
+ assert(integerAsDecimal.scale == 0)
+ // If the precision equals `integerAsDecimal.precision`, there can be integer overflow
+ // during casting.
+ precision < integerAsDecimal.precision && scale == 0
case _ => false
}
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuiteBase.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuiteBase.scala
index ba8ab708046..8c0467aedd1 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuiteBase.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuiteBase.scala
@@ -671,6 +671,15 @@ abstract class CastSuiteBase extends SparkFunSuite with ExpressionEvalHelper {
}
}
+ test("SPARK-40389: canUpCast: return false if casting decimal to integral types can cause" +
+ " overflow") {
+ Seq(ByteType, ShortType, IntegerType, LongType).foreach { integralType =>
+ val decimalType = DecimalType.forType(integralType)
+ assert(!Cast.canUpCast(decimalType, integralType))
+ assert(Cast.canUpCast(integralType, decimalType))
+ }
+ }
+
test("SPARK-27671: cast from nested null type in struct") {
import DataTypeTestUtils._
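For reference, the behavior this test locks in can also be checked interactively, e.g. from a spark-shell with the catalyst classes on the classpath (a sketch under that assumption):

```scala
import org.apache.spark.sql.catalyst.expressions.Cast
import org.apache.spark.sql.types._

// DecimalType.forType(IntegerType) == DecimalType(10, 0)
Cast.canUpCast(DecimalType(10, 0), IntegerType) // false after this fix: may overflow
Cast.canUpCast(IntegerType, DecimalType(10, 0)) // true: every Int fits in 10 digits
```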
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
index 6ed6a85b8d1..b23da9282b0 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
@@ -4551,6 +4551,18 @@ class SQLQuerySuite extends QueryTest with SharedSparkSession with AdaptiveSpark
sql("select * from test_temp_view"),
Row(1, 2, 3, 1, 2, 3, 1, 1))
}
+
+ test("SPARK-40389: Don't eliminate a cast which can cause overflow") {
+ withSQLConf(SQLConf.ANSI_ENABLED.key -> "true") {
+ withTable("dt") {
+ sql("create table dt using parquet as select 9000000000BD as d")
+ val msg = intercept[SparkException] {
+ sql("select cast(cast(d as int) as long) from dt").collect()
+ }.getCause.getMessage
+ assert(msg.contains("[CAST_OVERFLOW]"))
+ }
+ }
+ }
}
case class Foo(bar: Option[String])
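End to end, the fixed behavior exercised by the SQLQuerySuite test above can be reproduced in a spark-shell session along these lines (a sketch; the table name and session setup are illustrative):

```scala
spark.conf.set("spark.sql.ansi.enabled", "true")
spark.sql("create table dt using parquet as select 9000000000BD as d")
// Before the fix, SimplifyCasts dropped the inner cast and this returned
// 9000000000; with the fix the inner cast is kept and fails under ANSI mode
// with a CAST_OVERFLOW error.
spark.sql("select cast(cast(d as int) as long) from dt").collect()
```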
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org