You are viewing a plain text version of this content. The canonical link for it is here.
Posted to reviews@spark.apache.org by GitBox <gi...@apache.org> on 2020/07/08 04:22:31 UTC

[GitHub] [spark] viirya commented on a change in pull request #29026: [SPARK-28067][SPARK-32018] Fix decimal overflow issues

viirya commented on a change in pull request #29026:
URL: https://github.com/apache/spark/pull/29026#discussion_r451274175



##########
File path: sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Sum.scala
##########
@@ -58,39 +58,50 @@ case class Sum(child: Expression) extends DeclarativeAggregate with ImplicitCast
     case _ => DoubleType
   }
 
-  private lazy val sumDataType = resultType
-
-  private lazy val sum = AttributeReference("sum", sumDataType)()
+  private lazy val sum = AttributeReference("sum", resultType)()
 
   private lazy val isEmpty = AttributeReference("isEmpty", BooleanType, nullable = false)()
 
-  private lazy val zero = Literal.default(sumDataType)
+  private lazy val zero = Literal.default(resultType)
 
   override lazy val aggBufferAttributes = resultType match {
     case _: DecimalType => sum :: isEmpty :: Nil
     case _ => sum :: Nil
   }
 
   override lazy val initialValues: Seq[Expression] = resultType match {
-    case _: DecimalType => Seq(Literal(null, resultType), Literal(true, BooleanType))
+    case _: DecimalType => Seq(zero, Literal(true, BooleanType))
     case _ => Seq(Literal(null, resultType))
   }
 
   override lazy val updateExpressions: Seq[Expression] = {
-    if (child.nullable) {
-      val updateSumExpr = coalesce(coalesce(sum, zero) + child.cast(sumDataType), sum)
-      resultType match {
-        case _: DecimalType =>
-          Seq(updateSumExpr, isEmpty && child.isNull)
-        case _ => Seq(updateSumExpr)
-      }
-    } else {
-      val updateSumExpr = coalesce(sum, zero) + child.cast(sumDataType)
-      resultType match {
-        case _: DecimalType =>
-          Seq(updateSumExpr, Literal(false, BooleanType))
-        case _ => Seq(updateSumExpr)
-      }
+    resultType match {
+      case _: DecimalType =>
+        // For decimal type, the initial value of `sum` is 0. We need to keep `sum` unchanged if
+        // the input is null, as SUM function ignores null input. The `sum` can only be null if
+        // overflow happens under non-ansi mode.

Review comment:
       Do you mean `checkOverflow` in the `Add` expression? If it is enabled, then for `DecimalType`, `DecimalExactNumeric` is used for the `plus` operation. But I don't see that it has overridden the `plus` behavior to check for overflow.




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org



---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org
For additional commands, e-mail: reviews-help@spark.apache.org