You are viewing a plain text version of this content. The canonical link for it is here.
Posted to reviews@spark.apache.org by GitBox <gi...@apache.org> on 2018/12/17 16:12:45 UTC

[GitHub] cloud-fan commented on a change in pull request #23308: [SPARK-26308][SQL] Avoid cast of decimals for ScalaUDF

cloud-fan commented on a change in pull request #23308: [SPARK-26308][SQL] Avoid cast of decimals for ScalaUDF
URL: https://github.com/apache/spark/pull/23308#discussion_r242211223
 
 

 ##########
 File path: sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercion.scala
 ##########
 @@ -879,6 +879,36 @@ object TypeCoercion {
           }
         }
         e.withNewChildren(children)
+      case udf: ScalaUDF if udf.inputTypes.nonEmpty =>
+        val children = udf.children.zip(udf.inputTypes).map { case (in, expected) =>
+          implicitCast(in, scalaUDFExpectedTypes(in.dataType, expected)).getOrElse(in)
+        }
+        udf.withNewChildren(children)
+    }
+
+    private def scalaUDFExpectedTypes(input: DataType, expectedType: DataType): DataType = {
+      (input, expectedType) match {
+        // SPARK-26308: avoid casting to an arbitrary precision and scale for decimals. Please note
+        // that precision and scale cannot be inferred properly for a ScalaUDF because, when it is
+        // created, it is not bound to any column. So here the precision and scale of the input
+        // column is used.
+        case (in: DecimalType, _: DecimalType) => in
+        case (ArrayType(dtIn, _), ArrayType(dtExp, nullableExp)) =>
+          ArrayType(scalaUDFExpectedTypes(dtIn, dtExp), nullableExp)
 
 Review comment:
   What's the current behavior if the udf expects `Array[Int]` but the actual column is `ArrayType(IntegerType, nullable = true)`?

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
users@infra.apache.org


With regards,
Apache Git Services

---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org
For additional commands, e-mail: reviews-help@spark.apache.org