You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by rx...@apache.org on 2016/01/03 07:31:43 UTC

spark git commit: [SPARK-12599][MLLIB][SQL] Remove the use of callUDF in MLlib

Repository: spark
Updated Branches:
  refs/heads/master 15bd73627 -> 513e3b092


[SPARK-12599][MLLIB][SQL] Remove the use of callUDF in MLlib

callUDF has been deprecated. However, we do not have an alternative for users to specify the output data type without type tags. This pull request introduces a new API for that, and replaces the invocation of the deprecated callUDF with that.

Author: Reynold Xin <rx...@databricks.com>

Closes #10547 from rxin/SPARK-12599.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/513e3b09
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/513e3b09
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/513e3b09

Branch: refs/heads/master
Commit: 513e3b092c4f3d58058ff64c861ea35cfec04205
Parents: 15bd736
Author: Reynold Xin <rx...@databricks.com>
Authored: Sat Jan 2 22:31:39 2016 -0800
Committer: Reynold Xin <rx...@databricks.com>
Committed: Sat Jan 2 22:31:39 2016 -0800

----------------------------------------------------------------------
 .../main/scala/org/apache/spark/ml/Transformer.scala  |  4 ++--
 .../main/scala/org/apache/spark/sql/functions.scala   | 14 ++++++++++++++
 2 files changed, 16 insertions(+), 2 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/513e3b09/mllib/src/main/scala/org/apache/spark/ml/Transformer.scala
----------------------------------------------------------------------
diff --git a/mllib/src/main/scala/org/apache/spark/ml/Transformer.scala b/mllib/src/main/scala/org/apache/spark/ml/Transformer.scala
index 3c7bcf7..1f3325a 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/Transformer.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/Transformer.scala
@@ -115,8 +115,8 @@ abstract class UnaryTransformer[IN, OUT, T <: UnaryTransformer[IN, OUT, T]]
 
   override def transform(dataset: DataFrame): DataFrame = {
     transformSchema(dataset.schema, logging = true)
-    dataset.withColumn($(outputCol),
-      callUDF(this.createTransformFunc, outputDataType, dataset($(inputCol))))
+    val transformUDF = udf(this.createTransformFunc, outputDataType)
+    dataset.withColumn($(outputCol), transformUDF(dataset($(inputCol))))
   }
 
   override def copy(extra: ParamMap): T = defaultCopy(extra)

http://git-wip-us.apache.org/repos/asf/spark/blob/513e3b09/sql/core/src/main/scala/org/apache/spark/sql/functions.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/functions.scala b/sql/core/src/main/scala/org/apache/spark/sql/functions.scala
index 97c5aed..3572f3c 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/functions.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/functions.scala
@@ -2844,6 +2844,20 @@ object functions extends LegacyFunctions {
   // scalastyle:on line.size.limit
 
   /**
+   * Defines a user-defined function (UDF) using a Scala closure. For this variant, the caller must
+   * specify the output data type, and there is no automatic input type coercion.
+   *
+   * @param f  A closure in Scala
+   * @param dataType  The output data type of the UDF
+   *
+   * @group udf_funcs
+   * @since 2.0.0
+   */
+  def udf(f: AnyRef, dataType: DataType): UserDefinedFunction = {
+    UserDefinedFunction(f, dataType, None)
+  }
+
+  /**
   * Call a user-defined function.
    * Example:
    * {{{


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org