You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by sr...@apache.org on 2016/10/07 10:49:40 UTC

spark git commit: [SPARK-16960][SQL] Deprecate approxCountDistinct, toDegrees and toRadians according to FunctionRegistry

Repository: spark
Updated Branches:
  refs/heads/master 24097d847 -> 2b01d3c70


[SPARK-16960][SQL] Deprecate approxCountDistinct, toDegrees and toRadians according to FunctionRegistry

## What changes were proposed in this pull request?

It seems `approxCountDistinct`, `toDegrees` and `toRadians` were also missed when matching the function names to the ones in `FunctionRegistry` (see [approx_count_distinct](https://github.com/apache/spark/blob/5c2ae79bfcf448d8dc9217efafa1409997c739de/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala#L244), [degrees](https://github.com/apache/spark/blob/5c2ae79bfcf448d8dc9217efafa1409997c739de/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala#L203) and [radians](https://github.com/apache/spark/blob/5c2ae79bfcf448d8dc9217efafa1409997c739de/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala#L222) in `FunctionRegistry`).

I scanned `functions.scala` against `FunctionRegistry`, and these appear to be the only ones left. `countDistinct` and `sumDistinct` are not registered in `FunctionRegistry`, so they have no registered name to match.

This PR deprecates `approxCountDistinct`, `toDegrees` and `toRadians` and introduces `approx_count_distinct`, `degrees` and `radians`.

## How was this patch tested?

Existing tests should cover this.

Author: hyukjinkwon <gu...@gmail.com>
Author: Hyukjin Kwon <gu...@gmail.com>

Closes #14538 from HyukjinKwon/SPARK-16588-followup.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/2b01d3c7
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/2b01d3c7
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/2b01d3c7

Branch: refs/heads/master
Commit: 2b01d3c701c58f07fa42afd570523dd161384882
Parents: 24097d8
Author: hyukjinkwon <gu...@gmail.com>
Authored: Fri Oct 7 11:49:34 2016 +0100
Committer: Sean Owen <so...@cloudera.com>
Committed: Fri Oct 7 11:49:34 2016 +0100

----------------------------------------------------------------------
 python/pyspark/sql/functions.py                 | 33 +++++--
 .../scala/org/apache/spark/sql/functions.scala  | 91 ++++++++++++++++----
 .../apache/spark/sql/DataFrameWindowSuite.scala |  2 +-
 .../apache/spark/sql/MathExpressionsSuite.scala | 12 +--
 4 files changed, 105 insertions(+), 33 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/2b01d3c7/python/pyspark/sql/functions.py
----------------------------------------------------------------------
diff --git a/python/pyspark/sql/functions.py b/python/pyspark/sql/functions.py
index 45d6bf9..7fa3fd2 100644
--- a/python/pyspark/sql/functions.py
+++ b/python/pyspark/sql/functions.py
@@ -112,11 +112,8 @@ _functions_1_4 = {
     'sinh': 'Computes the hyperbolic sine of the given value.',
     'tan': 'Computes the tangent of the given value.',
     'tanh': 'Computes the hyperbolic tangent of the given value.',
-    'toDegrees': 'Converts an angle measured in radians to an approximately equivalent angle ' +
-                 'measured in degrees.',
-    'toRadians': 'Converts an angle measured in degrees to an approximately equivalent angle ' +
-                 'measured in radians.',
-
+    'toDegrees': '.. note:: Deprecated in 2.1, use degrees instead.',
+    'toRadians': '.. note:: Deprecated in 2.1, use radians instead.',
     'bitwiseNOT': 'Computes bitwise not.',
 }
 
@@ -135,7 +132,15 @@ _functions_1_6 = {
     'kurtosis': 'Aggregate function: returns the kurtosis of the values in a group.',
     'collect_list': 'Aggregate function: returns a list of objects with duplicates.',
     'collect_set': 'Aggregate function: returns a set of objects with duplicate elements' +
-                   ' eliminated.'
+                   ' eliminated.',
+}
+
+_functions_2_1 = {
+    # unary math functions
+    'degrees': 'Converts an angle measured in radians to an approximately equivalent angle ' +
+               'measured in degrees.',
+    'radians': 'Converts an angle measured in degrees to an approximately equivalent angle ' +
+               'measured in radians.',
 }
 
 # math functions that take two arguments as input
@@ -182,21 +187,31 @@ for _name, _doc in _window_functions.items():
     globals()[_name] = since(1.6)(_create_window_function(_name, _doc))
 for _name, _doc in _functions_1_6.items():
     globals()[_name] = since(1.6)(_create_function(_name, _doc))
+for _name, _doc in _functions_2_1.items():
+    globals()[_name] = since(2.1)(_create_function(_name, _doc))
 del _name, _doc
 
 
 @since(1.3)
 def approxCountDistinct(col, rsd=None):
+    """
+    .. note:: Deprecated in 2.1, use approx_count_distinct instead.
+    """
+    return approx_count_distinct(col, rsd)
+
+
+@since(2.1)
+def approx_count_distinct(col, rsd=None):
     """Returns a new :class:`Column` for approximate distinct count of ``col``.
 
-    >>> df.agg(approxCountDistinct(df.age).alias('c')).collect()
+    >>> df.agg(approx_count_distinct(df.age).alias('c')).collect()
     [Row(c=2)]
     """
     sc = SparkContext._active_spark_context
     if rsd is None:
-        jc = sc._jvm.functions.approxCountDistinct(_to_java_column(col))
+        jc = sc._jvm.functions.approx_count_distinct(_to_java_column(col))
     else:
-        jc = sc._jvm.functions.approxCountDistinct(_to_java_column(col), rsd)
+        jc = sc._jvm.functions.approx_count_distinct(_to_java_column(col), rsd)
     return Column(jc)
 
 

http://git-wip-us.apache.org/repos/asf/spark/blob/2b01d3c7/sql/core/src/main/scala/org/apache/spark/sql/functions.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/functions.scala b/sql/core/src/main/scala/org/apache/spark/sql/functions.scala
index 3bc1c5b..40f82d8 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/functions.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/functions.scala
@@ -183,12 +183,42 @@ object functions {
   //////////////////////////////////////////////////////////////////////////////////////////////
 
   /**
+   * @group agg_funcs
+   * @since 1.3.0
+   */
+  @deprecated("Use approx_count_distinct", "2.1.0")
+  def approxCountDistinct(e: Column): Column = approx_count_distinct(e)
+
+  /**
+   * @group agg_funcs
+   * @since 1.3.0
+   */
+  @deprecated("Use approx_count_distinct", "2.1.0")
+  def approxCountDistinct(columnName: String): Column = approx_count_distinct(columnName)
+
+  /**
+   * @group agg_funcs
+   * @since 1.3.0
+   */
+  @deprecated("Use approx_count_distinct", "2.1.0")
+  def approxCountDistinct(e: Column, rsd: Double): Column = approx_count_distinct(e, rsd)
+
+  /**
+   * @group agg_funcs
+   * @since 1.3.0
+   */
+  @deprecated("Use approx_count_distinct", "2.1.0")
+  def approxCountDistinct(columnName: String, rsd: Double): Column = {
+    approx_count_distinct(Column(columnName), rsd)
+  }
+
+  /**
    * Aggregate function: returns the approximate number of distinct items in a group.
    *
    * @group agg_funcs
-   * @since 1.3.0
+   * @since 2.1.0
    */
-  def approxCountDistinct(e: Column): Column = withAggregateFunction {
+  def approx_count_distinct(e: Column): Column = withAggregateFunction {
     HyperLogLogPlusPlus(e.expr)
   }
 
@@ -196,9 +226,9 @@ object functions {
    * Aggregate function: returns the approximate number of distinct items in a group.
    *
    * @group agg_funcs
-   * @since 1.3.0
+   * @since 2.1.0
    */
-  def approxCountDistinct(columnName: String): Column = approxCountDistinct(column(columnName))
+  def approx_count_distinct(columnName: String): Column = approx_count_distinct(column(columnName))
 
   /**
    * Aggregate function: returns the approximate number of distinct items in a group.
@@ -206,9 +236,9 @@ object functions {
    * @param rsd maximum estimation error allowed (default = 0.05)
    *
    * @group agg_funcs
-   * @since 1.3.0
+   * @since 2.1.0
    */
-  def approxCountDistinct(e: Column, rsd: Double): Column = withAggregateFunction {
+  def approx_count_distinct(e: Column, rsd: Double): Column = withAggregateFunction {
     HyperLogLogPlusPlus(e.expr, rsd, 0, 0)
   }
 
@@ -218,10 +248,10 @@ object functions {
    * @param rsd maximum estimation error allowed (default = 0.05)
    *
    * @group agg_funcs
-   * @since 1.3.0
+   * @since 2.1.0
    */
-  def approxCountDistinct(columnName: String, rsd: Double): Column = {
-    approxCountDistinct(Column(columnName), rsd)
+  def approx_count_distinct(columnName: String, rsd: Double): Column = {
+    approx_count_distinct(Column(columnName), rsd)
   }
 
   /**
@@ -1950,36 +1980,64 @@ object functions {
   def tanh(columnName: String): Column = tanh(Column(columnName))
 
   /**
+   * @group math_funcs
+   * @since 1.4.0
+   */
+  @deprecated("Use degrees", "2.1.0")
+  def toDegrees(e: Column): Column = degrees(e)
+
+  /**
+   * @group math_funcs
+   * @since 1.4.0
+   */
+  @deprecated("Use degrees", "2.1.0")
+  def toDegrees(columnName: String): Column = degrees(Column(columnName))
+
+  /**
    * Converts an angle measured in radians to an approximately equivalent angle measured in degrees.
    *
    * @group math_funcs
-   * @since 1.4.0
+   * @since 2.1.0
    */
-  def toDegrees(e: Column): Column = withExpr { ToDegrees(e.expr) }
+  def degrees(e: Column): Column = withExpr { ToDegrees(e.expr) }
 
   /**
    * Converts an angle measured in radians to an approximately equivalent angle measured in degrees.
    *
    * @group math_funcs
+   * @since 2.1.0
+   */
+  def degrees(columnName: String): Column = degrees(Column(columnName))
+
+  /**
+   * @group math_funcs
+   * @since 1.4.0
+   */
+  @deprecated("Use radians", "2.1.0")
+  def toRadians(e: Column): Column = radians(e)
+
+  /**
+   * @group math_funcs
    * @since 1.4.0
    */
-  def toDegrees(columnName: String): Column = toDegrees(Column(columnName))
+  @deprecated("Use radians", "2.1.0")
+  def toRadians(columnName: String): Column = radians(Column(columnName))
 
   /**
    * Converts an angle measured in degrees to an approximately equivalent angle measured in radians.
    *
    * @group math_funcs
-   * @since 1.4.0
+   * @since 2.1.0
    */
-  def toRadians(e: Column): Column = withExpr { ToRadians(e.expr) }
+  def radians(e: Column): Column = withExpr { ToRadians(e.expr) }
 
   /**
    * Converts an angle measured in degrees to an approximately equivalent angle measured in radians.
    *
    * @group math_funcs
-   * @since 1.4.0
+   * @since 2.1.0
    */
-  def toRadians(columnName: String): Column = toRadians(Column(columnName))
+  def radians(columnName: String): Column = radians(Column(columnName))
 
   //////////////////////////////////////////////////////////////////////////////////////////////
   // Misc functions
@@ -3096,5 +3154,4 @@ object functions {
   def callUDF(udfName: String, cols: Column*): Column = withExpr {
     UnresolvedFunction(udfName, cols.map(_.expr), isDistinct = false)
   }
-
 }

http://git-wip-us.apache.org/repos/asf/spark/blob/2b01d3c7/sql/core/src/test/scala/org/apache/spark/sql/DataFrameWindowSuite.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameWindowSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameWindowSuite.scala
index c6f8c3a..c2b47ca 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameWindowSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameWindowSuite.scala
@@ -228,7 +228,7 @@ class DataFrameWindowSuite extends QueryTest with SharedSQLContext {
         $"key",
         var_pop($"value").over(window),
         var_samp($"value").over(window),
-        approxCountDistinct($"value").over(window)),
+        approx_count_distinct($"value").over(window)),
       Seq.fill(4)(Row("a", 1.0d / 4.0d, 1.0d / 3.0d, 2))
       ++ Seq.fill(3)(Row("b", 2.0d / 3.0d, 1.0d, 3)))
   }

http://git-wip-us.apache.org/repos/asf/spark/blob/2b01d3c7/sql/core/src/test/scala/org/apache/spark/sql/MathExpressionsSuite.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/MathExpressionsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/MathExpressionsSuite.scala
index 0de7f23..6944c6f 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/MathExpressionsSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/MathExpressionsSuite.scala
@@ -148,19 +148,19 @@ class MathExpressionsSuite extends QueryTest with SharedSQLContext {
     testOneToOneMathFunction(tanh, math.tanh)
   }
 
-  test("toDegrees") {
-    testOneToOneMathFunction(toDegrees, math.toDegrees)
+  test("degrees") {
+    testOneToOneMathFunction(degrees, math.toDegrees)
     checkAnswer(
       sql("SELECT degrees(0), degrees(1), degrees(1.5)"),
-      Seq((1, 2)).toDF().select(toDegrees(lit(0)), toDegrees(lit(1)), toDegrees(lit(1.5)))
+      Seq((1, 2)).toDF().select(degrees(lit(0)), degrees(lit(1)), degrees(lit(1.5)))
     )
   }
 
-  test("toRadians") {
-    testOneToOneMathFunction(toRadians, math.toRadians)
+  test("radians") {
+    testOneToOneMathFunction(radians, math.toRadians)
     checkAnswer(
       sql("SELECT radians(0), radians(1), radians(1.5)"),
-      Seq((1, 2)).toDF().select(toRadians(lit(0)), toRadians(lit(1)), toRadians(lit(1.5)))
+      Seq((1, 2)).toDF().select(radians(lit(0)), radians(lit(1)), radians(lit(1.5)))
     )
   }
 


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org