You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by sr...@apache.org on 2016/10/07 10:49:40 UTC
spark git commit: [SPARK-16960][SQL] Deprecate approxCountDistinct,
toDegrees and toRadians according to FunctionRegistry
Repository: spark
Updated Branches:
refs/heads/master 24097d847 -> 2b01d3c70
[SPARK-16960][SQL] Deprecate approxCountDistinct, toDegrees and toRadians according to FunctionRegistry
## What changes were proposed in this pull request?
It seems `approxCountDistinct`, `toDegrees` and `toRadians` were also missed when the function names in `functions.scala` were matched to the ones in `FunctionRegistry` (please see [approx_count_distinct](https://github.com/apache/spark/blob/5c2ae79bfcf448d8dc9217efafa1409997c739de/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala#L244), [degrees](https://github.com/apache/spark/blob/5c2ae79bfcf448d8dc9217efafa1409997c739de/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala#L203) and [radians](https://github.com/apache/spark/blob/5c2ae79bfcf448d8dc9217efafa1409997c739de/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala#L222) in `FunctionRegistry`).
I compared `functions.scala` against `FunctionRegistry`, and these three seem to be the only mismatched names left. `countDistinct` and `sumDistinct` are not registered in `FunctionRegistry`, so there is no registry name to match them to and they are not renamed.
This PR deprecates `approxCountDistinct`, `toDegrees` and `toRadians` and introduces `approx_count_distinct`, `degrees` and `radians`.
## How was this patch tested?
Existing tests should cover this.
Author: hyukjinkwon <gu...@gmail.com>
Author: Hyukjin Kwon <gu...@gmail.com>
Closes #14538 from HyukjinKwon/SPARK-16588-followup.
Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/2b01d3c7
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/2b01d3c7
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/2b01d3c7
Branch: refs/heads/master
Commit: 2b01d3c701c58f07fa42afd570523dd161384882
Parents: 24097d8
Author: hyukjinkwon <gu...@gmail.com>
Authored: Fri Oct 7 11:49:34 2016 +0100
Committer: Sean Owen <so...@cloudera.com>
Committed: Fri Oct 7 11:49:34 2016 +0100
----------------------------------------------------------------------
python/pyspark/sql/functions.py | 33 +++++--
.../scala/org/apache/spark/sql/functions.scala | 91 ++++++++++++++++----
.../apache/spark/sql/DataFrameWindowSuite.scala | 2 +-
.../apache/spark/sql/MathExpressionsSuite.scala | 12 +--
4 files changed, 105 insertions(+), 33 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/spark/blob/2b01d3c7/python/pyspark/sql/functions.py
----------------------------------------------------------------------
diff --git a/python/pyspark/sql/functions.py b/python/pyspark/sql/functions.py
index 45d6bf9..7fa3fd2 100644
--- a/python/pyspark/sql/functions.py
+++ b/python/pyspark/sql/functions.py
@@ -112,11 +112,8 @@ _functions_1_4 = {
'sinh': 'Computes the hyperbolic sine of the given value.',
'tan': 'Computes the tangent of the given value.',
'tanh': 'Computes the hyperbolic tangent of the given value.',
- 'toDegrees': 'Converts an angle measured in radians to an approximately equivalent angle ' +
- 'measured in degrees.',
- 'toRadians': 'Converts an angle measured in degrees to an approximately equivalent angle ' +
- 'measured in radians.',
-
+ 'toDegrees': '.. note:: Deprecated in 2.1, use degrees instead.',
+ 'toRadians': '.. note:: Deprecated in 2.1, use radians instead.',
'bitwiseNOT': 'Computes bitwise not.',
}
@@ -135,7 +132,15 @@ _functions_1_6 = {
'kurtosis': 'Aggregate function: returns the kurtosis of the values in a group.',
'collect_list': 'Aggregate function: returns a list of objects with duplicates.',
'collect_set': 'Aggregate function: returns a set of objects with duplicate elements' +
- ' eliminated.'
+ ' eliminated.',
+}
+
+_functions_2_1 = {
+ # unary math functions
+ 'degrees': 'Converts an angle measured in radians to an approximately equivalent angle ' +
+ 'measured in degrees.',
+ 'radians': 'Converts an angle measured in degrees to an approximately equivalent angle ' +
+ 'measured in radians.',
}
# math functions that take two arguments as input
@@ -182,21 +187,31 @@ for _name, _doc in _window_functions.items():
globals()[_name] = since(1.6)(_create_window_function(_name, _doc))
for _name, _doc in _functions_1_6.items():
globals()[_name] = since(1.6)(_create_function(_name, _doc))
+for _name, _doc in _functions_2_1.items():
+ globals()[_name] = since(2.1)(_create_function(_name, _doc))
del _name, _doc
@since(1.3)
def approxCountDistinct(col, rsd=None):
+ """
+ .. note:: Deprecated in 2.1, use approx_count_distinct instead.
+ """
+ return approx_count_distinct(col, rsd)
+
+
+@since(2.1)
+def approx_count_distinct(col, rsd=None):
"""Returns a new :class:`Column` for approximate distinct count of ``col``.
- >>> df.agg(approxCountDistinct(df.age).alias('c')).collect()
+ >>> df.agg(approx_count_distinct(df.age).alias('c')).collect()
[Row(c=2)]
"""
sc = SparkContext._active_spark_context
if rsd is None:
- jc = sc._jvm.functions.approxCountDistinct(_to_java_column(col))
+ jc = sc._jvm.functions.approx_count_distinct(_to_java_column(col))
else:
- jc = sc._jvm.functions.approxCountDistinct(_to_java_column(col), rsd)
+ jc = sc._jvm.functions.approx_count_distinct(_to_java_column(col), rsd)
return Column(jc)
http://git-wip-us.apache.org/repos/asf/spark/blob/2b01d3c7/sql/core/src/main/scala/org/apache/spark/sql/functions.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/functions.scala b/sql/core/src/main/scala/org/apache/spark/sql/functions.scala
index 3bc1c5b..40f82d8 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/functions.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/functions.scala
@@ -183,12 +183,42 @@ object functions {
//////////////////////////////////////////////////////////////////////////////////////////////
/**
+ * @group agg_funcs
+ * @since 1.3.0
+ */
+ @deprecated("Use approx_count_distinct", "2.1.0")
+ def approxCountDistinct(e: Column): Column = approx_count_distinct(e)
+
+ /**
+ * @group agg_funcs
+ * @since 1.3.0
+ */
+ @deprecated("Use approx_count_distinct", "2.1.0")
+ def approxCountDistinct(columnName: String): Column = approx_count_distinct(columnName)
+
+ /**
+ * @group agg_funcs
+ * @since 1.3.0
+ */
+ @deprecated("Use approx_count_distinct", "2.1.0")
+ def approxCountDistinct(e: Column, rsd: Double): Column = approx_count_distinct(e, rsd)
+
+ /**
+ * @group agg_funcs
+ * @since 1.3.0
+ */
+ @deprecated("Use approx_count_distinct", "2.1.0")
+ def approxCountDistinct(columnName: String, rsd: Double): Column = {
+ approx_count_distinct(Column(columnName), rsd)
+ }
+
+ /**
* Aggregate function: returns the approximate number of distinct items in a group.
*
* @group agg_funcs
- * @since 1.3.0
+ * @since 2.1.0
*/
- def approxCountDistinct(e: Column): Column = withAggregateFunction {
+ def approx_count_distinct(e: Column): Column = withAggregateFunction {
HyperLogLogPlusPlus(e.expr)
}
@@ -196,9 +226,9 @@ object functions {
* Aggregate function: returns the approximate number of distinct items in a group.
*
* @group agg_funcs
- * @since 1.3.0
+ * @since 2.1.0
*/
- def approxCountDistinct(columnName: String): Column = approxCountDistinct(column(columnName))
+ def approx_count_distinct(columnName: String): Column = approx_count_distinct(column(columnName))
/**
* Aggregate function: returns the approximate number of distinct items in a group.
@@ -206,9 +236,9 @@ object functions {
* @param rsd maximum estimation error allowed (default = 0.05)
*
* @group agg_funcs
- * @since 1.3.0
+ * @since 2.1.0
*/
- def approxCountDistinct(e: Column, rsd: Double): Column = withAggregateFunction {
+ def approx_count_distinct(e: Column, rsd: Double): Column = withAggregateFunction {
HyperLogLogPlusPlus(e.expr, rsd, 0, 0)
}
@@ -218,10 +248,10 @@ object functions {
* @param rsd maximum estimation error allowed (default = 0.05)
*
* @group agg_funcs
- * @since 1.3.0
+ * @since 2.1.0
*/
- def approxCountDistinct(columnName: String, rsd: Double): Column = {
- approxCountDistinct(Column(columnName), rsd)
+ def approx_count_distinct(columnName: String, rsd: Double): Column = {
+ approx_count_distinct(Column(columnName), rsd)
}
/**
@@ -1950,36 +1980,64 @@ object functions {
def tanh(columnName: String): Column = tanh(Column(columnName))
/**
+ * @group math_funcs
+ * @since 1.4.0
+ */
+ @deprecated("Use degrees", "2.1.0")
+ def toDegrees(e: Column): Column = degrees(e)
+
+ /**
+ * @group math_funcs
+ * @since 1.4.0
+ */
+ @deprecated("Use degrees", "2.1.0")
+ def toDegrees(columnName: String): Column = degrees(Column(columnName))
+
+ /**
* Converts an angle measured in radians to an approximately equivalent angle measured in degrees.
*
* @group math_funcs
- * @since 1.4.0
+ * @since 2.1.0
*/
- def toDegrees(e: Column): Column = withExpr { ToDegrees(e.expr) }
+ def degrees(e: Column): Column = withExpr { ToDegrees(e.expr) }
/**
* Converts an angle measured in radians to an approximately equivalent angle measured in degrees.
*
* @group math_funcs
+ * @since 2.1.0
+ */
+ def degrees(columnName: String): Column = degrees(Column(columnName))
+
+ /**
+ * @group math_funcs
+ * @since 1.4.0
+ */
+ @deprecated("Use radians", "2.1.0")
+ def toRadians(e: Column): Column = radians(e)
+
+ /**
+ * @group math_funcs
* @since 1.4.0
*/
- def toDegrees(columnName: String): Column = toDegrees(Column(columnName))
+ @deprecated("Use radians", "2.1.0")
+ def toRadians(columnName: String): Column = radians(Column(columnName))
/**
* Converts an angle measured in degrees to an approximately equivalent angle measured in radians.
*
* @group math_funcs
- * @since 1.4.0
+ * @since 2.1.0
*/
- def toRadians(e: Column): Column = withExpr { ToRadians(e.expr) }
+ def radians(e: Column): Column = withExpr { ToRadians(e.expr) }
/**
* Converts an angle measured in degrees to an approximately equivalent angle measured in radians.
*
* @group math_funcs
- * @since 1.4.0
+ * @since 2.1.0
*/
- def toRadians(columnName: String): Column = toRadians(Column(columnName))
+ def radians(columnName: String): Column = radians(Column(columnName))
//////////////////////////////////////////////////////////////////////////////////////////////
// Misc functions
@@ -3096,5 +3154,4 @@ object functions {
def callUDF(udfName: String, cols: Column*): Column = withExpr {
UnresolvedFunction(udfName, cols.map(_.expr), isDistinct = false)
}
-
}
http://git-wip-us.apache.org/repos/asf/spark/blob/2b01d3c7/sql/core/src/test/scala/org/apache/spark/sql/DataFrameWindowSuite.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameWindowSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameWindowSuite.scala
index c6f8c3a..c2b47ca 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameWindowSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameWindowSuite.scala
@@ -228,7 +228,7 @@ class DataFrameWindowSuite extends QueryTest with SharedSQLContext {
$"key",
var_pop($"value").over(window),
var_samp($"value").over(window),
- approxCountDistinct($"value").over(window)),
+ approx_count_distinct($"value").over(window)),
Seq.fill(4)(Row("a", 1.0d / 4.0d, 1.0d / 3.0d, 2))
++ Seq.fill(3)(Row("b", 2.0d / 3.0d, 1.0d, 3)))
}
http://git-wip-us.apache.org/repos/asf/spark/blob/2b01d3c7/sql/core/src/test/scala/org/apache/spark/sql/MathExpressionsSuite.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/MathExpressionsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/MathExpressionsSuite.scala
index 0de7f23..6944c6f 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/MathExpressionsSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/MathExpressionsSuite.scala
@@ -148,19 +148,19 @@ class MathExpressionsSuite extends QueryTest with SharedSQLContext {
testOneToOneMathFunction(tanh, math.tanh)
}
- test("toDegrees") {
- testOneToOneMathFunction(toDegrees, math.toDegrees)
+ test("degrees") {
+ testOneToOneMathFunction(degrees, math.toDegrees)
checkAnswer(
sql("SELECT degrees(0), degrees(1), degrees(1.5)"),
- Seq((1, 2)).toDF().select(toDegrees(lit(0)), toDegrees(lit(1)), toDegrees(lit(1.5)))
+ Seq((1, 2)).toDF().select(degrees(lit(0)), degrees(lit(1)), degrees(lit(1.5)))
)
}
- test("toRadians") {
- testOneToOneMathFunction(toRadians, math.toRadians)
+ test("radians") {
+ testOneToOneMathFunction(radians, math.toRadians)
checkAnswer(
sql("SELECT radians(0), radians(1), radians(1.5)"),
- Seq((1, 2)).toDF().select(toRadians(lit(0)), toRadians(lit(1)), toRadians(lit(1.5)))
+ Seq((1, 2)).toDF().select(radians(lit(0)), radians(lit(1)), radians(lit(1.5)))
)
}
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org