You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by we...@apache.org on 2020/02/06 12:39:59 UTC
[spark] branch branch-3.0 updated: [SPARK-27297][DOC][FOLLOW-UP]
Improve documentation for various Scala functions
This is an automated email from the ASF dual-hosted git repository.
wenchen pushed a commit to branch branch-3.0
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/branch-3.0 by this push:
new 659f8c8 [SPARK-27297][DOC][FOLLOW-UP] Improve documentation for various Scala functions
659f8c8 is described below
commit 659f8c8ef549fd040596978478898266d24e88ff
Author: yi.wu <yi...@databricks.com>
AuthorDate: Thu Feb 6 20:34:29 2020 +0800
[SPARK-27297][DOC][FOLLOW-UP] Improve documentation for various Scala functions
### What changes were proposed in this pull request?
Add examples and parameter description for these Scala functions:
* transform
* exists
* forall
* aggregate
* zip_with
* transform_keys
* transform_values
* map_filter
* map_zip_with
### Why are the changes needed?
Better documentation for UX.
### Does this PR introduce any user-facing change?
No.
### How was this patch tested?
Pass Jenkins.
Closes #27449 from Ngone51/doc-funcs.
Authored-by: yi.wu <yi...@databricks.com>
Signed-off-by: Wenchen Fan <we...@databricks.com>
(cherry picked from commit 368ee62a5dce83682ccaec92feeea8428af5a8cf)
Signed-off-by: Wenchen Fan <we...@databricks.com>
---
.../scala/org/apache/spark/sql/functions.scala | 93 +++++++++++++++++++---
1 file changed, 83 insertions(+), 10 deletions(-)
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/functions.scala b/sql/core/src/main/scala/org/apache/spark/sql/functions.scala
index da26c5a..d125581 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/functions.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/functions.scala
@@ -3410,6 +3410,12 @@ object functions {
/**
* Returns an array of elements after applying a transformation to each element
* in the input array.
+ * {{{
+ * df.select(transform(col("i"), x => x + 1))
+ * }}}
+ *
+ * @param column the input array column
+ * @param f col => transformed_col, the lambda function to transform the input column
*
* @group collection_funcs
* @since 3.0.0
@@ -3421,6 +3427,13 @@ object functions {
/**
* Returns an array of elements after applying a transformation to each element
* in the input array.
+ * {{{
+ * df.select(transform(col("i"), (x, i) => x + i))
+ * }}}
+ *
+ * @param column the input array column
+ * @param f (col, index) => transformed_col, the lambda function to transform the input column
+ * given the index. Indices start at 0.
*
* @group collection_funcs
* @since 3.0.0
@@ -3431,6 +3444,12 @@ object functions {
/**
* Returns whether a predicate holds for one or more elements in the array.
+ * {{{
+ * df.select(exists(col("i"), _ % 2 === 0))
+ * }}}
+ *
+ * @param column the input array column
+ * @param f col => predicate, the Boolean predicate to check the input column
*
* @group collection_funcs
* @since 3.0.0
@@ -3441,6 +3460,12 @@ object functions {
/**
* Returns whether a predicate holds for every element in the array.
+ * {{{
+ * df.select(forall(col("i"), x => x % 2 === 0))
+ * }}}
+ *
+ * @param column the input array column
+ * @param f col => predicate, the Boolean predicate to check the input column
*
* @group collection_funcs
* @since 3.0.0
@@ -3453,11 +3478,10 @@ object functions {
* Returns an array of elements for which a predicate holds in a given array.
* {{{
* df.select(filter(col("s"), x => x % 2 === 0))
- * df.selectExpr("filter(col, x -> x % 2 == 0)")
* }}}
*
- * @param column: the input array column
- * @param f: col => predicate, the Boolean predicate to filter the input column
+ * @param column the input array column
+ * @param f col => predicate, the Boolean predicate to filter the input column
*
* @group collection_funcs
* @since 3.0.0
@@ -3470,11 +3494,10 @@ object functions {
* Returns an array of elements for which a predicate holds in a given array.
* {{{
* df.select(filter(col("s"), (x, i) => i % 2 === 0))
- * df.selectExpr("filter(col, (x, i) -> i % 2 == 0)")
* }}}
*
- * @param column: the input array column
- * @param f: (col, index) => predicate, the Boolean predicate to filter the input column
+ * @param column the input array column
+ * @param f (col, index) => predicate, the Boolean predicate to filter the input column
* given the index. Indices start at 0.
*
* @group collection_funcs
@@ -3488,18 +3511,28 @@ object functions {
* Applies a binary operator to an initial state and all elements in the array,
* and reduces this to a single state. The final state is converted into the final result
* by applying a finish function.
+ * {{{
+ * df.select(aggregate(col("i"), lit(0), (acc, x) => acc + x, _ * 10))
+ * }}}
+ *
+ * @param expr the input array column
+ * @param initialValue the initial value
+ * @param merge (combined_value, input_value) => combined_value, the merge function to merge
+ * an input value to the combined_value
+ * @param finish combined_value => final_value, the lambda function to convert the combined value
+ * of all inputs to the final result
*
* @group collection_funcs
* @since 3.0.0
*/
def aggregate(
expr: Column,
- zero: Column,
+ initialValue: Column,
merge: (Column, Column) => Column,
finish: Column => Column): Column = withExpr {
ArrayAggregate(
expr.expr,
- zero.expr,
+ initialValue.expr,
createLambda(merge),
createLambda(finish)
)
@@ -3508,17 +3541,31 @@ object functions {
/**
* Applies a binary operator to an initial state and all elements in the array,
* and reduces this to a single state.
+ * {{{
+ * df.select(aggregate(col("i"), lit(0), (acc, x) => acc + x))
+ * }}}
*
+ * @param expr the input array column
+ * @param initialValue the initial value
+ * @param merge (combined_value, input_value) => combined_value, the merge function to merge
+ * an input value to the combined_value
* @group collection_funcs
* @since 3.0.0
*/
- def aggregate(expr: Column, zero: Column, merge: (Column, Column) => Column): Column =
- aggregate(expr, zero, merge, c => c)
+ def aggregate(expr: Column, initialValue: Column, merge: (Column, Column) => Column): Column =
+ aggregate(expr, initialValue, merge, c => c)
/**
* Merge two given arrays, element-wise, into a single array using a function.
* If one array is shorter, nulls are appended at the end to match the length of the longer
* array, before applying the function.
+ * {{{
+ * df.select(zip_with(df("val1"), df("val2"), (x, y) => x + y))
+ * }}}
+ *
+ * @param left the left input array column
+ * @param right the right input array column
+ * @param f (lCol, rCol) => col, the lambda function to merge two input columns into one column
*
* @group collection_funcs
* @since 3.0.0
@@ -3530,6 +3577,12 @@ object functions {
/**
* Applies a function to every key-value pair in a map and returns
* a map with the results of those applications as the new keys for the pairs.
+ * {{{
+ * df.select(transform_keys(col("i"), (k, v) => k + v))
+ * }}}
+ *
+ * @param expr the input map column
+ * @param f (key, value) => new_key, the lambda function to transform the key of input map column
*
* @group collection_funcs
* @since 3.0.0
@@ -3541,6 +3594,13 @@ object functions {
/**
* Applies a function to every key-value pair in a map and returns
* a map with the results of those applications as the new values for the pairs.
+ * {{{
+ * df.select(transform_values(col("i"), (k, v) => k + v))
+ * }}}
+ *
+ * @param expr the input map column
+ * @param f (key, value) => new_value, the lambda function to transform the value of input map
+ * column
*
* @group collection_funcs
* @since 3.0.0
@@ -3551,6 +3611,12 @@ object functions {
/**
* Returns a map whose key-value pairs satisfy a predicate.
+ * {{{
+ * df.select(map_filter(col("m"), (k, v) => k * 10 === v))
+ * }}}
+ *
+ * @param expr the input map column
+ * @param f (key, value) => predicate, the Boolean predicate to filter the input map column
*
* @group collection_funcs
* @since 3.0.0
@@ -3561,6 +3627,13 @@ object functions {
/**
* Merge two given maps, key-wise into a single map using a function.
+ * {{{
+ * df.select(map_zip_with(df("m1"), df("m2"), (k, v1, v2) => k === v1 + v2))
+ * }}}
+ *
+ * @param left the left input map column
+ * @param right the right input map column
+ * @param f (key, value1, value2) => new_value, the lambda function to merge the map values
*
* @group collection_funcs
* @since 3.0.0
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org