You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by fe...@apache.org on 2017/06/30 06:00:52 UTC
spark git commit: [SPARK-20889][SPARKR] Grouped documentation for
COLLECTION column methods
Repository: spark
Updated Branches:
refs/heads/master fddb63f46 -> 52981715b
[SPARK-20889][SPARKR] Grouped documentation for COLLECTION column methods
## What changes were proposed in this pull request?
Grouped documentation for column collection methods.
Author: actuaryzhang <ac...@gmail.com>
Author: Wayne Zhang <ac...@gmail.com>
Closes #18458 from actuaryzhang/sparkRDocCollection.
Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/52981715
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/52981715
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/52981715
Branch: refs/heads/master
Commit: 52981715bb8d653a1141f55b36da804412eb783a
Parents: fddb63f
Author: actuaryzhang <ac...@gmail.com>
Authored: Thu Jun 29 23:00:50 2017 -0700
Committer: Felix Cheung <fe...@apache.org>
Committed: Thu Jun 29 23:00:50 2017 -0700
----------------------------------------------------------------------
R/pkg/R/functions.R | 204 +++++++++++++++++++++--------------------------
R/pkg/R/generics.R | 27 ++++---
2 files changed, 108 insertions(+), 123 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/spark/blob/52981715/R/pkg/R/functions.R
----------------------------------------------------------------------
diff --git a/R/pkg/R/functions.R b/R/pkg/R/functions.R
index 67cb7a7..a1f5c4f 100644
--- a/R/pkg/R/functions.R
+++ b/R/pkg/R/functions.R
@@ -171,6 +171,35 @@ NULL
#' }
NULL
+#' Collection functions for Column operations
+#'
+#' Collection functions defined for \code{Column}.
+#'
+#' @param x Column to compute on. Note the difference in the following methods:
+#' \itemize{
+#' \item \code{to_json}: it is the column containing the struct or array of the structs.
+#' \item \code{from_json}: it is the column containing the JSON string.
+#' }
+#' @param ... additional argument(s). In \code{to_json} and \code{from_json}, this contains
+#' additional named properties to control how the column is converted; it accepts
+#' the same options as the JSON data source.
+#' @name column_collection_functions
+#' @rdname column_collection_functions
+#' @family collection functions
+#' @examples
+#' \dontrun{
+#' # Dataframe used throughout this doc
+#' df <- createDataFrame(cbind(model = rownames(mtcars), mtcars))
+#' tmp <- mutate(df, v1 = create_array(df$mpg, df$cyl, df$hp))
+#' head(select(tmp, array_contains(tmp$v1, 21), size(tmp$v1)))
+#' tmp2 <- mutate(tmp, v2 = explode(tmp$v1))
+#' head(tmp2)
+#' head(select(tmp, posexplode(tmp$v1)))
+#' head(select(tmp, sort_array(tmp$v1)))
+#' head(select(tmp, sort_array(tmp$v1, asc = FALSE)))}
+NULL
+
#' @details
#' \code{lit}: A new Column is created to represent the literal value.
#' If the parameter is a Column, it is returned unchanged.
@@ -1642,30 +1671,23 @@ setMethod("to_date",
column(jc)
})
-#' to_json
-#'
-#' Converts a column containing a \code{structType} or array of \code{structType} into a Column
-#' of JSON string. Resolving the Column can fail if an unsupported type is encountered.
-#'
-#' @param x Column containing the struct or array of the structs
-#' @param ... additional named properties to control how it is converted, accepts the same options
-#' as the JSON data source.
+#' @details
+#' \code{to_json}: Converts a column containing a \code{structType} or array of \code{structType}
+#' into a Column of JSON string. Resolving the Column can fail if an unsupported type is encountered.
#'
-#' @family non-aggregate functions
-#' @rdname to_json
-#' @name to_json
-#' @aliases to_json,Column-method
+#' @rdname column_collection_functions
+#' @aliases to_json to_json,Column-method
#' @export
#' @examples
+#'
#' \dontrun{
#' # Converts a struct into a JSON object
-#' df <- sql("SELECT named_struct('date', cast('2000-01-01' as date)) as d")
-#' select(df, to_json(df$d, dateFormat = 'dd/MM/yyyy'))
+#' df2 <- sql("SELECT named_struct('date', cast('2000-01-01' as date)) as d")
+#' select(df2, to_json(df2$d, dateFormat = 'dd/MM/yyyy'))
#'
#' # Converts an array of structs into a JSON array
-#' df <- sql("SELECT array(named_struct('name', 'Bob'), named_struct('name', 'Alice')) as people")
-#' select(df, to_json(df$people))
-#'}
+#' df2 <- sql("SELECT array(named_struct('name', 'Bob'), named_struct('name', 'Alice')) as people")
+#' df2 <- mutate(df2, people_json = to_json(df2$people))}
#' @note to_json since 2.2.0
setMethod("to_json", signature(x = "Column"),
function(x, ...) {
@@ -2120,28 +2142,28 @@ setMethod("date_format", signature(y = "Column", x = "character"),
column(jc)
})
-#' from_json
-#'
-#' Parses a column containing a JSON string into a Column of \code{structType} with the specified
-#' \code{schema} or array of \code{structType} if \code{as.json.array} is set to \code{TRUE}.
-#' If the string is unparseable, the Column will contains the value NA.
+#' @details
+#' \code{from_json}: Parses a column containing a JSON string into a Column of \code{structType}
+#' with the specified \code{schema} or array of \code{structType} if \code{as.json.array} is set
+#' to \code{TRUE}. If the string is unparseable, the Column will contain the value NA.
#'
-#' @param x Column containing the JSON string.
+#' @rdname column_collection_functions
#' @param schema a structType object to use as the schema when parsing the JSON string.
#' @param as.json.array indicating if input string is JSON array of objects or a single object.
-#' @param ... additional named properties to control how the json is parsed, accepts the same
-#' options as the JSON data source.
-#'
-#' @family non-aggregate functions
-#' @rdname from_json
-#' @name from_json
-#' @aliases from_json,Column,structType-method
+#' @aliases from_json from_json,Column,structType-method
#' @export
#' @examples
+#'
#' \dontrun{
-#' schema <- structType(structField("name", "string"),
-#' select(df, from_json(df$value, schema, dateFormat = "dd/MM/yyyy"))
-#'}
+#' df2 <- sql("SELECT named_struct('date', cast('2000-01-01' as date)) as d")
+#' df2 <- mutate(df2, d2 = to_json(df2$d, dateFormat = 'dd/MM/yyyy'))
+#' schema <- structType(structField("date", "string"))
+#' head(select(df2, from_json(df2$d2, schema, dateFormat = 'dd/MM/yyyy')))
+#'
+#' df2 <- sql("SELECT named_struct('name', 'Bob') as people")
+#' df2 <- mutate(df2, people_json = to_json(df2$people))
+#' schema <- structType(structField("name", "string"))
+#' head(select(df2, from_json(df2$people_json, schema)))}
#' @note from_json since 2.2.0
setMethod("from_json", signature(x = "Column", schema = "structType"),
function(x, schema, as.json.array = FALSE, ...) {
@@ -3101,18 +3123,14 @@ setMethod("row_number",
###################### Collection functions######################
-#' array_contains
-#'
-#' Returns null if the array is null, true if the array contains the value, and false otherwise.
+#' @details
+#' \code{array_contains}: Returns null if the array is null, true if the array contains
+#' the value, and false otherwise.
#'
-#' @param x A Column
#' @param value A value to be checked if contained in the column
-#' @rdname array_contains
-#' @aliases array_contains,Column-method
-#' @name array_contains
-#' @family collection functions
+#' @rdname column_collection_functions
+#' @aliases array_contains array_contains,Column-method
#' @export
-#' @examples \dontrun{array_contains(df$c, 1)}
#' @note array_contains since 1.6.0
setMethod("array_contains",
signature(x = "Column", value = "ANY"),
@@ -3121,18 +3139,12 @@ setMethod("array_contains",
column(jc)
})
-#' explode
-#'
-#' Creates a new row for each element in the given array or map column.
-#'
-#' @param x Column to compute on
+#' @details
+#' \code{explode}: Creates a new row for each element in the given array or map column.
#'
-#' @rdname explode
-#' @name explode
-#' @family collection functions
-#' @aliases explode,Column-method
+#' @rdname column_collection_functions
+#' @aliases explode explode,Column-method
#' @export
-#' @examples \dontrun{explode(df$c)}
#' @note explode since 1.5.0
setMethod("explode",
signature(x = "Column"),
@@ -3141,18 +3153,12 @@ setMethod("explode",
column(jc)
})
-#' size
-#'
-#' Returns length of array or map.
-#'
-#' @param x Column to compute on
+#' @details
+#' \code{size}: Returns length of array or map.
#'
-#' @rdname size
-#' @name size
-#' @aliases size,Column-method
-#' @family collection functions
+#' @rdname column_collection_functions
+#' @aliases size size,Column-method
#' @export
-#' @examples \dontrun{size(df$c)}
#' @note size since 1.5.0
setMethod("size",
signature(x = "Column"),
@@ -3161,25 +3167,16 @@ setMethod("size",
column(jc)
})
-#' sort_array
-#'
-#' Sorts the input array in ascending or descending order according
+#' @details
+#' \code{sort_array}: Sorts the input array in ascending or descending order according
#' to the natural ordering of the array elements.
#'
-#' @param x A Column to sort
+#' @rdname column_collection_functions
#' @param asc A logical flag indicating the sorting order.
#' TRUE, sorting is in ascending order.
#' FALSE, sorting is in descending order.
-#' @rdname sort_array
-#' @name sort_array
-#' @aliases sort_array,Column-method
-#' @family collection functions
+#' @aliases sort_array sort_array,Column-method
#' @export
-#' @examples
-#' \dontrun{
-#' sort_array(df$c)
-#' sort_array(df$c, FALSE)
-#' }
#' @note sort_array since 1.6.0
setMethod("sort_array",
signature(x = "Column"),
@@ -3188,18 +3185,13 @@ setMethod("sort_array",
column(jc)
})
-#' posexplode
-#'
-#' Creates a new row for each element with position in the given array or map column.
-#'
-#' @param x Column to compute on
+#' @details
+#' \code{posexplode}: Creates a new row for each element with position in the given array
+#' or map column.
#'
-#' @rdname posexplode
-#' @name posexplode
-#' @family collection functions
-#' @aliases posexplode,Column-method
+#' @rdname column_collection_functions
+#' @aliases posexplode posexplode,Column-method
#' @export
-#' @examples \dontrun{posexplode(df$c)}
#' @note posexplode since 2.1.0
setMethod("posexplode",
signature(x = "Column"),
@@ -3325,27 +3317,24 @@ setMethod("repeat_string",
column(jc)
})
-#' explode_outer
-#'
-#' Creates a new row for each element in the given array or map column.
+#' @details
+#' \code{explode_outer}: Creates a new row for each element in the given array or map column.
#' Unlike \code{explode}, if the array/map is \code{null} or empty
#' then \code{null} is produced.
#'
-#' @param x Column to compute on
#'
-#' @rdname explode_outer
-#' @name explode_outer
-#' @family collection functions
-#' @aliases explode_outer,Column-method
+#' @rdname column_collection_functions
+#' @aliases explode_outer explode_outer,Column-method
#' @export
#' @examples
+#'
#' \dontrun{
-#' df <- createDataFrame(data.frame(
+#' df2 <- createDataFrame(data.frame(
#' id = c(1, 2, 3), text = c("a,b,c", NA, "d,e")
#' ))
#'
-#' head(select(df, df$id, explode_outer(split_string(df$text, ","))))
-#' }
+#' head(select(df2, df2$id, explode_outer(split_string(df2$text, ","))))
+#' head(select(df2, df2$id, posexplode_outer(split_string(df2$text, ","))))}
#' @note explode_outer since 2.3.0
setMethod("explode_outer",
signature(x = "Column"),
@@ -3354,27 +3343,14 @@ setMethod("explode_outer",
column(jc)
})
-#' posexplode_outer
-#'
-#' Creates a new row for each element with position in the given array or map column.
-#' Unlike \code{posexplode}, if the array/map is \code{null} or empty
+#' @details
+#' \code{posexplode_outer}: Creates a new row for each element with position in the given
+#' array or map column. Unlike \code{posexplode}, if the array/map is \code{null} or empty
#' then the row (\code{null}, \code{null}) is produced.
#'
-#' @param x Column to compute on
-#'
-#' @rdname posexplode_outer
-#' @name posexplode_outer
-#' @family collection functions
-#' @aliases posexplode_outer,Column-method
+#' @rdname column_collection_functions
+#' @aliases posexplode_outer posexplode_outer,Column-method
#' @export
-#' @examples
-#' \dontrun{
-#' df <- createDataFrame(data.frame(
-#' id = c(1, 2, 3), text = c("a,b,c", NA, "d,e")
-#' ))
-#'
-#' head(select(df, df$id, posexplode_outer(split_string(df$text, ","))))
-#' }
#' @note posexplode_outer since 2.3.0
setMethod("posexplode_outer",
signature(x = "Column"),
http://git-wip-us.apache.org/repos/asf/spark/blob/52981715/R/pkg/R/generics.R
----------------------------------------------------------------------
diff --git a/R/pkg/R/generics.R b/R/pkg/R/generics.R
index bdd4b36..b901b74 100644
--- a/R/pkg/R/generics.R
+++ b/R/pkg/R/generics.R
@@ -913,8 +913,9 @@ setGeneric("add_months", function(y, x) { standardGeneric("add_months") })
#' @name NULL
setGeneric("approxCountDistinct", function(x, ...) { standardGeneric("approxCountDistinct") })
-#' @rdname array_contains
+#' @rdname column_collection_functions
#' @export
+#' @name NULL
setGeneric("array_contains", function(x, value) { standardGeneric("array_contains") })
#' @rdname column_string_functions
@@ -1062,12 +1063,14 @@ setGeneric("dense_rank", function(x = "missing") { standardGeneric("dense_rank")
#' @name NULL
setGeneric("encode", function(x, charset) { standardGeneric("encode") })
-#' @rdname explode
+#' @rdname column_collection_functions
#' @export
+#' @name NULL
setGeneric("explode", function(x) { standardGeneric("explode") })
-#' @rdname explode_outer
+#' @rdname column_collection_functions
#' @export
+#' @name NULL
setGeneric("explode_outer", function(x) { standardGeneric("explode_outer") })
#' @rdname column_nonaggregate_functions
@@ -1090,8 +1093,9 @@ setGeneric("format_number", function(y, x) { standardGeneric("format_number") })
#' @name NULL
setGeneric("format_string", function(format, x, ...) { standardGeneric("format_string") })
-#' @rdname from_json
+#' @rdname column_collection_functions
#' @export
+#' @name NULL
setGeneric("from_json", function(x, schema, ...) { standardGeneric("from_json") })
#' @rdname column_datetime_functions
@@ -1275,12 +1279,14 @@ setGeneric("percent_rank", function(x = "missing") { standardGeneric("percent_ra
#' @name NULL
setGeneric("pmod", function(y, x) { standardGeneric("pmod") })
-#' @rdname posexplode
+#' @rdname column_collection_functions
#' @export
+#' @name NULL
setGeneric("posexplode", function(x) { standardGeneric("posexplode") })
-#' @rdname posexplode_outer
+#' @rdname column_collection_functions
#' @export
+#' @name NULL
setGeneric("posexplode_outer", function(x) { standardGeneric("posexplode_outer") })
#' @rdname column_datetime_functions
@@ -1383,8 +1389,9 @@ setGeneric("shiftRightUnsigned", function(y, x) { standardGeneric("shiftRightUns
#' @name NULL
setGeneric("signum", function(x) { standardGeneric("signum") })
-#' @rdname size
+#' @rdname column_collection_functions
#' @export
+#' @name NULL
setGeneric("size", function(x) { standardGeneric("size") })
#' @rdname column_aggregate_functions
@@ -1392,8 +1399,9 @@ setGeneric("size", function(x) { standardGeneric("size") })
#' @name NULL
setGeneric("skewness", function(x) { standardGeneric("skewness") })
-#' @rdname sort_array
+#' @rdname column_collection_functions
#' @export
+#' @name NULL
setGeneric("sort_array", function(x, asc = TRUE) { standardGeneric("sort_array") })
#' @rdname column_string_functions
@@ -1456,8 +1464,9 @@ setGeneric("toRadians", function(x) { standardGeneric("toRadians") })
#' @name NULL
setGeneric("to_date", function(x, format) { standardGeneric("to_date") })
-#' @rdname to_json
+#' @rdname column_collection_functions
#' @export
+#' @name NULL
setGeneric("to_json", function(x, ...) { standardGeneric("to_json") })
#' @rdname column_datetime_functions
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org