You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by fe...@apache.org on 2017/06/29 02:31:57 UTC
spark git commit: [SPARK-20889][SPARKR] Grouped documentation for STRING column methods

Repository: spark
Updated Branches:
  refs/heads/master b72b8521d -> 376d90d55


[SPARK-20889][SPARKR] Grouped documentation for STRING column methods

## What changes were proposed in this pull request?

Grouped documentation for string column methods.

Author: actuaryzhang <ac...@gmail.com>
Author: Wayne Zhang <ac...@gmail.com>

Closes #18366 from actuaryzhang/sparkRDocString.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/376d90d5
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/376d90d5
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/376d90d5

Branch: refs/heads/master
Commit: 376d90d556fcd4fd84f70ee42a1323e1f48f829d
Parents: b72b852
Author: actuaryzhang <ac...@gmail.com>
Authored: Wed Jun 28 19:31:54 2017 -0700
Committer: Felix Cheung <fe...@apache.org>
Committed: Wed Jun 28 19:31:54 2017 -0700

----------------------------------------------------------------------
 R/pkg/R/functions.R | 573 ++++++++++++++++++++---------------------------
 R/pkg/R/generics.R  |  84 ++++---
 2 files changed, 300 insertions(+), 357 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/376d90d5/R/pkg/R/functions.R
----------------------------------------------------------------------
diff --git a/R/pkg/R/functions.R b/R/pkg/R/functions.R
index 23ccdf9..70ea620 100644
--- a/R/pkg/R/functions.R
+++ b/R/pkg/R/functions.R
@@ -111,6 +111,27 @@ NULL
 #' head(tmp)}
 NULL
 
+#' String functions for Column operations
+#'
+#' String functions defined for \code{Column}.
+#'
+#' @param x Column to compute on except in the following methods:
+#'      \itemize{
+#'      \item \code{instr}: \code{character}, the substring to check. See 'Details'.
+#'      \item \code{format_number}: \code{numeric}, the number of decimal places to
+#'           format to. See 'Details'.
+#'      }
+#' @param y Column to compute on.
+#' @param ... additional columns.
+#' @name column_string_functions
+#' @rdname column_string_functions
+#' @family string functions
+#' @examples
+#' \dontrun{
+#' # Dataframe used throughout this doc
+#' df <- createDataFrame(as.data.frame(Titanic, stringsAsFactors = FALSE))}
+NULL
+
 #' lit
 #'
 #' A new \linkS4class{Column} is created to represent the literal value.
@@ -188,19 +209,17 @@ setMethod("approxCountDistinct",
             column(jc)
           })
 
-#' ascii
-#'
-#' Computes the numeric value of the first character of the string column, and returns the
-#' result as a int column.
-#'
-#' @param x Column to compute on.
+#' @details
+#' \code{ascii}: Computes the numeric value of the first character of the string column,
+#' and returns the result as an int column.
 #'
-#' @rdname ascii
-#' @name ascii
-#' @family string functions
+#' @rdname column_string_functions
 #' @export
-#' @aliases ascii,Column-method
-#' @examples \dontrun{\dontrun{ascii(df$c)}}
+#' @aliases ascii ascii,Column-method
+#' @examples
+#'
+#' \dontrun{
+#' head(select(df, ascii(df$Class), ascii(df$Sex)))}
 #' @note ascii since 1.5.0
 setMethod("ascii",
           signature(x = "Column"),
@@ -256,19 +275,22 @@ setMethod("avg",
             column(jc)
           })
 
-#' base64
-#'
-#' Computes the BASE64 encoding of a binary column and returns it as a string column.
-#' This is the reverse of unbase64.
-#'
-#' @param x Column to compute on.
+#' @details
+#' \code{base64}: Computes the BASE64 encoding of a binary column and returns it as
+#' a string column. This is the reverse of unbase64.
 #'
-#' @rdname base64
-#' @name base64
-#' @family string functions
+#' @rdname column_string_functions
 #' @export
-#' @aliases base64,Column-method
-#' @examples \dontrun{base64(df$c)}
+#' @aliases base64 base64,Column-method
+#' @examples
+#'
+#' \dontrun{
+#' tmp <- mutate(df, s1 = encode(df$Class, "UTF-8"))
+#' str(tmp)
+#' tmp2 <- mutate(tmp, s2 = base64(tmp$s1), s3 = decode(tmp$s1, "UTF-8"),
+#'                     s4 = soundex(tmp$Sex))
+#' head(tmp2)
+#' head(select(tmp2, unbase64(tmp2$s2)))}
 #' @note base64 since 1.5.0
 setMethod("base64",
           signature(x = "Column"),
@@ -620,20 +642,16 @@ setMethod("dayofyear",
             column(jc)
           })
 
-#' decode
-#'
-#' Computes the first argument into a string from a binary using the provided character set
-#' (one of 'US-ASCII', 'ISO-8859-1', 'UTF-8', 'UTF-16BE', 'UTF-16LE', 'UTF-16').
+#' @details
+#' \code{decode}: Computes the first argument into a string from a binary using the provided
+#' character set.
 #'
-#' @param x Column to compute on.
-#' @param charset Character set to use
+#' @param charset Character set to use (one of "US-ASCII", "ISO-8859-1", "UTF-8", "UTF-16BE",
+#'                "UTF-16LE", "UTF-16").
 #'
-#' @rdname decode
-#' @name decode
-#' @family string functions
-#' @aliases decode,Column,character-method
+#' @rdname column_string_functions
+#' @aliases decode decode,Column,character-method
 #' @export
-#' @examples \dontrun{decode(df$c, "UTF-8")}
 #' @note decode since 1.6.0
 setMethod("decode",
           signature(x = "Column", charset = "character"),
@@ -642,20 +660,13 @@ setMethod("decode",
             column(jc)
           })
 
-#' encode
-#'
-#' Computes the first argument into a binary from a string using the provided character set
-#' (one of 'US-ASCII', 'ISO-8859-1', 'UTF-8', 'UTF-16BE', 'UTF-16LE', 'UTF-16').
-#'
-#' @param x Column to compute on.
-#' @param charset Character set to use
+#' @details
+#' \code{encode}: Computes the first argument into a binary from a string using the provided
+#' character set.
 #'
-#' @rdname encode
-#' @name encode
-#' @family string functions
-#' @aliases encode,Column,character-method
+#' @rdname column_string_functions
+#' @aliases encode encode,Column,character-method
 #' @export
-#' @examples \dontrun{encode(df$c, "UTF-8")}
 #' @note encode since 1.6.0
 setMethod("encode",
           signature(x = "Column", charset = "character"),
@@ -788,21 +799,23 @@ setMethod("hour",
             column(jc)
           })
 
-#' initcap
-#'
-#' Returns a new string column by converting the first letter of each word to uppercase.
-#' Words are delimited by whitespace.
-#'
-#' For example, "hello world" will become "Hello World".
-#'
-#' @param x Column to compute on.
+#' @details
+#' \code{initcap}: Returns a new string column by converting the first letter of
+#' each word to uppercase. Words are delimited by whitespace. For example, "hello world"
+#' will become "Hello World".
 #'
-#' @rdname initcap
-#' @name initcap
-#' @family string functions
-#' @aliases initcap,Column-method
+#' @rdname column_string_functions
+#' @aliases initcap initcap,Column-method
 #' @export
-#' @examples \dontrun{initcap(df$c)}
+#' @examples
+#'
+#' \dontrun{
+#' tmp <- mutate(df, sex_lower = lower(df$Sex), age_upper = upper(df$Age),
+#'                   sex_age = concat_ws(" ", lower(df$Sex), lower(df$Age)))
+#' head(tmp)
+#' tmp2 <- mutate(tmp, s1 = initcap(tmp$sex_lower), s2 = initcap(tmp$sex_age),
+#'                     s3 = reverse(df$Sex))
+#' head(tmp2)}
 #' @note initcap since 1.5.0
 setMethod("initcap",
           signature(x = "Column"),
@@ -918,18 +931,12 @@ setMethod("last_day",
             column(jc)
           })
 
-#' length
-#'
-#' Computes the length of a given string or binary column.
-#'
-#' @param x Column to compute on.
+#' @details
+#' \code{length}: Computes the length of a given string or binary column.
 #'
-#' @rdname length
-#' @name length
-#' @aliases length,Column-method
-#' @family string functions
+#' @rdname column_string_functions
+#' @aliases length length,Column-method
 #' @export
-#' @examples \dontrun{length(df$c)}
 #' @note length since 1.5.0
 setMethod("length",
           signature(x = "Column"),
@@ -994,18 +1001,12 @@ setMethod("log2",
             column(jc)
           })
 
-#' lower
-#'
-#' Converts a string column to lower case.
-#'
-#' @param x Column to compute on.
+#' @details
+#' \code{lower}: Converts a string column to lower case.
 #'
-#' @rdname lower
-#' @name lower
-#' @family string functions
-#' @aliases lower,Column-method
+#' @rdname column_string_functions
+#' @aliases lower lower,Column-method
 #' @export
-#' @examples \dontrun{lower(df$c)}
 #' @note lower since 1.4.0
 setMethod("lower",
           signature(x = "Column"),
@@ -1014,18 +1015,24 @@ setMethod("lower",
             column(jc)
           })
 
-#' ltrim
-#'
-#' Trim the spaces from left end for the specified string value.
-#'
-#' @param x Column to compute on.
+#' @details
+#' \code{ltrim}: Trims the spaces from left end for the specified string value.
 #'
-#' @rdname ltrim
-#' @name ltrim
-#' @family string functions
-#' @aliases ltrim,Column-method
+#' @rdname column_string_functions
+#' @aliases ltrim ltrim,Column-method
 #' @export
-#' @examples \dontrun{ltrim(df$c)}
+#' @examples
+#'
+#' \dontrun{
+#' tmp <- mutate(df, SexLpad = lpad(df$Sex, 6, " "), SexRpad = rpad(df$Sex, 7, " "))
+#' head(select(tmp, length(tmp$Sex), length(tmp$SexLpad), length(tmp$SexRpad)))
+#' tmp2 <- mutate(tmp, SexLtrim = ltrim(tmp$SexLpad), SexRtrim = rtrim(tmp$SexRpad),
+#'                     SexTrim = trim(tmp$SexLpad))
+#' head(select(tmp2, length(tmp2$Sex), length(tmp2$SexLtrim),
+#'                   length(tmp2$SexRtrim), length(tmp2$SexTrim)))
+#'
+#' tmp <- mutate(df, SexLpad = lpad(df$Sex, 6, "xx"), SexRpad = rpad(df$Sex, 7, "xx"))
+#' head(tmp)}
 #' @note ltrim since 1.5.0
 setMethod("ltrim",
           signature(x = "Column"),
@@ -1198,18 +1205,12 @@ setMethod("quarter",
             column(jc)
           })
 
-#' reverse
-#'
-#' Reverses the string column and returns it as a new string column.
-#'
-#' @param x Column to compute on.
+#' @details
+#' \code{reverse}: Reverses the string column and returns it as a new string column.
 #'
-#' @rdname reverse
-#' @name reverse
-#' @family string functions
-#' @aliases reverse,Column-method
+#' @rdname column_string_functions
+#' @aliases reverse reverse,Column-method
 #' @export
-#' @examples \dontrun{reverse(df$c)}
 #' @note reverse since 1.5.0
 setMethod("reverse",
           signature(x = "Column"),
@@ -1268,18 +1269,12 @@ setMethod("bround",
             column(jc)
           })
 
-#' rtrim
-#'
-#' Trim the spaces from right end for the specified string value.
-#'
-#' @param x Column to compute on.
+#' @details
+#' \code{rtrim}: Trims the spaces from right end for the specified string value.
 #'
-#' @rdname rtrim
-#' @name rtrim
-#' @family string functions
-#' @aliases rtrim,Column-method
+#' @rdname column_string_functions
+#' @aliases rtrim rtrim,Column-method
 #' @export
-#' @examples \dontrun{rtrim(df$c)}
 #' @note rtrim since 1.5.0
 setMethod("rtrim",
           signature(x = "Column"),
@@ -1409,18 +1404,12 @@ setMethod("skewness",
             column(jc)
           })
 
-#' soundex
-#'
-#' Return the soundex code for the specified expression.
-#'
-#' @param x Column to compute on.
+#' @details
+#' \code{soundex}: Returns the soundex code for the specified expression.
 #'
-#' @rdname soundex
-#' @name soundex
-#' @family string functions
-#' @aliases soundex,Column-method
+#' @rdname column_string_functions
+#' @aliases soundex soundex,Column-method
 #' @export
-#' @examples \dontrun{soundex(df$c)}
 #' @note soundex since 1.5.0
 setMethod("soundex",
           signature(x = "Column"),
@@ -1731,18 +1720,12 @@ setMethod("to_timestamp",
             column(jc)
           })
 
-#' trim
-#'
-#' Trim the spaces from both ends for the specified string column.
-#'
-#' @param x Column to compute on.
+#' @details
+#' \code{trim}: Trims the spaces from both ends for the specified string column.
 #'
-#' @rdname trim
-#' @name trim
-#' @family string functions
-#' @aliases trim,Column-method
+#' @rdname column_string_functions
+#' @aliases trim trim,Column-method
 #' @export
-#' @examples \dontrun{trim(df$c)}
 #' @note trim since 1.5.0
 setMethod("trim",
           signature(x = "Column"),
@@ -1751,19 +1734,13 @@ setMethod("trim",
             column(jc)
           })
 
-#' unbase64
-#'
-#' Decodes a BASE64 encoded string column and returns it as a binary column.
+#' @details
+#' \code{unbase64}: Decodes a BASE64 encoded string column and returns it as a binary column.
 #' This is the reverse of base64.
 #'
-#' @param x Column to compute on.
-#'
-#' @rdname unbase64
-#' @name unbase64
-#' @family string functions
-#' @aliases unbase64,Column-method
+#' @rdname column_string_functions
+#' @aliases unbase64 unbase64,Column-method
 #' @export
-#' @examples \dontrun{unbase64(df$c)}
 #' @note unbase64 since 1.5.0
 setMethod("unbase64",
           signature(x = "Column"),
@@ -1787,18 +1764,12 @@ setMethod("unhex",
             column(jc)
           })
 
-#' upper
-#'
-#' Converts a string column to upper case.
-#'
-#' @param x Column to compute on.
+#' @details
+#' \code{upper}: Converts a string column to upper case.
 #'
-#' @rdname upper
-#' @name upper
-#' @family string functions
-#' @aliases upper,Column-method
+#' @rdname column_string_functions
+#' @aliases upper upper,Column-method
 #' @export
-#' @examples \dontrun{upper(df$c)}
 #' @note upper since 1.4.0
 setMethod("upper",
           signature(x = "Column"),
@@ -1949,19 +1920,19 @@ setMethod("hypot", signature(y = "Column"),
             column(jc)
           })
 
-#' levenshtein
-#'
-#' Computes the Levenshtein distance of the two given string columns.
-#'
-#' @param x Column to compute on.
-#' @param y Column to compute on.
+#' @details
+#' \code{levenshtein}: Computes the Levenshtein distance of the two given string columns.
 #'
-#' @rdname levenshtein
-#' @name levenshtein
-#' @family string functions
-#' @aliases levenshtein,Column-method
+#' @rdname column_string_functions
+#' @aliases levenshtein levenshtein,Column-method
 #' @export
-#' @examples \dontrun{levenshtein(df$c, x)}
+#' @examples
+#'
+#' \dontrun{
+#' tmp <- mutate(df, d1 = levenshtein(df$Class, df$Sex),
+#'                   d2 = levenshtein(df$Age, df$Sex),
+#'                   d3 = levenshtein(df$Age, df$Age))
+#' head(tmp)}
 #' @note levenshtein since 1.5.0
 setMethod("levenshtein", signature(y = "Column"),
           function(y, x) {
@@ -2061,20 +2032,22 @@ setMethod("countDistinct",
             column(jc)
           })
 
-
-#' concat
-#'
-#' Concatenates multiple input string columns together into a single string column.
-#'
-#' @param x Column to compute on
-#' @param ... other columns
+#' @details
+#' \code{concat}: Concatenates multiple input string columns together into a single string column.
 #'
-#' @family string functions
-#' @rdname concat
-#' @name concat
-#' @aliases concat,Column-method
+#' @rdname column_string_functions
+#' @aliases concat concat,Column-method
 #' @export
-#' @examples \dontrun{concat(df$strings, df$strings2)}
+#' @examples
+#'
+#' \dontrun{
+#' # concatenate strings
+#' tmp <- mutate(df, s1 = concat(df$Class, df$Sex),
+#'                   s2 = concat(df$Class, df$Sex, df$Age),
+#'                   s3 = concat(df$Class, df$Sex, df$Age, df$Class),
+#'                   s4 = concat_ws("_", df$Class, df$Sex),
+#'                   s5 = concat_ws("+", df$Class, df$Sex, df$Age, df$Survived))
+#' head(tmp)}
 #' @note concat since 1.5.0
 setMethod("concat",
           signature(x = "Column"),
@@ -2243,22 +2216,21 @@ setMethod("from_utc_timestamp", signature(y = "Column", x = "character"),
             column(jc)
           })
 
-#' instr
-#'
-#' Locate the position of the first occurrence of substr column in the given string.
-#' Returns null if either of the arguments are null.
-#'
-#' Note: The position is not zero based, but 1 based index. Returns 0 if substr
-#' could not be found in str.
+#' @details
+#' \code{instr}: Locates the position of the first occurrence of a substring (\code{x})
+#' in the given string column (\code{y}). Returns null if either of the arguments are null.
+#' Note: The position is not zero based, but 1 based index. Returns 0 if the substring
+#' could not be found in the string column.
 #'
-#' @param y column to check
-#' @param x substring to check
-#' @family string functions
-#' @aliases instr,Column,character-method
-#' @rdname instr
-#' @name instr
+#' @rdname column_string_functions
+#' @aliases instr instr,Column,character-method
 #' @export
-#' @examples \dontrun{instr(df$c, 'b')}
+#' @examples
+#'
+#' \dontrun{
+#' tmp <- mutate(df, s1 = instr(df$Sex, "m"), s2 = instr(df$Sex, "M"),
+#'                   s3 = locate("m", df$Sex), s4 = locate("m", df$Sex, pos = 4))
+#' head(tmp)}
 #' @note instr since 1.5.0
 setMethod("instr", signature(y = "Column", x = "character"),
           function(y, x) {
@@ -2345,22 +2317,22 @@ setMethod("date_sub", signature(y = "Column", x = "numeric"),
             column(jc)
           })
 
-#' format_number
-#'
-#' Formats numeric column y to a format like '#,###,###.##', rounded to x decimal places
-#' with HALF_EVEN round mode, and returns the result as a string column.
-#'
-#' If x is 0, the result has no decimal point or fractional part.
-#' If x < 0, the result will be null.
+#' @details
+#' \code{format_number}: Formats numeric column \code{y} to a format like '#,###,###.##',
+#' rounded to \code{x} decimal places with HALF_EVEN round mode, and returns the result
+#' as a string column.
+#' If \code{x} is 0, the result has no decimal point or fractional part.
+#' If \code{x} < 0, the result will be null.
 #'
-#' @param y column to format
-#' @param x number of decimal place to format to
-#' @family string functions
-#' @rdname format_number
-#' @name format_number
-#' @aliases format_number,Column,numeric-method
+#' @rdname column_string_functions
+#' @aliases format_number format_number,Column,numeric-method
 #' @export
-#' @examples \dontrun{format_number(df$n, 4)}
+#' @examples
+#'
+#' \dontrun{
+#' tmp <- mutate(df, v1 = df$Freq/3)
+#' head(select(tmp, format_number(tmp$v1, 0), format_number(tmp$v1, 2),
+#'                  format_string("%4.2f %s", tmp$v1, tmp$Sex)), 10)}
 #' @note format_number since 1.5.0
 setMethod("format_number", signature(y = "Column", x = "numeric"),
           function(y, x) {
@@ -2438,21 +2410,14 @@ setMethod("shiftRightUnsigned", signature(y = "Column", x = "numeric"),
             column(jc)
           })
 
-#' concat_ws
-#'
-#' Concatenates multiple input string columns together into a single string column,
-#' using the given separator.
+#' @details
+#' \code{concat_ws}: Concatenates multiple input string columns together into a single
+#' string column, using the given separator.
 #'
-#' @param x column to concatenate.
 #' @param sep separator to use.
-#' @param ... other columns to concatenate.
-#'
-#' @family string functions
-#' @rdname concat_ws
-#' @name concat_ws
-#' @aliases concat_ws,character,Column-method
+#' @rdname column_string_functions
+#' @aliases concat_ws concat_ws,character,Column-method
 #' @export
-#' @examples \dontrun{concat_ws('-', df$s, df$d)}
 #' @note concat_ws since 1.5.0
 setMethod("concat_ws", signature(sep = "character", x = "Column"),
           function(sep, x, ...) {
@@ -2499,19 +2464,14 @@ setMethod("expr", signature(x = "character"),
             column(jc)
           })
 
-#' format_string
-#'
-#' Formats the arguments in printf-style and returns the result as a string column.
+#' @details
+#' \code{format_string}: Formats the arguments in printf-style and returns the result
+#' as a string column.
 #'
 #' @param format a character object of format strings.
-#' @param x a Column.
-#' @param ... additional Column(s).
-#' @family string functions
-#' @rdname format_string
-#' @name format_string
-#' @aliases format_string,character,Column-method
+#' @rdname column_string_functions
+#' @aliases format_string format_string,character,Column-method
 #' @export
-#' @examples \dontrun{format_string('%d %s', df$a, df$b)}
 #' @note format_string since 1.5.0
 setMethod("format_string", signature(format = "character", x = "Column"),
           function(format, x, ...) {
@@ -2620,23 +2580,17 @@ setMethod("window", signature(x = "Column"),
             column(jc)
           })
 
-#' locate
-#'
-#' Locate the position of the first occurrence of substr.
-#'
+#' @details
+#' \code{locate}: Locates the position of the first occurrence of substr.
 #' Note: The position is not zero based, but 1 based index. Returns 0 if substr
 #' could not be found in str.
 #'
 #' @param substr a character string to be matched.
 #' @param str a Column where matches are sought for each entry.
 #' @param pos start position of search.
-#' @param ... further arguments to be passed to or from other methods.
-#' @family string functions
-#' @rdname locate
-#' @aliases locate,character,Column-method
-#' @name locate
+#' @rdname column_string_functions
+#' @aliases locate locate,character,Column-method
 #' @export
-#' @examples \dontrun{locate('b', df$c, 1)}
 #' @note locate since 1.5.0
 setMethod("locate", signature(substr = "character", str = "Column"),
           function(substr, str, pos = 1) {
@@ -2646,19 +2600,14 @@ setMethod("locate", signature(substr = "character", str = "Column"),
             column(jc)
           })
 
-#' lpad
-#'
-#' Left-pad the string column with
+#' @details
+#' \code{lpad}: Left-pads the string column with \code{pad} to a length of \code{len}.
 #'
-#' @param x the string Column to be left-padded.
 #' @param len maximum length of each output result.
 #' @param pad a character string to be padded with.
-#' @family string functions
-#' @rdname lpad
-#' @aliases lpad,Column,numeric,character-method
-#' @name lpad
+#' @rdname column_string_functions
+#' @aliases lpad lpad,Column,numeric,character-method
 #' @export
-#' @examples \dontrun{lpad(df$c, 6, '#')}
 #' @note lpad since 1.5.0
 setMethod("lpad", signature(x = "Column", len = "numeric", pad = "character"),
           function(x, len, pad) {
@@ -2728,20 +2677,27 @@ setMethod("randn", signature(seed = "numeric"),
             column(jc)
           })
 
-#' regexp_extract
-#'
-#' Extract a specific \code{idx} group identified by a Java regex, from the specified string column.
-#' If the regex did not match, or the specified group did not match, an empty string is returned.
+#' @details
+#' \code{regexp_extract}: Extracts a specific \code{idx} group identified by a Java regex,
+#' from the specified string column. If the regex did not match, or the specified group did
+#' not match, an empty string is returned.
 #'
-#' @param x a string Column.
 #' @param pattern a regular expression.
 #' @param idx a group index.
-#' @family string functions
-#' @rdname regexp_extract
-#' @name regexp_extract
-#' @aliases regexp_extract,Column,character,numeric-method
+#' @rdname column_string_functions
+#' @aliases regexp_extract regexp_extract,Column,character,numeric-method
 #' @export
-#' @examples \dontrun{regexp_extract(df$c, '(\d+)-(\d+)', 1)}
+#' @examples
+#'
+#' \dontrun{
+#' tmp <- mutate(df, s1 = regexp_extract(df$Class, "(\\d+)\\w+", 1),
+#'                   s2 = regexp_extract(df$Sex, "^(\\w)\\w+", 1),
+#'                   s3 = regexp_replace(df$Class, "\\D+", ""),
+#'                   s4 = substring_index(df$Sex, "a", 1),
+#'                   s5 = substring_index(df$Sex, "a", -1),
+#'                   s6 = translate(df$Sex, "ale", ""),
+#'                   s7 = translate(df$Sex, "a", "-"))
+#' head(tmp)}
 #' @note regexp_extract since 1.5.0
 setMethod("regexp_extract",
           signature(x = "Column", pattern = "character", idx = "numeric"),
@@ -2752,19 +2708,14 @@ setMethod("regexp_extract",
             column(jc)
           })
 
-#' regexp_replace
-#'
-#' Replace all substrings of the specified string value that match regexp with rep.
+#' @details
+#' \code{regexp_replace}: Replaces all substrings of the specified string value that
+#' match regexp with rep.
 #'
-#' @param x a string Column.
-#' @param pattern a regular expression.
 #' @param replacement a character string that a matched \code{pattern} is replaced with.
-#' @family string functions
-#' @rdname regexp_replace
-#' @name regexp_replace
-#' @aliases regexp_replace,Column,character,character-method
+#' @rdname column_string_functions
+#' @aliases regexp_replace regexp_replace,Column,character,character-method
 #' @export
-#' @examples \dontrun{regexp_replace(df$c, '(\\d+)', '--')}
 #' @note regexp_replace since 1.5.0
 setMethod("regexp_replace",
           signature(x = "Column", pattern = "character", replacement = "character"),
@@ -2775,19 +2726,12 @@ setMethod("regexp_replace",
             column(jc)
           })
 
-#' rpad
-#'
-#' Right-padded with pad to a length of len.
+#' @details
+#' \code{rpad}: Right-pads the string column with \code{pad} to a length of \code{len}.
 #'
-#' @param x the string Column to be right-padded.
-#' @param len maximum length of each output result.
-#' @param pad a character string to be padded with.
-#' @family string functions
-#' @rdname rpad
-#' @name rpad
-#' @aliases rpad,Column,numeric,character-method
+#' @rdname column_string_functions
+#' @aliases rpad rpad,Column,numeric,character-method
 #' @export
-#' @examples \dontrun{rpad(df$c, 6, '#')}
 #' @note rpad since 1.5.0
 setMethod("rpad", signature(x = "Column", len = "numeric", pad = "character"),
           function(x, len, pad) {
@@ -2797,28 +2741,20 @@ setMethod("rpad", signature(x = "Column", len = "numeric", pad = "character"),
             column(jc)
           })
 
-#' substring_index
-#'
-#' Returns the substring from string str before count occurrences of the delimiter delim.
-#' If count is positive, everything the left of the final delimiter (counting from left) is
-#' returned. If count is negative, every to the right of the final delimiter (counting from the
-#' right) is returned. substring_index performs a case-sensitive match when searching for delim.
+#' @details
+#' \code{substring_index}: Returns the substring from string str before count occurrences of
+#' the delimiter delim. If count is positive, everything to the left of the final delimiter
+#' (counting from left) is returned. If count is negative, everything to the right of the final
+#' delimiter (counting from the right) is returned. substring_index performs a case-sensitive
+#' match when searching for delim.
 #'
-#' @param x a Column.
 #' @param delim a delimiter string.
 #' @param count number of occurrences of \code{delim} before the substring is returned.
 #'              A positive number means counting from the left, while negative means
 #'              counting from the right.
-#' @family string functions
-#' @rdname substring_index
-#' @aliases substring_index,Column,character,numeric-method
-#' @name substring_index
+#' @rdname column_string_functions
+#' @aliases substring_index substring_index,Column,character,numeric-method
 #' @export
-#' @examples
-#'\dontrun{
-#'substring_index(df$c, '.', 2)
-#'substring_index(df$c, '.', -1)
-#'}
 #' @note substring_index since 1.5.0
 setMethod("substring_index",
           signature(x = "Column", delim = "character", count = "numeric"),
@@ -2829,24 +2765,19 @@ setMethod("substring_index",
             column(jc)
           })
 
-#' translate
-#'
-#' Translate any character in the src by a character in replaceString.
+#' @details
+#' \code{translate}: Translates any character in the src by a character in replaceString.
 #' The characters in replaceString is corresponding to the characters in matchingString.
 #' The translate will happen when any character in the string matching with the character
 #' in the matchingString.
 #'
-#' @param x a string Column.
 #' @param matchingString a source string where each character will be translated.
 #' @param replaceString a target string where each \code{matchingString} character will
 #'                      be replaced by the character in \code{replaceString}
 #'                      at the same location, if any.
-#' @family string functions
-#' @rdname translate
-#' @name translate
-#' @aliases translate,Column,character,character-method
+#' @rdname column_string_functions
+#' @aliases translate translate,Column,character,character-method
 #' @export
-#' @examples \dontrun{translate(df$c, 'rnlt', '123')}
 #' @note translate since 1.5.0
 setMethod("translate",
           signature(x = "Column", matchingString = "character", replaceString = "character"),
@@ -3419,28 +3350,20 @@ setMethod("collect_set",
             column(jc)
           })
 
-#' split_string
-#'
-#' Splits string on regular expression.
-#'
-#' Equivalent to \code{split} SQL function
-#'
-#' @param x Column to compute on
-#' @param pattern Java regular expression
+#' @details
+#' \code{split_string}: Splits string on regular expression.
+#' Equivalent to \code{split} SQL function.
 #'
-#' @rdname split_string
-#' @family string functions
-#' @aliases split_string,Column-method
+#' @rdname column_string_functions
+#' @aliases split_string split_string,Column-method
 #' @export
 #' @examples
-#' \dontrun{
-#' df <- read.text("README.md")
-#'
-#' head(select(df, split_string(df$value, "\\s+")))
 #'
+#' \dontrun{
+#' head(select(df, split_string(df$Sex, "a")))
+#' head(select(df, split_string(df$Class, "\\d")))
 #' # This is equivalent to the following SQL expression
-#' head(selectExpr(df, "split(value, '\\\\s+')"))
-#' }
+#' head(selectExpr(df, "split(Class, '\\\\d')"))}
 #' @note split_string 2.3.0
 setMethod("split_string",
           signature(x = "Column", pattern = "character"),
@@ -3449,28 +3372,20 @@ setMethod("split_string",
             column(jc)
           })
 
-#' repeat_string
-#'
-#' Repeats string n times.
-#'
-#' Equivalent to \code{repeat} SQL function
+#' @details
+#' \code{repeat_string}: Repeats string n times.
+#' Equivalent to \code{repeat} SQL function.
 #'
-#' @param x Column to compute on
 #' @param n Number of repetitions
-#'
-#' @rdname repeat_string
-#' @family string functions
-#' @aliases repeat_string,Column-method
+#' @rdname column_string_functions
+#' @aliases repeat_string repeat_string,Column-method
 #' @export
 #' @examples
-#' \dontrun{
-#' df <- read.text("README.md")
-#'
-#' first(select(df, repeat_string(df$value, 3)))
 #'
+#' \dontrun{
+#' head(select(df, repeat_string(df$Class, 3)))
 #' # This is equivalent to the following SQL expression
-#' first(selectExpr(df, "repeat(value, 3)"))
-#' }
+#' head(selectExpr(df, "repeat(Class, 3)"))}
 #' @note repeat_string since 2.3.0
 setMethod("repeat_string",
           signature(x = "Column", n = "numeric"),

http://git-wip-us.apache.org/repos/asf/spark/blob/376d90d5/R/pkg/R/generics.R
----------------------------------------------------------------------
diff --git a/R/pkg/R/generics.R b/R/pkg/R/generics.R
index 0248ec5..dc99e3d 100644
--- a/R/pkg/R/generics.R
+++ b/R/pkg/R/generics.R
@@ -917,8 +917,9 @@ setGeneric("approxCountDistinct", function(x, ...) { standardGeneric("approxCoun
 #' @export
 setGeneric("array_contains", function(x, value) { standardGeneric("array_contains") })
 
-#' @rdname ascii
+#' @rdname column_string_functions
 #' @export
+#' @name NULL
 setGeneric("ascii", function(x) { standardGeneric("ascii") })
 
 #' @param x Column to compute on or a GroupedData object.
@@ -927,8 +928,9 @@ setGeneric("ascii", function(x) { standardGeneric("ascii") })
 #' @export
 setGeneric("avg", function(x, ...) { standardGeneric("avg") })
 
-#' @rdname base64
+#' @rdname column_string_functions
 #' @export
+#' @name NULL
 setGeneric("base64", function(x) { standardGeneric("base64") })
 
 #' @rdname column_math_functions
@@ -969,12 +971,14 @@ setGeneric("collect_set", function(x) { standardGeneric("collect_set") })
 #' @export
 setGeneric("column", function(x) { standardGeneric("column") })
 
-#' @rdname concat
+#' @rdname column_string_functions
 #' @export
+#' @name NULL
 setGeneric("concat", function(x, ...) { standardGeneric("concat") })
 
-#' @rdname concat_ws
+#' @rdname column_string_functions
 #' @export
+#' @name NULL
 setGeneric("concat_ws", function(sep, x, ...) { standardGeneric("concat_ws") })
 
 #' @rdname column_math_functions
@@ -1038,8 +1042,9 @@ setGeneric("dayofmonth", function(x) { standardGeneric("dayofmonth") })
 #' @name NULL
 setGeneric("dayofyear", function(x) { standardGeneric("dayofyear") })
 
-#' @rdname decode
+#' @rdname column_string_functions
 #' @export
+#' @name NULL
 setGeneric("decode", function(x, charset) { standardGeneric("decode") })
 
 #' @param x empty. Should be used with no argument.
@@ -1047,8 +1052,9 @@ setGeneric("decode", function(x, charset) { standardGeneric("decode") })
 #' @export
 setGeneric("dense_rank", function(x = "missing") { standardGeneric("dense_rank") })
 
-#' @rdname encode
+#' @rdname column_string_functions
 #' @export
+#' @name NULL
 setGeneric("encode", function(x, charset) { standardGeneric("encode") })
 
 #' @rdname explode
@@ -1068,12 +1074,14 @@ setGeneric("expr", function(x) { standardGeneric("expr") })
 #' @name NULL
 setGeneric("from_utc_timestamp", function(y, x) { standardGeneric("from_utc_timestamp") })
 
-#' @rdname format_number
+#' @rdname column_string_functions
 #' @export
+#' @name NULL
 setGeneric("format_number", function(y, x) { standardGeneric("format_number") })
 
-#' @rdname format_string
+#' @rdname column_string_functions
 #' @export
+#' @name NULL
 setGeneric("format_string", function(format, x, ...) { standardGeneric("format_string") })
 
 #' @rdname from_json
@@ -1114,8 +1122,9 @@ setGeneric("hour", function(x) { standardGeneric("hour") })
 #' @name NULL
 setGeneric("hypot", function(y, x) { standardGeneric("hypot") })
 
-#' @rdname initcap
+#' @rdname column_string_functions
 #' @export
+#' @name NULL
 setGeneric("initcap", function(x) { standardGeneric("initcap") })
 
 #' @param x empty. Should be used with no argument.
@@ -1124,8 +1133,9 @@ setGeneric("initcap", function(x) { standardGeneric("initcap") })
 setGeneric("input_file_name",
            function(x = "missing") { standardGeneric("input_file_name") })
 
-#' @rdname instr
+#' @rdname column_string_functions
 #' @export
+#' @name NULL
 setGeneric("instr", function(y, x) { standardGeneric("instr") })
 
 #' @rdname is.nan
@@ -1158,28 +1168,33 @@ setGeneric("lead", function(x, offset, defaultValue = NULL) { standardGeneric("l
 #' @export
 setGeneric("least", function(x, ...) { standardGeneric("least") })
 
-#' @rdname levenshtein
+#' @rdname column_string_functions
 #' @export
+#' @name NULL
 setGeneric("levenshtein", function(y, x) { standardGeneric("levenshtein") })
 
 #' @rdname lit
 #' @export
 setGeneric("lit", function(x) { standardGeneric("lit") })
 
-#' @rdname locate
+#' @rdname column_string_functions
 #' @export
+#' @name NULL
 setGeneric("locate", function(substr, str, ...) { standardGeneric("locate") })
 
-#' @rdname lower
+#' @rdname column_string_functions
 #' @export
+#' @name NULL
 setGeneric("lower", function(x) { standardGeneric("lower") })
 
-#' @rdname lpad
+#' @rdname column_string_functions
 #' @export
+#' @name NULL
 setGeneric("lpad", function(x, len, pad) { standardGeneric("lpad") })
 
-#' @rdname ltrim
+#' @rdname column_string_functions
 #' @export
+#' @name NULL
 setGeneric("ltrim", function(x) { standardGeneric("ltrim") })
 
 #' @rdname md5
@@ -1272,21 +1287,25 @@ setGeneric("randn", function(seed) { standardGeneric("randn") })
 #' @export
 setGeneric("rank", function(x, ...) { standardGeneric("rank") })
 
-#' @rdname regexp_extract
+#' @rdname column_string_functions
 #' @export
+#' @name NULL
 setGeneric("regexp_extract", function(x, pattern, idx) { standardGeneric("regexp_extract") })
 
-#' @rdname regexp_replace
+#' @rdname column_string_functions
 #' @export
+#' @name NULL
 setGeneric("regexp_replace",
            function(x, pattern, replacement) { standardGeneric("regexp_replace") })
 
-#' @rdname repeat_string
+#' @rdname column_string_functions
 #' @export
+#' @name NULL
 setGeneric("repeat_string", function(x, n) { standardGeneric("repeat_string") })
 
-#' @rdname reverse
+#' @rdname column_string_functions
 #' @export
+#' @name NULL
 setGeneric("reverse", function(x) { standardGeneric("reverse") })
 
 #' @rdname column_math_functions
@@ -1299,12 +1318,14 @@ setGeneric("rint", function(x) { standardGeneric("rint") })
 #' @export
 setGeneric("row_number", function(x = "missing") { standardGeneric("row_number") })
 
-#' @rdname rpad
+#' @rdname column_string_functions
 #' @export
+#' @name NULL
 setGeneric("rpad", function(x, len, pad) { standardGeneric("rpad") })
 
-#' @rdname rtrim
+#' @rdname column_string_functions
 #' @export
+#' @name NULL
 setGeneric("rtrim", function(x) { standardGeneric("rtrim") })
 
 #' @rdname column_aggregate_functions
@@ -1358,12 +1379,14 @@ setGeneric("skewness", function(x) { standardGeneric("skewness") })
 #' @export
 setGeneric("sort_array", function(x, asc = TRUE) { standardGeneric("sort_array") })
 
-#' @rdname split_string
+#' @rdname column_string_functions
 #' @export
+#' @name NULL
 setGeneric("split_string", function(x, pattern) { standardGeneric("split_string") })
 
-#' @rdname soundex
+#' @rdname column_string_functions
 #' @export
+#' @name NULL
 setGeneric("soundex", function(x) { standardGeneric("soundex") })
 
 #' @param x empty. Should be used with no argument.
@@ -1390,8 +1413,9 @@ setGeneric("stddev_samp", function(x) { standardGeneric("stddev_samp") })
 #' @export
 setGeneric("struct", function(x, ...) { standardGeneric("struct") })
 
-#' @rdname substring_index
+#' @rdname column_string_functions
 #' @export
+#' @name NULL
 setGeneric("substring_index", function(x, delim, count) { standardGeneric("substring_index") })
 
 #' @rdname column_aggregate_functions
@@ -1428,16 +1452,19 @@ setGeneric("to_timestamp", function(x, format) { standardGeneric("to_timestamp")
 #' @name NULL
 setGeneric("to_utc_timestamp", function(y, x) { standardGeneric("to_utc_timestamp") })
 
-#' @rdname translate
+#' @rdname column_string_functions
 #' @export
+#' @name NULL
 setGeneric("translate", function(x, matchingString, replaceString) { standardGeneric("translate") })
 
-#' @rdname trim
+#' @rdname column_string_functions
 #' @export
+#' @name NULL
 setGeneric("trim", function(x) { standardGeneric("trim") })
 
-#' @rdname unbase64
+#' @rdname column_string_functions
 #' @export
+#' @name NULL
 setGeneric("unbase64", function(x) { standardGeneric("unbase64") })
 
 #' @rdname column_math_functions
@@ -1450,8 +1477,9 @@ setGeneric("unhex", function(x) { standardGeneric("unhex") })
 #' @name NULL
 setGeneric("unix_timestamp", function(x, format) { standardGeneric("unix_timestamp") })
 
-#' @rdname upper
+#' @rdname column_string_functions
 #' @export
+#' @name NULL
 setGeneric("upper", function(x) { standardGeneric("upper") })
 
 #' @rdname column_aggregate_functions


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org