You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by fe...@apache.org on 2016/08/24 23:00:10 UTC

spark git commit: [SPARKR][MINOR] Add more examples to window function docs

Repository: spark
Updated Branches:
  refs/heads/master 945c04bcd -> 18708f76c


[SPARKR][MINOR] Add more examples to window function docs

## What changes were proposed in this pull request?

This PR adds more examples to window function docs to make them more accessible to the users.

It also fixes default value issues for `lag` and `lead`.

## How was this patch tested?

Manual test, R unit test.

Author: Junyang Qian <ju...@databricks.com>

Closes #14779 from junyangq/SPARKR-FixWindowFunctionDocs.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/18708f76
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/18708f76
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/18708f76

Branch: refs/heads/master
Commit: 18708f76c366c6e01b5865981666e40d8642ac20
Parents: 945c04b
Author: Junyang Qian <ju...@databricks.com>
Authored: Wed Aug 24 16:00:04 2016 -0700
Committer: Felix Cheung <fe...@apache.org>
Committed: Wed Aug 24 16:00:04 2016 -0700

----------------------------------------------------------------------
 R/pkg/R/WindowSpec.R | 12 ++++++++
 R/pkg/R/functions.R  | 78 ++++++++++++++++++++++++++++++++++++-----------
 2 files changed, 72 insertions(+), 18 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/18708f76/R/pkg/R/WindowSpec.R
----------------------------------------------------------------------
diff --git a/R/pkg/R/WindowSpec.R b/R/pkg/R/WindowSpec.R
index ddd2ef2..4ac83c2 100644
--- a/R/pkg/R/WindowSpec.R
+++ b/R/pkg/R/WindowSpec.R
@@ -203,6 +203,18 @@ setMethod("rangeBetween",
 #' @aliases over,Column,WindowSpec-method
 #' @family colum_func
 #' @export
+#' @examples \dontrun{
+#'   df <- createDataFrame(mtcars)
+#'
+#'   # Partition by am (transmission) and order by hp (horsepower)
+#'   ws <- orderBy(windowPartitionBy("am"), "hp")
+#'
+#'   # Rank on hp within each partition
+#'   out <- select(df, over(rank(), ws), df$hp, df$am)
+#'
+#'   # Lag mpg values by 1 row on the partition-and-ordered table
+#'   out <- select(df, over(lead(df$mpg), ws), df$mpg, df$hp, df$am)
+#' }
 #' @note over since 2.0.0
 setMethod("over",
           signature(x = "Column", window = "WindowSpec"),

http://git-wip-us.apache.org/repos/asf/spark/blob/18708f76/R/pkg/R/functions.R
----------------------------------------------------------------------
diff --git a/R/pkg/R/functions.R b/R/pkg/R/functions.R
index f042add..dbf8dd8 100644
--- a/R/pkg/R/functions.R
+++ b/R/pkg/R/functions.R
@@ -3121,9 +3121,9 @@ setMethod("ifelse",
 #' @aliases cume_dist,missing-method
 #' @export
 #' @examples \dontrun{
-#'   df <- createDataFrame(iris)
-#'   ws <- orderBy(windowPartitionBy("Species"), "Sepal_Length")
-#'   out <- select(df, over(cume_dist(), ws), df$Sepal_Length, df$Species)
+#'   df <- createDataFrame(mtcars)
+#'   ws <- orderBy(windowPartitionBy("am"), "hp")
+#'   out <- select(df, over(cume_dist(), ws), df$hp, df$am)
 #' }
 #' @note cume_dist since 1.6.0
 setMethod("cume_dist",
@@ -3148,7 +3148,11 @@ setMethod("cume_dist",
 #' @family window_funcs
 #' @aliases dense_rank,missing-method
 #' @export
-#' @examples \dontrun{dense_rank()}
+#' @examples \dontrun{
+#'   df <- createDataFrame(mtcars)
+#'   ws <- orderBy(windowPartitionBy("am"), "hp")
+#'   out <- select(df, over(dense_rank(), ws), df$hp, df$am)
+#' }
 #' @note dense_rank since 1.6.0
 setMethod("dense_rank",
           signature("missing"),
@@ -3168,18 +3172,26 @@ setMethod("dense_rank",
 #' @param x the column as a character string or a Column to compute on.
 #' @param offset the number of rows back from the current row from which to obtain a value.
 #'               If not specified, the default is 1.
-#' @param defaultValue default to use when the offset row does not exist.
+#' @param defaultValue (optional) default to use when the offset row does not exist.
 #' @param ... further arguments to be passed to or from other methods.
 #' @rdname lag
 #' @name lag
 #' @aliases lag,characterOrColumn-method
 #' @family window_funcs
 #' @export
-#' @examples \dontrun{lag(df$c)}
+#' @examples \dontrun{
+#'   df <- createDataFrame(mtcars)
+#'
+#'   # Partition by am (transmission) and order by hp (horsepower)
+#'   ws <- orderBy(windowPartitionBy("am"), "hp")
+#'
+#'   # Lag mpg values by 1 row on the partition-and-ordered table
+#'   out <- select(df, over(lag(df$mpg), ws), df$mpg, df$hp, df$am)
+#' }
 #' @note lag since 1.6.0
 setMethod("lag",
           signature(x = "characterOrColumn"),
-          function(x, offset, defaultValue = NULL) {
+          function(x, offset = 1, defaultValue = NULL) {
             col <- if (class(x) == "Column") {
               x@jc
             } else {
@@ -3194,25 +3206,35 @@ setMethod("lag",
 #' lead
 #'
 #' Window function: returns the value that is \code{offset} rows after the current row, and
-#' NULL if there is less than \code{offset} rows after the current row. For example,
-#' an \code{offset} of one will return the next row at any given point in the window partition.
+#' \code{defaultValue} if there is less than \code{offset} rows after the current row.
+#' For example, an \code{offset} of one will return the next row at any given point
+#' in the window partition.
 #'
 #' This is equivalent to the \code{LEAD} function in SQL.
 #'
-#' @param x Column to compute on
-#' @param offset Number of rows to offset
-#' @param defaultValue (Optional) default value to use
+#' @param x the column as a character string or a Column to compute on.
+#' @param offset the number of rows after the current row from which to obtain a value.
+#'               If not specified, the default is 1.
+#' @param defaultValue (optional) default to use when the offset row does not exist.
 #'
 #' @rdname lead
 #' @name lead
 #' @family window_funcs
 #' @aliases lead,characterOrColumn,numeric-method
 #' @export
-#' @examples \dontrun{lead(df$c)}
+#' @examples \dontrun{
+#'   df <- createDataFrame(mtcars)
+#'
+#'   # Partition by am (transmission) and order by hp (horsepower)
+#'   ws <- orderBy(windowPartitionBy("am"), "hp")
+#'
+#'   # Lead mpg values by 1 row on the partition-and-ordered table
+#'   out <- select(df, over(lead(df$mpg), ws), df$mpg, df$hp, df$am)
+#' }
 #' @note lead since 1.6.0
 setMethod("lead",
           signature(x = "characterOrColumn", offset = "numeric", defaultValue = "ANY"),
-          function(x, offset, defaultValue = NULL) {
+          function(x, offset = 1, defaultValue = NULL) {
             col <- if (class(x) == "Column") {
               x@jc
             } else {
@@ -3239,7 +3261,15 @@ setMethod("lead",
 #' @aliases ntile,numeric-method
 #' @family window_funcs
 #' @export
-#' @examples \dontrun{ntile(1)}
+#' @examples \dontrun{
+#'   df <- createDataFrame(mtcars)
+#'
+#'   # Partition by am (transmission) and order by hp (horsepower)
+#'   ws <- orderBy(windowPartitionBy("am"), "hp")
+#'
+#'   # Get ntile group id (1-4) for hp
+#'   out <- select(df, over(ntile(4), ws), df$hp, df$am)
+#' }
 #' @note ntile since 1.6.0
 setMethod("ntile",
           signature(x = "numeric"),
@@ -3263,7 +3293,11 @@ setMethod("ntile",
 #' @family window_funcs
 #' @aliases percent_rank,missing-method
 #' @export
-#' @examples \dontrun{percent_rank()}
+#' @examples \dontrun{
+#'   df <- createDataFrame(mtcars)
+#'   ws <- orderBy(windowPartitionBy("am"), "hp")
+#'   out <- select(df, over(percent_rank(), ws), df$hp, df$am)
+#' }
 #' @note percent_rank since 1.6.0
 setMethod("percent_rank",
           signature("missing"),
@@ -3288,7 +3322,11 @@ setMethod("percent_rank",
 #' @family window_funcs
 #' @aliases rank,missing-method
 #' @export
-#' @examples \dontrun{rank()}
+#' @examples \dontrun{
+#'   df <- createDataFrame(mtcars)
+#'   ws <- orderBy(windowPartitionBy("am"), "hp")
+#'   out <- select(df, over(rank(), ws), df$hp, df$am)
+#' }
 #' @note rank since 1.6.0
 setMethod("rank",
           signature(x = "missing"),
@@ -3321,7 +3359,11 @@ setMethod("rank",
 #' @aliases row_number,missing-method
 #' @family window_funcs
 #' @export
-#' @examples \dontrun{row_number()}
+#' @examples \dontrun{
+#'   df <- createDataFrame(mtcars)
+#'   ws <- orderBy(windowPartitionBy("am"), "hp")
+#'   out <- select(df, over(row_number(), ws), df$hp, df$am)
+#' }
 #' @note row_number since 1.6.0
 setMethod("row_number",
           signature("missing"),


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org