You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by fe...@apache.org on 2017/04/21 19:06:25 UTC
spark git commit: [SPARK-20371][R] Add wrappers for collect_list and
collect_set
Repository: spark
Updated Branches:
refs/heads/master eb00378f0 -> fd648bff6
[SPARK-20371][R] Add wrappers for collect_list and collect_set
## What changes were proposed in this pull request?
Adds wrappers for `collect_list` and `collect_set`.
## How was this patch tested?
Unit tests, `check-cran.sh`
Author: zero323 <ze...@users.noreply.github.com>
Closes #17672 from zero323/SPARK-20371.
Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/fd648bff
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/fd648bff
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/fd648bff
Branch: refs/heads/master
Commit: fd648bff63f91a30810910dfc5664eea0ff5e6f9
Parents: eb00378
Author: zero323 <ze...@users.noreply.github.com>
Authored: Fri Apr 21 12:06:21 2017 -0700
Committer: Felix Cheung <fe...@apache.org>
Committed: Fri Apr 21 12:06:21 2017 -0700
----------------------------------------------------------------------
R/pkg/NAMESPACE | 2 ++
R/pkg/R/functions.R | 40 ++++++++++++++++++++++++++
R/pkg/R/generics.R | 9 ++++++
R/pkg/inst/tests/testthat/test_sparkSQL.R | 22 ++++++++++++++
4 files changed, 73 insertions(+)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/spark/blob/fd648bff/R/pkg/NAMESPACE
----------------------------------------------------------------------
diff --git a/R/pkg/NAMESPACE b/R/pkg/NAMESPACE
index b6b559a..e804e30 100644
--- a/R/pkg/NAMESPACE
+++ b/R/pkg/NAMESPACE
@@ -203,6 +203,8 @@ exportMethods("%in%",
"cbrt",
"ceil",
"ceiling",
+ "collect_list",
+ "collect_set",
"column",
"concat",
"concat_ws",
http://git-wip-us.apache.org/repos/asf/spark/blob/fd648bff/R/pkg/R/functions.R
----------------------------------------------------------------------
diff --git a/R/pkg/R/functions.R b/R/pkg/R/functions.R
index f854df1..e7decb9 100644
--- a/R/pkg/R/functions.R
+++ b/R/pkg/R/functions.R
@@ -3705,3 +3705,43 @@ setMethod("create_map",
jc <- callJStatic("org.apache.spark.sql.functions", "map", jcols)
column(jc)
})
+
+#' collect_list
+#'
+#' Creates a list of objects with duplicates.
+#'
+#' @param x Column to compute on
+#'
+#' @rdname collect_list
+#' @name collect_list
+#' @family agg_funcs
+#' @aliases collect_list,Column-method
+#' @export
+#' @examples \dontrun{collect_list(df$x)}
+#' @note collect_list since 2.3.0
+setMethod("collect_list",
+ signature(x = "Column"),
+ function(x) {
+ jc <- callJStatic("org.apache.spark.sql.functions", "collect_list", x@jc)
+ column(jc)
+ })
+
+#' collect_set
+#'
+#' Creates a list of objects with duplicate elements eliminated.
+#'
+#' @param x Column to compute on
+#'
+#' @rdname collect_set
+#' @name collect_set
+#' @family agg_funcs
+#' @aliases collect_set,Column-method
+#' @export
+#' @examples \dontrun{collect_set(df$x)}
+#' @note collect_set since 2.3.0
+setMethod("collect_set",
+ signature(x = "Column"),
+ function(x) {
+ jc <- callJStatic("org.apache.spark.sql.functions", "collect_set", x@jc)
+ column(jc)
+ })
http://git-wip-us.apache.org/repos/asf/spark/blob/fd648bff/R/pkg/R/generics.R
----------------------------------------------------------------------
diff --git a/R/pkg/R/generics.R b/R/pkg/R/generics.R
index da46823..61d248e 100644
--- a/R/pkg/R/generics.R
+++ b/R/pkg/R/generics.R
@@ -918,6 +918,14 @@ setGeneric("cbrt", function(x) { standardGeneric("cbrt") })
#' @export
setGeneric("ceil", function(x) { standardGeneric("ceil") })
+#' @rdname collect_list
+#' @export
+setGeneric("collect_list", function(x) { standardGeneric("collect_list") })
+
+#' @rdname collect_set
+#' @export
+setGeneric("collect_set", function(x) { standardGeneric("collect_set") })
+
#' @rdname column
#' @export
setGeneric("column", function(x) { standardGeneric("column") })
@@ -1358,6 +1366,7 @@ setGeneric("window", function(x, ...) { standardGeneric("window") })
#' @export
setGeneric("year", function(x) { standardGeneric("year") })
+
###################### Spark.ML Methods ##########################
#' @rdname fitted
http://git-wip-us.apache.org/repos/asf/spark/blob/fd648bff/R/pkg/inst/tests/testthat/test_sparkSQL.R
----------------------------------------------------------------------
diff --git a/R/pkg/inst/tests/testthat/test_sparkSQL.R b/R/pkg/inst/tests/testthat/test_sparkSQL.R
index 9e87a47..bf2093f 100644
--- a/R/pkg/inst/tests/testthat/test_sparkSQL.R
+++ b/R/pkg/inst/tests/testthat/test_sparkSQL.R
@@ -1731,6 +1731,28 @@ test_that("group by, agg functions", {
expect_true(abs(sd(1:2) - 0.7071068) < 1e-6)
expect_true(abs(var(1:5, 1:5) - 2.5) < 1e-6)
+ # Test collect_list and collect_set
+ gd3_collections_local <- collect(
+ agg(gd3, collect_set(df8$age), collect_list(df8$age))
+ )
+
+ expect_equal(
+ unlist(gd3_collections_local[gd3_collections_local$name == "Andy", 2]),
+ c(30)
+ )
+
+ expect_equal(
+ unlist(gd3_collections_local[gd3_collections_local$name == "Andy", 3]),
+ c(30, 30)
+ )
+
+ expect_equal(
+ sort(unlist(
+ gd3_collections_local[gd3_collections_local$name == "Justin", 3]
+ )),
+ c(1, 19)
+ )
+
unlink(jsonPath2)
unlink(jsonPath3)
})
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org