You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by gu...@apache.org on 2017/08/03 14:00:07 UTC
spark git commit: [SPARK-21602][R] Add map_keys and map_values functions to R
Repository: spark
Updated Branches:
refs/heads/master e7c59b417 -> 97ba49183
[SPARK-21602][R] Add map_keys and map_values functions to R
## What changes were proposed in this pull request?
This PR adds `map_values` and `map_keys` to the R API.
```r
> df <- createDataFrame(cbind(model = rownames(mtcars), mtcars))
> tmp <- mutate(df, v = create_map(df$model, df$cyl))
> head(select(tmp, map_keys(tmp$v)))
```
```
map_keys(v)
1 Mazda RX4
2 Mazda RX4 Wag
3 Datsun 710
4 Hornet 4 Drive
5 Hornet Sportabout
6 Valiant
```
```r
> head(select(tmp, map_values(tmp$v)))
```
```
map_values(v)
1 6
2 6
3 4
4 6
5 8
6 6
```
## How was this patch tested?
Manual tests and unit tests in `R/pkg/tests/fulltests/test_sparkSQL.R`
Author: hyukjinkwon <gu...@gmail.com>
Closes #18809 from HyukjinKwon/map-keys-values-r.
Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/97ba4918
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/97ba4918
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/97ba4918
Branch: refs/heads/master
Commit: 97ba4918368ba15334427bdd91230829ece606f6
Parents: e7c59b4
Author: hyukjinkwon <gu...@gmail.com>
Authored: Thu Aug 3 23:00:00 2017 +0900
Committer: hyukjinkwon <gu...@gmail.com>
Committed: Thu Aug 3 23:00:00 2017 +0900
----------------------------------------------------------------------
R/pkg/NAMESPACE | 2 ++
R/pkg/R/functions.R | 33 +++++++++++++++++++++++++++++-
R/pkg/R/generics.R | 10 +++++++++
R/pkg/tests/fulltests/test_sparkSQL.R | 8 ++++++++
4 files changed, 52 insertions(+), 1 deletion(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/spark/blob/97ba4918/R/pkg/NAMESPACE
----------------------------------------------------------------------
diff --git a/R/pkg/NAMESPACE b/R/pkg/NAMESPACE
index 232f5cf..a1dd1af 100644
--- a/R/pkg/NAMESPACE
+++ b/R/pkg/NAMESPACE
@@ -286,6 +286,8 @@ exportMethods("%<=>%",
"lower",
"lpad",
"ltrim",
+ "map_keys",
+ "map_values",
"max",
"md5",
"mean",
http://git-wip-us.apache.org/repos/asf/spark/blob/97ba4918/R/pkg/R/functions.R
----------------------------------------------------------------------
diff --git a/R/pkg/R/functions.R b/R/pkg/R/functions.R
index 86507f1..5a46d73 100644
--- a/R/pkg/R/functions.R
+++ b/R/pkg/R/functions.R
@@ -195,7 +195,10 @@ NULL
#' head(tmp2)
#' head(select(tmp, posexplode(tmp$v1)))
#' head(select(tmp, sort_array(tmp$v1)))
-#' head(select(tmp, sort_array(tmp$v1, asc = FALSE)))}
+#' head(select(tmp, sort_array(tmp$v1, asc = FALSE)))
+#' tmp3 <- mutate(df, v3 = create_map(df$model, df$cyl))
+#' head(select(tmp3, map_keys(tmp3$v3)))
+#' head(select(tmp3, map_values(tmp3$v3)))}
NULL
#' Window functions for Column operations
@@ -3056,6 +3059,34 @@ setMethod("array_contains",
})
#' @details
+#' \code{map_keys}: Returns an unordered array containing the keys of the map.
+#'
+#' @rdname column_collection_functions
+#' @aliases map_keys map_keys,Column-method
+#' @export
+#' @note map_keys since 2.3.0
+setMethod("map_keys",
+ signature(x = "Column"),
+ function(x) {
+ jc <- callJStatic("org.apache.spark.sql.functions", "map_keys", x@jc)
+ column(jc)
+ })
+
+#' @details
+#' \code{map_values}: Returns an unordered array containing the values of the map.
+#'
+#' @rdname column_collection_functions
+#' @aliases map_values map_values,Column-method
+#' @export
+#' @note map_values since 2.3.0
+setMethod("map_values",
+ signature(x = "Column"),
+ function(x) {
+ jc <- callJStatic("org.apache.spark.sql.functions", "map_values", x@jc)
+ column(jc)
+ })
+
+#' @details
#' \code{explode}: Creates a new row for each element in the given array or map column.
#'
#' @rdname column_collection_functions
http://git-wip-us.apache.org/repos/asf/spark/blob/97ba4918/R/pkg/R/generics.R
----------------------------------------------------------------------
diff --git a/R/pkg/R/generics.R b/R/pkg/R/generics.R
index 9209874..df91c35 100644
--- a/R/pkg/R/generics.R
+++ b/R/pkg/R/generics.R
@@ -1213,6 +1213,16 @@ setGeneric("lpad", function(x, len, pad) { standardGeneric("lpad") })
#' @name NULL
setGeneric("ltrim", function(x) { standardGeneric("ltrim") })
+#' @rdname column_collection_functions
+#' @export
+#' @name NULL
+setGeneric("map_keys", function(x) { standardGeneric("map_keys") })
+
+#' @rdname column_collection_functions
+#' @export
+#' @name NULL
+setGeneric("map_values", function(x) { standardGeneric("map_values") })
+
#' @rdname column_misc_functions
#' @export
#' @name NULL
http://git-wip-us.apache.org/repos/asf/spark/blob/97ba4918/R/pkg/tests/fulltests/test_sparkSQL.R
----------------------------------------------------------------------
diff --git a/R/pkg/tests/fulltests/test_sparkSQL.R b/R/pkg/tests/fulltests/test_sparkSQL.R
index 77052d4..deb0e16 100644
--- a/R/pkg/tests/fulltests/test_sparkSQL.R
+++ b/R/pkg/tests/fulltests/test_sparkSQL.R
@@ -1436,6 +1436,14 @@ test_that("column functions", {
result <- collect(select(df, sort_array(df[[1]])))[[1]]
expect_equal(result, list(list(1L, 2L, 3L), list(4L, 5L, 6L)))
+ # Test map_keys() and map_values()
+ df <- createDataFrame(list(list(map = as.environment(list(x = 1, y = 2)))))
+ result <- collect(select(df, map_keys(df$map)))[[1]]
+ expect_equal(result, list(list("x", "y")))
+
+ result <- collect(select(df, map_values(df$map)))[[1]]
+ expect_equal(result, list(list(1, 2)))
+
# Test that stats::lag is working
expect_equal(length(lag(ldeaths, 12)), 72)
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org