You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by gu...@apache.org on 2017/08/03 14:00:07 UTC
spark git commit: [SPARK-21602][R] Add map_keys and map_values functions to R

Repository: spark
Updated Branches:
  refs/heads/master e7c59b417 -> 97ba49183


[SPARK-21602][R] Add map_keys and map_values functions to R

## What changes were proposed in this pull request?

This PR adds `map_values` and `map_keys` to the R API.

```r
> df <- createDataFrame(cbind(model = rownames(mtcars), mtcars))
> tmp <- mutate(df, v = create_map(df$model, df$cyl))
> head(select(tmp, map_keys(tmp$v)))
```
```
        map_keys(v)
1         Mazda RX4
2     Mazda RX4 Wag
3        Datsun 710
4    Hornet 4 Drive
5 Hornet Sportabout
6           Valiant
```
```r
> head(select(tmp, map_values(tmp$v)))
```
```
  map_values(v)
1             6
2             6
3             4
4             6
5             8
6             6
```

## How was this patch tested?

Manual tests and unit tests in `R/pkg/tests/fulltests/test_sparkSQL.R`

Author: hyukjinkwon <gu...@gmail.com>

Closes #18809 from HyukjinKwon/map-keys-values-r.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/97ba4918
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/97ba4918
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/97ba4918

Branch: refs/heads/master
Commit: 97ba4918368ba15334427bdd91230829ece606f6
Parents: e7c59b4
Author: hyukjinkwon <gu...@gmail.com>
Authored: Thu Aug 3 23:00:00 2017 +0900
Committer: hyukjinkwon <gu...@gmail.com>
Committed: Thu Aug 3 23:00:00 2017 +0900

----------------------------------------------------------------------
 R/pkg/NAMESPACE                       |  2 ++
 R/pkg/R/functions.R                   | 33 +++++++++++++++++++++++++++++-
 R/pkg/R/generics.R                    | 10 +++++++++
 R/pkg/tests/fulltests/test_sparkSQL.R |  8 ++++++++
 4 files changed, 52 insertions(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/97ba4918/R/pkg/NAMESPACE
----------------------------------------------------------------------
diff --git a/R/pkg/NAMESPACE b/R/pkg/NAMESPACE
index 232f5cf..a1dd1af 100644
--- a/R/pkg/NAMESPACE
+++ b/R/pkg/NAMESPACE
@@ -286,6 +286,8 @@ exportMethods("%<=>%",
               "lower",
               "lpad",
               "ltrim",
+              "map_keys",
+              "map_values",
               "max",
               "md5",
               "mean",

http://git-wip-us.apache.org/repos/asf/spark/blob/97ba4918/R/pkg/R/functions.R
----------------------------------------------------------------------
diff --git a/R/pkg/R/functions.R b/R/pkg/R/functions.R
index 86507f1..5a46d73 100644
--- a/R/pkg/R/functions.R
+++ b/R/pkg/R/functions.R
@@ -195,7 +195,10 @@ NULL
 #' head(tmp2)
 #' head(select(tmp, posexplode(tmp$v1)))
 #' head(select(tmp, sort_array(tmp$v1)))
-#' head(select(tmp, sort_array(tmp$v1, asc = FALSE)))}
+#' head(select(tmp, sort_array(tmp$v1, asc = FALSE)))
+#' tmp3 <- mutate(df, v3 = create_map(df$model, df$cyl))
+#' head(select(tmp3, map_keys(tmp3$v3)))
+#' head(select(tmp3, map_values(tmp3$v3)))}
 NULL
 
 #' Window functions for Column operations
@@ -3056,6 +3059,34 @@ setMethod("array_contains",
           })
 
 #' @details
+#' \code{map_keys}: Returns an unordered array containing the keys of the map.
+#'
+#' @rdname column_collection_functions
+#' @aliases map_keys map_keys,Column-method
+#' @export
+#' @note map_keys since 2.3.0
+setMethod("map_keys",
+          signature(x = "Column"),
+          function(x) {
+            jc <- callJStatic("org.apache.spark.sql.functions", "map_keys", x@jc)
+            column(jc)
+         })
+
+#' @details
+#' \code{map_values}: Returns an unordered array containing the values of the map.
+#'
+#' @rdname column_collection_functions
+#' @aliases map_values map_values,Column-method
+#' @export
+#' @note map_values since 2.3.0
+setMethod("map_values",
+          signature(x = "Column"),
+          function(x) {
+            jc <- callJStatic("org.apache.spark.sql.functions", "map_values", x@jc)
+            column(jc)
+          })
+
+#' @details
 #' \code{explode}: Creates a new row for each element in the given array or map column.
 #'
 #' @rdname column_collection_functions

http://git-wip-us.apache.org/repos/asf/spark/blob/97ba4918/R/pkg/R/generics.R
----------------------------------------------------------------------
diff --git a/R/pkg/R/generics.R b/R/pkg/R/generics.R
index 9209874..df91c35 100644
--- a/R/pkg/R/generics.R
+++ b/R/pkg/R/generics.R
@@ -1213,6 +1213,16 @@ setGeneric("lpad", function(x, len, pad) { standardGeneric("lpad") })
 #' @name NULL
 setGeneric("ltrim", function(x) { standardGeneric("ltrim") })
 
+#' @rdname column_collection_functions
+#' @export
+#' @name NULL
+setGeneric("map_keys", function(x) { standardGeneric("map_keys") })
+
+#' @rdname column_collection_functions
+#' @export
+#' @name NULL
+setGeneric("map_values", function(x) { standardGeneric("map_values") })
+
 #' @rdname column_misc_functions
 #' @export
 #' @name NULL

http://git-wip-us.apache.org/repos/asf/spark/blob/97ba4918/R/pkg/tests/fulltests/test_sparkSQL.R
----------------------------------------------------------------------
diff --git a/R/pkg/tests/fulltests/test_sparkSQL.R b/R/pkg/tests/fulltests/test_sparkSQL.R
index 77052d4..deb0e16 100644
--- a/R/pkg/tests/fulltests/test_sparkSQL.R
+++ b/R/pkg/tests/fulltests/test_sparkSQL.R
@@ -1436,6 +1436,14 @@ test_that("column functions", {
   result <- collect(select(df, sort_array(df[[1]])))[[1]]
   expect_equal(result, list(list(1L, 2L, 3L), list(4L, 5L, 6L)))
 
+  # Test map_keys() and map_values()
+  df <- createDataFrame(list(list(map = as.environment(list(x = 1, y = 2)))))
+  result <- collect(select(df, map_keys(df$map)))[[1]]
+  expect_equal(result, list(list("x", "y")))
+
+  result <- collect(select(df, map_values(df$map)))[[1]]
+  expect_equal(result, list(list(1, 2)))
+
   # Test that stats::lag is working
   expect_equal(length(lag(ldeaths, 12)), 72)
 


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org