You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by gu...@apache.org on 2018/05/12 11:21:49 UTC
spark git commit: [SPARK-24198][SPARKR][SQL] Adding slice function to
SparkR
Repository: spark
Updated Branches:
refs/heads/master e3dabdf6e -> 5902125ac
[SPARK-24198][SPARKR][SQL] Adding slice function to SparkR
## What changes were proposed in this pull request?
The PR adds the `slice` function to SparkR. The function returns a subset of consecutive elements from the given array.
```
> df <- createDataFrame(cbind(model = rownames(mtcars), mtcars))
> tmp <- mutate(df, v1 = create_array(df$mpg, df$cyl, df$hp))
> head(select(tmp, slice(tmp$v1, 2L, 2L)))
```
```
slice(v1, 2, 2)
1 6, 110
2 6, 110
3 4, 93
4 6, 110
5 8, 175
6 6, 105
```
## How was this patch tested?
A test was added to R/pkg/tests/fulltests/test_sparkSQL.R.
Author: Marek Novotny <mn...@gmail.com>
Closes #21298 from mn-mikke/SPARK-24198.
Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/5902125a
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/5902125a
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/5902125a
Branch: refs/heads/master
Commit: 5902125ac7ad25a0cb7aa3d98825c8290ee33c12
Parents: e3dabdf
Author: Marek Novotny <mn...@gmail.com>
Authored: Sat May 12 19:21:42 2018 +0800
Committer: hyukjinkwon <gu...@apache.org>
Committed: Sat May 12 19:21:42 2018 +0800
----------------------------------------------------------------------
R/pkg/NAMESPACE | 1 +
R/pkg/R/functions.R | 17 +++++++++++++++++
R/pkg/R/generics.R | 4 ++++
R/pkg/tests/fulltests/test_sparkSQL.R | 5 +++++
4 files changed, 27 insertions(+)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/spark/blob/5902125a/R/pkg/NAMESPACE
----------------------------------------------------------------------
diff --git a/R/pkg/NAMESPACE b/R/pkg/NAMESPACE
index 5f82096..c575fe2 100644
--- a/R/pkg/NAMESPACE
+++ b/R/pkg/NAMESPACE
@@ -352,6 +352,7 @@ exportMethods("%<=>%",
"sinh",
"size",
"skewness",
+ "slice",
"sort_array",
"soundex",
"spark_partition_id",
http://git-wip-us.apache.org/repos/asf/spark/blob/5902125a/R/pkg/R/functions.R
----------------------------------------------------------------------
diff --git a/R/pkg/R/functions.R b/R/pkg/R/functions.R
index 4964594..77d70cb 100644
--- a/R/pkg/R/functions.R
+++ b/R/pkg/R/functions.R
@@ -212,6 +212,7 @@ NULL
#' tmp2 <- mutate(tmp, v2 = explode(tmp$v1))
#' head(tmp2)
#' head(select(tmp, posexplode(tmp$v1)))
+#' head(select(tmp, slice(tmp$v1, 2L, 2L)))
#' head(select(tmp, sort_array(tmp$v1)))
#' head(select(tmp, sort_array(tmp$v1, asc = FALSE)))
#' tmp3 <- mutate(df, v3 = create_map(df$model, df$cyl))
@@ -3143,6 +3144,22 @@ setMethod("size",
})
#' @details
+#' \code{slice}: Returns an array containing all the elements in x from the index start
+#' (or starting from the end if start is negative) with the specified length.
+#'
+#' @rdname column_collection_functions
+#' @param start an index indicating the first element occurring in the result.
+#' @param length a number of consecutive elements chosen for the result.
+#' @aliases slice slice,Column-method
+#' @note slice since 2.4.0
+setMethod("slice",
+ signature(x = "Column"),
+ function(x, start, length) {
+ jc <- callJStatic("org.apache.spark.sql.functions", "slice", x@jc, start, length)
+ column(jc)
+ })
+
+#' @details
#' \code{sort_array}: Sorts the input array in ascending or descending order according to
#' the natural ordering of the array elements. NA elements will be placed at the beginning of
#' the returned array in ascending order or at the end of the returned array in descending order.
http://git-wip-us.apache.org/repos/asf/spark/blob/5902125a/R/pkg/R/generics.R
----------------------------------------------------------------------
diff --git a/R/pkg/R/generics.R b/R/pkg/R/generics.R
index 5faa51e..fbc4113 100644
--- a/R/pkg/R/generics.R
+++ b/R/pkg/R/generics.R
@@ -1196,6 +1196,10 @@ setGeneric("skewness", function(x) { standardGeneric("skewness") })
#' @rdname column_collection_functions
#' @name NULL
+setGeneric("slice", function(x, start, length) { standardGeneric("slice") })
+
+#' @rdname column_collection_functions
+#' @name NULL
setGeneric("sort_array", function(x, asc = TRUE) { standardGeneric("sort_array") })
#' @rdname column_string_functions
http://git-wip-us.apache.org/repos/asf/spark/blob/5902125a/R/pkg/tests/fulltests/test_sparkSQL.R
----------------------------------------------------------------------
diff --git a/R/pkg/tests/fulltests/test_sparkSQL.R b/R/pkg/tests/fulltests/test_sparkSQL.R
index b8bfded..2a550b9 100644
--- a/R/pkg/tests/fulltests/test_sparkSQL.R
+++ b/R/pkg/tests/fulltests/test_sparkSQL.R
@@ -1507,6 +1507,11 @@ test_that("column functions", {
result <- collect(select(df, sort_array(df[[1]])))[[1]]
expect_equal(result, list(list(NA, 1L, 2L, 3L), list(NA, NA, 4L, 5L, 6L)))
+ # Test slice()
+ df <- createDataFrame(list(list(list(1L, 2L, 3L)), list(list(4L, 5L))))
+ result <- collect(select(df, slice(df[[1]], 2L, 2L)))[[1]]
+ expect_equal(result, list(list(2L, 3L), list(5L)))
+
# Test flattern
df <- createDataFrame(list(list(list(list(1L, 2L), list(3L, 4L))),
list(list(list(5L, 6L), list(7L, 8L)))))
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org