Posted to commits@spark.apache.org by gu...@apache.org on 2018/05/12 11:21:49 UTC

spark git commit: [SPARK-24198][SPARKR][SQL] Adding slice function to SparkR

Repository: spark
Updated Branches:
  refs/heads/master e3dabdf6e -> 5902125ac


[SPARK-24198][SPARKR][SQL] Adding slice function to SparkR

## What changes were proposed in this pull request?
This PR adds the `slice` function to SparkR. The function returns a subset of consecutive elements of the given array, starting at a given index and spanning a given length.
```
> df <- createDataFrame(cbind(model = rownames(mtcars), mtcars))
> tmp <- mutate(df, v1 = create_array(df$mpg, df$cyl, df$hp))
> head(select(tmp, slice(tmp$v1, 2L, 2L)))
```
```
  slice(v1, 2, 2)
1          6, 110
2          6, 110
3           4, 93
4          6, 110
5          8, 175
6          6, 105
```
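
The roxygen details added below also note that a negative `start` counts from the end of the array. A minimal sketch of that usage (not part of the commit), assuming the same `tmp` DataFrame as above:
```
> # Negative start: take 2 elements counting from the end of each array
> head(select(tmp, slice(tmp$v1, -2L, 2L)))
```
Since `v1` has three elements per row, this should select the same `(cyl, hp)` pair as `slice(tmp$v1, 2L, 2L)` in the output above.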

## How was this patch tested?

A test added into R/pkg/tests/fulltests/test_sparkSQL.R

Author: Marek Novotny <mn...@gmail.com>

Closes #21298 from mn-mikke/SPARK-24198.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/5902125a
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/5902125a
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/5902125a

Branch: refs/heads/master
Commit: 5902125ac7ad25a0cb7aa3d98825c8290ee33c12
Parents: e3dabdf
Author: Marek Novotny <mn...@gmail.com>
Authored: Sat May 12 19:21:42 2018 +0800
Committer: hyukjinkwon <gu...@apache.org>
Committed: Sat May 12 19:21:42 2018 +0800

----------------------------------------------------------------------
 R/pkg/NAMESPACE                       |  1 +
 R/pkg/R/functions.R                   | 17 +++++++++++++++++
 R/pkg/R/generics.R                    |  4 ++++
 R/pkg/tests/fulltests/test_sparkSQL.R |  5 +++++
 4 files changed, 27 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/5902125a/R/pkg/NAMESPACE
----------------------------------------------------------------------
diff --git a/R/pkg/NAMESPACE b/R/pkg/NAMESPACE
index 5f82096..c575fe2 100644
--- a/R/pkg/NAMESPACE
+++ b/R/pkg/NAMESPACE
@@ -352,6 +352,7 @@ exportMethods("%<=>%",
               "sinh",
               "size",
               "skewness",
+              "slice",
               "sort_array",
               "soundex",
               "spark_partition_id",

http://git-wip-us.apache.org/repos/asf/spark/blob/5902125a/R/pkg/R/functions.R
----------------------------------------------------------------------
diff --git a/R/pkg/R/functions.R b/R/pkg/R/functions.R
index 4964594..77d70cb 100644
--- a/R/pkg/R/functions.R
+++ b/R/pkg/R/functions.R
@@ -212,6 +212,7 @@ NULL
 #' tmp2 <- mutate(tmp, v2 = explode(tmp$v1))
 #' head(tmp2)
 #' head(select(tmp, posexplode(tmp$v1)))
+#' head(select(tmp, slice(tmp$v1, 2L, 2L)))
 #' head(select(tmp, sort_array(tmp$v1)))
 #' head(select(tmp, sort_array(tmp$v1, asc = FALSE)))
 #' tmp3 <- mutate(df, v3 = create_map(df$model, df$cyl))
@@ -3143,6 +3144,22 @@ setMethod("size",
           })
 
 #' @details
+#' \code{slice}: Returns an array containing all the elements in x from the index start
+#' (or starting from the end if start is negative) with the specified length.
+#'
+#' @rdname column_collection_functions
+#' @param start an index indicating the first element occurring in the result.
+#' @param length the number of consecutive elements to include in the result.
+#' @aliases slice slice,Column-method
+#' @note slice since 2.4.0
+setMethod("slice",
+          signature(x = "Column"),
+          function(x, start, length) {
+            jc <- callJStatic("org.apache.spark.sql.functions", "slice", x@jc, start, length)
+            column(jc)
+          })
+
+#' @details
 #' \code{sort_array}: Sorts the input array in ascending or descending order according to
 #' the natural ordering of the array elements. NA elements will be placed at the beginning of
 #' the returned array in ascending order or at the end of the returned array in descending order.

http://git-wip-us.apache.org/repos/asf/spark/blob/5902125a/R/pkg/R/generics.R
----------------------------------------------------------------------
diff --git a/R/pkg/R/generics.R b/R/pkg/R/generics.R
index 5faa51e..fbc4113 100644
--- a/R/pkg/R/generics.R
+++ b/R/pkg/R/generics.R
@@ -1196,6 +1196,10 @@ setGeneric("skewness", function(x) { standardGeneric("skewness") })
 
 #' @rdname column_collection_functions
 #' @name NULL
+setGeneric("slice", function(x, start, length) { standardGeneric("slice") })
+
+#' @rdname column_collection_functions
+#' @name NULL
 setGeneric("sort_array", function(x, asc = TRUE) { standardGeneric("sort_array") })
 
 #' @rdname column_string_functions

http://git-wip-us.apache.org/repos/asf/spark/blob/5902125a/R/pkg/tests/fulltests/test_sparkSQL.R
----------------------------------------------------------------------
diff --git a/R/pkg/tests/fulltests/test_sparkSQL.R b/R/pkg/tests/fulltests/test_sparkSQL.R
index b8bfded..2a550b9 100644
--- a/R/pkg/tests/fulltests/test_sparkSQL.R
+++ b/R/pkg/tests/fulltests/test_sparkSQL.R
@@ -1507,6 +1507,11 @@ test_that("column functions", {
   result <- collect(select(df, sort_array(df[[1]])))[[1]]
   expect_equal(result, list(list(NA, 1L, 2L, 3L), list(NA, NA, 4L, 5L, 6L)))
 
+  # Test slice()
+  df <- createDataFrame(list(list(list(1L, 2L, 3L)), list(list(4L, 5L))))
+  result <- collect(select(df, slice(df[[1]], 2L, 2L)))[[1]]
+  expect_equal(result, list(list(2L, 3L), list(5L)))
+
   # Test flattern
   df <- createDataFrame(list(list(list(list(1L, 2L), list(3L, 4L))),
                         list(list(list(5L, 6L), list(7L, 8L)))))


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org