You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by gu...@apache.org on 2018/05/11 01:05:42 UTC

spark git commit: [SPARK-24197][SPARKR][SQL] Adding array_sort function to SparkR

Repository: spark
Updated Branches:
  refs/heads/master a4206d58e -> 75cf369c7


[SPARK-24197][SPARKR][SQL] Adding array_sort function to SparkR

## What changes were proposed in this pull request?

This PR adds the array_sort function to SparkR.

## How was this patch tested?

Tests were added to R/pkg/tests/fulltests/test_sparkSQL.R.

## Example
```
> df <- createDataFrame(list(list(list(2L, 1L, 3L, NA)), list(list(NA, 6L, 5L, NA, 4L))))
> head(collect(select(df, array_sort(df[[1]]))))
```
Result:
```
   array_sort(_1)
1     1, 2, 3, NA
2 4, 5, 6, NA, NA
```

Author: Marek Novotny <mn...@gmail.com>

Closes #21294 from mn-mikke/SPARK-24197.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/75cf369c
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/75cf369c
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/75cf369c

Branch: refs/heads/master
Commit: 75cf369c742e7c7b68f384d123447c97be95c9f0
Parents: a4206d5
Author: Marek Novotny <mn...@gmail.com>
Authored: Fri May 11 09:05:35 2018 +0800
Committer: hyukjinkwon <gu...@apache.org>
Committed: Fri May 11 09:05:35 2018 +0800

----------------------------------------------------------------------
 R/pkg/NAMESPACE                       |  1 +
 R/pkg/R/functions.R                   | 21 ++++++++++++++++++---
 R/pkg/R/generics.R                    |  4 ++++
 R/pkg/tests/fulltests/test_sparkSQL.R | 13 +++++++++----
 4 files changed, 32 insertions(+), 7 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/75cf369c/R/pkg/NAMESPACE
----------------------------------------------------------------------
diff --git a/R/pkg/NAMESPACE b/R/pkg/NAMESPACE
index 8cd0035..5f82096 100644
--- a/R/pkg/NAMESPACE
+++ b/R/pkg/NAMESPACE
@@ -204,6 +204,7 @@ exportMethods("%<=>%",
               "array_max",
               "array_min",
               "array_position",
+              "array_sort",
               "asc",
               "ascii",
               "asin",

http://git-wip-us.apache.org/repos/asf/spark/blob/75cf369c/R/pkg/R/functions.R
----------------------------------------------------------------------
diff --git a/R/pkg/R/functions.R b/R/pkg/R/functions.R
index 04d0e46..1f97054 100644
--- a/R/pkg/R/functions.R
+++ b/R/pkg/R/functions.R
@@ -207,7 +207,7 @@ NULL
 #' tmp <- mutate(df, v1 = create_array(df$mpg, df$cyl, df$hp))
 #' head(select(tmp, array_contains(tmp$v1, 21), size(tmp$v1)))
 #' head(select(tmp, array_max(tmp$v1), array_min(tmp$v1)))
-#' head(select(tmp, array_position(tmp$v1, 21)))
+#' head(select(tmp, array_position(tmp$v1, 21), array_sort(tmp$v1)))
 #' head(select(tmp, flatten(tmp$v1)))
 #' tmp2 <- mutate(tmp, v2 = explode(tmp$v1))
 #' head(tmp2)
@@ -3044,6 +3044,20 @@ setMethod("array_position",
           })
 
 #' @details
+#' \code{array_sort}: Sorts the input array in ascending order. The elements of the input array
+#' must be orderable. NA elements will be placed at the end of the returned array.
+#'
+#' @rdname column_collection_functions
+#' @aliases array_sort array_sort,Column-method
+#' @note array_sort since 2.4.0
+setMethod("array_sort",
+          signature(x = "Column"),
+          function(x) {
+            jc <- callJStatic("org.apache.spark.sql.functions", "array_sort", x@jc)
+            column(jc)
+          })
+
+#' @details
 #' \code{flatten}: Transforms an array of arrays into a single array.
 #'
 #' @rdname column_collection_functions
@@ -3125,8 +3139,9 @@ setMethod("size",
           })
 
 #' @details
-#' \code{sort_array}: Sorts the input array in ascending or descending order according
-#' to the natural ordering of the array elements.
+#' \code{sort_array}: Sorts the input array in ascending or descending order according to
+#' the natural ordering of the array elements. NA elements will be placed at the beginning of
+#' the returned array in ascending order or at the end of the returned array in descending order.
 #'
 #' @rdname column_collection_functions
 #' @param asc a logical flag indicating the sorting order.

http://git-wip-us.apache.org/repos/asf/spark/blob/75cf369c/R/pkg/R/generics.R
----------------------------------------------------------------------
diff --git a/R/pkg/R/generics.R b/R/pkg/R/generics.R
index 4ef12d1..5faa51e 100644
--- a/R/pkg/R/generics.R
+++ b/R/pkg/R/generics.R
@@ -769,6 +769,10 @@ setGeneric("array_min", function(x) { standardGeneric("array_min") })
 #' @name NULL
 setGeneric("array_position", function(x, value) { standardGeneric("array_position") })
 
+#' @rdname column_collection_functions
+#' @name NULL
+setGeneric("array_sort", function(x) { standardGeneric("array_sort") })
+
 #' @rdname column_string_functions
 #' @name NULL
 setGeneric("ascii", function(x) { standardGeneric("ascii") })

http://git-wip-us.apache.org/repos/asf/spark/blob/75cf369c/R/pkg/tests/fulltests/test_sparkSQL.R
----------------------------------------------------------------------
diff --git a/R/pkg/tests/fulltests/test_sparkSQL.R b/R/pkg/tests/fulltests/test_sparkSQL.R
index 43725e0..b8bfded 100644
--- a/R/pkg/tests/fulltests/test_sparkSQL.R
+++ b/R/pkg/tests/fulltests/test_sparkSQL.R
@@ -1479,8 +1479,7 @@ test_that("column functions", {
   df5 <- createDataFrame(list(list(a = "010101")))
   expect_equal(collect(select(df5, conv(df5$a, 2, 16)))[1, 1], "15")
 
-  # Test array_contains(), array_max(), array_min(), array_position(), element_at()
-  # and sort_array()
+  # Test array_contains(), array_max(), array_min(), array_position() and element_at()
   df <- createDataFrame(list(list(list(1L, 2L, 3L)), list(list(6L, 5L, 4L))))
   result <- collect(select(df, array_contains(df[[1]], 1L)))[[1]]
   expect_equal(result, c(TRUE, FALSE))
@@ -1497,10 +1496,16 @@ test_that("column functions", {
   result <- collect(select(df, element_at(df[[1]], 1L)))[[1]]
   expect_equal(result, c(1, 6))
 
+  # Test array_sort() and sort_array()
+  df <- createDataFrame(list(list(list(2L, 1L, 3L, NA)), list(list(NA, 6L, 5L, NA, 4L))))
+
+  result <- collect(select(df, array_sort(df[[1]])))[[1]]
+  expect_equal(result, list(list(1L, 2L, 3L, NA), list(4L, 5L, 6L, NA, NA)))
+
   result <- collect(select(df, sort_array(df[[1]], FALSE)))[[1]]
-  expect_equal(result, list(list(3L, 2L, 1L), list(6L, 5L, 4L)))
+  expect_equal(result, list(list(3L, 2L, 1L, NA), list(6L, 5L, 4L, NA, NA)))
   result <- collect(select(df, sort_array(df[[1]])))[[1]]
-  expect_equal(result, list(list(1L, 2L, 3L), list(4L, 5L, 6L)))
+  expect_equal(result, list(list(NA, 1L, 2L, 3L), list(NA, NA, 4L, 5L, 6L)))
 
   # Test flattern
   df <- createDataFrame(list(list(list(list(1L, 2L), list(3L, 4L))),


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org