You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by gu...@apache.org on 2018/05/14 01:49:02 UTC

spark git commit: [SPARK-24186][R][SQL] change reverse and concat to collection functions in R

Repository: spark
Updated Branches:
  refs/heads/master 2fa33649d -> 3f0e801c1


[SPARK-24186][R][SQL] change reverse and concat to collection functions in R

## What changes were proposed in this pull request?

`reverse` and `concat` already exist in functions.R, where they are documented as column string functions. Since these two functions are now categorized as collection functions in Scala and Python, this change does the same in R.

## How was this patch tested?

Added tests to test_sparkSQL.R covering reverse() on array and string columns and concat() on array columns.

Author: Huaxin Gao <hu...@us.ibm.com>

Closes #21307 from huaxingao/spark_24186.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/3f0e801c
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/3f0e801c
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/3f0e801c

Branch: refs/heads/master
Commit: 3f0e801c11e600ed28491924e550d3ba93f19c19
Parents: 2fa3364
Author: Huaxin Gao <hu...@us.ibm.com>
Authored: Mon May 14 09:48:54 2018 +0800
Committer: hyukjinkwon <gu...@apache.org>
Committed: Mon May 14 09:48:54 2018 +0800

----------------------------------------------------------------------
 R/pkg/R/functions.R                   | 35 +++++++++++++++---------------
 R/pkg/R/generics.R                    |  4 ++--
 R/pkg/tests/fulltests/test_sparkSQL.R | 17 +++++++++++++--
 3 files changed, 35 insertions(+), 21 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/3f0e801c/R/pkg/R/functions.R
----------------------------------------------------------------------
diff --git a/R/pkg/R/functions.R b/R/pkg/R/functions.R
index 77d70cb..fcb3521 100644
--- a/R/pkg/R/functions.R
+++ b/R/pkg/R/functions.R
@@ -208,7 +208,7 @@ NULL
 #' head(select(tmp, array_contains(tmp$v1, 21), size(tmp$v1)))
 #' head(select(tmp, array_max(tmp$v1), array_min(tmp$v1)))
 #' head(select(tmp, array_position(tmp$v1, 21), array_sort(tmp$v1)))
-#' head(select(tmp, flatten(tmp$v1)))
+#' head(select(tmp, flatten(tmp$v1), reverse(tmp$v1)))
 #' tmp2 <- mutate(tmp, v2 = explode(tmp$v1))
 #' head(tmp2)
 #' head(select(tmp, posexplode(tmp$v1)))
@@ -218,7 +218,10 @@ NULL
 #' tmp3 <- mutate(df, v3 = create_map(df$model, df$cyl))
 #' head(select(tmp3, map_keys(tmp3$v3)))
 #' head(select(tmp3, map_values(tmp3$v3)))
-#' head(select(tmp3, element_at(tmp3$v3, "Valiant")))}
+#' head(select(tmp3, element_at(tmp3$v3, "Valiant")))
+#' tmp4 <- mutate(df, v4 = create_array(df$mpg, df$cyl), v5 = create_array(df$hp))
+#' head(select(tmp4, concat(tmp4$v4, tmp4$v5)))
+#' head(select(tmp, concat(df$mpg, df$cyl, df$hp)))}
 NULL
 
 #' Window functions for Column operations
@@ -1260,9 +1263,9 @@ setMethod("quarter",
           })
 
 #' @details
-#' \code{reverse}: Reverses the string column and returns it as a new string column.
+#' \code{reverse}: Returns a reversed string or an array with reverse order of elements.
 #'
-#' @rdname column_string_functions
+#' @rdname column_collection_functions
 #' @aliases reverse reverse,Column-method
 #' @note reverse since 1.5.0
 setMethod("reverse",
@@ -2055,20 +2058,10 @@ setMethod("countDistinct",
 
 #' @details
 #' \code{concat}: Concatenates multiple input columns together into a single column.
-#' If all inputs are binary, concat returns an output as binary. Otherwise, it returns as string.
+#' The function works with strings, binary and compatible array columns.
 #'
-#' @rdname column_string_functions
+#' @rdname column_collection_functions
 #' @aliases concat concat,Column-method
-#' @examples
-#'
-#' \dontrun{
-#' # concatenate strings
-#' tmp <- mutate(df, s1 = concat(df$Class, df$Sex),
-#'                   s2 = concat(df$Class, df$Sex, df$Age),
-#'                   s3 = concat(df$Class, df$Sex, df$Age, df$Class),
-#'                   s4 = concat_ws("_", df$Class, df$Sex),
-#'                   s5 = concat_ws("+", df$Class, df$Sex, df$Age, df$Survived))
-#' head(tmp)}
 #' @note concat since 1.5.0
 setMethod("concat",
           signature(x = "Column"),
@@ -2409,6 +2402,13 @@ setMethod("shiftRightUnsigned", signature(y = "Column", x = "numeric"),
 #' @param sep separator to use.
 #' @rdname column_string_functions
 #' @aliases concat_ws concat_ws,character,Column-method
+#' @examples
+#'
+#' \dontrun{
+#' # concatenate strings
+#' tmp <- mutate(df, s1 = concat_ws("_", df$Class, df$Sex),
+#'                   s2 = concat_ws("+", df$Class, df$Sex, df$Age, df$Survived))
+#' head(tmp)}
 #' @note concat_ws since 1.5.0
 setMethod("concat_ws", signature(sep = "character", x = "Column"),
           function(sep, x, ...) {
@@ -3063,7 +3063,8 @@ setMethod("array_sort",
           })
 
 #' @details
-#' \code{flatten}: Transforms an array of arrays into a single array.
+#' \code{flatten}: Creates a single array from an array of arrays.
+#' If a structure of nested arrays is deeper than two levels, only one level of nesting is removed.
 #'
 #' @rdname column_collection_functions
 #' @aliases flatten flatten,Column-method

http://git-wip-us.apache.org/repos/asf/spark/blob/3f0e801c/R/pkg/R/generics.R
----------------------------------------------------------------------
diff --git a/R/pkg/R/generics.R b/R/pkg/R/generics.R
index fbc4113..61da30b 100644
--- a/R/pkg/R/generics.R
+++ b/R/pkg/R/generics.R
@@ -817,7 +817,7 @@ setGeneric("collect_set", function(x) { standardGeneric("collect_set") })
 #' @rdname column
 setGeneric("column", function(x) { standardGeneric("column") })
 
-#' @rdname column_string_functions
+#' @rdname column_collection_functions
 #' @name NULL
 setGeneric("concat", function(x, ...) { standardGeneric("concat") })
 
@@ -1134,7 +1134,7 @@ setGeneric("regexp_replace",
 #' @name NULL
 setGeneric("repeat_string", function(x, n) { standardGeneric("repeat_string") })
 
-#' @rdname column_string_functions
+#' @rdname column_collection_functions
 #' @name NULL
 setGeneric("reverse", function(x) { standardGeneric("reverse") })
 

http://git-wip-us.apache.org/repos/asf/spark/blob/3f0e801c/R/pkg/tests/fulltests/test_sparkSQL.R
----------------------------------------------------------------------
diff --git a/R/pkg/tests/fulltests/test_sparkSQL.R b/R/pkg/tests/fulltests/test_sparkSQL.R
index 2a550b9..13b55ac 100644
--- a/R/pkg/tests/fulltests/test_sparkSQL.R
+++ b/R/pkg/tests/fulltests/test_sparkSQL.R
@@ -1479,7 +1479,7 @@ test_that("column functions", {
   df5 <- createDataFrame(list(list(a = "010101")))
   expect_equal(collect(select(df5, conv(df5$a, 2, 16)))[1, 1], "15")
 
-  # Test array_contains(), array_max(), array_min(), array_position() and element_at()
+  # Test array_contains(), array_max(), array_min(), array_position(), element_at() and reverse()
   df <- createDataFrame(list(list(list(1L, 2L, 3L)), list(list(6L, 5L, 4L))))
   result <- collect(select(df, array_contains(df[[1]], 1L)))[[1]]
   expect_equal(result, c(TRUE, FALSE))
@@ -1496,6 +1496,13 @@ test_that("column functions", {
   result <- collect(select(df, element_at(df[[1]], 1L)))[[1]]
   expect_equal(result, c(1, 6))
 
+  result <- collect(select(df, reverse(df[[1]])))[[1]]
+  expect_equal(result, list(list(3L, 2L, 1L), list(4L, 5L, 6L)))
+
+  df2 <- createDataFrame(list(list("abc")))
+  result <- collect(select(df2, reverse(df2[[1]])))[[1]]
+  expect_equal(result, "cba")
+
   # Test array_sort() and sort_array()
   df <- createDataFrame(list(list(list(2L, 1L, 3L, NA)), list(list(NA, 6L, 5L, NA, 4L))))
 
@@ -1512,7 +1519,13 @@ test_that("column functions", {
   result <- collect(select(df, slice(df[[1]], 2L, 2L)))[[1]]
   expect_equal(result, list(list(2L, 3L), list(5L)))
 
-  # Test flattern
+  # Test concat()
+  df <- createDataFrame(list(list(list(1L, 2L, 3L), list(4L, 5L, 6L)),
+                        list(list(7L, 8L, 9L), list(10L, 11L, 12L))))
+  result <- collect(select(df, concat(df[[1]], df[[2]])))[[1]]
+  expect_equal(result, list(list(1L, 2L, 3L, 4L, 5L, 6L), list(7L, 8L, 9L, 10L, 11L, 12L)))
+
+  # Test flatten()
   df <- createDataFrame(list(list(list(list(1L, 2L), list(3L, 4L))),
                         list(list(list(5L, 6L), list(7L, 8L)))))
   result <- collect(select(df, flatten(df[[1]])))[[1]]


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org