You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by gu...@apache.org on 2018/05/06 02:25:17 UTC

spark git commit: [SPARK-24185][SPARKR][SQL] add flatten function to SparkR

Repository: spark
Updated Branches:
  refs/heads/master 47b5b6852 -> dd4b1b9c7


[SPARK-24185][SPARKR][SQL] add flatten function to SparkR

## What changes were proposed in this pull request?

Add the array `flatten` function to SparkR.

## How was this patch tested?

Unit tests were added in R/pkg/tests/fulltests/test_sparkSQL.R

Author: Huaxin Gao <hu...@us.ibm.com>

Closes #21244 from huaxingao/spark-24185.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/dd4b1b9c
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/dd4b1b9c
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/dd4b1b9c

Branch: refs/heads/master
Commit: dd4b1b9c7ccad3363a6a21524aed047fcd282f68
Parents: 47b5b68
Author: Huaxin Gao <hu...@us.ibm.com>
Authored: Sun May 6 10:25:01 2018 +0800
Committer: hyukjinkwon <gu...@apache.org>
Committed: Sun May 6 10:25:01 2018 +0800

----------------------------------------------------------------------
 R/pkg/NAMESPACE                       |  1 +
 R/pkg/R/functions.R                   | 14 ++++++++++++++
 R/pkg/R/generics.R                    |  4 ++++
 R/pkg/tests/fulltests/test_sparkSQL.R |  6 ++++++
 4 files changed, 25 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/dd4b1b9c/R/pkg/NAMESPACE
----------------------------------------------------------------------
diff --git a/R/pkg/NAMESPACE b/R/pkg/NAMESPACE
index f36d462..8cd0035 100644
--- a/R/pkg/NAMESPACE
+++ b/R/pkg/NAMESPACE
@@ -258,6 +258,7 @@ exportMethods("%<=>%",
               "expr",
               "factorial",
               "first",
+              "flatten",
               "floor",
               "format_number",
               "format_string",

http://git-wip-us.apache.org/repos/asf/spark/blob/dd4b1b9c/R/pkg/R/functions.R
----------------------------------------------------------------------
diff --git a/R/pkg/R/functions.R b/R/pkg/R/functions.R
index ec4bd4e..0ec99d1 100644
--- a/R/pkg/R/functions.R
+++ b/R/pkg/R/functions.R
@@ -208,6 +208,7 @@ NULL
 #' head(select(tmp, array_contains(tmp$v1, 21), size(tmp$v1)))
 #' head(select(tmp, array_max(tmp$v1), array_min(tmp$v1)))
 #' head(select(tmp, array_position(tmp$v1, 21)))
+#' head(select(tmp, flatten(tmp$v1)))
 #' tmp2 <- mutate(tmp, v2 = explode(tmp$v1))
 #' head(tmp2)
 #' head(select(tmp, posexplode(tmp$v1)))
@@ -3036,6 +3037,19 @@ setMethod("array_position",
           })
 
 #' @details
+#' \code{flatten}: Transforms an array of arrays into a single array.
+#'
+#' @rdname column_collection_functions
+#' @aliases flatten flatten,Column-method
+#' @note flatten since 2.4.0
+setMethod("flatten",
+          signature(x = "Column"),
+          function(x) {
+            jc <- callJStatic("org.apache.spark.sql.functions", "flatten", x@jc)
+            column(jc)
+          })
+
+#' @details
 #' \code{map_keys}: Returns an unordered array containing the keys of the map.
 #'
 #' @rdname column_collection_functions

http://git-wip-us.apache.org/repos/asf/spark/blob/dd4b1b9c/R/pkg/R/generics.R
----------------------------------------------------------------------
diff --git a/R/pkg/R/generics.R b/R/pkg/R/generics.R
index 562d339..4ef12d1 100644
--- a/R/pkg/R/generics.R
+++ b/R/pkg/R/generics.R
@@ -918,6 +918,10 @@ setGeneric("explode_outer", function(x) { standardGeneric("explode_outer") })
 #' @name NULL
 setGeneric("expr", function(x) { standardGeneric("expr") })
 
+#' @rdname column_collection_functions
+#' @name NULL
+setGeneric("flatten", function(x) { standardGeneric("flatten") })
+
 #' @rdname column_datetime_diff_functions
 #' @name NULL
 setGeneric("from_utc_timestamp", function(y, x) { standardGeneric("from_utc_timestamp") })

http://git-wip-us.apache.org/repos/asf/spark/blob/dd4b1b9c/R/pkg/tests/fulltests/test_sparkSQL.R
----------------------------------------------------------------------
diff --git a/R/pkg/tests/fulltests/test_sparkSQL.R b/R/pkg/tests/fulltests/test_sparkSQL.R
index 8cc2db7..3a8866b 100644
--- a/R/pkg/tests/fulltests/test_sparkSQL.R
+++ b/R/pkg/tests/fulltests/test_sparkSQL.R
@@ -1502,6 +1502,12 @@ test_that("column functions", {
   result <- collect(select(df, sort_array(df[[1]])))[[1]]
   expect_equal(result, list(list(1L, 2L, 3L), list(4L, 5L, 6L)))
 
+  # Test flatten()
+  df <- createDataFrame(list(list(list(list(1L, 2L), list(3L, 4L))),
+                        list(list(list(5L, 6L), list(7L, 8L)))))
+  result <- collect(select(df, flatten(df[[1]])))[[1]]
+  expect_equal(result, list(list(1L, 2L, 3L, 4L), list(5L, 6L, 7L, 8L)))
+
   # Test map_keys(), map_values() and element_at()
   df <- createDataFrame(list(list(map = as.environment(list(x = 1, y = 2)))))
   result <- collect(select(df, map_keys(df$map)))[[1]]


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org