You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by gu...@apache.org on 2018/05/06 02:25:17 UTC
spark git commit: [SPARK-24185][SPARKR][SQL] add flatten function to SparkR
Repository: spark
Updated Branches:
refs/heads/master 47b5b6852 -> dd4b1b9c7
[SPARK-24185][SPARKR][SQL] add flatten function to SparkR
## What changes were proposed in this pull request?
Add the array `flatten` function to SparkR.
## How was this patch tested?
Unit tests were added in R/pkg/tests/fulltests/test_sparkSQL.R
Author: Huaxin Gao <hu...@us.ibm.com>
Closes #21244 from huaxingao/spark-24185.
Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/dd4b1b9c
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/dd4b1b9c
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/dd4b1b9c
Branch: refs/heads/master
Commit: dd4b1b9c7ccad3363a6a21524aed047fcd282f68
Parents: 47b5b68
Author: Huaxin Gao <hu...@us.ibm.com>
Authored: Sun May 6 10:25:01 2018 +0800
Committer: hyukjinkwon <gu...@apache.org>
Committed: Sun May 6 10:25:01 2018 +0800
----------------------------------------------------------------------
R/pkg/NAMESPACE | 1 +
R/pkg/R/functions.R | 14 ++++++++++++++
R/pkg/R/generics.R | 4 ++++
R/pkg/tests/fulltests/test_sparkSQL.R | 6 ++++++
4 files changed, 25 insertions(+)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/spark/blob/dd4b1b9c/R/pkg/NAMESPACE
----------------------------------------------------------------------
diff --git a/R/pkg/NAMESPACE b/R/pkg/NAMESPACE
index f36d462..8cd0035 100644
--- a/R/pkg/NAMESPACE
+++ b/R/pkg/NAMESPACE
@@ -258,6 +258,7 @@ exportMethods("%<=>%",
"expr",
"factorial",
"first",
+ "flatten",
"floor",
"format_number",
"format_string",
http://git-wip-us.apache.org/repos/asf/spark/blob/dd4b1b9c/R/pkg/R/functions.R
----------------------------------------------------------------------
diff --git a/R/pkg/R/functions.R b/R/pkg/R/functions.R
index ec4bd4e..0ec99d1 100644
--- a/R/pkg/R/functions.R
+++ b/R/pkg/R/functions.R
@@ -208,6 +208,7 @@ NULL
#' head(select(tmp, array_contains(tmp$v1, 21), size(tmp$v1)))
#' head(select(tmp, array_max(tmp$v1), array_min(tmp$v1)))
#' head(select(tmp, array_position(tmp$v1, 21)))
+#' head(select(tmp, flatten(tmp$v1)))
#' tmp2 <- mutate(tmp, v2 = explode(tmp$v1))
#' head(tmp2)
#' head(select(tmp, posexplode(tmp$v1)))
@@ -3036,6 +3037,19 @@ setMethod("array_position",
})
#' @details
+#' \code{flatten}: Transforms an array of arrays into a single array.
+#'
+#' @rdname column_collection_functions
+#' @aliases flatten flatten,Column-method
+#' @note flatten since 2.4.0
+setMethod("flatten",
+ signature(x = "Column"),
+ function(x) {
+ jc <- callJStatic("org.apache.spark.sql.functions", "flatten", x@jc)
+ column(jc)
+ })
+
+#' @details
#' \code{map_keys}: Returns an unordered array containing the keys of the map.
#'
#' @rdname column_collection_functions
http://git-wip-us.apache.org/repos/asf/spark/blob/dd4b1b9c/R/pkg/R/generics.R
----------------------------------------------------------------------
diff --git a/R/pkg/R/generics.R b/R/pkg/R/generics.R
index 562d339..4ef12d1 100644
--- a/R/pkg/R/generics.R
+++ b/R/pkg/R/generics.R
@@ -918,6 +918,10 @@ setGeneric("explode_outer", function(x) { standardGeneric("explode_outer") })
#' @name NULL
setGeneric("expr", function(x) { standardGeneric("expr") })
+#' @rdname column_collection_functions
+#' @name NULL
+setGeneric("flatten", function(x) { standardGeneric("flatten") })
+
#' @rdname column_datetime_diff_functions
#' @name NULL
setGeneric("from_utc_timestamp", function(y, x) { standardGeneric("from_utc_timestamp") })
http://git-wip-us.apache.org/repos/asf/spark/blob/dd4b1b9c/R/pkg/tests/fulltests/test_sparkSQL.R
----------------------------------------------------------------------
diff --git a/R/pkg/tests/fulltests/test_sparkSQL.R b/R/pkg/tests/fulltests/test_sparkSQL.R
index 8cc2db7..3a8866b 100644
--- a/R/pkg/tests/fulltests/test_sparkSQL.R
+++ b/R/pkg/tests/fulltests/test_sparkSQL.R
@@ -1502,6 +1502,12 @@ test_that("column functions", {
result <- collect(select(df, sort_array(df[[1]])))[[1]]
expect_equal(result, list(list(1L, 2L, 3L), list(4L, 5L, 6L)))
+ # Test flatten
+ df <- createDataFrame(list(list(list(list(1L, 2L), list(3L, 4L))),
+ list(list(list(5L, 6L), list(7L, 8L)))))
+ result <- collect(select(df, flatten(df[[1]])))[[1]]
+ expect_equal(result, list(list(1L, 2L, 3L, 4L), list(5L, 6L, 7L, 8L)))
+
# Test map_keys(), map_values() and element_at()
df <- createDataFrame(list(list(map = as.environment(list(x = 1, y = 2)))))
result <- collect(select(df, map_keys(df$map)))[[1]]
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org