You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by fe...@apache.org on 2018/01/24 06:31:21 UTC
spark git commit: [SPARK-21727][R] Allow multi-element atomic vector
as column type in SparkR DataFrame
Repository: spark
Updated Branches:
refs/heads/master a3911cf89 -> f54b65c15
[SPARK-21727][R] Allow multi-element atomic vector as column type in SparkR DataFrame
## What changes were proposed in this pull request?
A fix to https://issues.apache.org/jira/browse/SPARK-21727, "Operating on an ArrayType in a SparkR DataFrame throws error"
## How was this patch tested?
- Ran tests at R\pkg\tests\run-all.R (see below attached results)
- Tested the following lines in SparkR, which now seem to execute without error:
```
indices <- 1:4
myDf <- data.frame(indices)
myDf$data <- list(rep(0, 20))
mySparkDf <- as.DataFrame(myDf)
collect(mySparkDf)
```
[2018-01-22 SPARK-21727 Test Results.txt](https://github.com/apache/spark/files/1653535/2018-01-22.SPARK-21727.Test.Results.txt)
felixcheung yanboliang sun-rui shivaram
_The contribution is my original work and I license the work to the project under the project’s open source license_
Author: neilalex <ne...@neilalex.com>
Closes #20352 from neilalex/neilalex-sparkr-arraytype.
Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/f54b65c1
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/f54b65c1
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/f54b65c1
Branch: refs/heads/master
Commit: f54b65c15a732540f7a41a9083eeb7a08feca125
Parents: a3911cf
Author: neilalex <ne...@neilalex.com>
Authored: Tue Jan 23 22:31:14 2018 -0800
Committer: Felix Cheung <fe...@apache.org>
Committed: Tue Jan 23 22:31:14 2018 -0800
----------------------------------------------------------------------
R/pkg/R/serialize.R | 11 ++++----
R/pkg/tests/fulltests/test_Serde.R | 47 +++++++++++++++++++++++++++++++++
2 files changed, 53 insertions(+), 5 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/spark/blob/f54b65c1/R/pkg/R/serialize.R
----------------------------------------------------------------------
diff --git a/R/pkg/R/serialize.R b/R/pkg/R/serialize.R
index 3bbf60d..263b9b5 100644
--- a/R/pkg/R/serialize.R
+++ b/R/pkg/R/serialize.R
@@ -30,14 +30,17 @@
# POSIXct,POSIXlt -> Time
#
# list[T] -> Array[T], where T is one of above mentioned types
+# Multi-element vector of any of the above (except raw) -> Array[T]
# environment -> Map[String, T], where T is a native type
# jobj -> Object, where jobj is an object created in the backend
# nolint end
getSerdeType <- function(object) {
type <- class(object)[[1]]
- if (type != "list") {
- type
+ if (is.atomic(object) & !is.raw(object) & length(object) > 1) {
+ "array"
+ } else if (type != "list") {
+ type
} else {
# Check if all elements are of same type
elemType <- unique(sapply(object, function(elem) { getSerdeType(elem) }))
@@ -50,9 +53,7 @@ getSerdeType <- function(object) {
}
writeObject <- function(con, object, writeType = TRUE) {
- # NOTE: In R vectors have same type as objects. So we don't support
- # passing in vectors as arrays and instead require arrays to be passed
- # as lists.
+ # NOTE: In R vectors have same type as objects
type <- class(object)[[1]] # class of POSIXlt is c("POSIXlt", "POSIXt")
# Checking types is needed here, since 'is.na' only handles atomic vectors,
# lists and pairlists
http://git-wip-us.apache.org/repos/asf/spark/blob/f54b65c1/R/pkg/tests/fulltests/test_Serde.R
----------------------------------------------------------------------
diff --git a/R/pkg/tests/fulltests/test_Serde.R b/R/pkg/tests/fulltests/test_Serde.R
index 6bbd201..3577929 100644
--- a/R/pkg/tests/fulltests/test_Serde.R
+++ b/R/pkg/tests/fulltests/test_Serde.R
@@ -37,6 +37,53 @@ test_that("SerDe of primitive types", {
expect_equal(class(x), "character")
})
+test_that("SerDe of multi-element primitive vectors inside R data.frame", {
+ # vector of integers embedded in R data.frame
+ indices <- 1L:3L
+ myDf <- data.frame(indices)
+ myDf$data <- list(rep(0L, 3L))
+ mySparkDf <- as.DataFrame(myDf)
+ myResultingDf <- collect(mySparkDf)
+ myDfListedData <- data.frame(indices)
+ myDfListedData$data <- list(as.list(rep(0L, 3L)))
+ expect_equal(myResultingDf, myDfListedData)
+ expect_equal(class(myResultingDf[["data"]][[1]]), "list")
+ expect_equal(class(myResultingDf[["data"]][[1]][[1]]), "integer")
+
+ # vector of numeric embedded in R data.frame
+ myDf <- data.frame(indices)
+ myDf$data <- list(rep(0, 3L))
+ mySparkDf <- as.DataFrame(myDf)
+ myResultingDf <- collect(mySparkDf)
+ myDfListedData <- data.frame(indices)
+ myDfListedData$data <- list(as.list(rep(0, 3L)))
+ expect_equal(myResultingDf, myDfListedData)
+ expect_equal(class(myResultingDf[["data"]][[1]]), "list")
+ expect_equal(class(myResultingDf[["data"]][[1]][[1]]), "numeric")
+
+ # vector of logical embedded in R data.frame
+ myDf <- data.frame(indices)
+ myDf$data <- list(rep(TRUE, 3L))
+ mySparkDf <- as.DataFrame(myDf)
+ myResultingDf <- collect(mySparkDf)
+ myDfListedData <- data.frame(indices)
+ myDfListedData$data <- list(as.list(rep(TRUE, 3L)))
+ expect_equal(myResultingDf, myDfListedData)
+ expect_equal(class(myResultingDf[["data"]][[1]]), "list")
+ expect_equal(class(myResultingDf[["data"]][[1]][[1]]), "logical")
+
+ # vector of character embedded in R data.frame
+ myDf <- data.frame(indices)
+ myDf$data <- list(rep("abc", 3L))
+ mySparkDf <- as.DataFrame(myDf)
+ myResultingDf <- collect(mySparkDf)
+ myDfListedData <- data.frame(indices)
+ myDfListedData$data <- list(as.list(rep("abc", 3L)))
+ expect_equal(myResultingDf, myDfListedData)
+ expect_equal(class(myResultingDf[["data"]][[1]]), "list")
+ expect_equal(class(myResultingDf[["data"]][[1]][[1]]), "character")
+})
+
test_that("SerDe of list of primitive types", {
x <- list(1L, 2L, 3L)
y <- callJStatic("SparkRHandler", "echo", x)
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org