You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by sh...@apache.org on 2015/06/09 06:40:23 UTC

spark git commit: [SPARK-6820] [SPARKR] Convert NAs to null type in SparkR DataFrames

Repository: spark
Updated Branches:
  refs/heads/master 82870d507 -> a5c52c1a3


[SPARK-6820] [SPARKR] Convert NAs to null type in SparkR DataFrames

Author: hqzizania <qi...@intel.com>

Closes #6190 from hqzizania/R and squashes the following commits:

1641f9e [hqzizania] fixes and add test units
bb3411a [hqzizania] Convert NAs to null type in SparkR DataFrames


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/a5c52c1a
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/a5c52c1a
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/a5c52c1a

Branch: refs/heads/master
Commit: a5c52c1a3488b69bec19e460d2d1fdb0c9ada58d
Parents: 82870d5
Author: hqzizania <qi...@intel.com>
Authored: Mon Jun 8 21:40:12 2015 -0700
Committer: Shivaram Venkataraman <sh...@cs.berkeley.edu>
Committed: Mon Jun 8 21:40:12 2015 -0700

----------------------------------------------------------------------
 R/pkg/R/serialize.R              |  8 ++++++++
 R/pkg/inst/tests/test_sparkSQL.R | 37 +++++++++++++++++++++++++++++++++++
 2 files changed, 45 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/a5c52c1a/R/pkg/R/serialize.R
----------------------------------------------------------------------
diff --git a/R/pkg/R/serialize.R b/R/pkg/R/serialize.R
index 2081786..3169d79 100644
--- a/R/pkg/R/serialize.R
+++ b/R/pkg/R/serialize.R
@@ -37,6 +37,14 @@ writeObject <- function(con, object, writeType = TRUE) {
   # passing in vectors as arrays and instead require arrays to be passed
   # as lists.
   type <- class(object)[[1]]  # class of POSIXlt is c("POSIXlt", "POSIXt")
+  # Checking types is needed here, since ‘is.na’ only handles atomic vectors,
+  # lists and pairlists
+  if (type %in% c("integer", "character", "logical", "double", "numeric")) {
+    if (is.na(object)) {
+      object <- NULL
+      type <- "NULL"
+    }
+  }
   if (writeType) {
     writeType(con, type)
   }

http://git-wip-us.apache.org/repos/asf/spark/blob/a5c52c1a/R/pkg/inst/tests/test_sparkSQL.R
----------------------------------------------------------------------
diff --git a/R/pkg/inst/tests/test_sparkSQL.R b/R/pkg/inst/tests/test_sparkSQL.R
index 30edfc8..8946348 100644
--- a/R/pkg/inst/tests/test_sparkSQL.R
+++ b/R/pkg/inst/tests/test_sparkSQL.R
@@ -101,6 +101,43 @@ test_that("create DataFrame from RDD", {
   expect_equal(dtypes(df), list(c("a", "int"), c("b", "string")))
 })
 
+test_that("convert NAs to null type in DataFrames", {
+  rdd <- parallelize(sc, list(list(1L, 2L), list(NA, 4L)))
+  df <- createDataFrame(sqlContext, rdd, list("a", "b"))
+  expect_true(is.na(collect(df)[2, "a"]))
+  expect_equal(collect(df)[2, "b"], 4L)
+
+  l <- data.frame(x = 1L, y = c(1L, NA_integer_, 3L))
+  df <- createDataFrame(sqlContext, l)
+  expect_equal(collect(df)[2, "x"], 1L)
+  expect_true(is.na(collect(df)[2, "y"]))
+
+  rdd <- parallelize(sc, list(list(1, 2), list(NA, 4)))
+  df <- createDataFrame(sqlContext, rdd, list("a", "b"))
+  expect_true(is.na(collect(df)[2, "a"]))
+  expect_equal(collect(df)[2, "b"], 4)
+
+  l <- data.frame(x = 1, y = c(1, NA_real_, 3))
+  df <- createDataFrame(sqlContext, l)
+  expect_equal(collect(df)[2, "x"], 1)
+  expect_true(is.na(collect(df)[2, "y"]))
+
+  l <- list("a", "b", NA, "d")
+  df <- createDataFrame(sqlContext, l)
+  expect_true(is.na(collect(df)[3, "_1"]))
+  expect_equal(collect(df)[4, "_1"], "d")
+
+  l <- list("a", "b", NA_character_, "d")
+  df <- createDataFrame(sqlContext, l)
+  expect_true(is.na(collect(df)[3, "_1"]))
+  expect_equal(collect(df)[4, "_1"], "d")
+
+  l <- list(TRUE, FALSE, NA, TRUE)
+  df <- createDataFrame(sqlContext, l)
+  expect_true(is.na(collect(df)[3, "_1"]))
+  expect_equal(collect(df)[4, "_1"], TRUE)
+})
+
 test_that("toDF", {
   rdd <- lapply(parallelize(sc, 1:10), function(x) { list(x, as.character(x)) })
   df <- toDF(rdd, list("a", "b"))


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org