You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by sh...@apache.org on 2015/10/01 03:03:34 UTC

spark git commit: [SPARK-10807] [SPARKR] Added as.data.frame as a synonym for collect

Repository: spark
Updated Branches:
  refs/heads/master 89ea0041a -> f21e2da03


[SPARK-10807] [SPARKR] Added as.data.frame as a synonym for collect

Created method as.data.frame as a synonym for collect().

Author: Oscar D. Lara Yejas <ol...@mail.usf.edu>
Author: olarayej <os...@us.ibm.com>
Author: Oscar D. Lara Yejas <os...@us.ibm.com>

Closes #8908 from olarayej/SPARK-10807.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/f21e2da0
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/f21e2da0
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/f21e2da0

Branch: refs/heads/master
Commit: f21e2da03fbf8041fece476e3d5c699aef819451
Parents: 89ea004
Author: Oscar D. Lara Yejas <ol...@mail.usf.edu>
Authored: Wed Sep 30 18:03:31 2015 -0700
Committer: Shivaram Venkataraman <sh...@cs.berkeley.edu>
Committed: Wed Sep 30 18:03:31 2015 -0700

----------------------------------------------------------------------
 R/pkg/NAMESPACE                  |  2 ++
 R/pkg/R/DataFrame.R              | 25 +++++++++++++++++++++++++
 R/pkg/R/generics.R               |  4 ++++
 R/pkg/inst/tests/test_sparkSQL.R |  9 ++++++++-
 4 files changed, 39 insertions(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/f21e2da0/R/pkg/NAMESPACE
----------------------------------------------------------------------
diff --git a/R/pkg/NAMESPACE b/R/pkg/NAMESPACE
index 9d39630..c28c47d 100644
--- a/R/pkg/NAMESPACE
+++ b/R/pkg/NAMESPACE
@@ -247,3 +247,5 @@ export("structField",
        "structType.jobj",
        "structType.structField",
        "print.structType")
+
+export("as.data.frame")
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/spark/blob/f21e2da0/R/pkg/R/DataFrame.R
----------------------------------------------------------------------
diff --git a/R/pkg/R/DataFrame.R b/R/pkg/R/DataFrame.R
index c3c1893..65e368c 100644
--- a/R/pkg/R/DataFrame.R
+++ b/R/pkg/R/DataFrame.R
@@ -1848,3 +1848,28 @@ setMethod("crosstab",
             sct <- callJMethod(statFunctions, "crosstab", col1, col2)
             collect(dataFrame(sct))
           })
+
+
+#' This function downloads the contents of a DataFrame into an R data.frame.
+#' Since data.frames are held in memory, ensure that you have enough memory
+#' in your system to accommodate the contents.
+#' 
+#' @title Download data from a DataFrame into a data.frame
+#' @param x a DataFrame
+#' @return a data.frame
+#' @rdname as.data.frame
+#' @examples \dontrun{
+#' 
+#' irisDF <- createDataFrame(sqlContext, iris)
+#' df <- as.data.frame(irisDF[irisDF$Species == "setosa", ])
+#' }
+setMethod("as.data.frame",
+          signature(x = "DataFrame"),
+          function(x, ...) {
+            # Check if additional parameters have been passed
+            if (length(list(...)) > 0) {
+              stop(paste("Unused argument(s): ", paste(list(...), collapse=", ")))
+            }
+            collect(x)
+          }
+)

http://git-wip-us.apache.org/repos/asf/spark/blob/f21e2da0/R/pkg/R/generics.R
----------------------------------------------------------------------
diff --git a/R/pkg/R/generics.R b/R/pkg/R/generics.R
index 43dd8d2..3db41e0 100644
--- a/R/pkg/R/generics.R
+++ b/R/pkg/R/generics.R
@@ -983,3 +983,7 @@ setGeneric("glm")
 #' @rdname rbind
 #' @export
 setGeneric("rbind", signature = "...")
+
+#' @rdname as.data.frame
+#' @export
+setGeneric("as.data.frame")
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/spark/blob/f21e2da0/R/pkg/inst/tests/test_sparkSQL.R
----------------------------------------------------------------------
diff --git a/R/pkg/inst/tests/test_sparkSQL.R b/R/pkg/inst/tests/test_sparkSQL.R
index e159a69..8f85eec 100644
--- a/R/pkg/inst/tests/test_sparkSQL.R
+++ b/R/pkg/inst/tests/test_sparkSQL.R
@@ -1327,6 +1327,13 @@ test_that("SQL error message is returned from JVM", {
   expect_equal(grepl("Table Not Found: blah", retError), TRUE)
 })
 
+test_that("Method as.data.frame as a synonym for collect()", {
+  irisDF <- createDataFrame(sqlContext, iris)
+  expect_equal(as.data.frame(irisDF), collect(irisDF))
+  irisDF2 <- irisDF[irisDF$Species == "setosa", ]
+  expect_equal(as.data.frame(irisDF2), collect(irisDF2))
+})
+
 unlink(parquetPath)
 unlink(jsonPath)
-unlink(jsonPathNa)
+unlink(jsonPathNa)
\ No newline at end of file


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org