You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by sh...@apache.org on 2015/10/13 19:21:18 UTC
spark git commit: [SPARK-10913] [SPARKR] attach() function support

Repository: spark
Updated Branches:
  refs/heads/master 1e0aba90b -> f7f28ee7a


[SPARK-10913] [SPARKR] attach() function support

Bring the change code up to date.

Author: Adrian Zhuang <ad...@users.noreply.github.com>
Author: adrian555 <wz...@us.ibm.com>

Closes #9031 from adrian555/attach2.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/f7f28ee7
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/f7f28ee7
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/f7f28ee7

Branch: refs/heads/master
Commit: f7f28ee7a513c262d52cf433d25fbf06df9bd1f1
Parents: 1e0aba9
Author: Adrian Zhuang <ad...@users.noreply.github.com>
Authored: Tue Oct 13 10:21:07 2015 -0700
Committer: Shivaram Venkataraman <sh...@cs.berkeley.edu>
Committed: Tue Oct 13 10:21:07 2015 -0700

----------------------------------------------------------------------
 R/pkg/NAMESPACE                  |  1 +
 R/pkg/R/DataFrame.R              | 30 ++++++++++++++++++++++++++++++
 R/pkg/R/generics.R               |  4 ++++
 R/pkg/inst/tests/test_sparkSQL.R | 20 ++++++++++++++++++++
 4 files changed, 55 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/f7f28ee7/R/pkg/NAMESPACE
----------------------------------------------------------------------
diff --git a/R/pkg/NAMESPACE b/R/pkg/NAMESPACE
index 41986a5..ed9cd94 100644
--- a/R/pkg/NAMESPACE
+++ b/R/pkg/NAMESPACE
@@ -23,6 +23,7 @@ export("setJobGroup",
 exportClasses("DataFrame")
 
 exportMethods("arrange",
+              "attach",
               "cache",
               "collect",
               "columns",

http://git-wip-us.apache.org/repos/asf/spark/blob/f7f28ee7/R/pkg/R/DataFrame.R
----------------------------------------------------------------------
diff --git a/R/pkg/R/DataFrame.R b/R/pkg/R/DataFrame.R
index 1b9137e..e0ce056 100644
--- a/R/pkg/R/DataFrame.R
+++ b/R/pkg/R/DataFrame.R
@@ -1881,3 +1881,33 @@ setMethod("as.data.frame",
             }
             collect(x)
           })
+
+#' Attaches the specified DataFrame to the R search path: each column is bound,
+#' under its own name, to a single-column DataFrame in a new environment, so
+#' columns in the DataFrame can be accessed by simply giving their names.
+#'
+#' @rdname attach
+#' @title Attach DataFrame to R search path
+#' @param what (DataFrame) The DataFrame to attach
+#' @param pos (integer) Specify position in search() where to attach.
+#' @param name (character) Name to use for the attached DataFrame. Names
+#'   starting with package: are reserved for library.
+#' @param warn.conflicts (logical) If TRUE, warnings are printed about conflicts
+#' from attaching the DataFrame (forwarded unchanged to attach() on the column environment).
+#' @examples
+#' \dontrun{
+#' attach(irisDf)
+#' summary(Sepal_Width)
+#' }
+#' @seealso \link{detach}
+setMethod("attach",
+          signature(what = "DataFrame"),
+          function(what, pos = 2, name = deparse(substitute(what)), warn.conflicts = TRUE) {
+            cols <- columns(what)
+            stopifnot(length(cols) > 0)  # also keeps 1:length(cols) below from being c(1, 0)
+            newEnv <- new.env()  # receives one binding per column name
+            for (i in 1:length(cols)) {
+              assign(x = cols[i], value = what[, cols[i]], envir = newEnv)
+            }
+            attach(newEnv, pos = pos, name = name, warn.conflicts = warn.conflicts)
+          })

http://git-wip-us.apache.org/repos/asf/spark/blob/f7f28ee7/R/pkg/R/generics.R
----------------------------------------------------------------------
diff --git a/R/pkg/R/generics.R b/R/pkg/R/generics.R
index 8fad170..c106a00 100644
--- a/R/pkg/R/generics.R
+++ b/R/pkg/R/generics.R
@@ -1003,3 +1003,7 @@ setGeneric("rbind", signature = "...")
 #' @rdname as.data.frame
 #' @export
 setGeneric("as.data.frame")
+
+#' @rdname attach
+#' @export
+setGeneric("attach")  # turn the existing attach() into an S4 generic so methods can be set on it
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/spark/blob/f7f28ee7/R/pkg/inst/tests/test_sparkSQL.R
----------------------------------------------------------------------
diff --git a/R/pkg/inst/tests/test_sparkSQL.R b/R/pkg/inst/tests/test_sparkSQL.R
index b599994..d5509e4 100644
--- a/R/pkg/inst/tests/test_sparkSQL.R
+++ b/R/pkg/inst/tests/test_sparkSQL.R
@@ -1405,6 +1405,26 @@ test_that("Method as.data.frame as a synonym for collect()", {
   expect_equal(as.data.frame(irisDF2), collect(irisDF2))
 })
 
+test_that("attach() on a DataFrame", {
+  df <- jsonFile(sqlContext, jsonPath)
+  expect_error(age)  # nothing attached yet, so 'age' must not resolve
+  attach(df)  # from here on the column names of df are expected to resolve
+  expect_is(age, "DataFrame")
+  expected_age <- data.frame(age = c(NA, 30, 19))
+  expect_equal(head(age), expected_age)
+  stat <- summary(age)
+  expect_equal(collect(stat)[5, "age"], "30")  # row 5 is presumably the max statistic - TODO confirm
+  age <- age$age + 1  # local 'age' now shadows the attached one
+  expect_is(age, "Column")
+  rm(age)  # drop the local binding; 'age' should resolve to the attached column again
+  stat2 <- summary(age)
+  expect_equal(collect(stat2)[5, "age"], "30")
+  detach("df")  # remove the search-path entry added by attach(df) above
+  stat3 <- summary(df[, "age"])  # df itself is still usable after detaching
+  expect_equal(collect(stat3)[5, "age"], "30")
+  expect_error(age)  # after detach, 'age' must no longer resolve
+})
+
 unlink(parquetPath)
 unlink(jsonPath)
 unlink(jsonPathNa)


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org