You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by fe...@apache.org on 2017/01/24 08:23:27 UTC
spark git commit: [SPARK-18823][SPARKR] add support for assigning to column

Repository: spark
Updated Branches:
  refs/heads/master ec9493b44 -> f27e02476


[SPARK-18823][SPARKR] add support for assigning to column

## What changes were proposed in this pull request?

Add support for assigning to a column with `[[<-`, by name or by index:
```
df[[myname]] <- 1
df[[2]] <- df$eruptions
```

## How was this patch tested?

manual tests, unit tests

Author: Felix Cheung <fe...@hotmail.com>

Closes #16663 from felixcheung/rcolset.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/f27e0247
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/f27e0247
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/f27e0247

Branch: refs/heads/master
Commit: f27e024768e328b96704a9ef35b77381da480328
Parents: ec9493b
Author: Felix Cheung <fe...@hotmail.com>
Authored: Tue Jan 24 00:23:23 2017 -0800
Committer: Felix Cheung <fe...@apache.org>
Committed: Tue Jan 24 00:23:23 2017 -0800

----------------------------------------------------------------------
 R/pkg/R/DataFrame.R                       | 48 +++++++++++++++++++-------
 R/pkg/inst/tests/testthat/test_sparkSQL.R | 20 +++++++++++
 2 files changed, 55 insertions(+), 13 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/f27e0247/R/pkg/R/DataFrame.R
----------------------------------------------------------------------
diff --git a/R/pkg/R/DataFrame.R b/R/pkg/R/DataFrame.R
index 3d912c9..0a10122 100644
--- a/R/pkg/R/DataFrame.R
+++ b/R/pkg/R/DataFrame.R
@@ -1717,6 +1717,23 @@ getColumn <- function(x, c) {
   column(callJMethod(x@sdf, "col", c))
 }
 
+setColumn <- function(x, c, value) {
+  if (class(value) != "Column" && !is.null(value)) {
+    if (isAtomicLengthOne(value)) {
+      value <- lit(value)
+    } else {
+      stop("value must be a Column, literal value as atomic in length of 1, or NULL")
+    }
+  }
+
+  if (is.null(value)) {
+    nx <- drop(x, c)
+  } else {
+    nx <- withColumn(x, c, value)
+  }
+  nx
+}
+
 #' @param name name of a Column (without being wrapped by \code{""}).
 #' @rdname select
 #' @name $
@@ -1735,19 +1752,7 @@ setMethod("$", signature(x = "SparkDataFrame"),
 #' @note $<- since 1.4.0
 setMethod("$<-", signature(x = "SparkDataFrame"),
           function(x, name, value) {
-            if (class(value) != "Column" && !is.null(value)) {
-              if (isAtomicLengthOne(value)) {
-                value <- lit(value)
-              } else {
-                stop("value must be a Column, literal value as atomic in length of 1, or NULL")
-              }
-            }
-
-            if (is.null(value)) {
-              nx <- drop(x, name)
-            } else {
-              nx <- withColumn(x, name, value)
-            }
+            nx <- setColumn(x, name, value)
             x@sdf <- nx@sdf
             x
           })
@@ -1768,6 +1773,21 @@ setMethod("[[", signature(x = "SparkDataFrame", i = "numericOrcharacter"),
           })
 
 #' @rdname subset
+#' @name [[<-
+#' @aliases [[<-,SparkDataFrame,numericOrcharacter-method
+#' @note [[<- since 2.1.1
+setMethod("[[<-", signature(x = "SparkDataFrame", i = "numericOrcharacter"),
+          function(x, i, value) {
+            if (is.numeric(i)) {
+              cols <- columns(x)
+              i <- cols[[i]]
+            }
+            nx <- setColumn(x, i, value)
+            x@sdf <- nx@sdf
+            x
+          })
+
+#' @rdname subset
 #' @name [
 #' @aliases [,SparkDataFrame-method
 #' @note [ since 1.4.0
@@ -1814,6 +1834,8 @@ setMethod("[", signature(x = "SparkDataFrame"),
 #' @param j,select expression for the single Column or a list of columns to select from the SparkDataFrame.
 #' @param drop if TRUE, a Column will be returned if the resulting dataset has only one column.
 #'             Otherwise, a SparkDataFrame will always be returned.
+#' @param value a Column or an atomic vector in the length of 1 as literal value, or \code{NULL}.
+#'              If \code{NULL}, the specified Column is dropped.
 #' @param ... currently not used.
 #' @return A new SparkDataFrame containing only the rows that meet the condition with selected columns.
 #' @export

http://git-wip-us.apache.org/repos/asf/spark/blob/f27e0247/R/pkg/inst/tests/testthat/test_sparkSQL.R
----------------------------------------------------------------------
diff --git a/R/pkg/inst/tests/testthat/test_sparkSQL.R b/R/pkg/inst/tests/testthat/test_sparkSQL.R
index 2601742..aaa8fb4 100644
--- a/R/pkg/inst/tests/testthat/test_sparkSQL.R
+++ b/R/pkg/inst/tests/testthat/test_sparkSQL.R
@@ -1021,6 +1021,9 @@ test_that("select operators", {
   df$age2 <- df$age * 2
   expect_equal(columns(df), c("name", "age", "age2"))
   expect_equal(count(where(df, df$age2 == df$age * 2)), 2)
+  df$age2 <- df[["age"]] * 3
+  expect_equal(columns(df), c("name", "age", "age2"))
+  expect_equal(count(where(df, df$age2 == df$age * 3)), 2)
 
   df$age2 <- 21
   expect_equal(columns(df), c("name", "age", "age2"))
@@ -1033,6 +1036,23 @@ test_that("select operators", {
   expect_error(df$age3 <- c(22, NA),
               "value must be a Column, literal value as atomic in length of 1, or NULL")
 
+  df[["age2"]] <- 23
+  expect_equal(columns(df), c("name", "age", "age2"))
+  expect_equal(count(where(df, df$age2 == 23)), 3)
+
+  df[[3]] <- 24
+  expect_equal(columns(df), c("name", "age", "age2"))
+  expect_equal(count(where(df, df$age2 == 24)), 3)
+
+  df[[3]] <- df$age
+  expect_equal(count(where(df, df$age2 == df$age)), 2)
+
+  df[["age2"]] <- df[["name"]]
+  expect_equal(count(where(df, df$age2 == df$name)), 3)
+
+  expect_error(df[["age3"]] <- c(22, 23),
+              "value must be a Column, literal value as atomic in length of 1, or NULL")
+
   # Test parameter drop
   expect_equal(class(df[, 1]) == "SparkDataFrame", T)
   expect_equal(class(df[, 1, drop = T]) == "Column", T)


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org