You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by fe...@apache.org on 2017/01/24 08:23:27 UTC
spark git commit: [SPARK-18823][SPARKR] add support for assigning to column
Repository: spark
Updated Branches:
refs/heads/master ec9493b44 -> f27e02476
[SPARK-18823][SPARKR] add support for assigning to column
## What changes were proposed in this pull request?
Support for
```
df[[myname]] <- 1
df[[2]] <- df$eruptions
```
## How was this patch tested?
manual tests, unit tests
Author: Felix Cheung <fe...@hotmail.com>
Closes #16663 from felixcheung/rcolset.
Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/f27e0247
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/f27e0247
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/f27e0247
Branch: refs/heads/master
Commit: f27e024768e328b96704a9ef35b77381da480328
Parents: ec9493b
Author: Felix Cheung <fe...@hotmail.com>
Authored: Tue Jan 24 00:23:23 2017 -0800
Committer: Felix Cheung <fe...@apache.org>
Committed: Tue Jan 24 00:23:23 2017 -0800
----------------------------------------------------------------------
R/pkg/R/DataFrame.R | 48 +++++++++++++++++++-------
R/pkg/inst/tests/testthat/test_sparkSQL.R | 20 +++++++++++
2 files changed, 55 insertions(+), 13 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/spark/blob/f27e0247/R/pkg/R/DataFrame.R
----------------------------------------------------------------------
diff --git a/R/pkg/R/DataFrame.R b/R/pkg/R/DataFrame.R
index 3d912c9..0a10122 100644
--- a/R/pkg/R/DataFrame.R
+++ b/R/pkg/R/DataFrame.R
@@ -1717,6 +1717,23 @@ getColumn <- function(x, c) {
column(callJMethod(x@sdf, "col", c))
}
+setColumn <- function(x, c, value) {
+ if (class(value) != "Column" && !is.null(value)) {
+ if (isAtomicLengthOne(value)) {
+ value <- lit(value)
+ } else {
+ stop("value must be a Column, literal value as atomic in length of 1, or NULL")
+ }
+ }
+
+ if (is.null(value)) {
+ nx <- drop(x, c)
+ } else {
+ nx <- withColumn(x, c, value)
+ }
+ nx
+}
+
#' @param name name of a Column (without being wrapped by \code{""}).
#' @rdname select
#' @name $
@@ -1735,19 +1752,7 @@ setMethod("$", signature(x = "SparkDataFrame"),
#' @note $<- since 1.4.0
setMethod("$<-", signature(x = "SparkDataFrame"),
function(x, name, value) {
- if (class(value) != "Column" && !is.null(value)) {
- if (isAtomicLengthOne(value)) {
- value <- lit(value)
- } else {
- stop("value must be a Column, literal value as atomic in length of 1, or NULL")
- }
- }
-
- if (is.null(value)) {
- nx <- drop(x, name)
- } else {
- nx <- withColumn(x, name, value)
- }
+ nx <- setColumn(x, name, value)
x@sdf <- nx@sdf
x
})
@@ -1768,6 +1773,21 @@ setMethod("[[", signature(x = "SparkDataFrame", i = "numericOrcharacter"),
})
#' @rdname subset
+#' @name [[<-
+#' @aliases [[<-,SparkDataFrame,numericOrcharacter-method
+#' @note [[<- since 2.1.1
+setMethod("[[<-", signature(x = "SparkDataFrame", i = "numericOrcharacter"),
+ function(x, i, value) {
+ if (is.numeric(i)) {
+ cols <- columns(x)
+ i <- cols[[i]]
+ }
+ nx <- setColumn(x, i, value)
+ x@sdf <- nx@sdf
+ x
+ })
+
+#' @rdname subset
#' @name [
#' @aliases [,SparkDataFrame-method
#' @note [ since 1.4.0
@@ -1814,6 +1834,8 @@ setMethod("[", signature(x = "SparkDataFrame"),
#' @param j,select expression for the single Column or a list of columns to select from the SparkDataFrame.
#' @param drop if TRUE, a Column will be returned if the resulting dataset has only one column.
#' Otherwise, a SparkDataFrame will always be returned.
+#' @param value a Column or an atomic vector in the length of 1 as literal value, or \code{NULL}.
+#' If \code{NULL}, the specified Column is dropped.
#' @param ... currently not used.
#' @return A new SparkDataFrame containing only the rows that meet the condition with selected columns.
#' @export
http://git-wip-us.apache.org/repos/asf/spark/blob/f27e0247/R/pkg/inst/tests/testthat/test_sparkSQL.R
----------------------------------------------------------------------
diff --git a/R/pkg/inst/tests/testthat/test_sparkSQL.R b/R/pkg/inst/tests/testthat/test_sparkSQL.R
index 2601742..aaa8fb4 100644
--- a/R/pkg/inst/tests/testthat/test_sparkSQL.R
+++ b/R/pkg/inst/tests/testthat/test_sparkSQL.R
@@ -1021,6 +1021,9 @@ test_that("select operators", {
df$age2 <- df$age * 2
expect_equal(columns(df), c("name", "age", "age2"))
expect_equal(count(where(df, df$age2 == df$age * 2)), 2)
+ df$age2 <- df[["age"]] * 3
+ expect_equal(columns(df), c("name", "age", "age2"))
+ expect_equal(count(where(df, df$age2 == df$age * 3)), 2)
df$age2 <- 21
expect_equal(columns(df), c("name", "age", "age2"))
@@ -1033,6 +1036,23 @@ test_that("select operators", {
expect_error(df$age3 <- c(22, NA),
"value must be a Column, literal value as atomic in length of 1, or NULL")
+ df[["age2"]] <- 23
+ expect_equal(columns(df), c("name", "age", "age2"))
+ expect_equal(count(where(df, df$age2 == 23)), 3)
+
+ df[[3]] <- 24
+ expect_equal(columns(df), c("name", "age", "age2"))
+ expect_equal(count(where(df, df$age2 == 24)), 3)
+
+ df[[3]] <- df$age
+ expect_equal(count(where(df, df$age2 == df$age)), 2)
+
+ df[["age2"]] <- df[["name"]]
+ expect_equal(count(where(df, df$age2 == df$name)), 3)
+
+ expect_error(df[["age3"]] <- c(22, 23),
+ "value must be a Column, literal value as atomic in length of 1, or NULL")
+
# Test parameter drop
expect_equal(class(df[, 1]) == "SparkDataFrame", T)
expect_equal(class(df[, 1, drop = T]) == "Column", T)
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org