You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by sh...@apache.org on 2015/12/29 08:15:35 UTC
spark git commit: [SPARK-12526][SPARKR] `ifelse`, `when`,
`otherwise` unable to take Column as value
Repository: spark
Updated Branches:
refs/heads/master 73862a1eb -> d80cc90b5
[SPARK-12526][SPARKR] `ifelse`, `when`, `otherwise` unable to take Column as value
`ifelse`, `when`, and `otherwise` are unable to take a `Column`-typed S4 object as a value.
For example:
```r
ifelse(lit(1) == lit(1), lit(2), lit(3))
ifelse(df$mpg > 0, df$mpg, 0)
```
will both fail with
```r
attempt to replicate an object of type 'environment'
```
The PR replaces `ifelse` calls with `if ... else ...` inside the function implementations to avoid the attempt to vectorize (i.e. `rep()`). It remains to be discussed whether we should instead support vectorization in these functions for consistency, because `ifelse` in base R is vectorized, but I cannot foresee any scenarios in which these functions would need to be vectorized in SparkR.
For reference, added test cases which trigger failures:
```r
. Error: when(), otherwise() and ifelse() with column on a DataFrame ----------
error in evaluating the argument 'x' in selecting a method for function 'collect':
error in evaluating the argument 'col' in selecting a method for function 'select':
attempt to replicate an object of type 'environment'
Calls: when -> when -> ifelse -> ifelse
1: withCallingHandlers(eval(code, new_test_environment), error = capture_calls, message = function(c) invokeRestart("muffleMessage"))
2: eval(code, new_test_environment)
3: eval(expr, envir, enclos)
4: expect_equal(collect(select(df, when(df$a > 1 & df$b > 2, lit(1))))[, 1], c(NA, 1)) at test_sparkSQL.R:1126
5: expect_that(object, equals(expected, label = expected.label, ...), info = info, label = label)
6: condition(object)
7: compare(actual, expected, ...)
8: collect(select(df, when(df$a > 1 & df$b > 2, lit(1))))
Error: Test failures
Execution halted
```
Author: Forest Fang <fo...@outlook.com>
Closes #10481 from saurfang/spark-12526.
Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/d80cc90b
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/d80cc90b
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/d80cc90b
Branch: refs/heads/master
Commit: d80cc90b5545cff82cd9b340f12d01eafc9ca524
Parents: 73862a1
Author: Forest Fang <fo...@outlook.com>
Authored: Tue Dec 29 12:45:24 2015 +0530
Committer: Shivaram Venkataraman <sh...@cs.berkeley.edu>
Committed: Tue Dec 29 12:45:24 2015 +0530
----------------------------------------------------------------------
R/pkg/R/column.R | 4 ++--
R/pkg/R/functions.R | 13 ++++++++-----
R/pkg/inst/tests/testthat/test_sparkSQL.R | 8 ++++++++
3 files changed, 18 insertions(+), 7 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/spark/blob/d80cc90b/R/pkg/R/column.R
----------------------------------------------------------------------
diff --git a/R/pkg/R/column.R b/R/pkg/R/column.R
index 7bb8ef2..356bcee 100644
--- a/R/pkg/R/column.R
+++ b/R/pkg/R/column.R
@@ -215,7 +215,7 @@ setMethod("%in%",
#' otherwise
#'
-#' If values in the specified column are null, returns the value.
+#' If values in the specified column are null, returns the value.
#' Can be used in conjunction with `when` to specify a default value for expressions.
#'
#' @rdname otherwise
@@ -225,7 +225,7 @@ setMethod("%in%",
setMethod("otherwise",
signature(x = "Column", value = "ANY"),
function(x, value) {
- value <- ifelse(class(value) == "Column", value@jc, value)
+ value <- if (class(value) == "Column") { value@jc } else { value }
jc <- callJMethod(x@jc, "otherwise", value)
column(jc)
})
http://git-wip-us.apache.org/repos/asf/spark/blob/d80cc90b/R/pkg/R/functions.R
----------------------------------------------------------------------
diff --git a/R/pkg/R/functions.R b/R/pkg/R/functions.R
index 09e4e04..df36bc8 100644
--- a/R/pkg/R/functions.R
+++ b/R/pkg/R/functions.R
@@ -37,7 +37,7 @@ setMethod("lit", signature("ANY"),
function(x) {
jc <- callJStatic("org.apache.spark.sql.functions",
"lit",
- ifelse(class(x) == "Column", x@jc, x))
+ if (class(x) == "Column") { x@jc } else { x })
column(jc)
})
@@ -2262,7 +2262,7 @@ setMethod("unix_timestamp", signature(x = "Column", format = "character"),
setMethod("when", signature(condition = "Column", value = "ANY"),
function(condition, value) {
condition <- condition@jc
- value <- ifelse(class(value) == "Column", value@jc, value)
+ value <- if (class(value) == "Column") { value@jc } else { value }
jc <- callJStatic("org.apache.spark.sql.functions", "when", condition, value)
column(jc)
})
@@ -2277,13 +2277,16 @@ setMethod("when", signature(condition = "Column", value = "ANY"),
#' @name ifelse
#' @seealso \link{when}
#' @export
-#' @examples \dontrun{ifelse(df$a > 1 & df$b > 2, 0, 1)}
+#' @examples \dontrun{
+#' ifelse(df$a > 1 & df$b > 2, 0, 1)
+#' ifelse(df$a > 1, df$a, 1)
+#' }
setMethod("ifelse",
signature(test = "Column", yes = "ANY", no = "ANY"),
function(test, yes, no) {
test <- test@jc
- yes <- ifelse(class(yes) == "Column", yes@jc, yes)
- no <- ifelse(class(no) == "Column", no@jc, no)
+ yes <- if (class(yes) == "Column") { yes@jc } else { yes }
+ no <- if (class(no) == "Column") { no@jc } else { no }
jc <- callJMethod(callJStatic("org.apache.spark.sql.functions",
"when",
test, yes),
http://git-wip-us.apache.org/repos/asf/spark/blob/d80cc90b/R/pkg/inst/tests/testthat/test_sparkSQL.R
----------------------------------------------------------------------
diff --git a/R/pkg/inst/tests/testthat/test_sparkSQL.R b/R/pkg/inst/tests/testthat/test_sparkSQL.R
index 135c757..c2b6adb 100644
--- a/R/pkg/inst/tests/testthat/test_sparkSQL.R
+++ b/R/pkg/inst/tests/testthat/test_sparkSQL.R
@@ -1120,6 +1120,14 @@ test_that("when(), otherwise() and ifelse() on a DataFrame", {
expect_equal(collect(select(df, ifelse(df$a > 1 & df$b > 2, 0, 1)))[, 1], c(1, 0))
})
+test_that("when(), otherwise() and ifelse() with column on a DataFrame", {
+ l <- list(list(a = 1, b = 2), list(a = 3, b = 4))
+ df <- createDataFrame(sqlContext, l)
+ expect_equal(collect(select(df, when(df$a > 1 & df$b > 2, lit(1))))[, 1], c(NA, 1))
+ expect_equal(collect(select(df, otherwise(when(df$a > 1, lit(1)), lit(0))))[, 1], c(0, 1))
+ expect_equal(collect(select(df, ifelse(df$a > 1 & df$b > 2, lit(0), lit(1))))[, 1], c(1, 0))
+})
+
test_that("group by, agg functions", {
df <- read.json(sqlContext, jsonPath)
df1 <- agg(df, name = "max", age = "sum")
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org