You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by gu...@apache.org on 2020/12/17 11:43:25 UTC
[spark] branch branch-3.1 updated: [SPARK-26199][SPARK-31517][R]
Fix strategy for handling ... names in mutate
This is an automated email from the ASF dual-hosted git repository.
gurwls223 pushed a commit to branch branch-3.1
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/branch-3.1 by this push:
new b557147 [SPARK-26199][SPARK-31517][R] Fix strategy for handling ... names in mutate
b557147 is described below
commit b5571476a5171495770a556b0b1da044263d0ebe
Author: Michael Chirico <mi...@grabtaxi.com>
AuthorDate: Thu Dec 17 17:20:45 2020 +0900
[SPARK-26199][SPARK-31517][R] Fix strategy for handling ... names in mutate
### What changes were proposed in this pull request?
Change the strategy for how the varargs are handled in the default `mutate` method
### Why are the changes needed?
Bugfix -- `deparse` + `sapply` not working as intended due to `width.cutoff`
### Does this PR introduce any user-facing change?
Yes, bugfix. Shouldn't change any working code.
### How was this patch tested?
None yet (tests are added in this patch; see test_sparkSQL.R changes below).
Closes #28386 from MichaelChirico/r-mutate-deparse.
Lead-authored-by: Michael Chirico <mi...@grabtaxi.com>
Co-authored-by: Michael Chirico <mi...@gmail.com>
Signed-off-by: HyukjinKwon <gu...@apache.org>
---
R/pkg/R/DataFrame.R | 18 ++++++++++--------
R/pkg/tests/fulltests/test_sparkSQL.R | 15 +++++++++++++++
2 files changed, 25 insertions(+), 8 deletions(-)
diff --git a/R/pkg/R/DataFrame.R b/R/pkg/R/DataFrame.R
index 31a651e..8ca338f 100644
--- a/R/pkg/R/DataFrame.R
+++ b/R/pkg/R/DataFrame.R
@@ -2277,16 +2277,17 @@ setMethod("mutate",
# For named arguments, use the names for arguments as the column names
# For unnamed arguments, use the argument symbols as the column names
- args <- sapply(substitute(list(...))[-1], deparse)
ns <- names(cols)
- if (!is.null(ns)) {
- lapply(seq_along(args), function(i) {
- if (ns[[i]] != "") {
- args[[i]] <<- ns[[i]]
- }
+ if (is.null(ns)) ns <- rep("", length(cols))
+ named_idx <- nzchar(ns)
+ if (!all(named_idx)) {
+ # SPARK-31517: deparse uses width.cutoff on wide input and the
+ # output is length>1, so need to collapse it to scalar
+ colsub <- substitute(list(...))[-1L]
+ ns[!named_idx] <- sapply(which(!named_idx), function(ii) {
+ paste(gsub("^\\s*|\\s*$", "", deparse(colsub[[ii]])), collapse = " ")
})
}
- ns <- args
# The last column of the same name in the specific columns takes effect
deDupCols <- list()
@@ -3444,7 +3445,8 @@ setMethod("as.data.frame",
#' @note attach since 1.6.0
setMethod("attach",
signature(what = "SparkDataFrame"),
- function(what, pos = 2L, name = deparse(substitute(what), backtick = FALSE),
+ function(what, pos = 2L,
+ name = paste(deparse(substitute(what), backtick = FALSE), collapse = " "),
warn.conflicts = TRUE) {
args <- as.list(environment()) # capture all parameters - this must be the first line
newEnv <- assignNewEnv(args$what)
diff --git a/R/pkg/tests/fulltests/test_sparkSQL.R b/R/pkg/tests/fulltests/test_sparkSQL.R
index c623f53..ebf08b9 100644
--- a/R/pkg/tests/fulltests/test_sparkSQL.R
+++ b/R/pkg/tests/fulltests/test_sparkSQL.R
@@ -2884,6 +2884,15 @@ test_that("mutate(), transform(), rename() and names()", {
expect_equal(nrow(result), 153)
expect_equal(ncol(result), 2)
detach(airquality)
+
+ # ensure long inferred names are handled without error (SPARK-26199)
+ # test implicitly assumes eval(formals(deparse)$width.cutoff) = 60
+ # (which has always been true as of 2020-11-15)
+ newDF <- mutate(
+ df,
+ df$age + 12345678901234567890 + 12345678901234567890 + 12345678901234
+ )
+ expect_match(tail(columns(newDF), 1L), "234567890", fixed = TRUE)
})
test_that("read/write ORC files", {
@@ -3273,6 +3282,12 @@ test_that("attach() on a DataFrame", {
stat3 <- summary(df[, "age", drop = F])
expect_equal(collect(stat3)[8, "age"], "30")
expect_error(age)
+
+ # attach method uses deparse(); ensure no errors from a very long input
+ abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnop <- df # nolint
+ attach(abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnop)
+ expect_true(any(grepl("abcdefghijklmnopqrstuvwxyz", search())))
+ detach("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnop")
})
test_that("with() on a DataFrame", {
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org