You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by ks...@apache.org on 2020/05/13 01:15:30 UTC

[arrow] 14/17: ARROW-8758: [R] Updates for compatibility with dplyr 1.0

This is an automated email from the ASF dual-hosted git repository.

kszucs pushed a commit to branch maint-0.17.x
in repository https://gitbox.apache.org/repos/asf/arrow.git

commit 116ed8832834d102583597be6031e9cb28e5dfb2
Author: Neal Richardson <ne...@gmail.com>
AuthorDate: Mon May 11 10:15:27 2020 -0700

    ARROW-8758: [R] Updates for compatibility with dplyr 1.0
    
    I tested this locally with the current version of `dplyr` on CRAN and the dev version scheduled to be released to CRAN on May 15. Our tests now pass with both versions.
    
    Changes addressed:
    
    * `group_by` now requires a character vector of grouping variable names, so now we use `group_vars()` instead of `groups()`. `group_vars()` works in the current `dplyr` release, so this is a simple change.
    * The argument name in `group_by()` changed from `add` to `.add`, and calling it with the name that works in the current version raises a deprecation warning in dplyr 1.0. The fix here supports both spellings of the argument, and it avoids the warning by determining which version of the internal dplyr function exists and calling the appropriate one.
    * `dplyr::transmute()` no longer calls `dplyr::mutate()` internally, so it doesn't just work on Arrow objects anymore. I skipped the one test that called it and left a TODO to add a transmute method.
    
    Closes #7147 from nealrichardson/dplyr-1.0
    
    Authored-by: Neal Richardson <ne...@gmail.com>
    Signed-off-by: Neal Richardson <ne...@gmail.com>
---
 r/NEWS.md                     |  4 ++++
 r/R/dplyr.R                   | 13 +++++++++----
 r/tests/testthat/test-dplyr.R |  1 +
 3 files changed, 14 insertions(+), 4 deletions(-)

diff --git a/r/NEWS.md b/r/NEWS.md
index 7cf8ef9..231b3d3 100644
--- a/r/NEWS.md
+++ b/r/NEWS.md
@@ -17,6 +17,10 @@
   under the License.
 -->
 
+# arrow 0.17.0.9000
+
+* Updates for compatibility with `dplyr` 1.0
+
 # arrow 0.17.0
 
 ## Feather v2
diff --git a/r/R/dplyr.R b/r/R/dplyr.R
index 1d3b5ee..5ea0a7a 100644
--- a/r/R/dplyr.R
+++ b/r/R/dplyr.R
@@ -264,7 +264,7 @@ restore_dplyr_features <- function(df, query) {
   }
   # Preserve groupings, if present
   if (length(query$group_by_vars)) {
-    df <- dplyr::grouped_df(df, dplyr::groups(query))
+    df <- dplyr::grouped_df(df, dplyr::group_vars(query))
   }
   df
 }
@@ -294,9 +294,15 @@ summarise.arrow_dplyr_query <- function(.data, ...) {
 }
 summarise.Dataset <- summarise.Table <- summarise.RecordBatch <- summarise.arrow_dplyr_query
 
-group_by.arrow_dplyr_query <- function(.data, ..., add = FALSE) {
+group_by.arrow_dplyr_query <- function(.data, ..., .add = FALSE, add = .add) {
   .data <- arrow_dplyr_query(.data)
-  .data$group_by_vars <- dplyr::group_by_prepare(.data, ..., add = add)$group_names
+  if (".add" %in% names(formals(dplyr::group_by))) {
+    # dplyr >= 1.0
+    gv <- dplyr::group_by_prepare(.data, ..., .add = .add)$group_names
+  } else {
+    gv <- dplyr::group_by_prepare(.data, ..., add = add)$group_names
+  }
+  .data$group_by_vars <- gv
   .data
 }
 group_by.Dataset <- group_by.Table <- group_by.RecordBatch <- group_by.arrow_dplyr_query
@@ -324,7 +330,6 @@ mutate.arrow_dplyr_query <- function(.data, ...) {
   dplyr::mutate(dplyr::collect(.data), ...)
 }
 mutate.Dataset <- mutate.Table <- mutate.RecordBatch <- mutate.arrow_dplyr_query
-# transmute() "just works" because it calls mutate() internally
 # TODO: add transmute() that does what summarise() does (select only the vars we need)
 
 arrange.arrow_dplyr_query <- function(.data, ...) {
diff --git a/r/tests/testthat/test-dplyr.R b/r/tests/testthat/test-dplyr.R
index e531fb3..3ae915e 100644
--- a/r/tests/testthat/test-dplyr.R
+++ b/r/tests/testthat/test-dplyr.R
@@ -228,6 +228,7 @@ test_that("mutate", {
 })
 
 test_that("transmute", {
+  skip("TODO: reimplement transmute (with dplyr 1.0, it no longer just works via mutate)")
   expect_dplyr_equal(
     input %>%
       select(int, chr) %>%