You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by ko...@apache.org on 2022/11/15 08:51:03 UTC
[arrow] 07/27: ARROW-18131: [R] Correctly handle .data pronoun in group_by() (#14484)

This is an automated email from the ASF dual-hosted git repository.

kou pushed a commit to branch maint-10.0.x
in repository https://gitbox.apache.org/repos/asf/arrow.git

commit 9608485fa912c4cd9ea0cd0b22c2eeb886fc99b0
Author: Nic Crane <th...@gmail.com>
AuthorDate: Mon Oct 24 14:03:53 2022 +0100

    ARROW-18131: [R] Correctly handle .data pronoun in group_by() (#14484)
    
    Authored-by: Nic Crane <th...@gmail.com>
    Signed-off-by: Nic Crane <th...@gmail.com>
---
 r/R/dplyr-group-by.R                   | 17 +++++++++--------
 r/tests/testthat/test-dplyr-group-by.R |  9 +++++++++
 2 files changed, 18 insertions(+), 8 deletions(-)

diff --git a/r/R/dplyr-group-by.R b/r/R/dplyr-group-by.R
index 85825b9bf2..a7b1ab9dbc 100644
--- a/r/R/dplyr-group-by.R
+++ b/r/R/dplyr-group-by.R
@@ -37,19 +37,20 @@ group_by.arrow_dplyr_query <- function(.data,
   expression_list <- expand_across(.data, quos(...))
   new_groups <- ensure_named_exprs(expression_list)
 
+  # set up group names and check which are new
+  gbp <- dplyr::group_by_prepare(.data, !!!expression_list, .add = .add)
+  existing_groups <- dplyr::group_vars(gbp$data)
+  new_group_names <- setdiff(gbp$group_names, existing_groups)
+
+  names(new_groups) <- new_group_names
+
   if (length(new_groups)) {
     # Add them to the data
     .data <- dplyr::mutate(.data, !!!new_groups)
   }
 
-  if (.add) {
-    gv <- union(dplyr::group_vars(.data), names(new_groups))
-  } else {
-    gv <- names(new_groups)
-  }
-
-  .data$group_by_vars <- gv %||% character()
-  .data$drop_empty_groups <- ifelse(length(gv), .drop, dplyr::group_by_drop_default(.data))
+  .data$group_by_vars <- gbp$group_names
+  .data$drop_empty_groups <- ifelse(length(gbp$group_names), .drop, dplyr::group_by_drop_default(.data))
   .data
 }
 group_by.Dataset <- group_by.ArrowTabular <- group_by.RecordBatchReader <- group_by.arrow_dplyr_query
diff --git a/r/tests/testthat/test-dplyr-group-by.R b/r/tests/testthat/test-dplyr-group-by.R
index e4e4d41d49..9f2869dd10 100644
--- a/r/tests/testthat/test-dplyr-group-by.R
+++ b/r/tests/testthat/test-dplyr-group-by.R
@@ -305,3 +305,12 @@ test_that("Can use across() within group_by()", {
     tbl
   )
 })
+
+test_that("ARROW-18131 - correctly handles .data pronoun in group_by()", {
+  compare_dplyr_binding(
+    .input %>%
+      group_by(.data$lgl) %>%
+      collect(),
+    tbl
+  )
+})