You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by ko...@apache.org on 2022/11/15 08:51:03 UTC
[arrow] 07/27: ARROW-18131: [R] Correctly handle .data pronoun in group_by() (#14484)
This is an automated email from the ASF dual-hosted git repository.
kou pushed a commit to branch maint-10.0.x
in repository https://gitbox.apache.org/repos/asf/arrow.git
commit 9608485fa912c4cd9ea0cd0b22c2eeb886fc99b0
Author: Nic Crane <th...@gmail.com>
AuthorDate: Mon Oct 24 14:03:53 2022 +0100
ARROW-18131: [R] Correctly handle .data pronoun in group_by() (#14484)
Authored-by: Nic Crane <th...@gmail.com>
Signed-off-by: Nic Crane <th...@gmail.com>
---
r/R/dplyr-group-by.R | 17 +++++++++--------
r/tests/testthat/test-dplyr-group-by.R | 9 +++++++++
2 files changed, 18 insertions(+), 8 deletions(-)
diff --git a/r/R/dplyr-group-by.R b/r/R/dplyr-group-by.R
index 85825b9bf2..a7b1ab9dbc 100644
--- a/r/R/dplyr-group-by.R
+++ b/r/R/dplyr-group-by.R
@@ -37,19 +37,20 @@ group_by.arrow_dplyr_query <- function(.data,
expression_list <- expand_across(.data, quos(...))
new_groups <- ensure_named_exprs(expression_list)
+ # set up group names and check which are new
+ gbp <- dplyr::group_by_prepare(.data, !!!expression_list, .add = .add)
+ existing_groups <- dplyr::group_vars(gbp$data)
+ new_group_names <- setdiff(gbp$group_names, existing_groups)
+
+ names(new_groups) <- new_group_names
+
if (length(new_groups)) {
# Add them to the data
.data <- dplyr::mutate(.data, !!!new_groups)
}
- if (.add) {
- gv <- union(dplyr::group_vars(.data), names(new_groups))
- } else {
- gv <- names(new_groups)
- }
-
- .data$group_by_vars <- gv %||% character()
- .data$drop_empty_groups <- ifelse(length(gv), .drop, dplyr::group_by_drop_default(.data))
+ .data$group_by_vars <- gbp$group_names
+ .data$drop_empty_groups <- ifelse(length(gbp$group_names), .drop, dplyr::group_by_drop_default(.data))
.data
}
group_by.Dataset <- group_by.ArrowTabular <- group_by.RecordBatchReader <- group_by.arrow_dplyr_query
diff --git a/r/tests/testthat/test-dplyr-group-by.R b/r/tests/testthat/test-dplyr-group-by.R
index e4e4d41d49..9f2869dd10 100644
--- a/r/tests/testthat/test-dplyr-group-by.R
+++ b/r/tests/testthat/test-dplyr-group-by.R
@@ -305,3 +305,12 @@ test_that("Can use across() within group_by()", {
tbl
)
})
+
+test_that("ARROW-18131 - correctly handles .data pronoun in group_by()", {
+ compare_dplyr_binding(
+ .input %>%
+ group_by(.data$lgl) %>%
+ collect(),
+ tbl
+ )
+})