You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by jo...@apache.org on 2021/07/24 14:18:40 UTC
[arrow] branch master updated: ARROW-13434: [R] group_by() with an unnammed expression

This is an automated email from the ASF dual-hosted git repository.

jonkeane pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/master by this push:
     new 2d921dc  ARROW-13434: [R] group_by() with an unnammed expression
2d921dc is described below

commit 2d921dc8620a7edb13b953b75647473eadcd4f1c
Author: Jonathan Keane <jk...@gmail.com>
AuthorDate: Sat Jul 24 09:17:19 2021 -0500

    ARROW-13434: [R] group_by() with an unnammed expression
    
    Closes #10785 from jonkeane/ARROW-13434-groupby-expr
    
    Authored-by: Jonathan Keane <jk...@gmail.com>
    Signed-off-by: Jonathan Keane <jk...@gmail.com>
---
 r/NAMESPACE                            |  2 ++
 r/R/arrow-package.R                    |  4 ++--
 r/R/dplyr-group-by.R                   | 17 +++++++++++++----
 r/tests/testthat/test-dplyr-group-by.R |  8 +++++++-
 4 files changed, 24 insertions(+), 7 deletions(-)

diff --git a/r/NAMESPACE b/r/NAMESPACE
index 814868d..b0f4b0b 100644
--- a/r/NAMESPACE
+++ b/r/NAMESPACE
@@ -289,6 +289,7 @@ importFrom(bit64,print.integer64)
 importFrom(bit64,str.integer64)
 importFrom(methods,as)
 importFrom(purrr,as_mapper)
+importFrom(purrr,imap_chr)
 importFrom(purrr,keep)
 importFrom(purrr,map)
 importFrom(purrr,map2)
@@ -322,6 +323,7 @@ importFrom(rlang,new_data_mask)
 importFrom(rlang,new_environment)
 importFrom(rlang,quo_get_expr)
 importFrom(rlang,quo_is_null)
+importFrom(rlang,quo_name)
 importFrom(rlang,quo_set_expr)
 importFrom(rlang,quos)
 importFrom(rlang,seq2)
diff --git a/r/R/arrow-package.R b/r/R/arrow-package.R
index d2bf81c..3ebd687 100644
--- a/r/R/arrow-package.R
+++ b/r/R/arrow-package.R
@@ -17,9 +17,9 @@
 
 #' @importFrom stats quantile median na.omit na.exclude na.pass na.fail
 #' @importFrom R6 R6Class
-#' @importFrom purrr as_mapper map map2 map_chr map2_chr map_dfr map_int map_lgl keep
+#' @importFrom purrr as_mapper map map2 map_chr map2_chr map_dfr map_int map_lgl keep imap_chr
 #' @importFrom assertthat assert_that is.string
-#' @importFrom rlang list2 %||% is_false abort dots_n warn enquo quo_is_null enquos is_integerish quos eval_tidy new_data_mask syms env new_environment env_bind as_label set_names exec is_bare_character quo_get_expr quo_set_expr .data seq2 is_quosure enexpr enexprs expr caller_env is_character
+#' @importFrom rlang list2 %||% is_false abort dots_n warn enquo quo_is_null enquos is_integerish quos eval_tidy new_data_mask syms env new_environment env_bind as_label set_names exec is_bare_character quo_get_expr quo_set_expr .data seq2 is_quosure enexpr enexprs expr caller_env is_character quo_name
 #' @importFrom tidyselect vars_pull vars_rename vars_select eval_select
 #' @useDynLib arrow, .registration = TRUE
 #' @keywords internal
diff --git a/r/R/dplyr-group-by.R b/r/R/dplyr-group-by.R
index d2cf7925..c426a66 100644
--- a/r/R/dplyr-group-by.R
+++ b/r/R/dplyr-group-by.R
@@ -24,11 +24,20 @@ group_by.arrow_dplyr_query <- function(.data,
                                        add = .add,
                                        .drop = dplyr::group_by_drop_default(.data)) {
   .data <- arrow_dplyr_query(.data)
-  # ... can contain expressions (i.e. can add (or rename?) columns)
-  # Check for those (they show up as named expressions)
   new_groups <- enquos(...)
-  new_groups <- new_groups[nzchar(names(new_groups))]
+  # ... can contain expressions (i.e. can add (or rename?) columns) and so we
+  # need to identify those and add them on to the query with mutate. Specifically,
+  # we want to mark as new:
+  #   * expressions (named or otherwise)
+  #   * variables that have new names
+  # All others (i.e. simple references to variables) should not be (re)-added
+  new_group_ind <- map_lgl(new_groups, ~!(quo_name(.x) %in% names(.data)))
+  named_group_ind <- map_lgl(names(new_groups), nzchar)
+  new_groups <- new_groups[new_group_ind | named_group_ind]
   if (length(new_groups)) {
+    # now either use the name that was given in ... or if that is "" then use the expr
+    names(new_groups) <- imap_chr(new_groups, ~ ifelse(.y == "", quo_name(.x), .y))
+
     # Add them to the data
     .data <- dplyr::mutate(.data, !!!new_groups)
   }
@@ -62,4 +71,4 @@ ungroup.arrow_dplyr_query <- function(x, ...) {
   x$drop_empty_groups <- NULL
   x
 }
-ungroup.Dataset <- ungroup.ArrowTabular <- force
\ No newline at end of file
+ungroup.Dataset <- ungroup.ArrowTabular <- force
diff --git a/r/tests/testthat/test-dplyr-group-by.R b/r/tests/testthat/test-dplyr-group-by.R
index 8583c2f..fe0394b 100644
--- a/r/tests/testthat/test-dplyr-group-by.R
+++ b/r/tests/testthat/test-dplyr-group-by.R
@@ -33,7 +33,7 @@ test_that("group_by groupings are recorded", {
   )
 })
 
-test_that("group_by doesn't yet support creating/renaming", {
+test_that("group_by supports creating/renaming", {
   expect_dplyr_equal(
     input %>%
       group_by(chr, numbers = int) %>%
@@ -46,6 +46,12 @@ test_that("group_by doesn't yet support creating/renaming", {
       collect(),
     tbl
   )
+  expect_dplyr_equal(
+    input %>%
+      group_by(int > 4, lgl, foo = int > 5) %>%
+      collect(),
+    tbl
+  )
 })
 
 test_that("ungroup", {