You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by pa...@apache.org on 2023/11/06 23:47:59 UTC

(arrow) branch main updated: GH-38602: [R] Add missing `prod` for summarize (#38601)

This is an automated email from the ASF dual-hosted git repository.

paleolimbot pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/main by this push:
     new 6dcba930be GH-38602: [R] Add missing `prod` for summarize (#38601)
6dcba930be is described below

commit 6dcba930bed468268288dc6f66c1580cbf5f80e0
Author: Maximilian Muecke <mu...@gmail.com>
AuthorDate: Tue Nov 7 00:47:50 2023 +0100

    GH-38602: [R] Add missing `prod` for summarize (#38601)
    
    
    
    ### Rationale for this change
    
    `prod()` is not currently supported as an aggregation function in `summarize()`.
    
    ### What changes are included in this PR?
    
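    Added a `prod()` binding for `summarize()` aggregation, mapped to the Arrow `product` kernel, and enabled aggregate options handling for `product` and `hash_product`.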
    
    ### Are these changes tested?
    
    Yes, included the same tests used for the other aggregation functions for summarize.
    
    ### Are there any user-facing changes?
    
    Yes, `prod()` can now be used inside `summarize()`.
    
    * Closes: #38602
    
    Authored-by: Maximilian Muecke <mu...@gmail.com>
    Signed-off-by: Dewey Dunnington <de...@fishandwhistle.net>
---
 r/R/dplyr-funcs-doc.R                   |  1 +
 r/R/dplyr-summarize.R                   |  7 +++++++
 r/man/acero.Rd                          |  1 +
 r/src/compute.cpp                       |  3 ++-
 r/tests/testthat/test-dplyr-summarize.R | 23 +++++++++++++++++++++++
 5 files changed, 34 insertions(+), 1 deletion(-)

diff --git a/r/R/dplyr-funcs-doc.R b/r/R/dplyr-funcs-doc.R
index 5099e903da..492729df8c 100644
--- a/r/R/dplyr-funcs-doc.R
+++ b/r/R/dplyr-funcs-doc.R
@@ -167,6 +167,7 @@
 #' * [`paste0()`][base::paste0()]: the `collapse` argument is not yet supported
 #' * [`pmax()`][base::pmax()]
 #' * [`pmin()`][base::pmin()]
+#' * [`prod()`][base::prod()]
 #' * [`round()`][base::round()]
 #' * [`sign()`][base::sign()]
 #' * [`sin()`][base::sin()]
diff --git a/r/R/dplyr-summarize.R b/r/R/dplyr-summarize.R
index 1e9d42969d..e2c70f9e89 100644
--- a/r/R/dplyr-summarize.R
+++ b/r/R/dplyr-summarize.R
@@ -42,6 +42,13 @@ register_bindings_aggregate <- function() {
       options = list(skip_nulls = na.rm, min_count = 0L)
     )
   })
+  register_binding_agg("base::prod", function(..., na.rm = FALSE) {
+    list(
+      fun = "product",
+      data = ensure_one_arg(list2(...), "prod"),
+      options = list(skip_nulls = na.rm, min_count = 0L)
+    )
+  })
   register_binding_agg("base::any", function(..., na.rm = FALSE) {
     list(
       fun = "any",
diff --git a/r/man/acero.Rd b/r/man/acero.Rd
index c9fb4d37a1..12afdc2313 100644
--- a/r/man/acero.Rd
+++ b/r/man/acero.Rd
@@ -156,6 +156,7 @@ Consider using the lubridate specialised parsing functions \code{ymd()}, \code{y
 \item \code{\link[base:paste]{paste0()}}: the \code{collapse} argument is not yet supported
 \item \code{\link[base:Extremes]{pmax()}}
 \item \code{\link[base:Extremes]{pmin()}}
+\item \code{\link[base:prod]{prod()}}
 \item \code{\link[base:Round]{round()}}
 \item \code{\link[base:sign]{sign()}}
 \item \code{\link[base:Trig]{sin()}}
diff --git a/r/src/compute.cpp b/r/src/compute.cpp
index 578ce74d05..87d1326ed3 100644
--- a/r/src/compute.cpp
+++ b/r/src/compute.cpp
@@ -176,7 +176,8 @@ std::shared_ptr<arrow::compute::FunctionOptions> make_compute_options(
       func_name == "hash_approximate_median" || func_name == "mean" ||
       func_name == "hash_mean" || func_name == "min_max" || func_name == "hash_min_max" ||
       func_name == "min" || func_name == "hash_min" || func_name == "max" ||
-      func_name == "hash_max" || func_name == "sum" || func_name == "hash_sum") {
+      func_name == "hash_max" || func_name == "sum" || func_name == "hash_sum" ||
+      func_name == "product" || func_name == "hash_product") {
     using Options = arrow::compute::ScalarAggregateOptions;
     auto out = std::make_shared<Options>(Options::Defaults());
     if (!Rf_isNull(options["min_count"])) {
diff --git a/r/tests/testthat/test-dplyr-summarize.R b/r/tests/testthat/test-dplyr-summarize.R
index e2fb9841e7..2999371192 100644
--- a/r/tests/testthat/test-dplyr-summarize.R
+++ b/r/tests/testthat/test-dplyr-summarize.R
@@ -91,6 +91,27 @@ test_that("Group by sum on dataset", {
   )
 })
 
+test_that("Group by prod on dataset", {
+  compare_dplyr_binding(
+    .input %>%
+      group_by(some_grouping) %>%
+      summarize(prod = prod(int, na.rm = TRUE)) %>%
+      collect(),
+    tbl
+  )
+
+  compare_dplyr_binding(
+    .input %>%
+      group_by(some_grouping) %>%
+      summarize(
+        prod = prod(int, na.rm = FALSE),
+        prod2 = base::prod(int, na.rm = TRUE)
+      ) %>%
+      collect(),
+    tbl
+  )
+})
+
 test_that("Group by mean on dataset", {
   compare_dplyr_binding(
     .input %>%
@@ -319,6 +340,7 @@ test_that("Functions that take ... but we only accept a single arg", {
   # the agg_funcs directly
   expect_error(call_binding_agg("n_distinct"), "n_distinct() with 0 arguments", fixed = TRUE)
   expect_error(call_binding_agg("sum"), "sum() with 0 arguments", fixed = TRUE)
+  expect_error(call_binding_agg("prod"), "prod() with 0 arguments", fixed = TRUE)
   expect_error(call_binding_agg("any"), "any() with 0 arguments", fixed = TRUE)
   expect_error(call_binding_agg("all"), "all() with 0 arguments", fixed = TRUE)
   expect_error(call_binding_agg("min"), "min() with 0 arguments", fixed = TRUE)
@@ -642,6 +664,7 @@ test_that("summarise() with !!sym()", {
       group_by(false) %>%
       summarise(
         sum = sum(!!sym(test_dbl_col)),
+        prod = prod(!!sym(test_dbl_col)),
         any = any(!!sym(test_lgl_col)),
         all = all(!!sym(test_lgl_col)),
         mean = mean(!!sym(test_dbl_col)),