You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by pa...@apache.org on 2023/11/06 23:47:59 UTC
(arrow) branch main updated: GH-38602: [R] Add missing `prod` for summarize (#38601)
This is an automated email from the ASF dual-hosted git repository.
paleolimbot pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/main by this push:
new 6dcba930be GH-38602: [R] Add missing `prod` for summarize (#38601)
6dcba930be is described below
commit 6dcba930bed468268288dc6f66c1580cbf5f80e0
Author: Maximilian Muecke <mu...@gmail.com>
AuthorDate: Tue Nov 7 00:47:50 2023 +0100
GH-38602: [R] Add missing `prod` for summarize (#38601)
### Rationale for this change
`prod()` is not currently available as an aggregation function in `summarize()`.
### What changes are included in this PR?
Added `prod` for summarize aggregation.
### Are these changes tested?
Yes, the new tests mirror those already used for the other `summarize()` aggregation functions.
### Are there any user-facing changes?
Yes, `prod()` is now supported as an aggregation function in `summarize()`.
* Closes: #38602
Authored-by: Maximilian Muecke <mu...@gmail.com>
Signed-off-by: Dewey Dunnington <de...@fishandwhistle.net>
---
r/R/dplyr-funcs-doc.R | 1 +
r/R/dplyr-summarize.R | 7 +++++++
r/man/acero.Rd | 1 +
r/src/compute.cpp | 3 ++-
r/tests/testthat/test-dplyr-summarize.R | 23 +++++++++++++++++++++++
5 files changed, 34 insertions(+), 1 deletion(-)
diff --git a/r/R/dplyr-funcs-doc.R b/r/R/dplyr-funcs-doc.R
index 5099e903da..492729df8c 100644
--- a/r/R/dplyr-funcs-doc.R
+++ b/r/R/dplyr-funcs-doc.R
@@ -167,6 +167,7 @@
#' * [`paste0()`][base::paste0()]: the `collapse` argument is not yet supported
#' * [`pmax()`][base::pmax()]
#' * [`pmin()`][base::pmin()]
+#' * [`prod()`][base::prod()]
#' * [`round()`][base::round()]
#' * [`sign()`][base::sign()]
#' * [`sin()`][base::sin()]
diff --git a/r/R/dplyr-summarize.R b/r/R/dplyr-summarize.R
index 1e9d42969d..e2c70f9e89 100644
--- a/r/R/dplyr-summarize.R
+++ b/r/R/dplyr-summarize.R
@@ -42,6 +42,13 @@ register_bindings_aggregate <- function() {
options = list(skip_nulls = na.rm, min_count = 0L)
)
})
+ register_binding_agg("base::prod", function(..., na.rm = FALSE) {
+ list(
+ fun = "product",
+ data = ensure_one_arg(list2(...), "prod"),
+ options = list(skip_nulls = na.rm, min_count = 0L)
+ )
+ })
register_binding_agg("base::any", function(..., na.rm = FALSE) {
list(
fun = "any",
diff --git a/r/man/acero.Rd b/r/man/acero.Rd
index c9fb4d37a1..12afdc2313 100644
--- a/r/man/acero.Rd
+++ b/r/man/acero.Rd
@@ -156,6 +156,7 @@ Consider using the lubridate specialised parsing functions \code{ymd()}, \code{y
\item \code{\link[base:paste]{paste0()}}: the \code{collapse} argument is not yet supported
\item \code{\link[base:Extremes]{pmax()}}
\item \code{\link[base:Extremes]{pmin()}}
+\item \code{\link[base:prod]{prod()}}
\item \code{\link[base:Round]{round()}}
\item \code{\link[base:sign]{sign()}}
\item \code{\link[base:Trig]{sin()}}
diff --git a/r/src/compute.cpp b/r/src/compute.cpp
index 578ce74d05..87d1326ed3 100644
--- a/r/src/compute.cpp
+++ b/r/src/compute.cpp
@@ -176,7 +176,8 @@ std::shared_ptr<arrow::compute::FunctionOptions> make_compute_options(
func_name == "hash_approximate_median" || func_name == "mean" ||
func_name == "hash_mean" || func_name == "min_max" || func_name == "hash_min_max" ||
func_name == "min" || func_name == "hash_min" || func_name == "max" ||
- func_name == "hash_max" || func_name == "sum" || func_name == "hash_sum") {
+ func_name == "hash_max" || func_name == "sum" || func_name == "hash_sum" ||
+ func_name == "product" || func_name == "hash_product") {
using Options = arrow::compute::ScalarAggregateOptions;
auto out = std::make_shared<Options>(Options::Defaults());
if (!Rf_isNull(options["min_count"])) {
diff --git a/r/tests/testthat/test-dplyr-summarize.R b/r/tests/testthat/test-dplyr-summarize.R
index e2fb9841e7..2999371192 100644
--- a/r/tests/testthat/test-dplyr-summarize.R
+++ b/r/tests/testthat/test-dplyr-summarize.R
@@ -91,6 +91,27 @@ test_that("Group by sum on dataset", {
)
})
+test_that("Group by prod on dataset", {
+ compare_dplyr_binding(
+ .input %>%
+ group_by(some_grouping) %>%
+ summarize(prod = prod(int, na.rm = TRUE)) %>%
+ collect(),
+ tbl
+ )
+
+ compare_dplyr_binding(
+ .input %>%
+ group_by(some_grouping) %>%
+ summarize(
+ prod = prod(int, na.rm = FALSE),
+ prod2 = base::prod(int, na.rm = TRUE)
+ ) %>%
+ collect(),
+ tbl
+ )
+})
+
test_that("Group by mean on dataset", {
compare_dplyr_binding(
.input %>%
@@ -319,6 +340,7 @@ test_that("Functions that take ... but we only accept a single arg", {
# the agg_funcs directly
expect_error(call_binding_agg("n_distinct"), "n_distinct() with 0 arguments", fixed = TRUE)
expect_error(call_binding_agg("sum"), "sum() with 0 arguments", fixed = TRUE)
+ expect_error(call_binding_agg("prod"), "prod() with 0 arguments", fixed = TRUE)
expect_error(call_binding_agg("any"), "any() with 0 arguments", fixed = TRUE)
expect_error(call_binding_agg("all"), "all() with 0 arguments", fixed = TRUE)
expect_error(call_binding_agg("min"), "min() with 0 arguments", fixed = TRUE)
@@ -642,6 +664,7 @@ test_that("summarise() with !!sym()", {
group_by(false) %>%
summarise(
sum = sum(!!sym(test_dbl_col)),
+ prod = prod(!!sym(test_dbl_col)),
any = any(!!sym(test_lgl_col)),
all = all(!!sym(test_lgl_col)),
mean = mean(!!sym(test_dbl_col)),