You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by jo...@apache.org on 2022/04/13 23:26:45 UTC
[arrow] branch master updated: ARROW-14168: [R] Warn only once about arrow function differences
This is an automated email from the ASF dual-hosted git repository.
jonkeane pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/master by this push:
new 9d24ded7f7 ARROW-14168: [R] Warn only once about arrow function differences
9d24ded7f7 is described below
commit 9d24ded7f7d58717c9d78308b0c59ab7a9636006
Author: Edward Visel <16...@users.noreply.github.com>
AuthorDate: Wed Apr 13 18:26:37 2022 -0500
ARROW-14168: [R] Warn only once about arrow function differences
Addresses [ARROW-14168](https://issues.apache.org/jira/browse/ARROW-14168) by changing `median()` and `quantile()` to warn once, and adjusts the tests accordingly.
Closes #12867 from alistaire47/feat/fun-diff-warn-once
Lead-authored-by: Edward Visel <16...@users.noreply.github.com>
Co-authored-by: Jonathan Keane <jk...@gmail.com>
Signed-off-by: Jonathan Keane <jk...@gmail.com>
---
r/NEWS.md | 1 +
r/R/dplyr-summarize.R | 10 +-
r/tests/testthat/helper-arrow.R | 14 +++
r/tests/testthat/test-dplyr-summarize.R | 205 +++++++++++++++++---------------
4 files changed, 132 insertions(+), 98 deletions(-)
diff --git a/r/NEWS.md b/r/NEWS.md
index 1a1f198e0f..2e6a993507 100644
--- a/r/NEWS.md
+++ b/r/NEWS.md
@@ -27,6 +27,7 @@
* date-time functionality:
* Added `difftime` and `as.difftime()`
* Added `as.Date()` to convert to date
+* `median()` and `quantile()` will warn once about approximate calculations regardless of interactivity.
# arrow 7.0.0
diff --git a/r/R/dplyr-summarize.R b/r/R/dplyr-summarize.R
index d8e6c46d92..6484d56866 100644
--- a/r/R/dplyr-summarize.R
+++ b/r/R/dplyr-summarize.R
@@ -106,8 +106,9 @@ register_bindings_aggregate <- function() {
# this warning (ARROW-14021)
warn(
"quantile() currently returns an approximate quantile in Arrow",
- .frequency = ifelse(is_interactive(), "once", "always"),
- .frequency_id = "arrow.quantile.approximate"
+ .frequency = "once",
+ .frequency_id = "arrow.quantile.approximate",
+ class = "arrow.quantile.approximate"
)
list(
fun = "tdigest",
@@ -120,8 +121,9 @@ register_bindings_aggregate <- function() {
# this warning (ARROW-14021)
warn(
"median() currently returns an approximate median in Arrow",
- .frequency = ifelse(is_interactive(), "once", "always"),
- .frequency_id = "arrow.median.approximate"
+ .frequency = "once",
+ .frequency_id = "arrow.median.approximate",
+ class = "arrow.median.approximate"
)
list(
fun = "approximate_median",
diff --git a/r/tests/testthat/helper-arrow.R b/r/tests/testthat/helper-arrow.R
index 545f2d0440..873bb55712 100644
--- a/r/tests/testthat/helper-arrow.R
+++ b/r/tests/testthat/helper-arrow.R
@@ -56,6 +56,20 @@ test_that <- function(what, code) {
})
}
+# backport of 4.0.0 implementation
+if (getRversion() < "4.0.0") {
+ suppressWarnings <- function(expr, classes = "warning") {
+ withCallingHandlers(
+ expr,
+ warning = function(w) {
+ if (inherits(w, classes)) {
+ invokeRestart("muffleWarning")
+ }
+ }
+ )
+ }
+}
+
# Wrapper to run tests that only touch R code even when the C++ library isn't
# available (so that at least some tests are run on those platforms)
r_only <- function(code) {
diff --git a/r/tests/testthat/test-dplyr-summarize.R b/r/tests/testthat/test-dplyr-summarize.R
index efadb2722d..73e3312ee0 100644
--- a/r/tests/testthat/test-dplyr-summarize.R
+++ b/r/tests/testthat/test-dplyr-summarize.R
@@ -17,7 +17,13 @@
skip_if(on_old_windows())
-withr::local_options(list(arrow.summarise.sort = TRUE))
+withr::local_options(list(
+ arrow.summarise.sort = TRUE,
+ rlib_warning_verbosity = "verbose",
+ # This prevents the warning in `summarize()` about having grouped output without
+ # also specifying what to do with `.groups`
+ dplyr.summarise.inform = FALSE
+))
library(dplyr, warn.conflicts = FALSE)
library(stringr)
@@ -296,52 +302,56 @@ test_that("median()", {
# output of type float64. The calls to median(int, ...) in the tests below
# are enclosed in as.double() to work around this known difference.
- # Use old testthat behavior here so we don't have to assert the same warning
- # over and over
- local_edition(2)
-
# with groups
- compare_dplyr_binding(
- .input %>%
- group_by(some_grouping) %>%
- summarize(
- med_dbl = median(dbl),
- med_int = as.double(median(int)),
- med_dbl_narmf = median(dbl, FALSE),
- med_int_narmf = as.double(median(int, na.rm = FALSE)),
- med_dbl_narmt = median(dbl, na.rm = TRUE),
- med_int_narmt = as.double(median(int, TRUE))
- ) %>%
- arrange(some_grouping) %>%
- collect(),
- tbl,
- warning = "median\\(\\) currently returns an approximate median in Arrow"
+ suppressWarnings(
+ compare_dplyr_binding(
+ .input %>%
+ group_by(some_grouping) %>%
+ summarize(
+ med_dbl = median(dbl),
+ med_int = as.double(median(int)),
+ med_dbl_narmf = median(dbl, FALSE),
+ med_int_narmf = as.double(median(int, na.rm = FALSE)),
+ med_dbl_narmt = median(dbl, na.rm = TRUE),
+ med_int_narmt = as.double(median(int, TRUE))
+ ) %>%
+ arrange(some_grouping) %>%
+ collect(),
+ tbl,
+ warning = "median\\(\\) currently returns an approximate median in Arrow"
+ ),
+ classes = "arrow.median.approximate"
)
# without groups, with na.rm = TRUE
- compare_dplyr_binding(
- .input %>%
- summarize(
- med_dbl_narmt = median(dbl, na.rm = TRUE),
- med_int_narmt = as.double(median(int, TRUE))
- ) %>%
- collect(),
- tbl,
- warning = "median\\(\\) currently returns an approximate median in Arrow"
+ suppressWarnings(
+ compare_dplyr_binding(
+ .input %>%
+ summarize(
+ med_dbl_narmt = median(dbl, na.rm = TRUE),
+ med_int_narmt = as.double(median(int, TRUE))
+ ) %>%
+ collect(),
+ tbl,
+ warning = "median\\(\\) currently returns an approximate median in Arrow"
+ ),
+ classes = "arrow.median.approximate"
)
# without groups, with na.rm = FALSE (the default)
- compare_dplyr_binding(
- .input %>%
- summarize(
- med_dbl = median(dbl),
- med_int = as.double(median(int)),
- med_dbl_narmf = median(dbl, FALSE),
- med_int_narmf = as.double(median(int, na.rm = FALSE))
- ) %>%
- collect(),
- tbl,
- warning = "median\\(\\) currently returns an approximate median in Arrow"
+ suppressWarnings(
+ compare_dplyr_binding(
+ .input %>%
+ summarize(
+ med_dbl = median(dbl),
+ med_int = as.double(median(int)),
+ med_dbl_narmf = median(dbl, FALSE),
+ med_int_narmf = as.double(median(int, na.rm = FALSE))
+ ) %>%
+ collect(),
+ tbl,
+ warning = "median\\(\\) currently returns an approximate median in Arrow"
+ ),
+ classes = "arrow.median.approximate"
)
- local_edition(3)
})
test_that("quantile()", {
@@ -367,71 +377,78 @@ test_that("quantile()", {
# return output of type float64. The calls to quantile(int, ...) in the tests
# below are enclosed in as.double() to work around this known difference.
- local_edition(2)
# with groups
- expect_warning(
- expect_equal(
- tbl %>%
- group_by(some_grouping) %>%
- summarize(
- q_dbl = quantile(dbl, probs = 0.5, na.rm = TRUE, names = FALSE),
- q_int = as.double(
- quantile(int, probs = 0.5, na.rm = TRUE, names = FALSE)
- )
- ) %>%
- arrange(some_grouping),
- Table$create(tbl) %>%
- group_by(some_grouping) %>%
- summarize(
- q_dbl = quantile(dbl, probs = 0.5, na.rm = TRUE),
- q_int = as.double(quantile(int, probs = 0.5, na.rm = TRUE))
- ) %>%
- arrange(some_grouping) %>%
- collect()
+ suppressWarnings(
+ expect_warning(
+ expect_equal(
+ tbl %>%
+ group_by(some_grouping) %>%
+ summarize(
+ q_dbl = quantile(dbl, probs = 0.5, na.rm = TRUE, names = FALSE),
+ q_int = as.double(
+ quantile(int, probs = 0.5, na.rm = TRUE, names = FALSE)
+ )
+ ) %>%
+ arrange(some_grouping),
+ Table$create(tbl) %>%
+ group_by(some_grouping) %>%
+ summarize(
+ q_dbl = quantile(dbl, probs = 0.5, na.rm = TRUE),
+ q_int = as.double(quantile(int, probs = 0.5, na.rm = TRUE))
+ ) %>%
+ arrange(some_grouping) %>%
+ collect()
+ ),
+ "quantile() currently returns an approximate quantile in Arrow",
+ fixed = TRUE
),
- "quantile() currently returns an approximate quantile in Arrow",
- fixed = TRUE
+ classes = "arrow.quantile.approximate"
)
# without groups
- expect_warning(
- expect_equal(
- tbl %>%
- summarize(
- q_dbl = quantile(dbl, probs = 0.5, na.rm = TRUE, names = FALSE),
- q_int = as.double(
- quantile(int, probs = 0.5, na.rm = TRUE, names = FALSE)
- )
- ),
- Table$create(tbl) %>%
- summarize(
- q_dbl = quantile(dbl, probs = 0.5, na.rm = TRUE),
- q_int = as.double(quantile(int, probs = 0.5, na.rm = TRUE))
- ) %>%
- collect()
+ suppressWarnings(
+ expect_warning(
+ expect_equal(
+ tbl %>%
+ summarize(
+ q_dbl = quantile(dbl, probs = 0.5, na.rm = TRUE, names = FALSE),
+ q_int = as.double(
+ quantile(int, probs = 0.5, na.rm = TRUE, names = FALSE)
+ )
+ ),
+ Table$create(tbl) %>%
+ summarize(
+ q_dbl = quantile(dbl, probs = 0.5, na.rm = TRUE),
+ q_int = as.double(quantile(int, probs = 0.5, na.rm = TRUE))
+ ) %>%
+ collect()
+ ),
+ "quantile() currently returns an approximate quantile in Arrow",
+ fixed = TRUE
),
- "quantile() currently returns an approximate quantile in Arrow",
- fixed = TRUE
+ classes = "arrow.quantile.approximate"
)
# with missing values and na.rm = FALSE
- expect_warning(
- expect_equal(
- tibble(
- q_dbl = NA_real_,
- q_int = NA_real_
+ suppressWarnings(
+ expect_warning(
+ expect_equal(
+ tibble(
+ q_dbl = NA_real_,
+ q_int = NA_real_
+ ),
+ Table$create(tbl) %>%
+ summarize(
+ q_dbl = quantile(dbl, probs = 0.5, na.rm = FALSE),
+ q_int = as.double(quantile(int, probs = 0.5, na.rm = FALSE))
+ ) %>%
+ collect()
),
- Table$create(tbl) %>%
- summarize(
- q_dbl = quantile(dbl, probs = 0.5, na.rm = FALSE),
- q_int = as.double(quantile(int, probs = 0.5, na.rm = FALSE))
- ) %>%
- collect()
+ "quantile() currently returns an approximate quantile in Arrow",
+ fixed = TRUE
),
- "quantile() currently returns an approximate quantile in Arrow",
- fixed = TRUE
+ classes = "arrow.quantile.approximate"
)
- local_edition(3)
# with a vector of 2+ probs
expect_warning(