You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by jo...@apache.org on 2022/04/13 23:26:45 UTC

[arrow] branch master updated: ARROW-14168: [R] Warn only once about arrow function differences

This is an automated email from the ASF dual-hosted git repository.

jonkeane pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/master by this push:
     new 9d24ded7f7 ARROW-14168: [R] Warn only once about arrow function differences
9d24ded7f7 is described below

commit 9d24ded7f7d58717c9d78308b0c59ab7a9636006
Author: Edward Visel <16...@users.noreply.github.com>
AuthorDate: Wed Apr 13 18:26:37 2022 -0500

    ARROW-14168: [R] Warn only once about arrow function differences
    
    Addresses [ARROW-14168](https://issues.apache.org/jira/browse/ARROW-14168) by changing `median()` and `quantile()` to warn once, and adjusts the tests accordingly.
    
    Closes #12867 from alistaire47/feat/fun-diff-warn-once
    
    Lead-authored-by: Edward Visel <16...@users.noreply.github.com>
    Co-authored-by: Jonathan Keane <jk...@gmail.com>
    Signed-off-by: Jonathan Keane <jk...@gmail.com>
---
 r/NEWS.md                               |   1 +
 r/R/dplyr-summarize.R                   |  10 +-
 r/tests/testthat/helper-arrow.R         |  14 +++
 r/tests/testthat/test-dplyr-summarize.R | 205 +++++++++++++++++---------------
 4 files changed, 132 insertions(+), 98 deletions(-)

diff --git a/r/NEWS.md b/r/NEWS.md
index 1a1f198e0f..2e6a993507 100644
--- a/r/NEWS.md
+++ b/r/NEWS.md
@@ -27,6 +27,7 @@
 * date-time functionality:
   * Added `difftime` and `as.difftime()` 
   * Added `as.Date()` to convert to date
+* `median()` and `quantile()` will warn once about approximate calculations regardless of interactivity.
 
 # arrow 7.0.0
 
diff --git a/r/R/dplyr-summarize.R b/r/R/dplyr-summarize.R
index d8e6c46d92..6484d56866 100644
--- a/r/R/dplyr-summarize.R
+++ b/r/R/dplyr-summarize.R
@@ -106,8 +106,9 @@ register_bindings_aggregate <- function() {
     # this warning (ARROW-14021)
     warn(
       "quantile() currently returns an approximate quantile in Arrow",
-      .frequency = ifelse(is_interactive(), "once", "always"),
-      .frequency_id = "arrow.quantile.approximate"
+      .frequency = "once",
+      .frequency_id = "arrow.quantile.approximate",
+      class = "arrow.quantile.approximate"
     )
     list(
       fun = "tdigest",
@@ -120,8 +121,9 @@ register_bindings_aggregate <- function() {
     # this warning (ARROW-14021)
     warn(
       "median() currently returns an approximate median in Arrow",
-      .frequency = ifelse(is_interactive(), "once", "always"),
-      .frequency_id = "arrow.median.approximate"
+      .frequency = "once",
+      .frequency_id = "arrow.median.approximate",
+      class = "arrow.median.approximate"
     )
     list(
       fun = "approximate_median",
diff --git a/r/tests/testthat/helper-arrow.R b/r/tests/testthat/helper-arrow.R
index 545f2d0440..873bb55712 100644
--- a/r/tests/testthat/helper-arrow.R
+++ b/r/tests/testthat/helper-arrow.R
@@ -56,6 +56,20 @@ test_that <- function(what, code) {
   })
 }
 
+# backport of 4.0.0 implementation
+if (getRversion() < "4.0.0") {
+  suppressWarnings <- function(expr, classes = "warning") {
+    withCallingHandlers(
+      expr,
+      warning = function(w) {
+        if (inherits(w, classes)) {
+          invokeRestart("muffleWarning")
+        }
+      }
+    )
+  }
+}
+
 # Wrapper to run tests that only touch R code even when the C++ library isn't
 # available (so that at least some tests are run on those platforms)
 r_only <- function(code) {
diff --git a/r/tests/testthat/test-dplyr-summarize.R b/r/tests/testthat/test-dplyr-summarize.R
index efadb2722d..73e3312ee0 100644
--- a/r/tests/testthat/test-dplyr-summarize.R
+++ b/r/tests/testthat/test-dplyr-summarize.R
@@ -17,7 +17,13 @@
 
 skip_if(on_old_windows())
 
-withr::local_options(list(arrow.summarise.sort = TRUE))
+withr::local_options(list(
+  arrow.summarise.sort = TRUE,
+  rlib_warning_verbosity = "verbose",
+  # This prevents the warning in `summarize()` about having grouped output without
+  # also specifying what to do with `.groups`
+  dplyr.summarise.inform = FALSE
+))
 
 library(dplyr, warn.conflicts = FALSE)
 library(stringr)
@@ -296,52 +302,56 @@ test_that("median()", {
   # output of type float64. The calls to median(int, ...) in the tests below
   # are enclosed in as.double() to work around this known difference.
 
-  # Use old testthat behavior here so we don't have to assert the same warning
-  # over and over
-  local_edition(2)
-
   # with groups
-  compare_dplyr_binding(
-    .input %>%
-      group_by(some_grouping) %>%
-      summarize(
-        med_dbl = median(dbl),
-        med_int = as.double(median(int)),
-        med_dbl_narmf = median(dbl, FALSE),
-        med_int_narmf = as.double(median(int, na.rm = FALSE)),
-        med_dbl_narmt = median(dbl, na.rm = TRUE),
-        med_int_narmt = as.double(median(int, TRUE))
-      ) %>%
-      arrange(some_grouping) %>%
-      collect(),
-    tbl,
-    warning = "median\\(\\) currently returns an approximate median in Arrow"
+  suppressWarnings(
+    compare_dplyr_binding(
+      .input %>%
+        group_by(some_grouping) %>%
+        summarize(
+          med_dbl = median(dbl),
+          med_int = as.double(median(int)),
+          med_dbl_narmf = median(dbl, FALSE),
+          med_int_narmf = as.double(median(int, na.rm = FALSE)),
+          med_dbl_narmt = median(dbl, na.rm = TRUE),
+          med_int_narmt = as.double(median(int, TRUE))
+        ) %>%
+        arrange(some_grouping) %>%
+        collect(),
+      tbl,
+      warning = "median\\(\\) currently returns an approximate median in Arrow"
+    ),
+    classes = "arrow.median.approximate"
   )
   # without groups, with na.rm = TRUE
-  compare_dplyr_binding(
-    .input %>%
-      summarize(
-        med_dbl_narmt = median(dbl, na.rm = TRUE),
-        med_int_narmt = as.double(median(int, TRUE))
-      ) %>%
-      collect(),
-    tbl,
-    warning = "median\\(\\) currently returns an approximate median in Arrow"
+  suppressWarnings(
+    compare_dplyr_binding(
+      .input %>%
+        summarize(
+          med_dbl_narmt = median(dbl, na.rm = TRUE),
+          med_int_narmt = as.double(median(int, TRUE))
+        ) %>%
+        collect(),
+      tbl,
+      warning = "median\\(\\) currently returns an approximate median in Arrow"
+    ),
+    classes = "arrow.median.approximate"
   )
   # without groups, with na.rm = FALSE (the default)
-  compare_dplyr_binding(
-    .input %>%
-      summarize(
-        med_dbl = median(dbl),
-        med_int = as.double(median(int)),
-        med_dbl_narmf = median(dbl, FALSE),
-        med_int_narmf = as.double(median(int, na.rm = FALSE))
-      ) %>%
-      collect(),
-    tbl,
-    warning = "median\\(\\) currently returns an approximate median in Arrow"
+  suppressWarnings(
+    compare_dplyr_binding(
+      .input %>%
+        summarize(
+          med_dbl = median(dbl),
+          med_int = as.double(median(int)),
+          med_dbl_narmf = median(dbl, FALSE),
+          med_int_narmf = as.double(median(int, na.rm = FALSE))
+        ) %>%
+        collect(),
+      tbl,
+      warning = "median\\(\\) currently returns an approximate median in Arrow"
+    ),
+    classes = "arrow.median.approximate"
   )
-  local_edition(3)
 })
 
 test_that("quantile()", {
@@ -367,71 +377,78 @@ test_that("quantile()", {
   # return output of type float64. The calls to quantile(int, ...) in the tests
   # below are enclosed in as.double() to work around this known difference.
 
-  local_edition(2)
   # with groups
-  expect_warning(
-    expect_equal(
-      tbl %>%
-        group_by(some_grouping) %>%
-        summarize(
-          q_dbl = quantile(dbl, probs = 0.5, na.rm = TRUE, names = FALSE),
-          q_int = as.double(
-            quantile(int, probs = 0.5, na.rm = TRUE, names = FALSE)
-          )
-        ) %>%
-        arrange(some_grouping),
-      Table$create(tbl) %>%
-        group_by(some_grouping) %>%
-        summarize(
-          q_dbl = quantile(dbl, probs = 0.5, na.rm = TRUE),
-          q_int = as.double(quantile(int, probs = 0.5, na.rm = TRUE))
-        ) %>%
-        arrange(some_grouping) %>%
-        collect()
+  suppressWarnings(
+    expect_warning(
+      expect_equal(
+        tbl %>%
+          group_by(some_grouping) %>%
+          summarize(
+            q_dbl = quantile(dbl, probs = 0.5, na.rm = TRUE, names = FALSE),
+            q_int = as.double(
+              quantile(int, probs = 0.5, na.rm = TRUE, names = FALSE)
+            )
+          ) %>%
+          arrange(some_grouping),
+        Table$create(tbl) %>%
+          group_by(some_grouping) %>%
+          summarize(
+            q_dbl = quantile(dbl, probs = 0.5, na.rm = TRUE),
+            q_int = as.double(quantile(int, probs = 0.5, na.rm = TRUE))
+          ) %>%
+          arrange(some_grouping) %>%
+          collect()
+      ),
+      "quantile() currently returns an approximate quantile in Arrow",
+      fixed = TRUE
     ),
-    "quantile() currently returns an approximate quantile in Arrow",
-    fixed = TRUE
+    classes = "arrow.quantile.approximate"
   )
 
   # without groups
-  expect_warning(
-    expect_equal(
-      tbl %>%
-        summarize(
-          q_dbl = quantile(dbl, probs = 0.5, na.rm = TRUE, names = FALSE),
-          q_int = as.double(
-            quantile(int, probs = 0.5, na.rm = TRUE, names = FALSE)
-          )
-        ),
-      Table$create(tbl) %>%
-        summarize(
-          q_dbl = quantile(dbl, probs = 0.5, na.rm = TRUE),
-          q_int = as.double(quantile(int, probs = 0.5, na.rm = TRUE))
-        ) %>%
-        collect()
+  suppressWarnings(
+    expect_warning(
+      expect_equal(
+        tbl %>%
+          summarize(
+            q_dbl = quantile(dbl, probs = 0.5, na.rm = TRUE, names = FALSE),
+            q_int = as.double(
+              quantile(int, probs = 0.5, na.rm = TRUE, names = FALSE)
+            )
+          ),
+        Table$create(tbl) %>%
+          summarize(
+            q_dbl = quantile(dbl, probs = 0.5, na.rm = TRUE),
+            q_int = as.double(quantile(int, probs = 0.5, na.rm = TRUE))
+          ) %>%
+          collect()
+      ),
+      "quantile() currently returns an approximate quantile in Arrow",
+      fixed = TRUE
     ),
-    "quantile() currently returns an approximate quantile in Arrow",
-    fixed = TRUE
+    classes = "arrow.quantile.approximate"
   )
 
   # with missing values and na.rm = FALSE
-  expect_warning(
-    expect_equal(
-      tibble(
-        q_dbl = NA_real_,
-        q_int = NA_real_
+  suppressWarnings(
+    expect_warning(
+      expect_equal(
+        tibble(
+          q_dbl = NA_real_,
+          q_int = NA_real_
+        ),
+        Table$create(tbl) %>%
+          summarize(
+            q_dbl = quantile(dbl, probs = 0.5, na.rm = FALSE),
+            q_int = as.double(quantile(int, probs = 0.5, na.rm = FALSE))
+          ) %>%
+          collect()
       ),
-      Table$create(tbl) %>%
-        summarize(
-          q_dbl = quantile(dbl, probs = 0.5, na.rm = FALSE),
-          q_int = as.double(quantile(int, probs = 0.5, na.rm = FALSE))
-        ) %>%
-        collect()
+      "quantile() currently returns an approximate quantile in Arrow",
+      fixed = TRUE
     ),
-    "quantile() currently returns an approximate quantile in Arrow",
-    fixed = TRUE
+    classes = "arrow.quantile.approximate"
   )
-  local_edition(3)
 
   # with a vector of 2+ probs
   expect_warning(