You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by ia...@apache.org on 2021/09/21 02:35:29 UTC

[arrow] branch master updated: ARROW-13990: [R] Bindings for round kernels

This is an automated email from the ASF dual-hosted git repository.

ianmcook pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/master by this push:
     new 6a6b464  ARROW-13990: [R] Bindings for round kernels
6a6b464 is described below

commit 6a6b4643611f34cac8c829e9dc41eb6e815abd7b
Author: Ian Cook <ia...@gmail.com>
AuthorDate: Mon Sep 20 22:33:49 2021 -0400

    ARROW-13990: [R] Bindings for round kernels
    
    Closes #11176 from ianmcook/ARROW-13990
    
    Authored-by: Ian Cook <ia...@gmail.com>
    Signed-off-by: Ian Cook <ia...@gmail.com>
---
 r/NAMESPACE                   |  1 +
 r/R/dplyr-functions.R         |  8 +++++
 r/R/enums.R                   | 15 ++++++++
 r/man/enums.Rd                |  5 +++
 r/src/compute.cpp             | 26 ++++++++++++++
 r/tests/testthat/test-dplyr.R | 83 +++++++++++++++++++++++++++++++++++++++++--
 6 files changed, 136 insertions(+), 2 deletions(-)

diff --git a/r/NAMESPACE b/r/NAMESPACE
index cabd8ff..61ca5d8 100644
--- a/r/NAMESPACE
+++ b/r/NAMESPACE
@@ -170,6 +170,7 @@ export(RecordBatchFileReader)
 export(RecordBatchFileWriter)
 export(RecordBatchStreamReader)
 export(RecordBatchStreamWriter)
+export(RoundMode)
 export(S3FileSystem)
 export(Scalar)
 export(Scanner)
diff --git a/r/R/dplyr-functions.R b/r/R/dplyr-functions.R
index 808956e..c4eae0c 100644
--- a/r/R/dplyr-functions.R
+++ b/r/R/dplyr-functions.R
@@ -681,6 +681,14 @@ nse_funcs$trunc <- function(x, ...) {
   build_expr("trunc", x)
 }
 
+nse_funcs$round <- function(x, digits = 0) {
+  build_expr(
+    "round",
+    x,
+    options = list(ndigits = digits, round_mode = RoundMode$HALF_TO_EVEN)
+  )
+}
+
 nse_funcs$wday <- function(x, label = FALSE, abbr = TRUE, week_start = getOption("lubridate.week.start", 7)) {
 
   # The "day_of_week" compute function returns numeric days of week and not locale-aware strftime
diff --git a/r/R/enums.R b/r/R/enums.R
index 019ebc7..d9cb3a5 100644
--- a/r/R/enums.R
+++ b/r/R/enums.R
@@ -148,3 +148,18 @@ NullEncodingBehavior <- enum("NullEncodingBehavior",
 NullHandlingBehavior <- enum("NullHandlingBehavior",
   EMIT_NULL = 0L, SKIP = 1L, REPLACE = 2L
 )
+
+#' @export
+#' @rdname enums
+RoundMode <- enum("RoundMode",
+  DOWN = 0L,
+  UP = 1L,
+  TOWARDS_ZERO = 2L,
+  TOWARDS_INFINITY = 3L,
+  HALF_DOWN = 4L,
+  HALF_UP = 5L,
+  HALF_TOWARDS_ZERO = 6L,
+  HALF_TOWARDS_INFINITY = 7L,
+  HALF_TO_EVEN = 8L,
+  HALF_TO_ODD = 9L
+)
diff --git a/r/man/enums.Rd b/r/man/enums.Rd
index 57ec3ba..e21a9f4 100644
--- a/r/man/enums.Rd
+++ b/r/man/enums.Rd
@@ -16,6 +16,7 @@
 \alias{QuantileInterpolation}
 \alias{NullEncodingBehavior}
 \alias{NullHandlingBehavior}
+\alias{RoundMode}
 \title{Arrow enums}
 \format{
 An object of class \code{TimeUnit::type} (inherits from \code{arrow-enum}) of length 4.
@@ -43,6 +44,8 @@ An object of class \code{QuantileInterpolation} (inherits from \code{arrow-enum}
 An object of class \code{NullEncodingBehavior} (inherits from \code{arrow-enum}) of length 2.
 
 An object of class \code{NullHandlingBehavior} (inherits from \code{arrow-enum}) of length 3.
+
+An object of class \code{RoundMode} (inherits from \code{arrow-enum}) of length 10.
 }
 \usage{
 TimeUnit
@@ -70,6 +73,8 @@ QuantileInterpolation
 NullEncodingBehavior
 
 NullHandlingBehavior
+
+RoundMode
 }
 \description{
 Arrow enums
diff --git a/r/src/compute.cpp b/r/src/compute.cpp
index c6ba0a2..aee4069 100644
--- a/r/src/compute.cpp
+++ b/r/src/compute.cpp
@@ -449,6 +449,32 @@ std::shared_ptr<arrow::compute::FunctionOptions> make_compute_options(
     return std::make_shared<Options>(cpp11::as_cpp<int64_t>(options["pivot"]));
   }
 
+  if (func_name == "round") {
+    using Options = arrow::compute::RoundOptions;
+    auto out = std::make_shared<Options>(Options::Defaults());
+    if (!Rf_isNull(options["ndigits"])) {
+      out->ndigits = cpp11::as_cpp<int64_t>(options["ndigits"]);
+    }
+    SEXP round_mode = options["round_mode"];
+    if (!Rf_isNull(round_mode)) {
+      out->round_mode = cpp11::as_cpp<enum arrow::compute::RoundMode>(round_mode);
+    }
+    return out;
+  }
+
+  if (func_name == "round_to_multiple") {
+    using Options = arrow::compute::RoundToMultipleOptions;
+    auto out = std::make_shared<Options>(Options::Defaults());
+    if (!Rf_isNull(options["multiple"])) {
+      out->multiple = cpp11::as_cpp<double>(options["multiple"]);
+    }
+    SEXP round_mode = options["round_mode"];
+    if (!Rf_isNull(round_mode)) {
+      out->round_mode = cpp11::as_cpp<enum arrow::compute::RoundMode>(round_mode);
+    }
+    return out;
+  }
+
   return nullptr;
 }
 
diff --git a/r/tests/testthat/test-dplyr.R b/r/tests/testthat/test-dplyr.R
index ab7296b..1023c20 100644
--- a/r/tests/testthat/test-dplyr.R
+++ b/r/tests/testthat/test-dplyr.R
@@ -989,7 +989,7 @@ test_that("sign()", {
   )
 })
 
-test_that("ceiling(), floor(), trunc()", {
+test_that("ceiling(), floor(), trunc(), round()", {
   df <- tibble(x = c(-1, -0.55, -0.5, -0.1, 0, 0.1, 0.5, 0.55, 1, NA, NaN))
 
   expect_dplyr_equal(
@@ -997,11 +997,90 @@ test_that("ceiling(), floor(), trunc()", {
       mutate(
         c = ceiling(x),
         f = floor(x),
-        t = trunc(x)
+        t = trunc(x),
+        r = round(x)
       ) %>%
       collect(),
     df
   )
+
+  # with digits set to 1
+  expect_dplyr_equal(
+    input %>%
+      filter(x %% 0.5 == 0) %>% # filter out indeterminate cases (see below)
+      mutate(r = round(x, 1)) %>%
+      collect(),
+    df
+  )
+
+  # with digits set to -1
+  expect_dplyr_equal(
+    input %>%
+      mutate(
+        rd = round(floor(x * 111), -1), # double
+        y = ifelse(is.nan(x), NA_integer_, x),
+        ri = round(as.integer(y * 111), -1) # integer (with the NaN removed)
+      ) %>%
+      collect(),
+    df
+  )
+
+  # round(x, -2) is equivalent to round_to_multiple(x, 100)
+  expect_equal(
+    Table$create(x = 1111.1) %>%
+      mutate(r = round(x, -2)) %>%
+      collect(),
+    Table$create(x = 1111.1) %>%
+      mutate(r = arrow_round_to_multiple(x, options = list(multiple = 100))) %>%
+      collect()
+  )
+
+  # For consistency with base R, the binding for round() uses the Arrow
+  # library's HALF_TO_EVEN round mode, but the expectations *above* would pass
+  # even if another round mode were used. The expectations *below* should fail
+  # with other round modes. However, some decimal numbers cannot be represented
+  # exactly as floating point numbers, and for the ones that also end in 5 (such
+  # as 0.55), R's rounding behavior is indeterminate: it will vary depending on
+  # the OS. In practice, this seems to affect Windows, so we skip these tests
+  # on Windows and on CRAN.
+
+  skip_on_cran()
+  skip_on_os("windows")
+
+  expect_dplyr_equal(
+    input %>%
+      mutate(r = round(x, 1)) %>%
+      collect(),
+    df
+  )
+
+  # Verify that round mode HALF_TO_EVEN, which is what the round() binding uses,
+  # yields results consistent with R...
+  expect_equal(
+    as.vector(
+      call_function(
+        "round",
+        Array$create(df$x),
+        options = list(ndigits = 1L, round_mode = RoundMode$HALF_TO_EVEN)
+      )
+    ),
+    round(df$x, 1)
+  )
+  # ...but that the round mode HALF_TOWARDS_ZERO does not. If the expectation
+  # below fails, it means that the expectation above is not effectively testing
+  # that Arrow is using the HALF_TO_EVEN mode.
+  expect_false(
+    isTRUE(all.equal(
+      as.vector(
+        call_function(
+          "round",
+          Array$create(df$x),
+          options = list(ndigits = 1L, round_mode = RoundMode$HALF_TOWARDS_ZERO)
+        )
+      ),
+      round(df$x, 1)
+    ))
+  )
 })
 
 test_that("log functions", {