You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by ia...@apache.org on 2021/09/21 02:35:29 UTC
[arrow] branch master updated: ARROW-13990: [R] Bindings for round
kernels
This is an automated email from the ASF dual-hosted git repository.
ianmcook pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/master by this push:
new 6a6b464 ARROW-13990: [R] Bindings for round kernels
6a6b464 is described below
commit 6a6b4643611f34cac8c829e9dc41eb6e815abd7b
Author: Ian Cook <ia...@gmail.com>
AuthorDate: Mon Sep 20 22:33:49 2021 -0400
ARROW-13990: [R] Bindings for round kernels
Closes #11176 from ianmcook/ARROW-13990
Authored-by: Ian Cook <ia...@gmail.com>
Signed-off-by: Ian Cook <ia...@gmail.com>
---
r/NAMESPACE | 1 +
r/R/dplyr-functions.R | 8 +++++
r/R/enums.R | 15 ++++++++
r/man/enums.Rd | 5 +++
r/src/compute.cpp | 26 ++++++++++++++
r/tests/testthat/test-dplyr.R | 83 +++++++++++++++++++++++++++++++++++++++++--
6 files changed, 136 insertions(+), 2 deletions(-)
diff --git a/r/NAMESPACE b/r/NAMESPACE
index cabd8ff..61ca5d8 100644
--- a/r/NAMESPACE
+++ b/r/NAMESPACE
@@ -170,6 +170,7 @@ export(RecordBatchFileReader)
export(RecordBatchFileWriter)
export(RecordBatchStreamReader)
export(RecordBatchStreamWriter)
+export(RoundMode)
export(S3FileSystem)
export(Scalar)
export(Scanner)
diff --git a/r/R/dplyr-functions.R b/r/R/dplyr-functions.R
index 808956e..c4eae0c 100644
--- a/r/R/dplyr-functions.R
+++ b/r/R/dplyr-functions.R
@@ -681,6 +681,14 @@ nse_funcs$trunc <- function(x, ...) {
build_expr("trunc", x)
}
+nse_funcs$round <- function(x, digits = 0) {
+ build_expr(
+ "round",
+ x,
+ options = list(ndigits = digits, round_mode = RoundMode$HALF_TO_EVEN)
+ )
+}
+
nse_funcs$wday <- function(x, label = FALSE, abbr = TRUE, week_start = getOption("lubridate.week.start", 7)) {
# The "day_of_week" compute function returns numeric days of week and not locale-aware strftime
diff --git a/r/R/enums.R b/r/R/enums.R
index 019ebc7..d9cb3a5 100644
--- a/r/R/enums.R
+++ b/r/R/enums.R
@@ -148,3 +148,18 @@ NullEncodingBehavior <- enum("NullEncodingBehavior",
NullHandlingBehavior <- enum("NullHandlingBehavior",
EMIT_NULL = 0L, SKIP = 1L, REPLACE = 2L
)
+
+#' @export
+#' @rdname enums
+RoundMode <- enum("RoundMode",
+ DOWN = 0L,
+ UP = 1L,
+ TOWARDS_ZERO = 2L,
+ TOWARDS_INFINITY = 3L,
+ HALF_DOWN = 4L,
+ HALF_UP = 5L,
+ HALF_TOWARDS_ZERO = 6L,
+ HALF_TOWARDS_INFINITY = 7L,
+ HALF_TO_EVEN = 8L,
+ HALF_TO_ODD = 9L
+)
diff --git a/r/man/enums.Rd b/r/man/enums.Rd
index 57ec3ba..e21a9f4 100644
--- a/r/man/enums.Rd
+++ b/r/man/enums.Rd
@@ -16,6 +16,7 @@
\alias{QuantileInterpolation}
\alias{NullEncodingBehavior}
\alias{NullHandlingBehavior}
+\alias{RoundMode}
\title{Arrow enums}
\format{
An object of class \code{TimeUnit::type} (inherits from \code{arrow-enum}) of length 4.
@@ -43,6 +44,8 @@ An object of class \code{QuantileInterpolation} (inherits from \code{arrow-enum}
An object of class \code{NullEncodingBehavior} (inherits from \code{arrow-enum}) of length 2.
An object of class \code{NullHandlingBehavior} (inherits from \code{arrow-enum}) of length 3.
+
+An object of class \code{RoundMode} (inherits from \code{arrow-enum}) of length 10.
}
\usage{
TimeUnit
@@ -70,6 +73,8 @@ QuantileInterpolation
NullEncodingBehavior
NullHandlingBehavior
+
+RoundMode
}
\description{
Arrow enums
diff --git a/r/src/compute.cpp b/r/src/compute.cpp
index c6ba0a2..aee4069 100644
--- a/r/src/compute.cpp
+++ b/r/src/compute.cpp
@@ -449,6 +449,32 @@ std::shared_ptr<arrow::compute::FunctionOptions> make_compute_options(
return std::make_shared<Options>(cpp11::as_cpp<int64_t>(options["pivot"]));
}
+ if (func_name == "round") {
+ using Options = arrow::compute::RoundOptions;
+ auto out = std::make_shared<Options>(Options::Defaults());
+ if (!Rf_isNull(options["ndigits"])) {
+ out->ndigits = cpp11::as_cpp<int64_t>(options["ndigits"]);
+ }
+ SEXP round_mode = options["round_mode"];
+ if (!Rf_isNull(round_mode)) {
+ out->round_mode = cpp11::as_cpp<enum arrow::compute::RoundMode>(round_mode);
+ }
+ return out;
+ }
+
+ if (func_name == "round_to_multiple") {
+ using Options = arrow::compute::RoundToMultipleOptions;
+ auto out = std::make_shared<Options>(Options::Defaults());
+ if (!Rf_isNull(options["multiple"])) {
+ out->multiple = cpp11::as_cpp<double>(options["multiple"]);
+ }
+ SEXP round_mode = options["round_mode"];
+ if (!Rf_isNull(round_mode)) {
+ out->round_mode = cpp11::as_cpp<enum arrow::compute::RoundMode>(round_mode);
+ }
+ return out;
+ }
+
return nullptr;
}
diff --git a/r/tests/testthat/test-dplyr.R b/r/tests/testthat/test-dplyr.R
index ab7296b..1023c20 100644
--- a/r/tests/testthat/test-dplyr.R
+++ b/r/tests/testthat/test-dplyr.R
@@ -989,7 +989,7 @@ test_that("sign()", {
)
})
-test_that("ceiling(), floor(), trunc()", {
+test_that("ceiling(), floor(), trunc(), round()", {
df <- tibble(x = c(-1, -0.55, -0.5, -0.1, 0, 0.1, 0.5, 0.55, 1, NA, NaN))
expect_dplyr_equal(
@@ -997,11 +997,90 @@ test_that("ceiling(), floor(), trunc()", {
mutate(
c = ceiling(x),
f = floor(x),
- t = trunc(x)
+ t = trunc(x),
+ r = round(x)
) %>%
collect(),
df
)
+
+ # with digits set to 1
+ expect_dplyr_equal(
+ input %>%
+ filter(x %% 0.5 == 0) %>% # filter out indeterminate cases (see below)
+ mutate(r = round(x, 1)) %>%
+ collect(),
+ df
+ )
+
+ # with digits set to -1
+ expect_dplyr_equal(
+ input %>%
+ mutate(
+ rd = round(floor(x * 111), -1), # double
+ y = ifelse(is.nan(x), NA_integer_, x),
+ ri = round(as.integer(y * 111), -1) # integer (with the NaN removed)
+ ) %>%
+ collect(),
+ df
+ )
+
+ # round(x, -2) is equivalent to round_to_multiple(x, 100)
+ expect_equal(
+ Table$create(x = 1111.1) %>%
+ mutate(r = round(x, -2)) %>%
+ collect(),
+ Table$create(x = 1111.1) %>%
+ mutate(r = arrow_round_to_multiple(x, options = list(multiple = 100))) %>%
+ collect()
+ )
+
+ # For consistency with base R, the binding for round() uses the Arrow
+ # library's HALF_TO_EVEN round mode, but the expectations *above* would pass
+ # even if another round mode were used. The expectations *below* should fail
+ # with other round modes. However, some decimal numbers cannot be represented
+ # exactly as floating point numbers, and for the ones that also end in 5 (such
+ # as 0.55), R's rounding behavior is indeterminate: it will vary depending on
+ # the OS. In practice, this seems to affect Windows, so we skip these tests
+ # on Windows and on CRAN.
+
+ skip_on_cran()
+ skip_on_os("windows")
+
+ expect_dplyr_equal(
+ input %>%
+ mutate(r = round(x, 1)) %>%
+ collect(),
+ df
+ )
+
+ # Verify that round mode HALF_TO_EVEN, which is what the round() binding uses,
+ # yields results consistent with R...
+ expect_equal(
+ as.vector(
+ call_function(
+ "round",
+ Array$create(df$x),
+ options = list(ndigits = 1L, round_mode = RoundMode$HALF_TO_EVEN)
+ )
+ ),
+ round(df$x, 1)
+ )
+ # ...but that the round mode HALF_TOWARDS_ZERO does not. If the expectation
+ # below fails, it means that the expectation above is not effectively testing
+ # that Arrow is using the HALF_TO_EVEN mode.
+ expect_false(
+ isTRUE(all.equal(
+ as.vector(
+ call_function(
+ "round",
+ Array$create(df$x),
+ options = list(ndigits = 1L, round_mode = RoundMode$HALF_TOWARDS_ZERO)
+ )
+ ),
+ round(df$x, 1)
+ ))
+ )
})
test_that("log functions", {