You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by ko...@apache.org on 2021/11/10 02:06:50 UTC
[arrow] 05/12: ARROW-14310: [R] Make expect_dplyr_equal() more
intuitive
This is an automated email from the ASF dual-hosted git repository.
kou pushed a commit to branch maint-6.0.x
in repository https://gitbox.apache.org/repos/asf/arrow.git
commit 88683e829759b3689dfa48cc7caa950e8019250b
Author: Nic Crane <th...@gmail.com>
AuthorDate: Thu Oct 28 17:12:56 2021 +0100
ARROW-14310: [R] Make expect_dplyr_equal() more intuitive
* Renamed "input" to ".input" in test helpers to make more obvious that it's a "special" variable
* Renamed functions that don't fit the `testthat::expect_*()` pattern to prevent confusion
* Added additional documentation to helper functions
Closes #11403 from thisisnic/ARROW-14310_expect_dplyr_equal
Lead-authored-by: Nic Crane <th...@gmail.com>
Co-authored-by: Nic <th...@gmail.com>
Signed-off-by: Nic Crane <th...@gmail.com>
---
r/tests/testthat/helper-expectation.R | 120 +++++--
r/tests/testthat/test-Array.R | 8 +-
r/tests/testthat/test-compute-aggregate.R | 62 ++--
r/tests/testthat/test-compute-sort.R | 60 ++--
r/tests/testthat/test-dplyr-arrange.R | 80 ++---
r/tests/testthat/test-dplyr-collapse.R | 12 +-
r/tests/testthat/test-dplyr-count.R | 32 +-
r/tests/testthat/test-dplyr-distinct.R | 32 +-
r/tests/testthat/test-dplyr-filter.R | 138 +++----
r/tests/testthat/test-dplyr-funcs-conditional.R | 100 +++---
r/tests/testthat/test-dplyr-funcs-datetime.R | 116 +++---
r/tests/testthat/test-dplyr-funcs-math.R | 98 ++---
r/tests/testthat/test-dplyr-funcs-string.R | 455 ++++++++++++------------
r/tests/testthat/test-dplyr-funcs-type.R | 84 ++---
r/tests/testthat/test-dplyr-group-by.R | 54 +--
r/tests/testthat/test-dplyr-join.R | 36 +-
r/tests/testthat/test-dplyr-mutate.R | 116 +++---
r/tests/testthat/test-dplyr-query.R | 29 +-
r/tests/testthat/test-dplyr-select.R | 72 ++--
r/tests/testthat/test-dplyr-summarize.R | 280 +++++++--------
r/tests/testthat/test-metadata.R | 24 +-
r/tests/testthat/test-na-omit.R | 12 +-
22 files changed, 1032 insertions(+), 988 deletions(-)
diff --git a/r/tests/testthat/helper-expectation.R b/r/tests/testthat/helper-expectation.R
index b262db4..ef6142b 100644
--- a/r/tests/testthat/helper-expectation.R
+++ b/r/tests/testthat/helper-expectation.R
@@ -28,6 +28,8 @@ expect_r6_class <- function(object, class) {
expect_s3_class(object, "R6")
}
+#' Mask `testthat::expect_equal()` in order to compare ArrowObjects using their
+#' `Equals` methods from the C++ library.
expect_equal <- function(object, expected, ignore_attr = FALSE, ..., info = NULL, label = NULL) {
if (inherits(object, "ArrowObject") && inherits(expected, "ArrowObject")) {
mc <- match.call()
@@ -64,24 +66,35 @@ verify_output <- function(...) {
testthat::verify_output(...)
}
-#' @param expr A dplyr pipeline with `input` as its start
-#' @param tbl A tbl/df as reference, will make RB/Table with
-#' @param skip_record_batch string skip message, if should skip RB test
-#' @param skip_table string skip message, if should skip Table test
-#' @param warning string expected warning from the RecordBatch and Table paths,
-#' passed to `expect_warning()`. Special values:
+#' Ensure that dplyr methods on Arrow objects return the same as for data frames
+#'
+#' This function compares the output of running a dplyr expression on a tibble
+#' or data.frame object against the output of the same expression run on
+#' Arrow Table and RecordBatch objects.
+#'
+#'
+#' @param expr A dplyr pipeline which must have `.input` as its start
+#' @param tbl A tibble or data.frame which will be substituted for `.input`
+#' @param skip_record_batch The skip message to show (if you should skip the
+#' RecordBatch test)
+#' @param skip_table The skip message to show (if you should skip the Table test)
+#' @param warning The expected warning from the RecordBatch and Table comparison
+#' paths, passed to `expect_warning()`. Special values:
#' * `NA` (the default) for ensuring no warning message
#' * `TRUE` is a special case to mean to check for the
#' "not supported in Arrow; pulling data into R" message.
#' @param ... additional arguments, passed to `expect_equal()`
-expect_dplyr_equal <- function(expr,
- tbl,
- skip_record_batch = NULL,
- skip_table = NULL,
- warning = NA,
- ...) {
+compare_dplyr_binding <- function(expr,
+ tbl,
+ skip_record_batch = NULL,
+ skip_table = NULL,
+ warning = NA,
+ ...) {
+
+ # Quote the contents of `expr` so that we can evaluate it a few different ways
expr <- rlang::enquo(expr)
- expected <- rlang::eval_tidy(expr, rlang::new_data_mask(rlang::env(input = tbl)))
+ # Get the expected output by evaluating expr on the .input data.frame using regular dplyr
+ expected <- rlang::eval_tidy(expr, rlang::new_data_mask(rlang::env(.input = tbl)))
if (isTRUE(warning)) {
# Special-case the simple warning:
@@ -91,11 +104,12 @@ expect_dplyr_equal <- function(expr,
skip_msg <- NULL
+ # Evaluate `expr` on a RecordBatch object and compare with `expected`
if (is.null(skip_record_batch)) {
expect_warning(
via_batch <- rlang::eval_tidy(
expr,
- rlang::new_data_mask(rlang::env(input = record_batch(tbl)))
+ rlang::new_data_mask(rlang::env(.input = record_batch(tbl)))
),
warning
)
@@ -104,11 +118,12 @@ expect_dplyr_equal <- function(expr,
skip_msg <- c(skip_msg, skip_record_batch)
}
+ # Evaluate `expr` on a Table object and compare with `expected`
if (is.null(skip_table)) {
expect_warning(
via_table <- rlang::eval_tidy(
expr,
- rlang::new_data_mask(rlang::env(input = arrow_table(tbl)))
+ rlang::new_data_mask(rlang::env(.input = arrow_table(tbl)))
),
warning
)
@@ -122,21 +137,30 @@ expect_dplyr_equal <- function(expr,
}
}
-expect_dplyr_error <- function(expr, # A dplyr pipeline with `input` as its start
- tbl, # A tbl/df as reference, will make RB/Table with
- ...) {
+#' Assert that Arrow dplyr methods error in the same way as methods on data.frame
+#'
+#' Comparing the error message generated when running expressions on R objects
+#' against the error message generated by running the same expression on Arrow
+#' Tables and RecordBatches.
+#'
+#' @param expr A dplyr pipeline which must have `.input` as its start
+#' @param tbl A tibble or data.frame which will be substituted for `.input`
+#' @param ... additional arguments, passed to `expect_error()`
+compare_dplyr_error <- function(expr,
+ tbl,
+ ...) {
# ensure we have supplied tbl
force(tbl)
expr <- rlang::enquo(expr)
msg <- tryCatch(
- rlang::eval_tidy(expr, rlang::new_data_mask(rlang::env(input = tbl))),
+ rlang::eval_tidy(expr, rlang::new_data_mask(rlang::env(.input = tbl))),
error = function(e) {
msg <- conditionMessage(e)
# The error here is of the form:
#
- # Problem with `filter()` input `..1`.
+ # Problem with `filter()` .input `..1`.
# x object 'b_var' not found
# ℹ Input `..1` is `chr == b_var`.
#
@@ -158,7 +182,7 @@ expect_dplyr_error <- function(expr, # A dplyr pipeline with `input` as its star
expect_error(
rlang::eval_tidy(
expr,
- rlang::new_data_mask(rlang::env(input = record_batch(tbl)))
+ rlang::new_data_mask(rlang::env(.input = record_batch(tbl)))
),
msg,
...
@@ -166,27 +190,36 @@ expect_dplyr_error <- function(expr, # A dplyr pipeline with `input` as its star
expect_error(
rlang::eval_tidy(
expr,
- rlang::new_data_mask(rlang::env(input = arrow_table(tbl)))
+ rlang::new_data_mask(rlang::env(.input = arrow_table(tbl)))
),
msg,
...
)
}
-expect_vector_equal <- function(expr, # A vectorized R expression containing `input` as its input
- vec, # A vector as reference, will make Array/ChunkedArray with
- skip_array = NULL, # Msg, if should skip Array test
- skip_chunked_array = NULL, # Msg, if should skip ChunkedArray test
- ignore_attr = FALSE, # ignore attributes?
- ...) {
+#' Comparing the output of running expressions on R vectors against the same
+#' expression run on Arrow Arrays and ChunkedArrays.
+#'
+#' @param expr A vectorized R expression which must have `.input` as its start
+#' @param vec A vector which will be substituted for `.input`
+#' @param skip_array The skip message to show (if you should skip the Array test)
+#' @param skip_chunked_array The skip message to show (if you should skip the ChunkedArray test)
+#' @param ignore_attr Ignore differences in specified attributes?
+#' @param ... additional arguments, passed to `expect_as_vector()`
+compare_expression <- function(expr,
+ vec,
+ skip_array = NULL,
+ skip_chunked_array = NULL,
+ ignore_attr = FALSE,
+ ...) {
expr <- rlang::enquo(expr)
- expected <- rlang::eval_tidy(expr, rlang::new_data_mask(rlang::env(input = vec)))
+ expected <- rlang::eval_tidy(expr, rlang::new_data_mask(rlang::env(.input = vec)))
skip_msg <- NULL
if (is.null(skip_array)) {
via_array <- rlang::eval_tidy(
expr,
- rlang::new_data_mask(rlang::env(input = Array$create(vec)))
+ rlang::new_data_mask(rlang::env(.input = Array$create(vec)))
)
expect_as_vector(via_array, expected, ignore_attr, ...)
} else {
@@ -199,7 +232,7 @@ expect_vector_equal <- function(expr, # A vectorized R expression containing `in
via_chunked <- rlang::eval_tidy(
expr,
- rlang::new_data_mask(rlang::env(input = ChunkedArray$create(split_vector[[1]], split_vector[[2]])))
+ rlang::new_data_mask(rlang::env(.input = ChunkedArray$create(split_vector[[1]], split_vector[[2]])))
)
expect_as_vector(via_chunked, expected, ignore_attr, ...)
} else {
@@ -211,15 +244,24 @@ expect_vector_equal <- function(expr, # A vectorized R expression containing `in
}
}
-expect_vector_error <- function(expr, # A vectorized R expression containing `input` as its input
- vec, # A vector as reference, will make Array/ChunkedArray with
- skip_array = NULL, # Msg, if should skip Array test
- skip_chunked_array = NULL, # Msg, if should skip ChunkedArray test
- ...) {
+#' Comparing the error message generated when running expressions on R objects
+#' against the error message generated by running the same expression on Arrow
+#' Arrays and ChunkedArrays.
+#'
+#' @param expr An R expression which must have `.input` as its start
+#' @param vec A vector which will be substituted for `.input`
+#' @param skip_array The skip message to show (if you should skip the Array test)
+#' @param skip_chunked_array The skip message to show (if you should skip the ChunkedArray test)
+#' @param ... additional arguments, passed to `expect_error()`
+compare_expression_error <- function(expr,
+ vec,
+ skip_array = NULL,
+ skip_chunked_array = NULL,
+ ...) {
expr <- rlang::enquo(expr)
msg <- tryCatch(
- rlang::eval_tidy(expr, rlang::new_data_mask(rlang::env(input = vec))),
+ rlang::eval_tidy(expr, rlang::new_data_mask(rlang::env(.input = vec))),
error = function(e) {
msg <- conditionMessage(e)
@@ -240,7 +282,7 @@ expect_vector_error <- function(expr, # A vectorized R expression containing `in
expect_error(
rlang::eval_tidy(
expr,
- rlang::new_data_mask(rlang::env(input = Array$create(vec)))
+ rlang::new_data_mask(rlang::env(.input = Array$create(vec)))
),
msg,
...
@@ -256,7 +298,7 @@ expect_vector_error <- function(expr, # A vectorized R expression containing `in
expect_error(
rlang::eval_tidy(
expr,
- rlang::new_data_mask(rlang::env(input = ChunkedArray$create(split_vector[[1]], split_vector[[2]])))
+ rlang::new_data_mask(rlang::env(.input = ChunkedArray$create(split_vector[[1]], split_vector[[2]])))
),
msg,
...
diff --git a/r/tests/testthat/test-Array.R b/r/tests/testthat/test-Array.R
index a4f8239..ce23c26 100644
--- a/r/tests/testthat/test-Array.R
+++ b/r/tests/testthat/test-Array.R
@@ -318,19 +318,19 @@ test_that("support for NaN (ARROW-3615)", {
test_that("is.nan() evalutes to FALSE on NA (for consistency with base R)", {
x <- c(1.0, NA, NaN, -1.0)
- expect_vector_equal(is.nan(input), x)
+ compare_expression(is.nan(.input), x)
})
test_that("is.nan() evalutes to FALSE on non-floats (for consistency with base R)", {
x <- c(1L, 2L, 3L)
y <- c("foo", "bar")
- expect_vector_equal(is.nan(input), x)
- expect_vector_equal(is.nan(input), y)
+ compare_expression(is.nan(.input), x)
+ compare_expression(is.nan(.input), y)
})
test_that("is.na() evalutes to TRUE on NaN (for consistency with base R)", {
x <- c(1, NA, NaN, -1)
- expect_vector_equal(is.na(input), x)
+ compare_expression(is.na(.input), x)
})
test_that("integer types casts (ARROW-3741)", {
diff --git a/r/tests/testthat/test-compute-aggregate.R b/r/tests/testthat/test-compute-aggregate.R
index 5054d04..018279d 100644
--- a/r/tests/testthat/test-compute-aggregate.R
+++ b/r/tests/testthat/test-compute-aggregate.R
@@ -295,40 +295,40 @@ test_that("median passes ... args to quantile", {
})
test_that("median.Array and median.ChunkedArray", {
- expect_vector_equal(
- median(input),
+ compare_expression(
+ median(.input),
1:4
)
- expect_vector_equal(
- median(input),
+ compare_expression(
+ median(.input),
1:5
)
- expect_vector_equal(
- median(input),
+ compare_expression(
+ median(.input),
numeric(0)
)
- expect_vector_equal(
- median(input, na.rm = FALSE),
+ compare_expression(
+ median(.input, na.rm = FALSE),
c(1, 2, NA)
)
- expect_vector_equal(
- median(input, na.rm = TRUE),
+ compare_expression(
+ median(.input, na.rm = TRUE),
c(1, 2, NA)
)
- expect_vector_equal(
- median(input, na.rm = TRUE),
+ compare_expression(
+ median(.input, na.rm = TRUE),
NA_real_
)
- expect_vector_equal(
- median(input, na.rm = FALSE),
+ compare_expression(
+ median(.input, na.rm = FALSE),
c(1, 2, NA)
)
- expect_vector_equal(
- median(input, na.rm = TRUE),
+ compare_expression(
+ median(.input, na.rm = TRUE),
c(1, 2, NA)
)
- expect_vector_equal(
- median(input, na.rm = TRUE),
+ compare_expression(
+ median(.input, na.rm = TRUE),
NA_real_
)
})
@@ -388,31 +388,31 @@ test_that("value_counts", {
test_that("any.Array and any.ChunkedArray", {
data <- c(1:10, NA, NA)
- expect_vector_equal(any(input > 5), data)
- expect_vector_equal(any(input > 5, na.rm = TRUE), data)
- expect_vector_equal(any(input < 1), data)
- expect_vector_equal(any(input < 1, na.rm = TRUE), data)
+ compare_expression(any(.input > 5), data)
+ compare_expression(any(.input > 5, na.rm = TRUE), data)
+ compare_expression(any(.input < 1), data)
+ compare_expression(any(.input < 1, na.rm = TRUE), data)
data_logical <- c(TRUE, FALSE, TRUE, NA, FALSE)
- expect_vector_equal(any(input), data_logical)
- expect_vector_equal(any(input, na.rm = FALSE), data_logical)
- expect_vector_equal(any(input, na.rm = TRUE), data_logical)
+ compare_expression(any(.input), data_logical)
+ compare_expression(any(.input, na.rm = FALSE), data_logical)
+ compare_expression(any(.input, na.rm = TRUE), data_logical)
})
test_that("all.Array and all.ChunkedArray", {
data <- c(1:10, NA, NA)
- expect_vector_equal(all(input > 5), data)
- expect_vector_equal(all(input > 5, na.rm = TRUE), data)
+ compare_expression(all(.input > 5), data)
+ compare_expression(all(.input > 5, na.rm = TRUE), data)
- expect_vector_equal(all(input < 11), data)
- expect_vector_equal(all(input < 11, na.rm = TRUE), data)
+ compare_expression(all(.input < 11), data)
+ compare_expression(all(.input < 11, na.rm = TRUE), data)
data_logical <- c(TRUE, TRUE, NA)
- expect_vector_equal(all(input), data_logical)
- expect_vector_equal(all(input, na.rm = TRUE), data_logical)
+ compare_expression(all(.input), data_logical)
+ compare_expression(all(.input, na.rm = TRUE), data_logical)
})
test_that("variance", {
diff --git a/r/tests/testthat/test-compute-sort.R b/r/tests/testthat/test-compute-sort.R
index 0b6dd3e..e3574d8 100644
--- a/r/tests/testthat/test-compute-sort.R
+++ b/r/tests/testthat/test-compute-sort.R
@@ -66,70 +66,70 @@ test_that("ChunkedArray$SortIndices()", {
})
test_that("sort(vector), sort(Array), sort(ChunkedArray) give equivalent results on integers", {
- expect_vector_equal(
- sort(input),
+ compare_expression(
+ sort(.input),
tbl$int
)
- expect_vector_equal(
- sort(input, na.last = NA),
+ compare_expression(
+ sort(.input, na.last = NA),
tbl$int
)
- expect_vector_equal(
- sort(input, na.last = TRUE),
+ compare_expression(
+ sort(.input, na.last = TRUE),
tbl$int
)
- expect_vector_equal(
- sort(input, na.last = FALSE),
+ compare_expression(
+ sort(.input, na.last = FALSE),
tbl$int
)
- expect_vector_equal(
- sort(input, decreasing = TRUE),
+ compare_expression(
+ sort(.input, decreasing = TRUE),
tbl$int,
)
- expect_vector_equal(
- sort(input, decreasing = TRUE, na.last = TRUE),
+ compare_expression(
+ sort(.input, decreasing = TRUE, na.last = TRUE),
tbl$int,
)
- expect_vector_equal(
- sort(input, decreasing = TRUE, na.last = FALSE),
+ compare_expression(
+ sort(.input, decreasing = TRUE, na.last = FALSE),
tbl$int,
)
})
test_that("sort(vector), sort(Array), sort(ChunkedArray) give equivalent results on strings", {
- expect_vector_equal(
- sort(input, decreasing = TRUE, na.last = FALSE),
+ compare_expression(
+ sort(.input, decreasing = TRUE, na.last = FALSE),
tbl$chr
)
- expect_vector_equal(
- sort(input, decreasing = TRUE, na.last = FALSE),
+ compare_expression(
+ sort(.input, decreasing = TRUE, na.last = FALSE),
tbl$chr
)
})
test_that("sort(vector), sort(Array), sort(ChunkedArray) give equivalent results on floats", {
- expect_vector_equal(
- sort(input, decreasing = TRUE, na.last = TRUE),
+ compare_expression(
+ sort(.input, decreasing = TRUE, na.last = TRUE),
tbl$dbl
)
- expect_vector_equal(
- sort(input, decreasing = FALSE, na.last = TRUE),
+ compare_expression(
+ sort(.input, decreasing = FALSE, na.last = TRUE),
tbl$dbl
)
- expect_vector_equal(
- sort(input, decreasing = TRUE, na.last = NA),
+ compare_expression(
+ sort(.input, decreasing = TRUE, na.last = NA),
tbl$dbl
)
- expect_vector_equal(
- sort(input, decreasing = TRUE, na.last = FALSE),
+ compare_expression(
+ sort(.input, decreasing = TRUE, na.last = FALSE),
tbl$dbl,
)
- expect_vector_equal(
- sort(input, decreasing = FALSE, na.last = NA),
+ compare_expression(
+ sort(.input, decreasing = FALSE, na.last = NA),
tbl$dbl
)
- expect_vector_equal(
- sort(input, decreasing = FALSE, na.last = FALSE),
+ compare_expression(
+ sort(.input, decreasing = FALSE, na.last = FALSE),
tbl$dbl,
)
})
diff --git a/r/tests/testthat/test-dplyr-arrange.R b/r/tests/testthat/test-dplyr-arrange.R
index ae747ef..d22f64a 100644
--- a/r/tests/testthat/test-dplyr-arrange.R
+++ b/r/tests/testthat/test-dplyr-arrange.R
@@ -23,118 +23,118 @@ library(dplyr, warn.conflicts = FALSE)
tbl <- slice_sample(example_data_for_sorting, prop = 1L)
test_that("arrange() on integer, double, and character columns", {
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
arrange(int, chr) %>%
collect(),
tbl
)
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
arrange(int, desc(dbl)) %>%
collect(),
tbl
)
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
arrange(int, desc(desc(dbl))) %>%
collect(),
tbl
)
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
arrange(int) %>%
arrange(desc(dbl)) %>%
collect(),
tbl
)
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
arrange(int + dbl, chr) %>%
collect(),
tbl
)
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
mutate(zzz = int + dbl, ) %>%
arrange(zzz, chr) %>%
collect(),
tbl
)
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
mutate(zzz = int + dbl) %>%
arrange(int + dbl, chr) %>%
collect(),
tbl
)
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
mutate(int + dbl) %>%
arrange(int + dbl, chr) %>%
collect(),
tbl
)
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
group_by(grp) %>%
arrange(int, dbl) %>%
collect(),
tbl
)
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
group_by(grp) %>%
arrange(int, dbl, .by_group = TRUE) %>%
collect(),
tbl
)
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
group_by(grp, grp2) %>%
arrange(int, dbl, .by_group = TRUE) %>%
collect(),
tbl %>%
mutate(grp2 = ifelse(is.na(lgl), 1L, as.integer(lgl)))
)
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
group_by(grp) %>%
arrange(.by_group = TRUE) %>%
pull(grp),
tbl
)
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
arrange() %>%
collect(),
tbl %>%
group_by(grp)
)
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
group_by(grp) %>%
arrange() %>%
collect(),
tbl
)
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
arrange() %>%
collect(),
tbl
)
test_sort_col <- "chr"
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
arrange(!!sym(test_sort_col)) %>%
collect(),
tbl %>%
select(chr, lgl)
)
test_sort_cols <- c("int", "dbl")
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
arrange(!!!syms(test_sort_cols)) %>%
collect(),
tbl
@@ -142,14 +142,14 @@ test_that("arrange() on integer, double, and character columns", {
})
test_that("arrange() on datetime columns", {
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
arrange(dttm, int) %>%
collect(),
tbl
)
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
arrange(dttm) %>%
collect(),
tbl %>%
@@ -158,8 +158,8 @@ test_that("arrange() on datetime columns", {
})
test_that("arrange() on logical columns", {
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
arrange(lgl, int) %>%
collect(),
tbl
diff --git a/r/tests/testthat/test-dplyr-collapse.R b/r/tests/testthat/test-dplyr-collapse.R
index 13d870f..c7281b6 100644
--- a/r/tests/testthat/test-dplyr-collapse.R
+++ b/r/tests/testthat/test-dplyr-collapse.R
@@ -94,8 +94,8 @@ test_that("collapse", {
expect_true(is_collapsed(collapse(q)))
expect_false(is_collapsed(collapse(q)$.data))
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
filter(dbl > 2, chr == "d" | chr == "f") %>%
select(chr, int, lgl) %>%
mutate(twice = int * 2L) %>%
@@ -106,8 +106,8 @@ test_that("collapse", {
tbl
)
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
filter(dbl > 2, chr == "d" | chr == "f") %>%
collapse() %>%
select(chr, int, lgl) %>%
@@ -118,8 +118,8 @@ test_that("collapse", {
tbl
)
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
filter(dbl > 2, chr == "d" | chr == "f") %>%
collapse() %>%
group_by(chr) %>%
diff --git a/r/tests/testthat/test-dplyr-count.R b/r/tests/testthat/test-dplyr-count.R
index 1a852e1..8af9b57 100644
--- a/r/tests/testthat/test-dplyr-count.R
+++ b/r/tests/testthat/test-dplyr-count.R
@@ -24,15 +24,15 @@ tbl <- example_data
tbl$some_grouping <- rep(c(1, 2), 5)
test_that("count/tally", {
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
count() %>%
collect(),
tbl
)
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
tally() %>%
collect(),
tbl
@@ -40,16 +40,16 @@ test_that("count/tally", {
})
test_that("count/tally with wt and grouped data", {
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
group_by(some_grouping) %>%
count(wt = int) %>%
collect(),
tbl
)
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
group_by(some_grouping) %>%
tally(wt = int) %>%
collect(),
@@ -58,16 +58,16 @@ test_that("count/tally with wt and grouped data", {
})
test_that("count/tally with sort", {
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
group_by(some_grouping) %>%
count(wt = int, sort = TRUE) %>%
collect(),
tbl
)
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
group_by(some_grouping) %>%
tally(wt = int, sort = TRUE) %>%
collect(),
@@ -76,15 +76,15 @@ test_that("count/tally with sort", {
})
test_that("count/tally with name arg", {
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
count(name = "new_col") %>%
collect(),
tbl
)
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
tally(name = "new_col") %>%
collect(),
tbl
diff --git a/r/tests/testthat/test-dplyr-distinct.R b/r/tests/testthat/test-dplyr-distinct.R
index 4e85a5a..3a44c73 100644
--- a/r/tests/testthat/test-dplyr-distinct.R
+++ b/r/tests/testthat/test-dplyr-distinct.R
@@ -23,8 +23,8 @@ tbl <- example_data
tbl$some_grouping <- rep(c(1, 2), 5)
test_that("distinct()", {
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
distinct(some_grouping, lgl) %>%
collect() %>%
arrange(some_grouping, lgl),
@@ -33,16 +33,16 @@ test_that("distinct()", {
})
test_that("distinct() works without any variables", {
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
distinct() %>%
arrange(int) %>%
collect(),
tbl
)
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
group_by(x = int + 1) %>%
distinct() %>%
# Even though we have group_by(x), all cols (including int) are kept
@@ -53,8 +53,8 @@ test_that("distinct() works without any variables", {
})
test_that("distinct() can retain groups", {
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
group_by(some_grouping, int) %>%
distinct(lgl) %>%
collect() %>%
@@ -63,8 +63,8 @@ test_that("distinct() can retain groups", {
)
# With expressions here
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
group_by(y = some_grouping, int) %>%
distinct(x = lgl) %>%
collect() %>%
@@ -74,16 +74,16 @@ test_that("distinct() can retain groups", {
})
test_that("distinct() can contain expressions", {
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
distinct(lgl, x = some_grouping + 1) %>%
collect() %>%
arrange(lgl, x),
tbl
)
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
group_by(lgl, int) %>%
distinct(x = some_grouping + 1) %>%
collect() %>%
@@ -94,8 +94,8 @@ test_that("distinct() can contain expressions", {
test_that("distinct() can return all columns", {
skip("ARROW-13993 - need this to return correct rows from other cols")
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
distinct(lgl, .keep_all = TRUE) %>%
collect() %>%
arrange(int),
diff --git a/r/tests/testthat/test-dplyr-filter.R b/r/tests/testthat/test-dplyr-filter.R
index 74b7cf3..72a6422 100644
--- a/r/tests/testthat/test-dplyr-filter.R
+++ b/r/tests/testthat/test-dplyr-filter.R
@@ -29,8 +29,8 @@ tbl$padded_strings <- stringr::str_pad(letters[1:10], width = 2 * (1:10) + 1, si
tbl$some_negative <- tbl$int * (-1)^(1:nrow(tbl)) # nolint
test_that("filter() on is.na()", {
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
filter(is.na(lgl)) %>%
select(chr, int, lgl) %>%
collect(),
@@ -39,8 +39,8 @@ test_that("filter() on is.na()", {
})
test_that("filter() with NAs in selection", {
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
filter(lgl) %>%
select(chr, int, lgl) %>%
collect(),
@@ -49,8 +49,8 @@ test_that("filter() with NAs in selection", {
})
test_that("Filter returning an empty Table should not segfault (ARROW-8354)", {
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
filter(false) %>%
select(chr, int, lgl) %>%
collect(),
@@ -60,8 +60,8 @@ test_that("Filter returning an empty Table should not segfault (ARROW-8354)", {
test_that("filtering with expression", {
char_sym <- "b"
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
filter(chr == char_sym) %>%
select(string = chr, int) %>%
collect(),
@@ -70,56 +70,56 @@ test_that("filtering with expression", {
})
test_that("filtering with arithmetic", {
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
filter(dbl + 1 > 3) %>%
select(string = chr, int, dbl) %>%
collect(),
tbl
)
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
filter(dbl / 2 > 3) %>%
select(string = chr, int, dbl) %>%
collect(),
tbl
)
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
filter(dbl / 2L > 3) %>%
select(string = chr, int, dbl) %>%
collect(),
tbl
)
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
filter(int / 2 > 3) %>%
select(string = chr, int, dbl) %>%
collect(),
tbl
)
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
filter(int / 2L > 3) %>%
select(string = chr, int, dbl) %>%
collect(),
tbl
)
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
filter(dbl %/% 2 > 3) %>%
select(string = chr, int, dbl) %>%
collect(),
tbl
)
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
filter(dbl^2 > 3) %>%
select(string = chr, int, dbl) %>%
collect(),
@@ -128,24 +128,24 @@ test_that("filtering with arithmetic", {
})
test_that("filtering with expression + autocasting", {
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
filter(dbl + 1 > 3L) %>% # test autocasting with comparison to 3L
select(string = chr, int, dbl) %>%
collect(),
tbl
)
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
filter(int + 1 > 3) %>%
select(string = chr, int, dbl) %>%
collect(),
tbl
)
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
filter(int^2 > 3) %>%
select(string = chr, int, dbl) %>%
collect(),
@@ -154,8 +154,8 @@ test_that("filtering with expression + autocasting", {
})
test_that("More complex select/filter", {
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
filter(dbl > 2, chr == "d" | chr == "f") %>%
select(chr, int, lgl) %>%
filter(int < 5) %>%
@@ -166,8 +166,8 @@ test_that("More complex select/filter", {
})
test_that("filter() with %in%", {
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
filter(dbl > 2, chr %in% c("d", "f")) %>%
collect(),
tbl
@@ -175,20 +175,20 @@ test_that("filter() with %in%", {
})
test_that("Negative scalar values", {
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
filter(some_negative > -2) %>%
collect(),
tbl
)
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
filter(some_negative %in% -1) %>%
collect(),
tbl
)
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
filter(int == -some_negative) %>%
collect(),
tbl
@@ -196,15 +196,15 @@ test_that("Negative scalar values", {
})
test_that("filter() with between()", {
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
filter(between(dbl, 1, 2)) %>%
collect(),
tbl
)
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
filter(between(dbl, 0.5, 2)) %>%
collect(),
tbl
@@ -243,15 +243,15 @@ test_that("filter() with between()", {
test_that("filter() with string ops", {
skip_if_not_available("utf8proc")
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
filter(dbl > 2, str_length(verses) > 25) %>%
collect(),
tbl
)
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
filter(dbl > 2, str_length(str_trim(padded_strings, "left")) > 5) %>%
collect(),
tbl
@@ -260,31 +260,31 @@ test_that("filter() with string ops", {
test_that("filter environment scope", {
# "object 'b_var' not found"
- expect_dplyr_error(input %>% filter(chr == b_var), tbl)
+ compare_dplyr_error(.input %>% filter(chr == b_var), tbl)
b_var <- "b"
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
filter(chr == b_var) %>%
collect(),
tbl
)
# Also for functions
# 'could not find function "isEqualTo"' because we haven't defined it yet
- expect_dplyr_error(input %>% filter(isEqualTo(int, 4)), tbl)
+ compare_dplyr_error(.input %>% filter(isEqualTo(int, 4)), tbl)
# This works but only because there are S3 methods for those operations
isEqualTo <- function(x, y) x == y & !is.na(x)
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
select(-fct) %>% # factor levels aren't identical
filter(isEqualTo(int, 4)) %>%
collect(),
tbl
)
# Try something that needs to call another nse_func
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
select(-fct) %>%
filter(nchar(padded_strings) < 10) %>%
collect(),
@@ -292,8 +292,8 @@ test_that("filter environment scope", {
)
isShortString <- function(x) nchar(x) < 10
skip("TODO: 14071")
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
select(-fct) %>%
filter(isShortString(padded_strings)) %>%
collect(),
@@ -327,15 +327,15 @@ test_that("Filtering on a column that doesn't exist errors correctly", {
})
test_that("Filtering with unsupported functions", {
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
filter(int > 2, pnorm(dbl) > .99) %>%
collect(),
tbl,
warning = "Expression pnorm\\(dbl\\) > 0.99 not supported in Arrow; pulling data into R"
)
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
filter(
nchar(chr, type = "bytes", allowNA = TRUE) == 1, # bad, Arrow msg
int > 2, # good
@@ -361,7 +361,7 @@ test_that("Calling Arrow compute functions 'directly'", {
select(string = chr, int, dbl)
)
- expect_dplyr_equal(
+ compare_dplyr_binding(
tbl %>%
record_batch() %>%
filter(arrow_greater(arrow_add(dbl, 1), 3L)) %>%
@@ -374,16 +374,16 @@ test_that("Calling Arrow compute functions 'directly'", {
})
test_that("filter() with .data pronoun", {
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
filter(.data$dbl > 4) %>%
select(.data$chr, .data$int, .data$lgl) %>%
collect(),
tbl
)
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
filter(is.na(.data$lgl)) %>%
select(.data$chr, .data$int, .data$lgl) %>%
collect(),
@@ -392,8 +392,8 @@ test_that("filter() with .data pronoun", {
# and the .env pronoun too!
chr <- 4
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
filter(.data$dbl > .env$chr) %>%
select(.data$chr, .data$int, .data$lgl) %>%
collect(),
@@ -402,8 +402,8 @@ test_that("filter() with .data pronoun", {
skip("test now faulty - code no longer gives error & outputs a empty tibble")
# but there is an error if we don't override the masking with `.env`
- expect_dplyr_error(
- input %>%
+ compare_dplyr_error(
+ .input %>%
filter(.data$dbl > chr) %>%
select(.data$chr, .data$int, .data$lgl) %>%
collect(),
diff --git a/r/tests/testthat/test-dplyr-funcs-conditional.R b/r/tests/testthat/test-dplyr-funcs-conditional.R
index e597d36..4f27007 100644
--- a/r/tests/testthat/test-dplyr-funcs-conditional.R
+++ b/r/tests/testthat/test-dplyr-funcs-conditional.R
@@ -26,8 +26,8 @@ tbl$verses <- verses[[1]]
tbl$another_chr <- tail(letters, 10)
test_that("if_else and ifelse", {
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
mutate(
y = if_else(int > 5, 1, 0)
) %>%
@@ -35,8 +35,8 @@ test_that("if_else and ifelse", {
tbl
)
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
mutate(
y = if_else(int > 5, int, 0L)
) %>%
@@ -53,8 +53,8 @@ test_that("if_else and ifelse", {
"NotImplemented: Function if_else has no kernel matching input types"
)
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
mutate(
y = if_else(int > 5, 1, NA_real_)
) %>%
@@ -62,8 +62,8 @@ test_that("if_else and ifelse", {
tbl
)
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
mutate(
y = ifelse(int > 5, 1, 0)
) %>%
@@ -71,8 +71,8 @@ test_that("if_else and ifelse", {
tbl
)
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
mutate(
y = if_else(dbl > 5, TRUE, FALSE)
) %>%
@@ -80,8 +80,8 @@ test_that("if_else and ifelse", {
tbl
)
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
mutate(
y = if_else(chr %in% letters[1:3], 1L, 3L)
) %>%
@@ -89,8 +89,8 @@ test_that("if_else and ifelse", {
tbl
)
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
mutate(
y = if_else(int > 5, "one", "zero")
) %>%
@@ -98,8 +98,8 @@ test_that("if_else and ifelse", {
tbl
)
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
mutate(
y = if_else(int > 5, chr, another_chr)
) %>%
@@ -107,8 +107,8 @@ test_that("if_else and ifelse", {
tbl
)
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
mutate(
y = if_else(int > 5, "true", chr, missing = "MISSING")
) %>%
@@ -118,8 +118,8 @@ test_that("if_else and ifelse", {
# TODO: remove the mutate + warning after ARROW-13358 is merged and Arrow
# supports factors in if(_)else
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
mutate(
y = if_else(int > 5, fct, factor("a"))
) %>%
@@ -131,8 +131,8 @@ test_that("if_else and ifelse", {
)
# detecting NA and NaN works just fine
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
mutate(
y = if_else(is.na(dbl), chr, "false", missing = "MISSING")
) %>%
@@ -142,8 +142,8 @@ test_that("if_else and ifelse", {
# However, currently comparisons with NaNs return false and not NaNs or NAs
skip("ARROW-13364")
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
mutate(
y = if_else(dbl > 5, chr, another_chr, missing = "MISSING")
) %>%
@@ -152,8 +152,8 @@ test_that("if_else and ifelse", {
)
skip("TODO: could? should? we support the autocasting in ifelse")
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
mutate(y = ifelse(int > 5, 1, FALSE)) %>%
collect(),
tbl
@@ -161,26 +161,26 @@ test_that("if_else and ifelse", {
})
test_that("case_when()", {
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
transmute(cw = case_when(lgl ~ dbl, !false ~ dbl + dbl2)) %>%
collect(),
tbl
)
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
mutate(cw = case_when(int > 5 ~ 1, TRUE ~ 0)) %>%
collect(),
tbl
)
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
transmute(cw = case_when(chr %in% letters[1:3] ~ 1L) + 41L) %>%
collect(),
tbl
)
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
filter(case_when(
dbl + int - 1.1 == dbl2 ~ TRUE,
NA ~ NA,
@@ -256,21 +256,21 @@ test_that("case_when()", {
)
)
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
transmute(cw = case_when(lgl ~ "abc")) %>%
collect(),
tbl
)
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
transmute(cw = case_when(lgl ~ verses, !false ~ paste(chr, chr))) %>%
collect(),
tbl
)
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
mutate(
cw = case_when(!(!(!(lgl))) ~ factor(chr), TRUE ~ fct)
) %>%
@@ -288,8 +288,8 @@ test_that("coalesce()", {
y = c(NA_character_, "b", "c"),
z = c("a", "b", "c")
)
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
mutate(
cw = coalesce(w),
cz = coalesce(z),
@@ -308,8 +308,8 @@ test_that("coalesce()", {
y = c(NA_integer_, 2L, 3L),
z = 1:3
)
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
mutate(
cw = coalesce(w),
cz = coalesce(z),
@@ -328,8 +328,8 @@ test_that("coalesce()", {
y = c(NA_real_, 2.2, 3.3),
z = c(1.1, 2.2, 3.3)
)
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
mutate(
cw = coalesce(w),
cz = coalesce(z),
@@ -369,8 +369,8 @@ test_that("coalesce()", {
float32()
)
# with R literal values
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
mutate(
c1 = coalesce(4.4),
c2 = coalesce(NA_real_),
@@ -390,8 +390,8 @@ test_that("coalesce()", {
x = factor("a", levels = c("a", "z")),
y = factor("b", levels = c("a", "b", "c"))
)
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
mutate(c = coalesce(x, y)) %>%
collect() %>%
# This is a no-op on the Arrow side, but necessary to make the results equal
diff --git a/r/tests/testthat/test-dplyr-funcs-datetime.R b/r/tests/testthat/test-dplyr-funcs-datetime.R
index b18898e..5cb515e 100644
--- a/r/tests/testthat/test-dplyr-funcs-datetime.R
+++ b/r/tests/testthat/test-dplyr-funcs-datetime.R
@@ -47,8 +47,8 @@ test_df <- tibble::tibble(
# These tests test component extraction from timestamp objects
test_that("extract year from timestamp", {
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
mutate(x = year(datetime)) %>%
collect(),
test_df
@@ -56,8 +56,8 @@ test_that("extract year from timestamp", {
})
test_that("extract isoyear from timestamp", {
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
mutate(x = isoyear(datetime)) %>%
collect(),
test_df
@@ -65,8 +65,8 @@ test_that("extract isoyear from timestamp", {
})
test_that("extract quarter from timestamp", {
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
mutate(x = quarter(datetime)) %>%
collect(),
test_df
@@ -74,8 +74,8 @@ test_that("extract quarter from timestamp", {
})
test_that("extract month from timestamp", {
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
mutate(x = month(datetime)) %>%
collect(),
test_df
@@ -83,8 +83,8 @@ test_that("extract month from timestamp", {
})
test_that("extract isoweek from timestamp", {
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
mutate(x = isoweek(datetime)) %>%
collect(),
test_df
@@ -92,8 +92,8 @@ test_that("extract isoweek from timestamp", {
})
test_that("extract epiweek from timestamp", {
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
mutate(x = epiweek(datetime)) %>%
collect(),
test_df
@@ -101,8 +101,8 @@ test_that("extract epiweek from timestamp", {
})
test_that("extract day from timestamp", {
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
mutate(x = day(datetime)) %>%
collect(),
test_df
@@ -110,22 +110,22 @@ test_that("extract day from timestamp", {
})
test_that("extract wday from timestamp", {
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
mutate(x = wday(datetime)) %>%
collect(),
test_df
)
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
mutate(x = wday(date, week_start = 3)) %>%
collect(),
test_df
)
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
mutate(x = wday(date, week_start = 1)) %>%
collect(),
test_df
@@ -133,16 +133,16 @@ test_that("extract wday from timestamp", {
skip_on_os("windows") # https://issues.apache.org/jira/browse/ARROW-13168
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
mutate(x = wday(date, label = TRUE)) %>%
mutate(x = as.character(x)) %>%
collect(),
test_df
)
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
mutate(x = wday(datetime, label = TRUE, abbr = TRUE)) %>%
mutate(x = as.character(x)) %>%
collect(),
@@ -151,8 +151,8 @@ test_that("extract wday from timestamp", {
})
test_that("extract yday from timestamp", {
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
mutate(x = yday(datetime)) %>%
collect(),
test_df
@@ -160,8 +160,8 @@ test_that("extract yday from timestamp", {
})
test_that("extract hour from timestamp", {
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
mutate(x = hour(datetime)) %>%
collect(),
test_df
@@ -169,8 +169,8 @@ test_that("extract hour from timestamp", {
})
test_that("extract minute from timestamp", {
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
mutate(x = minute(datetime)) %>%
collect(),
test_df
@@ -178,8 +178,8 @@ test_that("extract minute from timestamp", {
})
test_that("extract second from timestamp", {
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
mutate(x = second(datetime)) %>%
collect(),
test_df,
@@ -191,8 +191,8 @@ test_that("extract second from timestamp", {
# These tests test extraction of components from date32 objects
test_that("extract year from date", {
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
mutate(x = year(date)) %>%
collect(),
test_df
@@ -200,8 +200,8 @@ test_that("extract year from date", {
})
test_that("extract isoyear from date", {
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
mutate(x = isoyear(date)) %>%
collect(),
test_df
@@ -209,8 +209,8 @@ test_that("extract isoyear from date", {
})
test_that("extract quarter from date", {
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
mutate(x = quarter(date)) %>%
collect(),
test_df
@@ -218,8 +218,8 @@ test_that("extract quarter from date", {
})
test_that("extract month from date", {
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
mutate(x = month(date)) %>%
collect(),
test_df
@@ -227,8 +227,8 @@ test_that("extract month from date", {
})
test_that("extract isoweek from date", {
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
mutate(x = isoweek(date)) %>%
collect(),
test_df
@@ -236,8 +236,8 @@ test_that("extract isoweek from date", {
})
test_that("extract epiweek from date", {
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
mutate(x = epiweek(date)) %>%
collect(),
test_df
@@ -245,8 +245,8 @@ test_that("extract epiweek from date", {
})
test_that("extract day from date", {
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
mutate(x = day(date)) %>%
collect(),
test_df
@@ -254,22 +254,22 @@ test_that("extract day from date", {
})
test_that("extract wday from date", {
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
mutate(x = wday(date)) %>%
collect(),
test_df
)
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
mutate(x = wday(date, week_start = 3)) %>%
collect(),
test_df
)
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
mutate(x = wday(date, week_start = 1)) %>%
collect(),
test_df
@@ -277,16 +277,16 @@ test_that("extract wday from date", {
skip_on_os("windows") # https://issues.apache.org/jira/browse/ARROW-13168
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
mutate(x = wday(date, label = TRUE, abbr = TRUE)) %>%
mutate(x = as.character(x)) %>%
collect(),
test_df
)
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
mutate(x = wday(date, label = TRUE)) %>%
mutate(x = as.character(x)) %>%
collect(),
@@ -295,8 +295,8 @@ test_that("extract wday from date", {
})
test_that("extract yday from date", {
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
mutate(x = yday(date)) %>%
collect(),
test_df
diff --git a/r/tests/testthat/test-dplyr-funcs-math.R b/r/tests/testthat/test-dplyr-funcs-math.R
index 2f2de18..b666306 100644
--- a/r/tests/testthat/test-dplyr-funcs-math.R
+++ b/r/tests/testthat/test-dplyr-funcs-math.R
@@ -23,8 +23,8 @@ library(dplyr, warn.conflicts = FALSE)
test_that("abs()", {
df <- tibble(x = c(-127, -10, -1, -0, 0, 1, 10, 127, NA))
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
transmute(abs = abs(x)) %>%
collect(),
df
@@ -34,8 +34,8 @@ test_that("abs()", {
test_that("sign()", {
df <- tibble(x = c(-127, -10, -1, -0, 0, 1, 10, 127, NA))
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
transmute(sign = sign(x)) %>%
collect(),
df
@@ -45,8 +45,8 @@ test_that("sign()", {
test_that("ceiling(), floor(), trunc(), round()", {
df <- tibble(x = c(-1, -0.55, -0.5, -0.1, 0, 0.1, 0.5, 0.55, 1, NA, NaN))
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
mutate(
c = ceiling(x),
f = floor(x),
@@ -58,8 +58,8 @@ test_that("ceiling(), floor(), trunc(), round()", {
)
# with digits set to 1
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
filter(x %% 0.5 == 0) %>% # filter out indeterminate cases (see below)
mutate(r = round(x, 1)) %>%
collect(),
@@ -67,8 +67,8 @@ test_that("ceiling(), floor(), trunc(), round()", {
)
# with digits set to -1
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
mutate(
rd = round(floor(x * 111), -1), # double
y = ifelse(is.nan(x), NA_integer_, x),
@@ -100,8 +100,8 @@ test_that("ceiling(), floor(), trunc(), round()", {
skip_on_cran()
skip_on_os("windows")
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
mutate(r = round(x, 1)) %>%
collect(),
df
@@ -139,37 +139,37 @@ test_that("ceiling(), floor(), trunc(), round()", {
test_that("log functions", {
df <- tibble(x = c(1:10, NA, NA))
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
mutate(y = log(x)) %>%
collect(),
df
)
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
mutate(y = log(x, base = exp(1))) %>%
collect(),
df
)
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
mutate(y = log(x, base = 2)) %>%
collect(),
df
)
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
mutate(y = log(x, base = 10)) %>%
collect(),
df
)
# test log(, base = (length == 1))
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
mutate(y = log(x, base = 5)) %>%
collect(),
df
@@ -190,8 +190,8 @@ test_that("log functions", {
)
# test log(, base = Expression)
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
# test cases where base = 1 below
filter(x != 1) %>%
mutate(
@@ -205,8 +205,8 @@ test_that("log functions", {
# log(1, base = 1) is NaN in both R and Arrow
# suppress the R warning because R warns but Arrow does not
suppressWarnings(
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
mutate(y = log(x, base = y)) %>%
collect(),
tibble(x = 1, y = 1)
@@ -214,36 +214,36 @@ test_that("log functions", {
)
# log(n != 1, base = 1) is Inf in R and Arrow
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
mutate(y = log(x, base = y)) %>%
collect(),
tibble(x = 10, y = 1)
)
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
mutate(y = logb(x)) %>%
collect(),
df
)
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
mutate(y = log1p(x)) %>%
collect(),
df
)
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
mutate(y = log2(x)) %>%
collect(),
df
)
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
mutate(y = log10(x)) %>%
collect(),
df
@@ -253,36 +253,36 @@ test_that("log functions", {
test_that("trig functions", {
df <- tibble(x = c(seq(from = 0, to = 1, by = 0.1), NA))
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
mutate(y = sin(x)) %>%
collect(),
df
)
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
mutate(y = cos(x)) %>%
collect(),
df
)
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
mutate(y = tan(x)) %>%
collect(),
df
)
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
mutate(y = asin(x)) %>%
collect(),
df
)
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
mutate(y = acos(x)) %>%
collect(),
df
@@ -292,15 +292,15 @@ test_that("trig functions", {
test_that("arith functions ", {
df <- tibble(x = c(1:5, NA))
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
transmute(
int_div = x %/% 2,
addition = x + 1,
multiplication = x * 3,
subtraction = x - 5,
division = x / 2,
- power = x ^ 3,
+ power = x^3,
modulo = x %% 3
) %>%
collect(),
diff --git a/r/tests/testthat/test-dplyr-funcs-string.R b/r/tests/testthat/test-dplyr-funcs-string.R
index 333735b..5e092f4 100644
--- a/r/tests/testthat/test-dplyr-funcs-string.R
+++ b/r/tests/testthat/test-dplyr-funcs-string.R
@@ -35,84 +35,84 @@ test_that("paste, paste0, and str_c", {
y <- Expression$field_ref("y")
# no NAs in data
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
transmute(paste(v, w)) %>%
collect(),
df
)
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
transmute(paste(v, w, sep = "-")) %>%
collect(),
df
)
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
transmute(paste0(v, w)) %>%
collect(),
df
)
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
transmute(str_c(v, w)) %>%
collect(),
df
)
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
transmute(str_c(v, w, sep = "+")) %>%
collect(),
df
)
# NAs in data
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
transmute(paste(x, y)) %>%
collect(),
df
)
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
transmute(paste(x, y, sep = "-")) %>%
collect(),
df
)
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
transmute(str_c(x, y)) %>%
collect(),
df
)
# non-character column in dots
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
transmute(paste0(x, y, z)) %>%
collect(),
df
)
# literal string in dots
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
transmute(paste(x, "foo", y)) %>%
collect(),
df
)
# literal NA in dots
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
transmute(paste(x, NA, y)) %>%
collect(),
df
)
# expressions in dots
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
transmute(paste0(x, toupper(y), as.character(z))) %>%
collect(),
df
@@ -125,16 +125,16 @@ test_that("paste, paste0, and str_c", {
"Invalid separator"
)
# emits null in str_c() (consistent with stringr::str_c())
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
transmute(str_c(x, y, sep = NA_character_)) %>%
collect(),
df
)
# sep passed in dots to paste0 (which doesn't take a sep argument)
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
transmute(paste0(x, y, sep = "-")) %>%
collect(),
df
@@ -181,8 +181,8 @@ test_that("paste, paste0, and str_c", {
test_that("grepl with ignore.case = FALSE and fixed = TRUE", {
df <- tibble(x = c("Foo", "bar"))
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
filter(grepl("o", x, fixed = TRUE)) %>%
collect(),
df
@@ -191,14 +191,14 @@ test_that("grepl with ignore.case = FALSE and fixed = TRUE", {
test_that("sub and gsub with ignore.case = FALSE and fixed = TRUE", {
df <- tibble(x = c("Foo", "bar"))
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
transmute(x = sub("Foo", "baz", x, fixed = TRUE)) %>%
collect(),
df
)
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
transmute(x = gsub("o", "u", x, fixed = TRUE)) %>%
collect(),
df
@@ -212,20 +212,20 @@ test_that("grepl", {
df <- tibble(x = c("Foo", "bar"))
for (fixed in c(TRUE, FALSE)) {
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
filter(grepl("Foo", x, fixed = fixed)) %>%
collect(),
df
)
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
transmute(x = grepl("^B.+", x, ignore.case = FALSE, fixed = fixed)) %>%
collect(),
df
)
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
filter(grepl("Foo", x, ignore.case = FALSE, fixed = fixed)) %>%
collect(),
df
@@ -237,7 +237,7 @@ test_that("grepl with ignore.case = TRUE and fixed = TRUE", {
df <- tibble(x = c("Foo", "bar"))
# base::grepl() ignores ignore.case = TRUE with a warning when fixed = TRUE,
- # so we can't use expect_dplyr_equal() for these tests
+ # so we can't use compare_dplyr_binding() for these tests
expect_equal(
df %>%
Table$create() %>%
@@ -257,44 +257,44 @@ test_that("grepl with ignore.case = TRUE and fixed = TRUE", {
test_that("str_detect", {
df <- tibble(x = c("Foo", "bar"))
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
filter(str_detect(x, regex("^F"))) %>%
collect(),
df
)
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
transmute(x = str_detect(x, regex("^f[A-Z]{2}", ignore_case = TRUE))) %>%
collect(),
df
)
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
transmute(x = str_detect(x, regex("^f[A-Z]{2}", ignore_case = TRUE), negate = TRUE)) %>%
collect(),
df
)
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
filter(str_detect(x, fixed("o"))) %>%
collect(),
df
)
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
filter(str_detect(x, fixed("O"))) %>%
collect(),
df
)
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
filter(str_detect(x, fixed("O", ignore_case = TRUE))) %>%
collect(),
df
)
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
filter(str_detect(x, fixed("O", ignore_case = TRUE), negate = TRUE)) %>%
collect(),
df
@@ -305,20 +305,20 @@ test_that("sub and gsub", {
df <- tibble(x = c("Foo", "bar"))
for (fixed in c(TRUE, FALSE)) {
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
transmute(x = sub("Foo", "baz", x, fixed = fixed)) %>%
collect(),
df
)
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
transmute(x = sub("^B.+", "baz", x, ignore.case = FALSE, fixed = fixed)) %>%
collect(),
df
)
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
transmute(x = sub("Foo", "baz", x, ignore.case = FALSE, fixed = fixed)) %>%
collect(),
df
@@ -330,7 +330,7 @@ test_that("sub and gsub with ignore.case = TRUE and fixed = TRUE", {
df <- tibble(x = c("Foo", "bar"))
# base::sub() and base::gsub() ignore ignore.case = TRUE with a warning when
- # fixed = TRUE, so we can't use expect_dplyr_equal() for these tests
+ # fixed = TRUE, so we can't use compare_dplyr_binding() for these tests
expect_equal(
df %>%
Table$create() %>%
@@ -357,47 +357,47 @@ test_that("sub and gsub with ignore.case = TRUE and fixed = TRUE", {
test_that("str_replace and str_replace_all", {
df <- tibble(x = c("Foo", "bar"))
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
transmute(x = str_replace_all(x, "^F", "baz")) %>%
collect(),
df
)
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
transmute(x = str_replace_all(x, regex("^F"), "baz")) %>%
collect(),
df
)
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
mutate(x = str_replace(x, "^F[a-z]{2}", "baz")) %>%
collect(),
df
)
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
transmute(x = str_replace(x, regex("^f[A-Z]{2}", ignore_case = TRUE), "baz")) %>%
collect(),
df
)
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
transmute(x = str_replace_all(x, fixed("o"), "u")) %>%
collect(),
df
)
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
transmute(x = str_replace(x, fixed("O"), "u")) %>%
collect(),
df
)
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
transmute(x = str_replace(x, fixed("O", ignore_case = TRUE), "u")) %>%
collect(),
df
@@ -407,8 +407,8 @@ test_that("str_replace and str_replace_all", {
test_that("strsplit and str_split", {
df <- tibble(x = c("Foo and bar", "baz and qux and quux"))
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
mutate(x = strsplit(x, "and")) %>%
collect(),
df,
@@ -416,50 +416,50 @@ test_that("strsplit and str_split", {
# has type information in it, but it's just a bare list from R/dplyr.
ignore_attr = TRUE
)
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
mutate(x = strsplit(x, "and.*", fixed = TRUE)) %>%
collect(),
df,
ignore_attr = TRUE
)
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
mutate(x = strsplit(x, " +and +")) %>%
collect(),
df,
ignore_attr = TRUE
)
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
mutate(x = str_split(x, "and")) %>%
collect(),
df,
ignore_attr = TRUE
)
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
mutate(x = str_split(x, "and", n = 2)) %>%
collect(),
df,
ignore_attr = TRUE
)
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
mutate(x = str_split(x, fixed("and"), n = 2)) %>%
collect(),
df,
ignore_attr = TRUE
)
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
mutate(x = str_split(x, regex("and"), n = 2)) %>%
collect(),
df,
ignore_attr = TRUE
)
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
mutate(x = str_split(x, "Foo|bar", n = 2)) %>%
collect(),
df,
@@ -469,8 +469,8 @@ test_that("strsplit and str_split", {
test_that("str_to_lower, str_to_upper, and str_to_title", {
df <- tibble(x = c("foo1", " \tB a R\n", "!apACHe aRroW!"))
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
transmute(
x_lower = str_to_lower(x),
x_upper = str_to_upper(x),
@@ -596,8 +596,8 @@ test_that("backreferences in pattern in string detection", {
skip("RE2 does not support backreferences in pattern (https://github.com/google/re2/issues/101)")
df <- tibble(x = c("Foo", "bar"))
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
filter(str_detect(x, regex("F([aeiou])\\1"))) %>%
collect(),
df
@@ -607,8 +607,8 @@ test_that("backreferences in pattern in string detection", {
test_that("backreferences (substitutions) in string replacement", {
df <- tibble(x = c("Foo", "bar"))
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
transmute(desc = sub(
"(?:https?|ftp)://([^/\r\n]+)(/[^\r\n]*)?",
"path `\\2` on server `\\1`",
@@ -617,20 +617,20 @@ test_that("backreferences (substitutions) in string replacement", {
collect(),
tibble(url = "https://arrow.apache.org/docs/r/")
)
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
transmute(x = str_replace(x, "^(\\w)o(.*)", "\\1\\2p")) %>%
collect(),
df
)
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
transmute(x = str_replace(x, regex("^(\\w)o(.*)", ignore_case = TRUE), "\\1\\2p")) %>%
collect(),
df
)
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
transmute(x = str_replace(x, regex("^(\\w)o(.*)", ignore_case = TRUE), "\\1\\2p")) %>%
collect(),
df
@@ -640,7 +640,7 @@ test_that("backreferences (substitutions) in string replacement", {
test_that("edge cases in string detection and replacement", {
# in case-insensitive fixed match/replace, test that "\\E" in the search
# string and backslashes in the replacement string are interpreted literally.
- # this test does not use expect_dplyr_equal() because base::sub() and
+ # this test does not use compare_dplyr_binding() because base::sub() and
# base::grepl() do not support ignore.case = TRUE when fixed = TRUE.
expect_equal(
tibble(x = c("\\Q\\e\\D")) %>%
@@ -659,14 +659,14 @@ test_that("edge cases in string detection and replacement", {
# test that a user's "(?i)" prefix does not break the "(?i)" prefix that's
# added in case-insensitive regex match/replace
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
filter(grepl("(?i)^[abc]{3}$", x, ignore.case = TRUE, fixed = FALSE)) %>%
collect(),
tibble(x = c("ABC"))
)
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
transmute(x = sub("(?i)^[abc]{3}$", "123", x, ignore.case = TRUE, fixed = FALSE)) %>%
collect(),
tibble(x = c("ABC"))
@@ -762,38 +762,39 @@ test_that("strftime", {
formats <- "%a %A %w %d %b %B %m %y %Y %H %I %p %M %z %Z %j %U %W %x %X %% %G %V %u"
formats_date <- "%a %A %w %d %b %B %m %y %Y %H %I %p %M %j %U %W %x %X %% %G %V %u"
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
mutate(x = strftime(datetime, format = formats)) %>%
collect(),
times
)
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
mutate(x = strftime(date, format = formats_date)) %>%
collect(),
times
)
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
mutate(x = strftime(datetime, format = formats, tz = "Pacific/Marquesas")) %>%
collect(),
times
)
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
mutate(x = strftime(datetime, format = formats, tz = "EST", usetz = TRUE)) %>%
collect(),
times
)
withr::with_timezone(
- "Pacific/Marquesas", {
- expect_dplyr_equal(
- input %>%
+ "Pacific/Marquesas",
+ {
+ compare_dplyr_binding(
+ .input %>%
mutate(
x = strftime(datetime, format = formats, tz = "EST"),
x_date = strftime(date, format = formats_date, tz = "EST")
@@ -802,8 +803,8 @@ test_that("strftime", {
times
)
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
mutate(
x = strftime(datetime, format = formats),
x_date = strftime(date, format = formats_date)
@@ -828,8 +829,8 @@ test_that("strftime", {
# Timestamps with second precision are represented as integers while
# milliseconds, microsecond and nanoseconds are represented as fixed floating
# point numbers with 3, 6 and 9 decimal places respectively.
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
mutate(x = strftime(datetime, format = "%S")) %>%
transmute(as.double(substr(x, 1, 2))) %>%
collect(),
@@ -842,8 +843,8 @@ test_that("format_ISO8601", {
skip_on_os("windows") # https://issues.apache.org/jira/browse/ARROW-13168
times <- tibble(x = c(lubridate::ymd_hms("2018-10-07 19:04:05", tz = "Etc/GMT+6"), NA))
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
mutate(x = format_ISO8601(x, precision = "ymd", usetz = FALSE)) %>%
collect(),
times
@@ -870,16 +871,16 @@ test_that("format_ISO8601", {
"Timezone not present, cannot convert to string with timezone: %Y-%m-%dT%H:%M:%S%z"
)
} else {
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
mutate(x = format_ISO8601(x, precision = "ymd", usetz = TRUE)) %>%
collect(),
times
)
# See comment regarding %S flag in strftime tests
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
mutate(x = format_ISO8601(x, precision = "ymdhms", usetz = TRUE)) %>%
mutate(x = gsub("\\.0*", "", x)) %>%
collect(),
@@ -889,8 +890,8 @@ test_that("format_ISO8601", {
# See comment regarding %S flag in strftime tests
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
mutate(x = format_ISO8601(x, precision = "ymdhms", usetz = FALSE)) %>%
mutate(x = gsub("\\.0*", "", x)) %>%
collect(),
@@ -945,15 +946,15 @@ test_that("stri_reverse and arrow_ascii_reverse functions", {
df_utf8 <- tibble(x = c("Foo\u00A0\u0061nd\u00A0bar", "\u0062az\u00A0and\u00A0qux\u3000and\u00A0quux"))
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
mutate(x = stri_reverse(x)) %>%
collect(),
df_utf8
)
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
mutate(x = stri_reverse(x)) %>%
collect(),
df_ascii
@@ -980,7 +981,7 @@ test_that("str_like", {
df <- tibble(x = c("Foo and bar", "baz and qux and quux"))
# TODO: After new version of stringr with str_like has been released, update all
- # these tests to use expect_dplyr_equal
+ # these tests to use compare_dplyr_binding
# No match - entire string
expect_equal(
@@ -1029,8 +1030,8 @@ test_that("str_like", {
# This will give an error until a new version of stringr with str_like has been released
skip_if_not(packageVersion("stringr") > "1.4.0")
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
mutate(x = str_like(x, "%baz%")) %>%
collect(),
df
@@ -1040,36 +1041,36 @@ test_that("str_like", {
test_that("str_pad", {
df <- tibble(x = c("Foo and bar", "baz and qux and quux"))
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
mutate(x = str_pad(x, width = 31)) %>%
collect(),
df
)
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
mutate(x = str_pad(x, width = 30, side = "right")) %>%
collect(),
df
)
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
mutate(x = str_pad(x, width = 31, side = "left", pad = "+")) %>%
collect(),
df
)
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
mutate(x = str_pad(x, width = 10, side = "left", pad = "+")) %>%
collect(),
df
)
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
mutate(x = str_pad(x, width = 31, side = "both")) %>%
collect(),
df
@@ -1079,64 +1080,64 @@ test_that("str_pad", {
test_that("substr", {
df <- tibble(x = "Apache Arrow")
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
mutate(y = substr(x, 1, 6)) %>%
collect(),
df
)
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
mutate(y = substr(x, 0, 6)) %>%
collect(),
df
)
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
mutate(y = substr(x, -1, 6)) %>%
collect(),
df
)
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
mutate(y = substr(x, 6, 1)) %>%
collect(),
df
)
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
mutate(y = substr(x, -1, -2)) %>%
collect(),
df
)
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
mutate(y = substr(x, 9, 6)) %>%
collect(),
df
)
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
mutate(y = substr(x, 1, 6)) %>%
collect(),
df
)
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
mutate(y = substr(x, 8, 12)) %>%
collect(),
df
)
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
mutate(y = substr(x, -5, -1)) %>%
collect(),
df
@@ -1157,8 +1158,8 @@ test_that("substring", {
# nse_funcs$substring just calls nse_funcs$substr, tested extensively above
df <- tibble(x = "Apache Arrow")
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
mutate(y = substring(x, 1, 6)) %>%
collect(),
df
@@ -1168,71 +1169,71 @@ test_that("substring", {
test_that("str_sub", {
df <- tibble(x = "Apache Arrow")
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
mutate(y = str_sub(x, 1, 6)) %>%
collect(),
df
)
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
mutate(y = str_sub(x, 0, 6)) %>%
collect(),
df
)
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
mutate(y = str_sub(x, -1, 6)) %>%
collect(),
df
)
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
mutate(y = str_sub(x, 6, 1)) %>%
collect(),
df
)
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
mutate(y = str_sub(x, -1, -2)) %>%
collect(),
df
)
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
mutate(y = str_sub(x, -1, 3)) %>%
collect(),
df
)
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
mutate(y = str_sub(x, 9, 6)) %>%
collect(),
df
)
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
mutate(y = str_sub(x, 1, 6)) %>%
collect(),
df
)
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
mutate(y = str_sub(x, 8, 12)) %>%
collect(),
df
)
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
mutate(y = str_sub(x, -5, -1)) %>%
collect(),
df
@@ -1252,85 +1253,85 @@ test_that("str_sub", {
test_that("str_starts, str_ends, startsWith, endsWith", {
df <- tibble(x = c("Foo", "bar", "baz", "qux"))
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
filter(str_starts(x, "b.*")) %>%
collect(),
df
)
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
filter(str_starts(x, "b.*", negate = TRUE)) %>%
collect(),
df
)
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
filter(str_starts(x, fixed("b.*"))) %>%
collect(),
df
)
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
filter(str_starts(x, fixed("b"))) %>%
collect(),
df
)
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
filter(str_ends(x, "r")) %>%
collect(),
df
)
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
filter(str_ends(x, "r", negate = TRUE)) %>%
collect(),
df
)
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
filter(str_ends(x, fixed("r$"))) %>%
collect(),
df
)
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
filter(str_ends(x, fixed("r"))) %>%
collect(),
df
)
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
filter(startsWith(x, "b")) %>%
collect(),
df
)
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
filter(endsWith(x, "r")) %>%
collect(),
df
)
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
filter(startsWith(x, "b.*")) %>%
collect(),
df
)
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
filter(endsWith(x, "r$")) %>%
collect(),
df
@@ -1343,22 +1344,22 @@ test_that("str_count", {
dots = c("a.", "...", ".a.a", "a..a.", "ab...", "dse....", ".f..d..")
)
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
mutate(a_count = str_count(cities, pattern = "a")) %>%
collect(),
df
)
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
mutate(p_count = str_count(cities, pattern = "d")) %>%
collect(),
df
)
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
mutate(p_count = str_count(cities,
pattern = regex("d", ignore_case = TRUE)
)) %>%
@@ -1366,31 +1367,31 @@ test_that("str_count", {
df
)
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
mutate(e_count = str_count(cities, pattern = "u")) %>%
collect(),
df
)
# nse_funcs$str_count() is not vectorised over pattern
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
mutate(let_count = str_count(cities, pattern = c("a", "b", "e", "g", "p", "n", "s"))) %>%
collect(),
df,
warning = TRUE
)
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
mutate(dots_count = str_count(dots, ".")) %>%
collect(),
df
)
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
mutate(dots_count = str_count(dots, fixed("."))) %>%
collect(),
df
diff --git a/r/tests/testthat/test-dplyr-funcs-type.R b/r/tests/testthat/test-dplyr-funcs-type.R
index 7f7396d..859dc14 100644
--- a/r/tests/testthat/test-dplyr-funcs-type.R
+++ b/r/tests/testthat/test-dplyr-funcs-type.R
@@ -88,8 +88,8 @@ test_that("explicit type conversions with cast()", {
test_that("explicit type conversions with as.*()", {
library(bit64)
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
transmute(
int2chr = as.character(int),
int2dbl = as.double(int),
@@ -103,8 +103,8 @@ test_that("explicit type conversions with as.*()", {
collect(),
tbl
)
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
transmute(
chr2chr = as.character(chr),
chr2dbl = as.double(chr),
@@ -114,8 +114,8 @@ test_that("explicit type conversions with as.*()", {
collect(),
tibble(chr = c("1", "2", "3"))
)
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
transmute(
chr2i64 = as.integer64(chr),
dbl2i64 = as.integer64(dbl),
@@ -124,8 +124,8 @@ test_that("explicit type conversions with as.*()", {
collect(),
tibble(chr = "10000000000", dbl = 10000000000, i64 = as.integer64(1e10))
)
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
transmute(
chr2lgl = as.logical(chr),
dbl2lgl = as.logical(dbl),
@@ -138,8 +138,8 @@ test_that("explicit type conversions with as.*()", {
int = c(1L, 0L, -99L, 0L)
)
)
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
transmute(
dbl2chr = as.character(dbl),
dbl2dbl = as.double(dbl),
@@ -170,8 +170,8 @@ test_that("is.finite(), is.infinite(), is.nan()", {
-4.94065645841246544e-324, 1.79769313486231570e+308, 0,
NA_real_, NaN, Inf, -Inf
))
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
transmute(
is_fin = is.finite(x),
is_inf = is.infinite(x)
@@ -180,8 +180,8 @@ test_that("is.finite(), is.infinite(), is.nan()", {
df
)
# is.nan() evaluates to FALSE on NA_real_ (ARROW-12850)
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
transmute(
is_nan = is.nan(x)
) %>%
@@ -192,8 +192,8 @@ test_that("is.finite(), is.infinite(), is.nan()", {
test_that("is.na() evaluates to TRUE on NaN (ARROW-12055)", {
df <- tibble(x = c(1.1, 2.2, NA_real_, 4.4, NaN, 6.6, 7.7))
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
transmute(
is_na = is.na(x)
) %>%
@@ -306,8 +306,8 @@ test_that("type checks with is() giving Arrow types", {
test_that("type checks with is() giving R types", {
library(bit64)
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
transmute(
chr_is_chr = is(chr, "character"),
chr_is_fct = is(chr, "factor"),
@@ -348,8 +348,8 @@ test_that("type checks with is() giving R types", {
collect(),
tbl
)
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
transmute(
i64_is_chr = is(i64, "character"),
i64_is_fct = is(i64, "factor"),
@@ -378,8 +378,8 @@ test_that("type checks with is() giving R types", {
test_that("type checks with is.*()", {
library(bit64)
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
transmute(
chr_is_chr = is.character(chr),
chr_is_dbl = is.double(chr),
@@ -425,8 +425,8 @@ test_that("type checks with is.*()", {
collect(),
tbl
)
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
transmute(
i64_is_chr = is.character(i64),
# TODO: investigate why this is not matching when testthat runs it
@@ -457,8 +457,8 @@ test_that("type checks with is.*()", {
test_that("type checks with is_*()", {
library(rlang, warn.conflicts = FALSE)
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
transmute(
chr_is_chr = is_character(chr),
chr_is_dbl = is_double(chr),
@@ -487,8 +487,8 @@ test_that("type checks with is_*()", {
})
test_that("type checks on expressions", {
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
transmute(
a = is.character(as.character(int)),
b = is.integer(as.character(int)),
@@ -503,8 +503,8 @@ test_that("type checks on expressions", {
# the code in the expectation below depends on RE2
skip_if_not_available("re2")
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
transmute(
a = is.logical(grepl("[def]", chr))
) %>%
@@ -514,8 +514,8 @@ test_that("type checks on expressions", {
})
test_that("type checks on R scalar literals", {
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
transmute(
chr_is_chr = is.character("foo"),
int_is_chr = is.character(42L),
@@ -543,16 +543,16 @@ test_that("as.factor()/dictionary_encode()", {
df1 <- tibble(x = c("C", "D", "B", NA, "D", "B", "S", "A", "B", "Z", "B"))
df2 <- tibble(x = c(5, 5, 5, NA, 2, 3, 6, 8))
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
transmute(x = as.factor(x)) %>%
collect(),
df1
)
expect_warning(
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
transmute(x = as.factor(x)) %>%
collect(),
df2
@@ -593,8 +593,8 @@ test_that("bad explicit type conversions with as.*()", {
# Arrow returns lowercase "true", "false" (instead of "TRUE", "FALSE" like R)
expect_error(
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
transmute(lgl2chr = as.character(lgl)) %>%
collect(),
tibble(lgl = c(TRUE, FALSE, NA))
@@ -605,8 +605,8 @@ test_that("bad explicit type conversions with as.*()", {
# a warning like R does)
expect_error(
expect_warning(
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
transmute(chr2num = as.numeric(chr)) %>%
collect(),
tibble(chr = c("l.O", "S.S", ""))
@@ -617,8 +617,8 @@ test_that("bad explicit type conversions with as.*()", {
# Arrow fails to parse these strings as Booleans (instead of returning NAs
# like R does)
expect_error(
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
transmute(chr2lgl = as.logical(chr)) %>%
collect(),
tibble(chr = c("TRU", "FAX", ""))
diff --git a/r/tests/testthat/test-dplyr-group-by.R b/r/tests/testthat/test-dplyr-group-by.R
index 9f1d385..7cfcfb5 100644
--- a/r/tests/testthat/test-dplyr-group-by.R
+++ b/r/tests/testthat/test-dplyr-group-by.R
@@ -23,8 +23,8 @@ library(stringr)
tbl <- example_data
test_that("group_by groupings are recorded", {
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
group_by(chr) %>%
select(int, chr) %>%
filter(int > 5) %>%
@@ -34,20 +34,20 @@ test_that("group_by groupings are recorded", {
})
test_that("group_by supports creating/renaming", {
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
group_by(chr, numbers = int) %>%
collect(),
tbl
)
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
group_by(chr, numbers = int * 4) %>%
collect(),
tbl
)
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
group_by(int > 4, lgl, foo = int > 5) %>%
collect(),
tbl
@@ -55,8 +55,8 @@ test_that("group_by supports creating/renaming", {
})
test_that("ungroup", {
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
group_by(chr) %>%
select(int, chr) %>%
ungroup() %>%
@@ -66,11 +66,11 @@ test_that("ungroup", {
)
# to confirm that the above expectation is actually testing what we think it's
- # testing, verify that expect_dplyr_equal() distinguishes between grouped and
+ # testing, verify that compare_dplyr_binding() distinguishes between grouped and
# ungrouped tibbles
expect_error(
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
group_by(chr) %>%
select(int, chr) %>%
(function(x) if (inherits(x, "tbl_df")) ungroup(x) else x) %>%
@@ -82,8 +82,8 @@ test_that("ungroup", {
})
test_that("group_by then rename", {
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
group_by(chr) %>%
select(string = chr, int) %>%
collect(),
@@ -93,14 +93,14 @@ test_that("group_by then rename", {
test_that("group_by with .drop", {
test_groups <- c("starting_a_fight", "consoling_a_child", "petting_a_dog")
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
group_by(!!!syms(test_groups), .drop = TRUE) %>%
collect(),
example_with_logical_factors
)
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
group_by(!!!syms(test_groups), .drop = FALSE) %>%
collect(),
example_with_logical_factors
@@ -131,25 +131,25 @@ test_that("group_by with .drop", {
group_by_drop_default(),
TRUE
)
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
group_by(.drop = FALSE) %>% # no group by vars
group_by_drop_default(),
example_with_logical_factors
)
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
group_by_drop_default(),
example_with_logical_factors
)
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
group_by(!!!syms(test_groups)) %>%
group_by_drop_default(),
example_with_logical_factors
)
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
group_by(!!!syms(test_groups), .drop = FALSE) %>%
ungroup() %>%
group_by_drop_default(),
diff --git a/r/tests/testthat/test-dplyr-join.R b/r/tests/testthat/test-dplyr-join.R
index 03c1705..3ff9ad8 100644
--- a/r/tests/testthat/test-dplyr-join.R
+++ b/r/tests/testthat/test-dplyr-join.R
@@ -40,8 +40,8 @@ to_join_tab <- Table$create(to_join)
test_that("left_join", {
expect_message(
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
left_join(to_join) %>%
collect(),
left
@@ -51,14 +51,14 @@ test_that("left_join", {
})
test_that("left_join `by` args", {
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
left_join(to_join, by = "some_grouping") %>%
collect(),
left
)
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
left_join(
to_join %>%
rename(the_grouping = some_grouping),
@@ -70,8 +70,8 @@ test_that("left_join `by` args", {
# TODO: allow renaming columns on the right side as well
skip("ARROW-14184")
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
rename(the_grouping = some_grouping) %>%
left_join(
to_join,
@@ -108,8 +108,8 @@ test_that("Error handling", {
# TODO: casting: int and float columns?
test_that("right_join", {
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
right_join(to_join, by = "some_grouping") %>%
collect(),
left
@@ -117,8 +117,8 @@ test_that("right_join", {
})
test_that("inner_join", {
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
inner_join(to_join, by = "some_grouping") %>%
collect(),
left
@@ -126,8 +126,8 @@ test_that("inner_join", {
})
test_that("full_join", {
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
full_join(to_join, by = "some_grouping") %>%
collect(),
left
@@ -135,8 +135,8 @@ test_that("full_join", {
})
test_that("semi_join", {
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
semi_join(to_join, by = "some_grouping") %>%
collect(),
left
@@ -144,8 +144,8 @@ test_that("semi_join", {
})
test_that("anti_join", {
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
anti_join(to_join, by = "some_grouping") %>%
collect(),
left
diff --git a/r/tests/testthat/test-dplyr-mutate.R b/r/tests/testthat/test-dplyr-mutate.R
index 419c583..886ec9e 100644
--- a/r/tests/testthat/test-dplyr-mutate.R
+++ b/r/tests/testthat/test-dplyr-mutate.R
@@ -35,8 +35,8 @@ test_that("mutate() is lazy", {
})
test_that("basic mutate", {
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
select(int, chr) %>%
filter(int > 5) %>%
mutate(int = int + 6L) %>%
@@ -46,8 +46,8 @@ test_that("basic mutate", {
})
test_that("mutate() with NULL inputs", {
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
mutate(int = NULL) %>%
collect(),
tbl
@@ -55,8 +55,8 @@ test_that("mutate() with NULL inputs", {
})
test_that("empty mutate()", {
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
mutate() %>%
collect(),
tbl
@@ -64,8 +64,8 @@ test_that("empty mutate()", {
})
test_that("transmute", {
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
select(int, chr) %>%
filter(int > 5) %>%
transmute(int = int + 6L) %>%
@@ -75,8 +75,8 @@ test_that("transmute", {
})
test_that("transmute() with NULL inputs", {
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
transmute(int = NULL) %>%
collect(),
tbl
@@ -84,8 +84,8 @@ test_that("transmute() with NULL inputs", {
})
test_that("empty transmute()", {
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
transmute() %>%
collect(),
tbl
@@ -128,8 +128,8 @@ test_that("transmute() defuses dots arguments (ARROW-13262)", {
})
test_that("mutate and refer to previous mutants", {
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
select(int, verses) %>%
mutate(
line_lengths = nchar(verses),
@@ -142,8 +142,8 @@ test_that("mutate and refer to previous mutants", {
})
test_that("nchar() arguments", {
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
select(int, verses) %>%
mutate(
line_lengths = nchar(verses, type = "bytes"),
@@ -154,8 +154,8 @@ test_that("nchar() arguments", {
tbl
)
# This tests the whole abandon_ship() machinery
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
select(int, verses) %>%
mutate(
line_lengths = nchar(verses, type = "bytes", allowNA = TRUE),
@@ -172,8 +172,8 @@ test_that("nchar() arguments", {
})
test_that("mutate with .data pronoun", {
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
select(int, verses) %>%
mutate(
line_lengths = str_length(verses),
@@ -186,8 +186,8 @@ test_that("mutate with .data pronoun", {
})
test_that("mutate with unnamed expressions", {
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
select(int, padded_strings) %>%
mutate(
int, # bare column name
@@ -200,8 +200,8 @@ test_that("mutate with unnamed expressions", {
})
test_that("mutate with reassigning same name", {
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
transmute(
new = lgl,
new = chr
@@ -212,8 +212,8 @@ test_that("mutate with reassigning same name", {
})
test_that("mutate with single value for recycling", {
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
select(int, padded_strings) %>%
mutate(
dr_bronner = 1 # ALL ONE!
@@ -225,8 +225,8 @@ test_that("mutate with single value for recycling", {
test_that("dplyr::mutate's examples", {
# Newly created variables are available immediately
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
select(name, mass) %>%
mutate(
mass2 = mass * 2,
@@ -238,8 +238,8 @@ test_that("dplyr::mutate's examples", {
# As well as adding new variables, you can use mutate() to
# remove variables and modify existing variables.
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
select(name, height, mass, homeworld) %>%
mutate(
mass = NULL,
@@ -253,8 +253,8 @@ test_that("dplyr::mutate's examples", {
# but warn that they're pulling data into R to do so
# across and autosplicing: ARROW-11699
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
select(name, homeworld, species) %>%
mutate(across(!name, as.factor)) %>%
collect(),
@@ -263,8 +263,8 @@ test_that("dplyr::mutate's examples", {
)
# group_by then mutate
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
select(name, mass, homeworld) %>%
group_by(homeworld) %>%
mutate(rank = min_rank(desc(mass))) %>%
@@ -275,8 +275,8 @@ test_that("dplyr::mutate's examples", {
# `.before` and `.after` experimental args: ARROW-11701
df <- tibble(x = 1, y = 2)
- expect_dplyr_equal(
- input %>% mutate(z = x + y) %>% collect(),
+ compare_dplyr_binding(
+ .input %>% mutate(z = x + y) %>% collect(),
df
)
#> # A tibble: 1 x 3
@@ -284,16 +284,16 @@ test_that("dplyr::mutate's examples", {
#> <dbl> <dbl> <dbl>
#> 1 1 2 3
- expect_dplyr_equal(
- input %>% mutate(z = x + y, .before = 1) %>% collect(),
+ compare_dplyr_binding(
+ .input %>% mutate(z = x + y, .before = 1) %>% collect(),
df
)
#> # A tibble: 1 x 3
#> z x y
#> <dbl> <dbl> <dbl>
#> 1 3 1 2
- expect_dplyr_equal(
- input %>% mutate(z = x + y, .after = x) %>% collect(),
+ compare_dplyr_binding(
+ .input %>% mutate(z = x + y, .after = x) %>% collect(),
df
)
#> # A tibble: 1 x 3
@@ -304,32 +304,32 @@ test_that("dplyr::mutate's examples", {
# By default, mutate() keeps all columns from the input data.
# Experimental: You can override with `.keep`
df <- tibble(x = 1, y = 2, a = "a", b = "b")
- expect_dplyr_equal(
- input %>% mutate(z = x + y, .keep = "all") %>% collect(), # the default
+ compare_dplyr_binding(
+ .input %>% mutate(z = x + y, .keep = "all") %>% collect(), # the default
df
)
#> # A tibble: 1 x 5
#> x y a b z
#> <dbl> <dbl> <chr> <chr> <dbl>
#> 1 1 2 a b 3
- expect_dplyr_equal(
- input %>% mutate(z = x + y, .keep = "used") %>% collect(),
+ compare_dplyr_binding(
+ .input %>% mutate(z = x + y, .keep = "used") %>% collect(),
df
)
#> # A tibble: 1 x 3
#> x y z
#> <dbl> <dbl> <dbl>
#> 1 1 2 3
- expect_dplyr_equal(
- input %>% mutate(z = x + y, .keep = "unused") %>% collect(),
+ compare_dplyr_binding(
+ .input %>% mutate(z = x + y, .keep = "unused") %>% collect(),
df
)
#> # A tibble: 1 x 3
#> a b z
#> <chr> <chr> <dbl>
#> 1 a b 3
- expect_dplyr_equal(
- input %>% mutate(z = x + y, .keep = "none") %>% collect(), # same as transmute()
+ compare_dplyr_binding(
+ .input %>% mutate(z = x + y, .keep = "none") %>% collect(), # same as transmute()
df
)
#> # A tibble: 1 x 1
@@ -342,8 +342,8 @@ test_that("dplyr::mutate's examples", {
# tibbles because the expressions are computed within groups.
# The following normalises `mass` by the global average:
# TODO: ARROW-13926
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
select(name, mass, species) %>%
mutate(mass_norm = mass / mean(mass, na.rm = TRUE)) %>%
collect(),
@@ -353,16 +353,16 @@ test_that("dplyr::mutate's examples", {
})
test_that("Can mutate after group_by as long as there are no aggregations", {
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
select(int, chr) %>%
group_by(chr) %>%
mutate(int = int + 6L) %>%
collect(),
tbl
)
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
select(mean = int, chr) %>%
# rename `int` to `mean` and use `mean` in `mutate()` to test that
# `all_funs()` does not incorrectly identify it as an aggregate function
@@ -498,8 +498,8 @@ test_that("mutate and pmin/pmax", {
val3 = c(0, NA, NA)
)
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
mutate(
max_val_1 = pmax(val1, val2, val3),
max_val_2 = pmax(val1, val2, val3, na.rm = TRUE),
@@ -510,8 +510,8 @@ test_that("mutate and pmin/pmax", {
df
)
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
mutate(
max_val_1 = pmax(val1 - 100, 200, val1 * 100, na.rm = TRUE),
min_val_1 = pmin(val1 - 100, 100, val1 * 100, na.rm = TRUE),
diff --git a/r/tests/testthat/test-dplyr-query.R b/r/tests/testthat/test-dplyr-query.R
index 07cdf08..21a55f4 100644
--- a/r/tests/testthat/test-dplyr-query.R
+++ b/r/tests/testthat/test-dplyr-query.R
@@ -43,8 +43,8 @@ test_that("basic select/filter/collect", {
})
test_that("dim() on query", {
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
filter(int > 5) %>%
select(int, chr) %>%
dim(),
@@ -71,20 +71,20 @@ See $.data for the source Arrow object',
})
test_that("pull", {
- expect_dplyr_equal(
- input %>% pull(),
+ compare_dplyr_binding(
+ .input %>% pull(),
tbl
)
- expect_dplyr_equal(
- input %>% pull(1),
+ compare_dplyr_binding(
+ .input %>% pull(1),
tbl
)
- expect_dplyr_equal(
- input %>% pull(chr),
+ compare_dplyr_binding(
+ .input %>% pull(chr),
tbl
)
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
filter(int > 4) %>%
rename(strng = chr) %>%
pull(strng),
@@ -224,8 +224,9 @@ test_that("head", {
})
test_that("arrange then head returns the right data (ARROW-14162)", {
- expect_dplyr_equal(
- input %>%
+
+ compare_dplyr_binding(
+ .input %>%
# mpg has ties so we need to sort by two things to get deterministic order
arrange(mpg, disp) %>%
head(4) %>%
@@ -236,8 +237,8 @@ test_that("arrange then head returns the right data (ARROW-14162)", {
})
test_that("arrange then tail returns the right data", {
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
# mpg has ties so we need to sort by two things to get deterministic order
arrange(mpg, disp) %>%
tail(4) %>%
diff --git a/r/tests/testthat/test-dplyr-select.R b/r/tests/testthat/test-dplyr-select.R
index 35c149c..2ca2b10 100644
--- a/r/tests/testthat/test-dplyr-select.R
+++ b/r/tests/testthat/test-dplyr-select.R
@@ -23,16 +23,16 @@ library(stringr)
tbl <- example_data
test_that("Empty select returns no columns", {
- expect_dplyr_equal(
- input %>% select() %>% collect(),
+ compare_dplyr_binding(
+ .input %>% select() %>% collect(),
tbl,
skip_table = "Table with 0 cols doesn't know how many rows it should have"
)
})
test_that("Empty select still includes the group_by columns", {
expect_message(
- expect_dplyr_equal(
- input %>% group_by(chr) %>% select() %>% collect(),
+ compare_dplyr_binding(
+ .input %>% group_by(chr) %>% select() %>% collect(),
tbl
),
"Adding missing grouping variables"
@@ -40,20 +40,20 @@ test_that("Empty select still includes the group_by columns", {
})
test_that("select/rename", {
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
select(string = chr, int) %>%
collect(),
tbl
)
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
rename(string = chr) %>%
collect(),
tbl
)
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
rename(strng = chr) %>%
rename(other = strng) %>%
collect(),
@@ -66,8 +66,8 @@ test_that("select/rename with selection helpers", {
# TODO: add some passing tests here
expect_error(
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
select(where(is.numeric)) %>%
collect(),
tbl
@@ -77,15 +77,15 @@ test_that("select/rename with selection helpers", {
})
test_that("filtering with rename", {
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
filter(chr == "b") %>%
select(string = chr, int) %>%
collect(),
tbl
)
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
select(string = chr, int) %>%
filter(string == "b") %>%
collect(),
@@ -95,49 +95,49 @@ test_that("filtering with rename", {
test_that("relocate", {
df <- tibble(a = 1, b = 1, c = 1, d = "a", e = "a", f = "a")
- expect_dplyr_equal(
- input %>% relocate(f) %>% collect(),
+ compare_dplyr_binding(
+ .input %>% relocate(f) %>% collect(),
df,
)
- expect_dplyr_equal(
- input %>% relocate(a, .after = c) %>% collect(),
+ compare_dplyr_binding(
+ .input %>% relocate(a, .after = c) %>% collect(),
df,
)
- expect_dplyr_equal(
- input %>% relocate(f, .before = b) %>% collect(),
+ compare_dplyr_binding(
+ .input %>% relocate(f, .before = b) %>% collect(),
df,
)
- expect_dplyr_equal(
- input %>% relocate(a, .after = last_col()) %>% collect(),
+ compare_dplyr_binding(
+ .input %>% relocate(a, .after = last_col()) %>% collect(),
df,
)
- expect_dplyr_equal(
- input %>% relocate(ff = f) %>% collect(),
+ compare_dplyr_binding(
+ .input %>% relocate(ff = f) %>% collect(),
df,
)
})
test_that("relocate with selection helpers", {
df <- tibble(a = 1, b = 1, c = 1, d = "a", e = "a", f = "a")
- expect_dplyr_equal(
- input %>% relocate(any_of(c("a", "e", "i", "o", "u"))) %>% collect(),
+ compare_dplyr_binding(
+ .input %>% relocate(any_of(c("a", "e", "i", "o", "u"))) %>% collect(),
df
)
- expect_dplyr_equal(
- input %>% relocate(where(is.character)) %>% collect(),
+ compare_dplyr_binding(
+ .input %>% relocate(where(is.character)) %>% collect(),
df
)
- expect_dplyr_equal(
- input %>% relocate(a, b, c, .after = where(is.character)) %>% collect(),
+ compare_dplyr_binding(
+ .input %>% relocate(a, b, c, .after = where(is.character)) %>% collect(),
df
)
- expect_dplyr_equal(
- input %>% relocate(d, e, f, .before = where(is.numeric)) %>% collect(),
+ compare_dplyr_binding(
+ .input %>% relocate(d, e, f, .before = where(is.numeric)) %>% collect(),
df
)
# works after other dplyr verbs
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
mutate(c = as.character(c)) %>%
relocate(d, e, f, .after = where(is.numeric)) %>%
collect(),
diff --git a/r/tests/testthat/test-dplyr-summarize.R b/r/tests/testthat/test-dplyr-summarize.R
index a722e52..3988412 100644
--- a/r/tests/testthat/test-dplyr-summarize.R
+++ b/r/tests/testthat/test-dplyr-summarize.R
@@ -45,14 +45,14 @@ test_that("summarize() doesn't evaluate eagerly", {
})
test_that("Can aggregate in Arrow", {
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
summarize(total = sum(int, na.rm = TRUE)) %>%
collect(),
tbl
)
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
summarize(total = sum(int)) %>%
collect(),
tbl
@@ -60,24 +60,24 @@ test_that("Can aggregate in Arrow", {
})
test_that("Group by sum on dataset", {
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
group_by(some_grouping) %>%
summarize(total = sum(int, na.rm = TRUE)) %>%
collect(),
tbl
)
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
group_by(some_grouping) %>%
summarize(total = sum(int * 4, na.rm = TRUE)) %>%
collect(),
tbl
)
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
group_by(some_grouping) %>%
summarize(total = sum(int)) %>%
collect(),
@@ -86,16 +86,16 @@ test_that("Group by sum on dataset", {
})
test_that("Group by mean on dataset", {
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
group_by(some_grouping) %>%
summarize(mean = mean(int, na.rm = TRUE)) %>%
collect(),
tbl
)
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
group_by(some_grouping) %>%
summarize(mean = mean(int, na.rm = FALSE)) %>%
collect(),
@@ -104,16 +104,16 @@ test_that("Group by mean on dataset", {
})
test_that("Group by sd on dataset", {
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
group_by(some_grouping) %>%
summarize(sd = sd(int, na.rm = TRUE)) %>%
collect(),
tbl
)
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
group_by(some_grouping) %>%
summarize(sd = sd(int, na.rm = FALSE)) %>%
collect(),
@@ -122,16 +122,16 @@ test_that("Group by sd on dataset", {
})
test_that("Group by var on dataset", {
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
group_by(some_grouping) %>%
summarize(var = var(int, na.rm = TRUE)) %>%
collect(),
tbl
)
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
group_by(some_grouping) %>%
summarize(var = var(int, na.rm = FALSE)) %>%
collect(),
@@ -140,15 +140,15 @@ test_that("Group by var on dataset", {
})
test_that("n()", {
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
summarize(counts = n()) %>%
collect(),
tbl
)
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
group_by(some_grouping) %>%
summarize(counts = n()) %>%
arrange(some_grouping) %>%
@@ -158,53 +158,53 @@ test_that("n()", {
})
test_that("Group by any/all", {
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
group_by(some_grouping) %>%
summarize(any(lgl, na.rm = TRUE)) %>%
collect(),
tbl
)
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
group_by(some_grouping) %>%
summarize(all(lgl, na.rm = TRUE)) %>%
collect(),
tbl
)
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
group_by(some_grouping) %>%
summarize(any(lgl, na.rm = FALSE)) %>%
collect(),
tbl
)
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
group_by(some_grouping) %>%
summarize(all(lgl, na.rm = FALSE)) %>%
collect(),
tbl
)
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
mutate(has_words = nchar(verses) < 0) %>%
group_by(some_grouping) %>%
summarize(any(has_words, na.rm = TRUE)) %>%
collect(),
tbl
)
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
mutate(has_words = nchar(verses) < 0) %>%
group_by(some_grouping) %>%
summarize(all(has_words, na.rm = TRUE)) %>%
collect(),
tbl
)
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
group_by(some_grouping) %>%
summarize(has_words = all(nchar(verses) < 0, na.rm = TRUE)) %>%
collect(),
@@ -214,43 +214,43 @@ test_that("Group by any/all", {
test_that("n_distinct() on dataset", {
# With groupby
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
group_by(some_grouping) %>%
summarize(distinct = n_distinct(lgl, na.rm = FALSE)) %>%
collect(),
tbl
)
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
group_by(some_grouping) %>%
summarize(distinct = n_distinct(lgl, na.rm = TRUE)) %>%
collect(),
tbl
)
# Without groupby
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
summarize(distinct = n_distinct(lgl, na.rm = FALSE)) %>%
collect(),
tbl
)
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
summarize(distinct = n_distinct(lgl, na.rm = TRUE)) %>%
collect(),
tbl
)
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
summarize(distinct = n_distinct(int, lgl)) %>%
collect(),
tbl,
warning = "Multiple arguments"
)
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
group_by(some_grouping) %>%
summarize(distinct = n_distinct(int, lgl)) %>%
collect(),
@@ -260,15 +260,15 @@ test_that("n_distinct() on dataset", {
})
test_that("Functions that take ... but we only accept a single arg", {
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
summarize(distinct = n_distinct()) %>%
collect(),
tbl,
warning = "0 arguments"
)
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
summarize(distinct = n_distinct(int, lgl)) %>%
collect(),
tbl,
@@ -301,8 +301,8 @@ test_that("median()", {
local_edition(2)
# with groups
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
group_by(some_grouping) %>%
summarize(
med_dbl = median(dbl),
@@ -318,8 +318,8 @@ test_that("median()", {
warning = "median\\(\\) currently returns an approximate median in Arrow"
)
# without groups, with na.rm = TRUE
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
summarize(
med_dbl_narmt = median(dbl, na.rm = TRUE),
med_int_narmt = as.double(median(int, TRUE))
@@ -329,8 +329,8 @@ test_that("median()", {
warning = "median\\(\\) currently returns an approximate median in Arrow"
)
# without groups, with na.rm = FALSE (the default)
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
summarize(
med_dbl = median(dbl),
med_int = as.double(median(int)),
@@ -354,8 +354,8 @@ test_that("quantile()", {
# controls whether the result has a names attribute. It defaults to
# names = TRUE. With Arrow, it is not possible to give the result a names
# attribute, so the quantile() binding in Arrow does not accept a `names`
- # argument. Differences in this names attribute cause expect_dplyr_equal() to
- # report that the objects are not equal, so we do not use expect_dplyr_equal()
+ # argument. Differences in this names attribute cause compare_dplyr_binding() to
+ # report that the objects are not equal, so we do not use compare_dplyr_binding()
# in the tests below.
# The tests below all use probs = 0.5 because other values cause differences
@@ -443,16 +443,16 @@ test_that("quantile()", {
})
test_that("summarize() with min() and max()", {
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
select(int, chr) %>%
filter(int > 5) %>% # this filters out the NAs in `int`
summarize(min_int = min(int), max_int = max(int)) %>%
collect(),
tbl,
)
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
select(int, chr) %>%
filter(int > 5) %>% # this filters out the NAs in `int`
summarize(
@@ -462,15 +462,15 @@ test_that("summarize() with min() and max()", {
collect(),
tbl,
)
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
select(int, chr) %>%
summarize(min_int = min(int), max_int = max(int)) %>%
collect(),
tbl,
)
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
select(int) %>%
summarize(
min_int = min(int, na.rm = TRUE),
@@ -479,8 +479,8 @@ test_that("summarize() with min() and max()", {
collect(),
tbl,
)
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
select(dbl, int) %>%
summarize(
min_int = -min(log(ceiling(dbl)), na.rm = TRUE),
@@ -491,15 +491,15 @@ test_that("summarize() with min() and max()", {
)
# multiple dots arguments to min(), max() not supported
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
summarize(min_mult = min(dbl, int)) %>%
collect(),
tbl,
warning = "Multiple arguments to min\\(\\) not supported by Arrow"
)
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
select(int, dbl, dbl2) %>%
summarize(max_mult = max(int, dbl, dbl2)) %>%
collect(),
@@ -509,8 +509,8 @@ test_that("summarize() with min() and max()", {
# min(logical) or max(logical) yields integer in R
# min(Boolean) or max(Boolean) yields Boolean in Arrow
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
select(lgl) %>%
summarize(
max_lgl = as.logical(max(lgl, na.rm = TRUE)),
@@ -522,8 +522,8 @@ test_that("summarize() with min() and max()", {
})
test_that("min() and max() on character strings", {
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
summarize(
min_chr = min(chr, na.rm = TRUE),
max_chr = max(chr, na.rm = TRUE)
@@ -532,8 +532,8 @@ test_that("min() and max() on character strings", {
tbl,
)
skip("Strings not supported by hash_min_max (ARROW-13988)")
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
group_by(fct) %>%
summarize(
min_chr = min(chr, na.rm = TRUE),
@@ -548,8 +548,8 @@ test_that("summarise() with !!sym()", {
test_chr_col <- "int"
test_dbl_col <- "dbl"
test_lgl_col <- "lgl"
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
group_by(false) %>%
summarise(
sum = sum(!!sym(test_dbl_col)),
@@ -568,24 +568,24 @@ test_that("summarise() with !!sym()", {
})
test_that("Filter and aggregate", {
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
filter(some_grouping == 2) %>%
summarize(total = sum(int, na.rm = TRUE)) %>%
collect(),
tbl
)
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
filter(int > 5) %>%
summarize(total = sum(int, na.rm = TRUE)) %>%
collect(),
tbl
)
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
filter(some_grouping == 2) %>%
group_by(some_grouping) %>%
summarize(total = sum(int, na.rm = TRUE)) %>%
@@ -593,8 +593,8 @@ test_that("Filter and aggregate", {
tbl
)
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
filter(int > 5) %>%
group_by(some_grouping) %>%
summarize(total = sum(int, na.rm = TRUE)) %>%
@@ -604,16 +604,16 @@ test_that("Filter and aggregate", {
})
test_that("Group by edge cases", {
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
group_by(some_grouping * 2) %>%
summarize(total = sum(int, na.rm = TRUE)) %>%
collect(),
tbl
)
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
group_by(alt = some_grouping * 2) %>%
summarize(total = sum(int, na.rm = TRUE)) %>%
collect(),
@@ -629,8 +629,8 @@ test_that("Do things after summarize", {
pull() %>%
tail(1)
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
group_by(some_grouping) %>%
filter(int > 5) %>%
summarize(total = sum(int, na.rm = TRUE)) %>%
@@ -640,8 +640,8 @@ test_that("Do things after summarize", {
tbl
)
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
filter(dbl > 2) %>%
select(chr, int, lgl) %>%
mutate(twice = int * 2L) %>%
@@ -658,8 +658,8 @@ test_that("Do things after summarize", {
test_that("Expressions on aggregations", {
# This is what it effectively is
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
group_by(some_grouping) %>%
summarize(
any = any(lgl),
@@ -672,8 +672,8 @@ test_that("Expressions on aggregations", {
tbl
)
# More concisely:
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
group_by(some_grouping) %>%
summarize(any(lgl) & !all(lgl)) %>%
collect(),
@@ -681,8 +681,8 @@ test_that("Expressions on aggregations", {
)
# Save one of the aggregates first
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
group_by(some_grouping) %>%
summarize(
any_lgl = any(lgl),
@@ -693,8 +693,8 @@ test_that("Expressions on aggregations", {
)
# Make sure order of columns in result is correct
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
group_by(some_grouping) %>%
summarize(
any_lgl = any(lgl),
@@ -707,8 +707,8 @@ test_that("Expressions on aggregations", {
# Aggregate on an aggregate (trivial but dplyr allows)
skip("Aggregate on an aggregate not supported")
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
group_by(some_grouping) %>%
summarize(
any_lgl = any(any(lgl))
@@ -719,8 +719,8 @@ test_that("Expressions on aggregations", {
})
test_that("Summarize with 0 arguments", {
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
group_by(some_grouping) %>%
summarize() %>%
collect(),
@@ -730,8 +730,8 @@ test_that("Summarize with 0 arguments", {
test_that("Not (yet) supported: implicit join", {
withr::local_options(list(arrow.debug = TRUE))
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
group_by(some_grouping) %>%
summarize(
sum((dbl - mean(dbl))^2)
@@ -740,8 +740,8 @@ test_that("Not (yet) supported: implicit join", {
tbl,
warning = "Expression sum\\(\\(dbl - mean\\(dbl\\)\\)\\^2\\) not supported in Arrow; pulling data into R"
)
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
group_by(some_grouping) %>%
summarize(
sum(dbl - mean(dbl))
@@ -750,8 +750,8 @@ test_that("Not (yet) supported: implicit join", {
tbl,
warning = "Expression sum\\(dbl - mean\\(dbl\\)\\) not supported in Arrow; pulling data into R"
)
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
group_by(some_grouping) %>%
summarize(
sqrt(sum((dbl - mean(dbl))^2) / (n() - 1L))
@@ -761,8 +761,8 @@ test_that("Not (yet) supported: implicit join", {
warning = "Expression sum\\(\\(dbl - mean\\(dbl\\)\\)\\^2\\) not supported in Arrow; pulling data into R"
)
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
group_by(some_grouping) %>%
summarize(
dbl - mean(dbl)
@@ -773,8 +773,8 @@ test_that("Not (yet) supported: implicit join", {
)
# This one could possibly be supported--in mutate()
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
group_by(some_grouping) %>%
summarize(
dbl - int
@@ -786,36 +786,36 @@ test_that("Not (yet) supported: implicit join", {
})
test_that(".groups argument", {
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
group_by(some_grouping, int < 6) %>%
summarize(count = n()) %>%
collect(),
tbl
)
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
group_by(some_grouping, int < 6) %>%
summarize(count = n(), .groups = "drop_last") %>%
collect(),
tbl
)
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
group_by(some_grouping, int < 6) %>%
summarize(count = n(), .groups = "keep") %>%
collect(),
tbl
)
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
group_by(some_grouping, int < 6) %>%
summarize(count = n(), .groups = "drop") %>%
collect(),
tbl
)
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
group_by(some_grouping, int < 6) %>%
summarize(count = n(), .groups = "rowwise") %>%
collect(),
@@ -844,8 +844,8 @@ test_that("summarize() handles group_by .drop", {
x = 1:10,
y = factor(rep(c("a", "c"), each = 5), levels = c("a", "b", "c"))
)
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
group_by(y) %>%
count() %>%
collect() %>%
@@ -853,8 +853,8 @@ test_that("summarize() handles group_by .drop", {
tbl
)
# Not supported: check message
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
group_by(y, .drop = FALSE) %>%
count() %>%
collect() %>%
@@ -867,8 +867,8 @@ test_that("summarize() handles group_by .drop", {
)
# But this is ok because there is no factor group
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
group_by(y, .drop = FALSE) %>%
count() %>%
collect() %>%
diff --git a/r/tests/testthat/test-metadata.R b/r/tests/testthat/test-metadata.R
index c560da7..4c4d8a7 100644
--- a/r/tests/testthat/test-metadata.R
+++ b/r/tests/testthat/test-metadata.R
@@ -315,26 +315,26 @@ test_that("When we encounter SF cols, we warn", {
test_that("dplyr with metadata", {
skip_if_not_available("dataset")
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
collect(),
example_with_metadata
)
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
select(a) %>%
collect(),
example_with_metadata
)
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
mutate(z = b * 4) %>%
select(z, a) %>%
collect(),
example_with_metadata
)
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
mutate(z = nchar(a)) %>%
select(z, a) %>%
collect(),
@@ -342,8 +342,8 @@ test_that("dplyr with metadata", {
)
# dplyr drops top-level attributes if you do summarize, though attributes
# of grouping columns appear to come through
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
group_by(a) %>%
summarize(n()) %>%
collect(),
@@ -351,8 +351,8 @@ test_that("dplyr with metadata", {
)
# Same name in output but different data, so the column metadata shouldn't
# carry through
- expect_dplyr_equal(
- input %>%
+ compare_dplyr_binding(
+ .input %>%
mutate(a = nchar(a)) %>%
select(a) %>%
collect(),
diff --git a/r/tests/testthat/test-na-omit.R b/r/tests/testthat/test-na-omit.R
index 894dbe3..fafebb4 100644
--- a/r/tests/testthat/test-na-omit.R
+++ b/r/tests/testthat/test-na-omit.R
@@ -26,18 +26,18 @@ test_that("na.fail on Scalar", {
})
test_that("na.omit on Array and ChunkedArray", {
- expect_vector_equal(na.omit(input), data_no_na)
- expect_vector_equal(na.omit(input), data_na, ignore_attr = TRUE)
+ compare_expression(na.omit(.input), data_no_na)
+ compare_expression(na.omit(.input), data_na, ignore_attr = TRUE)
})
test_that("na.exclude on Array and ChunkedArray", {
- expect_vector_equal(na.exclude(input), data_no_na)
- expect_vector_equal(na.exclude(input), data_na, ignore_attr = TRUE)
+ compare_expression(na.exclude(.input), data_no_na)
+ compare_expression(na.exclude(.input), data_na, ignore_attr = TRUE)
})
test_that("na.fail on Array and ChunkedArray", {
- expect_vector_equal(na.fail(input), data_no_na, ignore_attr = TRUE)
- expect_vector_error(na.fail(input), data_na)
+ compare_expression(na.fail(.input), data_no_na, ignore_attr = TRUE)
+ compare_expression_error(na.fail(.input), data_na)
})
test_that("na.fail on Scalar", {