You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by th...@apache.org on 2023/05/03 14:53:43 UTC
[arrow] branch main updated: GH-34775: [R] arrow_table: as.data.frame() sometimes returns a tbl and sometimes a data.frame (#35173)
This is an automated email from the ASF dual-hosted git repository.
thisisnic pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/main by this push:
new 205ceb9938 GH-34775: [R] arrow_table: as.data.frame() sometimes returns a tbl and sometimes a data.frame (#35173)
205ceb9938 is described below
commit 205ceb99389d2d5060551ebcba17635185c74d9d
Author: Nic Crane <th...@gmail.com>
AuthorDate: Wed May 3 16:53:35 2023 +0200
GH-34775: [R] arrow_table: as.data.frame() sometimes returns a tbl and sometimes a data.frame (#35173)
Features of this PR:
* Ensures that calling `as.data.frame()` on Arrow objects returns base R `data.frame` objects.
* Drops the `class` attribute metadata of input objects of `data.frame` class (i.e. that don't have inherit from any additional classes other than `data.frame`). This results in us sacrificing roundtrip class fidelity for `data.frame` objects (i.e. if we input a base R data.frame, convert it to an Arrow Table, and then convert it back to R, we get a tibble). However, we now have consistency in the type of returned objects, retain roundtrip fidelity for other (non-class) metadata, an [...]
* Implements `dplyr::collect()` for StructArrays so that these objects can still be returned as tibbles if needed.
* Renames `expect_data_frame()` to `expect_equal_data_frame()` for clarity, and updates it to convert both the object and expected object to data.frames.
* Closes: #34775
Authored-by: Nic Crane <th...@gmail.com>
Signed-off-by: Nic Crane <th...@gmail.com>
---
r/R/array.R | 2 +-
r/R/arrow-tabular.R | 3 +-
r/R/csv.R | 2 +-
r/R/dplyr-collect.R | 7 +-
r/R/dplyr.R | 3 +-
r/R/feather.R | 3 +-
r/R/ipc-stream.R | 2 +-
r/R/json.R | 2 +-
r/R/metadata.R | 9 +++
r/R/parquet.R | 2 +-
r/tests/testthat/helper-expectation.R | 5 +-
r/tests/testthat/test-RecordBatch.R | 83 ++++++++++----------
r/tests/testthat/test-Table.R | 110 +++++++++++++++------------
r/tests/testthat/test-compute-aggregate.R | 2 +-
r/tests/testthat/test-compute-sort.R | 8 +-
r/tests/testthat/test-dataset-csv.R | 2 +-
r/tests/testthat/test-dataset.R | 50 ++++++------
r/tests/testthat/test-dplyr-funcs-datetime.R | 8 +-
r/tests/testthat/test-dplyr-query.R | 26 +++----
r/tests/testthat/test-duckdb.R | 3 +-
r/tests/testthat/test-feather.R | 2 +-
r/tests/testthat/test-metadata.R | 25 ++++--
r/tests/testthat/test-na-omit.R | 16 ++--
r/tests/testthat/test-python-flight.R | 6 +-
r/tests/testthat/test-python.R | 10 +--
r/tests/testthat/test-read-write.R | 4 +-
r/tests/testthat/test-utf.R | 18 ++---
27 files changed, 229 insertions(+), 184 deletions(-)
diff --git a/r/R/array.R b/r/R/array.R
index 109f6daaa2..3e9e0eae1a 100644
--- a/r/R/array.R
+++ b/r/R/array.R
@@ -474,7 +474,7 @@ dim.StructArray <- function(x, ...) c(length(x), x$type$num_fields)
#' @export
as.data.frame.StructArray <- function(x, row.names = NULL, optional = FALSE, ...) {
- as.vector(x)
+ as.data.frame(collect.StructArray(x), row.names = row.names, optional = optional, ...)
}
#' @rdname array
diff --git a/r/R/arrow-tabular.R b/r/R/arrow-tabular.R
index ae68cc2118..e62547d291 100644
--- a/r/R/arrow-tabular.R
+++ b/r/R/arrow-tabular.R
@@ -94,7 +94,8 @@ ArrowTabular <- R6Class("ArrowTabular",
#' @export
as.data.frame.ArrowTabular <- function(x, row.names = NULL, optional = FALSE, ...) {
df <- x$to_data_frame()
- apply_arrow_r_metadata(df, x$metadata$r)
+ out <- apply_arrow_r_metadata(df, x$metadata$r)
+ as.data.frame(out, row.names = row.names, optional = optional, ...)
}
#' @export
diff --git a/r/R/csv.R b/r/R/csv.R
index 8224323866..88008bd682 100644
--- a/r/R/csv.R
+++ b/r/R/csv.R
@@ -248,7 +248,7 @@ read_delim_arrow <- function(file,
}
if (isTRUE(as_data_frame)) {
- tab <- as.data.frame(tab)
+ tab <- collect.ArrowTabular(tab)
}
tab
diff --git a/r/R/dplyr-collect.R b/r/R/dplyr-collect.R
index 9205a31b14..970722e86a 100644
--- a/r/R/dplyr-collect.R
+++ b/r/R/dplyr-collect.R
@@ -24,7 +24,8 @@ collect.arrow_dplyr_query <- function(x, as_data_frame = TRUE, ...) {
}
collect.ArrowTabular <- function(x, as_data_frame = TRUE, ...) {
if (as_data_frame) {
- as.data.frame(x, ...)
+ df <- x$to_data_frame()
+ apply_arrow_r_metadata(df, x$metadata$r)
} else {
x
}
@@ -34,6 +35,10 @@ collect.Dataset <- function(x, as_data_frame = TRUE, ...) {
}
collect.RecordBatchReader <- collect.Dataset
+collect.StructArray <- function(x, row.names = NULL, optional = FALSE, ...) {
+ as.vector(x)
+}
+
compute.ArrowTabular <- function(x, ...) x
compute.arrow_dplyr_query <- function(x, ...) {
# TODO: should this tryCatch move down into as_arrow_table()?
diff --git a/r/R/dplyr.R b/r/R/dplyr.R
index 54ecc80aad..62b345e1ce 100644
--- a/r/R/dplyr.R
+++ b/r/R/dplyr.R
@@ -216,7 +216,8 @@ unique.RecordBatchReader <- unique.arrow_dplyr_query
#' @export
as.data.frame.arrow_dplyr_query <- function(x, row.names = NULL, optional = FALSE, ...) {
- collect.arrow_dplyr_query(x, as_data_frame = TRUE, ...)
+ out <- collect.arrow_dplyr_query(x, as_data_frame = TRUE, ...)
+ as.data.frame(out)
}
#' @export
diff --git a/r/R/feather.R b/r/R/feather.R
index 1488db29eb..24971669fc 100644
--- a/r/R/feather.R
+++ b/r/R/feather.R
@@ -196,7 +196,8 @@ read_feather <- function(file, col_select = NULL, as_data_frame = TRUE, mmap = T
)
if (isTRUE(as_data_frame)) {
- out <- as.data.frame(out)
+ df <- out$to_data_frame()
+ out <- apply_arrow_r_metadata(df, out$metadata$r)
}
out
}
diff --git a/r/R/ipc-stream.R b/r/R/ipc-stream.R
index f0b4a6aae0..7144132393 100644
--- a/r/R/ipc-stream.R
+++ b/r/R/ipc-stream.R
@@ -106,7 +106,7 @@ read_ipc_stream <- function(file, as_data_frame = TRUE, ...) {
# https://issues.apache.org/jira/browse/ARROW-6830
out <- RecordBatchStreamReader$create(file)$read_table()
if (as_data_frame) {
- out <- as.data.frame(out)
+ out <- collect.ArrowTabular(out)
}
out
}
diff --git a/r/R/json.R b/r/R/json.R
index cdbe850b32..e8131b37f2 100644
--- a/r/R/json.R
+++ b/r/R/json.R
@@ -84,7 +84,7 @@ read_json_arrow <- function(file,
}
if (isTRUE(as_data_frame)) {
- tab <- as.data.frame(tab)
+ tab <- collect.ArrowTabular(tab)
}
tab
}
diff --git a/r/R/metadata.R b/r/R/metadata.R
index 6a54b3e384..3ae2db4eaa 100644
--- a/r/R/metadata.R
+++ b/r/R/metadata.R
@@ -22,6 +22,14 @@
# drop problems attributes (most likely from readr)
x[["attributes"]][["problems"]] <- NULL
+ # remove the class if it's just data.frame
+ if (identical(x$attributes$class, "data.frame")) {
+ x$attributes <- x$attributes[names(x$attributes) != "class"]
+ if (is_empty(x$attributes)) {
+ x <- x[names(x) != "attributes"]
+ }
+ }
+
out <- serialize(x, NULL, ascii = TRUE)
# if the metadata is over 100 kB, compress
@@ -62,6 +70,7 @@ apply_arrow_r_metadata <- function(x, r_metadata) {
expr = {
columns_metadata <- r_metadata$columns
if (is.data.frame(x)) {
+ # if columns metadata exists, apply it here
if (length(names(x)) && !is.null(columns_metadata)) {
for (name in intersect(names(columns_metadata), names(x))) {
x[[name]] <- apply_arrow_r_metadata(x[[name]], columns_metadata[[name]])
diff --git a/r/R/parquet.R b/r/R/parquet.R
index f3d384e8c2..1335e85219 100644
--- a/r/R/parquet.R
+++ b/r/R/parquet.R
@@ -70,7 +70,7 @@ read_parquet <- function(file,
}
if (as_data_frame) {
- tab <- as.data.frame(tab)
+ tab <- collect.ArrowTabular(tab)
}
tab
}
diff --git a/r/tests/testthat/helper-expectation.R b/r/tests/testthat/helper-expectation.R
index 303a96ead7..090ed36aa7 100644
--- a/r/tests/testthat/helper-expectation.R
+++ b/r/tests/testthat/helper-expectation.R
@@ -19,8 +19,9 @@ expect_as_vector <- function(x, y, ...) {
expect_equal(as.vector(x), y, ...)
}
-expect_data_frame <- function(x, y, ...) {
- expect_equal(as.data.frame(x), y, ...)
+# expect both objects to contain equal values when converted to data.frame objects
+expect_equal_data_frame <- function(x, y, ...) {
+ expect_equal(as.data.frame(x), as.data.frame(y), ...)
}
expect_r6_class <- function(object, class) {
diff --git a/r/tests/testthat/test-RecordBatch.R b/r/tests/testthat/test-RecordBatch.R
index 7e7084542d..f29b75dbf4 100644
--- a/r/tests/testthat/test-RecordBatch.R
+++ b/r/tests/testthat/test-RecordBatch.R
@@ -89,7 +89,7 @@ test_that("RecordBatch", {
schema(dbl = float64(), lgl = boolean(), chr = utf8(), fct = dictionary(int8(), utf8()))
)
expect_equal(batch2$column(0), batch$column(1))
- expect_data_frame(batch2, tbl[, -1])
+ expect_equal_data_frame(batch2, tbl[, -1])
# input validation
expect_error(batch$RemoveColumn(NA), "'i' cannot be NA")
@@ -109,10 +109,10 @@ test_that("RecordBatch S3 methods", {
test_that("RecordBatch$Slice", {
batch3 <- batch$Slice(5)
- expect_data_frame(batch3, tbl[6:10, ])
+ expect_equal_data_frame(batch3, tbl[6:10, ])
batch4 <- batch$Slice(5, 2)
- expect_data_frame(batch4, tbl[6:7, ])
+ expect_equal_data_frame(batch4, tbl[6:7, ])
# Input validation
expect_error(batch$Slice("ten"))
@@ -131,20 +131,20 @@ test_that("RecordBatch$Slice", {
})
test_that("[ on RecordBatch", {
- expect_data_frame(batch[6:7, ], tbl[6:7, ])
- expect_data_frame(batch[c(6, 7), ], tbl[6:7, ])
- expect_data_frame(batch[6:7, 2:4], tbl[6:7, 2:4])
- expect_data_frame(batch[, c("dbl", "fct")], tbl[, c(2, 5)])
+ expect_equal_data_frame(batch[6:7, ], tbl[6:7, ])
+ expect_equal_data_frame(batch[c(6, 7), ], tbl[6:7, ])
+ expect_equal_data_frame(batch[6:7, 2:4], tbl[6:7, 2:4])
+ expect_equal_data_frame(batch[, c("dbl", "fct")], tbl[, c(2, 5)])
expect_identical(as.vector(batch[, "chr", drop = TRUE]), tbl$chr)
- expect_data_frame(batch[c(7, 3, 5), 2:4], tbl[c(7, 3, 5), 2:4])
- expect_data_frame(
+ expect_equal_data_frame(batch[c(7, 3, 5), 2:4], tbl[c(7, 3, 5), 2:4])
+ expect_equal_data_frame(
batch[rep(c(FALSE, TRUE), 5), ],
tbl[c(2, 4, 6, 8, 10), ]
)
# bool Array
- expect_data_frame(batch[batch$lgl, ], tbl[tbl$lgl, ])
+ expect_equal_data_frame(batch[batch$lgl, ], tbl[tbl$lgl, ])
# int Array
- expect_data_frame(batch[Array$create(5:6), 2:4], tbl[6:7, 2:4])
+ expect_equal_data_frame(batch[Array$create(5:6), 2:4], tbl[6:7, 2:4])
# input validation
expect_error(batch[, c("dbl", "NOTACOLUMN")], 'Column not found: "NOTACOLUMN"')
@@ -176,15 +176,15 @@ test_that("[[<- assignment", {
# can remove a column
batch[["chr"]] <- NULL
- expect_data_frame(batch, tbl[-4])
+ expect_equal_data_frame(batch, tbl[-4])
# can remove a column by index
batch[[4]] <- NULL
- expect_data_frame(batch, tbl[1:3])
+ expect_equal_data_frame(batch, tbl[1:3])
# can add a named column
batch[["new"]] <- letters[10:1]
- expect_data_frame(batch, dplyr::bind_cols(tbl[1:3], new = letters[10:1]))
+ expect_equal_data_frame(batch, dplyr::bind_cols(tbl[1:3], new = letters[10:1]))
# can replace a column by index
batch[[2]] <- as.numeric(10:1)
@@ -239,16 +239,16 @@ test_that("head and tail on RecordBatch", {
fct = factor(letters[1:10])
)
batch <- RecordBatch$create(tbl)
- expect_data_frame(head(batch), head(tbl))
- expect_data_frame(head(batch, 4), head(tbl, 4))
- expect_data_frame(head(batch, 40), head(tbl, 40))
- expect_data_frame(head(batch, -4), head(tbl, -4))
- expect_data_frame(head(batch, -40), head(tbl, -40))
- expect_data_frame(tail(batch), tail(tbl))
- expect_data_frame(tail(batch, 4), tail(tbl, 4))
- expect_data_frame(tail(batch, 40), tail(tbl, 40))
- expect_data_frame(tail(batch, -4), tail(tbl, -4))
- expect_data_frame(tail(batch, -40), tail(tbl, -40))
+ expect_equal_data_frame(head(batch), head(tbl))
+ expect_equal_data_frame(head(batch, 4), head(tbl, 4))
+ expect_equal_data_frame(head(batch, 40), head(tbl, 40))
+ expect_equal_data_frame(head(batch, -4), head(tbl, -4))
+ expect_equal_data_frame(head(batch, -40), head(tbl, -40))
+ expect_equal_data_frame(tail(batch), tail(tbl))
+ expect_equal_data_frame(tail(batch, 4), tail(tbl, 4))
+ expect_equal_data_frame(tail(batch, 40), tail(tbl, 40))
+ expect_equal_data_frame(tail(batch, -4), tail(tbl, -4))
+ expect_equal_data_frame(tail(batch, -40), tail(tbl, -40))
})
test_that("RecordBatch print method", {
@@ -346,8 +346,8 @@ test_that("record_batch() handles data frame columns", {
b = struct(x = int32(), y = int32())
)
)
- out <- as.data.frame(batch)
- expect_equal(out, tibble::tibble(a = 1:10, b = tib))
+
+ expect_equal_data_frame(batch, tibble::tibble(a = 1:10, b = tib))
# if not named, columns from tib are auto spliced
batch2 <- record_batch(a = 1:10, tib)
@@ -355,8 +355,8 @@ test_that("record_batch() handles data frame columns", {
batch2$schema,
schema(a = int32(), x = int32(), y = int32())
)
- out <- as.data.frame(batch2)
- expect_equal(out, tibble::tibble(a = 1:10, !!!tib))
+
+ expect_equal_data_frame(batch2, tibble::tibble(a = 1:10, !!!tib))
})
test_that("record_batch() handles data frame columns with schema spec", {
@@ -366,8 +366,7 @@ test_that("record_batch() handles data frame columns with schema spec", {
schema <- schema(a = int32(), b = struct(x = int16(), y = float64()))
batch <- record_batch(a = 1:10, b = tib, schema = schema)
expect_equal(batch$schema, schema)
- out <- as.data.frame(batch)
- expect_equal(out, tibble::tibble(a = 1:10, b = tib_float))
+ expect_equal_data_frame(batch, tibble::tibble(a = 1:10, b = tib_float))
schema <- schema(a = int32(), b = struct(x = int16(), y = utf8()))
expect_error(record_batch(a = 1:10, b = tib, schema = schema))
@@ -379,15 +378,15 @@ test_that("record_batch() auto splices (ARROW-5718)", {
batch2 <- record_batch(!!!df)
expect_equal(batch1, batch2)
expect_equal(batch1$schema, schema(x = int32(), y = utf8()))
- expect_data_frame(batch1, df)
+ expect_equal_data_frame(batch1, df)
batch3 <- record_batch(df, z = 1:10)
batch4 <- record_batch(!!!df, z = 1:10)
expect_equal(batch3, batch4)
expect_equal(batch3$schema, schema(x = int32(), y = utf8(), z = int32()))
- expect_equal(
- as.data.frame(batch3),
- tibble::as_tibble(cbind(df, data.frame(z = 1:10)))
+ expect_equal_data_frame(
+ batch3,
+ cbind(df, data.frame(z = 1:10))
)
s <- schema(x = float64(), y = utf8())
@@ -395,16 +394,16 @@ test_that("record_batch() auto splices (ARROW-5718)", {
batch6 <- record_batch(!!!df, schema = s)
expect_equal(batch5, batch6)
expect_equal(batch5$schema, s)
- expect_equal(as.data.frame(batch5), df)
+ expect_equal_data_frame(batch5, df)
s2 <- schema(x = float64(), y = utf8(), z = int16())
batch7 <- record_batch(df, z = 1:10, schema = s2)
batch8 <- record_batch(!!!df, z = 1:10, schema = s2)
expect_equal(batch7, batch8)
expect_equal(batch7$schema, s2)
- expect_equal(
- as.data.frame(batch7),
- tibble::as_tibble(cbind(df, data.frame(z = 1:10)))
+ expect_equal_data_frame(
+ batch7,
+ cbind(df, data.frame(z = 1:10))
)
})
@@ -425,24 +424,24 @@ test_that("record_batch() handles null type (ARROW-7064)", {
})
test_that("record_batch() scalar recycling with vectors", {
- expect_data_frame(
+ expect_equal_data_frame(
record_batch(a = 1:10, b = 5),
tibble::tibble(a = 1:10, b = 5)
)
})
test_that("record_batch() scalar recycling with Scalars, Arrays, and ChunkedArrays", {
- expect_data_frame(
+ expect_equal_data_frame(
record_batch(a = Array$create(1:10), b = Scalar$create(5)),
tibble::tibble(a = 1:10, b = 5)
)
- expect_data_frame(
+ expect_equal_data_frame(
record_batch(a = Array$create(1:10), b = Array$create(5)),
tibble::tibble(a = 1:10, b = 5)
)
- expect_data_frame(
+ expect_equal_data_frame(
record_batch(a = Array$create(1:10), b = ChunkedArray$create(5)),
tibble::tibble(a = 1:10, b = 5)
)
diff --git a/r/tests/testthat/test-Table.R b/r/tests/testthat/test-Table.R
index 233705323e..ce3254a158 100644
--- a/r/tests/testthat/test-Table.R
+++ b/r/tests/testthat/test-Table.R
@@ -68,26 +68,26 @@ tab <- Table$create(tbl)
test_that("[, [[, $ for Table", {
expect_identical(names(tab), names(tbl))
- expect_data_frame(tab[6:7, ], tbl[6:7, ])
- expect_data_frame(tab[6:7, 2:4], tbl[6:7, 2:4])
- expect_data_frame(tab[, c("dbl", "fct")], tbl[, c(2, 5)])
+ expect_equal_data_frame(tab[6:7, ], tbl[6:7, ])
+ expect_equal_data_frame(tab[6:7, 2:4], tbl[6:7, 2:4])
+ expect_equal_data_frame(tab[, c("dbl", "fct")], tbl[, c(2, 5)])
expect_as_vector(tab[, "chr", drop = TRUE], tbl$chr)
# Take within a single chunk
- expect_data_frame(tab[c(7, 3, 5), 2:4], tbl[c(7, 3, 5), 2:4])
- expect_data_frame(tab[rep(c(FALSE, TRUE), 5), ], tbl[c(2, 4, 6, 8, 10), ])
+ expect_equal_data_frame(tab[c(7, 3, 5), 2:4], tbl[c(7, 3, 5), 2:4])
+ expect_equal_data_frame(tab[rep(c(FALSE, TRUE), 5), ], tbl[c(2, 4, 6, 8, 10), ])
# bool ChunkedArray (with one chunk)
- expect_data_frame(tab[tab$lgl, ], tbl[tbl$lgl, ])
+ expect_equal_data_frame(tab[tab$lgl, ], tbl[tbl$lgl, ])
# ChunkedArray with multiple chunks
c1 <- c(TRUE, FALSE, TRUE, TRUE, FALSE)
c2 <- c(FALSE, FALSE, TRUE, TRUE, FALSE)
ca <- ChunkedArray$create(c1, c2)
- expect_data_frame(tab[ca, ], tbl[c(1, 3, 4, 8, 9), ])
+ expect_equal_data_frame(tab[ca, ], tbl[c(1, 3, 4, 8, 9), ])
# int Array
- expect_data_frame(tab[Array$create(5:6), 2:4], tbl[6:7, 2:4])
+ expect_equal_data_frame(tab[Array$create(5:6), 2:4], tbl[6:7, 2:4])
# ChunkedArray
- expect_data_frame(tab[ChunkedArray$create(5L, 6L), 2:4], tbl[6:7, 2:4])
+ expect_equal_data_frame(tab[ChunkedArray$create(5L, 6L), 2:4], tbl[6:7, 2:4])
# Expression
- expect_data_frame(tab[tab$int > 6, ], tbl[tbl$int > 6, ])
+ expect_equal_data_frame(tab[tab$int > 6, ], tbl[tbl$int > 6, ])
expect_as_vector(tab[["int"]], tbl$int)
expect_as_vector(tab$int, tbl$int)
@@ -95,9 +95,9 @@ test_that("[, [[, $ for Table", {
expect_null(tab$qwerty)
expect_null(tab[["asdf"]])
# List-like column slicing
- expect_data_frame(tab[2:4], tbl[2:4])
- expect_data_frame(tab[c(2, 1)], tbl[c(2, 1)])
- expect_data_frame(tab[-3], tbl[-3])
+ expect_equal_data_frame(tab[2:4], tbl[2:4])
+ expect_equal_data_frame(tab[c(2, 1)], tbl[c(2, 1)])
+ expect_equal_data_frame(tab[-3], tbl[-3])
expect_error(tab[[c(4, 3)]])
expect_error(tab[[NA]], "'i' must be character or numeric, not logical")
@@ -112,21 +112,21 @@ test_that("[, [[, $ for Table", {
expect_error(tab[, c(6, NA)], "Column indices cannot be NA")
skip("Table with 0 cols doesn't know how many rows it should have")
- expect_data_frame(tab[0], tbl[0])
+ expect_equal_data_frame(tab[0], tbl[0])
})
test_that("[[<- assignment", {
# can remove a column
tab[["chr"]] <- NULL
- expect_data_frame(tab, tbl[-4])
+ expect_equal_data_frame(tab, tbl[-4])
# can remove a column by index
tab[[4]] <- NULL
- expect_data_frame(tab, tbl[1:3])
+ expect_equal_data_frame(tab, tbl[1:3])
# can add a named column
tab[["new"]] <- letters[10:1]
- expect_data_frame(tab, dplyr::bind_cols(tbl[1:3], new = letters[10:1]))
+ expect_equal_data_frame(tab, dplyr::bind_cols(tbl[1:3], new = letters[10:1]))
# can replace a column by index
tab[[2]] <- as.numeric(10:1)
@@ -177,10 +177,10 @@ test_that("[[<- assignment", {
test_that("Table$Slice", {
tab2 <- tab$Slice(5)
- expect_data_frame(tab2, tbl[6:10, ])
+ expect_equal_data_frame(tab2, tbl[6:10, ])
tab3 <- tab$Slice(5, 2)
- expect_data_frame(tab3, tbl[6:7, ])
+ expect_equal_data_frame(tab3, tbl[6:7, ])
# Input validation
expect_error(tab$Slice("ten"))
@@ -199,16 +199,16 @@ test_that("Table$Slice", {
})
test_that("head and tail on Table", {
- expect_data_frame(head(tab), head(tbl))
- expect_data_frame(head(tab, 4), head(tbl, 4))
- expect_data_frame(head(tab, 40), head(tbl, 40))
- expect_data_frame(head(tab, -4), head(tbl, -4))
- expect_data_frame(head(tab, -40), head(tbl, -40))
- expect_data_frame(tail(tab), tail(tbl))
- expect_data_frame(tail(tab, 4), tail(tbl, 4))
- expect_data_frame(tail(tab, 40), tail(tbl, 40))
- expect_data_frame(tail(tab, -4), tail(tbl, -4))
- expect_data_frame(tail(tab, -40), tail(tbl, -40))
+ expect_equal_data_frame(head(tab), head(tbl))
+ expect_equal_data_frame(head(tab, 4), head(tbl, 4))
+ expect_equal_data_frame(head(tab, 40), head(tbl, 40))
+ expect_equal_data_frame(head(tab, -4), head(tbl, -4))
+ expect_equal_data_frame(head(tab, -40), head(tbl, -40))
+ expect_equal_data_frame(tail(tab), tail(tbl))
+ expect_equal_data_frame(tail(tab, 4), tail(tbl, 4))
+ expect_equal_data_frame(tail(tab, 40), tail(tbl, 40))
+ expect_equal_data_frame(tail(tab, -4), tail(tbl, -4))
+ expect_equal_data_frame(tail(tab, -40), tail(tbl, -40))
})
test_that("Table print method", {
@@ -265,10 +265,9 @@ test_that("table() handles ... of arrays, chunked arrays, vectors", {
tab$schema,
schema(a = int32(), b = int32(), c = float64(), x = int32(), y = utf8())
)
- res <- as.data.frame(tab)
- expect_equal(names(res), c("a", "b", "c", "x", "y"))
- expect_equal(
- res,
+
+ expect_equal_data_frame(
+ tab,
tibble::tibble(a = 1:10, b = 1:10, c = v, x = 1:10, y = letters[1:10])
)
})
@@ -280,14 +279,14 @@ test_that("table() auto splices (ARROW-5718)", {
tab2 <- Table$create(!!!df)
expect_equal(tab1, tab2)
expect_equal(tab1$schema, schema(x = int32(), y = utf8()))
- expect_equal(as.data.frame(tab1), df)
+ expect_equal_data_frame(tab1, df)
s <- schema(x = float64(), y = utf8())
tab3 <- Table$create(df, schema = s)
tab4 <- Table$create(!!!df, schema = s)
expect_equal(tab3, tab4)
expect_equal(tab3$schema, s)
- expect_equal(as.data.frame(tab3), df)
+ expect_equal_data_frame(tab3, df)
})
test_that("Validation when creating table with schema (ARROW-10953)", {
@@ -366,7 +365,7 @@ test_that("Can create table with specific dictionary types", {
expect_equal(sch, tab$schema)
if (i != int64()) {
# TODO: same downcast to int32 as we do for int64() type elsewhere
- expect_identical(as.data.frame(tab), fact)
+ expect_equal_data_frame(tab, fact)
}
}
})
@@ -380,7 +379,7 @@ test_that("Table unifies dictionary on conversion back to R (ARROW-8374)", {
res <- tibble::tibble(f = factor(c("a", "c", NA), levels = c("a", "b", "c", "d")))
tab <- Table$create(b1, b2, b3, b4)
- expect_identical(as.data.frame(tab), res)
+ expect_equal_data_frame(tab, res)
})
test_that("Table$SelectColumns()", {
@@ -410,24 +409,24 @@ test_that("Table$create() with different length columns", {
})
test_that("Table$create() scalar recycling with vectors", {
- expect_data_frame(
+ expect_equal_data_frame(
Table$create(a = 1:10, b = 5),
tibble::tibble(a = 1:10, b = 5)
)
})
test_that("Table$create() scalar recycling with Scalars, Arrays, and ChunkedArrays", {
- expect_data_frame(
+ expect_equal_data_frame(
Table$create(a = Array$create(1:10), b = Scalar$create(5)),
tibble::tibble(a = 1:10, b = 5)
)
- expect_data_frame(
+ expect_equal_data_frame(
Table$create(a = Array$create(1:10), b = Array$create(5)),
tibble::tibble(a = 1:10, b = 5)
)
- expect_data_frame(
+ expect_equal_data_frame(
Table$create(a = Array$create(1:10), b = ChunkedArray$create(5)),
tibble::tibble(a = 1:10, b = 5)
)
@@ -712,21 +711,32 @@ test_that("as_arrow_table() errors on data.frame with NULL names", {
expect_error(as_arrow_table(df), "Input data frame columns must be named")
})
-test_that("we only preserve metadata of input to arrow_table when passed a single data.frame", {
- # data.frame in, data.frame out
+test_that("# GH-35038 - passing in multiple arguments doesn't affect return type", {
+
+ df <- data.frame(x = 1)
+ out1 <- as.data.frame(arrow_table(df, name = "1"))
+ out2 <- as.data.frame(arrow_table(name = "1", df))
+
+ expect_s3_class(out1, c("data.frame"), exact = TRUE)
+ expect_s3_class(out2, c("data.frame"), exact = TRUE)
+})
+
+test_that("as.data.frame() on ArrowTabular objects returns a base R data.frame regardless of input type", {
df <- data.frame(x = 1)
out1 <- as.data.frame(arrow_table(df))
expect_s3_class(out1, "data.frame", exact = TRUE)
- # tibble in, tibble out
tib <- tibble::tibble(x = 1)
out2 <- as.data.frame(arrow_table(tib))
- expect_s3_class(out2, c("tbl_df", "tbl", "data.frame"), exact = TRUE)
+ expect_s3_class(out2, "data.frame", exact = TRUE)
+})
- # GH-35038 - passing in multiple arguments doesn't affect return type
- out3 <- as.data.frame(arrow_table(df, name = "1"))
- out4 <- as.data.frame(arrow_table(name = "1", df))
+test_that("collect() on ArrowTabular objects returns a tibble regardless of input type", {
+ df <- data.frame(x = 1)
+ out1 <- dplyr::collect(arrow_table(df))
+ expect_s3_class(out1, c("tbl_df", "tbl", "data.frame"), exact = TRUE)
- expect_s3_class(out3, c("tbl_df", "tbl", "data.frame"), exact = TRUE)
- expect_s3_class(out4, c("tbl_df", "tbl", "data.frame"), exact = TRUE)
+ tib <- tibble::tibble(x = 1)
+ out2 <- dplyr::collect(arrow_table(tib))
+ expect_s3_class(out2, c("tbl_df", "tbl", "data.frame"), exact = TRUE)
})
diff --git a/r/tests/testthat/test-compute-aggregate.R b/r/tests/testthat/test-compute-aggregate.R
index 98face44ff..2732cdef3e 100644
--- a/r/tests/testthat/test-compute-aggregate.R
+++ b/r/tests/testthat/test-compute-aggregate.R
@@ -377,7 +377,7 @@ test_that("value_counts", {
type = struct(values = float64(), counts = int64())
)
expect_equal(value_counts(a), result)
- expect_identical(as.data.frame(value_counts(a)), result_df)
+ expect_equal_data_frame(value_counts(a), result_df)
expect_identical(as.vector(value_counts(a)$counts), result_df$counts)
})
diff --git a/r/tests/testthat/test-compute-sort.R b/r/tests/testthat/test-compute-sort.R
index ba3039c331..b8c8482d64 100644
--- a/r/tests/testthat/test-compute-sort.R
+++ b/r/tests/testthat/test-compute-sort.R
@@ -145,16 +145,16 @@ test_that("Table$SortIndices()", {
as.vector(x$Take(x$SortIndices("chr"))$chr),
sort(tbl$chr, na.last = TRUE)
)
- expect_identical(
- as.data.frame(x$Take(x$SortIndices(c("int", "dbl"), c(FALSE, FALSE)))),
+ expect_equal_data_frame(
+ x$Take(x$SortIndices(c("int", "dbl"), c(FALSE, FALSE))),
tbl %>% arrange(int, dbl)
)
})
test_that("RecordBatch$SortIndices()", {
x <- record_batch(tbl)
- expect_identical(
- as.data.frame(x$Take(x$SortIndices(c("chr", "int", "dbl"), TRUE))),
+ expect_equal_data_frame(
+ x$Take(x$SortIndices(c("chr", "int", "dbl"), TRUE)),
tbl %>% arrange(desc(chr), desc(int), desc(dbl))
)
})
diff --git a/r/tests/testthat/test-dataset-csv.R b/r/tests/testthat/test-dataset-csv.R
index df58f853a1..98858a7d16 100644
--- a/r/tests/testthat/test-dataset-csv.R
+++ b/r/tests/testthat/test-dataset-csv.R
@@ -91,7 +91,7 @@ test_that("CSV scan options", {
sb$FragmentScanOptions(options)
tab <- sb$Finish()$ToTable()
- expect_equal(as.data.frame(tab), tibble(chr = c("foo", NA)))
+ expect_equal_data_frame(tab, data.frame(chr = c("foo", NA)))
# Set default convert options in CsvFileFormat
csv_format <- CsvFileFormat$create(
diff --git a/r/tests/testthat/test-dataset.R b/r/tests/testthat/test-dataset.R
index 91b405fc01..b9972901a7 100644
--- a/r/tests/testthat/test-dataset.R
+++ b/r/tests/testthat/test-dataset.R
@@ -85,8 +85,8 @@ expect_scan_result <- function(ds, schm) {
tab <- scn$ToTable()
expect_r6_class(tab, "Table")
- expect_equal(
- as.data.frame(tab),
+ expect_equal_data_frame(
+ tab,
df1[8, c("chr", "lgl")]
)
}
@@ -806,19 +806,19 @@ test_that("head/tail", {
big_df <- rbind(df1, df2)
# No n provided (default is 6, all from one batch)
- expect_equal(as.data.frame(head(ds)), head(df1))
- expect_equal(as.data.frame(tail(ds)), tail(df2))
+ expect_equal_data_frame(head(ds), head(df1))
+ expect_equal_data_frame(tail(ds), tail(df2))
# n = 0: have to drop `fct` because factor levels don't come through from
# arrow when there are 0 rows
zero_df <- big_df[FALSE, names(big_df) != "fct"]
- expect_equal(as.data.frame(head(ds, 0))[, names(ds) != "fct"], zero_df)
- expect_equal(as.data.frame(tail(ds, 0))[, names(ds) != "fct"], zero_df)
+ expect_equal_data_frame(as.data.frame(head(ds, 0))[, names(ds) != "fct"], zero_df)
+ expect_equal_data_frame(as.data.frame(tail(ds, 0))[, names(ds) != "fct"], zero_df)
# Two more cases: more than 1 batch, and more than nrow
for (n in c(12, 1000)) {
- expect_equal(as.data.frame(head(ds, n)), head(big_df, n))
- expect_equal(as.data.frame(tail(ds, n)), tail(big_df, n))
+ expect_equal_data_frame(head(ds, n), head(big_df, n))
+ expect_equal_data_frame(tail(ds, n), tail(big_df, n))
}
expect_error(head(ds, -1)) # Not yet implemented
expect_error(tail(ds, -1)) # Not yet implemented
@@ -864,18 +864,18 @@ test_that("unique()", {
test_that("Dataset [ (take by index)", {
ds <- open_dataset(dataset_dir)
# Taking only from one file
- expect_equal(
- as.data.frame(ds[c(4, 5, 9), 3:4]),
+ expect_equal_data_frame(
+ ds[c(4, 5, 9), 3:4],
df1[c(4, 5, 9), 3:4]
)
# Taking from more than one
- expect_equal(
- as.data.frame(ds[c(4, 5, 9, 12, 13), 3:4]),
+ expect_equal_data_frame(
+ ds[c(4, 5, 9, 12, 13), 3:4],
rbind(df1[c(4, 5, 9), 3:4], df2[2:3, 3:4])
)
# Taking out of order
- expect_equal(
- as.data.frame(ds[c(4, 13, 9, 12, 5), ]),
+ expect_equal_data_frame(
+ ds[c(4, 13, 9, 12, 5), ],
rbind(
df1[4, ],
df2[3, ],
@@ -889,8 +889,8 @@ test_that("Dataset [ (take by index)", {
ds2 <- ds %>%
filter(int > 6) %>%
select(int, lgl)
- expect_equal(
- as.data.frame(ds2[c(2, 5), ]),
+ expect_equal_data_frame(
+ ds2[c(2, 5), ],
rbind(
df1[8, c("int", "lgl")],
df2[1, c("int", "lgl")]
@@ -957,7 +957,9 @@ test_that("Can delete filesystem dataset files after collection", {
write_dataset(ds0, dataset_dir2)
ds <- open_dataset(dataset_dir2)
- collected <- ds %>% arrange(int) %>% collect()
+ collected <- ds %>%
+ arrange(int) %>%
+ collect()
unlink(dataset_dir2, recursive = TRUE)
expect_false(dir.exists(dataset_dir2))
@@ -971,7 +973,11 @@ test_that("Can delete filesystem dataset files after collection", {
# dataset
write_dataset(ds0, dataset_dir2)
ds <- open_dataset(dataset_dir2)
- collected <- ds %>% arrange(int) %>% head() %>% arrange(int) %>% collect()
+ collected <- ds %>%
+ arrange(int) %>%
+ head() %>%
+ arrange(int) %>%
+ collect()
unlink(dataset_dir2, recursive = TRUE)
expect_false(dir.exists(dataset_dir2))
@@ -985,11 +991,11 @@ test_that("Scanner$ScanBatches", {
ds <- open_dataset(ipc_dir, format = "feather")
batches <- ds$NewScan()$Finish()$ScanBatches()
table <- Table$create(!!!batches)
- expect_equal(as.data.frame(table), rbind(df1, df2))
+ expect_equal_data_frame(table, rbind(df1, df2))
batches <- ds$NewScan()$Finish()$ScanBatches()
table <- Table$create(!!!batches)
- expect_equal(as.data.frame(table), rbind(df1, df2))
+ expect_equal_data_frame(table, rbind(df1, df2))
})
test_that("Scanner$ToRecordBatchReader()", {
@@ -1001,8 +1007,8 @@ test_that("Scanner$ToRecordBatchReader()", {
Scanner$create()
reader <- scan$ToRecordBatchReader()
expect_r6_class(reader, "RecordBatchReader")
- expect_identical(
- as.data.frame(reader$read_table()),
+ expect_equal_data_frame(
+ reader$read_table(),
df1[df1$int > 6, c("int", "lgl")]
)
})
diff --git a/r/tests/testthat/test-dplyr-funcs-datetime.R b/r/tests/testthat/test-dplyr-funcs-datetime.R
index d47f923211..d59356ad65 100644
--- a/r/tests/testthat/test-dplyr-funcs-datetime.R
+++ b/r/tests/testthat/test-dplyr-funcs-datetime.R
@@ -310,7 +310,7 @@ test_that("timestamp round trip correctly via strftime and strptime", {
fmt2 <- paste(base_format2, fmt)
fmt <- paste(base_format, paste0("%", fmt))
test_df <- tibble::tibble(x = strftime(times, format = fmt))
- expect_equal(
+ expect_equal_data_frame(
test_df %>%
arrow_table() %>%
mutate(!!fmt := strptime(x, format = fmt2)) %>%
@@ -1028,7 +1028,7 @@ test_that("leap_year mirror lubridate", {
.input %>%
mutate(x = leap_year(test_year)) %>%
collect(),
- data.frame(
+ tibble::tibble(
test_year = as.Date(c(
"1998-01-01", # not leap year
"1996-01-01", # leap year (divide by 4 rule)
@@ -1048,7 +1048,9 @@ test_that("am/pm mirror lubridate", {
am2 = lubridate::am(test_time),
pm2 = lubridate::pm(test_time)
) %>%
- collect(),
+ # can't use collect() here due to how tibbles store datetimes
+ # TODO: add better explanation above
+ as.data.frame(),
data.frame(
test_time = strptime(
x = c(
diff --git a/r/tests/testthat/test-dplyr-query.R b/r/tests/testthat/test-dplyr-query.R
index e478d0e4c4..bab81a463e 100644
--- a/r/tests/testthat/test-dplyr-query.R
+++ b/r/tests/testthat/test-dplyr-query.R
@@ -119,14 +119,14 @@ test_that("collect(as_data_frame=FALSE)", {
# collect(as_data_frame = FALSE) always returns Table now
expect_r6_class(b2, "Table")
expected <- tbl[tbl$int > 5 & !is.na(tbl$int), c("int", "chr")]
- expect_equal(as.data.frame(b2), expected)
+ expect_equal_data_frame(b2, expected)
b3 <- batch %>%
select(int, strng = chr) %>%
filter(int > 5) %>%
collect(as_data_frame = FALSE)
expect_r6_class(b3, "Table")
- expect_equal(as.data.frame(b3), set_names(expected, c("int", "strng")))
+ expect_equal_data_frame(b3, set_names(expected, c("int", "strng")))
b4 <- batch %>%
select(int, strng = chr) %>%
@@ -134,8 +134,8 @@ test_that("collect(as_data_frame=FALSE)", {
group_by(int) %>%
collect(as_data_frame = FALSE)
expect_r6_class(b4, "Table")
- expect_equal(
- as.data.frame(b4),
+ expect_equal_data_frame(
+ b4,
expected %>%
rename(strng = chr) %>%
group_by(int)
@@ -156,14 +156,14 @@ test_that("compute()", {
expect_r6_class(b2, "Table")
expected <- tbl[tbl$int > 5 & !is.na(tbl$int), c("int", "chr")]
- expect_equal(as.data.frame(b2), expected)
+ expect_equal_data_frame(b2, expected)
b3 <- batch %>%
select(int, strng = chr) %>%
filter(int > 5) %>%
compute()
expect_r6_class(b3, "Table")
- expect_equal(as.data.frame(b3), set_names(expected, c("int", "strng")))
+ expect_equal_data_frame(b3, set_names(expected, c("int", "strng")))
b4 <- batch %>%
select(int, strng = chr) %>%
@@ -171,8 +171,8 @@ test_that("compute()", {
group_by(int) %>%
compute()
expect_r6_class(b4, "Table")
- expect_equal(
- as.data.frame(b4),
+ expect_equal_data_frame(
+ b4,
expected %>%
rename(strng = chr) %>%
group_by(int)
@@ -210,8 +210,7 @@ test_that("arrange then head returns the right data (ARROW-14162)", {
arrange(mpg, disp) %>%
head(4) %>%
collect(),
- mtcars,
- ignore_attr = "row.names"
+ tibble::as_tibble(mtcars)
)
})
@@ -222,8 +221,7 @@ test_that("arrange then tail returns the right data", {
arrange(mpg, disp) %>%
tail(4) %>%
collect(),
- mtcars,
- ignore_attr = "row.names"
+ tibble::as_tibble(mtcars)
)
})
@@ -559,8 +557,8 @@ test_that("compute() on a grouped query returns a Table with groups in metadata"
group_by(int) %>%
compute()
expect_r6_class(tab1, "Table")
- expect_equal(
- as.data.frame(tab1),
+ expect_equal_data_frame(
+ tab1,
tbl %>%
group_by(int)
)
diff --git a/r/tests/testthat/test-duckdb.R b/r/tests/testthat/test-duckdb.R
index 24e8cadf2e..409e99b70f 100644
--- a/r/tests/testthat/test-duckdb.R
+++ b/r/tests/testthat/test-duckdb.R
@@ -168,7 +168,8 @@ test_that("to_arrow roundtrip, with dataset", {
filter(int > 5 & part > 1) %>%
mutate(dbl_plus = dbl + 1) %>%
collect() %>%
- arrange(part, int)
+ arrange(part, int) %>%
+ as.data.frame()
)
})
diff --git a/r/tests/testthat/test-feather.R b/r/tests/testthat/test-feather.R
index 4caadc27c4..188a562fe8 100644
--- a/r/tests/testthat/test-feather.R
+++ b/r/tests/testthat/test-feather.R
@@ -177,7 +177,7 @@ test_that("feather read/write round trip", {
tab1 <- read_feather(feather_file, as_data_frame = FALSE)
expect_r6_class(tab1, "Table")
- expect_equal(tib, as.data.frame(tab1))
+ expect_equal_data_frame(tib, tab1)
})
test_that("Read feather from raw vector", {
diff --git a/r/tests/testthat/test-metadata.R b/r/tests/testthat/test-metadata.R
index 4cf8e49af1..e44cd71038 100644
--- a/r/tests/testthat/test-metadata.R
+++ b/r/tests/testthat/test-metadata.R
@@ -57,7 +57,7 @@ test_that("Table R metadata", {
"$r$columns$c$columns$c1$attributes$extra_attr",
fixed = TRUE
)
- expect_identical(as.data.frame(tab), example_with_metadata)
+ expect_equal_data_frame(tab, example_with_metadata)
})
test_that("R metadata is not stored for types that map to Arrow types (factor, Date, etc.)", {
@@ -94,7 +94,7 @@ test_that("Garbage R metadata doesn't break things", {
tab <- Table$create(example_data[1:6])
tab$metadata$r <- "garbage"
expect_warning(
- expect_identical(as.data.frame(tab), example_data[1:6]),
+ as.data.frame(tab),
"Invalid metadata$r",
fixed = TRUE
)
@@ -103,7 +103,7 @@ test_that("Garbage R metadata doesn't break things", {
tab <- Table$create(example_data[1:6])
tab$metadata$r <- rawToChar(serialize("garbage", NULL, ascii = TRUE))
expect_warning(
- expect_identical(as.data.frame(tab), example_data[1:6]),
+ as.data.frame(tab),
"Invalid metadata$r",
fixed = TRUE
)
@@ -164,7 +164,7 @@ test_that("RecordBatch metadata", {
})
test_that("RecordBatch R metadata", {
- expect_identical(as.data.frame(record_batch(example_with_metadata)), example_with_metadata)
+ expect_equal_data_frame(record_batch(example_with_metadata), example_with_metadata)
})
test_that("R metadata roundtrip via parquet", {
@@ -195,14 +195,14 @@ test_that("haven types roundtrip via feather", {
test_that("Date/time type roundtrip", {
rb <- record_batch(example_with_times)
expect_r6_class(rb$schema$posixlt$type, "VctrsExtensionType")
- expect_identical(as.data.frame(rb), example_with_times)
+ expect_equal_data_frame(rb, example_with_times)
})
test_that("metadata keeps attribute of top level data frame", {
df <- structure(data.frame(x = 1, y = 2), foo = "bar")
tab <- Table$create(df)
expect_identical(attr(as.data.frame(tab), "foo"), "bar")
- expect_identical(as.data.frame(tab), df)
+ expect_equal_data_frame(tab, df)
})
@@ -387,7 +387,18 @@ test_that("grouped_df non-arrow metadata is preserved", {
grouped_tab <- arrow_table(grouped)
expect_equal(
- attributes(as.data.frame(grouped_tab))$other_metadata,
+ attributes(collect.ArrowTabular(grouped_tab))$other_metadata,
"look I'm still here!"
)
})
+
+test_that("data.frame class attribute is not saved", {
+ df <- data.frame(x = 1:5)
+ df_arrow <- arrow_table(df)
+ expect_null(df_arrow$r_metadata$attributes)
+
+ df <- data.frame(x = 1:5)
+ attributes(df)$foo <- "bar"
+ df_arrow <- arrow_table(df)
+ expect_identical(df_arrow$r_metadata, list(attributes = list(foo = "bar"), columns = list(x = NULL)))
+})
diff --git a/r/tests/testthat/test-na-omit.R b/r/tests/testthat/test-na-omit.R
index c2d0fd1b71..cfc71445d4 100644
--- a/r/tests/testthat/test-na-omit.R
+++ b/r/tests/testthat/test-na-omit.R
@@ -42,8 +42,8 @@ test_that("na.fail on Array and ChunkedArray", {
test_that("na.omit on Table", {
tbl <- Table$create(example_data)
- expect_equal(
- as.data.frame(na.omit(tbl)),
+ expect_equal_data_frame(
+ na.omit(tbl),
na.omit(example_data),
# We don't include an attribute with the rows omitted
ignore_attr = "na.action"
@@ -52,8 +52,8 @@ test_that("na.omit on Table", {
test_that("na.exclude on Table", {
tbl <- Table$create(example_data)
- expect_equal(
- as.data.frame(na.exclude(tbl)),
+ expect_equal_data_frame(
+ na.exclude(tbl),
na.exclude(example_data),
ignore_attr = "na.action"
)
@@ -66,8 +66,8 @@ test_that("na.fail on Table", {
test_that("na.omit on RecordBatch", {
batch <- record_batch(example_data)
- expect_equal(
- as.data.frame(na.omit(batch)),
+ expect_equal_data_frame(
+ na.omit(batch),
na.omit(example_data),
ignore_attr = "na.action"
)
@@ -75,8 +75,8 @@ test_that("na.omit on RecordBatch", {
test_that("na.exclude on RecordBatch", {
batch <- record_batch(example_data)
- expect_equal(
- as.data.frame(na.exclude(batch)),
+ expect_equal_data_frame(
+ na.exclude(batch),
na.omit(example_data),
ignore_attr = "na.action"
)
diff --git a/r/tests/testthat/test-python-flight.R b/r/tests/testthat/test-python-flight.R
index d2b6fd491e..2e60957d2f 100644
--- a/r/tests/testthat/test-python-flight.R
+++ b/r/tests/testthat/test-python-flight.R
@@ -53,13 +53,13 @@ if (process_is_running("demo_flight_server")) {
})
test_that("flight_get", {
- expect_identical(as.data.frame(flight_get(client, flight_obj)), example_data)
+ expect_equal_data_frame(flight_get(client, flight_obj), example_data)
})
test_that("flight_put with RecordBatch", {
flight_obj2 <- tempfile()
flight_put(client, RecordBatch$create(example_data), path = flight_obj2)
- expect_identical(as.data.frame(flight_get(client, flight_obj2)), example_data)
+ expect_equal_data_frame(flight_get(client, flight_obj2), example_data)
})
test_that("flight_put with overwrite = FALSE", {
@@ -69,7 +69,7 @@ if (process_is_running("demo_flight_server")) {
)
# Default is TRUE so this will overwrite
flight_put(client, example_with_times, path = flight_obj)
- expect_identical(as.data.frame(flight_get(client, flight_obj)), example_with_times)
+ expect_equal_data_frame(flight_get(client, flight_obj), example_with_times)
})
test_that("flight_disconnect", {
diff --git a/r/tests/testthat/test-python.R b/r/tests/testthat/test-python.R
index 968d72119c..58eb84f5d6 100644
--- a/r/tests/testthat/test-python.R
+++ b/r/tests/testthat/test-python.R
@@ -108,7 +108,7 @@ test_that("RecordBatch with metadata roundtrip", {
expect_identical(rbatch$metadata, batch$metadata)
expect_equal(rbatch$a, batch$a)
expect_equal(rbatch[c("b", "c", "d")], batch[c("b", "c", "d")])
- expect_identical(as.data.frame(rbatch), example_with_metadata)
+ expect_equal_data_frame(rbatch, example_with_metadata)
})
test_that("Table with metadata roundtrip", {
@@ -123,7 +123,7 @@ test_that("Table with metadata roundtrip", {
expect_identical(rtab$metadata, tab$metadata)
expect_equal(rtab$a, tab$a)
expect_equal(rtab[c("b", "c", "d")], tab[c("b", "c", "d")])
- expect_identical(as.data.frame(rtab), example_with_metadata)
+ expect_equal_data_frame(rtab, example_with_metadata)
})
test_that("DataType roundtrip", {
@@ -160,8 +160,8 @@ test_that("RecordBatchReader to python", {
expect_s3_class(pytab, "pyarrow.lib.Table")
back_to_r <- reticulate::py_to_r(pytab)
expect_r6_class(back_to_r, "Table")
- expect_identical(
- as.data.frame(back_to_r),
+ expect_equal_data_frame(
+ back_to_r,
example_data %>%
select(int, lgl) %>%
filter(int > 6)
@@ -178,7 +178,7 @@ test_that("RecordBatchReader from python", {
back_to_r <- reticulate::py_to_r(pyreader)
rt_table <- back_to_r$read_table()
expect_r6_class(rt_table, "Table")
- expect_identical(as.data.frame(rt_table), example_data)
+ expect_equal_data_frame(rt_table, example_data)
scan <- Scanner$create(tab)
reader <- scan$ToRecordBatchReader()
diff --git a/r/tests/testthat/test-read-write.R b/r/tests/testthat/test-read-write.R
index 66f6db56d9..9475788ee5 100644
--- a/r/tests/testthat/test-read-write.R
+++ b/r/tests/testthat/test-read-write.R
@@ -119,7 +119,7 @@ test_that("reading/writing a raw vector (sparklyr integration)", {
}
bytes <- write_to_raw(example_data)
expect_type(bytes, "raw")
- expect_identical(read_from_raw_test(bytes), example_data)
+ expect_equal_data_frame(read_from_raw_test(bytes), example_data)
# this could just be `read_ipc_stream(x)`; propose that
- expect_identical(read_ipc_stream(bytes), example_data)
+ expect_equal_data_frame(read_ipc_stream(bytes), example_data)
})
diff --git a/r/tests/testthat/test-utf.R b/r/tests/testthat/test-utf.R
index f7553da5b4..660b2a4784 100644
--- a/r/tests/testthat/test-utf.R
+++ b/r/tests/testthat/test-utf.R
@@ -45,24 +45,24 @@ test_that("We handle non-UTF strings", {
expect_identical(as.vector(ChunkedArray$create(df)), df)
# Table (including field name)
- expect_identical(as.data.frame(Table$create(df)), df)
- expect_identical(as.data.frame(Table$create(df_struct)), df_struct)
+ expect_equal_data_frame(Table$create(df), df)
+ expect_equal_data_frame(Table$create(df_struct), df_struct)
# RecordBatch
- expect_identical(as.data.frame(record_batch(df)), df)
- expect_identical(as.data.frame(record_batch(df_struct)), df_struct)
+ expect_equal_data_frame(record_batch(df), df)
+ expect_equal_data_frame(record_batch(df_struct), df_struct)
# Schema field name
- df_schema <- do.call(schema, raw_schema)
+ df_schema <- schema(raw_schema)
expect_identical(names(df_schema), names(df))
df_struct_schema <- schema(a = do.call(struct, raw_schema))
# Create table/batch with schema
- expect_identical(as.data.frame(Table$create(df, schema = df_schema)), df)
- expect_identical(as.data.frame(Table$create(df_struct, schema = df_struct_schema)), df_struct)
- expect_identical(as.data.frame(record_batch(df, schema = df_schema)), df)
- expect_identical(as.data.frame(record_batch(df_struct, schema = df_struct_schema)), df_struct)
+ expect_equal_data_frame(Table$create(df, schema = df_schema), df)
+ expect_equal_data_frame(Table$create(df_struct, schema = df_struct_schema), df_struct)
+ expect_equal_data_frame(record_batch(df, schema = df_schema), df)
+ expect_equal_data_frame(record_batch(df_struct, schema = df_struct_schema), df_struct)
# Serialization
feather_file <- tempfile()