You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by th...@apache.org on 2023/05/03 14:53:43 UTC
[arrow] branch main updated: GH-34775: [R] arrow_table: as.data.frame() sometimes returns a tbl and sometimes a data.frame (#35173)

This is an automated email from the ASF dual-hosted git repository.

thisisnic pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/main by this push:
     new 205ceb9938 GH-34775: [R] arrow_table: as.data.frame() sometimes returns a tbl and sometimes a data.frame (#35173)
205ceb9938 is described below

commit 205ceb99389d2d5060551ebcba17635185c74d9d
Author: Nic Crane <th...@gmail.com>
AuthorDate: Wed May 3 16:53:35 2023 +0200

    GH-34775: [R] arrow_table: as.data.frame() sometimes returns a tbl and sometimes a data.frame (#35173)
    
    Features of this PR:
    * Ensures that calling `as.data.frame()` on Arrow objects returns base R `data.frame` objects.
    * Drops the `class` attribute metadata of input objects of `data.frame` class (i.e. that don't have inherit from any additional classes other than `data.frame`).  This results in us sacrificing roundtrip class fidelity for `data.frame` objects (i.e. if we input a base R data.frame, convert it to an Arrow Table, and then convert it back to R, we get a tibble).  However, we now have consistency in the type of returned objects, retain roundtrip fidelity for other (non-class) metadata, an [...]
    * Implements `dplyr::collect()` for StructArrays so that these objects can still be returned as tibbles if needed.
    * Renames `expect_data_frame()` to `expect_equal_data_frame()` for clarity, and updates it to convert both the object and expected object to data.frames.
    
    * Closes: #34775
    
    Authored-by: Nic Crane <th...@gmail.com>
    Signed-off-by: Nic Crane <th...@gmail.com>
---
 r/R/array.R                                  |   2 +-
 r/R/arrow-tabular.R                          |   3 +-
 r/R/csv.R                                    |   2 +-
 r/R/dplyr-collect.R                          |   7 +-
 r/R/dplyr.R                                  |   3 +-
 r/R/feather.R                                |   3 +-
 r/R/ipc-stream.R                             |   2 +-
 r/R/json.R                                   |   2 +-
 r/R/metadata.R                               |   9 +++
 r/R/parquet.R                                |   2 +-
 r/tests/testthat/helper-expectation.R        |   5 +-
 r/tests/testthat/test-RecordBatch.R          |  83 ++++++++++----------
 r/tests/testthat/test-Table.R                | 110 +++++++++++++++------------
 r/tests/testthat/test-compute-aggregate.R    |   2 +-
 r/tests/testthat/test-compute-sort.R         |   8 +-
 r/tests/testthat/test-dataset-csv.R          |   2 +-
 r/tests/testthat/test-dataset.R              |  50 ++++++------
 r/tests/testthat/test-dplyr-funcs-datetime.R |   8 +-
 r/tests/testthat/test-dplyr-query.R          |  26 +++----
 r/tests/testthat/test-duckdb.R               |   3 +-
 r/tests/testthat/test-feather.R              |   2 +-
 r/tests/testthat/test-metadata.R             |  25 ++++--
 r/tests/testthat/test-na-omit.R              |  16 ++--
 r/tests/testthat/test-python-flight.R        |   6 +-
 r/tests/testthat/test-python.R               |  10 +--
 r/tests/testthat/test-read-write.R           |   4 +-
 r/tests/testthat/test-utf.R                  |  18 ++---
 27 files changed, 229 insertions(+), 184 deletions(-)

diff --git a/r/R/array.R b/r/R/array.R
index 109f6daaa2..3e9e0eae1a 100644
--- a/r/R/array.R
+++ b/r/R/array.R
@@ -474,7 +474,7 @@ dim.StructArray <- function(x, ...) c(length(x), x$type$num_fields)
 
 #' @export
 as.data.frame.StructArray <- function(x, row.names = NULL, optional = FALSE, ...) {
-  as.vector(x)
+  as.data.frame(collect.StructArray(x), row.names = row.names, optional = optional, ...)
 }
 
 #' @rdname array
diff --git a/r/R/arrow-tabular.R b/r/R/arrow-tabular.R
index ae68cc2118..e62547d291 100644
--- a/r/R/arrow-tabular.R
+++ b/r/R/arrow-tabular.R
@@ -94,7 +94,8 @@ ArrowTabular <- R6Class("ArrowTabular",
 #' @export
 as.data.frame.ArrowTabular <- function(x, row.names = NULL, optional = FALSE, ...) {
   df <- x$to_data_frame()
-  apply_arrow_r_metadata(df, x$metadata$r)
+  out <- apply_arrow_r_metadata(df, x$metadata$r)
+  as.data.frame(out, row.names = row.names, optional = optional, ...)
 }
 
 #' @export
diff --git a/r/R/csv.R b/r/R/csv.R
index 8224323866..88008bd682 100644
--- a/r/R/csv.R
+++ b/r/R/csv.R
@@ -248,7 +248,7 @@ read_delim_arrow <- function(file,
   }
 
   if (isTRUE(as_data_frame)) {
-    tab <- as.data.frame(tab)
+    tab <- collect.ArrowTabular(tab)
   }
 
   tab
diff --git a/r/R/dplyr-collect.R b/r/R/dplyr-collect.R
index 9205a31b14..970722e86a 100644
--- a/r/R/dplyr-collect.R
+++ b/r/R/dplyr-collect.R
@@ -24,7 +24,8 @@ collect.arrow_dplyr_query <- function(x, as_data_frame = TRUE, ...) {
 }
 collect.ArrowTabular <- function(x, as_data_frame = TRUE, ...) {
   if (as_data_frame) {
-    as.data.frame(x, ...)
+    df <- x$to_data_frame()
+    apply_arrow_r_metadata(df, x$metadata$r)
   } else {
     x
   }
@@ -34,6 +35,10 @@ collect.Dataset <- function(x, as_data_frame = TRUE, ...) {
 }
 collect.RecordBatchReader <- collect.Dataset
 
+collect.StructArray <- function(x, row.names = NULL, optional = FALSE, ...) {
+  as.vector(x)
+}
+
 compute.ArrowTabular <- function(x, ...) x
 compute.arrow_dplyr_query <- function(x, ...) {
   # TODO: should this tryCatch move down into as_arrow_table()?
diff --git a/r/R/dplyr.R b/r/R/dplyr.R
index 54ecc80aad..62b345e1ce 100644
--- a/r/R/dplyr.R
+++ b/r/R/dplyr.R
@@ -216,7 +216,8 @@ unique.RecordBatchReader <- unique.arrow_dplyr_query
 
 #' @export
 as.data.frame.arrow_dplyr_query <- function(x, row.names = NULL, optional = FALSE, ...) {
-  collect.arrow_dplyr_query(x, as_data_frame = TRUE, ...)
+  out <- collect.arrow_dplyr_query(x, as_data_frame = TRUE, ...)
+  as.data.frame(out)
 }
 
 #' @export
diff --git a/r/R/feather.R b/r/R/feather.R
index 1488db29eb..24971669fc 100644
--- a/r/R/feather.R
+++ b/r/R/feather.R
@@ -196,7 +196,8 @@ read_feather <- function(file, col_select = NULL, as_data_frame = TRUE, mmap = T
   )
 
   if (isTRUE(as_data_frame)) {
-    out <- as.data.frame(out)
+    df <- out$to_data_frame()
+    out <- apply_arrow_r_metadata(df, out$metadata$r)
   }
   out
 }
diff --git a/r/R/ipc-stream.R b/r/R/ipc-stream.R
index f0b4a6aae0..7144132393 100644
--- a/r/R/ipc-stream.R
+++ b/r/R/ipc-stream.R
@@ -106,7 +106,7 @@ read_ipc_stream <- function(file, as_data_frame = TRUE, ...) {
   # https://issues.apache.org/jira/browse/ARROW-6830
   out <- RecordBatchStreamReader$create(file)$read_table()
   if (as_data_frame) {
-    out <- as.data.frame(out)
+    out <- collect.ArrowTabular(out)
   }
   out
 }
diff --git a/r/R/json.R b/r/R/json.R
index cdbe850b32..e8131b37f2 100644
--- a/r/R/json.R
+++ b/r/R/json.R
@@ -84,7 +84,7 @@ read_json_arrow <- function(file,
   }
 
   if (isTRUE(as_data_frame)) {
-    tab <- as.data.frame(tab)
+    tab <- collect.ArrowTabular(tab)
   }
   tab
 }
diff --git a/r/R/metadata.R b/r/R/metadata.R
index 6a54b3e384..3ae2db4eaa 100644
--- a/r/R/metadata.R
+++ b/r/R/metadata.R
@@ -22,6 +22,14 @@
   # drop problems attributes (most likely from readr)
   x[["attributes"]][["problems"]] <- NULL
 
+  # remove the class if it's just data.frame
+  if (identical(x$attributes$class, "data.frame")) {
+    x$attributes <- x$attributes[names(x$attributes) != "class"]
+    if (is_empty(x$attributes)) {
+      x <- x[names(x) != "attributes"]
+    }
+  }
+
   out <- serialize(x, NULL, ascii = TRUE)
 
   # if the metadata is over 100 kB, compress
@@ -62,6 +70,7 @@ apply_arrow_r_metadata <- function(x, r_metadata) {
     expr = {
       columns_metadata <- r_metadata$columns
       if (is.data.frame(x)) {
+        # if columns metadata exists, apply it here
         if (length(names(x)) && !is.null(columns_metadata)) {
           for (name in intersect(names(columns_metadata), names(x))) {
             x[[name]] <- apply_arrow_r_metadata(x[[name]], columns_metadata[[name]])
diff --git a/r/R/parquet.R b/r/R/parquet.R
index f3d384e8c2..1335e85219 100644
--- a/r/R/parquet.R
+++ b/r/R/parquet.R
@@ -70,7 +70,7 @@ read_parquet <- function(file,
   }
 
   if (as_data_frame) {
-    tab <- as.data.frame(tab)
+    tab <- collect.ArrowTabular(tab)
   }
   tab
 }
diff --git a/r/tests/testthat/helper-expectation.R b/r/tests/testthat/helper-expectation.R
index 303a96ead7..090ed36aa7 100644
--- a/r/tests/testthat/helper-expectation.R
+++ b/r/tests/testthat/helper-expectation.R
@@ -19,8 +19,9 @@ expect_as_vector <- function(x, y, ...) {
   expect_equal(as.vector(x), y, ...)
 }
 
-expect_data_frame <- function(x, y, ...) {
-  expect_equal(as.data.frame(x), y, ...)
+# expect both objects to contain equal values when converted to data.frame objects
+expect_equal_data_frame <- function(x, y, ...) {
+  expect_equal(as.data.frame(x), as.data.frame(y), ...)
 }
 
 expect_r6_class <- function(object, class) {
diff --git a/r/tests/testthat/test-RecordBatch.R b/r/tests/testthat/test-RecordBatch.R
index 7e7084542d..f29b75dbf4 100644
--- a/r/tests/testthat/test-RecordBatch.R
+++ b/r/tests/testthat/test-RecordBatch.R
@@ -89,7 +89,7 @@ test_that("RecordBatch", {
     schema(dbl = float64(), lgl = boolean(), chr = utf8(), fct = dictionary(int8(), utf8()))
   )
   expect_equal(batch2$column(0), batch$column(1))
-  expect_data_frame(batch2, tbl[, -1])
+  expect_equal_data_frame(batch2, tbl[, -1])
 
   # input validation
   expect_error(batch$RemoveColumn(NA), "'i' cannot be NA")
@@ -109,10 +109,10 @@ test_that("RecordBatch S3 methods", {
 
 test_that("RecordBatch$Slice", {
   batch3 <- batch$Slice(5)
-  expect_data_frame(batch3, tbl[6:10, ])
+  expect_equal_data_frame(batch3, tbl[6:10, ])
 
   batch4 <- batch$Slice(5, 2)
-  expect_data_frame(batch4, tbl[6:7, ])
+  expect_equal_data_frame(batch4, tbl[6:7, ])
 
   # Input validation
   expect_error(batch$Slice("ten"))
@@ -131,20 +131,20 @@ test_that("RecordBatch$Slice", {
 })
 
 test_that("[ on RecordBatch", {
-  expect_data_frame(batch[6:7, ], tbl[6:7, ])
-  expect_data_frame(batch[c(6, 7), ], tbl[6:7, ])
-  expect_data_frame(batch[6:7, 2:4], tbl[6:7, 2:4])
-  expect_data_frame(batch[, c("dbl", "fct")], tbl[, c(2, 5)])
+  expect_equal_data_frame(batch[6:7, ], tbl[6:7, ])
+  expect_equal_data_frame(batch[c(6, 7), ], tbl[6:7, ])
+  expect_equal_data_frame(batch[6:7, 2:4], tbl[6:7, 2:4])
+  expect_equal_data_frame(batch[, c("dbl", "fct")], tbl[, c(2, 5)])
   expect_identical(as.vector(batch[, "chr", drop = TRUE]), tbl$chr)
-  expect_data_frame(batch[c(7, 3, 5), 2:4], tbl[c(7, 3, 5), 2:4])
-  expect_data_frame(
+  expect_equal_data_frame(batch[c(7, 3, 5), 2:4], tbl[c(7, 3, 5), 2:4])
+  expect_equal_data_frame(
     batch[rep(c(FALSE, TRUE), 5), ],
     tbl[c(2, 4, 6, 8, 10), ]
   )
   # bool Array
-  expect_data_frame(batch[batch$lgl, ], tbl[tbl$lgl, ])
+  expect_equal_data_frame(batch[batch$lgl, ], tbl[tbl$lgl, ])
   # int Array
-  expect_data_frame(batch[Array$create(5:6), 2:4], tbl[6:7, 2:4])
+  expect_equal_data_frame(batch[Array$create(5:6), 2:4], tbl[6:7, 2:4])
 
   # input validation
   expect_error(batch[, c("dbl", "NOTACOLUMN")], 'Column not found: "NOTACOLUMN"')
@@ -176,15 +176,15 @@ test_that("[[<- assignment", {
 
   # can remove a column
   batch[["chr"]] <- NULL
-  expect_data_frame(batch, tbl[-4])
+  expect_equal_data_frame(batch, tbl[-4])
 
   # can remove a column by index
   batch[[4]] <- NULL
-  expect_data_frame(batch, tbl[1:3])
+  expect_equal_data_frame(batch, tbl[1:3])
 
   # can add a named column
   batch[["new"]] <- letters[10:1]
-  expect_data_frame(batch, dplyr::bind_cols(tbl[1:3], new = letters[10:1]))
+  expect_equal_data_frame(batch, dplyr::bind_cols(tbl[1:3], new = letters[10:1]))
 
   # can replace a column by index
   batch[[2]] <- as.numeric(10:1)
@@ -239,16 +239,16 @@ test_that("head and tail on RecordBatch", {
     fct = factor(letters[1:10])
   )
   batch <- RecordBatch$create(tbl)
-  expect_data_frame(head(batch), head(tbl))
-  expect_data_frame(head(batch, 4), head(tbl, 4))
-  expect_data_frame(head(batch, 40), head(tbl, 40))
-  expect_data_frame(head(batch, -4), head(tbl, -4))
-  expect_data_frame(head(batch, -40), head(tbl, -40))
-  expect_data_frame(tail(batch), tail(tbl))
-  expect_data_frame(tail(batch, 4), tail(tbl, 4))
-  expect_data_frame(tail(batch, 40), tail(tbl, 40))
-  expect_data_frame(tail(batch, -4), tail(tbl, -4))
-  expect_data_frame(tail(batch, -40), tail(tbl, -40))
+  expect_equal_data_frame(head(batch), head(tbl))
+  expect_equal_data_frame(head(batch, 4), head(tbl, 4))
+  expect_equal_data_frame(head(batch, 40), head(tbl, 40))
+  expect_equal_data_frame(head(batch, -4), head(tbl, -4))
+  expect_equal_data_frame(head(batch, -40), head(tbl, -40))
+  expect_equal_data_frame(tail(batch), tail(tbl))
+  expect_equal_data_frame(tail(batch, 4), tail(tbl, 4))
+  expect_equal_data_frame(tail(batch, 40), tail(tbl, 40))
+  expect_equal_data_frame(tail(batch, -4), tail(tbl, -4))
+  expect_equal_data_frame(tail(batch, -40), tail(tbl, -40))
 })
 
 test_that("RecordBatch print method", {
@@ -346,8 +346,8 @@ test_that("record_batch() handles data frame columns", {
       b = struct(x = int32(), y = int32())
     )
   )
-  out <- as.data.frame(batch)
-  expect_equal(out, tibble::tibble(a = 1:10, b = tib))
+
+  expect_equal_data_frame(batch, tibble::tibble(a = 1:10, b = tib))
 
   # if not named, columns from tib are auto spliced
   batch2 <- record_batch(a = 1:10, tib)
@@ -355,8 +355,8 @@ test_that("record_batch() handles data frame columns", {
     batch2$schema,
     schema(a = int32(), x = int32(), y = int32())
   )
-  out <- as.data.frame(batch2)
-  expect_equal(out, tibble::tibble(a = 1:10, !!!tib))
+
+  expect_equal_data_frame(batch2, tibble::tibble(a = 1:10, !!!tib))
 })
 
 test_that("record_batch() handles data frame columns with schema spec", {
@@ -366,8 +366,7 @@ test_that("record_batch() handles data frame columns with schema spec", {
   schema <- schema(a = int32(), b = struct(x = int16(), y = float64()))
   batch <- record_batch(a = 1:10, b = tib, schema = schema)
   expect_equal(batch$schema, schema)
-  out <- as.data.frame(batch)
-  expect_equal(out, tibble::tibble(a = 1:10, b = tib_float))
+  expect_equal_data_frame(batch, tibble::tibble(a = 1:10, b = tib_float))
 
   schema <- schema(a = int32(), b = struct(x = int16(), y = utf8()))
   expect_error(record_batch(a = 1:10, b = tib, schema = schema))
@@ -379,15 +378,15 @@ test_that("record_batch() auto splices (ARROW-5718)", {
   batch2 <- record_batch(!!!df)
   expect_equal(batch1, batch2)
   expect_equal(batch1$schema, schema(x = int32(), y = utf8()))
-  expect_data_frame(batch1, df)
+  expect_equal_data_frame(batch1, df)
 
   batch3 <- record_batch(df, z = 1:10)
   batch4 <- record_batch(!!!df, z = 1:10)
   expect_equal(batch3, batch4)
   expect_equal(batch3$schema, schema(x = int32(), y = utf8(), z = int32()))
-  expect_equal(
-    as.data.frame(batch3),
-    tibble::as_tibble(cbind(df, data.frame(z = 1:10)))
+  expect_equal_data_frame(
+    batch3,
+    cbind(df, data.frame(z = 1:10))
   )
 
   s <- schema(x = float64(), y = utf8())
@@ -395,16 +394,16 @@ test_that("record_batch() auto splices (ARROW-5718)", {
   batch6 <- record_batch(!!!df, schema = s)
   expect_equal(batch5, batch6)
   expect_equal(batch5$schema, s)
-  expect_equal(as.data.frame(batch5), df)
+  expect_equal_data_frame(batch5, df)
 
   s2 <- schema(x = float64(), y = utf8(), z = int16())
   batch7 <- record_batch(df, z = 1:10, schema = s2)
   batch8 <- record_batch(!!!df, z = 1:10, schema = s2)
   expect_equal(batch7, batch8)
   expect_equal(batch7$schema, s2)
-  expect_equal(
-    as.data.frame(batch7),
-    tibble::as_tibble(cbind(df, data.frame(z = 1:10)))
+  expect_equal_data_frame(
+    batch7,
+    cbind(df, data.frame(z = 1:10))
   )
 })
 
@@ -425,24 +424,24 @@ test_that("record_batch() handles null type (ARROW-7064)", {
 })
 
 test_that("record_batch() scalar recycling with vectors", {
-  expect_data_frame(
+  expect_equal_data_frame(
     record_batch(a = 1:10, b = 5),
     tibble::tibble(a = 1:10, b = 5)
   )
 })
 
 test_that("record_batch() scalar recycling with Scalars, Arrays, and ChunkedArrays", {
-  expect_data_frame(
+  expect_equal_data_frame(
     record_batch(a = Array$create(1:10), b = Scalar$create(5)),
     tibble::tibble(a = 1:10, b = 5)
   )
 
-  expect_data_frame(
+  expect_equal_data_frame(
     record_batch(a = Array$create(1:10), b = Array$create(5)),
     tibble::tibble(a = 1:10, b = 5)
   )
 
-  expect_data_frame(
+  expect_equal_data_frame(
     record_batch(a = Array$create(1:10), b = ChunkedArray$create(5)),
     tibble::tibble(a = 1:10, b = 5)
   )
diff --git a/r/tests/testthat/test-Table.R b/r/tests/testthat/test-Table.R
index 233705323e..ce3254a158 100644
--- a/r/tests/testthat/test-Table.R
+++ b/r/tests/testthat/test-Table.R
@@ -68,26 +68,26 @@ tab <- Table$create(tbl)
 test_that("[, [[, $ for Table", {
   expect_identical(names(tab), names(tbl))
 
-  expect_data_frame(tab[6:7, ], tbl[6:7, ])
-  expect_data_frame(tab[6:7, 2:4], tbl[6:7, 2:4])
-  expect_data_frame(tab[, c("dbl", "fct")], tbl[, c(2, 5)])
+  expect_equal_data_frame(tab[6:7, ], tbl[6:7, ])
+  expect_equal_data_frame(tab[6:7, 2:4], tbl[6:7, 2:4])
+  expect_equal_data_frame(tab[, c("dbl", "fct")], tbl[, c(2, 5)])
   expect_as_vector(tab[, "chr", drop = TRUE], tbl$chr)
   # Take within a single chunk
-  expect_data_frame(tab[c(7, 3, 5), 2:4], tbl[c(7, 3, 5), 2:4])
-  expect_data_frame(tab[rep(c(FALSE, TRUE), 5), ], tbl[c(2, 4, 6, 8, 10), ])
+  expect_equal_data_frame(tab[c(7, 3, 5), 2:4], tbl[c(7, 3, 5), 2:4])
+  expect_equal_data_frame(tab[rep(c(FALSE, TRUE), 5), ], tbl[c(2, 4, 6, 8, 10), ])
   # bool ChunkedArray (with one chunk)
-  expect_data_frame(tab[tab$lgl, ], tbl[tbl$lgl, ])
+  expect_equal_data_frame(tab[tab$lgl, ], tbl[tbl$lgl, ])
   # ChunkedArray with multiple chunks
   c1 <- c(TRUE, FALSE, TRUE, TRUE, FALSE)
   c2 <- c(FALSE, FALSE, TRUE, TRUE, FALSE)
   ca <- ChunkedArray$create(c1, c2)
-  expect_data_frame(tab[ca, ], tbl[c(1, 3, 4, 8, 9), ])
+  expect_equal_data_frame(tab[ca, ], tbl[c(1, 3, 4, 8, 9), ])
   # int Array
-  expect_data_frame(tab[Array$create(5:6), 2:4], tbl[6:7, 2:4])
+  expect_equal_data_frame(tab[Array$create(5:6), 2:4], tbl[6:7, 2:4])
   # ChunkedArray
-  expect_data_frame(tab[ChunkedArray$create(5L, 6L), 2:4], tbl[6:7, 2:4])
+  expect_equal_data_frame(tab[ChunkedArray$create(5L, 6L), 2:4], tbl[6:7, 2:4])
   # Expression
-  expect_data_frame(tab[tab$int > 6, ], tbl[tbl$int > 6, ])
+  expect_equal_data_frame(tab[tab$int > 6, ], tbl[tbl$int > 6, ])
 
   expect_as_vector(tab[["int"]], tbl$int)
   expect_as_vector(tab$int, tbl$int)
@@ -95,9 +95,9 @@ test_that("[, [[, $ for Table", {
   expect_null(tab$qwerty)
   expect_null(tab[["asdf"]])
   # List-like column slicing
-  expect_data_frame(tab[2:4], tbl[2:4])
-  expect_data_frame(tab[c(2, 1)], tbl[c(2, 1)])
-  expect_data_frame(tab[-3], tbl[-3])
+  expect_equal_data_frame(tab[2:4], tbl[2:4])
+  expect_equal_data_frame(tab[c(2, 1)], tbl[c(2, 1)])
+  expect_equal_data_frame(tab[-3], tbl[-3])
 
   expect_error(tab[[c(4, 3)]])
   expect_error(tab[[NA]], "'i' must be character or numeric, not logical")
@@ -112,21 +112,21 @@ test_that("[, [[, $ for Table", {
   expect_error(tab[, c(6, NA)], "Column indices cannot be NA")
 
   skip("Table with 0 cols doesn't know how many rows it should have")
-  expect_data_frame(tab[0], tbl[0])
+  expect_equal_data_frame(tab[0], tbl[0])
 })
 
 test_that("[[<- assignment", {
   # can remove a column
   tab[["chr"]] <- NULL
-  expect_data_frame(tab, tbl[-4])
+  expect_equal_data_frame(tab, tbl[-4])
 
   # can remove a column by index
   tab[[4]] <- NULL
-  expect_data_frame(tab, tbl[1:3])
+  expect_equal_data_frame(tab, tbl[1:3])
 
   # can add a named column
   tab[["new"]] <- letters[10:1]
-  expect_data_frame(tab, dplyr::bind_cols(tbl[1:3], new = letters[10:1]))
+  expect_equal_data_frame(tab, dplyr::bind_cols(tbl[1:3], new = letters[10:1]))
 
   # can replace a column by index
   tab[[2]] <- as.numeric(10:1)
@@ -177,10 +177,10 @@ test_that("[[<- assignment", {
 
 test_that("Table$Slice", {
   tab2 <- tab$Slice(5)
-  expect_data_frame(tab2, tbl[6:10, ])
+  expect_equal_data_frame(tab2, tbl[6:10, ])
 
   tab3 <- tab$Slice(5, 2)
-  expect_data_frame(tab3, tbl[6:7, ])
+  expect_equal_data_frame(tab3, tbl[6:7, ])
 
   # Input validation
   expect_error(tab$Slice("ten"))
@@ -199,16 +199,16 @@ test_that("Table$Slice", {
 })
 
 test_that("head and tail on Table", {
-  expect_data_frame(head(tab), head(tbl))
-  expect_data_frame(head(tab, 4), head(tbl, 4))
-  expect_data_frame(head(tab, 40), head(tbl, 40))
-  expect_data_frame(head(tab, -4), head(tbl, -4))
-  expect_data_frame(head(tab, -40), head(tbl, -40))
-  expect_data_frame(tail(tab), tail(tbl))
-  expect_data_frame(tail(tab, 4), tail(tbl, 4))
-  expect_data_frame(tail(tab, 40), tail(tbl, 40))
-  expect_data_frame(tail(tab, -4), tail(tbl, -4))
-  expect_data_frame(tail(tab, -40), tail(tbl, -40))
+  expect_equal_data_frame(head(tab), head(tbl))
+  expect_equal_data_frame(head(tab, 4), head(tbl, 4))
+  expect_equal_data_frame(head(tab, 40), head(tbl, 40))
+  expect_equal_data_frame(head(tab, -4), head(tbl, -4))
+  expect_equal_data_frame(head(tab, -40), head(tbl, -40))
+  expect_equal_data_frame(tail(tab), tail(tbl))
+  expect_equal_data_frame(tail(tab, 4), tail(tbl, 4))
+  expect_equal_data_frame(tail(tab, 40), tail(tbl, 40))
+  expect_equal_data_frame(tail(tab, -4), tail(tbl, -4))
+  expect_equal_data_frame(tail(tab, -40), tail(tbl, -40))
 })
 
 test_that("Table print method", {
@@ -265,10 +265,9 @@ test_that("table() handles ... of arrays, chunked arrays, vectors", {
     tab$schema,
     schema(a = int32(), b = int32(), c = float64(), x = int32(), y = utf8())
   )
-  res <- as.data.frame(tab)
-  expect_equal(names(res), c("a", "b", "c", "x", "y"))
-  expect_equal(
-    res,
+
+  expect_equal_data_frame(
+    tab,
     tibble::tibble(a = 1:10, b = 1:10, c = v, x = 1:10, y = letters[1:10])
   )
 })
@@ -280,14 +279,14 @@ test_that("table() auto splices (ARROW-5718)", {
   tab2 <- Table$create(!!!df)
   expect_equal(tab1, tab2)
   expect_equal(tab1$schema, schema(x = int32(), y = utf8()))
-  expect_equal(as.data.frame(tab1), df)
+  expect_equal_data_frame(tab1, df)
 
   s <- schema(x = float64(), y = utf8())
   tab3 <- Table$create(df, schema = s)
   tab4 <- Table$create(!!!df, schema = s)
   expect_equal(tab3, tab4)
   expect_equal(tab3$schema, s)
-  expect_equal(as.data.frame(tab3), df)
+  expect_equal_data_frame(tab3, df)
 })
 
 test_that("Validation when creating table with schema (ARROW-10953)", {
@@ -366,7 +365,7 @@ test_that("Can create table with specific dictionary types", {
     expect_equal(sch, tab$schema)
     if (i != int64()) {
       # TODO: same downcast to int32 as we do for int64() type elsewhere
-      expect_identical(as.data.frame(tab), fact)
+      expect_equal_data_frame(tab, fact)
     }
   }
 })
@@ -380,7 +379,7 @@ test_that("Table unifies dictionary on conversion back to R (ARROW-8374)", {
   res <- tibble::tibble(f = factor(c("a", "c", NA), levels = c("a", "b", "c", "d")))
   tab <- Table$create(b1, b2, b3, b4)
 
-  expect_identical(as.data.frame(tab), res)
+  expect_equal_data_frame(tab, res)
 })
 
 test_that("Table$SelectColumns()", {
@@ -410,24 +409,24 @@ test_that("Table$create() with different length columns", {
 })
 
 test_that("Table$create() scalar recycling with vectors", {
-  expect_data_frame(
+  expect_equal_data_frame(
     Table$create(a = 1:10, b = 5),
     tibble::tibble(a = 1:10, b = 5)
   )
 })
 
 test_that("Table$create() scalar recycling with Scalars, Arrays, and ChunkedArrays", {
-  expect_data_frame(
+  expect_equal_data_frame(
     Table$create(a = Array$create(1:10), b = Scalar$create(5)),
     tibble::tibble(a = 1:10, b = 5)
   )
 
-  expect_data_frame(
+  expect_equal_data_frame(
     Table$create(a = Array$create(1:10), b = Array$create(5)),
     tibble::tibble(a = 1:10, b = 5)
   )
 
-  expect_data_frame(
+  expect_equal_data_frame(
     Table$create(a = Array$create(1:10), b = ChunkedArray$create(5)),
     tibble::tibble(a = 1:10, b = 5)
   )
@@ -712,21 +711,32 @@ test_that("as_arrow_table() errors on data.frame with NULL names", {
   expect_error(as_arrow_table(df), "Input data frame columns must be named")
 })
 
-test_that("we only preserve metadata of input to arrow_table when passed a single data.frame", {
-  # data.frame in, data.frame out
+test_that("# GH-35038 - passing in multiple arguments doesn't affect return type", {
+
+  df <- data.frame(x = 1)
+  out1 <- as.data.frame(arrow_table(df, name = "1"))
+  out2 <- as.data.frame(arrow_table(name = "1", df))
+
+  expect_s3_class(out1, c("data.frame"), exact = TRUE)
+  expect_s3_class(out2, c("data.frame"), exact = TRUE)
+})
+
+test_that("as.data.frame() on ArrowTabular objects returns a base R data.frame regardless of input type", {
   df <- data.frame(x = 1)
   out1 <- as.data.frame(arrow_table(df))
   expect_s3_class(out1, "data.frame", exact = TRUE)
 
-  # tibble in, tibble out
   tib <- tibble::tibble(x = 1)
   out2 <- as.data.frame(arrow_table(tib))
-  expect_s3_class(out2, c("tbl_df", "tbl", "data.frame"), exact = TRUE)
+  expect_s3_class(out2, "data.frame", exact = TRUE)
+})
 
-  # GH-35038 - passing in multiple arguments doesn't affect return type
-  out3 <- as.data.frame(arrow_table(df, name = "1"))
-  out4 <- as.data.frame(arrow_table(name = "1", df))
+test_that("collect() on ArrowTabular objects returns a tibble regardless of input type", {
+  df <- data.frame(x = 1)
+  out1 <- dplyr::collect(arrow_table(df))
+  expect_s3_class(out1, c("tbl_df", "tbl", "data.frame"), exact = TRUE)
 
-  expect_s3_class(out3, c("tbl_df", "tbl", "data.frame"), exact = TRUE)
-  expect_s3_class(out4, c("tbl_df", "tbl", "data.frame"), exact = TRUE)
+  tib <- tibble::tibble(x = 1)
+  out2 <- dplyr::collect(arrow_table(tib))
+  expect_s3_class(out2, c("tbl_df", "tbl", "data.frame"), exact = TRUE)
 })
diff --git a/r/tests/testthat/test-compute-aggregate.R b/r/tests/testthat/test-compute-aggregate.R
index 98face44ff..2732cdef3e 100644
--- a/r/tests/testthat/test-compute-aggregate.R
+++ b/r/tests/testthat/test-compute-aggregate.R
@@ -377,7 +377,7 @@ test_that("value_counts", {
     type = struct(values = float64(), counts = int64())
   )
   expect_equal(value_counts(a), result)
-  expect_identical(as.data.frame(value_counts(a)), result_df)
+  expect_equal_data_frame(value_counts(a), result_df)
   expect_identical(as.vector(value_counts(a)$counts), result_df$counts)
 })
 
diff --git a/r/tests/testthat/test-compute-sort.R b/r/tests/testthat/test-compute-sort.R
index ba3039c331..b8c8482d64 100644
--- a/r/tests/testthat/test-compute-sort.R
+++ b/r/tests/testthat/test-compute-sort.R
@@ -145,16 +145,16 @@ test_that("Table$SortIndices()", {
     as.vector(x$Take(x$SortIndices("chr"))$chr),
     sort(tbl$chr, na.last = TRUE)
   )
-  expect_identical(
-    as.data.frame(x$Take(x$SortIndices(c("int", "dbl"), c(FALSE, FALSE)))),
+  expect_equal_data_frame(
+    x$Take(x$SortIndices(c("int", "dbl"), c(FALSE, FALSE))),
     tbl %>% arrange(int, dbl)
   )
 })
 
 test_that("RecordBatch$SortIndices()", {
   x <- record_batch(tbl)
-  expect_identical(
-    as.data.frame(x$Take(x$SortIndices(c("chr", "int", "dbl"), TRUE))),
+  expect_equal_data_frame(
+    x$Take(x$SortIndices(c("chr", "int", "dbl"), TRUE)),
     tbl %>% arrange(desc(chr), desc(int), desc(dbl))
   )
 })
diff --git a/r/tests/testthat/test-dataset-csv.R b/r/tests/testthat/test-dataset-csv.R
index df58f853a1..98858a7d16 100644
--- a/r/tests/testthat/test-dataset-csv.R
+++ b/r/tests/testthat/test-dataset-csv.R
@@ -91,7 +91,7 @@ test_that("CSV scan options", {
   sb$FragmentScanOptions(options)
 
   tab <- sb$Finish()$ToTable()
-  expect_equal(as.data.frame(tab), tibble(chr = c("foo", NA)))
+  expect_equal_data_frame(tab, data.frame(chr = c("foo", NA)))
 
   # Set default convert options in CsvFileFormat
   csv_format <- CsvFileFormat$create(
diff --git a/r/tests/testthat/test-dataset.R b/r/tests/testthat/test-dataset.R
index 91b405fc01..b9972901a7 100644
--- a/r/tests/testthat/test-dataset.R
+++ b/r/tests/testthat/test-dataset.R
@@ -85,8 +85,8 @@ expect_scan_result <- function(ds, schm) {
   tab <- scn$ToTable()
   expect_r6_class(tab, "Table")
 
-  expect_equal(
-    as.data.frame(tab),
+  expect_equal_data_frame(
+    tab,
     df1[8, c("chr", "lgl")]
   )
 }
@@ -806,19 +806,19 @@ test_that("head/tail", {
   big_df <- rbind(df1, df2)
 
   # No n provided (default is 6, all from one batch)
-  expect_equal(as.data.frame(head(ds)), head(df1))
-  expect_equal(as.data.frame(tail(ds)), tail(df2))
+  expect_equal_data_frame(head(ds), head(df1))
+  expect_equal_data_frame(tail(ds), tail(df2))
 
   # n = 0: have to drop `fct` because factor levels don't come through from
   # arrow when there are 0 rows
   zero_df <- big_df[FALSE, names(big_df) != "fct"]
-  expect_equal(as.data.frame(head(ds, 0))[, names(ds) != "fct"], zero_df)
-  expect_equal(as.data.frame(tail(ds, 0))[, names(ds) != "fct"], zero_df)
+  expect_equal_data_frame(as.data.frame(head(ds, 0))[, names(ds) != "fct"], zero_df)
+  expect_equal_data_frame(as.data.frame(tail(ds, 0))[, names(ds) != "fct"], zero_df)
 
   # Two more cases: more than 1 batch, and more than nrow
   for (n in c(12, 1000)) {
-    expect_equal(as.data.frame(head(ds, n)), head(big_df, n))
-    expect_equal(as.data.frame(tail(ds, n)), tail(big_df, n))
+    expect_equal_data_frame(head(ds, n), head(big_df, n))
+    expect_equal_data_frame(tail(ds, n), tail(big_df, n))
   }
   expect_error(head(ds, -1)) # Not yet implemented
   expect_error(tail(ds, -1)) # Not yet implemented
@@ -864,18 +864,18 @@ test_that("unique()", {
 test_that("Dataset [ (take by index)", {
   ds <- open_dataset(dataset_dir)
   # Taking only from one file
-  expect_equal(
-    as.data.frame(ds[c(4, 5, 9), 3:4]),
+  expect_equal_data_frame(
+    ds[c(4, 5, 9), 3:4],
     df1[c(4, 5, 9), 3:4]
   )
   # Taking from more than one
-  expect_equal(
-    as.data.frame(ds[c(4, 5, 9, 12, 13), 3:4]),
+  expect_equal_data_frame(
+    ds[c(4, 5, 9, 12, 13), 3:4],
     rbind(df1[c(4, 5, 9), 3:4], df2[2:3, 3:4])
   )
   # Taking out of order
-  expect_equal(
-    as.data.frame(ds[c(4, 13, 9, 12, 5), ]),
+  expect_equal_data_frame(
+    ds[c(4, 13, 9, 12, 5), ],
     rbind(
       df1[4, ],
       df2[3, ],
@@ -889,8 +889,8 @@ test_that("Dataset [ (take by index)", {
   ds2 <- ds %>%
     filter(int > 6) %>%
     select(int, lgl)
-  expect_equal(
-    as.data.frame(ds2[c(2, 5), ]),
+  expect_equal_data_frame(
+    ds2[c(2, 5), ],
     rbind(
       df1[8, c("int", "lgl")],
       df2[1, c("int", "lgl")]
@@ -957,7 +957,9 @@ test_that("Can delete filesystem dataset files after collection", {
   write_dataset(ds0, dataset_dir2)
 
   ds <- open_dataset(dataset_dir2)
-  collected <- ds %>% arrange(int) %>% collect()
+  collected <- ds %>%
+    arrange(int) %>%
+    collect()
   unlink(dataset_dir2, recursive = TRUE)
   expect_false(dir.exists(dataset_dir2))
 
@@ -971,7 +973,11 @@ test_that("Can delete filesystem dataset files after collection", {
   # dataset
   write_dataset(ds0, dataset_dir2)
   ds <- open_dataset(dataset_dir2)
-  collected <- ds %>% arrange(int) %>% head() %>% arrange(int) %>% collect()
+  collected <- ds %>%
+    arrange(int) %>%
+    head() %>%
+    arrange(int) %>%
+    collect()
   unlink(dataset_dir2, recursive = TRUE)
   expect_false(dir.exists(dataset_dir2))
 
@@ -985,11 +991,11 @@ test_that("Scanner$ScanBatches", {
   ds <- open_dataset(ipc_dir, format = "feather")
   batches <- ds$NewScan()$Finish()$ScanBatches()
   table <- Table$create(!!!batches)
-  expect_equal(as.data.frame(table), rbind(df1, df2))
+  expect_equal_data_frame(table, rbind(df1, df2))
 
   batches <- ds$NewScan()$Finish()$ScanBatches()
   table <- Table$create(!!!batches)
-  expect_equal(as.data.frame(table), rbind(df1, df2))
+  expect_equal_data_frame(table, rbind(df1, df2))
 })
 
 test_that("Scanner$ToRecordBatchReader()", {
@@ -1001,8 +1007,8 @@ test_that("Scanner$ToRecordBatchReader()", {
     Scanner$create()
   reader <- scan$ToRecordBatchReader()
   expect_r6_class(reader, "RecordBatchReader")
-  expect_identical(
-    as.data.frame(reader$read_table()),
+  expect_equal_data_frame(
+    reader$read_table(),
     df1[df1$int > 6, c("int", "lgl")]
   )
 })
diff --git a/r/tests/testthat/test-dplyr-funcs-datetime.R b/r/tests/testthat/test-dplyr-funcs-datetime.R
index d47f923211..d59356ad65 100644
--- a/r/tests/testthat/test-dplyr-funcs-datetime.R
+++ b/r/tests/testthat/test-dplyr-funcs-datetime.R
@@ -310,7 +310,7 @@ test_that("timestamp round trip correctly via strftime and strptime", {
     fmt2 <- paste(base_format2, fmt)
     fmt <- paste(base_format, paste0("%", fmt))
     test_df <- tibble::tibble(x = strftime(times, format = fmt))
-    expect_equal(
+    expect_equal_data_frame(
       test_df %>%
         arrow_table() %>%
         mutate(!!fmt := strptime(x, format = fmt2)) %>%
@@ -1028,7 +1028,7 @@ test_that("leap_year mirror lubridate", {
     .input %>%
       mutate(x = leap_year(test_year)) %>%
       collect(),
-    data.frame(
+    tibble::tibble(
       test_year = as.Date(c(
         "1998-01-01", # not leap year
         "1996-01-01", # leap year (divide by 4 rule)
@@ -1048,7 +1048,9 @@ test_that("am/pm mirror lubridate", {
         am2 = lubridate::am(test_time),
         pm2 = lubridate::pm(test_time)
       ) %>%
-      collect(),
+      # can't use collect() here due to how tibbles store datetimes
+      # TODO: add better explanation above
+      as.data.frame(),
     data.frame(
       test_time = strptime(
         x = c(
diff --git a/r/tests/testthat/test-dplyr-query.R b/r/tests/testthat/test-dplyr-query.R
index e478d0e4c4..bab81a463e 100644
--- a/r/tests/testthat/test-dplyr-query.R
+++ b/r/tests/testthat/test-dplyr-query.R
@@ -119,14 +119,14 @@ test_that("collect(as_data_frame=FALSE)", {
   # collect(as_data_frame = FALSE) always returns Table now
   expect_r6_class(b2, "Table")
   expected <- tbl[tbl$int > 5 & !is.na(tbl$int), c("int", "chr")]
-  expect_equal(as.data.frame(b2), expected)
+  expect_equal_data_frame(b2, expected)
 
   b3 <- batch %>%
     select(int, strng = chr) %>%
     filter(int > 5) %>%
     collect(as_data_frame = FALSE)
   expect_r6_class(b3, "Table")
-  expect_equal(as.data.frame(b3), set_names(expected, c("int", "strng")))
+  expect_equal_data_frame(b3, set_names(expected, c("int", "strng")))
 
   b4 <- batch %>%
     select(int, strng = chr) %>%
@@ -134,8 +134,8 @@ test_that("collect(as_data_frame=FALSE)", {
     group_by(int) %>%
     collect(as_data_frame = FALSE)
   expect_r6_class(b4, "Table")
-  expect_equal(
-    as.data.frame(b4),
+  expect_equal_data_frame(
+    b4,
     expected %>%
       rename(strng = chr) %>%
       group_by(int)
@@ -156,14 +156,14 @@ test_that("compute()", {
 
   expect_r6_class(b2, "Table")
   expected <- tbl[tbl$int > 5 & !is.na(tbl$int), c("int", "chr")]
-  expect_equal(as.data.frame(b2), expected)
+  expect_equal_data_frame(b2, expected)
 
   b3 <- batch %>%
     select(int, strng = chr) %>%
     filter(int > 5) %>%
     compute()
   expect_r6_class(b3, "Table")
-  expect_equal(as.data.frame(b3), set_names(expected, c("int", "strng")))
+  expect_equal_data_frame(b3, set_names(expected, c("int", "strng")))
 
   b4 <- batch %>%
     select(int, strng = chr) %>%
@@ -171,8 +171,8 @@ test_that("compute()", {
     group_by(int) %>%
     compute()
   expect_r6_class(b4, "Table")
-  expect_equal(
-    as.data.frame(b4),
+  expect_equal_data_frame(
+    b4,
     expected %>%
       rename(strng = chr) %>%
       group_by(int)
@@ -210,8 +210,7 @@ test_that("arrange then head returns the right data (ARROW-14162)", {
       arrange(mpg, disp) %>%
       head(4) %>%
       collect(),
-    mtcars,
-    ignore_attr = "row.names"
+    tibble::as_tibble(mtcars)
   )
 })
 
@@ -222,8 +221,7 @@ test_that("arrange then tail returns the right data", {
       arrange(mpg, disp) %>%
       tail(4) %>%
       collect(),
-    mtcars,
-    ignore_attr = "row.names"
+    tibble::as_tibble(mtcars)
   )
 })
 
@@ -559,8 +557,8 @@ test_that("compute() on a grouped query returns a Table with groups in metadata"
     group_by(int) %>%
     compute()
   expect_r6_class(tab1, "Table")
-  expect_equal(
-    as.data.frame(tab1),
+  expect_equal_data_frame(
+    tab1,
     tbl %>%
       group_by(int)
   )
diff --git a/r/tests/testthat/test-duckdb.R b/r/tests/testthat/test-duckdb.R
index 24e8cadf2e..409e99b70f 100644
--- a/r/tests/testthat/test-duckdb.R
+++ b/r/tests/testthat/test-duckdb.R
@@ -168,7 +168,8 @@ test_that("to_arrow roundtrip, with dataset", {
       filter(int > 5 & part > 1) %>%
       mutate(dbl_plus = dbl + 1) %>%
       collect() %>%
-      arrange(part, int)
+      arrange(part, int) %>%
+      as.data.frame()
   )
 })
 
diff --git a/r/tests/testthat/test-feather.R b/r/tests/testthat/test-feather.R
index 4caadc27c4..188a562fe8 100644
--- a/r/tests/testthat/test-feather.R
+++ b/r/tests/testthat/test-feather.R
@@ -177,7 +177,7 @@ test_that("feather read/write round trip", {
   tab1 <- read_feather(feather_file, as_data_frame = FALSE)
   expect_r6_class(tab1, "Table")
 
-  expect_equal(tib, as.data.frame(tab1))
+  expect_equal_data_frame(tib, tab1)
 })
 
 test_that("Read feather from raw vector", {
diff --git a/r/tests/testthat/test-metadata.R b/r/tests/testthat/test-metadata.R
index 4cf8e49af1..e44cd71038 100644
--- a/r/tests/testthat/test-metadata.R
+++ b/r/tests/testthat/test-metadata.R
@@ -57,7 +57,7 @@ test_that("Table R metadata", {
     "$r$columns$c$columns$c1$attributes$extra_attr",
     fixed = TRUE
   )
-  expect_identical(as.data.frame(tab), example_with_metadata)
+  expect_equal_data_frame(tab, example_with_metadata)
 })
 
 test_that("R metadata is not stored for types that map to Arrow types (factor, Date, etc.)", {
@@ -94,7 +94,7 @@ test_that("Garbage R metadata doesn't break things", {
   tab <- Table$create(example_data[1:6])
   tab$metadata$r <- "garbage"
   expect_warning(
-    expect_identical(as.data.frame(tab), example_data[1:6]),
+    as.data.frame(tab),
     "Invalid metadata$r",
     fixed = TRUE
   )
@@ -103,7 +103,7 @@ test_that("Garbage R metadata doesn't break things", {
   tab <- Table$create(example_data[1:6])
   tab$metadata$r <- rawToChar(serialize("garbage", NULL, ascii = TRUE))
   expect_warning(
-    expect_identical(as.data.frame(tab), example_data[1:6]),
+    as.data.frame(tab),
     "Invalid metadata$r",
     fixed = TRUE
   )
@@ -164,7 +164,7 @@ test_that("RecordBatch metadata", {
 })
 
 test_that("RecordBatch R metadata", {
-  expect_identical(as.data.frame(record_batch(example_with_metadata)), example_with_metadata)
+  expect_equal_data_frame(record_batch(example_with_metadata), example_with_metadata)
 })
 
 test_that("R metadata roundtrip via parquet", {
@@ -195,14 +195,14 @@ test_that("haven types roundtrip via feather", {
 test_that("Date/time type roundtrip", {
   rb <- record_batch(example_with_times)
   expect_r6_class(rb$schema$posixlt$type, "VctrsExtensionType")
-  expect_identical(as.data.frame(rb), example_with_times)
+  expect_equal_data_frame(rb, example_with_times)
 })
 
 test_that("metadata keeps attribute of top level data frame", {
   df <- structure(data.frame(x = 1, y = 2), foo = "bar")
   tab <- Table$create(df)
   expect_identical(attr(as.data.frame(tab), "foo"), "bar")
-  expect_identical(as.data.frame(tab), df)
+  expect_equal_data_frame(tab, df)
 })
 
 
@@ -387,7 +387,18 @@ test_that("grouped_df non-arrow metadata is preserved", {
   grouped_tab <- arrow_table(grouped)
 
   expect_equal(
-    attributes(as.data.frame(grouped_tab))$other_metadata,
+    attributes(collect.ArrowTabular(grouped_tab))$other_metadata,
     "look I'm still here!"
   )
 })
+
+test_that("data.frame class attribute is not saved", {
+  df <- data.frame(x = 1:5)
+  df_arrow <- arrow_table(df)
+  expect_null(df_arrow$r_metadata$attributes)
+
+  df <- data.frame(x = 1:5)
+  attributes(df)$foo <- "bar"
+  df_arrow <- arrow_table(df)
+  expect_identical(df_arrow$r_metadata, list(attributes = list(foo = "bar"), columns = list(x = NULL)))
+})
diff --git a/r/tests/testthat/test-na-omit.R b/r/tests/testthat/test-na-omit.R
index c2d0fd1b71..cfc71445d4 100644
--- a/r/tests/testthat/test-na-omit.R
+++ b/r/tests/testthat/test-na-omit.R
@@ -42,8 +42,8 @@ test_that("na.fail on Array and ChunkedArray", {
 
 test_that("na.omit on Table", {
   tbl <- Table$create(example_data)
-  expect_equal(
-    as.data.frame(na.omit(tbl)),
+  expect_equal_data_frame(
+    na.omit(tbl),
     na.omit(example_data),
     # We don't include an attribute with the rows omitted
     ignore_attr = "na.action"
@@ -52,8 +52,8 @@ test_that("na.omit on Table", {
 
 test_that("na.exclude on Table", {
   tbl <- Table$create(example_data)
-  expect_equal(
-    as.data.frame(na.exclude(tbl)),
+  expect_equal_data_frame(
+    na.exclude(tbl),
     na.exclude(example_data),
     ignore_attr = "na.action"
   )
@@ -66,8 +66,8 @@ test_that("na.fail on Table", {
 
 test_that("na.omit on RecordBatch", {
   batch <- record_batch(example_data)
-  expect_equal(
-    as.data.frame(na.omit(batch)),
+  expect_equal_data_frame(
+    na.omit(batch),
     na.omit(example_data),
     ignore_attr = "na.action"
   )
@@ -75,8 +75,8 @@ test_that("na.omit on RecordBatch", {
 
 test_that("na.exclude on RecordBatch", {
   batch <- record_batch(example_data)
-  expect_equal(
-    as.data.frame(na.exclude(batch)),
+  expect_equal_data_frame(
+    na.exclude(batch),
     na.omit(example_data),
     ignore_attr = "na.action"
   )
diff --git a/r/tests/testthat/test-python-flight.R b/r/tests/testthat/test-python-flight.R
index d2b6fd491e..2e60957d2f 100644
--- a/r/tests/testthat/test-python-flight.R
+++ b/r/tests/testthat/test-python-flight.R
@@ -53,13 +53,13 @@ if (process_is_running("demo_flight_server")) {
   })
 
   test_that("flight_get", {
-    expect_identical(as.data.frame(flight_get(client, flight_obj)), example_data)
+    expect_equal_data_frame(flight_get(client, flight_obj), example_data)
   })
 
   test_that("flight_put with RecordBatch", {
     flight_obj2 <- tempfile()
     flight_put(client, RecordBatch$create(example_data), path = flight_obj2)
-    expect_identical(as.data.frame(flight_get(client, flight_obj2)), example_data)
+    expect_equal_data_frame(flight_get(client, flight_obj2), example_data)
   })
 
   test_that("flight_put with overwrite = FALSE", {
@@ -69,7 +69,7 @@ if (process_is_running("demo_flight_server")) {
     )
     # Default is TRUE so this will overwrite
     flight_put(client, example_with_times, path = flight_obj)
-    expect_identical(as.data.frame(flight_get(client, flight_obj)), example_with_times)
+    expect_equal_data_frame(flight_get(client, flight_obj), example_with_times)
   })
 
   test_that("flight_disconnect", {
diff --git a/r/tests/testthat/test-python.R b/r/tests/testthat/test-python.R
index 968d72119c..58eb84f5d6 100644
--- a/r/tests/testthat/test-python.R
+++ b/r/tests/testthat/test-python.R
@@ -108,7 +108,7 @@ test_that("RecordBatch with metadata roundtrip", {
   expect_identical(rbatch$metadata, batch$metadata)
   expect_equal(rbatch$a, batch$a)
   expect_equal(rbatch[c("b", "c", "d")], batch[c("b", "c", "d")])
-  expect_identical(as.data.frame(rbatch), example_with_metadata)
+  expect_equal_data_frame(rbatch, example_with_metadata)
 })
 
 test_that("Table with metadata roundtrip", {
@@ -123,7 +123,7 @@ test_that("Table with metadata roundtrip", {
   expect_identical(rtab$metadata, tab$metadata)
   expect_equal(rtab$a, tab$a)
   expect_equal(rtab[c("b", "c", "d")], tab[c("b", "c", "d")])
-  expect_identical(as.data.frame(rtab), example_with_metadata)
+  expect_equal_data_frame(rtab, example_with_metadata)
 })
 
 test_that("DataType roundtrip", {
@@ -160,8 +160,8 @@ test_that("RecordBatchReader to python", {
   expect_s3_class(pytab, "pyarrow.lib.Table")
   back_to_r <- reticulate::py_to_r(pytab)
   expect_r6_class(back_to_r, "Table")
-  expect_identical(
-    as.data.frame(back_to_r),
+  expect_equal_data_frame(
+    back_to_r,
     example_data %>%
       select(int, lgl) %>%
       filter(int > 6)
@@ -178,7 +178,7 @@ test_that("RecordBatchReader from python", {
   back_to_r <- reticulate::py_to_r(pyreader)
   rt_table <- back_to_r$read_table()
   expect_r6_class(rt_table, "Table")
-  expect_identical(as.data.frame(rt_table), example_data)
+  expect_equal_data_frame(rt_table, example_data)
 
   scan <- Scanner$create(tab)
   reader <- scan$ToRecordBatchReader()
diff --git a/r/tests/testthat/test-read-write.R b/r/tests/testthat/test-read-write.R
index 66f6db56d9..9475788ee5 100644
--- a/r/tests/testthat/test-read-write.R
+++ b/r/tests/testthat/test-read-write.R
@@ -119,7 +119,7 @@ test_that("reading/writing a raw vector (sparklyr integration)", {
   }
   bytes <- write_to_raw(example_data)
   expect_type(bytes, "raw")
-  expect_identical(read_from_raw_test(bytes), example_data)
+  expect_equal_data_frame(read_from_raw_test(bytes), example_data)
   # this could just be `read_ipc_stream(x)`; propose that
-  expect_identical(read_ipc_stream(bytes), example_data)
+  expect_equal_data_frame(read_ipc_stream(bytes), example_data)
 })
diff --git a/r/tests/testthat/test-utf.R b/r/tests/testthat/test-utf.R
index f7553da5b4..660b2a4784 100644
--- a/r/tests/testthat/test-utf.R
+++ b/r/tests/testthat/test-utf.R
@@ -45,24 +45,24 @@ test_that("We handle non-UTF strings", {
   expect_identical(as.vector(ChunkedArray$create(df)), df)
 
   # Table (including field name)
-  expect_identical(as.data.frame(Table$create(df)), df)
-  expect_identical(as.data.frame(Table$create(df_struct)), df_struct)
+  expect_equal_data_frame(Table$create(df), df)
+  expect_equal_data_frame(Table$create(df_struct), df_struct)
 
   # RecordBatch
-  expect_identical(as.data.frame(record_batch(df)), df)
-  expect_identical(as.data.frame(record_batch(df_struct)), df_struct)
+  expect_equal_data_frame(record_batch(df), df)
+  expect_equal_data_frame(record_batch(df_struct), df_struct)
 
   # Schema field name
-  df_schema <- do.call(schema, raw_schema)
+  df_schema <- schema(raw_schema)
   expect_identical(names(df_schema), names(df))
 
   df_struct_schema <- schema(a = do.call(struct, raw_schema))
 
   # Create table/batch with schema
-  expect_identical(as.data.frame(Table$create(df, schema = df_schema)), df)
-  expect_identical(as.data.frame(Table$create(df_struct, schema = df_struct_schema)), df_struct)
-  expect_identical(as.data.frame(record_batch(df, schema = df_schema)), df)
-  expect_identical(as.data.frame(record_batch(df_struct, schema = df_struct_schema)), df_struct)
+  expect_equal_data_frame(Table$create(df, schema = df_schema), df)
+  expect_equal_data_frame(Table$create(df_struct, schema = df_struct_schema), df_struct)
+  expect_equal_data_frame(record_batch(df, schema = df_schema), df)
+  expect_equal_data_frame(record_batch(df_struct, schema = df_struct_schema), df_struct)
 
   # Serialization
   feather_file <- tempfile()