You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by np...@apache.org on 2022/04/22 17:40:40 UTC
[arrow] branch master updated: ARROW-15989: [R] rbind & cbind for Table & RecordBatch
This is an automated email from the ASF dual-hosted git repository.
npr pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/master by this push:
new f23965ceed ARROW-15989: [R] rbind & cbind for Table & RecordBatch
f23965ceed is described below
commit f23965ceedb403f0f480a0892806f23ab81a2db0
Author: Will Jones <wi...@gmail.com>
AuthorDate: Fri Apr 22 13:40:34 2022 -0400
ARROW-15989: [R] rbind & cbind for Table & RecordBatch
## Summary of Changes
* Added `rbind` and `cbind` for Table
* Added `cbind` for RecordBatch. `rbind` just redirects the user to use `Table$create`
* Changed `c.Array()` to raise an error directing the user to either `concat_arrays()` or `ChunkedArray$create()`, depending on whether they want a single copied array or zero-copy chunked concatenation.
* Implemented `c.ChunkedArray`
Closes #12751 from wjones127/ARROW-15989-rbind-table
Lead-authored-by: Will Jones <wi...@gmail.com>
Co-authored-by: Neal Richardson <ne...@gmail.com>
Co-authored-by: Dewey Dunnington <de...@fishandwhistle.net>
Signed-off-by: Neal Richardson <ne...@gmail.com>
---
r/NAMESPACE | 7 ++
r/R/array.R | 12 +--
r/R/arrow-package.R | 2 +-
r/R/arrowExports.R | 4 +
r/R/chunked-array.R | 19 ++++-
r/R/record-batch.R | 53 +++++++++++++
r/R/table.R | 81 +++++++++++++++++++
r/_pkgdown.yml | 1 +
r/man/ChunkedArray.Rd | 5 ++
r/man/concat_arrays.Rd | 5 +-
r/man/concat_tables.Rd | 29 +++++++
r/src/arrowExports.cpp | 10 +++
r/src/chunkedarray.cpp | 3 +-
r/src/table.cpp | 8 ++
r/tests/testthat/_snaps/Array.md | 6 ++
r/tests/testthat/_snaps/RecordBatch.md | 9 +++
r/tests/testthat/_snaps/Table.md | 5 ++
r/tests/testthat/test-Array.R | 24 +-----
r/tests/testthat/test-RecordBatch.R | 99 ++++++++++++++++++++++-
r/tests/testthat/test-Table.R | 141 +++++++++++++++++++++++++++++++++
r/tests/testthat/test-chunked-array.R | 33 ++++++++
21 files changed, 521 insertions(+), 35 deletions(-)
diff --git a/r/NAMESPACE b/r/NAMESPACE
index da43a3f511..d6e67c85a8 100644
--- a/r/NAMESPACE
+++ b/r/NAMESPACE
@@ -39,7 +39,10 @@ S3method(as.list,Schema)
S3method(as.raw,Buffer)
S3method(as.vector,ArrowDatum)
S3method(c,Array)
+S3method(c,ChunkedArray)
S3method(c,Dataset)
+S3method(cbind,RecordBatch)
+S3method(cbind,Table)
S3method(dim,ArrowTabular)
S3method(dim,Dataset)
S3method(dim,RecordBatchReader)
@@ -86,6 +89,8 @@ S3method(print,arrow_dplyr_query)
S3method(print,arrow_info)
S3method(print,arrow_r_metadata)
S3method(quantile,ArrowDatum)
+S3method(rbind,RecordBatch)
+S3method(rbind,Table)
S3method(read_message,InputStream)
S3method(read_message,MessageReader)
S3method(read_message,default)
@@ -218,6 +223,7 @@ export(cast_options)
export(chunked_array)
export(codec_is_available)
export(concat_arrays)
+export(concat_tables)
export(contains)
export(copy_files)
export(cpu_count)
@@ -325,6 +331,7 @@ importFrom(bit64,print.integer64)
importFrom(bit64,str.integer64)
importFrom(methods,as)
importFrom(purrr,as_mapper)
+importFrom(purrr,flatten)
importFrom(purrr,imap)
importFrom(purrr,imap_chr)
importFrom(purrr,keep)
diff --git a/r/R/array.R b/r/R/array.R
index 965e3bfc33..4e7fbdab7c 100644
--- a/r/R/array.R
+++ b/r/R/array.R
@@ -222,10 +222,7 @@ Array$import_from_c <- ImportArray
#' Concatenates zero or more [Array] objects into a single
#' array. This operation will make a copy of its input; if you need
#' the behavior of a single Array but don't need a
-#' single object, use [ChunkedArray]. Note that a [c()]
-#' method is provided for convenience but that it may
-#' produce surprising results when used with other
-#' classes of objects.
+#' single object, use [ChunkedArray].
#'
#' @param ... zero or more [Array] objects to concatenate
#' @param type An optional `type` describing the desired
@@ -236,7 +233,6 @@ Array$import_from_c <- ImportArray
#'
#' @examplesIf arrow_available()
#' concat_arrays(Array$create(1:3), Array$create(4:5))
-#'
concat_arrays <- function(..., type = NULL) {
dots <- lapply(list2(...), Array$create, type = type)
@@ -256,7 +252,11 @@ concat_arrays <- function(..., type = NULL) {
#' @rdname concat_arrays
#' @export
c.Array <- function(...) {
- concat_arrays(...)
+ abort(c(
+ "Use `concat_arrays()` or `ChunkedArray$create()` instead.",
+ i = "`concat_arrays()` creates a new Array by copying data.",
+ i = "`ChunkedArray$create()` uses the arrays as chunks for zero-copy concatenation."
+ ))
}
#' @rdname array
diff --git a/r/R/arrow-package.R b/r/R/arrow-package.R
index 3c810bb8f2..624cca9e7d 100644
--- a/r/R/arrow-package.R
+++ b/r/R/arrow-package.R
@@ -17,7 +17,7 @@
#' @importFrom stats quantile median na.omit na.exclude na.pass na.fail
#' @importFrom R6 R6Class
-#' @importFrom purrr as_mapper map map2 map_chr map2_chr map_dfr map_int map_lgl keep imap imap_chr
+#' @importFrom purrr as_mapper map map2 map_chr map2_chr map_dfr map_int map_lgl keep imap imap_chr flatten
#' @importFrom assertthat assert_that is.string
#' @importFrom rlang list2 %||% is_false abort dots_n warn enquo quo_is_null enquos is_integerish quos
#' @importFrom rlang eval_tidy new_data_mask syms env new_environment env_bind set_names exec
diff --git a/r/R/arrowExports.R b/r/R/arrowExports.R
index 6bf9a75d0f..23309f70d1 100644
--- a/r/R/arrowExports.R
+++ b/r/R/arrowExports.R
@@ -1980,6 +1980,10 @@ Table__ReferencedBufferSize <- function(table) {
.Call(`_arrow_Table__ReferencedBufferSize`, table)
}
+Table__ConcatenateTables <- function(tables, unify_schemas) {
+ .Call(`_arrow_Table__ConcatenateTables`, tables, unify_schemas)
+}
+
GetCpuThreadPoolCapacity <- function() {
.Call(`_arrow_GetCpuThreadPoolCapacity`)
}
diff --git a/r/R/chunked-array.R b/r/R/chunked-array.R
index 95a05aba5b..c91b125af4 100644
--- a/r/R/chunked-array.R
+++ b/r/R/chunked-array.R
@@ -77,6 +77,11 @@
#' # When constructing a ChunkedArray, the first chunk is used to infer type.
#' doubles <- chunked_array(c(1, 2, 3), c(5L, 6L, 7L))
#' doubles$type
+#'
+#' # Concatenating chunked arrays returns a new chunked array containing all chunks
+#' a <- chunked_array(c(1, 2), 3)
+#' b <- chunked_array(c(4, 5), 6)
+#' c(a, b)
#' @export
ChunkedArray <- R6Class("ChunkedArray",
inherit = ArrowDatum,
@@ -145,7 +150,19 @@ ChunkedArray$create <- function(..., type = NULL) {
if (!is.null(type)) {
type <- as_type(type)
}
- ChunkedArray__from_list(list2(...), type)
+ chunks <- flatten(map(list2(...), function(arr) {
+ if (inherits(arr, "ChunkedArray")) {
+ arr$chunks
+ } else {
+ list(arr)
+ }
+ }))
+ ChunkedArray__from_list(chunks, type)
+}
+
+#' @export
+c.ChunkedArray <- function(...) {
+ ChunkedArray$create(...)
}
#' @param \dots Vectors to coerce
diff --git a/r/R/record-batch.R b/r/R/record-batch.R
index 24bd61535e..03f97a5130 100644
--- a/r/R/record-batch.R
+++ b/r/R/record-batch.R
@@ -189,3 +189,56 @@ record_batch <- RecordBatch$create
#' @export
names.RecordBatch <- function(x) x$names()
+
+#' @export
+rbind.RecordBatch <- function(...) {
+ abort("Use `Table$create()` to combine RecordBatches into a Table")
+}
+
+cbind_check_length <- function(inputs, call = caller_env()) {
+ sizes <- map_int(inputs, NROW)
+ ok_lengths <- sizes %in% c(head(sizes, 1), 1L)
+ if (!all(ok_lengths)) {
+ first_bad_one <- which.min(ok_lengths)
+ abort(
+ c("Non-scalar inputs must have an equal number of rows.",
+ i = sprintf("..1 has %d, ..%d has %d", sizes[[1]], first_bad_one, sizes[[first_bad_one]])),
+ call = call
+ )
+ }
+}
+
+#' @export
+cbind.RecordBatch <- function(...) {
+ call <- sys.call()
+ inputs <- list(...)
+ arg_names <- if (is.null(names(inputs))) {
+ rep("", length(inputs))
+ } else {
+ names(inputs)
+ }
+
+ cbind_check_length(inputs, call)
+
+ columns <- flatten(map(seq_along(inputs), function(i) {
+ input <- inputs[[i]]
+ name <- arg_names[i]
+
+ if (inherits(input, "RecordBatch")) {
+ set_names(input$columns, names(input))
+ } else if (inherits(input, "data.frame")) {
+ as.list(input)
+ } else if (inherits(input, "Table") || inherits(input, "ChunkedArray")) {
+ abort("Cannot cbind a RecordBatch with Tables or ChunkedArrays",
+ i = "Hint: consider converting the RecordBatch into a Table first")
+ } else {
+ if (name == "") {
+ abort("Vector and array arguments must have names",
+ i = sprintf("Argument ..%d is missing a name", i))
+ }
+ list2("{name}" := input)
+ }
+ }))
+
+ RecordBatch$create(!!! columns)
+}
diff --git a/r/R/table.R b/r/R/table.R
index 07750786ee..102d0ecd10 100644
--- a/r/R/table.R
+++ b/r/R/table.R
@@ -149,6 +149,87 @@ Table$create <- function(..., schema = NULL) {
#' @export
names.Table <- function(x) x$ColumnNames()
+#' Concatenate one or more Tables
+#'
+#' Concatenate one or more [Table] objects into a single table. This operation
+#' does not copy array data, but instead creates new chunked arrays for each
+#' column that point at existing array data.
+#'
+#' @param ... A [Table]
+#' @param unify_schemas If TRUE, the schemas of the tables will be first unified
+#' with fields of the same name being merged, then each table will be promoted
+#' to the unified schema before being concatenated. Otherwise, all tables should
+#' have the same schema.
+#' @examplesIf arrow_available()
+#' tbl <- arrow_table(name = rownames(mtcars), mtcars)
+#' prius <- arrow_table(name = "Prius", mpg = 58, cyl = 4, disp = 1.8)
+#' combined <- concat_tables(tbl, prius)
+#' tail(combined)$to_data_frame()
+#' @export
+concat_tables <- function(..., unify_schemas = TRUE) {
+ tables <- list2(...)
+
+ if (length(tables) == 0) {
+ abort("Must pass at least one Table.")
+ }
+
+ if (!unify_schemas) {
+ # assert they have same schema
+ schema <- tables[[1]]$schema
+ unequal_schema_idx <- which.min(lapply(tables, function(x) x$schema == schema))
+ if (unequal_schema_idx != 1) {
+ abort(c(
+ sprintf("Schema at index %i does not match the first schema.", unequal_schema_idx),
+ i = paste0("Schema 1:\n", schema$ToString()),
+ i = paste0(
+ sprintf("Schema %d:\n", unequal_schema_idx),
+ tables[[unequal_schema_idx]]$schema$ToString()
+ )
+ ))
+ }
+ }
+
+ Table__ConcatenateTables(tables, unify_schemas)
+}
+
+#' @export
+rbind.Table <- function(...) {
+ concat_tables(..., unify_schemas = FALSE)
+}
+
+#' @export
+cbind.Table <- function(...) {
+ call <- sys.call()
+ inputs <- list(...)
+ arg_names <- if (is.null(names(inputs))) {
+ rep("", length(inputs))
+ } else {
+ names(inputs)
+ }
+
+ cbind_check_length(inputs, call)
+
+ columns <- flatten(map(seq_along(inputs), function(i) {
+ input <- inputs[[i]]
+ name <- arg_names[i]
+
+ if (inherits(input, "ArrowTabular")) {
+ set_names(input$columns, names(input))
+ } else if (inherits(input, "data.frame")) {
+ as.list(input)
+ } else {
+ if (name == "") {
+ abort("Vector and array arguments must have names",
+ i = sprintf("Argument ..%d is missing a name", i)
+ )
+ }
+ list2("{name}" := input)
+ }
+ }))
+
+ Table$create(!!!columns)
+}
+
#' @param ... A `data.frame` or a named set of Arrays or vectors. If given a
#' mixture of data.frames and named vectors, the inputs will be autospliced together
#' (see examples). Alternatively, you can provide a single Arrow IPC
diff --git a/r/_pkgdown.yml b/r/_pkgdown.yml
index c3810cdf09..713af8578f 100644
--- a/r/_pkgdown.yml
+++ b/r/_pkgdown.yml
@@ -144,6 +144,7 @@ reference:
- buffer
- read_message
- concat_arrays
+ - concat_tables
- ExtensionArray
- vctrs_extension_array
- title: Arrow data types and schema
diff --git a/r/man/ChunkedArray.Rd b/r/man/ChunkedArray.Rd
index 4da31d28e2..ab5e0f73c2 100644
--- a/r/man/ChunkedArray.Rd
+++ b/r/man/ChunkedArray.Rd
@@ -74,6 +74,11 @@ list_scores$num_chunks
# When constructing a ChunkedArray, the first chunk is used to infer type.
doubles <- chunked_array(c(1, 2, 3), c(5L, 6L, 7L))
doubles$type
+
+# Concatenating chunked arrays returns a new chunked array containing all chunks
+a <- chunked_array(c(1, 2), 3)
+b <- chunked_array(c(4, 5), 6)
+c(a, b)
\dontshow{\}) # examplesIf}
}
\seealso{
diff --git a/r/man/concat_arrays.Rd b/r/man/concat_arrays.Rd
index 795445b1c2..0cbe7ba578 100644
--- a/r/man/concat_arrays.Rd
+++ b/r/man/concat_arrays.Rd
@@ -22,10 +22,7 @@ A single \link{Array}
Concatenates zero or more \link{Array} objects into a single
array. This operation will make a copy of its input; if you need
the behavior of a single Array but don't need a
-single object, use \link{ChunkedArray}. Note that a \code{\link[=c]{c()}}
-method is provided for convenience but that it may
-produce surprising results when used with other
-classes of objects.
+single object, use \link{ChunkedArray}.
}
\examples{
\dontshow{if (arrow_available()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf}
diff --git a/r/man/concat_tables.Rd b/r/man/concat_tables.Rd
new file mode 100644
index 0000000000..a03fc49a33
--- /dev/null
+++ b/r/man/concat_tables.Rd
@@ -0,0 +1,29 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/table.R
+\name{concat_tables}
+\alias{concat_tables}
+\title{Concatenate one or more Tables}
+\usage{
+concat_tables(..., unify_schemas = TRUE)
+}
+\arguments{
+\item{...}{A \link{Table}}
+
+\item{unify_schemas}{If TRUE, the schemas of the tables will be first unified
+with fields of the same name being merged, then each table will be promoted
+to the unified schema before being concatenated. Otherwise, all tables should
+have the same schema.}
+}
+\description{
+Concatenate one or more \link{Table} objects into a single table. This operation
+does not copy array data, but instead creates new chunked arrays for each
+column that point at existing array data.
+}
+\examples{
+\dontshow{if (arrow_available()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf}
+tbl <- arrow_table(name = rownames(mtcars), mtcars)
+prius <- arrow_table(name = "Prius", mpg = 58, cyl = 4, disp = 1.8)
+combined <- concat_tables(tbl, prius)
+tail(combined)$to_data_frame()
+\dontshow{\}) # examplesIf}
+}
diff --git a/r/src/arrowExports.cpp b/r/src/arrowExports.cpp
index 760b71a5be..5440dd3e62 100644
--- a/r/src/arrowExports.cpp
+++ b/r/src/arrowExports.cpp
@@ -5019,6 +5019,15 @@ BEGIN_CPP11
return cpp11::as_sexp(Table__ReferencedBufferSize(table));
END_CPP11
}
+// table.cpp
+std::shared_ptr<arrow::Table> Table__ConcatenateTables(const std::vector<std::shared_ptr<arrow::Table>>& tables, bool unify_schemas);
+extern "C" SEXP _arrow_Table__ConcatenateTables(SEXP tables_sexp, SEXP unify_schemas_sexp){
+BEGIN_CPP11
+ arrow::r::Input<const std::vector<std::shared_ptr<arrow::Table>>&>::type tables(tables_sexp);
+ arrow::r::Input<bool>::type unify_schemas(unify_schemas_sexp);
+ return cpp11::as_sexp(Table__ConcatenateTables(tables, unify_schemas));
+END_CPP11
+}
// threadpool.cpp
int GetCpuThreadPoolCapacity();
extern "C" SEXP _arrow_GetCpuThreadPoolCapacity(){
@@ -5615,6 +5624,7 @@ static const R_CallMethodDef CallEntries[] = {
{ "_arrow_all_record_batches", (DL_FUNC) &_arrow_all_record_batches, 1},
{ "_arrow_Table__from_record_batches", (DL_FUNC) &_arrow_Table__from_record_batches, 2},
{ "_arrow_Table__ReferencedBufferSize", (DL_FUNC) &_arrow_Table__ReferencedBufferSize, 1},
+ { "_arrow_Table__ConcatenateTables", (DL_FUNC) &_arrow_Table__ConcatenateTables, 2},
{ "_arrow_GetCpuThreadPoolCapacity", (DL_FUNC) &_arrow_GetCpuThreadPoolCapacity, 0},
{ "_arrow_SetCpuThreadPoolCapacity", (DL_FUNC) &_arrow_SetCpuThreadPoolCapacity, 1},
{ "_arrow_GetIOThreadPoolCapacity", (DL_FUNC) &_arrow_GetIOThreadPoolCapacity, 0},
diff --git a/r/src/chunkedarray.cpp b/r/src/chunkedarray.cpp
index 72ae420061..bfc08008de 100644
--- a/r/src/chunkedarray.cpp
+++ b/r/src/chunkedarray.cpp
@@ -141,7 +141,8 @@ std::shared_ptr<arrow::ChunkedArray> ChunkedArray__from_list(cpp11::list chunks,
}
}
- return std::make_shared<arrow::ChunkedArray>(std::move(vec));
+ // Use Make so we validate that chunk types are all the same
+ return ValueOrStop(arrow::ChunkedArray::Make(std::move(vec)));
}
// [[arrow::export]]
diff --git a/r/src/table.cpp b/r/src/table.cpp
index 5168dc9784..2d2d35b06a 100644
--- a/r/src/table.cpp
+++ b/r/src/table.cpp
@@ -289,4 +289,12 @@ int64_t Table__ReferencedBufferSize(const std::shared_ptr<arrow::Table>& table)
return ValueOrStop(arrow::util::ReferencedBufferSize(*table));
}
+// [[arrow::export]]
+std::shared_ptr<arrow::Table> Table__ConcatenateTables(
+ const std::vector<std::shared_ptr<arrow::Table>>& tables, bool unify_schemas) {
+ arrow::ConcatenateTablesOptions options;
+ options.unify_schemas = unify_schemas;
+ return ValueOrStop(arrow::ConcatenateTables(tables, options));
+}
+
#endif
diff --git a/r/tests/testthat/_snaps/Array.md b/r/tests/testthat/_snaps/Array.md
new file mode 100644
index 0000000000..3f8ebe966d
--- /dev/null
+++ b/r/tests/testthat/_snaps/Array.md
@@ -0,0 +1,6 @@
+# Array doesn't support c()
+
+ Use `concat_arrays()` or `ChunkedArray$create()` instead.
+ i `concat_arrays()` creates a new Array by copying data.
+ i `ChunkedArray$create()` uses the arrays as chunks for zero-copy concatenation.
+
diff --git a/r/tests/testthat/_snaps/RecordBatch.md b/r/tests/testthat/_snaps/RecordBatch.md
new file mode 100644
index 0000000000..30aef6164a
--- /dev/null
+++ b/r/tests/testthat/_snaps/RecordBatch.md
@@ -0,0 +1,9 @@
+# RecordBatch doesn't support rbind
+
+ Use `Table$create()` to combine RecordBatches into a Table
+
+# RecordBatch supports cbind
+
+ Non-scalar inputs must have an equal number of rows.
+ i ..1 has 10, ..2 has 2
+
diff --git a/r/tests/testthat/_snaps/Table.md b/r/tests/testthat/_snaps/Table.md
new file mode 100644
index 0000000000..47a565be68
--- /dev/null
+++ b/r/tests/testthat/_snaps/Table.md
@@ -0,0 +1,5 @@
+# Table supports cbind
+
+ Non-scalar inputs must have an equal number of rows.
+ i ..1 has 10, ..2 has 2
+
diff --git a/r/tests/testthat/test-Array.R b/r/tests/testthat/test-Array.R
index 2f75efb3d6..1e774d7fb6 100644
--- a/r/tests/testthat/test-Array.R
+++ b/r/tests/testthat/test-Array.R
@@ -1056,30 +1056,12 @@ test_that("concat_arrays() coerces its input to Array", {
)
})
-test_that("c() works for Array", {
- expect_r6_class(c(Array$create(1L), Array$create(1L)), "Array")
-
- struct <- call_function(
- "make_struct",
- Array$create(1L),
- options = list(field_names = "")
+test_that("Array doesn't support c()", {
+ expect_snapshot_error(
+ c(Array$create(1:2), Array$create(3:5))
)
- expect_r6_class(c(struct, struct), "StructArray")
-
- list <- Array$create(list(1))
- expect_r6_class(c(list, list), "ListArray")
-
- list <- Array$create(list(), type = large_list_of(float64()))
- expect_r6_class(c(list, list), "LargeListArray")
-
- list <- Array$create(list(), type = fixed_size_list_of(float64(), 1L))
- expect_r6_class(c(list, list), "FixedSizeListArray")
-
- list <- Array$create(list(), type = map_of(string(), float64()))
- expect_r6_class(c(list, list), "MapArray")
})
-
test_that("Array to C-interface", {
# create a struct array since that's one of the more complicated array types
df <- tibble::tibble(x = 1:10, y = x / 2, z = letters[1:10])
diff --git a/r/tests/testthat/test-RecordBatch.R b/r/tests/testthat/test-RecordBatch.R
index d280754a33..c284b7b1d5 100644
--- a/r/tests/testthat/test-RecordBatch.R
+++ b/r/tests/testthat/test-RecordBatch.R
@@ -513,6 +513,103 @@ test_that("record_batch() with different length arrays", {
expect_error(record_batch(a = 1:5, b = 1:6), msg)
})
+test_that("RecordBatch doesn't support rbind", {
+ expect_snapshot_error(
+ rbind(
+ record_batch(a = 1:10),
+ record_batch(a = 2:4)
+ )
+ )
+})
+
+test_that("RecordBatch supports cbind", {
+ expect_snapshot_error(
+ cbind(
+ record_batch(a = 1:10),
+ record_batch(a = c("a", "b"))
+ )
+ )
+ expect_error(
+ cbind(record_batch(a = 1:10), record_batch(b = character(0))),
+ regexp = "Non-scalar inputs must have an equal number of rows"
+ )
+
+ actual <- cbind(
+ record_batch(a = c(1, 2), b = c("a", "b")),
+ record_batch(a = c("d", "c")),
+ record_batch(c = c(2, 3))
+ )
+ expected <- record_batch(
+ a = c(1, 2),
+ b = c("a", "b"),
+ a = c("d", "c"),
+ c = c(2, 3)
+ )
+ expect_equal(actual, expected)
+
+ # cbind() with one argument returns identical table
+ expected <- record_batch(a = 1:10)
+ expect_equal(expected, cbind(expected))
+
+ # Handles arrays
+ expect_equal(
+ cbind(record_batch(a = 1:2), b = Array$create(4:5)),
+ record_batch(a = 1:2, b = 4:5)
+ )
+
+ # Handles data.frames on R 4.0 or greater
+ if (getRversion() >= "4.0.0") {
+ # Prior to R 4.0, cbind would short-circuit to the data.frame implementation
+ # if **any** of the arguments are a data.frame.
+ expect_equal(
+ cbind(record_batch(a = 1:2), data.frame(b = 4:5)),
+ record_batch(a = 1:2, b = 4:5)
+ )
+ }
+
+ # Handles base factors
+ expect_equal(
+ cbind(record_batch(a = 1:2), b = factor(c("a", "b"))),
+ record_batch(a = 1:2, b = factor(c("a", "b")))
+ )
+
+ # Handles base scalars
+ expect_equal(
+ cbind(record_batch(a = 1:2), b = 1L),
+ record_batch(a = 1:2, b = rep(1L, 2))
+ )
+
+ # Handles zero rows
+ expect_equal(
+ cbind(record_batch(a = character(0)), b = Array$create(numeric(0)), c = integer(0)),
+ record_batch(a = character(0), b = numeric(0), c = integer(0)),
+ )
+
+ # Rejects unnamed arrays, even in cases where no named arguments are passed
+ expect_error(
+ cbind(record_batch(a = 1:2), b = 3:4, 5:6),
+ regexp = "Vector and array arguments must have names"
+ )
+ expect_error(
+ cbind(record_batch(a = 1:2), 3:4, 5:6),
+ regexp = "Vector and array arguments must have names"
+ )
+
+ # Rejects Table and ChunkedArray arguments
+ if (getRversion() >= "4.0.0") {
+ # R 3.6 cbind dispatch rules cause cbind to fall back to default impl if
+ # there are multiple arguments with distinct cbind implementations
+ expect_error(
+ cbind(record_batch(a = 1:2), arrow_table(b = 3:4)),
+ regexp = "Cannot cbind a RecordBatch with Tables or ChunkedArrays"
+ )
+ }
+ expect_error(
+ cbind(record_batch(a = 1:2), b = chunked_array(1, 2)),
+ regexp = "Cannot cbind a RecordBatch with Tables or ChunkedArrays"
+ )
+})
+
test_that("Handling string data with embedded nuls", {
raws <- Array$create(structure(list(
as.raw(c(0x70, 0x65, 0x72, 0x73, 0x6f, 0x6e)),
@@ -655,7 +752,7 @@ test_that("RecordBatch to C-interface", {
# then import it and check that the roundtripped value is the same
circle <- RecordBatch$import_from_c(array_ptr, schema_ptr)
- expect_equal
+ expect_equal(batch, circle)
# must clean up the pointers or we leak
delete_arrow_schema(schema_ptr)
diff --git a/r/tests/testthat/test-Table.R b/r/tests/testthat/test-Table.R
index 44144c00ba..89c22b97e1 100644
--- a/r/tests/testthat/test-Table.R
+++ b/r/tests/testthat/test-Table.R
@@ -518,6 +518,147 @@ test_that("Table$create() no recycling with tibbles", {
)
})
+test_that("Tables can be combined with concat_tables()", {
+ expect_error(
+ concat_tables(arrow_table(a = 1:10), arrow_table(a = c("a", "b")), unify_schemas = FALSE),
+ regexp = "Schema at index 2 does not match the first schema"
+ )
+
+ expect_error(
+ concat_tables(arrow_table(a = 1:10), arrow_table(a = c("a", "b")), unify_schemas = TRUE),
+ regexp = "Unable to merge: Field a has incompatible types: int32 vs string"
+ )
+ expect_error(
+ concat_tables(),
+ regexp = "Must pass at least one Table"
+ )
+
+ expect_equal(
+ concat_tables(
+ arrow_table(a = 1:5),
+ arrow_table(a = 6:7, b = c("d", "e"))
+ ),
+ arrow_table(a = 1:7, b = c(rep(NA, 5), "d", "e"))
+ )
+
+ # concat_tables() with one argument returns identical table
+ expected <- arrow_table(a = 1:10)
+ expect_equal(expected, concat_tables(expected))
+})
+
+test_that("Table supports rbind", {
+ expect_error(
+ rbind(arrow_table(a = 1:10), arrow_table(a = c("a", "b"))),
+ regexp = "Schema at index 2 does not match the first schema"
+ )
+
+ tables <- list(
+ arrow_table(a = 1:10, b = Scalar$create("x")),
+ arrow_table(a = 2:42, b = Scalar$create("y")),
+ arrow_table(a = 8:10, b = Scalar$create("z"))
+ )
+ expected <- Table$create(do.call(rbind, lapply(tables, as.data.frame)))
+ actual <- do.call(rbind, tables)
+ expect_equal(actual, expected, ignore_attr = TRUE)
+
+ # rbind with empty table produces identical table
+ expected <- arrow_table(a = 1:10, b = Scalar$create("x"))
+ expect_equal(
+ rbind(expected, arrow_table(a = integer(0), b = character(0))),
+ expected
+ )
+ # rbind() with one argument returns identical table
+ expect_equal(rbind(expected), expected)
+})
+
+test_that("Table supports cbind", {
+ expect_snapshot_error(
+ cbind(
+ arrow_table(a = 1:10),
+ arrow_table(a = c("a", "b"))
+ )
+ )
+ expect_error(
+ cbind(arrow_table(a = 1:10), arrow_table(b = character(0))),
+ regexp = "Non-scalar inputs must have an equal number of rows"
+ )
+
+ actual <- cbind(
+ arrow_table(a = 1:10, b = Scalar$create("x")),
+ arrow_table(a = 11:20, b = Scalar$create("y")),
+ arrow_table(c = 1:10)
+ )
+ expected <- arrow_table(cbind(
+ tibble::tibble(a = 1:10, b = "x"),
+ tibble::tibble(a = 11:20, b = "y"),
+ tibble::tibble(c = 1:10)
+ ))
+ expect_equal(actual, expected, ignore_attr = TRUE)
+
+ # cbind() with one argument returns identical table
+ expected <- arrow_table(a = 1:10)
+ expect_equal(expected, cbind(expected))
+
+ # Handles Arrow arrays and chunked arrays
+ expect_equal(
+ cbind(arrow_table(a = 1:2), b = Array$create(4:5)),
+ arrow_table(a = 1:2, b = 4:5)
+ )
+ expect_equal(
+ cbind(arrow_table(a = 1:2), b = chunked_array(4, 5)),
+ arrow_table(a = 1:2, b = chunked_array(4, 5))
+ )
+
+ # Handles data.frame
+ if (getRversion() >= "4.0.0") {
+ # Prior to R 4.0, cbind would short-circuit to the data.frame implementation
+ # if **any** of the arguments are a data.frame.
+ expect_equal(
+ cbind(arrow_table(a = 1:2), data.frame(b = 4:5)),
+ arrow_table(a = 1:2, b = 4:5)
+ )
+ }
+
+ # Handles factors
+ expect_equal(
+ cbind(arrow_table(a = 1:2), b = factor(c("a", "b"))),
+ arrow_table(a = 1:2, b = factor(c("a", "b")))
+ )
+
+ # Handles scalar values
+ expect_equal(
+ cbind(arrow_table(a = 1:2), b = "x"),
+ arrow_table(a = 1:2, b = c("x", "x"))
+ )
+
+ # Handles zero rows
+ expect_equal(
+ cbind(arrow_table(a = character(0)), b = Array$create(numeric(0)), c = integer(0)),
+ arrow_table(a = character(0), b = numeric(0), c = integer(0)),
+ )
+
+ # Rejects unnamed arrays, even in cases where no named arguments are passed
+ expect_error(
+ cbind(arrow_table(a = 1:2), b = 3:4, 5:6),
+ regexp = "Vector and array arguments must have names"
+ )
+ expect_error(
+ cbind(arrow_table(a = 1:2), 3:4, 5:6),
+ regexp = "Vector and array arguments must have names"
+ )
+})
+
+test_that("cbind.Table handles record batches and tables", {
+ # R 3.6 cbind dispatch rules cause cbind to fall back to default impl if
+ # there are multiple arguments with distinct cbind implementations
+ skip_if(getRversion() < "4.0.0", "R 3.6 cbind dispatch rules prevent this behavior")
+
+ expect_equal(
+ cbind(arrow_table(a = 1L:2L), record_batch(b = 4:5)),
+ arrow_table(a = 1L:2L, b = 4:5)
+ )
+})
+
test_that("ARROW-11769 - grouping preserved in table creation", {
skip_if_not_available("dataset")
diff --git a/r/tests/testthat/test-chunked-array.R b/r/tests/testthat/test-chunked-array.R
index 730868fa5e..87d2a9d92a 100644
--- a/r/tests/testthat/test-chunked-array.R
+++ b/r/tests/testthat/test-chunked-array.R
@@ -91,6 +91,27 @@ test_that("ChunkedArray", {
expect_warning(z$Slice(2, 10), "Slice 'length' greater than available length")
})
+test_that("ChunkedArray can be constructed from Array and ChunkedArrays", {
+ expect_equal(
+ chunked_array(Array$create(1:2), Array$create(3:4)),
+ chunked_array(1:2, 3:4),
+ )
+ expect_equal(
+ chunked_array(chunked_array(1:2, 3:4), chunked_array(5:6)),
+ chunked_array(1:2, 3:4, 5:6),
+ )
+
+ # Cannot mix array types
+ expect_error(
+ chunked_array(Array$create(1:2), Array$create(c("a", "b"))),
+ regexp = "Array chunks must all be same type"
+ )
+ expect_error(
+ chunked_array(chunked_array(1:2), chunked_array(c("a", "b"))),
+ regexp = "Array chunks must all be same type"
+ )
+})
+
test_that("print ChunkedArray", {
verify_output(test_path("test-chunked-array.txt"), {
chunked_array(c(1, 2, 3), c(4, 5, 6))
@@ -100,6 +121,18 @@ test_that("print ChunkedArray", {
})
})
+test_that("ChunkedArray can be concatenated with c()", {
+ a <- chunked_array(c(1, 2), 3)
+ b <- chunked_array(c(4, 5), 6)
+ expected <- chunked_array(c(1, 2), 3, c(4, 5), 6)
+ expect_equal(c(a, b), expected)
+
+ # Can handle Arrays and base vectors
+ vectors <- list(chunked_array(1:10), Array$create(1:10), 1:10)
+ expected <- chunked_array(1:10, 1:10, 1:10)
+ expect_equal(do.call(c, vectors), expected)
+})
+
test_that("ChunkedArray handles !!! splicing", {
data <- list(1, 2, 3)
x <- chunked_array(!!!data)