Posted to commits@arrow.apache.org by ap...@apache.org on 2019/06/25 18:20:32 UTC

[arrow] branch master updated: ARROW-5492: [R] Add "col_select" argument to read_* functions to read subset of columns

This is an automated email from the ASF dual-hosted git repository.

apitrou pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/master by this push:
     new 2b5fbed  ARROW-5492: [R] Add "col_select" argument to read_* functions to read subset of columns
2b5fbed is described below

commit 2b5fbed581fc97f928697adcd2d09ed9a80a631a
Author: Romain Francois <ro...@rstudio.com>
AuthorDate: Tue Jun 25 20:20:15 2019 +0200

    ARROW-5492: [R] Add "col_select" argument to read_* functions to read subset of columns
    
    ``` r
    library(arrow, warn.conflicts = FALSE)
    library(tibble)
    
    tf <- tempfile()
    write.csv(iris, tf, row.names = FALSE, quote = FALSE)
    
    read_csv_arrow(tf, col_select = starts_with("Sepal"))
    #> # A tibble: 150 x 2
    #>    Sepal.Length Sepal.Width
    #>           <dbl>       <dbl>
    #>  1          5.1         3.5
    #>  2          4.9         3
    #>  3          4.7         3.2
    #>  4          4.6         3.1
    #>  5          5           3.6
    #>  6          5.4         3.9
    #>  7          4.6         3.4
    #>  8          5           3.4
    #>  9          4.4         2.9
    #> 10          4.9         3.1
    #> # … with 140 more rows
    
    tf <- tempfile()
    write_feather(iris, tf)
    read_feather(tf, col_select = contains("."))
    #> # A tibble: 150 x 4
    #>    Sepal.Length Sepal.Width Petal.Length Petal.Width
    #>           <dbl>       <dbl>        <dbl>       <dbl>
    #>  1          5.1         3.5          1.4         0.2
    #>  2          4.9         3            1.4         0.2
    #>  3          4.7         3.2          1.3         0.2
    #>  4          4.6         3.1          1.5         0.2
    #>  5          5           3.6          1.4         0.2
    #>  6          5.4         3.9          1.7         0.4
    #>  7          4.6         3.4          1.4         0.3
    #>  8          5           3.4          1.5         0.2
    #>  9          4.4         2.9          1.4         0.2
    #> 10          4.9         3.1          1.5         0.1
    #> # … with 140 more rows
    ```
    
    <sup>Created on 2019-06-20 by the [reprex package](https://reprex.tidyverse.org) (v0.3.0.9000)</sup>
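    
    The same `col_select` argument is also added to `read_parquet()` and
    `read_json_arrow()` by this change. A minimal sketch, not part of the reprex
    above; the toy data and column names are illustrative only:
    
    ``` r
    tf <- tempfile()
    write_parquet(tibble(x = 1:5, y = 6:10), tf)
    read_parquet(tf, col_select = y)          # only column "y" is read
    
    tf2 <- tempfile()
    writeLines('
      { "hello": 3.5, "world": false }
      { "hello": 0.0, "world": true }
    ', tf2)
    read_json_arrow(tf2, col_select = c(hello, world))
    ```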
    
    Author: Romain Francois <ro...@rstudio.com>
    Author: Romain François <ro...@rstudio.com>
    
    Closes #4627 from romainfrancois/ARROW-5492/tidyselect and squashes the following commits:
    
    fe5906ceb <Romain François> Merge branch 'master' into ARROW-5492/tidyselect
    89bcfdd11 <Romain Francois> always mmap
    48fc793cc <Romain Francois> remmove ARROW_R_WITH_PARQUET
    e7403c533 <Romain Francois> use remotes::install_github() instead of pak::pkg_install()
    b73be3651 <Romain Francois> merge hickup
    1e40a0da0 <Romain Francois> read_json_arrow(col_select=)
    e112ff8c5 <Romain Francois> support for col_select=<tidy selection> in read_parquet()
    21561b17b <Romain Francois> FileReader$GetSchema()
    f6d824cf7 <Romain Francois> s/ParquetFileReader/FileReader/
    f5bb537b0 <Romain Francois> expose parquet::arrow::ArrowReaderProperties
    512579e06 <Romain Francois> expose ParquetFileReader at the R side
    c44b9cb98 <Romain Francois> more read_csv_arrow(col_select=) tests
    4d5515f8a <Romain Francois> read_csv_arrow(col_select=, as_tibble=)
    07bcddaf6 <Romain Francois> Table$select(<tidy selection>)
    826cd3aab <Romain Francois> tests for read_feather(col_select=<tidyselect helper>)
    54b179fcf <Romain Francois> re exporting tidyselect helpers for convenience
    e31098a4a <Romain Francois> read_feather(col_select=) using tidy selection, as in vroom::vroom
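    
    For reference, a minimal sketch of the lower-level API these commits expose
    (`parquet_file_reader()`, `FileReader$GetSchema()`/`$ReadTable()`, and
    `Table$select()`); the toy file and columns are illustrative only:
    
    ``` r
    library(arrow, warn.conflicts = FALSE)
    library(tibble)
    
    tf <- tempfile()
    write_parquet(tibble(x = 1:5, y = 6:10), tf)
    
    # open the file with explicit reader properties
    props  <- parquet_arrow_reader_properties(use_threads = TRUE)
    reader <- parquet_file_reader(tf, props = props)
    
    reader$GetSchema()$names        # all column names in the file
    tab <- reader$ReadTable(y)      # arrow::Table restricted to column "y"
    
    # Table$select() applies the same tidy selection to an existing arrow::Table
    tab$select(everything())
    ```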
---
 r/DESCRIPTION                                      |   4 +-
 r/NAMESPACE                                        |  25 +++
 r/R/Table.R                                        |  13 ++
 r/R/arrow-package.R                                |   3 +-
 r/R/arrowExports.R                                 |  52 ++++-
 r/R/csv.R                                          |  30 ++-
 r/R/feather.R                                      |  14 +-
 r/R/json.R                                         |   6 +-
 r/R/parquet.R                                      |  99 +++++++++-
 .../test-parquet.R => R/reexports-tidyselect.R}    |  46 +++--
 r/configure                                        |   2 +-
 r/configure.win                                    |   2 +-
 r/data-raw/codegen.R                               |   6 +-
 r/man/parquet_arrow_reader_properties.Rd           |  14 ++
 r/man/parquet_file_reader.Rd                       |  18 ++
 r/man/read_csv_arrow.Rd                            |  17 +-
 r/man/read_feather.Rd                              |   4 +-
 r/man/read_json_arrow.Rd                           |   4 +-
 r/man/read_parquet.Rd                              |  11 +-
 r/man/reexports.Rd                                 |  13 +-
 r/src/arrowExports.cpp                             | 217 ++++++++++++++++++++-
 r/src/arrow_types.h                                |   3 +
 r/src/feather.cpp                                  |  21 +-
 r/src/parquet.cpp                                  |  89 +++++++--
 r/src/table.cpp                                    |  28 +++
 r/tests/testthat/test-arrow-csv.R                  |  37 +++-
 r/tests/testthat/test-feather.R                    |  30 ++-
 r/tests/testthat/test-json.R                       |  16 ++
 r/tests/testthat/test-parquet.R                    |   8 +
 29 files changed, 742 insertions(+), 90 deletions(-)

diff --git a/r/DESCRIPTION b/r/DESCRIPTION
index 70a6654..45edda1 100644
--- a/r/DESCRIPTION
+++ b/r/DESCRIPTION
@@ -31,7 +31,8 @@ Imports:
     assertthat,
     R6,
     fs,
-    bit64
+    bit64,
+    tidyselect
 Roxygen: list(markdown = TRUE)
 RoxygenNote: 6.1.1
 Suggests:
@@ -76,4 +77,5 @@ Collate:
     'read_record_batch.R'
     'read_table.R'
     'reexports-bit64.R'
+    'reexports-tidyselect.R'
     'write_arrow.R'
diff --git a/r/NAMESPACE b/r/NAMESPACE
index 66f2004..a64c5cb 100644
--- a/r/NAMESPACE
+++ b/r/NAMESPACE
@@ -61,6 +61,9 @@ S3method(json_table_reader,default)
 S3method(json_table_reader,fs_path)
 S3method(length,"arrow::Array")
 S3method(names,"arrow::RecordBatch")
+S3method(parquet_file_reader,"arrow::io::RandomAccessFile")
+S3method(parquet_file_reader,character)
+S3method(parquet_file_reader,fs_path)
 S3method(print,"arrow-enum")
 S3method(read_message,"arrow::io::InputStream")
 S3method(read_message,"arrow::ipc::MessageReader")
@@ -120,6 +123,7 @@ export(buffer)
 export(cast_options)
 export(chunked_array)
 export(compression_codec)
+export(contains)
 export(csv_convert_options)
 export(csv_parse_options)
 export(csv_read_options)
@@ -129,6 +133,8 @@ export(date64)
 export(decimal)
 export(default_memory_pool)
 export(dictionary)
+export(ends_with)
+export(everything)
 export(field)
 export(float16)
 export(float32)
@@ -141,10 +147,16 @@ export(int8)
 export(json_parse_options)
 export(json_read_options)
 export(json_table_reader)
+export(last_col)
 export(list_of)
+export(matches)
 export(mmap_create)
 export(mmap_open)
 export(null)
+export(num_range)
+export(one_of)
+export(parquet_arrow_reader_properties)
+export(parquet_file_reader)
 export(read_arrow)
 export(read_csv_arrow)
 export(read_feather)
@@ -156,6 +168,7 @@ export(read_schema)
 export(read_table)
 export(record_batch)
 export(schema)
+export(starts_with)
 export(struct)
 export(table)
 export(time32)
@@ -182,8 +195,20 @@ importFrom(purrr,map_int)
 importFrom(rlang,"%||%")
 importFrom(rlang,abort)
 importFrom(rlang,dots_n)
+importFrom(rlang,enquo)
+importFrom(rlang,enquos)
 importFrom(rlang,is_false)
 importFrom(rlang,list2)
+importFrom(rlang,quo_is_null)
 importFrom(rlang,warn)
+importFrom(tidyselect,contains)
+importFrom(tidyselect,ends_with)
+importFrom(tidyselect,everything)
+importFrom(tidyselect,last_col)
+importFrom(tidyselect,matches)
+importFrom(tidyselect,num_range)
+importFrom(tidyselect,one_of)
+importFrom(tidyselect,starts_with)
+importFrom(tidyselect,vars_select)
 importFrom(utils,packageVersion)
 useDynLib(arrow, .registration = TRUE)
diff --git a/r/R/Table.R b/r/R/Table.R
index d1e4b18..1aec916 100644
--- a/r/R/Table.R
+++ b/r/R/Table.R
@@ -40,6 +40,19 @@
       assert_that(inherits(options, "arrow::compute::CastOptions"))
       assert_that(identical(self$schema$names, target_schema$names), msg = "incompatible schemas")
       shared_ptr(`arrow::Table`, Table__cast(self, target_schema, options))
+    },
+
+    select = function(spec) {
+      spec <- enquo(spec)
+      if (quo_is_null(spec)) {
+        self
+      } else {
+        all_vars <- Table__column_names(self)
+        vars <- vars_select(all_vars, !!spec)
+        indices <- match(vars, all_vars)
+        shared_ptr(`arrow::Table`, Table__select(self, indices))
+      }
+
     }
   ),
 
diff --git a/r/R/arrow-package.R b/r/R/arrow-package.R
index 77b3a31..d21931c 100644
--- a/r/R/arrow-package.R
+++ b/r/R/arrow-package.R
@@ -18,8 +18,9 @@
 #' @importFrom R6 R6Class
 #' @importFrom purrr map map_int map2
 #' @importFrom assertthat assert_that
-#' @importFrom rlang list2 %||% is_false abort dots_n warn
+#' @importFrom rlang list2 %||% is_false abort dots_n warn enquo quo_is_null enquos
 #' @importFrom Rcpp sourceCpp
+#' @importFrom tidyselect vars_select
 #' @useDynLib arrow, .registration = TRUE
 #' @keywords internal
 "_PACKAGE"
diff --git a/r/R/arrowExports.R b/r/R/arrowExports.R
index fa6cb13..06efab1 100644
--- a/r/R/arrowExports.R
+++ b/r/R/arrowExports.R
@@ -548,6 +548,10 @@ ipc___feather___TableReader__Open <- function(stream){
     .Call(`_arrow_ipc___feather___TableReader__Open` , stream)
 }
 
+ipc___feather___TableReader__column_names <- function(reader){
+    .Call(`_arrow_ipc___feather___TableReader__column_names` , reader)
+}
+
 Field__initialize <- function(name, field, nullable){
     .Call(`_arrow_Field__initialize` , name, field, nullable)
 }
@@ -744,14 +748,50 @@ ipc___ReadMessage <- function(stream){
     .Call(`_arrow_ipc___ReadMessage` , stream)
 }
 
-read_parquet_file <- function(filename){
-    .Call(`_arrow_read_parquet_file` , filename)
+parquet___arrow___ArrowReaderProperties__Make <- function(use_threads){
+    .Call(`_arrow_parquet___arrow___ArrowReaderProperties__Make` , use_threads)
+}
+
+parquet___arrow___ArrowReaderProperties__set_use_threads <- function(properties, use_threads){
+    invisible(.Call(`_arrow_parquet___arrow___ArrowReaderProperties__set_use_threads` , properties, use_threads))
+}
+
+parquet___arrow___ArrowReaderProperties__get_use_threads <- function(properties, use_threads){
+    .Call(`_arrow_parquet___arrow___ArrowReaderProperties__get_use_threads` , properties, use_threads)
+}
+
+parquet___arrow___ArrowReaderProperties__get_read_dictionary <- function(properties, column_index){
+    .Call(`_arrow_parquet___arrow___ArrowReaderProperties__get_read_dictionary` , properties, column_index)
+}
+
+parquet___arrow___ArrowReaderProperties__set_read_dictionary <- function(properties, column_index, read_dict){
+    invisible(.Call(`_arrow_parquet___arrow___ArrowReaderProperties__set_read_dictionary` , properties, column_index, read_dict))
+}
+
+parquet___arrow___FileReader__OpenFile <- function(file, props){
+    .Call(`_arrow_parquet___arrow___FileReader__OpenFile` , file, props)
+}
+
+parquet___arrow___FileReader__ReadTable1 <- function(reader){
+    .Call(`_arrow_parquet___arrow___FileReader__ReadTable1` , reader)
+}
+
+parquet___arrow___FileReader__ReadTable2 <- function(reader, column_indices){
+    .Call(`_arrow_parquet___arrow___FileReader__ReadTable2` , reader, column_indices)
 }
 
 write_parquet_file <- function(table, filename){
     invisible(.Call(`_arrow_write_parquet_file` , table, filename))
 }
 
+parquet___arrow___FileReader__GetSchema2 <- function(reader, indices){
+    .Call(`_arrow_parquet___arrow___FileReader__GetSchema2` , reader, indices)
+}
+
+parquet___arrow___FileReader__GetSchema1 <- function(reader){
+    .Call(`_arrow_parquet___arrow___FileReader__GetSchema1` , reader)
+}
+
 RecordBatch__num_columns <- function(x){
     .Call(`_arrow_RecordBatch__num_columns` , x)
 }
@@ -900,6 +940,14 @@ Table__columns <- function(table){
     .Call(`_arrow_Table__columns` , table)
 }
 
+Table__column_names <- function(table){
+    .Call(`_arrow_Table__column_names` , table)
+}
+
+Table__select <- function(table, indices){
+    .Call(`_arrow_Table__select` , table, indices)
+}
+
 Table__from_dots <- function(lst, schema_sxp){
     .Call(`_arrow_Table__from_dots` , lst, schema_sxp)
 }
diff --git a/r/R/csv.R b/r/R/csv.R
index d34ddcb..03a4b7d 100644
--- a/r/R/csv.R
+++ b/r/R/csv.R
@@ -138,7 +138,7 @@ csv_table_reader.default <- function(file,
   convert_options = csv_convert_options(),
   ...
 ){
-  csv_table_reader(ReadableFile(file),
+  csv_table_reader(mmap_open(file),
     read_options = read_options,
     parse_options = parse_options,
     convert_options = convert_options,
@@ -172,8 +172,30 @@ csv_table_reader.default <- function(file,
 #'
 #' Use arrow::csv::TableReader from [csv_table_reader()]
 #'
-#' @param ... Used to construct an arrow::csv::TableReader
+#' @inheritParams csv_table_reader
+#'
+#' @param col_select [tidy selection specification][tidyselect::vars_select] of columns
+#' @param as_tibble Should the [arrow::Table][arrow__Table] be converted to a data frame.
+#'
 #' @export
-read_csv_arrow <- function(...) {
-  csv_table_reader(...)$Read()
+read_csv_arrow <- function(file,
+  read_options = csv_read_options(),
+  parse_options = csv_parse_options(),
+  convert_options = csv_convert_options(),
+  col_select = NULL,
+  as_tibble = TRUE
+  )
+{
+  reader <- csv_table_reader(file,
+    read_options = read_options,
+    parse_options = parse_options,
+    convert_options = convert_options)
+
+  tab <- reader$Read()$select(!!enquo(col_select))
+
+  if (isTRUE(as_tibble)) {
+    tab <- as.data.frame(tab)
+  }
+
+  tab
 }
diff --git a/r/R/feather.R b/r/R/feather.R
index f197fd0..7877b4b 100644
--- a/r/R/feather.R
+++ b/r/R/feather.R
@@ -162,15 +162,23 @@ FeatherTableReader.fs_path <- function(file, mmap = TRUE, ...) {
 #' Read a feather file
 #'
 #' @param file a arrow::ipc::feather::TableReader or whatever the [FeatherTableReader()] function can handle
-#' @param columns names if the columns to read. The default `NULL` means all columns
+#' @param col_select [tidy selection][tidyselect::vars_select()] of columns to read.
 #' @param as_tibble should the [arrow::Table][arrow__Table] be converted to a tibble.
 #' @param ... additional parameters
 #'
 #' @return a data frame if `as_tibble` is `TRUE` (the default), or a [arrow::Table][arrow__Table] otherwise
 #'
 #' @export
-read_feather <- function(file, columns = NULL, as_tibble = TRUE, ...){
-  out <- FeatherTableReader(file, ...)$Read(columns)
+read_feather <- function(file, col_select = NULL, as_tibble = TRUE, ...){
+  reader <- FeatherTableReader(file, ...)
+
+  all_columns <- ipc___feather___TableReader__column_names(reader)
+  col_select <- enquo(col_select)
+  columns <- if (!quo_is_null(col_select)) {
+    vars_select(all_columns, !!col_select)
+  }
+
+  out <- reader$Read(columns)
   if (isTRUE(as_tibble)) {
     out <- as.data.frame(out)
   }
diff --git a/r/R/json.R b/r/R/json.R
index e51051d..1409e31 100644
--- a/r/R/json.R
+++ b/r/R/json.R
@@ -148,10 +148,12 @@ json_table_reader.default <- function(file,
 #'
 #' @param ... Used to construct an arrow::json::TableReader
 #' @param as_tibble convert the [arrow::Table][arrow__Table] to a data frame
+#' @param col_select [tidy selection][tidyselect::vars_select] of columns
 #'
 #' @export
-read_json_arrow <- function(..., as_tibble = TRUE) {
-  tab <- json_table_reader(...)$Read()
+read_json_arrow <- function(..., as_tibble = TRUE, col_select = NULL) {
+  tab <- json_table_reader(...)$Read()$select(!!enquo(col_select))
+
   if (isTRUE(as_tibble)) {
     tab <- as.data.frame(tab)
   }
diff --git a/r/R/parquet.R b/r/R/parquet.R
index b5e5884..fb7cc45 100644
--- a/r/R/parquet.R
+++ b/r/R/parquet.R
@@ -15,15 +15,102 @@
 # specific language governing permissions and limitations
 # under the License.
 
+#' @include R6.R
+
+`parquet::arrow::FileReader` <- R6Class("parquet::arrow::FileReader",
+  inherit = `arrow::Object`,
+  public = list(
+    ReadTable = function(col_select = NULL) {
+      col_select <- enquo(col_select)
+      if(quo_is_null(col_select)) {
+        shared_ptr(`arrow::Table`, parquet___arrow___FileReader__ReadTable1(self))
+      } else {
+        all_vars <- shared_ptr(`arrow::Schema`, parquet___arrow___FileReader__GetSchema1(self))$names
+        indices <- match(vars_select(all_vars, !!col_select), all_vars) - 1L
+        shared_ptr(`arrow::Table`, parquet___arrow___FileReader__ReadTable2(self, indices))
+      }
+    },
+    GetSchema = function(column_indices = NULL) {
+      if (is.null(column_indices)) {
+        shared_ptr(`arrow::Schema`, parquet___arrow___FileReader__GetSchema1(self))
+      } else {
+        shared_ptr(`arrow::Schema`, parquet___arrow___FileReader__GetSchema2(self, column_indices))
+      }
+
+    }
+  )
+)
+
+`parquet::arrow::ArrowReaderProperties` <- R6Class("parquet::arrow::ArrowReaderProperties",
+  inherit = `arrow::Object`,
+  public = list(
+    read_dictionary = function(column_index) {
+      parquet___arrow___ArrowReaderProperties__get_read_dictionary(self, column_index)
+    },
+    set_read_dictionary = function(column_index, read_dict) {
+      parquet___arrow___ArrowReaderProperties__set_read_dictionary(self, column_index, read_dict)
+    }
+  ),
+  active = list(
+    use_threads = function(use_threads) {
+      if(missing(use_threads)) {
+        parquet___arrow___ArrowReaderProperties__get_use_threads(self)
+      } else {
+        parquet___arrow___ArrowReaderProperties__set_use_threads(self, use_threads)
+      }
+    }
+  )
+)
+
+#' Create a new ArrowReaderProperties instance
+#'
+#' @param use_threads use threads ?
+#'
+#' @export
+parquet_arrow_reader_properties <- function(use_threads = TRUE) {
+  shared_ptr(`parquet::arrow::ArrowReaderProperties`, parquet___arrow___ArrowReaderProperties__Make(isTRUE(use_threads)))
+}
+
+#' Create a FileReader instance
+#'
+#' @param file file
+#' @param props reader file properties, as created by [parquet_arrow_reader_properties()]
+#'
+#' @param ... additional parameters
+#'
+#' @export
+parquet_file_reader <- function(file, props = parquet_arrow_reader_properties(), ...) {
+  UseMethod("parquet_file_reader")
+}
+
+#' @export
+`parquet_file_reader.arrow::io::RandomAccessFile` <- function(file, props = parquet_arrow_reader_properties(), ...) {
+  unique_ptr(`parquet::arrow::FileReader`, parquet___arrow___FileReader__OpenFile(file, props))
+}
+
+#' @export
+parquet_file_reader.fs_path <- function(file, props = parquet_arrow_reader_properties(), memory_map = TRUE, ...) {
+  if (isTRUE(memory_map)) {
+    parquet_file_reader(mmap_open(file), props = props, ...)
+  } else {
+    parquet_file_reader(ReadableFile(file), props = props, ...)
+  }
+}
+
+#' @export
+parquet_file_reader.character <- function(file, props = parquet_arrow_reader_properties(), memory_map = TRUE, ...) {
+  parquet_file_reader(fs::path_abs(file), props = parquet_arrow_reader_properties(), memory_map = memory_map, ...)
+}
+
 #' Read Parquet file from disk
 #'
 #' '[Parquet](https://parquet.apache.org/)' is a columnar storage file format.
 #' This function enables you to read Parquet files into R.
 #'
-#' @param file a file path
+#' @inheritParams parquet_file_reader
 #' @param as_tibble Should the [arrow::Table][arrow__Table] be converted to a
 #' tibble? Default is `TRUE`.
-#' @param ... Additional arguments, currently ignored
+#' @param col_select [tidy selection][tidyselect::vars_select] of columns to read
 #'
 #' @return A [arrow::Table][arrow__Table], or a `tbl_df` if `as_tibble` is
 #' `TRUE`.
@@ -34,9 +121,11 @@
 #' }
 #'
 #' @export
-read_parquet <- function(file, as_tibble = TRUE, ...) {
-  tab <- shared_ptr(`arrow::Table`, read_parquet_file(file))
-  if (isTRUE(as_tibble)) {
+read_parquet <- function(file, props = parquet_arrow_reader_properties(), as_tibble = TRUE, col_select = NULL, ...) {
+  reader <- parquet_file_reader(file, props = props, ...)
+  tab <- reader$ReadTable(!!enquo(col_select))
+
+  if (as_tibble) {
     tab <- as.data.frame(tab)
   }
   tab
diff --git a/r/tests/testthat/test-parquet.R b/r/R/reexports-tidyselect.R
similarity index 55%
copy from r/tests/testthat/test-parquet.R
copy to r/R/reexports-tidyselect.R
index 554744e..2566207 100644
--- a/r/tests/testthat/test-parquet.R
+++ b/r/R/reexports-tidyselect.R
@@ -15,23 +15,29 @@
 # specific language governing permissions and limitations
 # under the License.
 
-context("Parquet file reading/writing")
-
-pq_file <- system.file("v0.7.1.parquet", package="arrow")
-
-test_that("reading a known Parquet file to tibble", {
-  df <- read_parquet(pq_file)
-  expect_true(tibble::is_tibble(df))
-  expect_identical(dim(df), c(10L, 11L))
-  # TODO: assert more about the contents
-})
-
-test_that("simple int column roundtrip", {
-  df <- tibble::tibble(x = 1:5)
-  pq_tmp_file <- tempfile() # You can specify the .parquet here but that's probably not necessary
-  on.exit(unlink(pq_tmp_file))
-
-  write_parquet(df, pq_tmp_file)
-  df_read <- read_parquet(pq_tmp_file)
-  expect_identical(df, df_read)
-})
+# Alias required for help links in downstream packages
+#' @aliases select_helpers
+#' @importFrom tidyselect contains
+#' @export
+tidyselect::contains
+#' @importFrom tidyselect ends_with
+#' @export
+tidyselect::ends_with
+#' @importFrom tidyselect everything
+#' @export
+tidyselect::everything
+#' @importFrom tidyselect matches
+#' @export
+tidyselect::matches
+#' @importFrom tidyselect num_range
+#' @export
+tidyselect::num_range
+#' @importFrom tidyselect one_of
+#' @export
+tidyselect::one_of
+#' @importFrom tidyselect starts_with
+#' @export
+tidyselect::starts_with
+#' @importFrom tidyselect last_col
+#' @export
+tidyselect::last_col
diff --git a/r/configure b/r/configure
index d5a2688..da3d868 100755
--- a/r/configure
+++ b/r/configure
@@ -110,7 +110,7 @@ if [ $? -ne 0 ]; then
   PKG_LIBS=""
   PKG_CFLAGS=""
 else
-  PKG_CFLAGS="$PKG_CFLAGS -DARROW_R_WITH_ARROW -DARROW_R_WITH_PARQUET"
+  PKG_CFLAGS="$PKG_CFLAGS -DARROW_R_WITH_ARROW"
   echo "PKG_CFLAGS=$PKG_CFLAGS"
   echo "PKG_LIBS=$PKG_LIBS"
 fi
diff --git a/r/configure.win b/r/configure.win
index f3d7aad..5ad2a95 100644
--- a/r/configure.win
+++ b/r/configure.win
@@ -38,7 +38,7 @@ if [ $? -ne 0 ]; then
 else
   # Set the right flags to point to and enable arrow/parquet
   RWINLIB="../windows/arrow-${VERSION}"
-  PKG_CFLAGS="-I${RWINLIB}/include -DARROW_STATIC -DPARQUET_STATIC -DARROW_R_WITH_PARQUET -DARROW_R_WITH_ARROW"
+  PKG_CFLAGS="-I${RWINLIB}/include -DARROW_STATIC -DPARQUET_STATIC -DARROW_R_WITH_ARROW"
   PKG_LIBS="-L${RWINLIB}/lib"'$(subst gcc,,$(COMPILED_BY))$(R_ARCH) '"-L${RWINLIB}/lib"'$(R_ARCH) '"-lparquet -larrow -lthrift -lsnappy -lboost_regex-mt-s -lboost_filesystem-mt-s -lboost_system-mt-s -ldouble-conversion -lz -lws2_32"
 fi
 
diff --git a/r/data-raw/codegen.R b/r/data-raw/codegen.R
index 3e56e33..ba5d0cb 100644
--- a/r/data-raw/codegen.R
+++ b/r/data-raw/codegen.R
@@ -35,7 +35,7 @@
 # #endif
 
 suppressPackageStartupMessages({
-  # pak::pkg_install("romainfrancois/decor")
+  # remotes::install_github("romainfrancois/decor")
   library(decor)
 
   library(dplyr)
@@ -43,6 +43,10 @@ suppressPackageStartupMessages({
   library(glue)
 })
 
+if (packageVersion("decor") < '0.0.0.9001') {
+  stop("more recent version of `decor` needed, please install with `remotes::install_github('romainfrancois/decor')`")
+}
+
 decorations <- cpp_decorations() %>%
   filter(decoration == "arrow::export") %>%
   # the three lines below can be expressed with rap()
diff --git a/r/man/parquet_arrow_reader_properties.Rd b/r/man/parquet_arrow_reader_properties.Rd
new file mode 100644
index 0000000..6276d7f
--- /dev/null
+++ b/r/man/parquet_arrow_reader_properties.Rd
@@ -0,0 +1,14 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/parquet.R
+\name{parquet_arrow_reader_properties}
+\alias{parquet_arrow_reader_properties}
+\title{Create a new ArrowReaderProperties instance}
+\usage{
+parquet_arrow_reader_properties(use_threads = TRUE)
+}
+\arguments{
+\item{use_threads}{use threads ?}
+}
+\description{
+Create a new ArrowReaderProperties instance
+}
diff --git a/r/man/parquet_file_reader.Rd b/r/man/parquet_file_reader.Rd
new file mode 100644
index 0000000..8cbe6b0
--- /dev/null
+++ b/r/man/parquet_file_reader.Rd
@@ -0,0 +1,18 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/parquet.R
+\name{parquet_file_reader}
+\alias{parquet_file_reader}
+\title{Create a FileReader instance}
+\usage{
+parquet_file_reader(file, props = parquet_arrow_reader_properties(), ...)
+}
+\arguments{
+\item{file}{file}
+
+\item{props}{reader file properties, as created by \code{\link[=parquet_arrow_reader_properties]{parquet_arrow_reader_properties()}}}
+
+\item{...}{additional parameters}
+}
+\description{
+Create a FileReader instance
+}
diff --git a/r/man/read_csv_arrow.Rd b/r/man/read_csv_arrow.Rd
index 4cdca91..47e5158 100644
--- a/r/man/read_csv_arrow.Rd
+++ b/r/man/read_csv_arrow.Rd
@@ -4,10 +4,23 @@
 \alias{read_csv_arrow}
 \title{Read csv file into an arrow::Table}
 \usage{
-read_csv_arrow(...)
+read_csv_arrow(file, read_options = csv_read_options(),
+  parse_options = csv_parse_options(),
+  convert_options = csv_convert_options(), col_select = NULL,
+  as_tibble = TRUE)
 }
 \arguments{
-\item{...}{Used to construct an arrow::csv::TableReader}
+\item{file}{file}
+
+\item{read_options}{see \code{\link[=csv_read_options]{csv_read_options()}}}
+
+\item{parse_options}{see \code{\link[=csv_parse_options]{csv_parse_options()}}}
+
+\item{convert_options}{see \code{\link[=csv_convert_options]{csv_convert_options()}}}
+
+\item{col_select}{\link[tidyselect:vars_select]{tidy selection specification} of columns}
+
+\item{as_tibble}{Should the \link[=arrow__Table]{arrow::Table} be converted to a data frame.}
 }
 \description{
 Use arrow::csv::TableReader from \code{\link[=csv_table_reader]{csv_table_reader()}}
diff --git a/r/man/read_feather.Rd b/r/man/read_feather.Rd
index 31fd36a..0492cce 100644
--- a/r/man/read_feather.Rd
+++ b/r/man/read_feather.Rd
@@ -4,12 +4,12 @@
 \alias{read_feather}
 \title{Read a feather file}
 \usage{
-read_feather(file, columns = NULL, as_tibble = TRUE, ...)
+read_feather(file, col_select = NULL, as_tibble = TRUE, ...)
 }
 \arguments{
 \item{file}{a arrow::ipc::feather::TableReader or whatever the \code{\link[=FeatherTableReader]{FeatherTableReader()}} function can handle}
 
-\item{columns}{names if the columns to read. The default \code{NULL} means all columns}
+\item{col_select}{\link[tidyselect:vars_select]{tidy selection} of columns to read.}
 
 \item{as_tibble}{should the \link[=arrow__Table]{arrow::Table} be converted to a tibble.}
 
diff --git a/r/man/read_json_arrow.Rd b/r/man/read_json_arrow.Rd
index aa98b69..75ccd69 100644
--- a/r/man/read_json_arrow.Rd
+++ b/r/man/read_json_arrow.Rd
@@ -4,12 +4,14 @@
 \alias{read_json_arrow}
 \title{Read json file into an arrow::Table}
 \usage{
-read_json_arrow(..., as_tibble = TRUE)
+read_json_arrow(..., as_tibble = TRUE, col_select = NULL)
 }
 \arguments{
 \item{...}{Used to construct an arrow::json::TableReader}
 
 \item{as_tibble}{convert the \link[=arrow__Table]{arrow::Table} to a data frame}
+
+\item{col_select}{\link[tidyselect:vars_select]{tidy selection} of columns}
 }
 \description{
 Use \link[=arrow__json__TableReader]{arrow::json::TableReader} from \code{\link[=json_table_reader]{json_table_reader()}}
diff --git a/r/man/read_parquet.Rd b/r/man/read_parquet.Rd
index 3b1973b..145ef65 100644
--- a/r/man/read_parquet.Rd
+++ b/r/man/read_parquet.Rd
@@ -4,15 +4,20 @@
 \alias{read_parquet}
 \title{Read Parquet file from disk}
 \usage{
-read_parquet(file, as_tibble = TRUE, ...)
+read_parquet(file, props = parquet_arrow_reader_properties(),
+  as_tibble = TRUE, col_select = NULL, ...)
 }
 \arguments{
-\item{file}{a file path}
+\item{file}{file}
+
+\item{props}{reader file properties, as created by \code{\link[=parquet_arrow_reader_properties]{parquet_arrow_reader_properties()}}}
 
 \item{as_tibble}{Should the \link[=arrow__Table]{arrow::Table} be converted to a
 tibble? Default is \code{TRUE}.}
 
-\item{...}{Additional arguments, currently ignored}
+\item{col_select}{\link[tidyselect:vars_select]{tidy selection} of columns to read}
+
+\item{...}{additional parameters}
 }
 \value{
 A \link[=arrow__Table]{arrow::Table}, or a \code{tbl_df} if \code{as_tibble} is
diff --git a/r/man/reexports.Rd b/r/man/reexports.Rd
index 6951fcf..3b1e527 100644
--- a/r/man/reexports.Rd
+++ b/r/man/reexports.Rd
@@ -1,10 +1,19 @@
 % Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/reexports-bit64.R
+% Please edit documentation in R/reexports-bit64.R, R/reexports-tidyselect.R
 \docType{import}
 \name{reexports}
 \alias{reexports}
 \alias{print.integer64}
 \alias{str.integer64}
+\alias{contains}
+\alias{select_helpers}
+\alias{ends_with}
+\alias{everything}
+\alias{matches}
+\alias{num_range}
+\alias{one_of}
+\alias{starts_with}
+\alias{last_col}
 \title{Objects exported from other packages}
 \keyword{internal}
 \description{
@@ -13,5 +22,7 @@ below to see their documentation.
 
 \describe{
   \item{bit64}{\code{\link[bit64]{print.integer64}}, \code{\link[bit64]{str.integer64}}}
+
+  \item{tidyselect}{\code{\link[tidyselect]{contains}}, \code{\link[tidyselect]{ends_with}}, \code{\link[tidyselect]{everything}}, \code{\link[tidyselect]{matches}}, \code{\link[tidyselect]{num_range}}, \code{\link[tidyselect]{one_of}}, \code{\link[tidyselect]{starts_with}}, \code{\link[tidyselect]{last_col}}}
 }}
 
diff --git a/r/src/arrowExports.cpp b/r/src/arrowExports.cpp
index 49dbf78..d2962fa 100644
--- a/r/src/arrowExports.cpp
+++ b/r/src/arrowExports.cpp
@@ -2103,6 +2103,21 @@ RcppExport SEXP _arrow_ipc___feather___TableReader__Open(SEXP stream_sexp){
 }
 #endif
 
+// feather.cpp
+#if defined(ARROW_R_WITH_ARROW)
+Rcpp::CharacterVector ipc___feather___TableReader__column_names(const std::unique_ptr<arrow::ipc::feather::TableReader>& reader);
+RcppExport SEXP _arrow_ipc___feather___TableReader__column_names(SEXP reader_sexp){
+BEGIN_RCPP
+	Rcpp::traits::input_parameter<const std::unique_ptr<arrow::ipc::feather::TableReader>&>::type reader(reader_sexp);
+	return Rcpp::wrap(ipc___feather___TableReader__column_names(reader));
+END_RCPP
+}
+#else
+RcppExport SEXP _arrow_ipc___feather___TableReader__column_names(SEXP reader_sexp){
+	Rf_error("Cannot call ipc___feather___TableReader__column_names(). Please use arrow::install_arrow() to install required runtime libraries. ");
+}
+#endif
+
 // field.cpp
 #if defined(ARROW_R_WITH_ARROW)
 std::shared_ptr<arrow::Field> Field__initialize(const std::string& name, const std::shared_ptr<arrow::DataType>& field, bool nullable);
@@ -2860,16 +2875,130 @@ RcppExport SEXP _arrow_ipc___ReadMessage(SEXP stream_sexp){
 
 // parquet.cpp
 #if defined(ARROW_R_WITH_ARROW)
-std::shared_ptr<arrow::Table> read_parquet_file(std::string filename);
-RcppExport SEXP _arrow_read_parquet_file(SEXP filename_sexp){
+std::shared_ptr<parquet::arrow::ArrowReaderProperties> parquet___arrow___ArrowReaderProperties__Make(bool use_threads);
+RcppExport SEXP _arrow_parquet___arrow___ArrowReaderProperties__Make(SEXP use_threads_sexp){
 BEGIN_RCPP
-	Rcpp::traits::input_parameter<std::string>::type filename(filename_sexp);
-	return Rcpp::wrap(read_parquet_file(filename));
+	Rcpp::traits::input_parameter<bool>::type use_threads(use_threads_sexp);
+	return Rcpp::wrap(parquet___arrow___ArrowReaderProperties__Make(use_threads));
+END_RCPP
+}
+#else
+RcppExport SEXP _arrow_parquet___arrow___ArrowReaderProperties__Make(SEXP use_threads_sexp){
+	Rf_error("Cannot call parquet___arrow___ArrowReaderProperties__Make(). Please use arrow::install_arrow() to install required runtime libraries. ");
+}
+#endif
+
+// parquet.cpp
+#if defined(ARROW_R_WITH_ARROW)
+void parquet___arrow___ArrowReaderProperties__set_use_threads(const std::shared_ptr<parquet::arrow::ArrowReaderProperties>& properties, bool use_threads);
+RcppExport SEXP _arrow_parquet___arrow___ArrowReaderProperties__set_use_threads(SEXP properties_sexp, SEXP use_threads_sexp){
+BEGIN_RCPP
+	Rcpp::traits::input_parameter<const std::shared_ptr<parquet::arrow::ArrowReaderProperties>&>::type properties(properties_sexp);
+	Rcpp::traits::input_parameter<bool>::type use_threads(use_threads_sexp);
+	parquet___arrow___ArrowReaderProperties__set_use_threads(properties, use_threads);
+	return R_NilValue;
+END_RCPP
+}
+#else
+RcppExport SEXP _arrow_parquet___arrow___ArrowReaderProperties__set_use_threads(SEXP properties_sexp, SEXP use_threads_sexp){
+	Rf_error("Cannot call parquet___arrow___ArrowReaderProperties__set_use_threads(). Please use arrow::install_arrow() to install required runtime libraries. ");
+}
+#endif
+
+// parquet.cpp
+#if defined(ARROW_R_WITH_ARROW)
+bool parquet___arrow___ArrowReaderProperties__get_use_threads(const std::shared_ptr<parquet::arrow::ArrowReaderProperties>& properties, bool use_threads);
+RcppExport SEXP _arrow_parquet___arrow___ArrowReaderProperties__get_use_threads(SEXP properties_sexp, SEXP use_threads_sexp){
+BEGIN_RCPP
+	Rcpp::traits::input_parameter<const std::shared_ptr<parquet::arrow::ArrowReaderProperties>&>::type properties(properties_sexp);
+	Rcpp::traits::input_parameter<bool>::type use_threads(use_threads_sexp);
+	return Rcpp::wrap(parquet___arrow___ArrowReaderProperties__get_use_threads(properties, use_threads));
+END_RCPP
+}
+#else
+RcppExport SEXP _arrow_parquet___arrow___ArrowReaderProperties__get_use_threads(SEXP properties_sexp, SEXP use_threads_sexp){
+	Rf_error("Cannot call parquet___arrow___ArrowReaderProperties__get_use_threads(). Please use arrow::install_arrow() to install required runtime libraries. ");
+}
+#endif
+
+// parquet.cpp
+#if defined(ARROW_R_WITH_ARROW)
+bool parquet___arrow___ArrowReaderProperties__get_read_dictionary(const std::shared_ptr<parquet::arrow::ArrowReaderProperties>& properties, int column_index);
+RcppExport SEXP _arrow_parquet___arrow___ArrowReaderProperties__get_read_dictionary(SEXP properties_sexp, SEXP column_index_sexp){
+BEGIN_RCPP
+	Rcpp::traits::input_parameter<const std::shared_ptr<parquet::arrow::ArrowReaderProperties>&>::type properties(properties_sexp);
+	Rcpp::traits::input_parameter<int>::type column_index(column_index_sexp);
+	return Rcpp::wrap(parquet___arrow___ArrowReaderProperties__get_read_dictionary(properties, column_index));
+END_RCPP
+}
+#else
+RcppExport SEXP _arrow_parquet___arrow___ArrowReaderProperties__get_read_dictionary(SEXP properties_sexp, SEXP column_index_sexp){
+	Rf_error("Cannot call parquet___arrow___ArrowReaderProperties__get_read_dictionary(). Please use arrow::install_arrow() to install required runtime libraries. ");
+}
+#endif
+
+// parquet.cpp
+#if defined(ARROW_R_WITH_ARROW)
+void parquet___arrow___ArrowReaderProperties__set_read_dictionary(const std::shared_ptr<parquet::arrow::ArrowReaderProperties>& properties, int column_index, bool read_dict);
+RcppExport SEXP _arrow_parquet___arrow___ArrowReaderProperties__set_read_dictionary(SEXP properties_sexp, SEXP column_index_sexp, SEXP read_dict_sexp){
+BEGIN_RCPP
+	Rcpp::traits::input_parameter<const std::shared_ptr<parquet::arrow::ArrowReaderProperties>&>::type properties(properties_sexp);
+	Rcpp::traits::input_parameter<int>::type column_index(column_index_sexp);
+	Rcpp::traits::input_parameter<bool>::type read_dict(read_dict_sexp);
+	parquet___arrow___ArrowReaderProperties__set_read_dictionary(properties, column_index, read_dict);
+	return R_NilValue;
+END_RCPP
+}
+#else
+RcppExport SEXP _arrow_parquet___arrow___ArrowReaderProperties__set_read_dictionary(SEXP properties_sexp, SEXP column_index_sexp, SEXP read_dict_sexp){
+	Rf_error("Cannot call parquet___arrow___ArrowReaderProperties__set_read_dictionary(). Please use arrow::install_arrow() to install required runtime libraries. ");
+}
+#endif
+
+// parquet.cpp
+#if defined(ARROW_R_WITH_ARROW)
+std::unique_ptr<parquet::arrow::FileReader> parquet___arrow___FileReader__OpenFile(const std::shared_ptr<arrow::io::RandomAccessFile>& file, const std::shared_ptr<parquet::arrow::ArrowReaderProperties>& props);
+RcppExport SEXP _arrow_parquet___arrow___FileReader__OpenFile(SEXP file_sexp, SEXP props_sexp){
+BEGIN_RCPP
+	Rcpp::traits::input_parameter<const std::shared_ptr<arrow::io::RandomAccessFile>&>::type file(file_sexp);
+	Rcpp::traits::input_parameter<const std::shared_ptr<parquet::arrow::ArrowReaderProperties>&>::type props(props_sexp);
+	return Rcpp::wrap(parquet___arrow___FileReader__OpenFile(file, props));
+END_RCPP
+}
+#else
+RcppExport SEXP _arrow_parquet___arrow___FileReader__OpenFile(SEXP file_sexp, SEXP props_sexp){
+	Rf_error("Cannot call parquet___arrow___FileReader__OpenFile(). Please use arrow::install_arrow() to install required runtime libraries. ");
+}
+#endif
+
+// parquet.cpp
+#if defined(ARROW_R_WITH_ARROW)
+std::shared_ptr<arrow::Table> parquet___arrow___FileReader__ReadTable1(const std::unique_ptr<parquet::arrow::FileReader>& reader);
+RcppExport SEXP _arrow_parquet___arrow___FileReader__ReadTable1(SEXP reader_sexp){
+BEGIN_RCPP
+	Rcpp::traits::input_parameter<const std::unique_ptr<parquet::arrow::FileReader>&>::type reader(reader_sexp);
+	return Rcpp::wrap(parquet___arrow___FileReader__ReadTable1(reader));
+END_RCPP
+}
+#else
+RcppExport SEXP _arrow_parquet___arrow___FileReader__ReadTable1(SEXP reader_sexp){
+	Rf_error("Cannot call parquet___arrow___FileReader__ReadTable1(). Please use arrow::install_arrow() to install required runtime libraries. ");
+}
+#endif
+
+// parquet.cpp
+#if defined(ARROW_R_WITH_ARROW)
+std::shared_ptr<arrow::Table> parquet___arrow___FileReader__ReadTable2(const std::unique_ptr<parquet::arrow::FileReader>& reader, const std::vector<int>& column_indices);
+RcppExport SEXP _arrow_parquet___arrow___FileReader__ReadTable2(SEXP reader_sexp, SEXP column_indices_sexp){
+BEGIN_RCPP
+	Rcpp::traits::input_parameter<const std::unique_ptr<parquet::arrow::FileReader>&>::type reader(reader_sexp);
+	Rcpp::traits::input_parameter<const std::vector<int>&>::type column_indices(column_indices_sexp);
+	return Rcpp::wrap(parquet___arrow___FileReader__ReadTable2(reader, column_indices));
 END_RCPP
 }
 #else
-RcppExport SEXP _arrow_read_parquet_file(SEXP filename_sexp){
-	Rf_error("Cannot call read_parquet_file(). Please use arrow::install_arrow() to install required runtime libraries. ");
+RcppExport SEXP _arrow_parquet___arrow___FileReader__ReadTable2(SEXP reader_sexp, SEXP column_indices_sexp){
+	Rf_error("Cannot call parquet___arrow___FileReader__ReadTable2(). Please use arrow::install_arrow() to install required runtime libraries. ");
 }
 #endif
 
@@ -2890,6 +3019,37 @@ RcppExport SEXP _arrow_write_parquet_file(SEXP table_sexp, SEXP filename_sexp){
 }
 #endif
 
+// parquet.cpp
+#if defined(ARROW_R_WITH_ARROW)
+std::shared_ptr<arrow::Schema> parquet___arrow___FileReader__GetSchema2(const std::unique_ptr<parquet::arrow::FileReader>& reader, const std::vector<int>& indices);
+RcppExport SEXP _arrow_parquet___arrow___FileReader__GetSchema2(SEXP reader_sexp, SEXP indices_sexp){
+BEGIN_RCPP
+	Rcpp::traits::input_parameter<const std::unique_ptr<parquet::arrow::FileReader>&>::type reader(reader_sexp);
+	Rcpp::traits::input_parameter<const std::vector<int>&>::type indices(indices_sexp);
+	return Rcpp::wrap(parquet___arrow___FileReader__GetSchema2(reader, indices));
+END_RCPP
+}
+#else
+RcppExport SEXP _arrow_parquet___arrow___FileReader__GetSchema2(SEXP reader_sexp, SEXP indices_sexp){
+	Rf_error("Cannot call parquet___arrow___FileReader__GetSchema2(). Please use arrow::install_arrow() to install required runtime libraries. ");
+}
+#endif
+
+// parquet.cpp
+#if defined(ARROW_R_WITH_ARROW)
+std::shared_ptr<arrow::Schema> parquet___arrow___FileReader__GetSchema1(const std::unique_ptr<parquet::arrow::FileReader>& reader);
+RcppExport SEXP _arrow_parquet___arrow___FileReader__GetSchema1(SEXP reader_sexp){
+BEGIN_RCPP
+	Rcpp::traits::input_parameter<const std::unique_ptr<parquet::arrow::FileReader>&>::type reader(reader_sexp);
+	return Rcpp::wrap(parquet___arrow___FileReader__GetSchema1(reader));
+END_RCPP
+}
+#else
+RcppExport SEXP _arrow_parquet___arrow___FileReader__GetSchema1(SEXP reader_sexp){
+	Rf_error("Cannot call parquet___arrow___FileReader__GetSchema1(). Please use arrow::install_arrow() to install required runtime libraries. ");
+}
+#endif
+
 // recordbatch.cpp
 #if defined(ARROW_R_WITH_ARROW)
 int RecordBatch__num_columns(const std::shared_ptr<arrow::RecordBatch>& x);
@@ -3465,6 +3625,37 @@ RcppExport SEXP _arrow_Table__columns(SEXP table_sexp){
 
 // table.cpp
 #if defined(ARROW_R_WITH_ARROW)
+Rcpp::CharacterVector Table__column_names(const std::shared_ptr<arrow::Table>& table);
+RcppExport SEXP _arrow_Table__column_names(SEXP table_sexp){
+BEGIN_RCPP
+	Rcpp::traits::input_parameter<const std::shared_ptr<arrow::Table>&>::type table(table_sexp);
+	return Rcpp::wrap(Table__column_names(table));
+END_RCPP
+}
+#else
+RcppExport SEXP _arrow_Table__column_names(SEXP table_sexp){
+	Rf_error("Cannot call Table__column_names(). Please use arrow::install_arrow() to install required runtime libraries. ");
+}
+#endif
+
+// table.cpp
+#if defined(ARROW_R_WITH_ARROW)
+std::shared_ptr<arrow::Table> Table__select(const std::shared_ptr<arrow::Table>& table, const Rcpp::IntegerVector& indices);
+RcppExport SEXP _arrow_Table__select(SEXP table_sexp, SEXP indices_sexp){
+BEGIN_RCPP
+	Rcpp::traits::input_parameter<const std::shared_ptr<arrow::Table>&>::type table(table_sexp);
+	Rcpp::traits::input_parameter<const Rcpp::IntegerVector&>::type indices(indices_sexp);
+	return Rcpp::wrap(Table__select(table, indices));
+END_RCPP
+}
+#else
+RcppExport SEXP _arrow_Table__select(SEXP table_sexp, SEXP indices_sexp){
+	Rf_error("Cannot call Table__select(). Please use arrow::install_arrow() to install required runtime libraries. ");
+}
+#endif
+
+// table.cpp
+#if defined(ARROW_R_WITH_ARROW)
 std::shared_ptr<arrow::Table> Table__from_dots(SEXP lst, SEXP schema_sxp);
 RcppExport SEXP _arrow_Table__from_dots(SEXP lst_sexp, SEXP schema_sxp_sexp){
 BEGIN_RCPP
@@ -3659,6 +3850,7 @@ static const R_CallMethodDef CallEntries[] = {
 		{ "_arrow_ipc___feather___TableReader__GetColumn", (DL_FUNC) &_arrow_ipc___feather___TableReader__GetColumn, 2}, 
 		{ "_arrow_ipc___feather___TableReader__Read", (DL_FUNC) &_arrow_ipc___feather___TableReader__Read, 2}, 
 		{ "_arrow_ipc___feather___TableReader__Open", (DL_FUNC) &_arrow_ipc___feather___TableReader__Open, 1}, 
+		{ "_arrow_ipc___feather___TableReader__column_names", (DL_FUNC) &_arrow_ipc___feather___TableReader__column_names, 1}, 
 		{ "_arrow_Field__initialize", (DL_FUNC) &_arrow_Field__initialize, 3}, 
 		{ "_arrow_Field__ToString", (DL_FUNC) &_arrow_Field__ToString, 1}, 
 		{ "_arrow_Field__name", (DL_FUNC) &_arrow_Field__name, 1}, 
@@ -3708,8 +3900,17 @@ static const R_CallMethodDef CallEntries[] = {
 		{ "_arrow_ipc___MessageReader__Open", (DL_FUNC) &_arrow_ipc___MessageReader__Open, 1}, 
 		{ "_arrow_ipc___MessageReader__ReadNextMessage", (DL_FUNC) &_arrow_ipc___MessageReader__ReadNextMessage, 1}, 
 		{ "_arrow_ipc___ReadMessage", (DL_FUNC) &_arrow_ipc___ReadMessage, 1}, 
-		{ "_arrow_read_parquet_file", (DL_FUNC) &_arrow_read_parquet_file, 1}, 
+		{ "_arrow_parquet___arrow___ArrowReaderProperties__Make", (DL_FUNC) &_arrow_parquet___arrow___ArrowReaderProperties__Make, 1}, 
+		{ "_arrow_parquet___arrow___ArrowReaderProperties__set_use_threads", (DL_FUNC) &_arrow_parquet___arrow___ArrowReaderProperties__set_use_threads, 2}, 
+		{ "_arrow_parquet___arrow___ArrowReaderProperties__get_use_threads", (DL_FUNC) &_arrow_parquet___arrow___ArrowReaderProperties__get_use_threads, 2}, 
+		{ "_arrow_parquet___arrow___ArrowReaderProperties__get_read_dictionary", (DL_FUNC) &_arrow_parquet___arrow___ArrowReaderProperties__get_read_dictionary, 2}, 
+		{ "_arrow_parquet___arrow___ArrowReaderProperties__set_read_dictionary", (DL_FUNC) &_arrow_parquet___arrow___ArrowReaderProperties__set_read_dictionary, 3}, 
+		{ "_arrow_parquet___arrow___FileReader__OpenFile", (DL_FUNC) &_arrow_parquet___arrow___FileReader__OpenFile, 2}, 
+		{ "_arrow_parquet___arrow___FileReader__ReadTable1", (DL_FUNC) &_arrow_parquet___arrow___FileReader__ReadTable1, 1}, 
+		{ "_arrow_parquet___arrow___FileReader__ReadTable2", (DL_FUNC) &_arrow_parquet___arrow___FileReader__ReadTable2, 2}, 
 		{ "_arrow_write_parquet_file", (DL_FUNC) &_arrow_write_parquet_file, 2}, 
+		{ "_arrow_parquet___arrow___FileReader__GetSchema2", (DL_FUNC) &_arrow_parquet___arrow___FileReader__GetSchema2, 2}, 
+		{ "_arrow_parquet___arrow___FileReader__GetSchema1", (DL_FUNC) &_arrow_parquet___arrow___FileReader__GetSchema1, 1}, 
 		{ "_arrow_RecordBatch__num_columns", (DL_FUNC) &_arrow_RecordBatch__num_columns, 1}, 
 		{ "_arrow_RecordBatch__num_rows", (DL_FUNC) &_arrow_RecordBatch__num_rows, 1}, 
 		{ "_arrow_RecordBatch__schema", (DL_FUNC) &_arrow_RecordBatch__schema, 1}, 
@@ -3747,6 +3948,8 @@ static const R_CallMethodDef CallEntries[] = {
 		{ "_arrow_Table__schema", (DL_FUNC) &_arrow_Table__schema, 1}, 
 		{ "_arrow_Table__column", (DL_FUNC) &_arrow_Table__column, 2}, 
 		{ "_arrow_Table__columns", (DL_FUNC) &_arrow_Table__columns, 1}, 
+		{ "_arrow_Table__column_names", (DL_FUNC) &_arrow_Table__column_names, 1}, 
+		{ "_arrow_Table__select", (DL_FUNC) &_arrow_Table__select, 2}, 
 		{ "_arrow_Table__from_dots", (DL_FUNC) &_arrow_Table__from_dots, 2}, 
 		{ "_arrow_GetCpuThreadPoolCapacity", (DL_FUNC) &_arrow_GetCpuThreadPoolCapacity, 0}, 
 		{ "_arrow_SetCpuThreadPoolCapacity", (DL_FUNC) &_arrow_SetCpuThreadPoolCapacity, 1}, 
diff --git a/r/src/arrow_types.h b/r/src/arrow_types.h
index c93d448..3ff1a3d 100644
--- a/r/src/arrow_types.h
+++ b/r/src/arrow_types.h
@@ -176,6 +176,9 @@ inline std::shared_ptr<T> extract(SEXP x) {
 #include <arrow/json/reader.h>
 #include <arrow/type.h>
 #include <arrow/util/compression.h>
+#include <parquet/arrow/reader.h>
+#include <parquet/arrow/writer.h>
+#include <parquet/exception.h>
 
 RCPP_EXPOSED_ENUM_NODECL(arrow::Type::type)
 RCPP_EXPOSED_ENUM_NODECL(arrow::DateUnit)
diff --git a/r/src/feather.cpp b/r/src/feather.cpp
index 2836ba8..a519881 100644
--- a/r/src/feather.cpp
+++ b/r/src/feather.cpp
@@ -119,16 +119,6 @@ std::shared_ptr<arrow::Table> ipc___feather___TableReader__Read(
   std::shared_ptr<arrow::Table> table;
 
   switch (TYPEOF(columns)) {
-    case INTSXP: {
-      R_xlen_t n = XLENGTH(columns);
-      std::vector<int> indices(n);
-      int* p_columns = INTEGER(columns);
-      for (int i = 0; i < n; i++) {
-        indices[i] = p_columns[i] - 1;
-      }
-      STOP_IF_NOT_OK(reader->Read(indices, &table));
-      break;
-    }
     case STRSXP: {
       R_xlen_t n = XLENGTH(columns);
       std::vector<std::string> names(n);
@@ -157,4 +147,15 @@ std::unique_ptr<arrow::ipc::feather::TableReader> ipc___feather___TableReader__O
   return reader;
 }
 
+// [[arrow::export]]
+Rcpp::CharacterVector ipc___feather___TableReader__column_names(
+    const std::unique_ptr<arrow::ipc::feather::TableReader>& reader) {
+  int64_t n = reader->num_columns();
+  Rcpp::CharacterVector out(n);
+  for (int64_t i = 0; i < n; i++) {
+    out[i] = reader->GetColumnName(i);
+  }
+  return out;
+}
+
 #endif
diff --git a/r/src/parquet.cpp b/r/src/parquet.cpp
index a89801e..9ad1438 100644
--- a/r/src/parquet.cpp
+++ b/r/src/parquet.cpp
@@ -19,44 +19,101 @@
 
 #if defined(ARROW_R_WITH_ARROW)
 
-#ifdef ARROW_R_WITH_PARQUET
 #include <parquet/arrow/reader.h>
 #include <parquet/arrow/writer.h>
 #include <parquet/exception.h>
-#endif
 
 // [[arrow::export]]
-std::shared_ptr<arrow::Table> read_parquet_file(std::string filename) {
-#ifdef ARROW_R_WITH_PARQUET
-  std::shared_ptr<arrow::io::ReadableFile> infile;
-  PARQUET_THROW_NOT_OK(
-      arrow::io::ReadableFile::Open(filename, arrow::default_memory_pool(), &infile));
+std::shared_ptr<parquet::arrow::ArrowReaderProperties>
+parquet___arrow___ArrowReaderProperties__Make(bool use_threads) {
+  return std::make_shared<parquet::arrow::ArrowReaderProperties>(use_threads);
+}
+
+// [[arrow::export]]
+void parquet___arrow___ArrowReaderProperties__set_use_threads(
+    const std::shared_ptr<parquet::arrow::ArrowReaderProperties>& properties,
+    bool use_threads) {
+  properties->set_use_threads(use_threads);
+}
+
+// [[arrow::export]]
+bool parquet___arrow___ArrowReaderProperties__get_use_threads(
+    const std::shared_ptr<parquet::arrow::ArrowReaderProperties>& properties,
+    bool use_threads) {
+  return properties->use_threads();
+}
 
+// [[arrow::export]]
+bool parquet___arrow___ArrowReaderProperties__get_read_dictionary(
+    const std::shared_ptr<parquet::arrow::ArrowReaderProperties>& properties,
+    int column_index) {
+  return properties->read_dictionary(column_index);
+}
+
+// [[arrow::export]]
+void parquet___arrow___ArrowReaderProperties__set_read_dictionary(
+    const std::shared_ptr<parquet::arrow::ArrowReaderProperties>& properties,
+    int column_index, bool read_dict) {
+  properties->set_read_dictionary(column_index, read_dict);
+}
+
+// [[arrow::export]]
+std::unique_ptr<parquet::arrow::FileReader> parquet___arrow___FileReader__OpenFile(
+    const std::shared_ptr<arrow::io::RandomAccessFile>& file,
+    const std::shared_ptr<parquet::arrow::ArrowReaderProperties>& props) {
   std::unique_ptr<parquet::arrow::FileReader> reader;
   PARQUET_THROW_NOT_OK(
-      parquet::arrow::OpenFile(infile, arrow::default_memory_pool(), &reader));
+      parquet::arrow::OpenFile(file, arrow::default_memory_pool(), *props, &reader));
+  return reader;
+}
+
+// [[arrow::export]]
+std::shared_ptr<arrow::Table> parquet___arrow___FileReader__ReadTable1(
+    const std::unique_ptr<parquet::arrow::FileReader>& reader) {
   std::shared_ptr<arrow::Table> table;
   PARQUET_THROW_NOT_OK(reader->ReadTable(&table));
-
   return table;
-#else
-  Rcpp::stop("Support for Parquet is not available.");
+}
 
+// [[arrow::export]]
+std::shared_ptr<arrow::Table> parquet___arrow___FileReader__ReadTable2(
+    const std::unique_ptr<parquet::arrow::FileReader>& reader,
+    const std::vector<int>& column_indices) {
   std::shared_ptr<arrow::Table> table;
+  PARQUET_THROW_NOT_OK(reader->ReadTable(column_indices, &table));
   return table;
-#endif
 }
 
 // [[arrow::export]]
 void write_parquet_file(const std::shared_ptr<arrow::Table>& table,
                         std::string filename) {
-#ifdef ARROW_R_WITH_PARQUET
   std::shared_ptr<arrow::io::OutputStream> sink;
   PARQUET_THROW_NOT_OK(arrow::io::FileOutputStream::Open(filename, &sink));
   PARQUET_THROW_NOT_OK(parquet::arrow::WriteTable(*table, arrow::default_memory_pool(),
                                                   sink, table->num_rows()));
-#else
-  Rcpp::stop("Support for Parquet is not available.");
-#endif
 }
+
+// [[arrow::export]]
+std::shared_ptr<arrow::Schema> parquet___arrow___FileReader__GetSchema2(
+    const std::unique_ptr<parquet::arrow::FileReader>& reader,
+    const std::vector<int>& indices) {
+  std::shared_ptr<arrow::Schema> schema;
+  STOP_IF_NOT_OK(reader->GetSchema(indices, &schema));
+  return schema;
+}
+
+// [[arrow::export]]
+std::shared_ptr<arrow::Schema> parquet___arrow___FileReader__GetSchema1(
+    const std::unique_ptr<parquet::arrow::FileReader>& reader) {
+  // FileReader does not have this exposed
+  // std::shared_ptr<arrow::Schema> schema;
+  // STOP_IF_NOT_OK(reader->GetSchema(&schema));
+
+  // so going indirectly about it
+  std::shared_ptr<arrow::RecordBatchReader> record_batch_reader;
+  STOP_IF_NOT_OK(reader->GetRecordBatchReader({}, &record_batch_reader));
+
+  return record_batch_reader->schema();
+}
+
 #endif
diff --git a/r/src/table.cpp b/r/src/table.cpp
index b181c1c..2d9135b 100644
--- a/r/src/table.cpp
+++ b/r/src/table.cpp
@@ -63,6 +63,34 @@ std::vector<std::shared_ptr<arrow::Column>> Table__columns(
   return res;
 }
 
+// [[arrow::export]]
+Rcpp::CharacterVector Table__column_names(const std::shared_ptr<arrow::Table>& table) {
+  int nc = table->num_columns();
+  Rcpp::CharacterVector res(nc);
+  for (int i = 0; i < nc; i++) {
+    res[i] = table->column(i)->name();
+  }
+  return res;
+}
+
+// [[arrow::export]]
+std::shared_ptr<arrow::Table> Table__select(const std::shared_ptr<arrow::Table>& table,
+                                            const Rcpp::IntegerVector& indices) {
+  R_xlen_t n = indices.size();
+
+  std::vector<std::shared_ptr<arrow::Field>> fields(n);
+  std::vector<std::shared_ptr<arrow::Column>> columns(n);
+
+  for (R_xlen_t i = 0; i < n; i++) {
+    int pos = indices[i] - 1;
+    fields[i] = table->schema()->field(pos);
+    columns[i] = table->column(pos);
+  }
+
+  auto schema = std::make_shared<arrow::Schema>(std::move(fields));
+  return arrow::Table::Make(schema, columns);
+}
+
 bool all_record_batches(SEXP lst) {
   R_xlen_t n = XLENGTH(lst);
   for (R_xlen_t i = 0; i < n; i++) {
diff --git a/r/tests/testthat/test-arrow-csv.R b/r/tests/testthat/test-arrow-csv.R
index ee09b60..7f0c1ae 100644
--- a/r/tests/testthat/test-arrow-csv.R
+++ b/r/tests/testthat/test-arrow-csv.R
@@ -22,9 +22,9 @@ test_that("Can read csv file", {
 
   write.csv(iris, tf, row.names = FALSE, quote = FALSE)
 
-  tab1 <- read_csv_arrow(tf)
-  tab2 <- read_csv_arrow(mmap_open(tf))
-  tab3 <- read_csv_arrow(ReadableFile(tf))
+  tab1 <- read_csv_arrow(tf, as_tibble = FALSE)
+  tab2 <- read_csv_arrow(mmap_open(tf), as_tibble = FALSE)
+  tab3 <- read_csv_arrow(ReadableFile(tf), as_tibble = FALSE)
 
   iris$Species <- as.character(iris$Species)
   tab0 <- table(!!!iris)
@@ -34,3 +34,34 @@ test_that("Can read csv file", {
 
   unlink(tf)
 })
+
+test_that("read_csv_arrow(as_tibble=TRUE)", {
+  tf <- tempfile()
+
+  write.csv(iris, tf, row.names = FALSE, quote = FALSE)
+
+  tab1 <- read_csv_arrow(tf, as_tibble = TRUE)
+  tab2 <- read_csv_arrow(mmap_open(tf), as_tibble = TRUE)
+  tab3 <- read_csv_arrow(ReadableFile(tf), as_tibble = TRUE)
+
+  iris$Species <- as.character(iris$Species)
+  expect_equivalent(iris, tab1)
+  expect_equivalent(iris, tab2)
+  expect_equivalent(iris, tab3)
+
+  unlink(tf)
+})
+
+test_that("read_csv_arrow() respects col_select", {
+  tf <- tempfile()
+
+  write.csv(iris, tf, row.names = FALSE, quote = FALSE)
+
+  tab <- read_csv_arrow(tf, col_select = starts_with("Sepal"), as_tibble = FALSE)
+  expect_equal(tab, table(Sepal.Length = iris$Sepal.Length, Sepal.Width = iris$Sepal.Width))
+
+  tib <- read_csv_arrow(tf, col_select = starts_with("Sepal"), as_tibble = TRUE)
+  expect_equal(tib, tibble::tibble(Sepal.Length = iris$Sepal.Length, Sepal.Width = iris$Sepal.Width))
+
+  unlink(tf)
+})
diff --git a/r/tests/testthat/test-feather.R b/r/tests/testthat/test-feather.R
index 6d874b3..adf8151 100644
--- a/r/tests/testthat/test-feather.R
+++ b/r/tests/testthat/test-feather.R
@@ -62,14 +62,14 @@ test_that("feather read/write round trip", {
   unlink(tf3)
 })
 
-test_that("feather handles columns = <names>", {
+test_that("feather handles col_select = <names>", {
   tib <- tibble::tibble(x = 1:10, y = rnorm(10), z = letters[1:10])
 
   tf1 <- tempfile()
   write_feather(tib, tf1)
   expect_true(fs::file_exists(tf1))
 
-  tab1 <- read_feather(tf1, columns = c("x", "y"))
+  tab1 <- read_feather(tf1, col_select = c("x", "y"))
   expect_is(tab1, "data.frame")
 
   expect_equal(tib$x, tab1$x)
@@ -78,14 +78,14 @@ test_that("feather handles columns = <names>", {
   unlink(tf1)
 })
 
-test_that("feather handles columns = <integer>", {
+test_that("feather handles col_select = <integer>", {
   tib <- tibble::tibble(x = 1:10, y = rnorm(10), z = letters[1:10])
 
   tf1 <- tempfile()
   write_feather(tib, tf1)
   expect_true(fs::file_exists(tf1))
 
-  tab1 <- read_feather(tf1, columns = 1:2)
+  tab1 <- read_feather(tf1, col_select = 1:2)
   expect_is(tab1, "data.frame")
 
   expect_equal(tib$x, tab1$x)
@@ -93,6 +93,28 @@ test_that("feather handles columns = <integer>", {
   unlink(tf1)
 })
 
+test_that("feather handles col_select = <tidyselect helper>", {
+  tib <- tibble::tibble(x = 1:10, y = rnorm(10), z = letters[1:10])
+
+  tf1 <- tempfile()
+  write_feather(tib, tf1)
+  expect_true(fs::file_exists(tf1))
+
+  tab1 <- read_feather(tf1, col_select = everything())
+  expect_identical(tib, tab1)
+
+  tab2 <- read_feather(tf1, col_select = starts_with("x"))
+  expect_identical(tab2, tib[, "x", drop = FALSE])
+
+  tab3 <- read_feather(tf1, col_select = c(starts_with("x"), contains("y")))
+  expect_identical(tab3, tib[, c("x", "y"), drop = FALSE])
+
+  tab4 <- read_feather(tf1, col_select = -z)
+  expect_identical(tab4, tib[, c("x", "y"), drop = FALSE])
+
+  unlink(tf1)
+})
+
 test_that("feather read/write round trip", {
   tib <- tibble::tibble(x = 1:10, y = rnorm(10), z = letters[1:10])
 
diff --git a/r/tests/testthat/test-json.R b/r/tests/testthat/test-json.R
index 1521627..9413904 100644
--- a/r/tests/testthat/test-json.R
+++ b/r/tests/testthat/test-json.R
@@ -70,6 +70,22 @@ test_that("read_json_arrow() converts to tibble", {
   expect_equal(tab1$yo, c("thing", NA, "\u5fcd", NA))
 })
 
+test_that("read_json_arrow() supports col_select=", {
+  tf <- tempfile()
+  writeLines('
+    { "hello": 3.5, "world": false, "yo": "thing" }
+    { "hello": 3.25, "world": null }
+    { "hello": 3.125, "world": null, "yo": "\u5fcd" }
+    { "hello": 0.0, "world": true, "yo": null }
+  ', tf)
+
+  tab1 <- read_json_arrow(tf, col_select = c(hello, world))
+  expect_equal(names(tab1), c("hello", "world"))
+
+  tab2 <- read_json_arrow(tf, col_select = 1:2)
+  expect_equal(names(tab2), c("hello", "world"))
+})
+
 test_that("Can read json file with nested columns (ARROW-5503)", {
   tf <- tempfile()
   on.exit(unlink(tf))
diff --git a/r/tests/testthat/test-parquet.R b/r/tests/testthat/test-parquet.R
index 554744e..f23c37f 100644
--- a/r/tests/testthat/test-parquet.R
+++ b/r/tests/testthat/test-parquet.R
@@ -35,3 +35,11 @@ test_that("simple int column roundtrip", {
   df_read <- read_parquet(pq_tmp_file)
   expect_identical(df, df_read)
 })
+
+test_that("read_parquet() supports col_select", {
+  df <- read_parquet(pq_file, col_select = c(x, y, z))
+  expect_equal(names(df), c("x", "y", "z"))
+
+  df <- read_parquet(pq_file, col_select = starts_with("c"))
+  expect_equal(names(df), c("carat", "cut", "color", "clarity"))
+})