You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by we...@apache.org on 2019/06/27 22:24:54 UTC

[arrow] branch master updated: ARROW-5500: [R] read_csv_arrow() signature should match readr::read_csv()

This is an automated email from the ASF dual-hosted git repository.

wesm pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/master by this push:
     new d8b3be9  ARROW-5500: [R] read_csv_arrow() signature should match readr::read_csv()
d8b3be9 is described below

commit d8b3be9069f6172e41d5f1dda05ab37810d900ce
Author: Neal Richardson <ne...@gmail.com>
AuthorDate: Thu Jun 27 17:24:41 2019 -0500

    ARROW-5500: [R] read_csv_arrow() signature should match readr::read_csv()
    
    This patch enumerates the various CSV parsing options and exposes them in an R-familiar way in the signature of `read_csv_arrow()`. It also adds a generic `read_delim_arrow()` for providing other delimiting characters, as well as a `read_tsv_arrow()`. In the process, I identified some limitations of the current reader (https://issues.apache.org/jira/browse/ARROW-5747) and of the R bindings to it (not yet ticketed), and added more docs and tests.
    
    Other release-prep cleanup in here includes organization of the DESCRIPTION file, adding new functions to the pkgdown config, and adding a NEWS.md.
    
    Author: Neal Richardson <ne...@gmail.com>
    
    Closes #4711 from nealrichardson/readr-csv and squashes the following commits:
    
    92b0a2788 <Neal Richardson> :rat:
    22268d960 <Neal Richardson> Rename man topic in pkgdown.yml
    fc156e3e8 <Neal Richardson> Doc :nailcare:, add read_delim_arrow and read_tsv_arrow
    fb75af1fa <Neal Richardson> More docs and tests for csv parse options; skip a few that aren't supported
    8e2fa2d9a <Neal Richardson> Some cleanup of pkgdown site prep and DESCRIPTION. Start on implementing readr::read_csv arguments
---
 r/DESCRIPTION                     |  28 ++---
 r/NAMESPACE                       |   2 +
 r/NEWS.md                         |  26 ++++
 r/R/csv.R                         | 259 +++++++++++++++++++++++++++++++-------
 r/_pkgdown.yml                    |  15 ++-
 r/man/arrow-package.Rd            |   1 -
 r/man/csv_parse_options.Rd        |   4 +-
 r/man/csv_table_reader.Rd         |  12 +-
 r/man/read_csv_arrow.Rd           |  27 ----
 r/man/read_delim_arrow.Rd         |  79 ++++++++++++
 r/tests/testthat/test-arrow-csv.R |  87 +++++++++++--
 11 files changed, 442 insertions(+), 98 deletions(-)

diff --git a/r/DESCRIPTION b/r/DESCRIPTION
index 45edda1..47eccc8 100644
--- a/r/DESCRIPTION
+++ b/r/DESCRIPTION
@@ -3,10 +3,10 @@ Title: Integration to 'Apache' 'Arrow'
 Version: 0.13.0.9000
 Authors@R: c(
     person("Romain", "Fran\u00e7ois", email = "romain@rstudio.com", role = c("aut", "cre"), comment = c(ORCID = "0000-0002-2444-4226")),
-    person("Javier", "Luraschi", email = "javier@rstudio.com", role = c("ctb")),
-    person("Jeffrey", "Wong", email = "jeffreyw@netflix.com", role = c("ctb")),
     person("Jeroen", "Ooms", email = "jeroen@berkeley.edu", role = c("aut")),
     person("Neal", "Richardson", email = "neal@ursalabs.org", role = c("aut")),
+    person("Javier", "Luraschi", email = "javier@rstudio.com", role = c("ctb")),
+    person("Jeffrey", "Wong", email = "jeffreyw@netflix.com", role = c("ctb")),
     person("Apache Arrow", email = "dev@arrow.apache.org", role = c("aut", "cph"))
   )
 Description: 'Apache' 'Arrow' <https://arrow.apache.org/> is a cross-language
@@ -16,7 +16,7 @@ Description: 'Apache' 'Arrow' <https://arrow.apache.org/> is a cross-language
     package provides an interface to the Arrow C++ library.
 Depends: R (>= 3.1)
 License: Apache License (>= 2.0)
-URL: https://arrow.apache.org/docs/r/, https://github.com/apache/arrow/
+URL: https://github.com/apache/arrow/
 BugReports: https://issues.apache.org/jira/projects/ARROW/issues
 Encoding: UTF-8
 LazyData: true
@@ -24,27 +24,27 @@ SystemRequirements: C++11
 LinkingTo:
     Rcpp (>= 1.0.1)
 Imports:
-    utils,
-    Rcpp (>= 1.0.1),
-    rlang,
-    purrr,
     assertthat,
-    R6,
-    fs,
     bit64,
-    tidyselect
+    fs,
+    purrr,
+    R6,
+    Rcpp (>= 1.0.1),
+    rlang,
+    tidyselect,
+    utils
 Roxygen: list(markdown = TRUE)
 RoxygenNote: 6.1.1
 Suggests:
-    tibble,
     covr,
+    hms,
+    lubridate,
     pkgdown,
     rmarkdown,
     roxygen2,
     testthat,
-    lubridate,
-    vctrs,
-    hms
+    tibble,
+    vctrs
 Collate:
     'enums.R'
     'R6.R'
diff --git a/r/NAMESPACE b/r/NAMESPACE
index e82b30a..e4b367d 100644
--- a/r/NAMESPACE
+++ b/r/NAMESPACE
@@ -162,6 +162,7 @@ export(parquet_arrow_reader_properties)
 export(parquet_file_reader)
 export(read_arrow)
 export(read_csv_arrow)
+export(read_delim_arrow)
 export(read_feather)
 export(read_json_arrow)
 export(read_message)
@@ -169,6 +170,7 @@ export(read_parquet)
 export(read_record_batch)
 export(read_schema)
 export(read_table)
+export(read_tsv_arrow)
 export(record_batch)
 export(schema)
 export(starts_with)
diff --git a/r/NEWS.md b/r/NEWS.md
new file mode 100644
index 0000000..fa6b25a
--- /dev/null
+++ b/r/NEWS.md
@@ -0,0 +1,26 @@
+<!---
+  Licensed to the Apache Software Foundation (ASF) under one
+  or more contributor license agreements.  See the NOTICE file
+  distributed with this work for additional information
+  regarding copyright ownership.  The ASF licenses this file
+  to you under the Apache License, Version 2.0 (the
+  "License"); you may not use this file except in compliance
+  with the License.  You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing,
+  software distributed under the License is distributed on an
+  "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+  KIND, either express or implied.  See the License for the
+  specific language governing permissions and limitations
+  under the License.
+-->
+
+# arrow 0.13.0.9000
+
+Initial CRAN release of the `arrow` package. Key features include:
+
+* Read and write support for various file formats, including Parquet, Feather/Arrow, CSV, and JSON.
+* API bindings to the C++ library for Arrow data types and objects, as well as mapping between Arrow types and R data types.
+* Tools for helping with C++ library configuration and installation.
diff --git a/r/R/csv.R b/r/R/csv.R
index 03a4b7d..8f4370a 100644
--- a/r/R/csv.R
+++ b/r/R/csv.R
@@ -15,6 +15,168 @@
 # specific language governing permissions and limitations
 # under the License.
 
+#' Read a CSV or other delimited file with Arrow
+#'
+#' These functions use the Arrow C++ CSV reader to read into a `data.frame`.
+#' Arrow C++ options have been mapped to argument names that follow those of
+#' [readr::read_delim()], and `col_select` was inspired by [vroom::vroom()].
+#'
+#' `read_csv_arrow()` and `read_tsv_arrow()` are wrappers around
+#' `read_delim_arrow()` that specify a delimiter.
+#'
+#' Note that not all `readr` options are currently implemented here. Please file
+#' an issue if you encounter one that `arrow` should support.
+#'
+#' If you need to control Arrow-specific reader parameters that don't have an
+#' equivalent in `readr::read_csv()`, you can either provide them in the
+#' `parse_options`, `convert_options`, or `read_options` arguments, or you can
+#' call [csv_table_reader()] directly for lower-level access.
+#'
+#' @param file A character path to a local file, or an Arrow input stream
+#' @param delim Single character used to separate fields within a record.
+#' @param quote Single character used to quote strings.
+#' @param escape_double Does the file escape quotes by doubling them?
+#' i.e. If this option is `TRUE`, the value `""""` represents
+#' a single quote, `\"`.
+#' @param escape_backslash Does the file use backslashes to escape special
+#' characters? This is more general than `escape_double` as backslashes
+#' can be used to escape the delimiter character, the quote character, or
+#' to add special characters like `\\n`.
+# #' @param col_names If `TRUE`, the first row of the input will be used as the
+# #' column names and will not be included in the data frame. Note that `FALSE`
+# #' is not currently supported, nor is specifying a character vector of column
+# #' names.
+#' @param col_select A [tidy selection specification][tidyselect::vars_select]
+#' of columns, as used in `dplyr::select()`.
+#' @param skip_empty_rows Should blank rows be ignored altogether? If
+#' `TRUE`, blank rows will not be represented at all. If `FALSE`, they will be
+#' filled with missing values.
+# #' @param skip Number of lines to skip before reading data.
+#' @param parse_options see [csv_parse_options()]. If given, this overrides any
+#' parsing options provided in other arguments (e.g. `delim`, `quote`, etc.).
+#' @param convert_options see [csv_convert_options()]
+#' @param read_options see [csv_read_options()]
+#' @param as_tibble Should the function return a `data.frame` (`TRUE`, the
+#' default) or an [arrow::Table][arrow__Table] (`FALSE`)?
+#'
+#' @return A `data.frame`, or an `arrow::Table` if `as_tibble = FALSE`.
+#' @export
+read_delim_arrow <- function(file,
+                             delim = ",",
+                             quote = '"',
+                             escape_double = TRUE,
+                             escape_backslash = FALSE,
+                             # col_names = TRUE,
+                             # col_types = TRUE,
+                             col_select = NULL,
+                             # na = c("", "NA"),
+                             # quoted_na = TRUE,
+                             skip_empty_rows = TRUE,
+                             # skip = 0L,
+                             parse_options = NULL,
+                             convert_options = NULL,
+                             read_options = csv_read_options(),
+                             as_tibble = TRUE) {
+
+  # These are hardcoded pending https://issues.apache.org/jira/browse/ARROW-5747
+  col_names <- TRUE
+  skip <- 0L
+
+  if (is.null(parse_options)) {
+    if (isTRUE(col_names)) {
+      # Add one row to skip, to match arrow's header_rows
+      skip <- skip + 1L
+      # Note that with the hardcoding, header_rows is always 1, which
+      # turns out to be the only value that works meaningfully
+    }
+    parse_options <- readr_to_csv_parse_options(
+      delim,
+      quote,
+      escape_double,
+      escape_backslash,
+      skip_empty_rows,
+      skip
+    )
+  }
+
+  if (is.null(convert_options)) {
+    # TODO:
+    # * na strings (needs wiring in csv_convert_options)
+    # * col_types (needs wiring in csv_convert_options). Note that we can't do
+    # col_types if col_names is strings because the column type specification
+    # requires a map of name: type, but the CSV reader doesn't handle user-
+    # provided names--they're renamed after the fact.
+    convert_options <- csv_convert_options()
+  }
+
+  reader <- csv_table_reader(
+    file,
+    read_options = read_options,
+    parse_options = parse_options,
+    convert_options = convert_options
+  )
+
+  tab <- reader$Read()$select(!!enquo(col_select))
+  if (is.character(col_names)) {
+    # TODO: Rename `tab`'s columns
+    # See https://github.com/apache/arrow/pull/4557
+  }
+
+  if (isTRUE(as_tibble)) {
+    tab <- as.data.frame(tab)
+  }
+
+  tab
+}
+
+#' @rdname read_delim_arrow
+#' @export
+read_csv_arrow <- function(file,
+                           quote = '"',
+                           escape_double = TRUE,
+                           escape_backslash = FALSE,
+                           # col_names = TRUE,
+                           # col_types = TRUE,
+                           col_select = NULL,
+                           # na = c("", "NA"),
+                           # quoted_na = TRUE,
+                           skip_empty_rows = TRUE,
+                           # skip = 0L,
+                           parse_options = NULL,
+                           convert_options = NULL,
+                           read_options = csv_read_options(),
+                           as_tibble = TRUE) {
+
+  mc <- match.call()
+  mc$delim <- ","
+  mc[[1]] <- as.name("read_delim_arrow")
+  eval.parent(mc)
+}
+
+#' @rdname read_delim_arrow
+#' @export
+read_tsv_arrow <- function(file,
+                           quote = '"',
+                           escape_double = TRUE,
+                           escape_backslash = FALSE,
+                           # col_names = TRUE,
+                           # col_types = TRUE,
+                           col_select = NULL,
+                           # na = c("", "NA"),
+                           # quoted_na = TRUE,
+                           skip_empty_rows = TRUE,
+                           # skip = 0L,
+                           parse_options = NULL,
+                           convert_options = NULL,
+                           read_options = csv_read_options(),
+                           as_tibble = TRUE) {
+
+  mc <- match.call()
+  mc$delim <- "\t"
+  mc[[1]] <- as.name("read_delim_arrow")
+  eval.parent(mc)
+}
+
 #' @include R6.R
 
 `arrow::csv::TableReader` <- R6Class("arrow::csv::TableReader", inherit = `arrow::Object`,
@@ -41,7 +203,29 @@ csv_read_options <- function(block_size = 1048576L) {
   ))
 }
 
-#' Parsing options
+readr_to_csv_parse_options <- function(delim = ",",
+                                       quote = '"',
+                                       escape_double = TRUE,
+                                       escape_backslash = FALSE,
+                                       skip_empty_rows = TRUE,
+                                       skip = 0L) {
+  # This function translates from the readr argument list to the arrow arg names
+  # TODO: validate inputs
+  csv_parse_options(
+    delimiter = delim,
+    quoting = nzchar(quote),
+    quote_char = quote,
+    double_quote = escape_double,
+    escaping = escape_backslash,
+    escape_char = '\\',
+    newlines_in_values = escape_backslash,
+    ignore_empty_lines = skip_empty_rows,
+    header_rows = skip
+  )
+}
+
+#' CSV parsing options
+#'
 #'
 #' @param delimiter Field delimiter
 #' @param quoting Whether quoting is used
@@ -54,12 +238,16 @@ csv_read_options <- function(block_size = 1048576L) {
 #' @param header_rows Number of header rows to skip (including the first row containing column names)
 #'
 #' @export
-csv_parse_options <- function(
-  delimiter = ",", quoting = TRUE, quote_char = '"',
-  double_quote = TRUE, escaping = FALSE, escape_char = '\\',
-  newlines_in_values = FALSE, ignore_empty_lines = TRUE,
-  header_rows = 1L
-){
+csv_parse_options <- function(delimiter = ",",
+                              quoting = TRUE,
+                              quote_char = '"',
+                              double_quote = TRUE,
+                              escaping = FALSE,
+                              escape_char = '\\',
+                              newlines_in_values = FALSE,
+                              ignore_empty_lines = TRUE,
+                              header_rows = 1L) {
+
   shared_ptr(`arrow::csv::ParseOptions`, csv___ParseOptions__initialize(
     list(
       delimiter = delimiter,
@@ -80,7 +268,20 @@ csv_parse_options <- function(
 #' @param check_utf8 Whether to check UTF8 validity of string columns
 #'
 #' @export
-csv_convert_options <- function(check_utf8 = TRUE){
+csv_convert_options <- function(check_utf8 = TRUE) {
+  # TODO: there are more conversion options available:
+  # // Optional per-column types (disabling type inference on those columns)
+  # std::unordered_map<std::string, std::shared_ptr<DataType>> column_types;
+  # // Recognized spellings for null values
+  # std::vector<std::string> null_values;
+  # // Recognized spellings for boolean values
+  # std::vector<std::string> true_values;
+  # std::vector<std::string> false_values;
+  # // Whether string / binary columns can have null values.
+  # // If true, then strings in "null_values" are considered null for string columns.
+  # // If false, then all strings are valid string values.
+  # bool strings_can_be_null = false;
+
   shared_ptr(`arrow::csv::ConvertOptions`, csv___ConvertOptions__initialize(
     list(
       check_utf8 = check_utf8
@@ -88,14 +289,20 @@ csv_convert_options <- function(check_utf8 = TRUE){
   ))
 }
 
-#' CSV table reader
+#' Arrow CSV table reader
+#'
+#' These methods wrap the Arrow C++ CSV table reader.
+#' For an interface to the CSV reader that's more familiar for R users, see
+#' [read_csv_arrow()].
 #'
-#' @param file file
+#' @param file A character path to a local file, or an Arrow input stream
 #' @param read_options, see [csv_read_options()]
 #' @param parse_options, see [csv_parse_options()]
 #' @param convert_options, see [csv_convert_options()]
 #' @param ... additional parameters.
 #'
+#' @return An `arrow::csv::TableReader` R6 object. Call `$Read()` on it to get
+#' an Arrow Table.
 #' @export
 csv_table_reader <- function(file,
   read_options = csv_read_options(),
@@ -167,35 +374,3 @@ csv_table_reader.default <- function(file,
 ){
   file
 }
-
-#' Read csv file into an arrow::Table
-#'
-#' Use arrow::csv::TableReader from [csv_table_reader()]
-#'
-#' @inheritParams csv_table_reader
-#'
-#' @param col_select [tidy selection specification][tidyselect::vars_select] of columns
-#' @param as_tibble Should the [arrow::Table][arrow__Table] be converted to a data frame.
-#'
-#' @export
-read_csv_arrow <- function(file,
-  read_options = csv_read_options(),
-  parse_options = csv_parse_options(),
-  convert_options = csv_convert_options(),
-  col_select = NULL,
-  as_tibble = TRUE
-  )
-{
-  reader <- csv_table_reader(file,
-    read_options = read_options,
-    parse_options = parse_options,
-    convert_options = convert_options)
-
-  tab <- reader$Read()$select(!!enquo(col_select))
-
-  if (isTRUE(as_tibble)) {
-    tab <- as.data.frame(tab)
-  }
-
-  tab
-}
diff --git a/r/_pkgdown.yml b/r/_pkgdown.yml
index 69c02e0..648085b 100644
--- a/r/_pkgdown.yml
+++ b/r/_pkgdown.yml
@@ -39,17 +39,28 @@ navbar:
       text: Reference
       href: reference/index.html
 reference:
+- title: Installation helpers
+  contents:
+  - arrow_available
+  - install_arrow
 - title: Reading and writing files
   contents:
-  - read_csv_arrow
+  - read_delim_arrow
+  - read_json_arrow
   - read_feather
   - read_parquet
   - write_arrow
   - write_feather
+  - write_parquet
   - csv_convert_options
   - csv_parse_options
   - csv_read_options
   - csv_table_reader
+  - json_parse_options
+  - json_read_options
+  - parquet_arrow_reader_properties
+  - json_table_reader
+  - parquet_file_reader
 - title: Arrow data containers
   contents:
   - buffer
@@ -92,6 +103,7 @@ reference:
   - arrow__io__RandomAccessFile
   - arrow__io__Readable
   - arrow__io__ReadableFile
+  - arrow__json__TableReader
   - arrow__ipc__Message
   - arrow__ipc__MessageReader
   - arrow__ipc__RecordBatchFileReader
@@ -117,7 +129,6 @@ reference:
   - RecordBatchFileWriter
   - RecordBatchStreamReader
   - RecordBatchStreamWriter
-  - threadpool
   - cast_options
   - compression_codec
   - default_memory_pool
diff --git a/r/man/arrow-package.Rd b/r/man/arrow-package.Rd
index 1f4b5fb..c3da92d 100644
--- a/r/man/arrow-package.Rd
+++ b/r/man/arrow-package.Rd
@@ -15,7 +15,6 @@
 \seealso{
 Useful links:
 \itemize{
-  \item \url{https://arrow.apache.org/docs/r/}
   \item \url{https://github.com/apache/arrow/}
   \item Report bugs at \url{https://issues.apache.org/jira/projects/ARROW/issues}
 }
diff --git a/r/man/csv_parse_options.Rd b/r/man/csv_parse_options.Rd
index 7e6ab77..ac98262 100644
--- a/r/man/csv_parse_options.Rd
+++ b/r/man/csv_parse_options.Rd
@@ -2,7 +2,7 @@
 % Please edit documentation in R/csv.R
 \name{csv_parse_options}
 \alias{csv_parse_options}
-\title{Parsing options}
+\title{CSV parsing options}
 \usage{
 csv_parse_options(delimiter = ",", quoting = TRUE,
   quote_char = "\\"", double_quote = TRUE, escaping = FALSE,
@@ -29,5 +29,5 @@ csv_parse_options(delimiter = ",", quoting = TRUE,
 \item{header_rows}{Number of header rows to skip (including the first row containing column names)}
 }
 \description{
-Parsing options
+CSV parsing options
 }
diff --git a/r/man/csv_table_reader.Rd b/r/man/csv_table_reader.Rd
index 029cd0b..862aefb 100644
--- a/r/man/csv_table_reader.Rd
+++ b/r/man/csv_table_reader.Rd
@@ -2,14 +2,14 @@
 % Please edit documentation in R/csv.R
 \name{csv_table_reader}
 \alias{csv_table_reader}
-\title{CSV table reader}
+\title{Arrow CSV table reader}
 \usage{
 csv_table_reader(file, read_options = csv_read_options(),
   parse_options = csv_parse_options(),
   convert_options = csv_convert_options(), ...)
 }
 \arguments{
-\item{file}{file}
+\item{file}{A character path to a local file, or an Arrow input stream}
 
 \item{read_options, }{see \code{\link[=csv_read_options]{csv_read_options()}}}
 
@@ -19,6 +19,12 @@ csv_table_reader(file, read_options = csv_read_options(),
 
 \item{...}{additional parameters.}
 }
+\value{
+An \code{arrow::csv::TableReader} R6 object. Call \code{$Read()} on it to get
+an Arrow Table.
+}
 \description{
-CSV table reader
+These methods wrap the Arrow C++ CSV table reader.
+For an interface to the CSV reader that's more familiar for R users, see
+\code{\link[=read_csv_arrow]{read_csv_arrow()}}.
 }
diff --git a/r/man/read_csv_arrow.Rd b/r/man/read_csv_arrow.Rd
deleted file mode 100644
index 47e5158..0000000
--- a/r/man/read_csv_arrow.Rd
+++ /dev/null
@@ -1,27 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/csv.R
-\name{read_csv_arrow}
-\alias{read_csv_arrow}
-\title{Read csv file into an arrow::Table}
-\usage{
-read_csv_arrow(file, read_options = csv_read_options(),
-  parse_options = csv_parse_options(),
-  convert_options = csv_convert_options(), col_select = NULL,
-  as_tibble = TRUE)
-}
-\arguments{
-\item{file}{file}
-
-\item{read_options}{see \code{\link[=csv_read_options]{csv_read_options()}}}
-
-\item{parse_options}{see \code{\link[=csv_parse_options]{csv_parse_options()}}}
-
-\item{convert_options}{see \code{\link[=csv_convert_options]{csv_convert_options()}}}
-
-\item{col_select}{\link[tidyselect:vars_select]{tidy selection specification} of columns}
-
-\item{as_tibble}{Should the \link[=arrow__Table]{arrow::Table} be converted to a data frame.}
-}
-\description{
-Use arrow::csv::TableReader from \code{\link[=csv_table_reader]{csv_table_reader()}}
-}
diff --git a/r/man/read_delim_arrow.Rd b/r/man/read_delim_arrow.Rd
new file mode 100644
index 0000000..e1ca16f
--- /dev/null
+++ b/r/man/read_delim_arrow.Rd
@@ -0,0 +1,79 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/csv.R
+\name{read_delim_arrow}
+\alias{read_delim_arrow}
+\alias{read_csv_arrow}
+\alias{read_tsv_arrow}
+\title{Read a CSV or other delimited file with Arrow}
+\usage{
+read_delim_arrow(file, delim = ",", quote = "\\"",
+  escape_double = TRUE, escape_backslash = FALSE, col_select = NULL,
+  skip_empty_rows = TRUE, parse_options = NULL,
+  convert_options = NULL, read_options = csv_read_options(),
+  as_tibble = TRUE)
+
+read_csv_arrow(file, quote = "\\"", escape_double = TRUE,
+  escape_backslash = FALSE, col_select = NULL,
+  skip_empty_rows = TRUE, parse_options = NULL,
+  convert_options = NULL, read_options = csv_read_options(),
+  as_tibble = TRUE)
+
+read_tsv_arrow(file, quote = "\\"", escape_double = TRUE,
+  escape_backslash = FALSE, col_select = NULL,
+  skip_empty_rows = TRUE, parse_options = NULL,
+  convert_options = NULL, read_options = csv_read_options(),
+  as_tibble = TRUE)
+}
+\arguments{
+\item{file}{A character path to a local file, or an Arrow input stream}
+
+\item{delim}{Single character used to separate fields within a record.}
+
+\item{quote}{Single character used to quote strings.}
+
+\item{escape_double}{Does the file escape quotes by doubling them?
+i.e. If this option is \code{TRUE}, the value \code{""""} represents
+a single quote, \code{\"}.}
+
+\item{escape_backslash}{Does the file use backslashes to escape special
+characters? This is more general than \code{escape_double} as backslashes
+can be used to escape the delimiter character, the quote character, or
+to add special characters like \code{\\n}.}
+
+\item{col_select}{A \link[tidyselect:vars_select]{tidy selection specification}
+of columns, as used in \code{dplyr::select()}.}
+
+\item{skip_empty_rows}{Should blank rows be ignored altogether? If
+\code{TRUE}, blank rows will not be represented at all. If \code{FALSE}, they will be
+filled with missing values.}
+
+\item{parse_options}{see \code{\link[=csv_parse_options]{csv_parse_options()}}. If given, this overrides any
+parsing options provided in other arguments (e.g. \code{delim}, \code{quote}, etc.).}
+
+\item{convert_options}{see \code{\link[=csv_convert_options]{csv_convert_options()}}}
+
+\item{read_options}{see \code{\link[=csv_read_options]{csv_read_options()}}}
+
+\item{as_tibble}{Should the function return a \code{data.frame} (\code{TRUE}, the
+default) or an \link[=arrow__Table]{arrow::Table} (\code{FALSE})?}
+}
+\value{
+A \code{data.frame}, or an \code{arrow::Table} if \code{as_tibble = FALSE}.
+}
+\description{
+These functions use the Arrow C++ CSV reader to read into a \code{data.frame}.
+Arrow C++ options have been mapped to argument names that follow those of
+\code{\link[readr:read_delim]{readr::read_delim()}}, and \code{col_select} was inspired by \code{\link[vroom:vroom]{vroom::vroom()}}.
+}
+\details{
+\code{read_csv_arrow()} and \code{read_tsv_arrow()} are wrappers around
+\code{read_delim_arrow()} that specify a delimiter.
+
+Note that not all \code{readr} options are currently implemented here. Please file
+an issue if you encounter one that \code{arrow} should support.
+
+If you need to control Arrow-specific reader parameters that don't have an
+equivalent in \code{readr::read_csv()}, you can either provide them in the
+\code{parse_options}, \code{convert_options}, or \code{read_options} arguments, or you can
+call \code{\link[=csv_table_reader]{csv_table_reader()}} directly for lower-level access.
+}
diff --git a/r/tests/testthat/test-arrow-csv.R b/r/tests/testthat/test-arrow-csv.R
index 7f0c1ae..aed9638 100644
--- a/r/tests/testthat/test-arrow-csv.R
+++ b/r/tests/testthat/test-arrow-csv.R
@@ -19,8 +19,9 @@ context("arrow::csv::TableReader")
 
 test_that("Can read csv file", {
   tf <- tempfile()
+  on.exit(unlink(tf))
 
-  write.csv(iris, tf, row.names = FALSE, quote = FALSE)
+  write.csv(iris, tf, row.names = FALSE)
 
   tab1 <- read_csv_arrow(tf, as_tibble = FALSE)
   tab2 <- read_csv_arrow(mmap_open(tf), as_tibble = FALSE)
@@ -31,14 +32,13 @@ test_that("Can read csv file", {
   expect_equal(tab0, tab1)
   expect_equal(tab0, tab2)
   expect_equal(tab0, tab3)
-
-  unlink(tf)
 })
 
 test_that("read_csv_arrow(as_tibble=TRUE)", {
   tf <- tempfile()
+  on.exit(unlink(tf))
 
-  write.csv(iris, tf, row.names = FALSE, quote = FALSE)
+  write.csv(iris, tf, row.names = FALSE)
 
   tab1 <- read_csv_arrow(tf, as_tibble = TRUE)
   tab2 <- read_csv_arrow(mmap_open(tf), as_tibble = TRUE)
@@ -48,12 +48,87 @@ test_that("read_csv_arrow(as_tibble=TRUE)", {
   expect_equivalent(iris, tab1)
   expect_equivalent(iris, tab2)
   expect_equivalent(iris, tab3)
+})
+
+test_that("read_delim_arrow parsing options: delim", {
+  tf <- tempfile()
+  on.exit(unlink(tf))
+
+  write.table(iris, tf, sep = "\t", row.names = FALSE)
+  tab1 <- read_tsv_arrow(tf)
+  tab2 <- read_delim_arrow(tf, delim = "\t")
+  expect_equivalent(tab1, tab2)
+
+  iris$Species <- as.character(iris$Species)
+  expect_equivalent(iris, tab1)
+})
 
-  unlink(tf)
+test_that("read_delim_arrow parsing options: quote", {
+  tf <- tempfile()
+  on.exit(unlink(tf))
+
+  df <- data.frame(a=c(1, 2), b=c("'abc'", "'def'"))
+  write.table(df, sep=";", tf, row.names = FALSE, quote = FALSE)
+  tab1 <- read_delim_arrow(tf, delim = ";", quote = "'")
+
+  # Is this a problem?
+  # Component “a”: target is integer64, current is numeric
+  tab1$a <- as.numeric(tab1$a)
+  expect_equivalent(
+    tab1,
+    data.frame(a=c(1, 2), b=c("abc", "def"), stringsAsFactors = FALSE)
+  )
 })
 
+test_that("read_csv_arrow parsing options: col_names", {
+  skip("Invalid: Empty CSV file")
+  tf <- tempfile()
+  on.exit(unlink(tf))
+
+  write.table(iris, tf, sep = ",", row.names = FALSE, col.names = FALSE)
+  tab1 <- read_csv_arrow(tf, col_names = FALSE)
+
+  expect_identical(names(tab1), names(iris))
+  iris$Species <- as.character(iris$Species)
+  expect_equivalent(iris, tab1)
+})
+
+test_that("read_csv_arrow parsing options: skip", {
+  skip("Invalid: Empty CSV file")
+  tf <- tempfile()
+  on.exit(unlink(tf))
+
+  cat("asdf\nqwer\n", file = tf)
+  suppressWarnings(write.table(iris, tf, sep = ",", row.names = FALSE, append = TRUE))
+  # This works:
+  # print(head(readr::read_csv(tf, skip = 2)))
+
+  # This errors:
+  tab1 <- read_csv_arrow(tf, skip = 2)
+
+  expect_identical(names(tab1), names(iris))
+  iris$Species <- as.character(iris$Species)
+  expect_equivalent(iris, tab1)
+})
+
+test_that("read_csv_arrow parsing options: skip_empty_rows", {
+  skip("Invalid: Empty CSV file")
+  tf <- tempfile()
+  on.exit(unlink(tf))
+
+  write.csv(iris, tf, row.names = FALSE)
+  cat("\n\n", file = tf, append = TRUE)
+
+  tab1 <- read_csv_arrow(tf, skip_empty_rows = FALSE)
+
+  expect_equal(nrow(tab1), nrow(iris) + 2)
+  expect_true(is.na(tail(iris, 1)[[1]]))
+})
+
+
 test_that("read_csv_arrow() respects col_select", {
   tf <- tempfile()
+  on.exit(unlink(tf))
 
   write.csv(iris, tf, row.names = FALSE, quote = FALSE)
 
@@ -62,6 +137,4 @@ test_that("read_csv_arrow() respects col_select", {
 
   tib <- read_csv_arrow(tf, col_select = starts_with("Sepal"), as_tibble = TRUE)
   expect_equal(tib, tibble::tibble(Sepal.Length = iris$Sepal.Length, Sepal.Width = iris$Sepal.Width))
-
-  unlink(tf)
 })