You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by we...@apache.org on 2019/01/04 21:36:56 UTC
[arrow] branch master updated: ARROW-3760: [R] Support Arrow CSV
reader
This is an automated email from the ASF dual-hosted git repository.
wesm pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/master by this push:
new fba4f32 ARROW-3760: [R] Support Arrow CSV reader
fba4f32 is described below
commit fba4f32001386b2ed593a69ec6d546a104eb45ba
Author: Romain Francois <ro...@purrple.cat>
AuthorDate: Fri Jan 4 15:36:41 2019 -0600
ARROW-3760: [R] Support Arrow CSV reader
The main entry point is the `csv_read()` function, all it does is create a `csv::TableReader` with the `csv_table_reader()` generic and then `$Read()` from it.
as in the #2947 for feather format, `csv_table_reader` is generic with the methods:
- arrow::io::InputStream: calls the TableReader actor with the other options
- character and fs_path: depending on the `mmap` option (TRUE by default) it opens the file with `mmap_open()` of `file_open()` and then calls the other method.
``` r
library(arrow)
tf <- tempfile()
readr::write_csv(iris, tf)
tab1 <- csv_read(tf)
tab1
#> arrow::Table
as_tibble(tab1)
#> # A tibble: 150 x 5
#> Sepal.Length Sepal.Width Petal.Length Petal.Width Species
#> <dbl> <dbl> <dbl> <dbl> <chr>
#> 1 5.1 3.5 1.4 0.2 setosa
#> 2 4.9 3 1.4 0.2 setosa
#> 3 4.7 3.2 1.3 0.2 setosa
#> 4 4.6 3.1 1.5 0.2 setosa
#> 5 5 3.6 1.4 0.2 setosa
#> 6 5.4 3.9 1.7 0.4 setosa
#> 7 4.6 3.4 1.4 0.3 setosa
#> 8 5 3.4 1.5 0.2 setosa
#> 9 4.4 2.9 1.4 0.2 setosa
#> 10 4.9 3.1 1.5 0.1 setosa
#> # … with 140 more rows
```
<sup>Created on 2018-11-13 by the [reprex package](https://reprex.tidyverse.org) (v0.2.1.9000)</sup>
Author: Romain Francois <ro...@purrple.cat>
Closes #2949 from romainfrancois/ARROW-3760/csv_reader and squashes the following commits:
951e9f58b <Romain Francois> s/csv_read/read_csv_arrow/
7770ec54c <Romain Francois> not using readr:: at this point
bb13a76e0 <Romain Francois> rebase
83b51621a <Romain Francois> s/file_open/ReadableFile/
959020c91 <Romain Francois> No need to special use mmap for file path method
6e740037d <Romain Francois> going through CharacterVector makes sure this is a character vector
258550143 <Romain Francois> line breaks for readability
0ab839783 <Romain Francois> linting
09187e63b <Romain Francois> Expose arrow::csv::TableReader, functions csv_table_reader() + csv_read()
---
r/DESCRIPTION | 1 +
r/NAMESPACE | 11 +++
r/R/RcppExports.R | 20 ++++
r/R/csv.R | 182 +++++++++++++++++++++++++++++++++++++
r/man/csv_convert_options.Rd | 14 +++
r/man/csv_parse_options.Rd | 33 +++++++
r/man/csv_read_options.Rd | 16 ++++
r/man/csv_table_reader.Rd | 24 +++++
r/man/read_csv_arrow.Rd | 14 +++
r/src/RcppExports.cpp | 63 +++++++++++++
r/src/arrow_types.h | 1 +
r/src/csv.cpp | 76 ++++++++++++++++
r/tests/testthat/test-arrow-csv-.R | 33 +++++++
13 files changed, 488 insertions(+)
diff --git a/r/DESCRIPTION b/r/DESCRIPTION
index 45e0f83..a263297 100644
--- a/r/DESCRIPTION
+++ b/r/DESCRIPTION
@@ -55,6 +55,7 @@ Collate:
'array.R'
'buffer.R'
'compute.R'
+ 'csv.R'
'dictionary.R'
'feather.R'
'io.R'
diff --git a/r/NAMESPACE b/r/NAMESPACE
index 65d60d8..8846def 100644
--- a/r/NAMESPACE
+++ b/r/NAMESPACE
@@ -39,6 +39,11 @@ S3method(buffer,default)
S3method(buffer,integer)
S3method(buffer,numeric)
S3method(buffer,raw)
+S3method(csv_table_reader,"arrow::csv::TableReader")
+S3method(csv_table_reader,"arrow::io::InputStream")
+S3method(csv_table_reader,character)
+S3method(csv_table_reader,default)
+S3method(csv_table_reader,fs_path)
S3method(length,"arrow::Array")
S3method(names,"arrow::RecordBatch")
S3method(print,"arrow-enum")
@@ -92,6 +97,10 @@ export(boolean)
export(buffer)
export(cast_options)
export(chunked_array)
+export(csv_convert_options)
+export(csv_parse_options)
+export(csv_read_options)
+export(csv_table_reader)
export(date32)
export(date64)
export(decimal)
@@ -111,6 +120,7 @@ export(mmap_open)
export(null)
export(print.integer64)
export(read_arrow)
+export(read_csv_arrow)
export(read_feather)
export(read_message)
export(read_record_batch)
@@ -141,6 +151,7 @@ importFrom(glue,glue)
importFrom(purrr,map)
importFrom(purrr,map2)
importFrom(purrr,map_int)
+importFrom(rlang,abort)
importFrom(rlang,dots_n)
importFrom(rlang,list2)
importFrom(rlang,warn)
diff --git a/r/R/RcppExports.R b/r/R/RcppExports.R
index 0310eab..55b9ab3 100644
--- a/r/R/RcppExports.R
+++ b/r/R/RcppExports.R
@@ -193,6 +193,26 @@ Table__cast <- function(table, schema, options) {
.Call(`_arrow_Table__cast`, table, schema, options)
}
+csv___ReadOptions__initialize <- function(options) {
+ .Call(`_arrow_csv___ReadOptions__initialize`, options)
+}
+
+csv___ParseOptions__initialize <- function(options) {
+ .Call(`_arrow_csv___ParseOptions__initialize`, options)
+}
+
+csv___ConvertOptions__initialize <- function(options) {
+ .Call(`_arrow_csv___ConvertOptions__initialize`, options)
+}
+
+csv___TableReader__Make <- function(input, read_options, parse_options, convert_options) {
+ .Call(`_arrow_csv___TableReader__Make`, input, read_options, parse_options, convert_options)
+}
+
+csv___TableReader__Read <- function(table_reader) {
+ .Call(`_arrow_csv___TableReader__Read`, table_reader)
+}
+
shared_ptr_is_null <- function(xp) {
.Call(`_arrow_shared_ptr_is_null`, xp)
}
diff --git a/r/R/csv.R b/r/R/csv.R
new file mode 100644
index 0000000..bad8755
--- /dev/null
+++ b/r/R/csv.R
@@ -0,0 +1,182 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+#' @include R6.R
+
+`arrow::csv::TableReader` <- R6Class("arrow::csv::TableReader", inherit = `arrow::Object`,
+ public = list(
+ Read = function() shared_ptr(`arrow::Table`, csv___TableReader__Read(self))
+ )
+)
+
+`arrow::csv::ReadOptions` <- R6Class("arrow::csv::ReadOptions", inherit = `arrow::Object`)
+`arrow::csv::ParseOptions` <- R6Class("arrow::csv::ParseOptions", inherit = `arrow::Object`)
+`arrow::csv::ConvertOptions` <- R6Class("arrow::csv::ConvertOptions", inherit = `arrow::Object`)
+
+#' read options for the csv reader
+#'
+#' @param use_threads Whether to use the global CPU thread pool
+#' @param block_size Block size we request from the IO layer; also determines the size of chunks when use_threads is `TRUE`
+#'
+#' @export
+csv_read_options <- function(use_threads = TRUE, block_size = 1048576L) {
+ shared_ptr(`arrow::csv::ReadOptions`, csv___ReadOptions__initialize(
+ list(
+ use_threads = use_threads,
+ block_size = block_size
+ )
+ ))
+}
+
+#' Parsing options
+#'
+#' @param delimiter Field delimiter
+#' @param quoting Whether quoting is used
+#' @param quote_char Quoting character (if `quoting` is `TRUE`)
+#' @param double_quote Whether a quote inside a value is double-quoted
+#' @param escaping Whether escaping is used
+#' @param escape_char Escaping character (if `escaping` is `TRUE`)
+#' @param newlines_in_values Whether values are allowed to contain CR (`0x0d``) and LF (`0x0a``) characters
+#' @param ignore_empty_lines Whether empty lines are ignored. If false, an empty line represents
+#' @param header_rows Number of header rows to skip (including the first row containing column names)
+#'
+#' @export
+csv_parse_options <- function(
+ delimiter = ",", quoting = TRUE, quote_char = '"',
+ double_quote = TRUE, escaping = FALSE, escape_char = '\\',
+ newlines_in_values = FALSE, ignore_empty_lines = TRUE,
+ header_rows = 1L
+){
+ shared_ptr(`arrow::csv::ParseOptions`, csv___ParseOptions__initialize(
+ list(
+ delimiter = delimiter,
+ quoting = quoting,
+ quote_char = quote_char,
+ double_quote = double_quote,
+ escaping = escaping,
+ escape_char = escape_char,
+ newlines_in_values = newlines_in_values,
+ ignore_empty_lines = ignore_empty_lines,
+ header_rows = header_rows
+ )
+ ))
+}
+
+#' Conversion Options for the csv reader
+#'
+#' @param check_utf8 Whether to check UTF8 validity of string columns
+#'
+#' @export
+csv_convert_options <- function(check_utf8 = TRUE){
+ shared_ptr(`arrow::csv::ConvertOptions`, csv___ConvertOptions__initialize(
+ list(
+ check_utf8 = check_utf8
+ )
+ ))
+}
+
+#' CSV table reader
+#'
+#' @param file file
+#' @param read_options, see [csv_read_options()]
+#' @param parse_options, see [csv_parse_options()]
+#' @param convert_options, see [csv_convert_options()]
+#' @param ... additional parameters.
+#'
+#' @export
+csv_table_reader <- function(file,
+ read_options = csv_read_options(),
+ parse_options = csv_parse_options(),
+ convert_options = csv_convert_options(),
+ ...
+){
+ UseMethod("csv_table_reader")
+}
+
+#' @importFrom rlang abort
+#' @export
+csv_table_reader.default <- function(file,
+ read_options = csv_read_options(),
+ parse_options = csv_parse_options(),
+ convert_options = csv_convert_options(),
+ ...
+) {
+ abort("unsupported")
+}
+
+#' @export
+`csv_table_reader.character` <- function(file,
+ read_options = csv_read_options(),
+ parse_options = csv_parse_options(),
+ convert_options = csv_convert_options(),
+ ...
+){
+ csv_table_reader(fs::path_abs(file),
+ read_options = read_options,
+ parse_options = parse_options,
+ convert_options = convert_options,
+ ...
+ )
+}
+
+#' @export
+`csv_table_reader.fs_path` <- function(file,
+ read_options = csv_read_options(),
+ parse_options = csv_parse_options(),
+ convert_options = csv_convert_options(),
+ ...
+){
+ csv_table_reader(ReadableFile(file),
+ read_options = read_options,
+ parse_options = parse_options,
+ convert_options = convert_options,
+ ...
+ )
+}
+
+#' @export
+`csv_table_reader.arrow::io::InputStream` <- function(file,
+ read_options = csv_read_options(),
+ parse_options = csv_parse_options(),
+ convert_options = csv_convert_options(),
+ ...
+){
+ shared_ptr(`arrow::csv::TableReader`,
+ csv___TableReader__Make(file, read_options, parse_options, convert_options)
+ )
+}
+
+#' @export
+`csv_table_reader.arrow::csv::TableReader` <- function(file,
+ read_options = csv_read_options(),
+ parse_options = csv_parse_options(),
+ convert_options = csv_convert_options(),
+ ...
+){
+ file
+}
+
+#' Read csv file into an arrow::Table
+#'
+#' Use arrow::csv::TableReader from [csv_table_reader()]
+#'
+#' @param ... Used to construct an arrow::csv::TableReader
+#' @export
+read_csv_arrow <- function(...) {
+ csv_table_reader(...)$Read()
+}
+
diff --git a/r/man/csv_convert_options.Rd b/r/man/csv_convert_options.Rd
new file mode 100644
index 0000000..323c6e0
--- /dev/null
+++ b/r/man/csv_convert_options.Rd
@@ -0,0 +1,14 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/csv.R
+\name{csv_convert_options}
+\alias{csv_convert_options}
+\title{Conversion Options for the csv reader}
+\usage{
+csv_convert_options(check_utf8 = TRUE)
+}
+\arguments{
+\item{check_utf8}{Whether to check UTF8 validity of string columns}
+}
+\description{
+Conversion Options for the csv reader
+}
diff --git a/r/man/csv_parse_options.Rd b/r/man/csv_parse_options.Rd
new file mode 100644
index 0000000..9540771
--- /dev/null
+++ b/r/man/csv_parse_options.Rd
@@ -0,0 +1,33 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/csv.R
+\name{csv_parse_options}
+\alias{csv_parse_options}
+\title{Parsing options}
+\usage{
+csv_parse_options(delimiter = ",", quoting = TRUE,
+ quote_char = "\\"", double_quote = TRUE, escaping = FALSE,
+ escape_char = "\\\\", newlines_in_values = FALSE,
+ ignore_empty_lines = TRUE, header_rows = 1L)
+}
+\arguments{
+\item{delimiter}{Field delimiter}
+
+\item{quoting}{Whether quoting is used}
+
+\item{quote_char}{Quoting character (if \code{quoting} is \code{TRUE})}
+
+\item{double_quote}{Whether a quote inside a value is double-quoted}
+
+\item{escaping}{Whether escaping is used}
+
+\item{escape_char}{Escaping character (if \code{escaping} is \code{TRUE})}
+
+\item{newlines_in_values}{Whether values are allowed to contain CR (\code{0x0d``) and LF (}0x0a``) characters}
+
+\item{ignore_empty_lines}{Whether empty lines are ignored. If false, an empty line represents}
+
+\item{header_rows}{Number of header rows to skip (including the first row containing column names)}
+}
+\description{
+Parsing options
+}
diff --git a/r/man/csv_read_options.Rd b/r/man/csv_read_options.Rd
new file mode 100644
index 0000000..3fa2d8c
--- /dev/null
+++ b/r/man/csv_read_options.Rd
@@ -0,0 +1,16 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/csv.R
+\name{csv_read_options}
+\alias{csv_read_options}
+\title{read options for the csv reader}
+\usage{
+csv_read_options(use_threads = TRUE, block_size = 1048576L)
+}
+\arguments{
+\item{use_threads}{Whether to use the global CPU thread pool}
+
+\item{block_size}{Block size we request from the IO layer; also determines the size of chunks when use_threads is \code{TRUE}}
+}
+\description{
+read options for the csv reader
+}
diff --git a/r/man/csv_table_reader.Rd b/r/man/csv_table_reader.Rd
new file mode 100644
index 0000000..029cd0b
--- /dev/null
+++ b/r/man/csv_table_reader.Rd
@@ -0,0 +1,24 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/csv.R
+\name{csv_table_reader}
+\alias{csv_table_reader}
+\title{CSV table reader}
+\usage{
+csv_table_reader(file, read_options = csv_read_options(),
+ parse_options = csv_parse_options(),
+ convert_options = csv_convert_options(), ...)
+}
+\arguments{
+\item{file}{file}
+
+\item{read_options, }{see \code{\link[=csv_read_options]{csv_read_options()}}}
+
+\item{parse_options, }{see \code{\link[=csv_parse_options]{csv_parse_options()}}}
+
+\item{convert_options, }{see \code{\link[=csv_convert_options]{csv_convert_options()}}}
+
+\item{...}{additional parameters.}
+}
+\description{
+CSV table reader
+}
diff --git a/r/man/read_csv_arrow.Rd b/r/man/read_csv_arrow.Rd
new file mode 100644
index 0000000..4cdca91
--- /dev/null
+++ b/r/man/read_csv_arrow.Rd
@@ -0,0 +1,14 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/csv.R
+\name{read_csv_arrow}
+\alias{read_csv_arrow}
+\title{Read csv file into an arrow::Table}
+\usage{
+read_csv_arrow(...)
+}
+\arguments{
+\item{...}{Used to construct an arrow::csv::TableReader}
+}
+\description{
+Use arrow::csv::TableReader from \code{\link[=csv_table_reader]{csv_table_reader()}}
+}
diff --git a/r/src/RcppExports.cpp b/r/src/RcppExports.cpp
index e5a784e..c752afb 100644
--- a/r/src/RcppExports.cpp
+++ b/r/src/RcppExports.cpp
@@ -558,6 +558,64 @@ BEGIN_RCPP
return rcpp_result_gen;
END_RCPP
}
+// csv___ReadOptions__initialize
+std::shared_ptr<arrow::csv::ReadOptions> csv___ReadOptions__initialize(List_ options);
+RcppExport SEXP _arrow_csv___ReadOptions__initialize(SEXP optionsSEXP) {
+BEGIN_RCPP
+ Rcpp::RObject rcpp_result_gen;
+ Rcpp::RNGScope rcpp_rngScope_gen;
+ Rcpp::traits::input_parameter< List_ >::type options(optionsSEXP);
+ rcpp_result_gen = Rcpp::wrap(csv___ReadOptions__initialize(options));
+ return rcpp_result_gen;
+END_RCPP
+}
+// csv___ParseOptions__initialize
+std::shared_ptr<arrow::csv::ParseOptions> csv___ParseOptions__initialize(List_ options);
+RcppExport SEXP _arrow_csv___ParseOptions__initialize(SEXP optionsSEXP) {
+BEGIN_RCPP
+ Rcpp::RObject rcpp_result_gen;
+ Rcpp::RNGScope rcpp_rngScope_gen;
+ Rcpp::traits::input_parameter< List_ >::type options(optionsSEXP);
+ rcpp_result_gen = Rcpp::wrap(csv___ParseOptions__initialize(options));
+ return rcpp_result_gen;
+END_RCPP
+}
+// csv___ConvertOptions__initialize
+std::shared_ptr<arrow::csv::ConvertOptions> csv___ConvertOptions__initialize(List_ options);
+RcppExport SEXP _arrow_csv___ConvertOptions__initialize(SEXP optionsSEXP) {
+BEGIN_RCPP
+ Rcpp::RObject rcpp_result_gen;
+ Rcpp::RNGScope rcpp_rngScope_gen;
+ Rcpp::traits::input_parameter< List_ >::type options(optionsSEXP);
+ rcpp_result_gen = Rcpp::wrap(csv___ConvertOptions__initialize(options));
+ return rcpp_result_gen;
+END_RCPP
+}
+// csv___TableReader__Make
+std::shared_ptr<arrow::csv::TableReader> csv___TableReader__Make(const std::shared_ptr<arrow::io::InputStream>& input, const std::shared_ptr<arrow::csv::ReadOptions>& read_options, const std::shared_ptr<arrow::csv::ParseOptions>& parse_options, const std::shared_ptr<arrow::csv::ConvertOptions>& convert_options);
+RcppExport SEXP _arrow_csv___TableReader__Make(SEXP inputSEXP, SEXP read_optionsSEXP, SEXP parse_optionsSEXP, SEXP convert_optionsSEXP) {
+BEGIN_RCPP
+ Rcpp::RObject rcpp_result_gen;
+ Rcpp::RNGScope rcpp_rngScope_gen;
+ Rcpp::traits::input_parameter< const std::shared_ptr<arrow::io::InputStream>& >::type input(inputSEXP);
+ Rcpp::traits::input_parameter< const std::shared_ptr<arrow::csv::ReadOptions>& >::type read_options(read_optionsSEXP);
+ Rcpp::traits::input_parameter< const std::shared_ptr<arrow::csv::ParseOptions>& >::type parse_options(parse_optionsSEXP);
+ Rcpp::traits::input_parameter< const std::shared_ptr<arrow::csv::ConvertOptions>& >::type convert_options(convert_optionsSEXP);
+ rcpp_result_gen = Rcpp::wrap(csv___TableReader__Make(input, read_options, parse_options, convert_options));
+ return rcpp_result_gen;
+END_RCPP
+}
+// csv___TableReader__Read
+std::shared_ptr<arrow::Table> csv___TableReader__Read(const std::shared_ptr<arrow::csv::TableReader>& table_reader);
+RcppExport SEXP _arrow_csv___TableReader__Read(SEXP table_readerSEXP) {
+BEGIN_RCPP
+ Rcpp::RObject rcpp_result_gen;
+ Rcpp::RNGScope rcpp_rngScope_gen;
+ Rcpp::traits::input_parameter< const std::shared_ptr<arrow::csv::TableReader>& >::type table_reader(table_readerSEXP);
+ rcpp_result_gen = Rcpp::wrap(csv___TableReader__Read(table_reader));
+ return rcpp_result_gen;
+END_RCPP
+}
// shared_ptr_is_null
bool shared_ptr_is_null(SEXP xp);
RcppExport SEXP _arrow_shared_ptr_is_null(SEXP xpSEXP) {
@@ -2200,6 +2258,11 @@ static const R_CallMethodDef CallEntries[] = {
{"_arrow_ChunkedArray__cast", (DL_FUNC) &_arrow_ChunkedArray__cast, 3},
{"_arrow_RecordBatch__cast", (DL_FUNC) &_arrow_RecordBatch__cast, 3},
{"_arrow_Table__cast", (DL_FUNC) &_arrow_Table__cast, 3},
+ {"_arrow_csv___ReadOptions__initialize", (DL_FUNC) &_arrow_csv___ReadOptions__initialize, 1},
+ {"_arrow_csv___ParseOptions__initialize", (DL_FUNC) &_arrow_csv___ParseOptions__initialize, 1},
+ {"_arrow_csv___ConvertOptions__initialize", (DL_FUNC) &_arrow_csv___ConvertOptions__initialize, 1},
+ {"_arrow_csv___TableReader__Make", (DL_FUNC) &_arrow_csv___TableReader__Make, 4},
+ {"_arrow_csv___TableReader__Read", (DL_FUNC) &_arrow_csv___TableReader__Read, 1},
{"_arrow_shared_ptr_is_null", (DL_FUNC) &_arrow_shared_ptr_is_null, 1},
{"_arrow_unique_ptr_is_null", (DL_FUNC) &_arrow_unique_ptr_is_null, 1},
{"_arrow_Int8__initialize", (DL_FUNC) &_arrow_Int8__initialize, 0},
diff --git a/r/src/arrow_types.h b/r/src/arrow_types.h
index dba7a91..6fef799 100644
--- a/r/src/arrow_types.h
+++ b/r/src/arrow_types.h
@@ -22,6 +22,7 @@
#undef Free
#include <arrow/api.h>
#include <arrow/compute/api.h>
+#include <arrow/csv/reader.h>
#include <arrow/io/file.h>
#include <arrow/io/memory.h>
#include <arrow/ipc/feather.h>
diff --git a/r/src/csv.cpp b/r/src/csv.cpp
new file mode 100644
index 0000000..0e1d09f
--- /dev/null
+++ b/r/src/csv.cpp
@@ -0,0 +1,76 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "arrow_types.h"
+
+using namespace Rcpp;
+
+// [[Rcpp::export]]
+std::shared_ptr<arrow::csv::ReadOptions> csv___ReadOptions__initialize(List_ options) {
+ auto res =
+ std::make_shared<arrow::csv::ReadOptions>(arrow::csv::ReadOptions::Defaults());
+ res->use_threads = options["use_threads"];
+ res->block_size = options["block_size"];
+ return res;
+}
+
+inline char get_char(CharacterVector x) { return CHAR(STRING_ELT(x, 0))[0]; }
+
+// [[Rcpp::export]]
+std::shared_ptr<arrow::csv::ParseOptions> csv___ParseOptions__initialize(List_ options) {
+ auto res =
+ std::make_shared<arrow::csv::ParseOptions>(arrow::csv::ParseOptions::Defaults());
+ res->delimiter = get_char(options["delimiter"]);
+ res->quoting = options["quoting"];
+ res->quote_char = get_char(options["quote_char"]);
+ res->double_quote = options["double_quote"];
+ res->escape_char = get_char(options["escape_char"]);
+ res->newlines_in_values = options["newlines_in_values"];
+ res->header_rows = options["header_rows"];
+ res->ignore_empty_lines = options["ignore_empty_lines"];
+ return res;
+}
+
+// [[Rcpp::export]]
+std::shared_ptr<arrow::csv::ConvertOptions> csv___ConvertOptions__initialize(
+ List_ options) {
+ auto res = std::make_shared<arrow::csv::ConvertOptions>(
+ arrow::csv::ConvertOptions::Defaults());
+ res->check_utf8 = options["check_utf8"];
+ return res;
+}
+
+// [[Rcpp::export]]
+std::shared_ptr<arrow::csv::TableReader> csv___TableReader__Make(
+ const std::shared_ptr<arrow::io::InputStream>& input,
+ const std::shared_ptr<arrow::csv::ReadOptions>& read_options,
+ const std::shared_ptr<arrow::csv::ParseOptions>& parse_options,
+ const std::shared_ptr<arrow::csv::ConvertOptions>& convert_options) {
+ std::shared_ptr<arrow::csv::TableReader> table_reader;
+ STOP_IF_NOT_OK(arrow::csv::TableReader::Make(arrow::default_memory_pool(), input,
+ *read_options, *parse_options,
+ *convert_options, &table_reader));
+ return table_reader;
+}
+
+// [[Rcpp::export]]
+std::shared_ptr<arrow::Table> csv___TableReader__Read(
+ const std::shared_ptr<arrow::csv::TableReader>& table_reader) {
+ std::shared_ptr<arrow::Table> table;
+ STOP_IF_NOT_OK(table_reader->Read(&table));
+ return table;
+}
diff --git a/r/tests/testthat/test-arrow-csv-.R b/r/tests/testthat/test-arrow-csv-.R
new file mode 100644
index 0000000..2afd062
--- /dev/null
+++ b/r/tests/testthat/test-arrow-csv-.R
@@ -0,0 +1,33 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+context("arrow::csv::TableReader")
+
+test_that("Can read csv file", {
+ tf <- local_tempfile()
+ write.csv(iris, tf, row.names = FALSE, quote = FALSE)
+
+ tab1 <- read_csv_arrow(tf)
+ tab2 <- read_csv_arrow(mmap_open(tf))
+ tab3 <- read_csv_arrow(ReadableFile(tf))
+
+ iris$Species <- as.character(iris$Species)
+ tab0 <- table(iris)
+ expect_equal(tab0, tab1)
+ expect_equal(tab0, tab2)
+ expect_equal(tab0, tab3)
+})