You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by th...@apache.org on 2023/06/11 14:58:13 UTC

[arrow] branch main updated: GH-35949: [R] CSV File reader options class objects should print the selected values (#35955)

This is an automated email from the ASF dual-hosted git repository.

thisisnic pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/main by this push:
     new fe6093228f GH-35949: [R] CSV File reader options class objects should print the selected values (#35955)
fe6093228f is described below

commit fe6093228fd27e902b83031d09eef2765d615ed7
Author: Nic Crane <th...@gmail.com>
AuthorDate: Sun Jun 11 15:58:00 2023 +0100

    GH-35949: [R] CSV File reader options class objects should print the selected values (#35955)
    
    Fixes #35949
    * Closes: #35949
    
    Authored-by: Nic Crane <th...@gmail.com>
    Signed-off-by: Nic Crane <th...@gmail.com>
---
 r/R/arrowExports.R                  | 20 +++++++++++++++++
 r/R/csv.R                           | 20 +++++++++++++++--
 r/src/arrowExports.cpp              | 45 +++++++++++++++++++++++++++++++++++++
 r/src/csv.cpp                       | 30 +++++++++++++++++++++++++
 r/tests/testthat/test-dataset-csv.R | 19 ++++++++++++++++
 5 files changed, 132 insertions(+), 2 deletions(-)

diff --git a/r/R/arrowExports.R b/r/R/arrowExports.R
index 8a76e678ba..6af43a6958 100644
--- a/r/R/arrowExports.R
+++ b/r/R/arrowExports.R
@@ -568,6 +568,26 @@ csv___ReadOptions__column_names <- function(options) {
   .Call(`_arrow_csv___ReadOptions__column_names`, options)
 }
 
+csv___ReadOptions__block_size <- function(options) {
+  .Call(`_arrow_csv___ReadOptions__block_size`, options)
+}
+
+csv___ReadOptions__skip_rows <- function(options) {
+  .Call(`_arrow_csv___ReadOptions__skip_rows`, options)
+}
+
+csv___ReadOptions__autogenerate_column_names <- function(options) {
+  .Call(`_arrow_csv___ReadOptions__autogenerate_column_names`, options)
+}
+
+csv___ReadOptions__use_threads <- function(options) {
+  .Call(`_arrow_csv___ReadOptions__use_threads`, options)
+}
+
+csv___ReadOptions__skip_rows_after_names <- function(options) {
+  .Call(`_arrow_csv___ReadOptions__skip_rows_after_names`, options)
+}
+
 csv___ConvertOptions__initialize <- function(options) {
   .Call(`_arrow_csv___ConvertOptions__initialize`, options)
 }
diff --git a/r/R/csv.R b/r/R/csv.R
index 88008bd682..6af46acb25 100644
--- a/r/R/csv.R
+++ b/r/R/csv.R
@@ -464,10 +464,25 @@ CsvTableReader$create <- function(file,
 CsvReadOptions <- R6Class("CsvReadOptions",
   inherit = ArrowObject,
   public = list(
-    encoding = NULL
+    encoding = NULL,
+    print = function(...) {
+      cat("CsvReadOptions\n")
+      fields <- c(
+        "column_names", "block_size", "skip_rows", "autogenerate_column_names",
+        "use_threads", "skip_rows_after_names", "encoding"
+      )
+      # paste() keeps empty or multi-valued fields (e.g. column_names) on one line
+      for (attr in fields) cat(sprintf("%s: %s\n", attr, paste(self[[attr]], collapse = ", ")))
+      invisible(self)
+    }
   ),
   active = list(
-    column_names = function() csv___ReadOptions__column_names(self)
+    column_names = function() csv___ReadOptions__column_names(self),
+    block_size = function() csv___ReadOptions__block_size(self),
+    skip_rows = function() csv___ReadOptions__skip_rows(self),
+    autogenerate_column_names = function() csv___ReadOptions__autogenerate_column_names(self),
+    use_threads = function() csv___ReadOptions__use_threads(self),
+    skip_rows_after_names = function() csv___ReadOptions__skip_rows_after_names(self)
   )
 )
 CsvReadOptions$create <- function(use_threads = option_use_threads(),
@@ -491,6 +506,7 @@ CsvReadOptions$create <- function(use_threads = option_use_threads(),
   )
 
   options$encoding <- encoding
+
   options
 }
 
diff --git a/r/src/arrowExports.cpp b/r/src/arrowExports.cpp
index ca4a4be38d..01705bd7fa 100644
--- a/r/src/arrowExports.cpp
+++ b/r/src/arrowExports.cpp
@@ -1414,6 +1414,46 @@ BEGIN_CPP11
 END_CPP11
 }
 // csv.cpp
+SEXP csv___ReadOptions__block_size(const std::shared_ptr<arrow::csv::ReadOptions>& options);
+extern "C" SEXP _arrow_csv___ReadOptions__block_size(SEXP options_sexp){
+BEGIN_CPP11
+	arrow::r::Input<const std::shared_ptr<arrow::csv::ReadOptions>&>::type options(options_sexp);
+	return cpp11::as_sexp(csv___ReadOptions__block_size(options));
+END_CPP11
+}
+// csv.cpp
+SEXP csv___ReadOptions__skip_rows(const std::shared_ptr<arrow::csv::ReadOptions>& options);
+extern "C" SEXP _arrow_csv___ReadOptions__skip_rows(SEXP options_sexp){
+BEGIN_CPP11
+	arrow::r::Input<const std::shared_ptr<arrow::csv::ReadOptions>&>::type options(options_sexp);
+	return cpp11::as_sexp(csv___ReadOptions__skip_rows(options));
+END_CPP11
+}
+// csv.cpp
+SEXP csv___ReadOptions__autogenerate_column_names(const std::shared_ptr<arrow::csv::ReadOptions>& options);
+extern "C" SEXP _arrow_csv___ReadOptions__autogenerate_column_names(SEXP options_sexp){
+BEGIN_CPP11
+	arrow::r::Input<const std::shared_ptr<arrow::csv::ReadOptions>&>::type options(options_sexp);
+	return cpp11::as_sexp(csv___ReadOptions__autogenerate_column_names(options));
+END_CPP11
+}
+// csv.cpp
+SEXP csv___ReadOptions__use_threads(const std::shared_ptr<arrow::csv::ReadOptions>& options);
+extern "C" SEXP _arrow_csv___ReadOptions__use_threads(SEXP options_sexp){
+BEGIN_CPP11
+	arrow::r::Input<const std::shared_ptr<arrow::csv::ReadOptions>&>::type options(options_sexp);
+	return cpp11::as_sexp(csv___ReadOptions__use_threads(options));
+END_CPP11
+}
+// csv.cpp
+SEXP csv___ReadOptions__skip_rows_after_names(const std::shared_ptr<arrow::csv::ReadOptions>& options);
+extern "C" SEXP _arrow_csv___ReadOptions__skip_rows_after_names(SEXP options_sexp){
+BEGIN_CPP11
+	arrow::r::Input<const std::shared_ptr<arrow::csv::ReadOptions>&>::type options(options_sexp);
+	return cpp11::as_sexp(csv___ReadOptions__skip_rows_after_names(options));
+END_CPP11
+}
+// csv.cpp
 std::shared_ptr<arrow::csv::ConvertOptions> csv___ConvertOptions__initialize(cpp11::list options);
 extern "C" SEXP _arrow_csv___ConvertOptions__initialize(SEXP options_sexp){
 BEGIN_CPP11
@@ -5649,6 +5689,11 @@ static const R_CallMethodDef CallEntries[] = {
 		{ "_arrow_csv___ReadOptions__initialize", (DL_FUNC) &_arrow_csv___ReadOptions__initialize, 1}, 
 		{ "_arrow_csv___ParseOptions__initialize", (DL_FUNC) &_arrow_csv___ParseOptions__initialize, 1}, 
 		{ "_arrow_csv___ReadOptions__column_names", (DL_FUNC) &_arrow_csv___ReadOptions__column_names, 1}, 
+		{ "_arrow_csv___ReadOptions__block_size", (DL_FUNC) &_arrow_csv___ReadOptions__block_size, 1}, 
+		{ "_arrow_csv___ReadOptions__skip_rows", (DL_FUNC) &_arrow_csv___ReadOptions__skip_rows, 1}, 
+		{ "_arrow_csv___ReadOptions__autogenerate_column_names", (DL_FUNC) &_arrow_csv___ReadOptions__autogenerate_column_names, 1}, 
+		{ "_arrow_csv___ReadOptions__use_threads", (DL_FUNC) &_arrow_csv___ReadOptions__use_threads, 1}, 
+		{ "_arrow_csv___ReadOptions__skip_rows_after_names", (DL_FUNC) &_arrow_csv___ReadOptions__skip_rows_after_names, 1}, 
 		{ "_arrow_csv___ConvertOptions__initialize", (DL_FUNC) &_arrow_csv___ConvertOptions__initialize, 1}, 
 		{ "_arrow_csv___TableReader__Make", (DL_FUNC) &_arrow_csv___TableReader__Make, 4}, 
 		{ "_arrow_csv___TableReader__Read", (DL_FUNC) &_arrow_csv___TableReader__Read, 1}, 
diff --git a/r/src/csv.cpp b/r/src/csv.cpp
index 2eb22d7d81..d04caf5c1f 100644
--- a/r/src/csv.cpp
+++ b/r/src/csv.cpp
@@ -77,6 +77,36 @@ SEXP csv___ReadOptions__column_names(
   return cpp11::as_sexp(options->column_names);
 }
 
+// [[arrow::export]]
+SEXP csv___ReadOptions__block_size(
+    const std::shared_ptr<arrow::csv::ReadOptions>& options) {
+  return cpp11::as_sexp(options->block_size);  // block_size field, converted to SEXP
+}
+
+// [[arrow::export]]
+SEXP csv___ReadOptions__skip_rows(
+    const std::shared_ptr<arrow::csv::ReadOptions>& options) {
+  return cpp11::as_sexp(options->skip_rows);  // skip_rows field, converted to SEXP
+}
+
+// [[arrow::export]]
+SEXP csv___ReadOptions__autogenerate_column_names(
+    const std::shared_ptr<arrow::csv::ReadOptions>& options) {
+  return cpp11::as_sexp(options->autogenerate_column_names);  // field value as SEXP
+}
+
+// [[arrow::export]]
+SEXP csv___ReadOptions__use_threads(
+    const std::shared_ptr<arrow::csv::ReadOptions>& options) {
+  return cpp11::as_sexp(options->use_threads);  // use_threads field, converted to SEXP
+}
+
+// [[arrow::export]]
+SEXP csv___ReadOptions__skip_rows_after_names(
+    const std::shared_ptr<arrow::csv::ReadOptions>& options) {
+  return cpp11::as_sexp(options->skip_rows_after_names);  // field value as SEXP
+}
+
 // [[arrow::export]]
 std::shared_ptr<arrow::csv::ConvertOptions> csv___ConvertOptions__initialize(
     cpp11::list options) {
diff --git a/r/tests/testthat/test-dataset-csv.R b/r/tests/testthat/test-dataset-csv.R
index db1ce20ace..c83c30ff90 100644
--- a/r/tests/testthat/test-dataset-csv.R
+++ b/r/tests/testthat/test-dataset-csv.R
@@ -574,3 +574,22 @@ test_that("open_delim_dataset params passed through to open_dataset", {
 
   expect_equal(ds$time, "16-01-2023")
 })
+
+test_that("CSVReadOptions printing", {
+  default_read_options <- CsvReadOptions$create()
+  custom_read_options <- CsvReadOptions$create(skip_rows = 102)
+
+  expect_output(print(default_read_options), "skip_rows: 0")
+  expect_output(print(custom_read_options), "skip_rows: 102")
+})
+
+test_that("CSVReadOptions field access", {
+  options <- CsvReadOptions$create()
+  expect_equal(options$skip_rows, 0)
+  expect_equal(options$autogenerate_column_names, FALSE)
+  expect_equal(options$skip_rows_after_names, 0)
+  expect_equal(options$use_threads, option_use_threads())
+  expect_equal(options$column_names, character(0))
+  expect_equal(options$block_size, 1048576L)
+  expect_equal(options$encoding, "UTF-8")
+})