You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by ro...@apache.org on 2019/06/12 11:34:24 UTC

[arrow] branch master updated: ARROW-3815 [R]: refine record batch factory

This is an automated email from the ASF dual-hosted git repository.

romainfrancois pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/master by this push:
     new 72287df  ARROW-3815 [R]: refine record batch factory
72287df is described below

commit 72287dfef7109bfa49affd4fc7c1016a86b25416
Author: Romain Francois <ro...@rstudio.com>
AuthorDate: Wed Jun 12 13:34:09 2019 +0200

    ARROW-3815 [R]: refine record batch factory
    
    so that `array()` when given an `arrow::Array` is identity
    
    ``` r
    library(arrow, warn.conflicts = FALSE)
    
    array(array(1:10))
    #> arrow::Array
    #> [
    #>   1,
    #>   2,
    #>   3,
    #>   4,
    #>   5,
    #>   6,
    #>   7,
    #>   8,
    #>   9,
    #>   10
    #> ]
    ```
    
    so that we may supply already made `arrow::Array` in `record_batch()`
    
    ```r
    batch <- record_batch(x = 1:10, y = arrow::array(1:10))
    as_tibble(batch)
    #> # A tibble: 10 x 2
    #>        x     y
    #>    <int> <int>
    #>  1     1     1
    #>  2     2     2
    #>  3     3     3
    #>  4     4     4
    #>  5     5     5
    #>  6     6     6
    #>  7     7     7
    #>  8     8     8
    #>  9     9     9
    #> 10    10    10
    ```
    
    <sup>Created on 2019-06-10 by the [reprex package](https://reprex.tidyverse.org) (v0.3.0.9000)</sup>
    
    Author: Romain Francois <ro...@rstudio.com>
    
    Closes #4508 from romainfrancois/ARROW-3815/record_batch and squashes the following commits:
    
    189ab978 <Romain Francois> more idiomatic C++11 range foor loop
    9e479c81 <Romain Francois> linting
    4fb4f332 <Romain Francois> additional chunked_array() test
    f17ae031 <Romain Francois> Array__from_vector() recosgnise arrow::Array
    2925a740 <Romain Francois> InferType recognizes arrow::Array
---
 .travis.yml                          |  1 +
 r/src/array_from_vector.cpp          | 12 ++++++++++++
 r/tests/testthat/test-Array.R        |  5 +++++
 r/tests/testthat/test-RecordBatch.R  |  5 +++++
 r/tests/testthat/test-chunkedarray.R |  7 +++++++
 5 files changed, 30 insertions(+)

diff --git a/.travis.yml b/.travis.yml
index 74b5bad..36a2dcc 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -362,6 +362,7 @@ matrix:
     - $TRAVIS_BUILD_DIR/ci/travis_before_script_cpp.sh --only-library
     - export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$TRAVIS_BUILD_DIR/cpp-install/lib
     - export PKG_CONFIG_PATH=$PKG_CONFIG_PATH:$TRAVIS_BUILD_DIR/cpp-install/lib/pkgconfig
+    - export CXX11FLAGS=-Wall
     - pushd ${TRAVIS_BUILD_DIR}/r
     after_success:
     - Rscript ../ci/travis_upload_r_coverage.R
diff --git a/r/src/array_from_vector.cpp b/r/src/array_from_vector.cpp
index 0c21e94..0d4e318 100644
--- a/r/src/array_from_vector.cpp
+++ b/r/src/array_from_vector.cpp
@@ -763,6 +763,13 @@ std::shared_ptr<arrow::DataType> GetFactorType(SEXP factor) {
 
 std::shared_ptr<arrow::DataType> InferType(SEXP x) {
   switch (TYPEOF(x)) {
+    case ENVSXP:
+      if (Rf_inherits(x, "arrow::Array")) {
+        Rcpp::ConstReferenceSmartPtrInputParameter<std::shared_ptr<arrow::Array>> array(
+            x);
+        return static_cast<std::shared_ptr<arrow::Array>>(array)->type();
+      }
+      break;
     case LGLSXP:
       return boolean();
     case INTSXP:
@@ -915,6 +922,11 @@ bool CheckCompatibleFactor(SEXP obj, const std::shared_ptr<arrow::DataType>& typ
 
 std::shared_ptr<arrow::Array> Array__from_vector(
     SEXP x, const std::shared_ptr<arrow::DataType>& type, bool type_infered) {
+  // short circuit if `x` is already an Array
+  if (Rf_inherits(x, "arrow::Array")) {
+    return Rcpp::ConstReferenceSmartPtrInputParameter<std::shared_ptr<arrow::Array>>(x);
+  }
+
   // special case when we can just use the data from the R vector
   // directly. This still needs to handle the null bitmap
   if (arrow::r::can_reuse_memory(x, type)) {
diff --git a/r/tests/testthat/test-Array.R b/r/tests/testthat/test-Array.R
index 68fd9dc..fd2fe51 100644
--- a/r/tests/testthat/test-Array.R
+++ b/r/tests/testthat/test-Array.R
@@ -410,3 +410,8 @@ test_that("Array<int8>$as_vector() converts to integer (ARROW-3794)", {
   expect_equal(a$type, uint8())
   expect_equal(a$as_vector(), 0:255)
 })
+
+test_that("array() recognise arrow::Array (ARROW-3815)", {
+  a <- array(1:10)
+  expect_equal(a, array(a))
+})
diff --git a/r/tests/testthat/test-RecordBatch.R b/r/tests/testthat/test-RecordBatch.R
index c0295ba..a492f0f 100644
--- a/r/tests/testthat/test-RecordBatch.R
+++ b/r/tests/testthat/test-RecordBatch.R
@@ -145,3 +145,8 @@ test_that("RecordBatch dim() and nrow() (ARROW-3816)", {
   expect_equal(dim(batch), c(10L, 2L))
   expect_equal(nrow(batch), 10L)
 })
+
+test_that("record_batch() handles arrow::Array", {
+  batch <- record_batch(x = 1:10, y = arrow::array(1:10))
+  expect_equal(batch$schema, schema(x = int32(), y = int32()))
+})
diff --git a/r/tests/testthat/test-chunkedarray.R b/r/tests/testthat/test-chunkedarray.R
index 5e22e5c..9505b8f 100644
--- a/r/tests/testthat/test-chunkedarray.R
+++ b/r/tests/testthat/test-chunkedarray.R
@@ -265,3 +265,10 @@ test_that("chunked_array() handles 0 chunks if given a type", {
     expect_equal(a$length(), 0L)
   }
 })
+
+test_that("chunked_array() can ingest arrays (ARROW-3815)", {
+  expect_equal(
+    chunked_array(1:5, array(6:10))$as_vector(),
+    1:10
+  )
+})