You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by ro...@apache.org on 2019/06/12 11:34:24 UTC
[arrow] branch master updated: ARROW-3815 [R]: refine record batch
factory
This is an automated email from the ASF dual-hosted git repository.
romainfrancois pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/master by this push:
new 72287df ARROW-3815 [R]: refine record batch factory
72287df is described below
commit 72287dfef7109bfa49affd4fc7c1016a86b25416
Author: Romain Francois <ro...@rstudio.com>
AuthorDate: Wed Jun 12 13:34:09 2019 +0200
ARROW-3815 [R]: refine record batch factory
so that `array()` when given an `arrow::Array` is identity
``` r
library(arrow, warn.conflicts = FALSE)
array(array(1:10))
#> arrow::Array
#> [
#> 1,
#> 2,
#> 3,
#> 4,
#> 5,
#> 6,
#> 7,
#> 8,
#> 9,
#> 10
#> ]
```
so that we may supply already made `arrow::Array` in `record_batch()`
```r
batch <- record_batch(x = 1:10, y = arrow::array(1:10))
as_tibble(batch)
#> # A tibble: 10 x 2
#> x y
#> <int> <int>
#> 1 1 1
#> 2 2 2
#> 3 3 3
#> 4 4 4
#> 5 5 5
#> 6 6 6
#> 7 7 7
#> 8 8 8
#> 9 9 9
#> 10 10 10
```
<sup>Created on 2019-06-10 by the [reprex package](https://reprex.tidyverse.org) (v0.3.0.9000)</sup>
Author: Romain Francois <ro...@rstudio.com>
Closes #4508 from romainfrancois/ARROW-3815/record_batch and squashes the following commits:
189ab978 <Romain Francois> more idiomatic C++11 range foor loop
9e479c81 <Romain Francois> linting
4fb4f332 <Romain Francois> additional chunked_array() test
f17ae031 <Romain Francois> Array__from_vector() recosgnise arrow::Array
2925a740 <Romain Francois> InferType recognizes arrow::Array
---
.travis.yml | 1 +
r/src/array_from_vector.cpp | 12 ++++++++++++
r/tests/testthat/test-Array.R | 5 +++++
r/tests/testthat/test-RecordBatch.R | 5 +++++
r/tests/testthat/test-chunkedarray.R | 7 +++++++
5 files changed, 30 insertions(+)
diff --git a/.travis.yml b/.travis.yml
index 74b5bad..36a2dcc 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -362,6 +362,7 @@ matrix:
- $TRAVIS_BUILD_DIR/ci/travis_before_script_cpp.sh --only-library
- export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$TRAVIS_BUILD_DIR/cpp-install/lib
- export PKG_CONFIG_PATH=$PKG_CONFIG_PATH:$TRAVIS_BUILD_DIR/cpp-install/lib/pkgconfig
+ - export CXX11FLAGS=-Wall
- pushd ${TRAVIS_BUILD_DIR}/r
after_success:
- Rscript ../ci/travis_upload_r_coverage.R
diff --git a/r/src/array_from_vector.cpp b/r/src/array_from_vector.cpp
index 0c21e94..0d4e318 100644
--- a/r/src/array_from_vector.cpp
+++ b/r/src/array_from_vector.cpp
@@ -763,6 +763,13 @@ std::shared_ptr<arrow::DataType> GetFactorType(SEXP factor) {
std::shared_ptr<arrow::DataType> InferType(SEXP x) {
switch (TYPEOF(x)) {
+ case ENVSXP:
+ if (Rf_inherits(x, "arrow::Array")) {
+ Rcpp::ConstReferenceSmartPtrInputParameter<std::shared_ptr<arrow::Array>> array(
+ x);
+ return static_cast<std::shared_ptr<arrow::Array>>(array)->type();
+ }
+ break;
case LGLSXP:
return boolean();
case INTSXP:
@@ -915,6 +922,11 @@ bool CheckCompatibleFactor(SEXP obj, const std::shared_ptr<arrow::DataType>& typ
std::shared_ptr<arrow::Array> Array__from_vector(
SEXP x, const std::shared_ptr<arrow::DataType>& type, bool type_infered) {
+ // short circuit if `x` is already an Array
+ if (Rf_inherits(x, "arrow::Array")) {
+ return Rcpp::ConstReferenceSmartPtrInputParameter<std::shared_ptr<arrow::Array>>(x);
+ }
+
// special case when we can just use the data from the R vector
// directly. This still needs to handle the null bitmap
if (arrow::r::can_reuse_memory(x, type)) {
diff --git a/r/tests/testthat/test-Array.R b/r/tests/testthat/test-Array.R
index 68fd9dc..fd2fe51 100644
--- a/r/tests/testthat/test-Array.R
+++ b/r/tests/testthat/test-Array.R
@@ -410,3 +410,8 @@ test_that("Array<int8>$as_vector() converts to integer (ARROW-3794)", {
expect_equal(a$type, uint8())
expect_equal(a$as_vector(), 0:255)
})
+
+test_that("array() recognise arrow::Array (ARROW-3815)", {
+ a <- array(1:10)
+ expect_equal(a, array(a))
+})
diff --git a/r/tests/testthat/test-RecordBatch.R b/r/tests/testthat/test-RecordBatch.R
index c0295ba..a492f0f 100644
--- a/r/tests/testthat/test-RecordBatch.R
+++ b/r/tests/testthat/test-RecordBatch.R
@@ -145,3 +145,8 @@ test_that("RecordBatch dim() and nrow() (ARROW-3816)", {
expect_equal(dim(batch), c(10L, 2L))
expect_equal(nrow(batch), 10L)
})
+
+test_that("record_batch() handles arrow::Array", {
+ batch <- record_batch(x = 1:10, y = arrow::array(1:10))
+ expect_equal(batch$schema, schema(x = int32(), y = int32()))
+})
diff --git a/r/tests/testthat/test-chunkedarray.R b/r/tests/testthat/test-chunkedarray.R
index 5e22e5c..9505b8f 100644
--- a/r/tests/testthat/test-chunkedarray.R
+++ b/r/tests/testthat/test-chunkedarray.R
@@ -265,3 +265,10 @@ test_that("chunked_array() handles 0 chunks if given a type", {
expect_equal(a$length(), 0L)
}
})
+
+test_that("chunked_array() can ingest arrays (ARROW-3815)", {
+ expect_equal(
+ chunked_array(1:5, array(6:10))$as_vector(),
+ 1:10
+ )
+})