You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by th...@apache.org on 2022/07/04 18:47:54 UTC

[arrow] branch master updated: ARROW-14989: [R] Update num_rows methods to output doubles not integers to prevent integer overflow

This is an automated email from the ASF dual-hosted git repository.

thisisnic pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/master by this push:
     new 7124bafbb1 ARROW-14989: [R] Update num_rows methods to output doubles not integers to prevent integer overflow
7124bafbb1 is described below

commit 7124bafbb16ce6ae353b81d6be39c37869ee53ab
Author: Nic Crane <th...@gmail.com>
AuthorDate: Mon Jul 4 19:47:39 2022 +0100

    ARROW-14989: [R] Update num_rows methods to output doubles not integers to prevent integer overflow
    
    This PR enables `num_rows()` methods to be called on `Table` and `RecordBatch` objects without integer overflow when the value of `num_rows()` is higher than `.Machine$integer.max`. The row count is returned as an R integer when it fits in a C `int` and as a double otherwise. I originally wrote some tests, but they took a very long time to run and crashed on CI anyway, so I removed them; they can still be seen in https://github.com/apache/arrow/pull/13482/commits/e7cf8a66beab6d1b7d85304362086b6205a31279/.
    
    Closes #13482 from thisisnic/ARROW-14989_num_rows_double
    
    Authored-by: Nic Crane <th...@gmail.com>
    Signed-off-by: Nic Crane <th...@gmail.com>
---
 r/src/arrow_cpp11.h   | 15 +++++++++++++++
 r/src/recordbatch.cpp |  4 ++--
 r/src/table.cpp       |  4 +++-
 3 files changed, 20 insertions(+), 3 deletions(-)

diff --git a/r/src/arrow_cpp11.h b/r/src/arrow_cpp11.h
index f1338c02ca..123875325c 100644
--- a/r/src/arrow_cpp11.h
+++ b/r/src/arrow_cpp11.h
@@ -407,6 +407,12 @@ cpp11::writable::list to_r_list(const std::vector<std::shared_ptr<T>>& x) {
 }  // namespace r
 }  // namespace arrow
 
+struct r_vec_size {
+  explicit r_vec_size(R_xlen_t x) : value(x) {}
+
+  R_xlen_t value;
+};
+
 namespace cpp11 {
 
 template <typename T>
@@ -428,4 +434,13 @@ SEXP as_sexp(const std::shared_ptr<T>& ptr) {
   return cpp11::to_r6<T>(ptr);
 }
 
+inline SEXP as_sexp(r_vec_size size) {
+  R_xlen_t x = size.value;
+  if (x > std::numeric_limits<int>::max()) {
+    return Rf_ScalarReal(x);
+  } else {
+    return Rf_ScalarInteger(x);
+  }
+}
+
 }  // namespace cpp11
diff --git a/r/src/recordbatch.cpp b/r/src/recordbatch.cpp
index 558628a9cc..01bd8a3f35 100644
--- a/r/src/recordbatch.cpp
+++ b/r/src/recordbatch.cpp
@@ -32,8 +32,8 @@ int RecordBatch__num_columns(const std::shared_ptr<arrow::RecordBatch>& x) {
 }
 
 // [[arrow::export]]
-int RecordBatch__num_rows(const std::shared_ptr<arrow::RecordBatch>& x) {
-  return x->num_rows();
+r_vec_size RecordBatch__num_rows(const std::shared_ptr<arrow::RecordBatch>& x) {
+  return r_vec_size(x->num_rows());
 }
 
 // [[arrow::export]]
diff --git a/r/src/table.cpp b/r/src/table.cpp
index 051647979f..07bf44750a 100644
--- a/r/src/table.cpp
+++ b/r/src/table.cpp
@@ -28,7 +28,9 @@ int Table__num_columns(const std::shared_ptr<arrow::Table>& x) {
 }
 
 // [[arrow::export]]
-int Table__num_rows(const std::shared_ptr<arrow::Table>& x) { return x->num_rows(); }
+r_vec_size Table__num_rows(const std::shared_ptr<arrow::Table>& x) {
+  return r_vec_size(x->num_rows());
+}
 
 // [[arrow::export]]
 std::shared_ptr<arrow::Schema> Table__schema(const std::shared_ptr<arrow::Table>& x) {