You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by th...@apache.org on 2022/07/04 18:47:54 UTC
[arrow] branch master updated: ARROW-14989: [R] Update num_rows methods to output doubles not integers to prevent integer overflow
This is an automated email from the ASF dual-hosted git repository.
thisisnic pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/master by this push:
new 7124bafbb1 ARROW-14989: [R] Update num_rows methods to output doubles not integers to prevent integer overflow
7124bafbb1 is described below
commit 7124bafbb16ce6ae353b81d6be39c37869ee53ab
Author: Nic Crane <th...@gmail.com>
AuthorDate: Mon Jul 4 19:47:39 2022 +0100
ARROW-14989: [R] Update num_rows methods to output doubles not integers to prevent integer overflow
This PR enables `num_rows()` methods to be called on `Table` and `RecordBatch` objects without integer overflow when the value of `num_rows()` is higher than `.Machine$integer.max`. I originally wrote some tests, but they took ages to run and crashed on CI anyway, so I removed them; they can still be seen in https://github.com/apache/arrow/pull/13482/commits/e7cf8a66beab6d1b7d85304362086b6205a31279/.
Closes #13482 from thisisnic/ARROW-14989_num_rows_double
Authored-by: Nic Crane <th...@gmail.com>
Signed-off-by: Nic Crane <th...@gmail.com>
---
r/src/arrow_cpp11.h | 15 +++++++++++++++
r/src/recordbatch.cpp | 4 ++--
r/src/table.cpp | 4 +++-
3 files changed, 20 insertions(+), 3 deletions(-)
diff --git a/r/src/arrow_cpp11.h b/r/src/arrow_cpp11.h
index f1338c02ca..123875325c 100644
--- a/r/src/arrow_cpp11.h
+++ b/r/src/arrow_cpp11.h
@@ -407,6 +407,12 @@ cpp11::writable::list to_r_list(const std::vector<std::shared_ptr<T>>& x) {
} // namespace r
} // namespace arrow
+struct r_vec_size {
+ explicit r_vec_size(R_xlen_t x) : value(x) {}
+
+ R_xlen_t value;
+};
+
namespace cpp11 {
template <typename T>
@@ -428,4 +434,13 @@ SEXP as_sexp(const std::shared_ptr<T>& ptr) {
return cpp11::to_r6<T>(ptr);
}
+inline SEXP as_sexp(r_vec_size size) {
+ R_xlen_t x = size.value;
+ if (x > std::numeric_limits<int>::max()) {
+ return Rf_ScalarReal(x);
+ } else {
+ return Rf_ScalarInteger(x);
+ }
+}
+
} // namespace cpp11
diff --git a/r/src/recordbatch.cpp b/r/src/recordbatch.cpp
index 558628a9cc..01bd8a3f35 100644
--- a/r/src/recordbatch.cpp
+++ b/r/src/recordbatch.cpp
@@ -32,8 +32,8 @@ int RecordBatch__num_columns(const std::shared_ptr<arrow::RecordBatch>& x) {
}
// [[arrow::export]]
-int RecordBatch__num_rows(const std::shared_ptr<arrow::RecordBatch>& x) {
- return x->num_rows();
+r_vec_size RecordBatch__num_rows(const std::shared_ptr<arrow::RecordBatch>& x) {
+ return r_vec_size(x->num_rows());
}
// [[arrow::export]]
diff --git a/r/src/table.cpp b/r/src/table.cpp
index 051647979f..07bf44750a 100644
--- a/r/src/table.cpp
+++ b/r/src/table.cpp
@@ -28,7 +28,9 @@ int Table__num_columns(const std::shared_ptr<arrow::Table>& x) {
}
// [[arrow::export]]
-int Table__num_rows(const std::shared_ptr<arrow::Table>& x) { return x->num_rows(); }
+r_vec_size Table__num_rows(const std::shared_ptr<arrow::Table>& x) {
+ return r_vec_size(x->num_rows());
+}
// [[arrow::export]]
std::shared_ptr<arrow::Schema> Table__schema(const std::shared_ptr<arrow::Table>& x) {