You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by we...@apache.org on 2018/12/05 17:07:40 UTC

[arrow] branch master updated: ARROW-3871: [R] Replace usages of C++ GetValuesSafely with new methods on ArrayData

This is an automated email from the ASF dual-hosted git repository.

wesm pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/master by this push:
     new 42a52d7  ARROW-3871: [R] Replace usages of C++ GetValuesSafely with new methods on ArrayData
42a52d7 is described below

commit 42a52d7f2536392f457d8d9daec69a18745d3c4a
Author: Romain Francois <ro...@purrple.cat>
AuthorDate: Wed Dec 5 11:07:34 2018 -0600

    ARROW-3871: [R] Replace usages of C++ GetValuesSafely with new methods on ArrayData
    
    To get rid of `GetValuesSafely` I've had to add variants of `ArrayData.GetValues<>` that take and `offset` as a parameter, this is useful e.g. for boolean arrays or string arrays.
    
    Author: Romain Francois <ro...@purrple.cat>
    
    Closes #3103 from romainfrancois/ARROW-3871/GetValuesSafely and squashes the following commits:
    
    bcb108242 <Romain Francois> s/offset/absolute_offset/
    acd96e3eb <Romain Francois> s/index_/index/
    a1ec32f2f <Romain Francois> + ArrayData->GetValues<>(i, offset_)
    ad17f4a57 <Romain Francois> using ArrayData.GetValues<>
---
 cpp/src/arrow/array.h | 18 ++++++++++++++----
 r/src/array.cpp       | 18 +++++++++---------
 r/src/arrow_types.h   | 11 -----------
 3 files changed, 23 insertions(+), 24 deletions(-)

diff --git a/cpp/src/arrow/array.h b/cpp/src/arrow/array.h
index 0274c15..467da16 100644
--- a/cpp/src/arrow/array.h
+++ b/cpp/src/arrow/array.h
@@ -170,24 +170,34 @@ struct ARROW_EXPORT ArrayData {
 
   // Access a buffer's data as a typed C pointer
   template <typename T>
-  inline const T* GetValues(int i) const {
+  inline const T* GetValues(int i, int64_t absolute_offset) const {
     if (buffers[i]) {
-      return reinterpret_cast<const T*>(buffers[i]->data()) + offset;
+      return reinterpret_cast<const T*>(buffers[i]->data()) + absolute_offset;
     } else {
       return NULLPTR;
     }
   }
 
+  template <typename T>
+  inline const T* GetValues(int i) const {
+    return GetValues<T>(i, offset);
+  }
+
   // Access a buffer's data as a typed C pointer
   template <typename T>
-  inline T* GetMutableValues(int i) {
+  inline T* GetMutableValues(int i, int64_t absolute_offset) {
     if (buffers[i]) {
-      return reinterpret_cast<T*>(buffers[i]->mutable_data()) + offset;
+      return reinterpret_cast<T*>(buffers[i]->mutable_data()) + absolute_offset;
     } else {
       return NULLPTR;
     }
   }
 
+  template <typename T>
+  inline T* GetMutableValues(int i) {
+    return GetMutableValues<T>(i, offset);
+  }
+
   std::shared_ptr<DataType> type;
   int64_t length;
   int64_t null_count;
diff --git a/r/src/array.cpp b/r/src/array.cpp
index 038d786..901f2b6 100644
--- a/r/src/array.cpp
+++ b/r/src/array.cpp
@@ -534,7 +534,7 @@ struct Converter_SimpleArray {
     if (n == null_count) {
       std::fill_n(data.begin() + start, n, default_value<RTYPE>());
     } else {
-      auto p_values = GetValuesSafely<value_type>(array->data(), 1, array->offset());
+      auto p_values = array->data()->GetValues<value_type>(1);
       STOP_IF_NULL(p_values);
 
       // first copy all the data
@@ -566,9 +566,9 @@ struct Converter_String {
     if (null_count == n) {
       std::fill_n(data.begin(), n, NA_STRING);
     } else {
-      auto p_offset = GetValuesSafely<int32_t>(array->data(), 1, array->offset());
+      auto p_offset = array->data()->GetValues<int32_t>(1);
       STOP_IF_NULL(p_offset);
-      auto p_data = GetValuesSafely<char>(array->data(), 2, *p_offset);
+      auto p_data = array->data()->GetValues<char>(2, *p_offset);
       if (!p_data) {
         // There is an offset buffer, but the data buffer is null
         // There is at least one value in the array and not all the values are null
@@ -615,7 +615,7 @@ struct Converter_Boolean {
       std::fill_n(data.begin() + start, n, NA_LOGICAL);
     } else {
       // process the data
-      auto p_data = GetValuesSafely<uint8_t>(array->data(), 1, 0);
+      auto p_data = array->data()->GetValues<uint8_t>(1, 0);
       STOP_IF_NULL(p_data);
 
       arrow::internal::BitmapReader data_reader(p_data, array->offset(), n);
@@ -661,7 +661,7 @@ struct Converter_Dictionary_Int32Indices {
       std::fill_n(data.begin() + start, n, NA_INTEGER);
     } else {
       std::shared_ptr<Array> indices = dict_array->indices();
-      auto p_array = GetValuesSafely<value_type>(indices->data(), 1, indices->offset());
+      auto p_array = indices->data()->GetValues<value_type>(1);
       STOP_IF_NULL(p_array);
 
       if (array->null_count()) {
@@ -692,7 +692,7 @@ struct Converter_Date64 {
     if (null_count == n) {
       std::fill_n(data.begin() + start, n, NA_REAL);
     } else {
-      auto p_values = GetValuesSafely<int64_t>(array->data(), 1, array->offset());
+      auto p_values = array->data()->GetValues<int64_t>(1);
       STOP_IF_NULL(p_values);
       auto p_vec = data.begin() + start;
 
@@ -726,7 +726,7 @@ struct Converter_Promotion {
     if (null_count == n) {
       std::fill_n(data.begin() + start, n, default_value<RTYPE>());
     } else {
-      auto p_values = GetValuesSafely<value_type>(array->data(), 1, array->offset());
+      auto p_values = array->data()->GetValues<value_type>(1);
       STOP_IF_NULL(p_values);
 
       auto value_convert = [](value_type value) {
@@ -766,7 +766,7 @@ struct Converter_Time {
     if (n == null_count) {
       std::fill_n(data.begin() + start, n, NA_REAL);
     } else {
-      auto p_values = GetValuesSafely<value_type>(array->data(), 1, array->offset());
+      auto p_values = array->data()->GetValues<value_type>(1);
       STOP_IF_NULL(p_values);
       auto p_vec = data.begin() + start;
       auto convert = [this](value_type value) {
@@ -803,7 +803,7 @@ struct Converter_Int64 {
     if (null_count == n) {
       std::fill_n(reinterpret_cast<int64_t*>(data.begin()) + start, n, NA_INT64);
     } else {
-      auto p_values = GetValuesSafely<int64_t>(array->data(), 1, array->offset());
+      auto p_values = array->data()->GetValues<int64_t>(1);
       STOP_IF_NULL(p_values);
       auto p_vec = reinterpret_cast<int64_t*>(data.begin()) + start;
 
diff --git a/r/src/arrow_types.h b/r/src/arrow_types.h
index 9ebc558..dba7a91 100644
--- a/r/src/arrow_types.h
+++ b/r/src/arrow_types.h
@@ -173,17 +173,6 @@ std::shared_ptr<arrow::RecordBatch> RecordBatch__from_dataframe(Rcpp::DataFrame
 namespace arrow {
 namespace r {
 
-template <typename T>
-inline const T* GetValuesSafely(const std::shared_ptr<ArrayData>& data, int i,
-                                int64_t offset) {
-  auto buffer = data->buffers[i];
-  if (!buffer) {
-    return nullptr;
-  } else {
-    return reinterpret_cast<const T*>(buffer->data()) + offset;
-  }
-}
-
 template <int RTYPE, typename Vec = Rcpp::Vector<RTYPE>>
 class RBuffer : public MutableBuffer {
  public: