You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by li...@apache.org on 2023/01/26 14:56:58 UTC

[arrow] branch master updated: GH-33872: [C++] Remove hacky shared_ptr construction in AppendScalar (#33866)

This is an automated email from the ASF dual-hosted git repository.

lidavidm pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/master by this push:
     new 17ea6fcb5a GH-33872: [C++] Remove hacky shared_ptr construction in AppendScalar (#33866)
17ea6fcb5a is described below

commit 17ea6fcb5a38c1cb13eb58a877ed75399ee326e7
Author: Felipe Oliveira Carvalho <fe...@gmail.com>
AuthorDate: Thu Jan 26 11:56:47 2023 -0300

    GH-33872: [C++] Remove hacky shared_ptr construction in AppendScalar (#33866)
    
    `AppendScalarImpl` was created for appending scalars based on a contiguous array of `std::shared_ptr<Scalar>`, so when a single `Scalar` reference is passed, a dummy `shared_ptr` is created before the instantiation of `AppendScalarImpl`.
    
    ```cpp
      std::shared_ptr<Scalar> shared{const_cast<Scalar*>(&scalar), [](Scalar*) {}};
      return AppendScalarImpl{&shared, &shared + 1, n_repeats, this}.Convert();
    ```
    
    This seems non-idiomatic [1] and likely inefficient as a control block needs to be heap-allocated for the `shared_ptr`. An alternative way (that simplifies `AppendScalarImpl` as well) is a custom iterator class the wraps the source iterator with an extra dereference operation.
    
    [1] Pointers to `shared_ptr` as class members set a bad-style precedent
    
    ### Component(s)
    
    C++
    
    * Closes: #33872
    
    Authored-by: Felipe Oliveira Carvalho <fe...@gmail.com>
    Signed-off-by: David Li <li...@gmail.com>
---
 cpp/src/arrow/array/builder_base.cc | 91 +++++++++++++++++++++++--------------
 1 file changed, 57 insertions(+), 34 deletions(-)

diff --git a/cpp/src/arrow/array/builder_base.cc b/cpp/src/arrow/array/builder_base.cc
index e9d5fb44ac..34585ce3c5 100644
--- a/cpp/src/arrow/array/builder_base.cc
+++ b/cpp/src/arrow/array/builder_base.cc
@@ -101,6 +101,7 @@ Status ArrayBuilder::Advance(int64_t elements) {
 
 namespace {
 
+template <typename ConstIterator>
 struct AppendScalarImpl {
   template <typename T>
   enable_if_t<has_c_type<T>::value || is_decimal_type<T>::value ||
@@ -111,11 +112,10 @@ struct AppendScalarImpl {
     RETURN_NOT_OK(builder->Reserve(n_repeats_ * (scalars_end_ - scalars_begin_)));
 
     for (int64_t i = 0; i < n_repeats_; i++) {
-      for (const std::shared_ptr<Scalar>* raw = scalars_begin_; raw != scalars_end_;
-           raw++) {
-        auto scalar = checked_cast<const typename TypeTraits<T>::ScalarType*>(raw->get());
-        if (scalar->is_valid) {
-          builder->UnsafeAppend(scalar->value);
+      for (auto it = scalars_begin_; it != scalars_end_; ++it) {
+        const auto& scalar = checked_cast<const typename TypeTraits<T>::ScalarType&>(*it);
+        if (scalar.is_valid) {
+          builder->UnsafeAppend(scalar.value);
         } else {
           builder->UnsafeAppendNull();
         }
@@ -127,11 +127,10 @@ struct AppendScalarImpl {
   template <typename T>
   enable_if_base_binary<T, Status> Visit(const T&) {
     int64_t data_size = 0;
-    for (const std::shared_ptr<Scalar>* raw = scalars_begin_; raw != scalars_end_;
-         raw++) {
-      auto scalar = checked_cast<const typename TypeTraits<T>::ScalarType*>(raw->get());
-      if (scalar->is_valid) {
-        data_size += scalar->value->size();
+    for (auto it = scalars_begin_; it != scalars_end_; ++it) {
+      const auto& scalar = checked_cast<const typename TypeTraits<T>::ScalarType&>(*it);
+      if (scalar.is_valid) {
+        data_size += scalar.value->size();
       }
     }
 
@@ -140,11 +139,10 @@ struct AppendScalarImpl {
     RETURN_NOT_OK(builder->ReserveData(n_repeats_ * data_size));
 
     for (int64_t i = 0; i < n_repeats_; i++) {
-      for (const std::shared_ptr<Scalar>* raw = scalars_begin_; raw != scalars_end_;
-           raw++) {
-        auto scalar = checked_cast<const typename TypeTraits<T>::ScalarType*>(raw->get());
-        if (scalar->is_valid) {
-          builder->UnsafeAppend(std::string_view{*scalar->value});
+      for (auto it = scalars_begin_; it != scalars_end_; ++it) {
+        const auto& scalar = checked_cast<const typename TypeTraits<T>::ScalarType&>(*it);
+        if (scalar.is_valid) {
+          builder->UnsafeAppend(std::string_view{*scalar.value});
         } else {
           builder->UnsafeAppendNull();
         }
@@ -157,19 +155,17 @@ struct AppendScalarImpl {
   enable_if_list_like<T, Status> Visit(const T&) {
     auto builder = checked_cast<typename TypeTraits<T>::BuilderType*>(builder_);
     int64_t num_children = 0;
-    for (const std::shared_ptr<Scalar>* scalar = scalars_begin_; scalar != scalars_end_;
-         scalar++) {
-      if (!(*scalar)->is_valid) continue;
-      num_children += checked_cast<const BaseListScalar&>(**scalar).value->length();
+    for (auto it = scalars_begin_; it != scalars_end_; ++it) {
+      if (!it->is_valid) continue;
+      num_children += checked_cast<const BaseListScalar&>(*it).value->length();
     }
     RETURN_NOT_OK(builder->value_builder()->Reserve(num_children * n_repeats_));
 
     for (int64_t i = 0; i < n_repeats_; i++) {
-      for (const std::shared_ptr<Scalar>* scalar = scalars_begin_; scalar != scalars_end_;
-           scalar++) {
-        if ((*scalar)->is_valid) {
+      for (auto it = scalars_begin_; it != scalars_end_; ++it) {
+        if (it->is_valid) {
           RETURN_NOT_OK(builder->Append());
-          const Array& list = *checked_cast<const BaseListScalar&>(**scalar).value;
+          const Array& list = *checked_cast<const BaseListScalar&>(*it).value;
           for (int64_t i = 0; i < list.length(); i++) {
             ARROW_ASSIGN_OR_RAISE(auto scalar, list.GetScalar(i));
             RETURN_NOT_OK(builder->value_builder()->AppendScalar(*scalar));
@@ -190,8 +186,8 @@ struct AppendScalarImpl {
       RETURN_NOT_OK(builder->field_builder(field_index)->Reserve(count));
     }
     for (int64_t i = 0; i < n_repeats_; i++) {
-      for (const std::shared_ptr<Scalar>* s = scalars_begin_; s != scalars_end_; s++) {
-        const auto& scalar = checked_cast<const StructScalar&>(**s);
+      for (auto it = scalars_begin_; it != scalars_end_; ++it) {
+        const auto& scalar = checked_cast<const StructScalar&>(*it);
         for (int field_index = 0; field_index < type.num_fields(); ++field_index) {
           if (!scalar.is_valid || !scalar.value[field_index]) {
             RETURN_NOT_OK(builder->field_builder(field_index)->AppendNull());
@@ -268,8 +264,8 @@ struct AppendScalarImpl {
     }
 
     for (int64_t i = 0; i < n_repeats_; i++) {
-      for (const std::shared_ptr<Scalar>* s = scalars_begin_; s != scalars_end_; s++) {
-        RETURN_NOT_OK(AppendUnionScalar(type, **s, builder));
+      for (auto it = scalars_begin_; it != scalars_end_; ++it) {
+        RETURN_NOT_OK(AppendUnionScalar(type, *it, builder));
       }
     }
     return Status::OK();
@@ -279,14 +275,40 @@ struct AppendScalarImpl {
     return Status::NotImplemented("AppendScalar for type ", type);
   }
 
-  Status Convert() { return VisitTypeInline(*(*scalars_begin_)->type, this); }
+  Status Convert() { return VisitTypeInline(*scalars_begin_->type, this); }
 
-  const std::shared_ptr<Scalar>* scalars_begin_;
-  const std::shared_ptr<Scalar>* scalars_end_;
+  ConstIterator scalars_begin_;
+  ConstIterator scalars_end_;
   int64_t n_repeats_;
   ArrayBuilder* builder_;
 };
 
+// Wraps a const_iterator that has a pointer (or pointer-like) to Scalar as the
+// value_type and turns it into an iterator with Scalar as value_type.
+template <typename ConstIterator>
+struct DerefConstIterator {
+  ConstIterator it;
+
+  using value_type = Scalar;
+  using pointer = const Scalar*;
+  using difference_type = typename ConstIterator::difference_type;
+
+  const value_type& operator*() const { return *(*it); }
+
+  DerefConstIterator& operator++() {
+    ++it;
+    return *this;
+  }
+
+  difference_type operator-(const DerefConstIterator& other) const {
+    return it - other.it;
+  }
+
+  bool operator!=(const DerefConstIterator& other) const { return it != other.it; }
+
+  pointer operator->() const { return &(**it); }
+};
+
 }  // namespace
 
 Status ArrayBuilder::AppendScalar(const Scalar& scalar, int64_t n_repeats) {
@@ -294,8 +316,7 @@ Status ArrayBuilder::AppendScalar(const Scalar& scalar, int64_t n_repeats) {
     return Status::Invalid("Cannot append scalar of type ", scalar.type->ToString(),
                            " to builder for type ", type()->ToString());
   }
-  std::shared_ptr<Scalar> shared{const_cast<Scalar*>(&scalar), [](Scalar*) {}};
-  return AppendScalarImpl{&shared, &shared + 1, n_repeats, this}.Convert();
+  return AppendScalarImpl<const Scalar*>{&scalar, &scalar + 1, n_repeats, this}.Convert();
 }
 
 Status ArrayBuilder::AppendScalars(const ScalarVector& scalars) {
@@ -307,8 +328,10 @@ Status ArrayBuilder::AppendScalars(const ScalarVector& scalars) {
                              " to builder for type ", type()->ToString());
     }
   }
-  return AppendScalarImpl{scalars.data(), scalars.data() + scalars.size(),
-                          /*n_repeats=*/1, this}
+
+  using Iterator = DerefConstIterator<ScalarVector::const_iterator>;
+  return AppendScalarImpl<Iterator>{Iterator{scalars.begin()}, Iterator{scalars.end()},
+                                    /*n_repeats=*/1, this}
       .Convert();
 }