You are viewing a plain text version of this content. The canonical link for it is here.
Posted to github@arrow.apache.org by GitBox <gi...@apache.org> on 2022/04/29 19:30:06 UTC

[GitHub] [arrow] AlvinJ15 commented on a diff in pull request #13009: ARROW-602: [C++] Provide iterator access to primitive elements inside an Array

AlvinJ15 commented on code in PR #13009:
URL: https://github.com/apache/arrow/pull/13009#discussion_r862089851


##########
cpp/src/arrow/stl_iterator.h:
##########
@@ -128,6 +131,148 @@ class ArrayIterator {
   int64_t index_;
 };
 
+/// \brief Random-access iterator over the logical elements of a ChunkedArray.
+///
+/// The iterator stores a logical index into the whole ChunkedArray together
+/// with an ArrayIterator positioned inside the chunk that holds that index.
+/// Dereferencing returns a `value_type` (an optional) by value.
+/// Comparisons and iterator differences only look at the logical index, so
+/// they are only meaningful between iterators over the same ChunkedArray.
+///
+/// \tparam ArrayType concrete array type every chunk is downcast to
+/// \tparam ValueAccessor accessor forwarded to the per-chunk ArrayIterator
+template <typename ArrayType,
+          typename ValueAccessor = detail::DefaultValueAccessor<ArrayType>>
+class ChunkedArrayIterator {
+ public:
+  using value_type = arrow::util::optional<typename ValueAccessor::ValueType>;
+  using difference_type = int64_t;
+  using pointer = value_type*;
+  using reference = value_type&;
+  using iterator_category = std::random_access_iterator_tag;
+
+  // Some algorithms need to default-construct an iterator.
+  // Such an iterator has no ChunkedArray attached: it may be assigned to and
+  // compared, but must not be dereferenced or advanced.
+  ChunkedArrayIterator() : chunked_array_(NULLPTR), index_(0), current_chunk_index_(0) {}
+
+  /// \param[in] chunked_array array to iterate over; only its address is
+  ///            stored, so it must outlive the iterator
+  /// \param[in] index logical index of the element the iterator designates
+  explicit ChunkedArrayIterator(const ChunkedArray& chunked_array, int64_t index = 0)
+      : chunked_array_(&chunked_array), index_(index), current_chunk_index_(0) {
+    MoveToChunk(GetChunkLocation(index_));
+  }
+
+  // Value access
+  value_type operator*() const { return *current_array_iterator_; }
+
+  value_type operator[](difference_type n) const {
+    const auto chunk_location = GetChunkLocation(index_ + n);
+    if (current_chunk_index_ == chunk_location.chunk_index) {
+      // Target lives in the current chunk: reuse the per-chunk iterator.
+      return current_array_iterator_[chunk_location.index_in_chunk -
+                                     current_array_iterator_.index()];
+    }
+    // Target lives in another chunk: use a temporary iterator so that this
+    // (const) iterator keeps its position.
+    ChunkIterator target_iterator{*arrow::internal::checked_pointer_cast<ArrayType>(
+        chunked_array_->chunk(chunk_location.chunk_index))};
+    return target_iterator[chunk_location.index_in_chunk];
+  }
+
+  /// \brief Logical index (across all chunks) the iterator designates.
+  int64_t index() const { return index_; }
+
+  // Forward / backward
+  ChunkedArrayIterator& operator++() {
+    (*this) += 1;
+    return *this;
+  }
+  ChunkedArrayIterator& operator--() {
+    (*this) -= 1;
+    return *this;
+  }
+
+  ChunkedArrayIterator operator++(int) {
+    ChunkedArrayIterator tmp(*this);
+    ++*this;
+    return tmp;
+  }
+  ChunkedArrayIterator operator--(int) {
+    ChunkedArrayIterator tmp(*this);
+    --*this;
+    return tmp;
+  }
+
+  // Arithmetic
+  difference_type operator-(const ChunkedArrayIterator& other) const {
+    return index_ - other.index_;
+  }
+  ChunkedArrayIterator operator+(difference_type n) const {
+    return ChunkedArrayIterator(*chunked_array_, index_ + n);
+  }
+  ChunkedArrayIterator operator-(difference_type n) const {
+    return ChunkedArrayIterator(*chunked_array_, index_ - n);
+  }
+  friend inline ChunkedArrayIterator operator+(difference_type diff,
+                                               const ChunkedArrayIterator& other) {
+    return ChunkedArrayIterator(*other.chunked_array_, diff + other.index_);
+  }
+  // Counterpart of `diff + it`: yields an iterator at logical index
+  // `diff - other.index()`.
+  friend inline ChunkedArrayIterator operator-(difference_type diff,
+                                               const ChunkedArrayIterator& other) {
+    return ChunkedArrayIterator(*other.chunked_array_, diff - other.index_);
+  }
+  ChunkedArrayIterator& operator+=(difference_type n) {
+    index_ += n;
+    const auto chunk_location = GetChunkLocation(index_);
+    if (current_chunk_index_ == chunk_location.chunk_index) {
+      // Still in the same chunk: shift the per-chunk iterator by the distance
+      // between its position and the target in-chunk index.
+      current_array_iterator_ -=
+          current_array_iterator_.index() - chunk_location.index_in_chunk;
+    } else {
+      MoveToChunk(chunk_location);
+    }
+    return *this;
+  }
+  ChunkedArrayIterator& operator-=(difference_type n) {
+    (*this) += -n;
+    return *this;
+  }
+
+  // Comparisons
+  bool operator==(const ChunkedArrayIterator& other) const {
+    return index_ == other.index_;
+  }
+  bool operator!=(const ChunkedArrayIterator& other) const {
+    return index_ != other.index_;
+  }
+  bool operator<(const ChunkedArrayIterator& other) const {
+    return index_ < other.index_;
+  }
+  bool operator>(const ChunkedArrayIterator& other) const {
+    return index_ > other.index_;
+  }
+  bool operator<=(const ChunkedArrayIterator& other) const {
+    return index_ <= other.index_;
+  }
+  bool operator>=(const ChunkedArrayIterator& other) const {
+    return index_ >= other.index_;
+  }
+
+ private:
+  // Per-chunk iterator; the accessor is forwarded so that its value type
+  // agrees with `value_type` above.
+  using ChunkIterator = ArrayIterator<ArrayType, ValueAccessor>;
+
+  arrow::internal::ChunkLocation GetChunkLocation(int64_t index) const {
+    return chunked_array_->chunk_resolver_.Resolve(index);
+  }
+
+  // Re-point the per-chunk iterator at `location`.
+  // When `location` does not designate an existing chunk, no chunk is accessed
+  // and the per-chunk iterator is reset to its default-constructed state.
+  // NOTE(review): presumably the resolver reports such a location for a
+  // one-past-the-end index or an empty ChunkedArray -- confirm against
+  // ChunkResolver::Resolve.
+  void MoveToChunk(const arrow::internal::ChunkLocation& location) {
+    current_chunk_index_ = location.chunk_index;
+    if (location.chunk_index >= 0 &&
+        location.chunk_index < chunked_array_->num_chunks()) {
+      current_array_iterator_ =
+          ChunkIterator(*arrow::internal::checked_pointer_cast<ArrayType>(
+                            chunked_array_->chunk(location.chunk_index)),
+                        location.index_in_chunk);
+    } else {
+      current_array_iterator_ = ChunkIterator();
+    }
+  }
+
+  const ChunkedArray* chunked_array_;
+  int64_t index_;
+  int64_t current_chunk_index_;
+  ChunkIterator current_array_iterator_;
+};
+
+/// \brief Return an ArrayIterator over `array`, starting at its first element.
+///
+/// `array` is downcast to ArrayType, so its concrete type must be the array
+/// type associated with `Type`.
+template <typename Type, typename ArrayType = typename TypeTraits<Type>::ArrayType>
+ArrayIterator<ArrayType> Iterate(const Array& array) {
+  // ArrayIterator is built from a reference to the concrete array type, not
+  // from a pointer to the base Array, hence the downcast instead of `&array`.
+  return stl::ArrayIterator<ArrayType>(
+      arrow::internal::checked_cast<const ArrayType&>(array));
+}
+
+/// \brief Return a ChunkedArrayIterator over `chunked_array`, starting at
+/// logical index 0.
+template <typename Type, typename ArrayType = typename TypeTraits<Type>::ArrayType>
+ChunkedArrayIterator<ArrayType> Iterate(const ChunkedArray& chunked_array) {
+  using IteratorType = stl::ChunkedArrayIterator<ArrayType>;
+  return IteratorType(chunked_array);
+}

Review Comment:
   I could add a condition to validate that and raise a proper error message, but I don't know whether that is a good solution to this problem.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscribe@arrow.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org