You are viewing a plain text version of this content. The canonical link for it is here.
Posted to github@arrow.apache.org by "bkietz (via GitHub)" <gi...@apache.org> on 2023/07/07 18:45:02 UTC

[GitHub] [arrow] bkietz commented on a diff in pull request #36521: GH-36502: [C++] Add run-end encoded array support to ReferencedByteRanges

bkietz commented on code in PR #36521:
URL: https://github.com/apache/arrow/pull/36521#discussion_r1256282979


##########
cpp/src/arrow/util/ree_util.h:
##########
@@ -73,24 +73,39 @@ int64_t FindPhysicalIndex(const RunEndCType* run_ends, int64_t run_ends_size, in
   return result;
 }
 
-/// \brief Uses binary-search to calculate the number of physical values (and
+/// \brief Uses binary-search to calculate the range of physical values (and
 /// run-ends) necessary to represent the logical range of values from
 /// offset to length
+///
+/// \return a pair of physical offset and physical length
 template <typename RunEndCType>
-int64_t FindPhysicalLength(const RunEndCType* run_ends, int64_t run_ends_size,
-                           int64_t length, int64_t offset) {
+std::pair<int64_t, int64_t> FindPhysicalRange(const RunEndCType* run_ends,
+                                              int64_t run_ends_size, int64_t length,
+                                              int64_t offset) {
+  const int64_t physical_offset =
+      FindPhysicalIndex<RunEndCType>(run_ends, run_ends_size, 0, offset);
   // The physical length is calculated by finding the offset of the last element
   // and adding 1 to it, so first we ensure there is at least one element.
   if (length == 0) {
-    return 0;
+    return {physical_offset, 0};
   }
-  const int64_t physical_offset =
-      FindPhysicalIndex<RunEndCType>(run_ends, run_ends_size, 0, offset);
   const int64_t physical_index_of_last = FindPhysicalIndex<RunEndCType>(
       run_ends + physical_offset, run_ends_size - physical_offset, length - 1, offset);
 
   assert(physical_index_of_last < run_ends_size - physical_offset);
-  return physical_index_of_last + 1;
+  return {physical_offset, physical_index_of_last + 1};
+}
+
+/// \brief Uses binary-search to calculate the number of physical values (and
+/// run-ends) necessary to represent the logical range of values from
+/// offset to length
+template <typename RunEndCType>
+int64_t FindPhysicalLength(const RunEndCType* run_ends, int64_t run_ends_size,
+                           int64_t length, int64_t offset) {
+  int64_t physical_length;
+  std::tie(std::ignore, physical_length) =
+      FindPhysicalRange<RunEndCType>(run_ends, run_ends_size, length, offset);
+  return physical_length;

Review Comment:
   Nit:
   ```suggestion
     auto [_, physical_length] =
         FindPhysicalRange<RunEndCType>(run_ends, run_ends_size, length, offset);
     return physical_length;
   ```



##########
cpp/src/arrow/util/byte_size.cc:
##########
@@ -294,6 +295,21 @@ struct GetByteRangesArray {
     return Status::OK();
   }
 
+  Status Visit(const RunEndEncodedType& type) const {
+    auto physical_range = ree_util::FindPhysicalRange(input, offset, length);
+    for (int i = 0; i < type.num_fields(); i++) {
+      GetByteRangesArray child{
+          *input.child_data[i],
+          /*offset=*/input.child_data[i]->offset + physical_range.first,
+          /*length=*/physical_range.second,
+          range_starts,
+          range_offsets,
+          range_lengths};

Review Comment:
   ```suggestion
       auto [phys_offset, phys_length] = ree_util::FindPhysicalRange(input, offset, length);
       for (int i = 0; i < type.num_fields(); i++) {
         GetByteRangesArray child{
             *input.child_data[i],
             /*offset=*/input.child_data[i]->offset + phys_offset,
             /*length=*/phys_length,
             range_starts,
             range_offsets,
             range_lengths};
   ```



##########
cpp/src/arrow/util/ree_util.cc:
##########
@@ -85,6 +85,26 @@ int64_t FindPhysicalLength(const ArraySpan& span) {
   return internal::FindPhysicalLength<int64_t>(span);
 }
 
+std::pair<int64_t, int64_t> FindPhysicalRange(const ArraySpan& span, int64_t offset,
+                                              int64_t length) {
+  auto& run_ends_span = RunEndsArray(span);

Review Comment:
   Nit
   ```suggestion
     const auto& run_ends_span = RunEndsArray(span);
   ```



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscribe@arrow.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org