You are viewing a plain text version of this content. The canonical link for it is here.
Posted to github@arrow.apache.org by "pitrou (via GitHub)" <gi...@apache.org> on 2023/06/05 10:11:38 UTC

[GitHub] [arrow] pitrou commented on a diff in pull request #35750: GH-35749: [C++] Handle run-end encoded filters in compute kernels

pitrou commented on code in PR #35750:
URL: https://github.com/apache/arrow/pull/35750#discussion_r1217751388


##########
cpp/src/arrow/compute/kernels/ree_util_internal.h:
##########
@@ -83,6 +83,18 @@ class ReadWriteValue<ArrowType, in_has_validity_buffer, out_has_validity_buffer,
     return valid;
   }
 
+  /// Pre-conditions guaranteed by the callers:
+  /// - i and j are valid indices into the values buffer
+  /// - the values in i and j are valid
+  bool CompareValuesAt(int64_t i, int64_t j) const {

Review Comment:
   I don't understand what this is doing in REE utils? This is essentially representing value access in primitive arrays.



##########
cpp/src/arrow/compute/kernels/vector_selection_internal.cc:
##########
@@ -82,6 +85,89 @@ Status PreallocatePrimitiveArrayData(KernelContext* ctx, int64_t length, int bit
 
 namespace {
 
+/// \brief Iterate over a REE filter, emitting ranges of a plain values array that
+/// would pass the filter.
+///
+/// Differently from REExREE, and REExPlain filtering, PlainxREE filtering
+/// does not produce a REE output, but rather a plain output array. As such it's
+/// much simpler.
+///
+/// \param filter_may_have_nulls Only pass false if you know the filter has no nulls.
+template <typename FilterRunEndType>
+void VisitPlainxREEFilterOutputSegmentsImpl(
+    const ArraySpan& filter, bool filter_may_have_nulls,
+    FilterOptions::NullSelectionBehavior null_selection,
+    const EmitREEFilterSegment& emit_segment) {
+  using FilterRunEndCType = typename FilterRunEndType::c_type;
+  const ArraySpan& filter_values = arrow::ree_util::ValuesArray(filter);
+  const int64_t filter_values_offset = filter_values.offset;
+  const uint8_t* filter_is_valid = filter_values.buffers[0].data;
+  const uint8_t* filter_selection = filter_values.buffers[1].data;
+  filter_may_have_nulls = filter_may_have_nulls && filter_is_valid != NULLPTR &&

Review Comment:
   Nit: NULLPTR is only needed in .h files.
   ```suggestion
     filter_may_have_nulls = filter_may_have_nulls && filter_is_valid != nullptr &&
   ```



##########
cpp/src/arrow/compute/kernels/vector_selection_filter_internal.cc:
##########
@@ -161,39 +173,88 @@ class PrimitiveFilterImpl {
       out_is_valid_ = out_arr->buffers[0]->mutable_data();
     }
     out_data_ = reinterpret_cast<T*>(out_arr->buffers[1]->mutable_data());
-    out_offset_ = out_arr->offset;
     out_length_ = out_arr->length;
+    DCHECK_EQ(out_arr->offset, 0);

Review Comment:
   I'm curious, are we sure about this?



##########
cpp/src/arrow/compute/kernels/vector_selection_filter_internal.cc:
##########
@@ -84,13 +85,27 @@ int64_t GetBitmapFilterOutputSize(const ArraySpan& filter,
   return output_size;
 }
 
-// TODO(pr-35750): Handle run-end encoded filters in compute kernels
+int64_t GetREEFilterOutputSize(const ArraySpan& filter,
+                               FilterOptions::NullSelectionBehavior null_selection) {
+  const auto& ree_type = checked_cast<const RunEndEncodedType&>(*filter.type);
+  DCHECK_EQ(ree_type.value_type()->id(), Type::BOOL);
+  int64_t output_size = 0;
+  VisitPlainxREEFilterOutputSegments(
+      filter, true, null_selection,

Review Comment:
   ```suggestion
         filter, /*filter_may_have_nulls=*/ true, null_selection,
   ```



##########
cpp/src/arrow/compute/kernels/vector_selection_test.cc:
##########
@@ -55,6 +88,13 @@ TEST(GetTakeIndices, Basics) {
     auto indices_array = MakeArray(indices);
     ValidateOutput(indices);
     AssertArraysEqual(*expected_indices, *indices_array, /*verbose=*/true);
+
+    ASSERT_OK_AND_ASSIGN(auto ree_filter, REEncode(*filter));
+    ASSERT_OK_AND_ASSIGN(auto indices_from_ree,
+                         internal::GetTakeIndices(*ree_filter->data(), null_selection));
+    auto indices_from_ree_array = MakeArray(indices);
+    ValidateOutput(indices_from_ree);
+    AssertArraysEqual(*expected_indices, *indices_from_ree_array, /*verbose=*/true);

Review Comment:
   This is repeated in subsequent tests. Perhaps factor it out in a utility function or test fixture method?



##########
cpp/src/arrow/compute/kernels/vector_selection_filter_internal.cc:
##########
@@ -444,48 +503,64 @@ Status PrimitiveFilterExec(KernelContext* ctx, const ExecSpan& batch, ExecResult
 
 // Optimized binary filter for the case where neither values nor filter have
 // nulls
-template <typename Type>
+template <typename ArrowType>
 Status BinaryFilterNonNullImpl(KernelContext* ctx, const ArraySpan& values,
                                const ArraySpan& filter, int64_t output_length,
                                FilterOptions::NullSelectionBehavior null_selection,
                                ArrayData* out) {
-  using offset_type = typename Type::offset_type;
-  const auto filter_data = filter.buffers[1].data;
+  using offset_type = typename ArrowType::offset_type;
+  const bool is_ree_filter = filter.type->id() == Type::RUN_END_ENCODED;
 
   BINARY_FILTER_SETUP_COMMON();
 
-  RETURN_NOT_OK(arrow::internal::VisitSetBitRuns(
-      filter_data, filter.offset, filter.length, [&](int64_t position, int64_t length) {
-        // Bulk-append raw data
-        const offset_type run_data_bytes =
-            (raw_offsets[position + length] - raw_offsets[position]);
-        APPEND_RAW_DATA(raw_data + raw_offsets[position], run_data_bytes);
-        // Append offsets
-        offset_type cur_offset = raw_offsets[position];
-        for (int64_t i = 0; i < length; ++i) {
-          offset_builder.UnsafeAppend(offset);
-          offset += raw_offsets[i + position + 1] - cur_offset;
-          cur_offset = raw_offsets[i + position + 1];
-        }
-        return Status::OK();
-      }));
+  auto emit_segment = [&](int64_t position, int64_t length) {
+    // Bulk-append raw data
+    const offset_type run_data_bytes =
+        (raw_offsets[position + length] - raw_offsets[position]);
+    APPEND_RAW_DATA(raw_data + raw_offsets[position], run_data_bytes);
+    // Append offsets
+    offset_type cur_offset = raw_offsets[position];
+    for (int64_t i = 0; i < length; ++i) {
+      offset_builder.UnsafeAppend(offset);
+      offset += raw_offsets[i + position + 1] - cur_offset;
+      cur_offset = raw_offsets[i + position + 1];

Review Comment:
   Why not simply:
   ```suggestion
         offset += raw_offsets[i + position + 1] - raw_offsets[i + position];
   ```



##########
cpp/src/arrow/compute/kernels/vector_selection_internal.cc:
##########
@@ -82,6 +85,89 @@ Status PreallocatePrimitiveArrayData(KernelContext* ctx, int64_t length, int bit
 
 namespace {
 
+/// \brief Iterate over a REE filter, emitting ranges of a plain values array that
+/// would pass the filter.
+///
+/// Differently from REExREE, and REExPlain filtering, PlainxREE filtering
+/// does not produce a REE output, but rather a plain output array. As such it's
+/// much simpler.
+///
+/// \param filter_may_have_nulls Only pass false if you know the filter has no nulls.
+template <typename FilterRunEndType>
+void VisitPlainxREEFilterOutputSegmentsImpl(
+    const ArraySpan& filter, bool filter_may_have_nulls,
+    FilterOptions::NullSelectionBehavior null_selection,
+    const EmitREEFilterSegment& emit_segment) {
+  using FilterRunEndCType = typename FilterRunEndType::c_type;
+  const ArraySpan& filter_values = arrow::ree_util::ValuesArray(filter);
+  const int64_t filter_values_offset = filter_values.offset;
+  const uint8_t* filter_is_valid = filter_values.buffers[0].data;
+  const uint8_t* filter_selection = filter_values.buffers[1].data;
+  filter_may_have_nulls = filter_may_have_nulls && filter_is_valid != NULLPTR &&
+                          filter_values.null_count != 0;
+
+  const arrow::ree_util::RunEndEncodedArraySpan<FilterRunEndCType> filter_span(filter);
+  auto it = filter_span.begin();
+  if (filter_may_have_nulls) {
+    if (null_selection == FilterOptions::EMIT_NULL) {
+      while (!it.is_end(filter_span)) {

Review Comment:
   FTR, why didn't you respect the usual C++ conventions for iterators when defining the REE iteration facilities?



##########
cpp/src/arrow/compute/kernels/vector_selection_internal.cc:
##########
@@ -239,6 +309,43 @@ struct Selection {
       }
     };
 
+    if (is_ree_filter) {
+      Status status;

Review Comment:
   Why not let `emit_segment` return a `Status` instead?



##########
cpp/src/arrow/compute/kernels/vector_selection_filter_internal.cc:
##########
@@ -84,13 +85,27 @@ int64_t GetBitmapFilterOutputSize(const ArraySpan& filter,
   return output_size;
 }
 
-// TODO(pr-35750): Handle run-end encoded filters in compute kernels
+int64_t GetREEFilterOutputSize(const ArraySpan& filter,
+                               FilterOptions::NullSelectionBehavior null_selection) {
+  const auto& ree_type = checked_cast<const RunEndEncodedType&>(*filter.type);
+  DCHECK_EQ(ree_type.value_type()->id(), Type::BOOL);
+  int64_t output_size = 0;
+  VisitPlainxREEFilterOutputSegments(
+      filter, true, null_selection,
+      [&output_size](int64_t, int64_t segment_length, bool) {
+        output_size += segment_length;
+      });
+  return output_size;
+}
 
 }  // namespace
 
 int64_t GetFilterOutputSize(const ArraySpan& filter,
                             FilterOptions::NullSelectionBehavior null_selection) {
-  return GetBitmapFilterOutputSize(filter, null_selection);
+  if (filter.type->id() == Type::BOOL) {
+    return GetBitmapFilterOutputSize(filter, null_selection);
+  }

Review Comment:
   ```suggestion
     }
     DCHECK_EQ(filter.type->id(), Type::RUN_END_ENCODED);
   ```



##########
cpp/src/arrow/compute/kernels/vector_selection_internal.cc:
##########
@@ -82,6 +85,89 @@ Status PreallocatePrimitiveArrayData(KernelContext* ctx, int64_t length, int bit
 
 namespace {
 
+/// \brief Iterate over a REE filter, emitting ranges of a plain values array that
+/// would pass the filter.
+///
+/// Differently from REExREE, and REExPlain filtering, PlainxREE filtering
+/// does not produce a REE output, but rather a plain output array. As such it's
+/// much simpler.
+///
+/// \param filter_may_have_nulls Only pass false if you know the filter has no nulls.
+template <typename FilterRunEndType>
+void VisitPlainxREEFilterOutputSegmentsImpl(
+    const ArraySpan& filter, bool filter_may_have_nulls,
+    FilterOptions::NullSelectionBehavior null_selection,
+    const EmitREEFilterSegment& emit_segment) {
+  using FilterRunEndCType = typename FilterRunEndType::c_type;
+  const ArraySpan& filter_values = arrow::ree_util::ValuesArray(filter);
+  const int64_t filter_values_offset = filter_values.offset;
+  const uint8_t* filter_is_valid = filter_values.buffers[0].data;
+  const uint8_t* filter_selection = filter_values.buffers[1].data;
+  filter_may_have_nulls = filter_may_have_nulls && filter_is_valid != NULLPTR &&
+                          filter_values.null_count != 0;
+
+  const arrow::ree_util::RunEndEncodedArraySpan<FilterRunEndCType> filter_span(filter);
+  auto it = filter_span.begin();
+  if (filter_may_have_nulls) {
+    if (null_selection == FilterOptions::EMIT_NULL) {
+      while (!it.is_end(filter_span)) {
+        const int64_t i = filter_values_offset + it.index_into_array();
+        const bool valid = bit_util::GetBit(filter_is_valid, i);
+        const bool emit = !valid || bit_util::GetBit(filter_selection, i);
+        if (emit) {
+          emit_segment(it.logical_position(), it.run_length(), valid);

Review Comment:
   I'm not sure whether you tried to time these new kernels, but `emit_segment` being a `std::function` will come with its own overhead (I'm not sure by how much).
   
   Another approach would be to give `emit_segment` a batch of ranges:
   ```c++
   struct REEFilterSegment {
     int64_t position;
     int64_t segment_length;
     bool filter_valid; // false means emit none
   };
   
   using EmitREEFilterSegment =
       std::function<void(const REEFilterSegment*, int32_t num_segments)>;
   ```
   



##########
cpp/src/arrow/compute/kernels/vector_selection_filter_internal.cc:
##########
@@ -161,39 +173,88 @@ class PrimitiveFilterImpl {
       out_is_valid_ = out_arr->buffers[0]->mutable_data();
     }
     out_data_ = reinterpret_cast<T*>(out_arr->buffers[1]->mutable_data());
-    out_offset_ = out_arr->offset;
     out_length_ = out_arr->length;
+    DCHECK_EQ(out_arr->offset, 0);
     out_position_ = 0;
   }
 
-  void ExecNonNull() {
-    // Fast filter when values and filter are not null
-    ::arrow::internal::VisitSetBitRunsVoid(
-        filter_data_, filter_offset_, values_length_,
-        [&](int64_t position, int64_t length) { WriteValueSegment(position, length); });
+  void ExecREEFilter() {
+    if (filter_.child_data[1].null_count == 0 && values_null_count_ == 0) {

Review Comment:
   Note that `null_count` may be unknown (it's lazily computed), you should have called `GetNullCount` to make sure it's known. Perhaps in the constructor?



##########
cpp/src/arrow/compute/kernels/vector_selection_filter_internal.cc:
##########
@@ -161,39 +173,88 @@ class PrimitiveFilterImpl {
       out_is_valid_ = out_arr->buffers[0]->mutable_data();
     }
     out_data_ = reinterpret_cast<T*>(out_arr->buffers[1]->mutable_data());
-    out_offset_ = out_arr->offset;
     out_length_ = out_arr->length;
+    DCHECK_EQ(out_arr->offset, 0);
     out_position_ = 0;
   }
 
-  void ExecNonNull() {
-    // Fast filter when values and filter are not null
-    ::arrow::internal::VisitSetBitRunsVoid(
-        filter_data_, filter_offset_, values_length_,
-        [&](int64_t position, int64_t length) { WriteValueSegment(position, length); });
+  void ExecREEFilter() {
+    if (filter_.child_data[1].null_count == 0 && values_null_count_ == 0) {
+      DCHECK(!out_is_valid_);
+      // Fastest: no nulls in either filter or values
+      return VisitPlainxREEFilterOutputSegments(
+          filter_, /*filter_may_have_nulls=*/false, null_selection_,
+          [&](int64_t position, int64_t segment_length, bool filter_valid) {
+            // Fastest path: all values in range are included and not null
+            WriteValueSegment(position, segment_length);
+            DCHECK(filter_valid);
+          });
+    }
+    if (values_is_valid_ && out_is_valid_) {

Review Comment:
   Hmm... shouldn't the output always have a validity bitmap if the input has?
   ```suggestion
       if (values_is_valid_) {
         DCHECK(out_is_valid_);
   ```



##########
cpp/src/arrow/compute/kernels/vector_selection_filter_internal.cc:
##########
@@ -161,39 +173,88 @@ class PrimitiveFilterImpl {
       out_is_valid_ = out_arr->buffers[0]->mutable_data();
     }
     out_data_ = reinterpret_cast<T*>(out_arr->buffers[1]->mutable_data());
-    out_offset_ = out_arr->offset;
     out_length_ = out_arr->length;
+    DCHECK_EQ(out_arr->offset, 0);
     out_position_ = 0;
   }
 
-  void ExecNonNull() {
-    // Fast filter when values and filter are not null
-    ::arrow::internal::VisitSetBitRunsVoid(
-        filter_data_, filter_offset_, values_length_,
-        [&](int64_t position, int64_t length) { WriteValueSegment(position, length); });
+  void ExecREEFilter() {
+    if (filter_.child_data[1].null_count == 0 && values_null_count_ == 0) {
+      DCHECK(!out_is_valid_);
+      // Fastest: no nulls in either filter or values
+      return VisitPlainxREEFilterOutputSegments(
+          filter_, /*filter_may_have_nulls=*/false, null_selection_,
+          [&](int64_t position, int64_t segment_length, bool filter_valid) {
+            // Fastest path: all values in range are included and not null
+            WriteValueSegment(position, segment_length);
+            DCHECK(filter_valid);
+          });
+    }
+    if (values_is_valid_ && out_is_valid_) {
+      // Fast path: values can be null, so the validity bitmap should be copied

Review Comment:
   This is the slow path, rather, no? The path below will probably be faster, since it does not issue individual `CopyBitmap` calls.



##########
cpp/src/arrow/compute/kernels/vector_selection_filter_internal.cc:
##########
@@ -863,7 +976,13 @@ class FilterMetaFunction : public MetaFunction {
   Result<Datum> ExecuteImpl(const std::vector<Datum>& args,
                             const FunctionOptions* options,
                             ExecContext* ctx) const override {
-    if (args[1].type()->id() != Type::BOOL) {
+    auto& filter_type = *args[1].type();

Review Comment:
   Style nit
   ```suggestion
       const auto& filter_type = *args[1].type();
   ```



##########
cpp/src/arrow/compute/kernels/vector_selection_internal.cc:
##########
@@ -82,6 +85,89 @@ Status PreallocatePrimitiveArrayData(KernelContext* ctx, int64_t length, int bit
 
 namespace {
 
+/// \brief Iterate over a REE filter, emitting ranges of a plain values array that
+/// would pass the filter.
+///
+/// Differently from REExREE, and REExPlain filtering, PlainxREE filtering
+/// does not produce a REE output, but rather a plain output array. As such it's
+/// much simpler.
+///
+/// \param filter_may_have_nulls Only pass false if you know the filter has no nulls.
+template <typename FilterRunEndType>
+void VisitPlainxREEFilterOutputSegmentsImpl(
+    const ArraySpan& filter, bool filter_may_have_nulls,
+    FilterOptions::NullSelectionBehavior null_selection,
+    const EmitREEFilterSegment& emit_segment) {
+  using FilterRunEndCType = typename FilterRunEndType::c_type;
+  const ArraySpan& filter_values = arrow::ree_util::ValuesArray(filter);
+  const int64_t filter_values_offset = filter_values.offset;
+  const uint8_t* filter_is_valid = filter_values.buffers[0].data;
+  const uint8_t* filter_selection = filter_values.buffers[1].data;
+  filter_may_have_nulls = filter_may_have_nulls && filter_is_valid != NULLPTR &&
+                          filter_values.null_count != 0;
+
+  const arrow::ree_util::RunEndEncodedArraySpan<FilterRunEndCType> filter_span(filter);
+  auto it = filter_span.begin();
+  if (filter_may_have_nulls) {
+    if (null_selection == FilterOptions::EMIT_NULL) {
+      while (!it.is_end(filter_span)) {
+        const int64_t i = filter_values_offset + it.index_into_array();
+        const bool valid = bit_util::GetBit(filter_is_valid, i);
+        const bool emit = !valid || bit_util::GetBit(filter_selection, i);
+        if (emit) {
+          emit_segment(it.logical_position(), it.run_length(), valid);

Review Comment:
   Of course, _ideally_ a REE-encoded array has long enough runs to make REE encoding worthwhile...



##########
cpp/src/arrow/compute/kernels/vector_selection_take_internal.cc:
##########
@@ -196,14 +285,37 @@ Result<std::shared_ptr<ArrayData>> GetTakeIndicesFromBitmap(
   }
 }
 
-// TODO(pr-35750): Handle run-end encoded filters in compute kernels
+Result<std::shared_ptr<ArrayData>> GetTakeIndicesFromREEBitmap(
+    const ArraySpan& filter, FilterOptions::NullSelectionBehavior null_selection,
+    MemoryPool* memory_pool) {
+  auto& ree_type = checked_cast<const RunEndEncodedType&>(*filter.type);

Review Comment:
   Style nit
   ```suggestion
     const auto& ree_type = checked_cast<const RunEndEncodedType&>(*filter.type);
   ```



##########
cpp/src/arrow/compute/kernels/vector_selection_test.cc:
##########
@@ -78,63 +118,102 @@ TEST(GetTakeIndices, NullValidityBuffer) {
   ValidateOutput(indices);
   AssertArraysEqual(*expected_indices, *indices_array, /*verbose=*/true);
 
+  ASSERT_OK_AND_ASSIGN(auto ree_filter, REEncode(filter));
+  ASSERT_OK_AND_ASSIGN(
+      auto indices_from_ree,
+      internal::GetTakeIndices(*ree_filter->data(), FilterOptions::DROP));
+  auto indices_from_ree_array = MakeArray(indices);
+  ValidateOutput(indices_from_ree);
+  AssertArraysEqual(*expected_indices, *indices_from_ree_array, /*verbose=*/true);
+
   ASSERT_OK_AND_ASSIGN(
       indices, internal::GetTakeIndices(*filter.data(), FilterOptions::EMIT_NULL));
   indices_array = MakeArray(indices);
   ValidateOutput(indices);
   AssertArraysEqual(*expected_indices, *indices_array, /*verbose=*/true);
+
+  ASSERT_OK_AND_ASSIGN(
+      indices_from_ree,
+      internal::GetTakeIndices(*ree_filter->data(), FilterOptions::EMIT_NULL));
+  indices_from_ree_array = MakeArray(indices);
+  ValidateOutput(indices_from_ree);
+  AssertArraysEqual(*expected_indices, *indices_from_ree_array, /*verbose=*/true);
 }
 
 template <typename IndexArrayType>
 void CheckGetTakeIndicesCase(const Array& untyped_filter) {
   const auto& filter = checked_cast<const BooleanArray&>(untyped_filter);
+  ASSERT_OK_AND_ASSIGN(auto ree_filter, REEncode(*filter.data()));
+
   ASSERT_OK_AND_ASSIGN(std::shared_ptr<ArrayData> drop_indices,
                        internal::GetTakeIndices(*filter.data(), FilterOptions::DROP));
+  ASSERT_OK_AND_ASSIGN(
+      std::shared_ptr<ArrayData> drop_indices_from_ree,
+      internal::GetTakeIndices(*ree_filter->data(), FilterOptions::DROP));
   // Verify DROP indices
   {
     IndexArrayType indices(drop_indices);
+    IndexArrayType indices_from_ree(drop_indices);
     ValidateOutput(indices);
+    ValidateOutput(indices_from_ree);
 
     int64_t out_position = 0;
     for (int64_t i = 0; i < filter.length(); ++i) {
       if (filter.IsValid(i)) {
         if (filter.Value(i)) {
           ASSERT_EQ(indices.Value(out_position), i);
+          ASSERT_EQ(indices_from_ree.Value(out_position), i);
           ++out_position;
         }
       }
     }
     ASSERT_EQ(out_position, indices.length());
+    ASSERT_EQ(out_position, indices_from_ree.length());
+
     // Check that the end length agrees with the output of GetFilterOutputSize
     ASSERT_EQ(out_position,
               internal::GetFilterOutputSize(*filter.data(), FilterOptions::DROP));
+    ASSERT_EQ(out_position,
+              internal::GetFilterOutputSize(*ree_filter->data(), FilterOptions::DROP));
   }
 
   ASSERT_OK_AND_ASSIGN(
       std::shared_ptr<ArrayData> emit_indices,
       internal::GetTakeIndices(*filter.data(), FilterOptions::EMIT_NULL));
+  ASSERT_OK_AND_ASSIGN(
+      std::shared_ptr<ArrayData> emit_indices_from_ree,
+      internal::GetTakeIndices(*ree_filter->data(), FilterOptions::EMIT_NULL));
   // Verify EMIT_NULL indices
   {
     IndexArrayType indices(emit_indices);
+    IndexArrayType indices_from_ree(emit_indices);
     ValidateOutput(indices);
+    ValidateOutput(indices_from_ree);
 
     int64_t out_position = 0;
     for (int64_t i = 0; i < filter.length(); ++i) {
       if (filter.IsValid(i)) {
         if (filter.Value(i)) {
           ASSERT_EQ(indices.Value(out_position), i);
+          ASSERT_EQ(indices_from_ree.Value(out_position), i);
           ++out_position;
         }
       } else {
         ASSERT_TRUE(indices.IsNull(out_position));
+        ASSERT_TRUE(indices_from_ree.IsNull(out_position));
         ++out_position;
       }
     }
 
     ASSERT_EQ(out_position, indices.length());
+    ASSERT_EQ(out_position, indices_from_ree.length());
+
     // Check that the end length agrees with the output of GetFilterOutputSize
     ASSERT_EQ(out_position,
               internal::GetFilterOutputSize(*filter.data(), FilterOptions::EMIT_NULL));
+    ASSERT_OK_AND_ASSIGN(auto ree_filter, REEncode(*filter.data()));

Review Comment:
   Didn't we already create a `ree_filter` above? Or am I misreading?



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscribe@arrow.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org