You are viewing a plain text version of this content. The canonical link for it is here.
Posted to github@arrow.apache.org by GitBox <gi...@apache.org> on 2021/06/15 18:31:08 UTC

[GitHub] [arrow] bkietz commented on a change in pull request #10487: ARROW-13010: [C++][Compute] Support outputting to slices from kleene kernels

bkietz commented on a change in pull request #10487:
URL: https://github.com/apache/arrow/pull/10487#discussion_r652003314



##########
File path: cpp/src/arrow/util/bit_util.cc
##########
@@ -67,5 +69,58 @@ void SetBitsTo(uint8_t* bits, int64_t start_offset, int64_t length, bool bits_ar
   bits[bytes_end - 1] |= static_cast<uint8_t>(fill_byte & ~last_byte_mask);
 }
 
+template <bool value>
+void SetBitmapImpl(uint8_t* data, int64_t offset, int64_t length) {
+  //                 offset  length
+  // data              |<------------->|
+  //   |--------|...|--------|...|--------|
+  //                   |<--->|   |<--->|
+  //                     pro       epi
+  if (ARROW_PREDICT_FALSE(length == 0)) {
+    return;
+  }
+
+  constexpr uint8_t set_byte = value ? UINT8_MAX : 0;
+
+  int prologue = static_cast<int>(((offset + 7) / 8) * 8 - offset);
+  DCHECK_LT(prologue, 8);
+
+  if (length < prologue) {  // special case where a mask is required
+    //             offset length
+    // data             |<->|
+    //   |--------|...|--------|...
+    //         mask --> |111|
+    //                  |<---->|
+    //                     pro
+    uint8_t mask = BitUtil::kPrecedingBitmask[8 - prologue] ^
+                   BitUtil::kPrecedingBitmask[8 - prologue + length];
+    data[offset / 8] |= mask;
+    return;
+  }
+
+  // align to a byte boundary
+  data[offset / 8] = BitUtil::SpliceWord(offset, data[offset / 8], set_byte);

Review comment:
       ```suggestion
     data[offset / 8] = BitUtil::SpliceWord(prologue, data[offset / 8], set_byte);
   ```

##########
File path: cpp/src/arrow/util/bit_util.cc
##########
@@ -67,5 +69,58 @@ void SetBitsTo(uint8_t* bits, int64_t start_offset, int64_t length, bool bits_ar
   bits[bytes_end - 1] |= static_cast<uint8_t>(fill_byte & ~last_byte_mask);
 }
 
+template <bool value>
+void SetBitmapImpl(uint8_t* data, int64_t offset, int64_t length) {
+  //                 offset  length
+  // data              |<------------->|
+  //   |--------|...|--------|...|--------|
+  //                   |<--->|   |<--->|
+  //                     pro       epi
+  if (ARROW_PREDICT_FALSE(length == 0)) {
+    return;
+  }
+
+  constexpr uint8_t set_byte = value ? UINT8_MAX : 0;
+
+  int prologue = static_cast<int>(((offset + 7) / 8) * 8 - offset);

Review comment:
       ```suggestion
     auto prologue = BitUtil::RoundUp(offset, 8) - offset;
   ```

##########
File path: cpp/src/arrow/util/bitmap.h
##########
@@ -73,6 +76,11 @@ class ARROW_EXPORT Bitmap : public util::ToStringOstreamable<Bitmap>,
     return Bitmap(buffer_, offset_ + offset, length);
   }
 
+  void Stride(int64_t stride) {
+    this->offset_ += stride;
+    this->length_ -= stride;
+  }
+

Review comment:
       ```suggestion
   ```
   
   This seems to be unused

##########
File path: cpp/src/arrow/util/bit_util_test.cc
##########
@@ -2156,5 +2187,169 @@ TEST(Bitmap, VisitWordsAnd) {
   }
 }
 
+void random_bool_vector(std::vector<bool>& vec, int64_t size, double p = 0.5) {
+  vec.reserve(size);
+  std::random_device rd;
+  std::mt19937 gen(rd());
+  std::bernoulli_distribution d(p);
+
+  for (int n = 0; n < size; ++n) {
+    vec.push_back(d(gen));
+  }
+}
+
+std::string VectorToString(const std::vector<bool>& v) {
+  std::string out(v.size() + +((v.size() - 1) / 8), ' ');
+  for (size_t i = 0; i < v.size(); ++i) {
+    out[i + (i / 8)] = v[i] ? '1' : '0';
+  }
+  return out;
+}
+
+void VerifyBoolVectorAndBitmap(const Bitmap& bitmap, const std::vector<bool>& expected) {
+  arrow::BooleanBuilder boolean_builder;
+  ASSERT_OK(boolean_builder.AppendValues(expected));
+  ASSERT_OK_AND_ASSIGN(auto arr, boolean_builder.Finish());
+
+  ASSERT_TRUE(BitmapEquals(bitmap.buffer()->data(), bitmap.offset(),
+                           arr->data()->buffers[1]->data(), 0, expected.size()))
+      << "exp: " << VectorToString(expected) << "\ngot: " << bitmap.ToString();
+}
+
+class TestBitmapVisitAndWriteOutputNoOffset : public ::testing::TestWithParam<int32_t> {};
+
+TEST_P(TestBitmapVisitAndWriteOutputNoOffset, Test1) {
+  auto part = GetParam();
+  int64_t bits = 4 * part;
+  std::vector<bool> data;
+  random_bool_vector(data, bits);
+
+  arrow::BooleanBuilder boolean_builder;
+  ASSERT_OK(boolean_builder.AppendValues(data));
+  ASSERT_OK_AND_ASSIGN(auto arrow_data, boolean_builder.Finish());
+
+  std::shared_ptr<Buffer>& arrow_buffer = arrow_data->data()->buffers[1];
+
+  Bitmap bm0(arrow_buffer, 0, part);
+  Bitmap bm1 = bm0.Slice(part * 1, part);  // this goes beyond bm0's len
+  Bitmap bm2 = bm0.Slice(part * 2, part);  // this goes beyond bm0's len
+
+  std::array<Bitmap, 2> out_bms;
+  ASSERT_OK_AND_ASSIGN(auto out0, AllocateBitmap(part));
+  ASSERT_OK_AND_ASSIGN(auto out1, AllocateBitmap(part));
+  out_bms[0] = Bitmap(out0, 0, part);
+  out_bms[1] = Bitmap(out1, 0, part);
+
+  std::vector<bool> v0(data.begin(), data.begin() + part);
+  std::vector<bool> v1(data.begin() + part * 1, data.begin() + part * 2);
+  std::vector<bool> v2(data.begin() + part * 2, data.begin() + part * 3);
+
+  // out0 = bm0 & bm1, out1= bm0 | bm2
+  std::array<Bitmap, 3> in_bms{bm0, bm1, bm2};
+  Bitmap::VisitWordsAndWrite(
+      in_bms, &out_bms,
+      [](const std::array<uint64_t, 3>& in, std::array<uint64_t, 2>* out) {
+        out->at(0) = in[0] & in[1];
+        out->at(1) = in[0] | in[2];
+      });
+
+  std::vector<bool> out_v0(part);
+  std::vector<bool> out_v1(part);
+  // v3 = v0 & v1
+  std::transform(v0.begin(), v0.end(), v1.begin(), out_v0.begin(),
+                 std::logical_and<bool>());
+  // v3 |= v2
+  std::transform(v0.begin(), v0.end(), v2.begin(), out_v1.begin(),
+                 std::logical_or<bool>());
+
+  //  std::cout << "v0: " << VectorToString(v0) << "\n"
+  //            << "b0: " << bm0.ToString() << "\n"
+  //            << "v1: " << VectorToString(v1) << "\n"
+  //            << "b1: " << bm1.ToString() << "\n"
+  //            << "v2: " << VectorToString(v2) << "\n"
+  //            << "b2: " << bm2.ToString() << "\n";
+

Review comment:
       ```suggestion
   ```
   Alternatively, you could attach this output to an assertion so it only shows if a test fails

##########
File path: cpp/src/arrow/util/bit_util_test.cc
##########
@@ -2156,5 +2187,169 @@ TEST(Bitmap, VisitWordsAnd) {
   }
 }
 
+void random_bool_vector(std::vector<bool>& vec, int64_t size, double p = 0.5) {
+  vec.reserve(size);
+  std::random_device rd;
+  std::mt19937 gen(rd());
+  std::bernoulli_distribution d(p);
+
+  for (int n = 0; n < size; ++n) {
+    vec.push_back(d(gen));
+  }
+}
+
+std::string VectorToString(const std::vector<bool>& v) {
+  std::string out(v.size() + +((v.size() - 1) / 8), ' ');
+  for (size_t i = 0; i < v.size(); ++i) {
+    out[i + (i / 8)] = v[i] ? '1' : '0';
+  }
+  return out;
+}
+
+void VerifyBoolVectorAndBitmap(const Bitmap& bitmap, const std::vector<bool>& expected) {
+  arrow::BooleanBuilder boolean_builder;
+  ASSERT_OK(boolean_builder.AppendValues(expected));
+  ASSERT_OK_AND_ASSIGN(auto arr, boolean_builder.Finish());
+
+  ASSERT_TRUE(BitmapEquals(bitmap.buffer()->data(), bitmap.offset(),
+                           arr->data()->buffers[1]->data(), 0, expected.size()))
+      << "exp: " << VectorToString(expected) << "\ngot: " << bitmap.ToString();
+}
+
+class TestBitmapVisitAndWriteOutputNoOffset : public ::testing::TestWithParam<int32_t> {};
+
+TEST_P(TestBitmapVisitAndWriteOutputNoOffset, Test1) {
+  auto part = GetParam();
+  int64_t bits = 4 * part;
+  std::vector<bool> data;
+  random_bool_vector(data, bits);

Review comment:
       It's worth noting that we already have some utilities for generating random data. For example, we could reuse `RandomArrayGenerator`
   
   ```c++
     random::RandomArrayGenerator rng(42);
     std::shared_ptr<Buffer> buffer = rng.NullBitmap(bits, 0.5);
   ```

##########
File path: cpp/src/arrow/util/bit_util.cc
##########
@@ -67,5 +69,58 @@ void SetBitsTo(uint8_t* bits, int64_t start_offset, int64_t length, bool bits_ar
   bits[bytes_end - 1] |= static_cast<uint8_t>(fill_byte & ~last_byte_mask);
 }
 
+template <bool value>
+void SetBitmapImpl(uint8_t* data, int64_t offset, int64_t length) {
+  //                 offset  length
+  // data              |<------------->|
+  //   |--------|...|--------|...|--------|
+  //                   |<--->|   |<--->|
+  //                     pro       epi
+  if (ARROW_PREDICT_FALSE(length == 0)) {
+    return;
+  }
+
+  constexpr uint8_t set_byte = value ? UINT8_MAX : 0;
+
+  int prologue = static_cast<int>(((offset + 7) / 8) * 8 - offset);
+  DCHECK_LT(prologue, 8);
+
+  if (length < prologue) {  // special case where a mask is required
+    //             offset length
+    // data             |<->|
+    //   |--------|...|--------|...
+    //         mask --> |111|
+    //                  |<---->|
+    //                     pro
+    uint8_t mask = BitUtil::kPrecedingBitmask[8 - prologue] ^
+                   BitUtil::kPrecedingBitmask[8 - prologue + length];
+    data[offset / 8] |= mask;

Review comment:
       This seems to only be correct for `set_byte == 0xFF`, since we always set the masked bits. Additionally I think it could be written more clearly:
   ```suggestion
       uint8_t old_bits = BitUtil::LeadingBits(data[offset / 8], offset) |
                          BitUtil::TrailingBits(data[offset / 8], length + offset);
       uint8_t new_bits = BitUtil::TrailingBits(set_byte, offset) &
                          BitUtil::LeadingBits(set_byte, length + offset);
       data[offset / 8] = old_bits | new_bits;
   ```

##########
File path: cpp/src/arrow/util/bit_util.h
##########
@@ -316,5 +316,29 @@ static inline void SetBitTo(uint8_t* bits, int64_t i, bool bit_is_set) {
 ARROW_EXPORT
 void SetBitsTo(uint8_t* bits, int64_t start_offset, int64_t length, bool bits_are_set);
 
+/// \brief Sets all bits in the bitmap to true
+ARROW_EXPORT
+void SetBitmap(uint8_t* data, int64_t offset, int64_t length);
+
+/// \brief Clears all bits in the bitmap (set to false)
+ARROW_EXPORT
+void ClearBitmap(uint8_t* data, int64_t offset, int64_t length);
+
+template <typename Word>
+constexpr Word WordBitMask(int i) {
+  return (static_cast<Word>(1) << i) - 1;
+}

Review comment:
       This incurs undefined behavior for `i >= sizeof(Word) * 8` because we'll shift that 1 right out of the integer. Also please add a comment explaining the output, maybe rename too
   ```suggestion
   template <typename Word, Word all = static_cast<Word>(~static_cast<Word>(0))>
   constexpr Word TrailingWordBitmask(int i) {
     return ARROW_PREDICT_FALSE(i >= sizeof(Word) * 8) ? 0 : all << i;
   }
   ```

##########
File path: cpp/src/arrow/compute/kernels/scalar_boolean.cc
##########
@@ -551,14 +552,11 @@ void RegisterScalarBoolean(FunctionRegistry* registry) {
 
   // The Kleene logic kernels cannot write into sliced output bitmaps

Review comment:
       ```suggestion
   ```

##########
File path: cpp/src/arrow/util/bit_util.cc
##########
@@ -67,5 +69,58 @@ void SetBitsTo(uint8_t* bits, int64_t start_offset, int64_t length, bool bits_ar
   bits[bytes_end - 1] |= static_cast<uint8_t>(fill_byte & ~last_byte_mask);
 }
 
+template <bool value>
+void SetBitmapImpl(uint8_t* data, int64_t offset, int64_t length) {
+  //                 offset  length
+  // data              |<------------->|
+  //   |--------|...|--------|...|--------|
+  //                   |<--->|   |<--->|
+  //                     pro       epi
+  if (ARROW_PREDICT_FALSE(length == 0)) {
+    return;
+  }
+
+  constexpr uint8_t set_byte = value ? UINT8_MAX : 0;
+
+  int prologue = static_cast<int>(((offset + 7) / 8) * 8 - offset);
+  DCHECK_LT(prologue, 8);
+
+  if (length < prologue) {  // special case where a mask is required
+    //             offset length
+    // data             |<->|
+    //   |--------|...|--------|...
+    //         mask --> |111|
+    //                  |<---->|
+    //                     pro
+    uint8_t mask = BitUtil::kPrecedingBitmask[8 - prologue] ^
+                   BitUtil::kPrecedingBitmask[8 - prologue + length];
+    data[offset / 8] |= mask;
+    return;
+  }
+
+  // align to a byte boundary
+  data[offset / 8] = BitUtil::SpliceWord(offset, data[offset / 8], set_byte);
+  offset += prologue;
+  length -= prologue;
+
+  // set values per byte
+  DCHECK_EQ(offset % 8, 0);
+  std::memset(data + offset / 8, set_byte, length / 8);
+  offset += ((length / 8) * 8);
+  length -= ((length / 8) * 8);

Review comment:
       ```suggestion
     offset += BitUtil::RoundDown(length, 8);
     length -= BitUtil::RoundDown(length, 8);
   ```

##########
File path: cpp/src/arrow/util/bit_util.h
##########
@@ -316,5 +316,29 @@ static inline void SetBitTo(uint8_t* bits, int64_t i, bool bit_is_set) {
 ARROW_EXPORT
 void SetBitsTo(uint8_t* bits, int64_t start_offset, int64_t length, bool bits_are_set);
 
+/// \brief Sets all bits in the bitmap to true
+ARROW_EXPORT
+void SetBitmap(uint8_t* data, int64_t offset, int64_t length);

Review comment:
       We also have SetBitsTo, which has similar functionality. In a follow up let's benchmark and deduplicate them by rewriting the less efficient one as a wrapper of the other (probably they will have identical performance, in which case we keep whichever is easier to read)

##########
File path: cpp/src/arrow/compute/kernels/scalar_if_else.cc
##########
@@ -72,18 +72,6 @@ Status PromoteNullsVisitor(KernelContext* ctx, const Datum& cond_d, const Datum&
   Bitmap cond_valid{cond.buffers[0], cond.offset, cond.length};
   Bitmap left_valid = GetBitmap(left_d, 0);
   Bitmap right_valid = GetBitmap(right_d, 0);
-  // sometimes Bitmaps will be ignored, in which case we replace access to them with
-  // duplicated (probably elided) access to cond_data
-  const Bitmap& _ = cond_data;
-
-  // lambda function that will be used inside the visitor
-  uint64_t* out_validity = nullptr;
-  int64_t i = 0;
-  auto apply = [&](uint64_t c_valid, uint64_t c_data, uint64_t l_valid,
-                   uint64_t r_valid) {
-    out_validity[i] = c_valid & ((c_data & l_valid) | (~c_data & r_valid));
-    i++;
-  };
 
   // cond.valid & (cond.data & left.valid | ~cond.data & right.valid)
   // In the following cases, we dont need to allocate out_valid bitmap

Review comment:
       These cases need to be reexamined since we're writing into slices- it's an error to allocate or steal a buffer since we might be discarding values already written to other slices of `output`

##########
File path: cpp/src/arrow/util/bitmap.h
##########
@@ -225,6 +248,99 @@ class ARROW_EXPORT Bitmap : public util::ToStringOstreamable<Bitmap>,
     return min_offset;
   }
 
+  /// \brief Visit words of bits from each input bitmap as array<Word, N> and collects
+  /// outputs to an array<Word, M>, to be written into the output bitmaps accordingly.
+  ///
+  /// All bitmaps must have identical length. The first bit in a visited bitmap
+  /// may be offset within the first visited word, but words will otherwise contain
+  /// densely packed bits loaded from the bitmap. That offset within the first word is
+  /// returned.
+  /// Visitor is expected to have the following signature
+  ///     [](const std::array<Word, N>& in_words, std::array<Word, M>* out_words){...}
+  ///
+  // NOTE: this function is efficient on 3+ sufficiently large bitmaps.
+  // It also has a large prolog / epilog overhead and should be used
+  // carefully in other cases.
+  // For 2 bitmaps or less, and/or smaller bitmaps, see also VisitTwoBitBlocksVoid
+  // and BitmapUInt64Reader.
+  template <size_t N, size_t M, typename Visitor,
+            typename Word = typename std::decay<
+                internal::call_traits::argument_type<0, Visitor&&>>::type::value_type>
+  static void VisitWordsAndWrite(const std::array<Bitmap, N>& bitmaps_arg,
+                                 std::array<Bitmap, M>* out_bitmaps_arg,
+                                 Visitor&& visitor) {
+    constexpr int64_t kBitWidth = sizeof(Word) * 8;
+
+    int64_t bit_length = BitLength(bitmaps_arg);
+    assert(bit_length == BitLength(*out_bitmaps_arg));
+
+    std::array<BitmapWordReader<Word>, N> readers;
+    for (size_t i = 0; i < N; ++i) {
+      readers[i] = BitmapWordReader<Word>(bitmaps_arg[i].buffer_->data(),
+                                          bitmaps_arg[i].offset_, bitmaps_arg[i].length_);
+    }
+
+    std::array<BitmapWordWriter<Word>, M> writers;
+    for (size_t i = 0; i < M; ++i) {
+      const Bitmap& out_bitmap = out_bitmaps_arg->at(i);
+      writers[i] = BitmapWordWriter<Word>(out_bitmap.buffer_->mutable_data(),
+                                          out_bitmap.offset_, out_bitmap.length_);
+    }
+
+    std::array<Word, N> visited_words;
+    visited_words.fill(0);
+    std::array<Word, M> output_words;
+    output_words.fill(0);
+
+    // every reader will have same number of words, since they are same length'ed
+    // todo this will be inefficient in some cases. When there are offsets beyond Word
+    //  boundary, every Word would have to be created from 2 adjoining Words

Review comment:
       Please open a follow up JIRA for this. One way to handle this would be adding `BitmapWordReader::NextLeadingByte()` so that we can consume bits until at least one bitmap has offset == 0
   ```suggestion
       // TODO($JIRA) this will be inefficient in some cases. When there are offsets beyond Word
       // boundary, every Word would have to be created from 2 adjoining Words
   ```

##########
File path: cpp/src/arrow/util/bit_util_test.cc
##########
@@ -2156,5 +2187,169 @@ TEST(Bitmap, VisitWordsAnd) {
   }
 }
 
+void random_bool_vector(std::vector<bool>& vec, int64_t size, double p = 0.5) {
+  vec.reserve(size);
+  std::random_device rd;
+  std::mt19937 gen(rd());
+  std::bernoulli_distribution d(p);
+
+  for (int n = 0; n < size; ++n) {
+    vec.push_back(d(gen));
+  }
+}
+
+std::string VectorToString(const std::vector<bool>& v) {
+  std::string out(v.size() + +((v.size() - 1) / 8), ' ');
+  for (size_t i = 0; i < v.size(); ++i) {
+    out[i + (i / 8)] = v[i] ? '1' : '0';
+  }
+  return out;
+}
+
+void VerifyBoolVectorAndBitmap(const Bitmap& bitmap, const std::vector<bool>& expected) {
+  arrow::BooleanBuilder boolean_builder;
+  ASSERT_OK(boolean_builder.AppendValues(expected));
+  ASSERT_OK_AND_ASSIGN(auto arr, boolean_builder.Finish());
+
+  ASSERT_TRUE(BitmapEquals(bitmap.buffer()->data(), bitmap.offset(),
+                           arr->data()->buffers[1]->data(), 0, expected.size()))
+      << "exp: " << VectorToString(expected) << "\ngot: " << bitmap.ToString();
+}
+
+class TestBitmapVisitAndWriteOutputNoOffset : public ::testing::TestWithParam<int32_t> {};
+
+TEST_P(TestBitmapVisitAndWriteOutputNoOffset, Test1) {
+  auto part = GetParam();
+  int64_t bits = 4 * part;
+  std::vector<bool> data;
+  random_bool_vector(data, bits);

Review comment:
       Also, if you use `Bitmap` consistently instead of `std::vector<bool>` you can reuse `Bitmap::{Equals, ToString, Diff}`

##########
File path: cpp/src/arrow/util/bitmap.h
##########
@@ -225,6 +248,99 @@ class ARROW_EXPORT Bitmap : public util::ToStringOstreamable<Bitmap>,
     return min_offset;
   }
 
+  /// \brief Visit words of bits from each input bitmap as array<Word, N> and collects
+  /// outputs to an array<Word, M>, to be written into the output bitmaps accordingly.
+  ///
+  /// All bitmaps must have identical length. The first bit in a visited bitmap
+  /// may be offset within the first visited word, but words will otherwise contain
+  /// densely packed bits loaded from the bitmap. That offset within the first word is
+  /// returned.
+  /// Visitor is expected to have the following signature
+  ///     [](const std::array<Word, N>& in_words, std::array<Word, M>* out_words){...}
+  ///
+  // NOTE: this function is efficient on 3+ sufficiently large bitmaps.
+  // It also has a large prolog / epilog overhead and should be used
+  // carefully in other cases.
+  // For 2 bitmaps or less, and/or smaller bitmaps, see also VisitTwoBitBlocksVoid
+  // and BitmapUInt64Reader.
+  template <size_t N, size_t M, typename Visitor,
+            typename Word = typename std::decay<
+                internal::call_traits::argument_type<0, Visitor&&>>::type::value_type>
+  static void VisitWordsAndWrite(const std::array<Bitmap, N>& bitmaps_arg,
+                                 std::array<Bitmap, M>* out_bitmaps_arg,
+                                 Visitor&& visitor) {
+    constexpr int64_t kBitWidth = sizeof(Word) * 8;
+
+    int64_t bit_length = BitLength(bitmaps_arg);
+    assert(bit_length == BitLength(*out_bitmaps_arg));
+
+    std::array<BitmapWordReader<Word>, N> readers;
+    for (size_t i = 0; i < N; ++i) {
+      readers[i] = BitmapWordReader<Word>(bitmaps_arg[i].buffer_->data(),
+                                          bitmaps_arg[i].offset_, bitmaps_arg[i].length_);
+    }
+
+    std::array<BitmapWordWriter<Word>, M> writers;
+    for (size_t i = 0; i < M; ++i) {
+      const Bitmap& out_bitmap = out_bitmaps_arg->at(i);
+      writers[i] = BitmapWordWriter<Word>(out_bitmap.buffer_->mutable_data(),
+                                          out_bitmap.offset_, out_bitmap.length_);
+    }
+
+    std::array<Word, N> visited_words;
+    visited_words.fill(0);
+    std::array<Word, M> output_words;
+    output_words.fill(0);
+
+    // every reader will have same number of words, since they are same length'ed
+    // todo this will be inefficient in some cases. When there are offsets beyond Word
+    //  boundary, every Word would have to be created from 2 adjoining Words

Review comment:
       Additionally, IMO this usage of `/BitmapWord(Reader|Writer)/` is more readable than the implementation of VisitWords. Maybe not in this PR, but: let's rewrite/remove the complicated one

##########
File path: cpp/src/arrow/util/bit_util_test.cc
##########
@@ -1975,6 +1975,37 @@ TEST(BitUtil, BitsetStack) {
   ASSERT_EQ(stack.TopSize(), 0);
 }
 
+template <typename Word>
+void CheckSplice(int n, Word low, Word high) {
+  std::bitset<sizeof(Word) * 8> ret;
+  for (size_t i = 0; i < ret.size(); i++) {
+    ret[i] = i < static_cast<size_t>(n)
+                 ? BitUtil::GetBit(reinterpret_cast<uint8_t*>(&low), i)
+                 : BitUtil::GetBit(reinterpret_cast<uint8_t*>(&high), i);
+  }
+
+  ASSERT_EQ(static_cast<Word>(ret.to_ulong()), BitUtil::SpliceWord(n, low, high));

Review comment:
       For GTest assertions, the expected value is on the right
   ```suggestion
     ASSERT_EQ(BitUtil::SpliceWord(n, low, high), static_cast<Word>(ret.to_ulong());
   ```




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org