You are viewing a plain text version of this content. The canonical link for it is here.
Posted to github@arrow.apache.org by GitBox <gi...@apache.org> on 2020/06/04 06:02:17 UTC

[GitHub] [arrow] emkornfield commented on a change in pull request #7346: ARROW-9029: [C++] Implement BitBlockCounter for much faster block popcounts of bitmaps

emkornfield commented on a change in pull request #7346:
URL: https://github.com/apache/arrow/pull/7346#discussion_r435010411



##########
File path: cpp/src/arrow/util/bit_util.cc
##########
@@ -598,5 +598,49 @@ Result<std::shared_ptr<Buffer>> BitmapAllButOne(MemoryPool* pool, int64_t length
   return std::move(buffer);
 }
 
+BitBlockCounter::Block BitBlockCounter::NextBlock() {
+  auto load_word = [](const uint8_t* bytes) -> uint64_t {
+    return BitUtil::ToLittleEndian(util::SafeLoadAs<uint64_t>(bytes));
+  };
+  auto shift_word = [](uint64_t current, uint64_t next, int64_t shift) -> uint64_t {
+    return (current >> shift) | (next << (64 - shift));
+  };
+
+  // When the offset is > 0, we need there to be a word beyond the last aligned
+  // word in the bitmap for the bit shifting logic.
+  const int64_t bits_required_to_scan_words = offset_ == 0 ? 256 : 256 + (64 - offset_);
+  if (bits_remaining_ < bits_required_to_scan_words) {
+    // End of the bitmap, leave it to the caller to decide how to best check
+    // these bits, no need to do redundant computation here.
+    const int16_t run_length = static_cast<int16_t>(bits_remaining_);
+    bits_remaining_ -= run_length;
+    return {run_length, static_cast<int16_t>(CountSetBits(bitmap_, offset_, run_length))};
+  }
+
+  int64_t total_popcount = 0;
+  if (offset_ == 0) {
+    total_popcount += __builtin_popcountll(load_word(bitmap_));
+    total_popcount += __builtin_popcountll(load_word(bitmap_ + 8));
+    total_popcount += __builtin_popcountll(load_word(bitmap_ + 16));
+    total_popcount += __builtin_popcountll(load_word(bitmap_ + 24));
+  } else {
+    auto current = load_word(bitmap_);
+    auto next = load_word(bitmap_ + 8);
+    total_popcount += __builtin_popcountll(shift_word(current, next, offset_));
+    current = next;
+    next = load_word(bitmap_ + 16);
+    total_popcount += __builtin_popcountll(shift_word(current, next, offset_));
+    current = next;
+    next = load_word(bitmap_ + 24);
+    total_popcount += __builtin_popcountll(shift_word(current, next, offset_));
+    current = next;
+    next = load_word(bitmap_ + 32);
+    total_popcount += __builtin_popcountll(shift_word(current, next, offset_));

Review comment:
       This raised an interesting question of should this be tied in with existing popcount methods and the improvements applied in both places?




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org