You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by uw...@apache.org on 2018/04/21 13:23:46 UTC

[arrow] branch master updated: ARROW-1928: [C++] Add BitmapReader/BitmapWriter benchmarks

This is an automated email from the ASF dual-hosted git repository.

uwe pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/master by this push:
     new 249e039  ARROW-1928: [C++] Add BitmapReader/BitmapWriter benchmarks
249e039 is described below

commit 249e0397ef4e552be2fa7a884c76e7e152063a8f
Author: Antoine Pitrou <an...@python.org>
AuthorDate: Sat Apr 21 14:22:45 2018 +0100

    ARROW-1928: [C++] Add BitmapReader/BitmapWriter benchmarks
    
    Also improve the BitmapWriter implementation (around 18% faster here - on Ubuntu 16.04 with gcc 4.9).
    
    I experimented with a different implementation for BitmapReader (storing the bit mask instead of the bit offset), but it came out surprisingly slower.
    
    Author: Antoine Pitrou <an...@python.org>
    
    Closes #1915 from pitrou/ARROW-1928-bitmap-benchmarks and squashes the following commits:
    
    733cc81c <Antoine Pitrou> ARROW-1928:  Add BitmapReader/BitmapWriter benchmarks
---
 cpp/src/arrow/util/bit-util-benchmark.cc | 158 ++++++++++++++++++++++++++++++-
 cpp/src/arrow/util/bit-util.h            |  27 +++---
 2 files changed, 166 insertions(+), 19 deletions(-)

diff --git a/cpp/src/arrow/util/bit-util-benchmark.cc b/cpp/src/arrow/util/bit-util-benchmark.cc
index 8969dd8..06aa2ed 100644
--- a/cpp/src/arrow/util/bit-util-benchmark.cc
+++ b/cpp/src/arrow/util/bit-util-benchmark.cc
@@ -28,13 +28,147 @@
 namespace arrow {
 namespace BitUtil {
 
-static void BM_CopyBitmap(benchmark::State& state) {  // NOLINT non-const reference
-  const int kBufferSize = state.range(0);
+// A naive bitmap reader implementation, meant as a baseline against
+// internal::BitmapReader
+
+class NaiveBitmapReader {
+ public:
+  NaiveBitmapReader(const uint8_t* bitmap, int64_t start_offset, int64_t length)
+      : bitmap_(bitmap), position_(0) {}
+
+  bool IsSet() const {
+    const int64_t byte_offset = position_ / 8;
+    const int64_t bit_offset = position_ % 8;
+    return (bitmap_[byte_offset] & (1 << bit_offset)) == 0;
+  }
+
+  bool IsNotSet() const { return !IsSet(); }
+
+  void Next() { ++position_; }
 
+ private:
+  const uint8_t* bitmap_;
+  int64_t position_;
+};
+
+// A naive bitmap writer implementation, meant as a baseline against
+// internal::BitmapWriter
+
+class NaiveBitmapWriter {
+ public:
+  NaiveBitmapWriter(uint8_t* bitmap, int64_t start_offset, int64_t length)
+      : bitmap_(bitmap), position_(0) {}
+
+  void Set() {
+    const int64_t byte_offset = position_ / 8;
+    const int64_t bit_offset = position_ % 8;
+    bitmap_[byte_offset] |= static_cast<uint8_t>(1 << bit_offset);
+  }
+
+  void Clear() {
+    const int64_t byte_offset = position_ / 8;
+    const int64_t bit_offset = position_ % 8;
+    bitmap_[byte_offset] &= 0xFF ^ static_cast<uint8_t>(1 << bit_offset);
+  }
+
+  void Next() { ++position_; }
+
+  void Finish() {}
+
+  int64_t position() const { return position_; }
+
+ private:
+  uint8_t* bitmap_;
+  int64_t position_;
+};
+
+static std::shared_ptr<Buffer> CreateRandomBuffer(int64_t nbytes) {
   std::shared_ptr<Buffer> buffer;
-  ASSERT_OK(AllocateBuffer(default_memory_pool(), kBufferSize, &buffer));
-  memset(buffer->mutable_data(), 0, kBufferSize);
-  test::random_bytes(kBufferSize, 0, buffer->mutable_data());
+  ABORT_NOT_OK(AllocateBuffer(default_memory_pool(), nbytes, &buffer));
+  memset(buffer->mutable_data(), 0, nbytes);
+  test::random_bytes(nbytes, 0, buffer->mutable_data());
+  return buffer;
+}
+
+template <typename BitmapReaderType>
+static void BenchmarkBitmapReader(benchmark::State& state, int64_t nbytes) {
+  std::shared_ptr<Buffer> buffer = CreateRandomBuffer(nbytes);
+
+  const int64_t num_bits = nbytes * 8;
+  const uint8_t* bitmap = buffer->data();
+
+  while (state.KeepRunning()) {
+    {
+      BitmapReaderType reader(bitmap, 0, num_bits);
+      int64_t total = 0;
+      for (int64_t i = 0; i < num_bits; i++) {
+        total += reader.IsSet();
+        reader.Next();
+      }
+      benchmark::DoNotOptimize(total);
+    }
+    {
+      BitmapReaderType reader(bitmap, 0, num_bits);
+      int64_t total = 0;
+      for (int64_t i = 0; i < num_bits; i++) {
+        total += !reader.IsNotSet();
+        reader.Next();
+      }
+      benchmark::DoNotOptimize(total);
+    }
+  }
+  state.SetBytesProcessed(2 * int64_t(state.iterations()) * nbytes);
+}
+
+template <typename BitmapWriterType>
+static void BenchmarkBitmapWriter(benchmark::State& state, int64_t nbytes) {
+  std::shared_ptr<Buffer> buffer = CreateRandomBuffer(nbytes);
+
+  const int64_t num_bits = nbytes * 8;
+  uint8_t* bitmap = buffer->mutable_data();
+
+  while (state.KeepRunning()) {
+    {
+      BitmapWriterType writer(bitmap, 0, num_bits);
+      for (int64_t i = 0; i < num_bits; i++) {
+        writer.Set();
+        writer.Next();
+      }
+      writer.Finish();
+      benchmark::ClobberMemory();
+    }
+    {
+      BitmapWriterType writer(bitmap, 0, num_bits);
+      for (int64_t i = 0; i < num_bits; i++) {
+        writer.Clear();
+        writer.Next();
+      }
+      writer.Finish();
+      benchmark::ClobberMemory();
+    }
+  }
+  state.SetBytesProcessed(2 * int64_t(state.iterations()) * nbytes);
+}
+
+static void BM_NaiveBitmapReader(benchmark::State& state) {
+  BenchmarkBitmapReader<NaiveBitmapReader>(state, state.range(0));
+}
+
+static void BM_BitmapReader(benchmark::State& state) {
+  BenchmarkBitmapReader<internal::BitmapReader>(state, state.range(0));
+}
+
+static void BM_NaiveBitmapWriter(benchmark::State& state) {
+  BenchmarkBitmapWriter<NaiveBitmapWriter>(state, state.range(0));
+}
+
+static void BM_BitmapWriter(benchmark::State& state) {
+  BenchmarkBitmapWriter<internal::BitmapWriter>(state, state.range(0));
+}
+
+static void BM_CopyBitmap(benchmark::State& state) {  // NOLINT non-const reference
+  const int kBufferSize = state.range(0);
+  std::shared_ptr<Buffer> buffer = CreateRandomBuffer(kBufferSize);
 
   const int num_bits = kBufferSize * 8;
   const uint8_t* src = buffer->data();
@@ -54,5 +188,19 @@ BENCHMARK(BM_CopyBitmap)
     ->MinTime(1.0)
     ->Unit(benchmark::kMicrosecond);
 
+BENCHMARK(BM_NaiveBitmapReader)
+    ->Args({100000})
+    ->MinTime(1.0)
+    ->Unit(benchmark::kMicrosecond);
+
+BENCHMARK(BM_BitmapReader)->Args({100000})->MinTime(1.0)->Unit(benchmark::kMicrosecond);
+
+BENCHMARK(BM_NaiveBitmapWriter)
+    ->Args({100000})
+    ->MinTime(1.0)
+    ->Unit(benchmark::kMicrosecond);
+
+BENCHMARK(BM_BitmapWriter)->Args({100000})->MinTime(1.0)->Unit(benchmark::kMicrosecond);
+
 }  // namespace BitUtil
 }  // namespace arrow
diff --git a/cpp/src/arrow/util/bit-util.h b/cpp/src/arrow/util/bit-util.h
index 5f171a0..3255ead 100644
--- a/cpp/src/arrow/util/bit-util.h
+++ b/cpp/src/arrow/util/bit-util.h
@@ -428,7 +428,7 @@ class BitmapReader {
   void Next() {
     ++bit_offset_;
     ++position_;
-    if (bit_offset_ == 8) {
+    if (ARROW_PREDICT_FALSE(bit_offset_ == 8)) {
       bit_offset_ = 0;
       ++byte_offset_;
       if (ARROW_PREDICT_TRUE(position_ < length_)) {
@@ -453,23 +453,23 @@ class BitmapWriter {
       : bitmap_(bitmap), position_(0), length_(length) {
     current_byte_ = 0;
     byte_offset_ = start_offset / 8;
-    bit_offset_ = start_offset % 8;
+    bit_mask_ = static_cast<uint8_t>(1 << (start_offset % 8));
     if (length > 0) {
       current_byte_ = bitmap[byte_offset_];
     }
   }
 
-  void Set() { current_byte_ |= BitUtil::kBitmask[bit_offset_]; }
+  void Set() { current_byte_ |= bit_mask_; }
 
-  void Clear() { current_byte_ &= BitUtil::kFlippedBitmask[bit_offset_]; }
+  void Clear() { current_byte_ &= bit_mask_ ^ 0xFF; }
 
   void Next() {
-    ++bit_offset_;
+    bit_mask_ = static_cast<uint8_t>(bit_mask_ << 1);
     ++position_;
-    bitmap_[byte_offset_] = current_byte_;
-    if (bit_offset_ == 8) {
-      bit_offset_ = 0;
-      ++byte_offset_;
+    if (bit_mask_ == 0) {
+      // Finished this byte, need advancing
+      bit_mask_ = 0x01;
+      bitmap_[byte_offset_++] = current_byte_;
       if (ARROW_PREDICT_TRUE(position_ < length_)) {
         current_byte_ = bitmap_[byte_offset_];
       }
@@ -477,10 +477,9 @@ class BitmapWriter {
   }
 
   void Finish() {
-    if (ARROW_PREDICT_TRUE(position_ < length_)) {
-      if (bit_offset_ != 0) {
-        bitmap_[byte_offset_] = current_byte_;
-      }
+    // Store current byte if we didn't went past bitmap storage
+    if (bit_mask_ != 0x01 || position_ < length_) {
+      bitmap_[byte_offset_] = current_byte_;
     }
   }
 
@@ -492,8 +491,8 @@ class BitmapWriter {
   int64_t length_;
 
   uint8_t current_byte_;
+  uint8_t bit_mask_;
   int64_t byte_offset_;
-  int64_t bit_offset_;
 };
 
 }  // namespace internal

-- 
To stop receiving notification emails like this one, please contact
uwe@apache.org.