You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by uw...@apache.org on 2018/04/21 13:23:46 UTC
[arrow] branch master updated: ARROW-1928: [C++] Add
BitmapReader/BitmapWriter benchmarks
This is an automated email from the ASF dual-hosted git repository.
uwe pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/master by this push:
new 249e039 ARROW-1928: [C++] Add BitmapReader/BitmapWriter benchmarks
249e039 is described below
commit 249e0397ef4e552be2fa7a884c76e7e152063a8f
Author: Antoine Pitrou <an...@python.org>
AuthorDate: Sat Apr 21 14:22:45 2018 +0100
ARROW-1928: [C++] Add BitmapReader/BitmapWriter benchmarks
Also improve the BitmapWriter implementation (around 18% faster here - on Ubuntu 16.04 with gcc 4.9).
I experimented with a different implementation for BitmapReader (storing the bit mask instead of the bit offset), but it came out surprisingly slower.
Author: Antoine Pitrou <an...@python.org>
Closes #1915 from pitrou/ARROW-1928-bitmap-benchmarks and squashes the following commits:
733cc81c <Antoine Pitrou> ARROW-1928: Add BitmapReader/BitmapWriter benchmarks
---
cpp/src/arrow/util/bit-util-benchmark.cc | 158 ++++++++++++++++++++++++++++++-
cpp/src/arrow/util/bit-util.h | 27 +++---
2 files changed, 166 insertions(+), 19 deletions(-)
diff --git a/cpp/src/arrow/util/bit-util-benchmark.cc b/cpp/src/arrow/util/bit-util-benchmark.cc
index 8969dd8..06aa2ed 100644
--- a/cpp/src/arrow/util/bit-util-benchmark.cc
+++ b/cpp/src/arrow/util/bit-util-benchmark.cc
@@ -28,13 +28,147 @@
namespace arrow {
namespace BitUtil {
-static void BM_CopyBitmap(benchmark::State& state) { // NOLINT non-const reference
- const int kBufferSize = state.range(0);
+// A naive bitmap reader implementation, meant as a baseline against
+// internal::BitmapReader
+
+class NaiveBitmapReader {
+ public:
+ NaiveBitmapReader(const uint8_t* bitmap, int64_t start_offset, int64_t length)
+ : bitmap_(bitmap), position_(0) {}
+
+ bool IsSet() const {
+ const int64_t byte_offset = position_ / 8;
+ const int64_t bit_offset = position_ % 8;
+ return (bitmap_[byte_offset] & (1 << bit_offset)) == 0;
+ }
+
+ bool IsNotSet() const { return !IsSet(); }
+
+ void Next() { ++position_; }
+ private:
+ const uint8_t* bitmap_;
+ int64_t position_;
+};
+
+// A naive bitmap writer implementation, meant as a baseline against
+// internal::BitmapWriter
+
+class NaiveBitmapWriter {
+ public:
+ NaiveBitmapWriter(uint8_t* bitmap, int64_t start_offset, int64_t length)
+ : bitmap_(bitmap), position_(0) {}
+
+ void Set() {
+ const int64_t byte_offset = position_ / 8;
+ const int64_t bit_offset = position_ % 8;
+ bitmap_[byte_offset] |= static_cast<uint8_t>(1 << bit_offset);
+ }
+
+ void Clear() {
+ const int64_t byte_offset = position_ / 8;
+ const int64_t bit_offset = position_ % 8;
+ bitmap_[byte_offset] &= 0xFF ^ static_cast<uint8_t>(1 << bit_offset);
+ }
+
+ void Next() { ++position_; }
+
+ void Finish() {}
+
+ int64_t position() const { return position_; }
+
+ private:
+ uint8_t* bitmap_;
+ int64_t position_;
+};
+
+static std::shared_ptr<Buffer> CreateRandomBuffer(int64_t nbytes) {
std::shared_ptr<Buffer> buffer;
- ASSERT_OK(AllocateBuffer(default_memory_pool(), kBufferSize, &buffer));
- memset(buffer->mutable_data(), 0, kBufferSize);
- test::random_bytes(kBufferSize, 0, buffer->mutable_data());
+ ABORT_NOT_OK(AllocateBuffer(default_memory_pool(), nbytes, &buffer));
+ memset(buffer->mutable_data(), 0, nbytes);
+ test::random_bytes(nbytes, 0, buffer->mutable_data());
+ return buffer;
+}
+
+template <typename BitmapReaderType>
+static void BenchmarkBitmapReader(benchmark::State& state, int64_t nbytes) {
+ std::shared_ptr<Buffer> buffer = CreateRandomBuffer(nbytes);
+
+ const int64_t num_bits = nbytes * 8;
+ const uint8_t* bitmap = buffer->data();
+
+ while (state.KeepRunning()) {
+ {
+ BitmapReaderType reader(bitmap, 0, num_bits);
+ int64_t total = 0;
+ for (int64_t i = 0; i < num_bits; i++) {
+ total += reader.IsSet();
+ reader.Next();
+ }
+ benchmark::DoNotOptimize(total);
+ }
+ {
+ BitmapReaderType reader(bitmap, 0, num_bits);
+ int64_t total = 0;
+ for (int64_t i = 0; i < num_bits; i++) {
+ total += !reader.IsNotSet();
+ reader.Next();
+ }
+ benchmark::DoNotOptimize(total);
+ }
+ }
+ state.SetBytesProcessed(2 * int64_t(state.iterations()) * nbytes);
+}
+
+template <typename BitmapWriterType>
+static void BenchmarkBitmapWriter(benchmark::State& state, int64_t nbytes) {
+ std::shared_ptr<Buffer> buffer = CreateRandomBuffer(nbytes);
+
+ const int64_t num_bits = nbytes * 8;
+ uint8_t* bitmap = buffer->mutable_data();
+
+ while (state.KeepRunning()) {
+ {
+ BitmapWriterType writer(bitmap, 0, num_bits);
+ for (int64_t i = 0; i < num_bits; i++) {
+ writer.Set();
+ writer.Next();
+ }
+ writer.Finish();
+ benchmark::ClobberMemory();
+ }
+ {
+ BitmapWriterType writer(bitmap, 0, num_bits);
+ for (int64_t i = 0; i < num_bits; i++) {
+ writer.Clear();
+ writer.Next();
+ }
+ writer.Finish();
+ benchmark::ClobberMemory();
+ }
+ }
+ state.SetBytesProcessed(2 * int64_t(state.iterations()) * nbytes);
+}
+
+static void BM_NaiveBitmapReader(benchmark::State& state) {
+ BenchmarkBitmapReader<NaiveBitmapReader>(state, state.range(0));
+}
+
+static void BM_BitmapReader(benchmark::State& state) {
+ BenchmarkBitmapReader<internal::BitmapReader>(state, state.range(0));
+}
+
+static void BM_NaiveBitmapWriter(benchmark::State& state) {
+ BenchmarkBitmapWriter<NaiveBitmapWriter>(state, state.range(0));
+}
+
+static void BM_BitmapWriter(benchmark::State& state) {
+ BenchmarkBitmapWriter<internal::BitmapWriter>(state, state.range(0));
+}
+
+static void BM_CopyBitmap(benchmark::State& state) { // NOLINT non-const reference
+ const int kBufferSize = state.range(0);
+ std::shared_ptr<Buffer> buffer = CreateRandomBuffer(kBufferSize);
const int num_bits = kBufferSize * 8;
const uint8_t* src = buffer->data();
@@ -54,5 +188,19 @@ BENCHMARK(BM_CopyBitmap)
->MinTime(1.0)
->Unit(benchmark::kMicrosecond);
+BENCHMARK(BM_NaiveBitmapReader)
+ ->Args({100000})
+ ->MinTime(1.0)
+ ->Unit(benchmark::kMicrosecond);
+
+BENCHMARK(BM_BitmapReader)->Args({100000})->MinTime(1.0)->Unit(benchmark::kMicrosecond);
+
+BENCHMARK(BM_NaiveBitmapWriter)
+ ->Args({100000})
+ ->MinTime(1.0)
+ ->Unit(benchmark::kMicrosecond);
+
+BENCHMARK(BM_BitmapWriter)->Args({100000})->MinTime(1.0)->Unit(benchmark::kMicrosecond);
+
} // namespace BitUtil
} // namespace arrow
diff --git a/cpp/src/arrow/util/bit-util.h b/cpp/src/arrow/util/bit-util.h
index 5f171a0..3255ead 100644
--- a/cpp/src/arrow/util/bit-util.h
+++ b/cpp/src/arrow/util/bit-util.h
@@ -428,7 +428,7 @@ class BitmapReader {
void Next() {
++bit_offset_;
++position_;
- if (bit_offset_ == 8) {
+ if (ARROW_PREDICT_FALSE(bit_offset_ == 8)) {
bit_offset_ = 0;
++byte_offset_;
if (ARROW_PREDICT_TRUE(position_ < length_)) {
@@ -453,23 +453,23 @@ class BitmapWriter {
: bitmap_(bitmap), position_(0), length_(length) {
current_byte_ = 0;
byte_offset_ = start_offset / 8;
- bit_offset_ = start_offset % 8;
+ bit_mask_ = static_cast<uint8_t>(1 << (start_offset % 8));
if (length > 0) {
current_byte_ = bitmap[byte_offset_];
}
}
- void Set() { current_byte_ |= BitUtil::kBitmask[bit_offset_]; }
+ void Set() { current_byte_ |= bit_mask_; }
- void Clear() { current_byte_ &= BitUtil::kFlippedBitmask[bit_offset_]; }
+ void Clear() { current_byte_ &= bit_mask_ ^ 0xFF; }
void Next() {
- ++bit_offset_;
+ bit_mask_ = static_cast<uint8_t>(bit_mask_ << 1);
++position_;
- bitmap_[byte_offset_] = current_byte_;
- if (bit_offset_ == 8) {
- bit_offset_ = 0;
- ++byte_offset_;
+ if (bit_mask_ == 0) {
+ // Finished this byte, need advancing
+ bit_mask_ = 0x01;
+ bitmap_[byte_offset_++] = current_byte_;
if (ARROW_PREDICT_TRUE(position_ < length_)) {
current_byte_ = bitmap_[byte_offset_];
}
@@ -477,10 +477,9 @@ class BitmapWriter {
}
void Finish() {
- if (ARROW_PREDICT_TRUE(position_ < length_)) {
- if (bit_offset_ != 0) {
- bitmap_[byte_offset_] = current_byte_;
- }
+ // Store current byte if we didn't went past bitmap storage
+ if (bit_mask_ != 0x01 || position_ < length_) {
+ bitmap_[byte_offset_] = current_byte_;
}
}
@@ -492,8 +491,8 @@ class BitmapWriter {
int64_t length_;
uint8_t current_byte_;
+ uint8_t bit_mask_;
int64_t byte_offset_;
- int64_t bit_offset_;
};
} // namespace internal
--
To stop receiving notification emails like this one, please contact
uwe@apache.org.