You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by we...@apache.org on 2018/12/02 23:05:36 UTC
[arrow] branch master updated: ARROW-3922: [C++]
Micro-optimizations to BitUtil::GetBit
This is an automated email from the ASF dual-hosted git repository.
wesm pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/master by this push:
new 1621868 ARROW-3922: [C++] Micro-optimizations to BitUtil::GetBit
1621868 is described below
commit 16218682336681cad79f143bd01484cf96332eb7
Author: Wes McKinney <we...@apache.org>
AuthorDate: Sun Dec 2 17:05:30 2018 -0600
ARROW-3922: [C++] Micro-optimizations to BitUtil::GetBit
The results are fairly noisy so I couldn't really measure any improvement, but it seems to be at most 0.5-1%. I changed the NaiveBitmapReader to use `BitUtil::GetBit` so it is an apples-to-apples comparison. On my laptop (with CPU throttling disabled) the difference is within 1 standard deviation of the mean, so not statistically significant. Since the generated assembly is smaller, I would say it's a reasonable change
after
```
-----------------------------------------------------------------------------------------------------
Benchmark Time CPU Iterations
-----------------------------------------------------------------------------------------------------
BM_NaiveBitmapReader/1000000/min_time:1.000/repeats:10 12255 us 12255 us 111 155.636MB/s
BM_NaiveBitmapReader/1000000/min_time:1.000/repeats:10 12367 us 12367 us 111 154.228MB/s
BM_NaiveBitmapReader/1000000/min_time:1.000/repeats:10 12243 us 12243 us 111 155.786MB/s
BM_NaiveBitmapReader/1000000/min_time:1.000/repeats:10 12235 us 12236 us 111 155.885MB/s
BM_NaiveBitmapReader/1000000/min_time:1.000/repeats:10 12426 us 12426 us 111 153.491MB/s
BM_NaiveBitmapReader/1000000/min_time:1.000/repeats:10 12372 us 12372 us 111 154.164MB/s
BM_NaiveBitmapReader/1000000/min_time:1.000/repeats:10 12283 us 12283 us 111 155.285MB/s
BM_NaiveBitmapReader/1000000/min_time:1.000/repeats:10 12340 us 12340 us 111 154.567MB/s
BM_NaiveBitmapReader/1000000/min_time:1.000/repeats:10 12389 us 12390 us 111 153.946MB/s
BM_NaiveBitmapReader/1000000/min_time:1.000/repeats:10 12489 us 12489 us 111 152.722MB/s
BM_NaiveBitmapReader/1000000/min_time:1.000/repeats:10_mean 12340 us 12340 us 111 154.571MB/s
BM_NaiveBitmapReader/1000000/min_time:1.000/repeats:10_median 12353 us 12354 us 111 154.397MB/s
BM_NaiveBitmapReader/1000000/min_time:1.000/repeats:10_stddev 85 us 85 us 111 1085.01kB/s
BM_BitmapReader/1000000/min_time:1.000/repeats:10 10717 us 10717 us 132 177.969MB/s
BM_BitmapReader/1000000/min_time:1.000/repeats:10 10656 us 10657 us 132 178.982MB/s
BM_BitmapReader/1000000/min_time:1.000/repeats:10 10835 us 10836 us 132 176.028MB/s
BM_BitmapReader/1000000/min_time:1.000/repeats:10 10735 us 10735 us 132 177.672MB/s
BM_BitmapReader/1000000/min_time:1.000/repeats:10 10700 us 10700 us 132 178.253MB/s
BM_BitmapReader/1000000/min_time:1.000/repeats:10 11481 us 11481 us 132 166.131MB/s
BM_BitmapReader/1000000/min_time:1.000/repeats:10 10850 us 10850 us 132 175.797MB/s
BM_BitmapReader/1000000/min_time:1.000/repeats:10 10591 us 10591 us 132 180.095MB/s
BM_BitmapReader/1000000/min_time:1.000/repeats:10 10684 us 10684 us 132 178.523MB/s
BM_BitmapReader/1000000/min_time:1.000/repeats:10 10705 us 10705 us 132 178.167MB/s
BM_BitmapReader/1000000/min_time:1.000/repeats:10_mean 10795 us 10796 us 132 176.762MB/s
BM_BitmapReader/1000000/min_time:1.000/repeats:10_median 10711 us 10711 us 132 178.068MB/s
BM_BitmapReader/1000000/min_time:1.000/repeats:10_stddev 253 us 253 us 132 3.94562MB/s
```
previous
```
-----------------------------------------------------------------------------------------------------
Benchmark Time CPU Iterations
-----------------------------------------------------------------------------------------------------
BM_NaiveBitmapReader/1000000/min_time:1.000/repeats:10 12349 us 12348 us 107 154.464MB/s
BM_NaiveBitmapReader/1000000/min_time:1.000/repeats:10 12289 us 12288 us 107 155.225MB/s
BM_NaiveBitmapReader/1000000/min_time:1.000/repeats:10 12368 us 12366 us 107 154.235MB/s
BM_NaiveBitmapReader/1000000/min_time:1.000/repeats:10 12340 us 12339 us 107 154.578MB/s
BM_NaiveBitmapReader/1000000/min_time:1.000/repeats:10 12446 us 12445 us 107 153.268MB/s
BM_NaiveBitmapReader/1000000/min_time:1.000/repeats:10 12450 us 12449 us 107 153.211MB/s
BM_NaiveBitmapReader/1000000/min_time:1.000/repeats:10 12377 us 12376 us 107 154.112MB/s
BM_NaiveBitmapReader/1000000/min_time:1.000/repeats:10 12455 us 12454 us 107 153.157MB/s
BM_NaiveBitmapReader/1000000/min_time:1.000/repeats:10 12431 us 12429 us 107 153.454MB/s
BM_NaiveBitmapReader/1000000/min_time:1.000/repeats:10 12382 us 12381 us 107 154.052MB/s
BM_NaiveBitmapReader/1000000/min_time:1.000/repeats:10_mean 12389 us 12388 us 107 153.976MB/s
BM_NaiveBitmapReader/1000000/min_time:1.000/repeats:10_median 12380 us 12379 us 107 154.082MB/s
BM_NaiveBitmapReader/1000000/min_time:1.000/repeats:10_stddev 55 us 55 us 107 706.181kB/s
BM_BitmapReader/1000000/min_time:1.000/repeats:10 10898 us 10897 us 130 175.038MB/s
BM_BitmapReader/1000000/min_time:1.000/repeats:10 10916 us 10915 us 130 174.739MB/s
BM_BitmapReader/1000000/min_time:1.000/repeats:10 10923 us 10922 us 130 174.64MB/s
BM_BitmapReader/1000000/min_time:1.000/repeats:10 11002 us 11000 us 130 173.39MB/s
BM_BitmapReader/1000000/min_time:1.000/repeats:10 10875 us 10874 us 130 175.403MB/s
BM_BitmapReader/1000000/min_time:1.000/repeats:10 11165 us 11163 us 130 170.859MB/s
BM_BitmapReader/1000000/min_time:1.000/repeats:10 11018 us 11016 us 130 173.141MB/s
BM_BitmapReader/1000000/min_time:1.000/repeats:10 10948 us 10947 us 130 174.24MB/s
BM_BitmapReader/1000000/min_time:1.000/repeats:10 11079 us 11078 us 130 172.171MB/s
BM_BitmapReader/1000000/min_time:1.000/repeats:10 10915 us 10914 us 130 174.766MB/s
BM_BitmapReader/1000000/min_time:1.000/repeats:10_mean 10974 us 10973 us 130 173.839MB/s
BM_BitmapReader/1000000/min_time:1.000/repeats:10_median 10935 us 10934 us 130 174.44MB/s
BM_BitmapReader/1000000/min_time:1.000/repeats:10_stddev 92 us 92 us 130 1.44155MB/s
```
Author: Wes McKinney <we...@apache.org>
Closes #3067 from wesm/ARROW-3922 and squashes the following commits:
9c3fe8cf3 <Wes McKinney> clang-format
15f6a7b66 <Wes McKinney> Use c++11 syntax with gbenchmark
eb3a1a893 <Wes McKinney> A little bit less noisy benchmarks
4f56f2b10 <Wes McKinney> Micro-optimizations to BitUtil::GetBit
---
cpp/src/arrow/util/bit-util-benchmark.cc | 16 ++++++----------
cpp/src/arrow/util/bit-util.h | 4 ++--
2 files changed, 8 insertions(+), 12 deletions(-)
diff --git a/cpp/src/arrow/util/bit-util-benchmark.cc b/cpp/src/arrow/util/bit-util-benchmark.cc
index beb48df..cc71078 100644
--- a/cpp/src/arrow/util/bit-util-benchmark.cc
+++ b/cpp/src/arrow/util/bit-util-benchmark.cc
@@ -39,11 +39,7 @@ class NaiveBitmapReader {
NaiveBitmapReader(const uint8_t* bitmap, int64_t start_offset, int64_t length)
: bitmap_(bitmap), position_(0) {}
- bool IsSet() const {
- const int64_t byte_offset = position_ / 8;
- const int64_t bit_offset = position_ % 8;
- return (bitmap_[byte_offset] & (1 << bit_offset)) == 0;
- }
+ bool IsSet() const { return BitUtil::GetBit(bitmap_, position_); }
bool IsNotSet() const { return !IsSet(); }
@@ -51,7 +47,7 @@ class NaiveBitmapReader {
private:
const uint8_t* bitmap_;
- int64_t position_;
+ uint64_t position_;
};
// A naive bitmap writer implementation, meant as a baseline against
@@ -100,7 +96,7 @@ static void BenchmarkBitmapReader(benchmark::State& state, int64_t nbytes) {
const int64_t num_bits = nbytes * 8;
const uint8_t* bitmap = buffer->data();
- while (state.KeepRunning()) {
+ for (auto _ : state) {
{
BitmapReaderType reader(bitmap, 0, num_bits);
int64_t total = 0;
@@ -240,11 +236,11 @@ BENCHMARK(BM_CopyBitmap)
->Unit(benchmark::kMicrosecond);
BENCHMARK(BM_NaiveBitmapReader)
- ->Args({100000})
- ->MinTime(1.0)
+ ->Args({1000000})
+ ->MinTime(5.0)
->Unit(benchmark::kMicrosecond);
-BENCHMARK(BM_BitmapReader)->Args({100000})->MinTime(1.0)->Unit(benchmark::kMicrosecond);
+BENCHMARK(BM_BitmapReader)->Args({1000000})->MinTime(5.0)->Unit(benchmark::kMicrosecond);
BENCHMARK(BM_NaiveBitmapWriter)
->Args({100000})
diff --git a/cpp/src/arrow/util/bit-util.h b/cpp/src/arrow/util/bit-util.h
index cd3d5b0..93b6cb2 100644
--- a/cpp/src/arrow/util/bit-util.h
+++ b/cpp/src/arrow/util/bit-util.h
@@ -310,8 +310,8 @@ static constexpr uint8_t kPrecedingBitmask[] = {0, 1, 3, 7, 15, 31, 63, 127};
// the bitwise complement version of kPrecedingBitmask
static constexpr uint8_t kTrailingBitmask[] = {255, 254, 252, 248, 240, 224, 192, 128};
-static inline bool GetBit(const uint8_t* bits, int64_t i) {
- return (bits[i / 8] & kBitmask[i % 8]) != 0;
+static inline bool GetBit(const uint8_t* bits, uint64_t i) {
+ return (bits[i >> 3] >> (i & 0x07)) & 1;
}
static inline void ClearBit(uint8_t* bits, int64_t i) {