You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by we...@apache.org on 2018/12/02 23:05:36 UTC

[arrow] branch master updated: ARROW-3922: [C++] Micro-optimizations to BitUtil::GetBit

This is an automated email from the ASF dual-hosted git repository.

wesm pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/master by this push:
     new 1621868  ARROW-3922: [C++] Micro-optimizations to BitUtil::GetBit
1621868 is described below

commit 16218682336681cad79f143bd01484cf96332eb7
Author: Wes McKinney <we...@apache.org>
AuthorDate: Sun Dec 2 17:05:30 2018 -0600

    ARROW-3922: [C++] Micro-optimizations to BitUtil::GetBit
    
    The results are fairly noisy so I couldn't really measure any improvement, but it seems to be at most 0.5-1%. I changed the NaiveBitmapReader to use `BitUtil::GetBit` so it is an apples-to-apples comparison. On my laptop (with CPU throttling disabled) the difference is within 1 standard deviation of the mean, so not statistically significant. Since the generated assembly is smaller, I would say it's a reasonable change
    
    after
    ```
    -----------------------------------------------------------------------------------------------------
    Benchmark                                                              Time           CPU Iterations
    -----------------------------------------------------------------------------------------------------
    BM_NaiveBitmapReader/1000000/min_time:1.000/repeats:10             12255 us      12255 us        111   155.636MB/s
    BM_NaiveBitmapReader/1000000/min_time:1.000/repeats:10             12367 us      12367 us        111   154.228MB/s
    BM_NaiveBitmapReader/1000000/min_time:1.000/repeats:10             12243 us      12243 us        111   155.786MB/s
    BM_NaiveBitmapReader/1000000/min_time:1.000/repeats:10             12235 us      12236 us        111   155.885MB/s
    BM_NaiveBitmapReader/1000000/min_time:1.000/repeats:10             12426 us      12426 us        111   153.491MB/s
    BM_NaiveBitmapReader/1000000/min_time:1.000/repeats:10             12372 us      12372 us        111   154.164MB/s
    BM_NaiveBitmapReader/1000000/min_time:1.000/repeats:10             12283 us      12283 us        111   155.285MB/s
    BM_NaiveBitmapReader/1000000/min_time:1.000/repeats:10             12340 us      12340 us        111   154.567MB/s
    BM_NaiveBitmapReader/1000000/min_time:1.000/repeats:10             12389 us      12390 us        111   153.946MB/s
    BM_NaiveBitmapReader/1000000/min_time:1.000/repeats:10             12489 us      12489 us        111   152.722MB/s
    BM_NaiveBitmapReader/1000000/min_time:1.000/repeats:10_mean        12340 us      12340 us        111   154.571MB/s
    BM_NaiveBitmapReader/1000000/min_time:1.000/repeats:10_median      12353 us      12354 us        111   154.397MB/s
    BM_NaiveBitmapReader/1000000/min_time:1.000/repeats:10_stddev         85 us         85 us        111   1085.01kB/s
    BM_BitmapReader/1000000/min_time:1.000/repeats:10                  10717 us      10717 us        132   177.969MB/s
    BM_BitmapReader/1000000/min_time:1.000/repeats:10                  10656 us      10657 us        132   178.982MB/s
    BM_BitmapReader/1000000/min_time:1.000/repeats:10                  10835 us      10836 us        132   176.028MB/s
    BM_BitmapReader/1000000/min_time:1.000/repeats:10                  10735 us      10735 us        132   177.672MB/s
    BM_BitmapReader/1000000/min_time:1.000/repeats:10                  10700 us      10700 us        132   178.253MB/s
    BM_BitmapReader/1000000/min_time:1.000/repeats:10                  11481 us      11481 us        132   166.131MB/s
    BM_BitmapReader/1000000/min_time:1.000/repeats:10                  10850 us      10850 us        132   175.797MB/s
    BM_BitmapReader/1000000/min_time:1.000/repeats:10                  10591 us      10591 us        132   180.095MB/s
    BM_BitmapReader/1000000/min_time:1.000/repeats:10                  10684 us      10684 us        132   178.523MB/s
    BM_BitmapReader/1000000/min_time:1.000/repeats:10                  10705 us      10705 us        132   178.167MB/s
    BM_BitmapReader/1000000/min_time:1.000/repeats:10_mean             10795 us      10796 us        132   176.762MB/s
    BM_BitmapReader/1000000/min_time:1.000/repeats:10_median           10711 us      10711 us        132   178.068MB/s
    BM_BitmapReader/1000000/min_time:1.000/repeats:10_stddev             253 us        253 us        132   3.94562MB/s
    ```
    
    previous
    
    ```
    -----------------------------------------------------------------------------------------------------
    Benchmark                                                              Time           CPU Iterations
    -----------------------------------------------------------------------------------------------------
    BM_NaiveBitmapReader/1000000/min_time:1.000/repeats:10             12349 us      12348 us        107   154.464MB/s
    BM_NaiveBitmapReader/1000000/min_time:1.000/repeats:10             12289 us      12288 us        107   155.225MB/s
    BM_NaiveBitmapReader/1000000/min_time:1.000/repeats:10             12368 us      12366 us        107   154.235MB/s
    BM_NaiveBitmapReader/1000000/min_time:1.000/repeats:10             12340 us      12339 us        107   154.578MB/s
    BM_NaiveBitmapReader/1000000/min_time:1.000/repeats:10             12446 us      12445 us        107   153.268MB/s
    BM_NaiveBitmapReader/1000000/min_time:1.000/repeats:10             12450 us      12449 us        107   153.211MB/s
    BM_NaiveBitmapReader/1000000/min_time:1.000/repeats:10             12377 us      12376 us        107   154.112MB/s
    BM_NaiveBitmapReader/1000000/min_time:1.000/repeats:10             12455 us      12454 us        107   153.157MB/s
    BM_NaiveBitmapReader/1000000/min_time:1.000/repeats:10             12431 us      12429 us        107   153.454MB/s
    BM_NaiveBitmapReader/1000000/min_time:1.000/repeats:10             12382 us      12381 us        107   154.052MB/s
    BM_NaiveBitmapReader/1000000/min_time:1.000/repeats:10_mean        12389 us      12388 us        107   153.976MB/s
    BM_NaiveBitmapReader/1000000/min_time:1.000/repeats:10_median      12380 us      12379 us        107   154.082MB/s
    BM_NaiveBitmapReader/1000000/min_time:1.000/repeats:10_stddev         55 us         55 us        107   706.181kB/s
    BM_BitmapReader/1000000/min_time:1.000/repeats:10                  10898 us      10897 us        130   175.038MB/s
    BM_BitmapReader/1000000/min_time:1.000/repeats:10                  10916 us      10915 us        130   174.739MB/s
    BM_BitmapReader/1000000/min_time:1.000/repeats:10                  10923 us      10922 us        130    174.64MB/s
    BM_BitmapReader/1000000/min_time:1.000/repeats:10                  11002 us      11000 us        130    173.39MB/s
    BM_BitmapReader/1000000/min_time:1.000/repeats:10                  10875 us      10874 us        130   175.403MB/s
    BM_BitmapReader/1000000/min_time:1.000/repeats:10                  11165 us      11163 us        130   170.859MB/s
    BM_BitmapReader/1000000/min_time:1.000/repeats:10                  11018 us      11016 us        130   173.141MB/s
    BM_BitmapReader/1000000/min_time:1.000/repeats:10                  10948 us      10947 us        130    174.24MB/s
    BM_BitmapReader/1000000/min_time:1.000/repeats:10                  11079 us      11078 us        130   172.171MB/s
    BM_BitmapReader/1000000/min_time:1.000/repeats:10                  10915 us      10914 us        130   174.766MB/s
    BM_BitmapReader/1000000/min_time:1.000/repeats:10_mean             10974 us      10973 us        130   173.839MB/s
    BM_BitmapReader/1000000/min_time:1.000/repeats:10_median           10935 us      10934 us        130    174.44MB/s
    BM_BitmapReader/1000000/min_time:1.000/repeats:10_stddev              92 us         92 us        130   1.44155MB/s
    ```
    
    Author: Wes McKinney <we...@apache.org>
    
    Closes #3067 from wesm/ARROW-3922 and squashes the following commits:
    
    9c3fe8cf3 <Wes McKinney> clang-format
    15f6a7b66 <Wes McKinney> Use c++11 syntax with gbenchmark
    eb3a1a893 <Wes McKinney> A little bit less noisy benchmarks
    4f56f2b10 <Wes McKinney> Micro-optimizations to BitUtil::GetBit
---
 cpp/src/arrow/util/bit-util-benchmark.cc | 16 ++++++----------
 cpp/src/arrow/util/bit-util.h            |  4 ++--
 2 files changed, 8 insertions(+), 12 deletions(-)

diff --git a/cpp/src/arrow/util/bit-util-benchmark.cc b/cpp/src/arrow/util/bit-util-benchmark.cc
index beb48df..cc71078 100644
--- a/cpp/src/arrow/util/bit-util-benchmark.cc
+++ b/cpp/src/arrow/util/bit-util-benchmark.cc
@@ -39,11 +39,7 @@ class NaiveBitmapReader {
   NaiveBitmapReader(const uint8_t* bitmap, int64_t start_offset, int64_t length)
       : bitmap_(bitmap), position_(0) {}
 
-  bool IsSet() const {
-    const int64_t byte_offset = position_ / 8;
-    const int64_t bit_offset = position_ % 8;
-    return (bitmap_[byte_offset] & (1 << bit_offset)) == 0;
-  }
+  bool IsSet() const { return BitUtil::GetBit(bitmap_, position_); }
 
   bool IsNotSet() const { return !IsSet(); }
 
@@ -51,7 +47,7 @@ class NaiveBitmapReader {
 
  private:
   const uint8_t* bitmap_;
-  int64_t position_;
+  uint64_t position_;
 };
 
 // A naive bitmap writer implementation, meant as a baseline against
@@ -100,7 +96,7 @@ static void BenchmarkBitmapReader(benchmark::State& state, int64_t nbytes) {
   const int64_t num_bits = nbytes * 8;
   const uint8_t* bitmap = buffer->data();
 
-  while (state.KeepRunning()) {
+  for (auto _ : state) {
     {
       BitmapReaderType reader(bitmap, 0, num_bits);
       int64_t total = 0;
@@ -240,11 +236,11 @@ BENCHMARK(BM_CopyBitmap)
     ->Unit(benchmark::kMicrosecond);
 
 BENCHMARK(BM_NaiveBitmapReader)
-    ->Args({100000})
-    ->MinTime(1.0)
+    ->Args({1000000})
+    ->MinTime(5.0)
     ->Unit(benchmark::kMicrosecond);
 
-BENCHMARK(BM_BitmapReader)->Args({100000})->MinTime(1.0)->Unit(benchmark::kMicrosecond);
+BENCHMARK(BM_BitmapReader)->Args({1000000})->MinTime(5.0)->Unit(benchmark::kMicrosecond);
 
 BENCHMARK(BM_NaiveBitmapWriter)
     ->Args({100000})
diff --git a/cpp/src/arrow/util/bit-util.h b/cpp/src/arrow/util/bit-util.h
index cd3d5b0..93b6cb2 100644
--- a/cpp/src/arrow/util/bit-util.h
+++ b/cpp/src/arrow/util/bit-util.h
@@ -310,8 +310,8 @@ static constexpr uint8_t kPrecedingBitmask[] = {0, 1, 3, 7, 15, 31, 63, 127};
 // the bitwise complement version of kPrecedingBitmask
 static constexpr uint8_t kTrailingBitmask[] = {255, 254, 252, 248, 240, 224, 192, 128};
 
-static inline bool GetBit(const uint8_t* bits, int64_t i) {
-  return (bits[i / 8] & kBitmask[i % 8]) != 0;
+static inline bool GetBit(const uint8_t* bits, uint64_t i) {
+  return (bits[i >> 3] >> (i & 0x07)) & 1;
 }
 
 static inline void ClearBit(uint8_t* bits, int64_t i) {