You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by ap...@apache.org on 2020/12/02 07:37:46 UTC

[arrow] branch master updated: ARROW-10697: [C++] Add notes about bitmap readers

This is an automated email from the ASF dual-hosted git repository.

apitrou pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/master by this push:
     new c080548  ARROW-10697: [C++] Add notes about bitmap readers
c080548 is described below

commit c08054867d08231a160288f14cd69216b46b221c
Author: Antoine Pitrou <an...@python.org>
AuthorDate: Wed Dec 2 08:36:59 2020 +0100

    ARROW-10697: [C++] Add notes about bitmap readers
    
    Add some notes obtained after benchmarking various solutions.
    
    Add a benchmark of scalar boolean kernels.
    
    Closes #8761 from pitrou/ARROW-10697-bitmap-readers
    
    Authored-by: Antoine Pitrou <an...@python.org>
    Signed-off-by: Antoine Pitrou <an...@python.org>
---
 cpp/src/arrow/compare.cc                           |  6 ++-
 cpp/src/arrow/compute/kernels/CMakeLists.txt       |  1 +
 .../compute/kernels/scalar_boolean_benchmark.cc    | 59 ++++++++++++++++++++++
 cpp/src/arrow/compute/kernels/vector_selection.cc  |  1 -
 cpp/src/arrow/compute/kernels/vector_sort.cc       | 19 ++-----
 cpp/src/arrow/util/bit_util_benchmark.cc           | 28 +++++++++-
 cpp/src/arrow/util/bitmap.h                        |  5 ++
 cpp/src/arrow/util/bitmap_ops.cc                   |  4 ++
 8 files changed, 104 insertions(+), 19 deletions(-)

diff --git a/cpp/src/arrow/compare.cc b/cpp/src/arrow/compare.cc
index b696c76..d9cbe21 100644
--- a/cpp/src/arrow/compare.cc
+++ b/cpp/src/arrow/compare.cc
@@ -136,7 +136,7 @@ class RangeDataEqualsImpl {
           }
         }
         return true;
-      } else {
+      } else if (length <= 1024) {
         BitmapUInt64Reader left_reader(left_bits, left_start_idx_ + left_.offset + i,
                                        length);
         BitmapUInt64Reader right_reader(right_bits, right_start_idx_ + right_.offset + i,
@@ -147,6 +147,10 @@ class RangeDataEqualsImpl {
           }
         }
         DCHECK_EQ(right_reader.position(), length);
+      } else {
+        // BitmapEquals is the fastest method on large runs
+        return BitmapEquals(left_bits, left_start_idx_ + left_.offset + i, right_bits,
+                            right_start_idx_ + right_.offset + i, length);
       }
       return true;
     };
diff --git a/cpp/src/arrow/compute/kernels/CMakeLists.txt b/cpp/src/arrow/compute/kernels/CMakeLists.txt
index 0bca453..577b250 100644
--- a/cpp/src/arrow/compute/kernels/CMakeLists.txt
+++ b/cpp/src/arrow/compute/kernels/CMakeLists.txt
@@ -32,6 +32,7 @@ add_arrow_compute_test(scalar_test
                        test_util.cc)
 
 add_arrow_benchmark(scalar_arithmetic_benchmark PREFIX "arrow-compute")
+add_arrow_benchmark(scalar_boolean_benchmark PREFIX "arrow-compute")
 add_arrow_benchmark(scalar_cast_benchmark PREFIX "arrow-compute")
 add_arrow_benchmark(scalar_compare_benchmark PREFIX "arrow-compute")
 add_arrow_benchmark(scalar_set_lookup_benchmark PREFIX "arrow-compute")
diff --git a/cpp/src/arrow/compute/kernels/scalar_boolean_benchmark.cc b/cpp/src/arrow/compute/kernels/scalar_boolean_benchmark.cc
new file mode 100644
index 0000000..969b91a
--- /dev/null
+++ b/cpp/src/arrow/compute/kernels/scalar_boolean_benchmark.cc
@@ -0,0 +1,59 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "benchmark/benchmark.h"
+
+#include <vector>
+
+#include "arrow/compute/api_scalar.h"
+#include "arrow/compute/kernels/test_util.h"
+#include "arrow/testing/gtest_util.h"
+#include "arrow/testing/random.h"
+#include "arrow/util/benchmark_util.h"
+
+namespace arrow {
+namespace compute {
+
+constexpr auto kSeed = 0x94378165;
+
+using BooleanBinaryOp = Result<Datum>(const Datum&, const Datum&, ExecContext*);
+
+template <BooleanBinaryOp& Op>
+static void ArrayArrayKernel(benchmark::State& state) {
+  RegressionArgs args(state);
+
+  const int64_t array_size = args.size * 8;
+
+  auto rand = random::RandomArrayGenerator(kSeed);
+  auto lhs = rand.Boolean(array_size, /*true_probability=*/0.5, args.null_proportion);
+  auto rhs = rand.Boolean(array_size, /*true_probability=*/0.5, args.null_proportion);
+
+  for (auto _ : state) {
+    ABORT_NOT_OK(Op(lhs, rhs, nullptr).status());
+  }
+  state.SetItemsProcessed(state.iterations() * array_size);
+}
+
+void SetArgs(benchmark::internal::Benchmark* bench) {
+  BenchmarkSetArgsWithSizes(bench, {kL1Size, kL2Size});
+}
+
+BENCHMARK_TEMPLATE(ArrayArrayKernel, And)->Apply(SetArgs);
+BENCHMARK_TEMPLATE(ArrayArrayKernel, KleeneAnd)->Apply(SetArgs);
+
+}  // namespace compute
+}  // namespace arrow
diff --git a/cpp/src/arrow/compute/kernels/vector_selection.cc b/cpp/src/arrow/compute/kernels/vector_selection.cc
index 1062b53..ccca0e5 100644
--- a/cpp/src/arrow/compute/kernels/vector_selection.cc
+++ b/cpp/src/arrow/compute/kernels/vector_selection.cc
@@ -46,7 +46,6 @@ namespace arrow {
 using internal::BinaryBitBlockCounter;
 using internal::BitBlockCount;
 using internal::BitBlockCounter;
-using internal::BitmapReader;
 using internal::CheckIndexBounds;
 using internal::CopyBitmap;
 using internal::CountSetBits;
diff --git a/cpp/src/arrow/compute/kernels/vector_sort.cc b/cpp/src/arrow/compute/kernels/vector_sort.cc
index 9f7788a..c6ff5cd 100644
--- a/cpp/src/arrow/compute/kernels/vector_sort.cc
+++ b/cpp/src/arrow/compute/kernels/vector_sort.cc
@@ -26,6 +26,7 @@
 #include "arrow/compute/api_vector.h"
 #include "arrow/compute/kernels/common.h"
 #include "arrow/table.h"
+#include "arrow/util/bit_block_counter.h"
 #include "arrow/util/checked_cast.h"
 #include "arrow/util/optional.h"
 
@@ -238,21 +239,9 @@ inline void VisitRawValuesInline(const ArrayType& values,
                                  VisitorNotNull&& visitor_not_null,
                                  VisitorNull&& visitor_null) {
   const auto data = values.raw_values();
-  if (values.null_count() > 0) {
-    BitmapReader reader(values.null_bitmap_data(), values.offset(), values.length());
-    for (int64_t i = 0; i < values.length(); ++i) {
-      if (reader.IsSet()) {
-        visitor_not_null(data[i]);
-      } else {
-        visitor_null();
-      }
-      reader.Next();
-    }
-  } else {
-    for (int64_t i = 0; i < values.length(); ++i) {
-      visitor_not_null(data[i]);
-    }
-  }
+  VisitBitBlocksVoid(
+      values.null_bitmap(), values.offset(), values.length(),
+      [&](int64_t i) { visitor_not_null(data[i]); }, [&]() { visitor_null(); });
 }
 
 template <typename ArrowType>
diff --git a/cpp/src/arrow/util/bit_util_benchmark.cc b/cpp/src/arrow/util/bit_util_benchmark.cc
index a9c472b..434c6f6 100644
--- a/cpp/src/arrow/util/bit_util_benchmark.cc
+++ b/cpp/src/arrow/util/bit_util_benchmark.cc
@@ -45,6 +45,8 @@
 namespace arrow {
 namespace BitUtil {
 
+constexpr int64_t kBufferSize = 1024 * 8;
+
 #ifdef ARROW_WITH_BENCHMARKS_REFERENCE
 
 // A naive bitmap reader implementation, meant as a baseline against
@@ -318,6 +320,26 @@ static void BitmapReader(benchmark::State& state) {
   BenchmarkBitmapReader<internal::BitmapReader>(state, state.range(0));
 }
 
+static void BitmapUInt64Reader(benchmark::State& state) {
+  const int64_t nbytes = state.range(0);
+  std::shared_ptr<Buffer> buffer = CreateRandomBuffer(nbytes);
+
+  const int64_t num_bits = nbytes * 8;
+  const uint8_t* bitmap = buffer->data();
+
+  for (auto _ : state) {
+    {
+      internal::BitmapUInt64Reader reader(bitmap, 0, num_bits);
+      uint64_t total = 0;
+      for (int64_t i = 0; i < num_bits; i += 64) {
+        total += reader.NextWord();
+      }
+      benchmark::DoNotOptimize(total);
+    }
+  }
+  state.SetBytesProcessed(state.iterations() * nbytes);
+}
+
 static void BitRunReader(benchmark::State& state) {
   BenchmarkBitRunReader<internal::BitRunReader>(state, state.range(0));
 }
@@ -390,8 +412,6 @@ static void SetBitsTo(benchmark::State& state) {
   state.SetBytesProcessed(state.iterations() * nbytes);
 }
 
-constexpr int64_t kBufferSize = 1024 * 8;
-
 template <int64_t OffsetSrc, int64_t OffsetDest = 0>
 static void CopyBitmap(benchmark::State& state) {  // NOLINT non-const reference
   const int64_t buffer_size = state.range(0);
@@ -458,6 +478,8 @@ BENCHMARK(ReferenceNaiveBitmapReader)->Arg(kBufferSize);
 #endif
 
 BENCHMARK(BitmapReader)->Arg(kBufferSize);
+BENCHMARK(BitmapUInt64Reader)->Arg(kBufferSize);
+
 BENCHMARK(BitRunReader)
     ->Arg(-1)
     ->Arg(0)
@@ -476,6 +498,7 @@ BENCHMARK(BitRunReaderLinear)
     ->Arg(60)
     ->Arg(75)
     ->Arg(99);
+
 BENCHMARK(VisitBits)->Arg(kBufferSize);
 BENCHMARK(VisitBitsUnrolled)->Arg(kBufferSize);
 BENCHMARK(SetBitsTo)->Arg(2)->Arg(1 << 4)->Arg(1 << 10)->Arg(1 << 17);
@@ -490,6 +513,7 @@ BENCHMARK(ReferenceNaiveBitmapWriter)->Arg(kBufferSize);
 
 BENCHMARK(BitmapWriter)->Arg(kBufferSize);
 BENCHMARK(FirstTimeBitmapWriter)->Arg(kBufferSize);
+
 BENCHMARK(GenerateBits)->Arg(kBufferSize);
 BENCHMARK(GenerateBitsUnrolled)->Arg(kBufferSize);
 
diff --git a/cpp/src/arrow/util/bitmap.h b/cpp/src/arrow/util/bitmap.h
index 43d59f0..c26d75f 100644
--- a/cpp/src/arrow/util/bitmap.h
+++ b/cpp/src/arrow/util/bitmap.h
@@ -116,6 +116,11 @@ class ARROW_EXPORT Bitmap : public util::ToStringOstreamable<Bitmap>,
   /// returned.
   ///
   /// TODO(bkietz) allow for early termination
+  // NOTE: this function is efficient on 3+ sufficiently large bitmaps.
+  // It also has a large prolog / epilog overhead and should be used
+  // carefully in other cases.
+  // For 2 bitmaps or less, and/or smaller bitmaps, see also VisitTwoBitBlocksVoid
+  // and BitmapUInt64Reader.
   template <size_t N, typename Visitor,
             typename Word = typename std::decay<
                 internal::call_traits::argument_type<0, Visitor&&>>::type::value_type>
diff --git a/cpp/src/arrow/util/bitmap_ops.cc b/cpp/src/arrow/util/bitmap_ops.cc
index acc5bd9..9f1c636 100644
--- a/cpp/src/arrow/util/bitmap_ops.cc
+++ b/cpp/src/arrow/util/bitmap_ops.cc
@@ -86,6 +86,10 @@ int64_t CountSetBits(const uint8_t* data, int64_t bit_offset, int64_t length) {
 
 namespace {
 
+// BitmapWordReader here is faster than BitmapUInt64Reader (in bitmap_reader.h)
+// on sufficiently large inputs.  However, it has a larger prolog / epilog overhead
+// and should probably not be used for small bitmaps.
+
 template <typename Word>
 class BitmapWordReader {
  public: