You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by ap...@apache.org on 2020/12/02 07:37:46 UTC
[arrow] branch master updated: ARROW-10697: [C++] Add notes about
bitmap readers
This is an automated email from the ASF dual-hosted git repository.
apitrou pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/master by this push:
new c080548 ARROW-10697: [C++] Add notes about bitmap readers
c080548 is described below
commit c08054867d08231a160288f14cd69216b46b221c
Author: Antoine Pitrou <an...@python.org>
AuthorDate: Wed Dec 2 08:36:59 2020 +0100
ARROW-10697: [C++] Add notes about bitmap readers
Add some notes obtained after benchmarking various solutions.
Add a benchmark of scalar boolean kernels.
Closes #8761 from pitrou/ARROW-10697-bitmap-readers
Authored-by: Antoine Pitrou <an...@python.org>
Signed-off-by: Antoine Pitrou <an...@python.org>
---
cpp/src/arrow/compare.cc | 6 ++-
cpp/src/arrow/compute/kernels/CMakeLists.txt | 1 +
.../compute/kernels/scalar_boolean_benchmark.cc | 59 ++++++++++++++++++++++
cpp/src/arrow/compute/kernels/vector_selection.cc | 1 -
cpp/src/arrow/compute/kernels/vector_sort.cc | 19 ++-----
cpp/src/arrow/util/bit_util_benchmark.cc | 28 +++++++++-
cpp/src/arrow/util/bitmap.h | 5 ++
cpp/src/arrow/util/bitmap_ops.cc | 4 ++
8 files changed, 104 insertions(+), 19 deletions(-)
diff --git a/cpp/src/arrow/compare.cc b/cpp/src/arrow/compare.cc
index b696c76..d9cbe21 100644
--- a/cpp/src/arrow/compare.cc
+++ b/cpp/src/arrow/compare.cc
@@ -136,7 +136,7 @@ class RangeDataEqualsImpl {
}
}
return true;
- } else {
+ } else if (length <= 1024) {
BitmapUInt64Reader left_reader(left_bits, left_start_idx_ + left_.offset + i,
length);
BitmapUInt64Reader right_reader(right_bits, right_start_idx_ + right_.offset + i,
@@ -147,6 +147,10 @@ class RangeDataEqualsImpl {
}
}
DCHECK_EQ(right_reader.position(), length);
+ } else {
+ // BitmapEquals is the fastest method on large runs
+ return BitmapEquals(left_bits, left_start_idx_ + left_.offset + i, right_bits,
+ right_start_idx_ + right_.offset + i, length);
}
return true;
};
diff --git a/cpp/src/arrow/compute/kernels/CMakeLists.txt b/cpp/src/arrow/compute/kernels/CMakeLists.txt
index 0bca453..577b250 100644
--- a/cpp/src/arrow/compute/kernels/CMakeLists.txt
+++ b/cpp/src/arrow/compute/kernels/CMakeLists.txt
@@ -32,6 +32,7 @@ add_arrow_compute_test(scalar_test
test_util.cc)
add_arrow_benchmark(scalar_arithmetic_benchmark PREFIX "arrow-compute")
+add_arrow_benchmark(scalar_boolean_benchmark PREFIX "arrow-compute")
add_arrow_benchmark(scalar_cast_benchmark PREFIX "arrow-compute")
add_arrow_benchmark(scalar_compare_benchmark PREFIX "arrow-compute")
add_arrow_benchmark(scalar_set_lookup_benchmark PREFIX "arrow-compute")
diff --git a/cpp/src/arrow/compute/kernels/scalar_boolean_benchmark.cc b/cpp/src/arrow/compute/kernels/scalar_boolean_benchmark.cc
new file mode 100644
index 0000000..969b91a
--- /dev/null
+++ b/cpp/src/arrow/compute/kernels/scalar_boolean_benchmark.cc
@@ -0,0 +1,59 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "benchmark/benchmark.h"
+
+#include <vector>
+
+#include "arrow/compute/api_scalar.h"
+#include "arrow/compute/kernels/test_util.h"
+#include "arrow/testing/gtest_util.h"
+#include "arrow/testing/random.h"
+#include "arrow/util/benchmark_util.h"
+
+namespace arrow {
+namespace compute {
+
+constexpr auto kSeed = 0x94378165;
+
+using BooleanBinaryOp = Result<Datum>(const Datum&, const Datum&, ExecContext*);
+
+template <BooleanBinaryOp& Op>
+static void ArrayArrayKernel(benchmark::State& state) {
+ RegressionArgs args(state);
+
+ const int64_t array_size = args.size * 8;
+
+ auto rand = random::RandomArrayGenerator(kSeed);
+ auto lhs = rand.Boolean(array_size, /*true_probability=*/0.5, args.null_proportion);
+ auto rhs = rand.Boolean(array_size, /*true_probability=*/0.5, args.null_proportion);
+
+ for (auto _ : state) {
+ ABORT_NOT_OK(Op(lhs, rhs, nullptr).status());
+ }
+ state.SetItemsProcessed(state.iterations() * array_size);
+}
+
+void SetArgs(benchmark::internal::Benchmark* bench) {
+ BenchmarkSetArgsWithSizes(bench, {kL1Size, kL2Size});
+}
+
+BENCHMARK_TEMPLATE(ArrayArrayKernel, And)->Apply(SetArgs);
+BENCHMARK_TEMPLATE(ArrayArrayKernel, KleeneAnd)->Apply(SetArgs);
+
+} // namespace compute
+} // namespace arrow
diff --git a/cpp/src/arrow/compute/kernels/vector_selection.cc b/cpp/src/arrow/compute/kernels/vector_selection.cc
index 1062b53..ccca0e5 100644
--- a/cpp/src/arrow/compute/kernels/vector_selection.cc
+++ b/cpp/src/arrow/compute/kernels/vector_selection.cc
@@ -46,7 +46,6 @@ namespace arrow {
using internal::BinaryBitBlockCounter;
using internal::BitBlockCount;
using internal::BitBlockCounter;
-using internal::BitmapReader;
using internal::CheckIndexBounds;
using internal::CopyBitmap;
using internal::CountSetBits;
diff --git a/cpp/src/arrow/compute/kernels/vector_sort.cc b/cpp/src/arrow/compute/kernels/vector_sort.cc
index 9f7788a..c6ff5cd 100644
--- a/cpp/src/arrow/compute/kernels/vector_sort.cc
+++ b/cpp/src/arrow/compute/kernels/vector_sort.cc
@@ -26,6 +26,7 @@
#include "arrow/compute/api_vector.h"
#include "arrow/compute/kernels/common.h"
#include "arrow/table.h"
+#include "arrow/util/bit_block_counter.h"
#include "arrow/util/checked_cast.h"
#include "arrow/util/optional.h"
@@ -238,21 +239,9 @@ inline void VisitRawValuesInline(const ArrayType& values,
VisitorNotNull&& visitor_not_null,
VisitorNull&& visitor_null) {
const auto data = values.raw_values();
- if (values.null_count() > 0) {
- BitmapReader reader(values.null_bitmap_data(), values.offset(), values.length());
- for (int64_t i = 0; i < values.length(); ++i) {
- if (reader.IsSet()) {
- visitor_not_null(data[i]);
- } else {
- visitor_null();
- }
- reader.Next();
- }
- } else {
- for (int64_t i = 0; i < values.length(); ++i) {
- visitor_not_null(data[i]);
- }
- }
+ VisitBitBlocksVoid(
+ values.null_bitmap(), values.offset(), values.length(),
+ [&](int64_t i) { visitor_not_null(data[i]); }, [&]() { visitor_null(); });
}
template <typename ArrowType>
diff --git a/cpp/src/arrow/util/bit_util_benchmark.cc b/cpp/src/arrow/util/bit_util_benchmark.cc
index a9c472b..434c6f6 100644
--- a/cpp/src/arrow/util/bit_util_benchmark.cc
+++ b/cpp/src/arrow/util/bit_util_benchmark.cc
@@ -45,6 +45,8 @@
namespace arrow {
namespace BitUtil {
+constexpr int64_t kBufferSize = 1024 * 8;
+
#ifdef ARROW_WITH_BENCHMARKS_REFERENCE
// A naive bitmap reader implementation, meant as a baseline against
@@ -318,6 +320,26 @@ static void BitmapReader(benchmark::State& state) {
BenchmarkBitmapReader<internal::BitmapReader>(state, state.range(0));
}
+static void BitmapUInt64Reader(benchmark::State& state) {
+ const int64_t nbytes = state.range(0);
+ std::shared_ptr<Buffer> buffer = CreateRandomBuffer(nbytes);
+
+ const int64_t num_bits = nbytes * 8;
+ const uint8_t* bitmap = buffer->data();
+
+ for (auto _ : state) {
+ {
+ internal::BitmapUInt64Reader reader(bitmap, 0, num_bits);
+ uint64_t total = 0;
+ for (int64_t i = 0; i < num_bits; i += 64) {
+ total += reader.NextWord();
+ }
+ benchmark::DoNotOptimize(total);
+ }
+ }
+ state.SetBytesProcessed(state.iterations() * nbytes);
+}
+
static void BitRunReader(benchmark::State& state) {
BenchmarkBitRunReader<internal::BitRunReader>(state, state.range(0));
}
@@ -390,8 +412,6 @@ static void SetBitsTo(benchmark::State& state) {
state.SetBytesProcessed(state.iterations() * nbytes);
}
-constexpr int64_t kBufferSize = 1024 * 8;
-
template <int64_t OffsetSrc, int64_t OffsetDest = 0>
static void CopyBitmap(benchmark::State& state) { // NOLINT non-const reference
const int64_t buffer_size = state.range(0);
@@ -458,6 +478,8 @@ BENCHMARK(ReferenceNaiveBitmapReader)->Arg(kBufferSize);
#endif
BENCHMARK(BitmapReader)->Arg(kBufferSize);
+BENCHMARK(BitmapUInt64Reader)->Arg(kBufferSize);
+
BENCHMARK(BitRunReader)
->Arg(-1)
->Arg(0)
@@ -476,6 +498,7 @@ BENCHMARK(BitRunReaderLinear)
->Arg(60)
->Arg(75)
->Arg(99);
+
BENCHMARK(VisitBits)->Arg(kBufferSize);
BENCHMARK(VisitBitsUnrolled)->Arg(kBufferSize);
BENCHMARK(SetBitsTo)->Arg(2)->Arg(1 << 4)->Arg(1 << 10)->Arg(1 << 17);
@@ -490,6 +513,7 @@ BENCHMARK(ReferenceNaiveBitmapWriter)->Arg(kBufferSize);
BENCHMARK(BitmapWriter)->Arg(kBufferSize);
BENCHMARK(FirstTimeBitmapWriter)->Arg(kBufferSize);
+
BENCHMARK(GenerateBits)->Arg(kBufferSize);
BENCHMARK(GenerateBitsUnrolled)->Arg(kBufferSize);
diff --git a/cpp/src/arrow/util/bitmap.h b/cpp/src/arrow/util/bitmap.h
index 43d59f0..c26d75f 100644
--- a/cpp/src/arrow/util/bitmap.h
+++ b/cpp/src/arrow/util/bitmap.h
@@ -116,6 +116,11 @@ class ARROW_EXPORT Bitmap : public util::ToStringOstreamable<Bitmap>,
/// returned.
///
/// TODO(bkietz) allow for early termination
+ // NOTE: this function is efficient on 3+ sufficiently large bitmaps.
+ // It also has a large prolog / epilog overhead and should be used
+ // carefully in other cases.
+ // For 2 bitmaps or less, and/or smaller bitmaps, see also VisitTwoBitBlocksVoid
+ // and BitmapUInt64Reader.
template <size_t N, typename Visitor,
typename Word = typename std::decay<
internal::call_traits::argument_type<0, Visitor&&>>::type::value_type>
diff --git a/cpp/src/arrow/util/bitmap_ops.cc b/cpp/src/arrow/util/bitmap_ops.cc
index acc5bd9..9f1c636 100644
--- a/cpp/src/arrow/util/bitmap_ops.cc
+++ b/cpp/src/arrow/util/bitmap_ops.cc
@@ -86,6 +86,10 @@ int64_t CountSetBits(const uint8_t* data, int64_t bit_offset, int64_t length) {
namespace {
+// BitmapWordReader here is faster than BitmapUInt64Reader (in bitmap_reader.h)
+// on sufficiently large inputs. However, it has a larger prolog / epilog overhead
+// and should probably not be used for small bitmaps.
+
template <typename Word>
class BitmapWordReader {
public: