You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by we...@apache.org on 2018/01/10 22:10:48 UTC
[arrow] branch master updated: ARROW-764: [C++] Improves
performance of CopyBitmap and adds benchmarks
This is an automated email from the ASF dual-hosted git repository.
wesm pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/master by this push:
new 9eae508 ARROW-764: [C++] Improves performance of CopyBitmap and adds benchmarks
9eae508 is described below
commit 9eae5089b5099548f67bbeca9927ca81df9d5de8
Author: Adam Seibert <se...@users.noreply.github.com>
AuthorDate: Wed Jan 10 17:10:39 2018 -0500
ARROW-764: [C++] Improves performance of CopyBitmap and adds benchmarks
I took a swing at improving the CopyBitmap performance (benchmarks below). I'm a C/C++ novice, so I thought I'd get some feedback before I went too much further.
**Starting Point**
```
Run on (4 X 2208 MHz CPU s)
12/13/17 21:15:18
Benchmark Time CPU Iterations
------------------------------------------------------------------------------
BM_CopyBitmap/97.6563k/0/min_time:1.000 4779 us 4758 us 289 20.0445MB/s
BM_CopyBitmap/976.563k/0/min_time:1.000 47740 us 47476 us 26 20.0875MB/s
BM_CopyBitmap/97.6563k/4/min_time:1.000 4858 us 4866 us 289 19.5991MB/s
BM_CopyBitmap/976.563k/4/min_time:1.000 48117 us 47953 us 29 19.8879MB/s
```
**Using stanford bithacks for SetBitTo**
```
Run on (4 X 2208 MHz CPU s)
12/13/17 21:22:05
Benchmark Time CPU Iterations
------------------------------------------------------------------------------
BM_CopyBitmap/97.6563k/0/min_time:1.000 1647 us 1649 us 815 57.8415MB/s
BM_CopyBitmap/976.563k/0/min_time:1.000 16368 us 16397 us 81 58.1629MB/s
BM_CopyBitmap/97.6563k/4/min_time:1.000 1599 us 1610 us 815 59.2186MB/s
BM_CopyBitmap/976.563k/4/min_time:1.000 16026 us 16011 us 81 59.5644MB/s
```
**memcpy + shifting**
*This solution provides varying performance depending on whether or not the bit offset is a multiple of 8*
```
Run on (4 X 2208 MHz CPU s)
12/13/17 21:23:44
Benchmark Time CPU Iterations
------------------------------------------------------------------------------
BM_CopyBitmap/97.6563k/0/min_time:1.000 5 us 5 us 280000 18.9651GB/s
BM_CopyBitmap/976.563k/0/min_time:1.000 62 us 61 us 22400 15.1721GB/s
BM_CopyBitmap/97.6563k/4/min_time:1.000 171 us 170 us 6892 560.872MB/s
BM_CopyBitmap/976.563k/4/min_time:1.000 1639 us 1639 us 896 581.782MB/s
```
Author: Adam Seibert <se...@users.noreply.github.com>
Closes #1422 from seibs/ARROW-764 and squashes the following commits:
c813c8e2 [Adam Seibert] ARROW-764: [C++] Improves performance of CopyBitmap and adds benchmarks
---
cpp/src/arrow/util/CMakeLists.txt | 6 ++--
cpp/src/arrow/util/bit-util-benchmark.cc | 58 ++++++++++++++++++++++++++++++++
cpp/src/arrow/util/bit-util-test.cc | 17 +++++-----
cpp/src/arrow/util/bit-util.cc | 32 ++++++++++++++++--
cpp/src/arrow/util/bit-util.h | 9 ++---
5 files changed, 104 insertions(+), 18 deletions(-)
diff --git a/cpp/src/arrow/util/CMakeLists.txt b/cpp/src/arrow/util/CMakeLists.txt
index a36dffb..8b61a3a 100644
--- a/cpp/src/arrow/util/CMakeLists.txt
+++ b/cpp/src/arrow/util/CMakeLists.txt
@@ -63,10 +63,10 @@ if (ARROW_BUILD_BENCHMARKS)
Shlwapi.lib
)
else()
- target_link_libraries(arrow_benchmark_main
+ target_link_libraries(arrow_benchmark_main
benchmark
pthread
- )
+ )
endif()
# TODO(wesm): Some benchmarks include gtest.h
@@ -80,4 +80,6 @@ ADD_ARROW_TEST(key-value-metadata-test)
ADD_ARROW_TEST(rle-encoding-test)
ADD_ARROW_TEST(stl-util-test)
+ADD_ARROW_BENCHMARK(bit-util-benchmark)
+
add_subdirectory(variant)
diff --git a/cpp/src/arrow/util/bit-util-benchmark.cc b/cpp/src/arrow/util/bit-util-benchmark.cc
new file mode 100644
index 0000000..8969dd8
--- /dev/null
+++ b/cpp/src/arrow/util/bit-util-benchmark.cc
@@ -0,0 +1,58 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "benchmark/benchmark.h"
+
+#include <vector>
+
+#include "arrow/buffer.h"
+#include "arrow/builder.h"
+#include "arrow/memory_pool.h"
+#include "arrow/test-util.h"
+#include "arrow/util/bit-util.h"
+
+namespace arrow {
+namespace BitUtil {
+
+static void BM_CopyBitmap(benchmark::State& state) { // NOLINT non-const reference
+ const int kBufferSize = state.range(0);
+
+ std::shared_ptr<Buffer> buffer;
+ ASSERT_OK(AllocateBuffer(default_memory_pool(), kBufferSize, &buffer));
+ memset(buffer->mutable_data(), 0, kBufferSize);
+ test::random_bytes(kBufferSize, 0, buffer->mutable_data());
+
+ const int num_bits = kBufferSize * 8;
+ const uint8_t* src = buffer->data();
+
+ std::shared_ptr<Buffer> copy;
+ while (state.KeepRunning()) {
+ ABORT_NOT_OK(CopyBitmap(default_memory_pool(), src, state.range(1), num_bits, ©));
+ }
+ state.SetBytesProcessed(state.iterations() * kBufferSize * sizeof(int8_t));
+}
+
+BENCHMARK(BM_CopyBitmap)
+ ->Args({100000, 0})
+ ->Args({1000000, 0})
+ ->Args({100000, 4})
+ ->Args({1000000, 4})
+ ->MinTime(1.0)
+ ->Unit(benchmark::kMicrosecond);
+
+} // namespace BitUtil
+} // namespace arrow
diff --git a/cpp/src/arrow/util/bit-util-test.cc b/cpp/src/arrow/util/bit-util-test.cc
index 92bdcb5..4c64dea 100644
--- a/cpp/src/arrow/util/bit-util-test.cc
+++ b/cpp/src/arrow/util/bit-util-test.cc
@@ -165,19 +165,20 @@ TEST(BitUtilTests, TestCopyBitmap) {
memset(buffer->mutable_data(), 0, kBufferSize);
test::random_bytes(kBufferSize, 0, buffer->mutable_data());
- const int num_bits = kBufferSize * 8;
-
const uint8_t* src = buffer->data();
+ std::vector<int64_t> lengths = {kBufferSize * 8 - 4, kBufferSize * 8};
std::vector<int64_t> offsets = {0, 12, 16, 32, 37, 63, 64, 128};
- for (int64_t offset : offsets) {
- const int64_t copy_length = num_bits - offset;
+ for (int64_t num_bits : lengths) {
+ for (int64_t offset : offsets) {
+ const int64_t copy_length = num_bits - offset;
- std::shared_ptr<Buffer> copy;
- ASSERT_OK(CopyBitmap(default_memory_pool(), src, offset, copy_length, ©));
+ std::shared_ptr<Buffer> copy;
+ ASSERT_OK(CopyBitmap(default_memory_pool(), src, offset, copy_length, ©));
- for (int64_t i = 0; i < copy_length; ++i) {
- ASSERT_EQ(BitUtil::GetBit(src, i + offset), BitUtil::GetBit(copy->data(), i));
+ for (int64_t i = 0; i < copy_length; ++i) {
+ ASSERT_EQ(BitUtil::GetBit(src, i + offset), BitUtil::GetBit(copy->data(), i));
+ }
}
}
}
diff --git a/cpp/src/arrow/util/bit-util.cc b/cpp/src/arrow/util/bit-util.cc
index 4dd91e9..c77f0d0 100644
--- a/cpp/src/arrow/util/bit-util.cc
+++ b/cpp/src/arrow/util/bit-util.cc
@@ -109,9 +109,37 @@ Status CopyBitmap(MemoryPool* pool, const uint8_t* data, int64_t offset, int64_t
std::shared_ptr<Buffer> buffer;
RETURN_NOT_OK(GetEmptyBitmap(pool, length, &buffer));
uint8_t* dest = buffer->mutable_data();
- for (int64_t i = 0; i < length; ++i) {
- BitUtil::SetBitTo(dest, i, BitUtil::GetBit(data, i + offset));
+
+ int64_t byte_offset = offset / 8;
+ int64_t bit_offset = offset % 8;
+ int64_t num_bytes = BitUtil::BytesForBits(length);
+ int64_t bits_to_zero = num_bytes * 8 - length;
+
+ if (bit_offset > 0) {
+ uint32_t carry_mask = BitUtil::kBitmask[bit_offset] - 1U;
+ uint32_t carry_shift = 8U - static_cast<uint32_t>(bit_offset);
+
+ uint32_t carry = 0U;
+ if (BitUtil::BytesForBits(length + bit_offset) > num_bytes) {
+ carry = (data[byte_offset + num_bytes] & carry_mask) << carry_shift;
+ }
+
+ int64_t i = num_bytes - 1;
+ while (i + 1 > 0) {
+ uint8_t cur_byte = data[byte_offset + i];
+ dest[i] = static_cast<uint8_t>((cur_byte >> bit_offset) | carry);
+ carry = (cur_byte & carry_mask) << carry_shift;
+ --i;
+ }
+ } else {
+ std::memcpy(dest, data + byte_offset, static_cast<size_t>(num_bytes));
+ }
+
+ for (int64_t i = length; i < length + bits_to_zero; ++i) {
+ // Both branches may copy extra bits - unsetting to match specification.
+ BitUtil::SetBitTo(dest, i, false);
}
+
*out = buffer;
return Status::OK();
}
diff --git a/cpp/src/arrow/util/bit-util.h b/cpp/src/arrow/util/bit-util.h
index cab3c9e..86c17d1 100644
--- a/cpp/src/arrow/util/bit-util.h
+++ b/cpp/src/arrow/util/bit-util.h
@@ -139,13 +139,10 @@ static inline void SetArrayBit(uint8_t* bits, int i, bool is_set) {
}
static inline void SetBitTo(uint8_t* bits, int64_t i, bool bit_is_set) {
- // TODO: speed up. See https://graphics.stanford.edu/~seander/bithacks.html
+ // https://graphics.stanford.edu/~seander/bithacks.html
// "Conditionally set or clear bits without branching"
- if (bit_is_set) {
- SetBit(bits, i);
- } else {
- ClearBit(bits, i);
- }
+ bits[i / 8] ^= static_cast<uint8_t>(-static_cast<uint8_t>(bit_is_set) ^ bits[i / 8]) &
+ kBitmask[i % 8];
}
// Returns the minimum number of bits needed to represent the value of 'x'
--
To stop receiving notification emails like this one, please contact
['"commits@arrow.apache.org" <co...@arrow.apache.org>'].