You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by we...@apache.org on 2017/02/13 14:03:52 UTC
arrow git commit: ARROW-553: C++: Faster valid bitmap building
Repository: arrow
Updated Branches:
refs/heads/master 1f26040f5 -> ad0157547
ARROW-553: C++: Faster valid bitmap building
Author: Uwe L. Korn <uw...@xhochy.com>
Closes #338 from xhochy/ARROW-553 and squashes the following commits:
1c1ee3d [Uwe L. Korn] ARROW-553: C++: Faster valid bitmap building
Project: http://git-wip-us.apache.org/repos/asf/arrow/repo
Commit: http://git-wip-us.apache.org/repos/asf/arrow/commit/ad015754
Tree: http://git-wip-us.apache.org/repos/asf/arrow/tree/ad015754
Diff: http://git-wip-us.apache.org/repos/asf/arrow/diff/ad015754
Branch: refs/heads/master
Commit: ad0157547a4f5e6e51fa2f712c2ed9477489a20c
Parents: 1f26040
Author: Uwe L. Korn <uw...@xhochy.com>
Authored: Mon Feb 13 09:03:46 2017 -0500
Committer: Wes McKinney <we...@twosigma.com>
Committed: Mon Feb 13 09:03:46 2017 -0500
----------------------------------------------------------------------
cpp/src/arrow/builder.cc | 41 ++++++++++++++++++--
.../jemalloc/jemalloc-builder-benchmark.cc | 2 +-
2 files changed, 38 insertions(+), 5 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/arrow/blob/ad015754/cpp/src/arrow/builder.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/builder.cc b/cpp/src/arrow/builder.cc
index dddadee..f5c13f9 100644
--- a/cpp/src/arrow/builder.cc
+++ b/cpp/src/arrow/builder.cc
@@ -114,18 +114,51 @@ void ArrayBuilder::UnsafeAppendToBitmap(const uint8_t* valid_bytes, int32_t leng
UnsafeSetNotNull(length);
return;
}
+
+ int byte_offset = length_ / 8;
+ int bit_offset = length_ % 8;
+ uint8_t bitset = null_bitmap_data_[byte_offset];
+
for (int32_t i = 0; i < length; ++i) {
- // TODO(emkornfield) Optimize for large values of length?
- UnsafeAppendToBitmap(valid_bytes[i] > 0);
+ if (valid_bytes[i]) {
+ bitset |= (1 << bit_offset);
+ } else {
+ bitset &= ~(1 << bit_offset);
+ ++null_count_;
+ }
+
+ bit_offset++;
+ if (bit_offset == 8) {
+ bit_offset = 0;
+ null_bitmap_data_[byte_offset] = bitset;
+ byte_offset++;
+ // TODO: Except for the last byte, this shouldn't be needed
+ bitset = null_bitmap_data_[byte_offset];
+ }
}
+ if (bit_offset != 0) { null_bitmap_data_[byte_offset] = bitset; }
+ length_ += length;
}
void ArrayBuilder::UnsafeSetNotNull(int32_t length) {
const int32_t new_length = length + length_;
- // TODO(emkornfield) Optimize for large values of length?
- for (int32_t i = length_; i < new_length; ++i) {
+
+ // Fill up the bytes until we have a byte alignment
+ int32_t pad_to_byte = 8 - (length_ % 8);
+ if (pad_to_byte == 8) { pad_to_byte = 0; }
+ for (int32_t i = 0; i < pad_to_byte; ++i) {
+ BitUtil::SetBit(null_bitmap_data_, i);
+ }
+
+ // Fast bitsetting
+ int32_t fast_length = (length - pad_to_byte) / 8;
+ memset(null_bitmap_data_ + ((length_ + pad_to_byte) / 8), 255, fast_length);
+
+ // Trailing bytes
+ for (int32_t i = length_ + pad_to_byte + (fast_length * 8); i < new_length; ++i) {
BitUtil::SetBit(null_bitmap_data_, i);
}
+
length_ = new_length;
}
http://git-wip-us.apache.org/repos/asf/arrow/blob/ad015754/cpp/src/arrow/jemalloc/jemalloc-builder-benchmark.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/jemalloc/jemalloc-builder-benchmark.cc b/cpp/src/arrow/jemalloc/jemalloc-builder-benchmark.cc
index 58dbaa3..d69c304 100644
--- a/cpp/src/arrow/jemalloc/jemalloc-builder-benchmark.cc
+++ b/cpp/src/arrow/jemalloc/jemalloc-builder-benchmark.cc
@@ -42,6 +42,6 @@ static void BM_BuildPrimitiveArrayNoNulls(
state.iterations() * data.size() * sizeof(int64_t) * kFinalSize);
}
-BENCHMARK(BM_BuildPrimitiveArrayNoNulls)->Repetitions(3)->Unit(benchmark::kMillisecond);
+BENCHMARK(BM_BuildPrimitiveArrayNoNulls)->Repetitions(5)->Unit(benchmark::kMillisecond);
} // namespace arrow