You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by ap...@apache.org on 2020/05/11 16:19:19 UTC
[arrow] branch master updated: ARROW-8756: [C++] Fix Bitmap Words
tests' failures on big-endian platforms
This is an automated email from the ASF dual-hosted git repository.
apitrou pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/master by this push:
new bf722a0 ARROW-8756: [C++] Fix Bitmap Words tests' failures on big-endian platforms
bf722a0 is described below
commit bf722a01eebb42e5d49450dd6695469bac99ffcd
Author: Kazuaki Ishizaki <is...@jp.ibm.com>
AuthorDate: Mon May 11 18:18:49 2020 +0200
ARROW-8756: [C++] Fix Bitmap Words tests' failures on big-endian platforms
This PR adds support of multiple-word operation on big-endian platforms. There are optimized code to concat multiple words into one word with bit shift operations. The current code assumes a little-endian platform.
This code support bit-endian by adding conversions between little-endian and big-endian. This is because the shift operations assume the little-endian layout. It is easy to implement by adding conversions.
With #7136 and this PR, the following failures in arrow-utility-test will be fixed.
```
[ FAILED ] Bitmap.VisitWordsAnd
[ FAILED ] Bitmap.ShiftingWordsOptimization
```
Closes #7145 from kiszk/ARROW-8756
Lead-authored-by: Kazuaki Ishizaki <is...@jp.ibm.com>
Co-authored-by: Antoine Pitrou <an...@python.org>
Signed-off-by: Antoine Pitrou <an...@python.org>
---
cpp/src/arrow/util/bit_util.h | 6 ++++--
cpp/src/arrow/util/bit_util_test.cc | 16 ++++++++++------
2 files changed, 14 insertions(+), 8 deletions(-)
diff --git a/cpp/src/arrow/util/bit_util.h b/cpp/src/arrow/util/bit_util.h
index 6e294c6..e708c37 100644
--- a/cpp/src/arrow/util/bit_util.h
+++ b/cpp/src/arrow/util/bit_util.h
@@ -999,8 +999,10 @@ class ARROW_EXPORT Bitmap : public util::ToStringOstreamable<Bitmap>,
if (offsets[i] == 0) {
visited_words[i] = words[i][word_i];
} else {
- visited_words[i] = words[i][word_i] >> offsets[i];
- visited_words[i] |= words[i][word_i + 1] << (kBitWidth - offsets[i]);
+ auto words0 = BitUtil::ToLittleEndian(words[i][word_i]);
+ auto words1 = BitUtil::ToLittleEndian(words[i][word_i + 1]);
+ visited_words[i] = BitUtil::FromLittleEndian(
+ (words0 >> offsets[i]) | (words1 << (kBitWidth - offsets[i])));
}
}
visitor(visited_words);
diff --git a/cpp/src/arrow/util/bit_util_test.cc b/cpp/src/arrow/util/bit_util_test.cc
index 11eaf94..43b6615 100644
--- a/cpp/src/arrow/util/bit_util_test.cc
+++ b/cpp/src/arrow/util/bit_util_test.cc
@@ -1127,15 +1127,16 @@ TEST(Bitmap, ShiftingWordsOptimization) {
for (int seed = 0; seed < 64; ++seed) {
random_bytes(sizeof(word), seed, bytes);
+ uint64_t native_word = BitUtil::FromLittleEndian(word);
// bits are accessible through simple bit shifting of the word
for (size_t i = 0; i < kBitWidth; ++i) {
- ASSERT_EQ(BitUtil::GetBit(bytes, i), bool((word >> i) & 1));
+ ASSERT_EQ(BitUtil::GetBit(bytes, i), bool((native_word >> i) & 1));
}
// bit offset can therefore be accommodated by shifting the word
for (size_t offset = 0; offset < (kBitWidth * 3) / 4; ++offset) {
- uint64_t shifted_word = word >> offset;
+ uint64_t shifted_word = arrow::BitUtil::ToLittleEndian(native_word >> offset);
auto shifted_bytes = reinterpret_cast<uint8_t*>(&shifted_word);
ASSERT_TRUE(
internal::BitmapEquals(bytes, offset, shifted_bytes, 0, kBitWidth - offset));
@@ -1151,20 +1152,23 @@ TEST(Bitmap, ShiftingWordsOptimization) {
for (int seed = 0; seed < 64; ++seed) {
random_bytes(sizeof(words), seed, bytes);
+ uint64_t native_words0 = BitUtil::FromLittleEndian(words[0]);
+ uint64_t native_words1 = BitUtil::FromLittleEndian(words[1]);
// bits are accessible through simple bit shifting of a word
for (size_t i = 0; i < kBitWidth; ++i) {
- ASSERT_EQ(BitUtil::GetBit(bytes, i), bool((words[0] >> i) & 1));
+ ASSERT_EQ(BitUtil::GetBit(bytes, i), bool((native_words0 >> i) & 1));
}
for (size_t i = 0; i < kBitWidth; ++i) {
- ASSERT_EQ(BitUtil::GetBit(bytes, i + kBitWidth), bool((words[1] >> i) & 1));
+ ASSERT_EQ(BitUtil::GetBit(bytes, i + kBitWidth), bool((native_words1 >> i) & 1));
}
// bit offset can therefore be accommodated by shifting the word
for (size_t offset = 1; offset < (kBitWidth * 3) / 4; offset += 3) {
uint64_t shifted_words[2];
- shifted_words[0] = words[0] >> offset | (words[1] << (kBitWidth - offset));
- shifted_words[1] = words[1] >> offset;
+ shifted_words[0] = arrow::BitUtil::ToLittleEndian(
+ native_words0 >> offset | (native_words1 << (kBitWidth - offset)));
+ shifted_words[1] = arrow::BitUtil::ToLittleEndian(native_words1 >> offset);
auto shifted_bytes = reinterpret_cast<uint8_t*>(shifted_words);
// from offset to unshifted word boundary