You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by ap...@apache.org on 2020/05/11 16:19:19 UTC

[arrow] branch master updated: ARROW-8756: [C++] Fix Bitmap Words tests' failures on big-endian platforms

This is an automated email from the ASF dual-hosted git repository.

apitrou pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/master by this push:
     new bf722a0  ARROW-8756: [C++] Fix Bitmap Words tests' failures on big-endian platforms
bf722a0 is described below

commit bf722a01eebb42e5d49450dd6695469bac99ffcd
Author: Kazuaki Ishizaki <is...@jp.ibm.com>
AuthorDate: Mon May 11 18:18:49 2020 +0200

    ARROW-8756: [C++] Fix Bitmap Words tests' failures on big-endian platforms
    
    This PR adds support of multiple-word operation on big-endian platforms. There are optimized code to concat multiple words into one word with bit shift operations. The current code assumes a little-endian platform.
    This code support bit-endian by adding conversions between little-endian and big-endian. This is because the shift operations assume the little-endian layout. It is easy to implement by adding conversions.
    
    With #7136 and this PR, the following failures in arrow-utility-test will be fixed.
    ```
    [  FAILED  ] Bitmap.VisitWordsAnd
    [  FAILED  ] Bitmap.ShiftingWordsOptimization
    ```
    
    Closes #7145 from kiszk/ARROW-8756
    
    Lead-authored-by: Kazuaki Ishizaki <is...@jp.ibm.com>
    Co-authored-by: Antoine Pitrou <an...@python.org>
    Signed-off-by: Antoine Pitrou <an...@python.org>
---
 cpp/src/arrow/util/bit_util.h       |  6 ++++--
 cpp/src/arrow/util/bit_util_test.cc | 16 ++++++++++------
 2 files changed, 14 insertions(+), 8 deletions(-)

diff --git a/cpp/src/arrow/util/bit_util.h b/cpp/src/arrow/util/bit_util.h
index 6e294c6..e708c37 100644
--- a/cpp/src/arrow/util/bit_util.h
+++ b/cpp/src/arrow/util/bit_util.h
@@ -999,8 +999,10 @@ class ARROW_EXPORT Bitmap : public util::ToStringOstreamable<Bitmap>,
           if (offsets[i] == 0) {
             visited_words[i] = words[i][word_i];
           } else {
-            visited_words[i] = words[i][word_i] >> offsets[i];
-            visited_words[i] |= words[i][word_i + 1] << (kBitWidth - offsets[i]);
+            auto words0 = BitUtil::ToLittleEndian(words[i][word_i]);
+            auto words1 = BitUtil::ToLittleEndian(words[i][word_i + 1]);
+            visited_words[i] = BitUtil::FromLittleEndian(
+                (words0 >> offsets[i]) | (words1 << (kBitWidth - offsets[i])));
           }
         }
         visitor(visited_words);
diff --git a/cpp/src/arrow/util/bit_util_test.cc b/cpp/src/arrow/util/bit_util_test.cc
index 11eaf94..43b6615 100644
--- a/cpp/src/arrow/util/bit_util_test.cc
+++ b/cpp/src/arrow/util/bit_util_test.cc
@@ -1127,15 +1127,16 @@ TEST(Bitmap, ShiftingWordsOptimization) {
 
     for (int seed = 0; seed < 64; ++seed) {
       random_bytes(sizeof(word), seed, bytes);
+      uint64_t native_word = BitUtil::FromLittleEndian(word);
 
       // bits are accessible through simple bit shifting of the word
       for (size_t i = 0; i < kBitWidth; ++i) {
-        ASSERT_EQ(BitUtil::GetBit(bytes, i), bool((word >> i) & 1));
+        ASSERT_EQ(BitUtil::GetBit(bytes, i), bool((native_word >> i) & 1));
       }
 
       // bit offset can therefore be accommodated by shifting the word
       for (size_t offset = 0; offset < (kBitWidth * 3) / 4; ++offset) {
-        uint64_t shifted_word = word >> offset;
+        uint64_t shifted_word = arrow::BitUtil::ToLittleEndian(native_word >> offset);
         auto shifted_bytes = reinterpret_cast<uint8_t*>(&shifted_word);
         ASSERT_TRUE(
             internal::BitmapEquals(bytes, offset, shifted_bytes, 0, kBitWidth - offset));
@@ -1151,20 +1152,23 @@ TEST(Bitmap, ShiftingWordsOptimization) {
 
     for (int seed = 0; seed < 64; ++seed) {
       random_bytes(sizeof(words), seed, bytes);
+      uint64_t native_words0 = BitUtil::FromLittleEndian(words[0]);
+      uint64_t native_words1 = BitUtil::FromLittleEndian(words[1]);
 
       // bits are accessible through simple bit shifting of a word
       for (size_t i = 0; i < kBitWidth; ++i) {
-        ASSERT_EQ(BitUtil::GetBit(bytes, i), bool((words[0] >> i) & 1));
+        ASSERT_EQ(BitUtil::GetBit(bytes, i), bool((native_words0 >> i) & 1));
       }
       for (size_t i = 0; i < kBitWidth; ++i) {
-        ASSERT_EQ(BitUtil::GetBit(bytes, i + kBitWidth), bool((words[1] >> i) & 1));
+        ASSERT_EQ(BitUtil::GetBit(bytes, i + kBitWidth), bool((native_words1 >> i) & 1));
       }
 
       // bit offset can therefore be accommodated by shifting the word
       for (size_t offset = 1; offset < (kBitWidth * 3) / 4; offset += 3) {
         uint64_t shifted_words[2];
-        shifted_words[0] = words[0] >> offset | (words[1] << (kBitWidth - offset));
-        shifted_words[1] = words[1] >> offset;
+        shifted_words[0] = arrow::BitUtil::ToLittleEndian(
+            native_words0 >> offset | (native_words1 << (kBitWidth - offset)));
+        shifted_words[1] = arrow::BitUtil::ToLittleEndian(native_words1 >> offset);
         auto shifted_bytes = reinterpret_cast<uint8_t*>(shifted_words);
 
         // from offset to unshifted word boundary