You are viewing a plain text version of this content. The canonical link for it is here.
Posted to github@arrow.apache.org by GitBox <gi...@apache.org> on 2020/06/09 16:36:23 UTC

[GitHub] [arrow] pitrou commented on a change in pull request #7373: ARROW-8974: [C++] Simplify TransferBitmap

pitrou commented on a change in pull request #7373:
URL: https://github.com/apache/arrow/pull/7373#discussion_r436733347



##########
File path: cpp/src/arrow/util/bitmap_ops.cc
##########
@@ -298,52 +297,52 @@ void TransferBitmap(const uint8_t* data, int64_t offset, int64_t length,
     auto nwords = reader.words();
     while (nwords--) {
       auto word = reader.NextWord();
-      writer.PutNextWord(invert_bits ? ~word : word);
+      writer.PutNextWord(mode == TransferMode::Invert ? ~word : word);
     }
     auto nbytes = reader.trailing_bytes();
     while (nbytes--) {
       int valid_bits;
       auto byte = reader.NextTrailingByte(valid_bits);
-      writer.PutNextTrailingByte(invert_bits ? ~byte : byte, valid_bits);
+      writer.PutNextTrailingByte(mode == TransferMode::Invert ? ~byte : byte, valid_bits);
     }
-  } else {
-    // Shift dest by its byte offset
-    dest += dest_byte_offset;
+  } else if (length) {
+    int64_t num_bytes = BitUtil::BytesForBits(length);
+
+    // Shift by its byte offset
+    data += offset / 8;
+    dest += dest_offset / 8;
 
     // Take care of the trailing bits in the last byte
+    // E.g., if trailing_bits = 5, last byte should be
+    // - low  3 bits: new bits from last byte of data buffer
+    // - high 5 bits: old bits from last byte of dest buffer
     int64_t trailing_bits = num_bytes * 8 - length;
-    uint8_t trail = 0;
-    if (trailing_bits && restore_trailing_bits) {
-      trail = dest[num_bytes - 1];
-    }
+    uint8_t trail_mask = (1U << (8 - trailing_bits)) - 1;

Review comment:
       Can probably use `kTrailingBitmask` or `kPrecedingWrappingBitmask`.

##########
File path: cpp/src/arrow/util/bitmap_ops.cc
##########
@@ -298,52 +297,52 @@ void TransferBitmap(const uint8_t* data, int64_t offset, int64_t length,
     auto nwords = reader.words();
     while (nwords--) {
       auto word = reader.NextWord();
-      writer.PutNextWord(invert_bits ? ~word : word);
+      writer.PutNextWord(mode == TransferMode::Invert ? ~word : word);
     }
     auto nbytes = reader.trailing_bytes();
     while (nbytes--) {
       int valid_bits;
       auto byte = reader.NextTrailingByte(valid_bits);
-      writer.PutNextTrailingByte(invert_bits ? ~byte : byte, valid_bits);
+      writer.PutNextTrailingByte(mode == TransferMode::Invert ? ~byte : byte, valid_bits);
     }
-  } else {
-    // Shift dest by its byte offset
-    dest += dest_byte_offset;
+  } else if (length) {
+    int64_t num_bytes = BitUtil::BytesForBits(length);
+
+    // Shift by its byte offset
+    data += offset / 8;
+    dest += dest_offset / 8;
 
     // Take care of the trailing bits in the last byte
+    // E.g., if trailing_bits = 5, last byte should be
+    // - low  3 bits: new bits from last byte of data buffer
+    // - high 5 bits: old bits from last byte of dest buffer
     int64_t trailing_bits = num_bytes * 8 - length;
-    uint8_t trail = 0;
-    if (trailing_bits && restore_trailing_bits) {
-      trail = dest[num_bytes - 1];
-    }
+    uint8_t kTrailingBitmask = (1U << (8 - trailing_bits)) - 1;

Review comment:
       I meant use the `kTrailingBitmask` constant array that's exposed in `bit_util.h` :-)

##########
File path: cpp/src/arrow/util/bitmap_ops.cc
##########
@@ -298,52 +297,52 @@ void TransferBitmap(const uint8_t* data, int64_t offset, int64_t length,
     auto nwords = reader.words();
     while (nwords--) {
       auto word = reader.NextWord();
-      writer.PutNextWord(invert_bits ? ~word : word);
+      writer.PutNextWord(mode == TransferMode::Invert ? ~word : word);
     }
     auto nbytes = reader.trailing_bytes();
     while (nbytes--) {
       int valid_bits;
       auto byte = reader.NextTrailingByte(valid_bits);
-      writer.PutNextTrailingByte(invert_bits ? ~byte : byte, valid_bits);
+      writer.PutNextTrailingByte(mode == TransferMode::Invert ? ~byte : byte, valid_bits);
     }
-  } else {
-    // Shift dest by its byte offset
-    dest += dest_byte_offset;
+  } else if (length) {
+    int64_t num_bytes = BitUtil::BytesForBits(length);
+
+    // Shift by its byte offset
+    data += offset / 8;
+    dest += dest_offset / 8;
 
     // Take care of the trailing bits in the last byte
+    // E.g., if trailing_bits = 5, last byte should be
+    // - low  3 bits: new bits from last byte of data buffer
+    // - high 5 bits: old bits from last byte of dest buffer
     int64_t trailing_bits = num_bytes * 8 - length;
-    uint8_t trail = 0;
-    if (trailing_bits && restore_trailing_bits) {
-      trail = dest[num_bytes - 1];
-    }
+    uint8_t kTrailingBitmask = (1U << (8 - trailing_bits)) - 1;

Review comment:
       Hmm, let's simply revert the renaming then :-)




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org