You are viewing a plain text version of this content. The canonical link for it is here.
Posted to github@arrow.apache.org by GitBox <gi...@apache.org> on 2020/06/09 16:34:44 UTC

[GitHub] [arrow] wesm commented on a change in pull request #7356: ARROW-9034: [C++] Implement "BinaryBitBlockCounter", add single-word functions to BitBlockCounter

wesm commented on a change in pull request #7356:
URL: https://github.com/apache/arrow/pull/7356#discussion_r436764326



##########
File path: cpp/src/arrow/util/bit_block_counter.h
##########
@@ -19,37 +19,108 @@
 
 #include <cstdint>
 
+#include "arrow/util/bit_util.h"
+#include "arrow/util/bitmap_ops.h"
+#include "arrow/util/ubsan.h"
 #include "arrow/util/visibility.h"
 
 namespace arrow {
 namespace internal {
 
-/// \brief A class that scans through a true/false bitmap to yield blocks of up
-/// to 256 bits at a time along with their popcount. This is used to accelerate
-/// processing of mostly-not-null array data.
+/// \brief Return value from bit block counters: the total number of bits and
+/// the number of set bits.
+struct BitBlockCount {
+  int16_t length;
+  int16_t popcount;
+};
+
+/// \brief A class that scans through a true/false bitmap to compute popcounts
+/// 64 or 256 bits at a time. This is used to accelerate processing of
+/// mostly-not-null array data.
 class ARROW_EXPORT BitBlockCounter {
  public:
-  struct Block {
-    int16_t length;
-    int16_t popcount;
-  };
-
-  static constexpr int16_t kTargetBlockLength = 256;
-
   BitBlockCounter(const uint8_t* bitmap, int64_t start_offset, int64_t length)
       : bitmap_(bitmap + start_offset / 8),
         bits_remaining_(length),
         offset_(start_offset % 8) {}
 
-  /// \brief Return the next run of available bits, up to 256. The returned
-  /// pair contains the size of run and the number of true values
-  Block NextBlock();
+  /// \brief Return the next run of available bits, usually 256. The returned
+  /// pair contains the size of run and the number of true values. When the
+  /// offset is greater than zero, the last block may be longer than 256.
+  BitBlockCount NextFourWords();
+
+  /// \brief Return the next run of available bits, usually 64. The returned
+  /// pair contains the size of run and the number of true values. When the
+  /// offset is greater than zero, the last block may be longer than 64.
+  BitBlockCount NextWord();
+
+  /// \brief Inlineable implementation of NextWord

Review comment:
       It was largely for the sake of benchmarking. I don't think it's worth keeping the inline version, agreed? 

##########
File path: cpp/src/arrow/util/bit_block_counter.h
##########
@@ -19,37 +19,108 @@
 
 #include <cstdint>
 
+#include "arrow/util/bit_util.h"
+#include "arrow/util/bitmap_ops.h"
+#include "arrow/util/ubsan.h"
 #include "arrow/util/visibility.h"
 
 namespace arrow {
 namespace internal {
 
-/// \brief A class that scans through a true/false bitmap to yield blocks of up
-/// to 256 bits at a time along with their popcount. This is used to accelerate
-/// processing of mostly-not-null array data.
+/// \brief Return value from bit block counters: the total number of bits and
+/// the number of set bits.
+struct BitBlockCount {
+  int16_t length;
+  int16_t popcount;
+};
+
+/// \brief A class that scans through a true/false bitmap to compute popcounts
+/// 64 or 256 bits at a time. This is used to accelerate processing of
+/// mostly-not-null array data.
 class ARROW_EXPORT BitBlockCounter {
  public:
-  struct Block {
-    int16_t length;
-    int16_t popcount;
-  };
-
-  static constexpr int16_t kTargetBlockLength = 256;
-
   BitBlockCounter(const uint8_t* bitmap, int64_t start_offset, int64_t length)
       : bitmap_(bitmap + start_offset / 8),
         bits_remaining_(length),
         offset_(start_offset % 8) {}
 
-  /// \brief Return the next run of available bits, up to 256. The returned
-  /// pair contains the size of run and the number of true values
-  Block NextBlock();
+  /// \brief Return the next run of available bits, usually 256. The returned
+  /// pair contains the size of run and the number of true values. When the
+  /// offset is greater than zero, the last block may be longer than 256.

Review comment:
       Well, I can alter the code to yield two blocks at the end rather than a single overfull block like now. You think that is preferable? 




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org