You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by we...@apache.org on 2019/03/05 02:42:17 UTC

[arrow] branch master updated: ARROW-4707: [C++] moving BitsetStack to BitUtil::

This is an automated email from the ASF dual-hosted git repository.

wesm pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/master by this push:
     new 4e8e072  ARROW-4707: [C++] moving BitsetStack to BitUtil::
4e8e072 is described below

commit 4e8e072eaa9d46d2a69031d22e81914da504ba0d
Author: Benjamin Kietzman <be...@gmail.com>
AuthorDate: Mon Mar 4 20:42:07 2019 -0600

    ARROW-4707: [C++] moving BitsetStack to BitUtil::
    
    Author: Benjamin Kietzman <be...@gmail.com>
    
    Closes #3780 from bkietz/ARROW-4707-expose-BitsetStack and squashes the following commits:
    
    95a1375b6 <Benjamin Kietzman> moving BitsetStack to BitUtil::
---
 cpp/src/arrow/json/parser.cc        | 29 +--------------------------
 cpp/src/arrow/util/bit-util-test.cc | 22 ++++++++++++++++++++
 cpp/src/arrow/util/bit-util.h       | 40 +++++++++++++++++++++++++++++++++++++
 3 files changed, 63 insertions(+), 28 deletions(-)

diff --git a/cpp/src/arrow/json/parser.cc b/cpp/src/arrow/json/parser.cc
index 6182495..99b8911 100644
--- a/cpp/src/arrow/json/parser.cc
+++ b/cpp/src/arrow/json/parser.cc
@@ -44,6 +44,7 @@
 namespace arrow {
 namespace json {
 
+using internal::BitsetStack;
 using internal::checked_cast;
 using util::string_view;
 
@@ -106,34 +107,6 @@ class UnsafeStringBuilder {
   std::shared_ptr<Buffer> values_buffer_;
 };
 
-/// Store a stack of bitsets efficiently. The top bitset may be accessed and its bits may
-/// be modified, but it may not be resized.
-class BitsetStack {
- public:
-  using reference = typename std::vector<bool>::reference;
-
-  void Push(int size, bool value) {
-    offsets_.push_back(bit_count());
-    bits_.resize(bit_count() + size, value);
-  }
-
-  int TopSize() const { return bit_count() - offsets_.back(); }
-
-  void Pop() {
-    bits_.resize(offsets_.back());
-    offsets_.pop_back();
-  }
-
-  reference operator[](int i) { return bits_[offsets_.back() + i]; }
-
-  bool operator[](int i) const { return bits_[offsets_.back() + i]; }
-
- private:
-  int bit_count() const { return static_cast<int>(bits_.size()); }
-  std::vector<bool> bits_;
-  std::vector<int> offsets_;
-};
-
 /// \brief ArrayBuilder for parsed but unconverted arrays
 template <Kind::type>
 class RawArrayBuilder;
diff --git a/cpp/src/arrow/util/bit-util-test.cc b/cpp/src/arrow/util/bit-util-test.cc
index 774d3bf..20b1a92 100644
--- a/cpp/src/arrow/util/bit-util-test.cc
+++ b/cpp/src/arrow/util/bit-util-test.cc
@@ -40,6 +40,7 @@ namespace arrow {
 using internal::BitmapAnd;
 using internal::BitmapOr;
 using internal::BitmapXor;
+using internal::BitsetStack;
 using internal::CopyBitmap;
 using internal::CountSetBits;
 using internal::InvertBitmap;
@@ -941,4 +942,25 @@ TEST(BitUtil, RoundTripBigEndianTest) {
   ASSERT_EQ(value, from_big_endian);
 }
 
+TEST(BitUtil, BitsetStack) {  // Push/Pop/index round trip over two stacked bitsets
+  BitsetStack stack;
+  ASSERT_EQ(stack.TopSize(), 0);  // an empty stack reports a top size of 0
+  stack.Push(3, false);
+  ASSERT_EQ(stack.TopSize(), 3);
+  stack[1] = true;  // set one bit in the bottom bitset
+  stack.Push(5, true);
+  ASSERT_EQ(stack.TopSize(), 5);
+  stack[1] = false;  // indexing now addresses the newly pushed bitset
+  for (int i = 0; i != 5; ++i) {
+    ASSERT_EQ(stack[i], i != 1);  // only the bit cleared above is false
+  }
+  stack.Pop();
+  ASSERT_EQ(stack.TopSize(), 3);
+  for (int i = 0; i != 3; ++i) {
+    ASSERT_EQ(stack[i], i == 1);  // bottom bitset kept its bits across Push/Pop
+  }
+  stack.Pop();
+  ASSERT_EQ(stack.TopSize(), 0);
+}
+
 }  // namespace arrow
diff --git a/cpp/src/arrow/util/bit-util.h b/cpp/src/arrow/util/bit-util.h
index 6724c29..22bf8fc 100644
--- a/cpp/src/arrow/util/bit-util.h
+++ b/cpp/src/arrow/util/bit-util.h
@@ -808,6 +808,46 @@ ARROW_EXPORT
 void BitmapXor(const uint8_t* left, int64_t left_offset, const uint8_t* right,
                int64_t right_offset, int64_t length, int64_t out_offset, uint8_t* out);
 
+/// \brief Store a stack of bitsets in one std::vector<bool>. Only the top bitset
+/// can be accessed; its bits may be modified, but it may not be resized.
+class BitsetStack {
+ public:
+  using reference = typename std::vector<bool>::reference;
+
+  /// \brief push a bitset onto the stack; it becomes the new top bitset
+  /// \param size number of bits in the next bitset
+  /// \param value initial value for every bit in the pushed bitset
+  void Push(int size, bool value) {
+    offsets_.push_back(bit_count());
+    bits_.resize(bit_count() + size, value);
+  }
+
+  /// \brief number of bits in the top bitset, or 0 when the stack is empty
+  int TopSize() const {
+    if (offsets_.size() == 0) return 0;
+    return bit_count() - offsets_.back();
+  }
+
+  /// \brief pop the top bitset; unchecked, so the stack must not be empty
+  void Pop() {
+    bits_.resize(offsets_.back());
+    offsets_.pop_back();
+  }
+
+  /// \brief get the value of a bit in the top bitset (stack must not be empty)
+  /// \param i index of the bit within the top bitset; not bounds-checked
+  bool operator[](int i) const { return bits_[offsets_.back() + i]; }
+
+  /// \brief get a mutable proxy reference to a bit in the top bitset
+  /// \param i index of the bit within the top bitset; not bounds-checked
+  reference operator[](int i) { return bits_[offsets_.back() + i]; }
+
+ private:
+  int bit_count() const { return static_cast<int>(bits_.size()); }  // total bits held
+  std::vector<bool> bits_;    // bits of every stacked bitset, bottom of stack first
+  std::vector<int> offsets_;  // offsets_[k]: index in bits_ where bitset k begins
+};
+
 }  // namespace internal
 }  // namespace arrow