You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by we...@apache.org on 2018/11/18 16:31:45 UTC

[arrow] branch master updated: ARROW-3793: [C++] TestScalarAppendUnsafe is not testing unsafe appends

This is an automated email from the ASF dual-hosted git repository.

wesm pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/master by this push:
     new 767ff8c  ARROW-3793: [C++] TestScalarAppendUnsafe is not testing unsafe appends
767ff8c is described below

commit 767ff8c8d5af305e044a09f2e847700fd545f273
Author: Benjamin Kietzman <be...@gmail.com>
AuthorDate: Sun Nov 18 11:31:37 2018 -0500

    ARROW-3793: [C++] TestScalarAppendUnsafe is not testing unsafe appends
    
    Perform enough repetitions that the Buffers will be more than the minimum allocation size (64 bytes) and reserve enough space for all repetitions. Also use UnsafeAppendNull rather than AppendNull to ensure that nothing is calling Reserve in the test's loop
    
    Author: Benjamin Kietzman <be...@gmail.com>
    
    Closes #2977 from bkietz/master and squashes the following commits:
    
    123b7b697 <Benjamin Kietzman> ArrayBuilder:: methods refactoring
    e48f359e1 <Benjamin Kietzman> adding doccomment about downcasting
    c041af631 <Benjamin Kietzman> adding explicit cast to int32_t
    52b50cdb1 <Benjamin Kietzman> make ArrayBuilder methods which modify bitmap protected
    e6f13e675 <Benjamin Kietzman> moving break statements into PP block
    37a47ff5b <Benjamin Kietzman> add explicit cast to int
    7d2680187 <Benjamin Kietzman> fix format issue
    16c9dc709 <Benjamin Kietzman> UnsafeAppendNull() doesn't return Status
    27562a2aa <Benjamin Kietzman> ARROW-3793:  TestScalarAppendUnsafe is not testing unsafe appends
---
 cpp/src/arrow/array-test.cc           | 13 +++++---
 cpp/src/arrow/builder.h               | 57 +++++++++++++++++++++--------------
 cpp/src/arrow/compute/kernels/hash.cc |  2 +-
 cpp/src/arrow/util/compression.cc     | 12 ++++----
 4 files changed, 49 insertions(+), 35 deletions(-)

diff --git a/cpp/src/arrow/array-test.cc b/cpp/src/arrow/array-test.cc
index e647ff8..8e8bfcf 100644
--- a/cpp/src/arrow/array-test.cc
+++ b/cpp/src/arrow/array-test.cc
@@ -1497,25 +1497,28 @@ TEST_F(TestBinaryBuilder, TestScalarAppendUnsafe) {
   vector<uint8_t> is_null = {0, 0, 0, 1, 0};
 
   int N = static_cast<int>(strings.size());
-  int reps = 10;
+  int reps = 13;
+  int total_length = 0;
+  for (auto&& s : strings) total_length += static_cast<int>(s.size());
 
-  ASSERT_OK(builder_->Reserve(5));
-  ASSERT_OK(builder_->ReserveData(6));
+  ASSERT_OK(builder_->Reserve(N * reps));
+  ASSERT_OK(builder_->ReserveData(total_length * reps));
 
   for (int j = 0; j < reps; ++j) {
     for (int i = 0; i < N; ++i) {
       if (is_null[i]) {
-        ASSERT_OK(builder_->AppendNull());
+        builder_->UnsafeAppendNull();
       } else {
         builder_->UnsafeAppend(strings[i]);
       }
     }
   }
+  ASSERT_EQ(builder_->value_data_length(), total_length * reps);
   Done();
   ASSERT_OK(ValidateArray(*result_));
   ASSERT_EQ(reps * N, result_->length());
   ASSERT_EQ(reps, result_->null_count());
-  ASSERT_EQ(reps * 6, result_->value_data()->size());
+  ASSERT_EQ(reps * total_length, result_->value_data()->size());
 
   int32_t length;
   for (int i = 0; i < N * reps; ++i) {
diff --git a/cpp/src/arrow/builder.h b/cpp/src/arrow/builder.h
index 40edd74..6ddc0e9 100644
--- a/cpp/src/arrow/builder.h
+++ b/cpp/src/arrow/builder.h
@@ -53,10 +53,13 @@ constexpr int64_t kListMaximumElements = std::numeric_limits<int32_t>::max() - 1
 constexpr int64_t kMinBuilderCapacity = 1 << 5;
 
 /// Base class for all data array builders.
-//
+///
 /// This class provides a facilities for incrementally building the null bitmap
 /// (see Append methods) and as a side effect the current number of slots and
 /// the null count.
+///
+/// \note Users are expected to use builders as one of the concrete types below.
+/// For example, ArrayBuilder* pointing to BinaryBuilder should be downcast before use.
 class ARROW_EXPORT ArrayBuilder {
  public:
   explicit ArrayBuilder(const std::shared_ptr<DataType>& type, MemoryPool* pool)
@@ -80,16 +83,6 @@ class ARROW_EXPORT ArrayBuilder {
   int64_t null_count() const { return null_count_; }
   int64_t capacity() const { return capacity_; }
 
-  /// Append to null bitmap
-  Status AppendToBitmap(bool is_valid);
-
-  /// Vector append. Treat each zero byte as a null.   If valid_bytes is null
-  /// assume all of length bits are valid.
-  Status AppendToBitmap(const uint8_t* valid_bytes, int64_t length);
-
-  /// Set the next length bits to not null (i.e. valid).
-  Status SetNotNull(int64_t length);
-
   /// \brief Ensure that enough memory has been allocated to fit the indicated
   /// number of total elements in the builder, including any that have already
   /// been appended. Does not account for reallocations that may be due to
@@ -131,8 +124,23 @@ class ARROW_EXPORT ArrayBuilder {
 
   std::shared_ptr<DataType> type() const { return type_; }
 
+ protected:
+  ArrayBuilder() {}
+
+  /// Append to null bitmap
+  Status AppendToBitmap(bool is_valid);
+
+  /// Vector append. Treat each zero byte as a null.   If valid_bytes is null
+  /// assume all of length bits are valid.
+  Status AppendToBitmap(const uint8_t* valid_bytes, int64_t length);
+
+  /// Set the next length bits to not null (i.e. valid).
+  Status SetNotNull(int64_t length);
+
   // Unsafe operations (don't check capacity/don't resize)
 
+  void UnsafeAppendNull() { UnsafeAppendToBitmap(false); }
+
   // Append to null bitmap, update the length
   void UnsafeAppendToBitmap(bool is_valid) {
     if (is_valid) {
@@ -175,10 +183,14 @@ class ARROW_EXPORT ArrayBuilder {
     length_ += std::distance(begin, end);
   }
 
-  void UnsafeAppendNull() { UnsafeAppendToBitmap(false); }
+  // Vector append. Treat each zero byte as a nullzero. If valid_bytes is null
+  // assume all of length bits are valid.
+  void UnsafeAppendToBitmap(const uint8_t* valid_bytes, int64_t length);
 
- protected:
-  ArrayBuilder() {}
+  void UnsafeAppendToBitmap(const std::vector<bool>& is_valid);
+
+  // Set the next length bits to not null (i.e. valid).
+  void UnsafeSetNotNull(int64_t length);
 
   std::shared_ptr<DataType> type_;
   MemoryPool* pool_;
@@ -195,15 +207,6 @@ class ARROW_EXPORT ArrayBuilder {
   // Child value array builders. These are owned by this class
   std::vector<std::unique_ptr<ArrayBuilder>> children_;
 
-  // Vector append. Treat each zero byte as a nullzero. If valid_bytes is null
-  // assume all of length bits are valid.
-  void UnsafeAppendToBitmap(const uint8_t* valid_bytes, int64_t length);
-
-  void UnsafeAppendToBitmap(const std::vector<bool>& is_valid);
-
-  // Set the next length bits to not null (i.e. valid).
-  void UnsafeSetNotNull(int64_t length);
-
  private:
   ARROW_DISALLOW_COPY_AND_ASSIGN(ArrayBuilder);
 };
@@ -371,6 +374,7 @@ class ARROW_EXPORT NumericBuilder : public PrimitiveBuilder<T> {
           ARROW_MEMORY_POOL_DEFAULT)
       : PrimitiveBuilder<T1>(TypeTraits<T1>::type_singleton(), pool) {}
 
+  using ArrayBuilder::UnsafeAppendNull;
   using PrimitiveBuilder<T>::AppendValues;
   using PrimitiveBuilder<T>::Resize;
   using PrimitiveBuilder<T>::Reserve;
@@ -627,6 +631,7 @@ class ARROW_EXPORT BooleanBuilder : public ArrayBuilder {
   explicit BooleanBuilder(const std::shared_ptr<DataType>& type, MemoryPool* pool);
 
   using ArrayBuilder::Advance;
+  using ArrayBuilder::UnsafeAppendNull;
 
   /// Write nulls as uint8_t* (0 value indicates null) into pre-allocated memory
   Status AppendNulls(const uint8_t* valid_bytes, int64_t length) {
@@ -878,6 +883,12 @@ class ARROW_EXPORT BinaryBuilder : public ArrayBuilder {
     UnsafeAppend(value.c_str(), static_cast<int32_t>(value.size()));
   }
 
+  void UnsafeAppendNull() {
+    const int64_t num_bytes = value_data_builder_.length();
+    offsets_builder_.UnsafeAppend(static_cast<int32_t>(num_bytes));
+    UnsafeAppendToBitmap(false);
+  }
+
   void Reset() override;
   Status Resize(int64_t capacity) override;
 
diff --git a/cpp/src/arrow/compute/kernels/hash.cc b/cpp/src/arrow/compute/kernels/hash.cc
index fc95a37..c004429 100644
--- a/cpp/src/arrow/compute/kernels/hash.cc
+++ b/cpp/src/arrow/compute/kernels/hash.cc
@@ -749,7 +749,7 @@ class DictEncodeImpl : public HashTableKernel<Type, DictEncodeImpl<Type>> {
 
   Status Reserve(const int64_t length) { return indices_builder_.Reserve(length); }
 
-  void ObserveNull() { indices_builder_.UnsafeAppendToBitmap(false); }
+  void ObserveNull() { indices_builder_.UnsafeAppendNull(); }
 
   void ObserveFound(const hash_slot_t slot) { indices_builder_.UnsafeAppend(slot); }
 
diff --git a/cpp/src/arrow/util/compression.cc b/cpp/src/arrow/util/compression.cc
index 7b13ade..4924c14 100644
--- a/cpp/src/arrow/util/compression.cc
+++ b/cpp/src/arrow/util/compression.cc
@@ -61,47 +61,47 @@ Status Codec::Create(Compression::type codec_type, std::unique_ptr<Codec>* resul
     case Compression::SNAPPY:
 #ifdef ARROW_WITH_SNAPPY
       result->reset(new SnappyCodec());
+      break;
 #else
       return Status::NotImplemented("Snappy codec support not built");
 #endif
-      break;
     case Compression::GZIP:
 #ifdef ARROW_WITH_ZLIB
       result->reset(new GZipCodec());
+      break;
 #else
       return Status::NotImplemented("Gzip codec support not built");
 #endif
-      break;
     case Compression::LZO:
       return Status::NotImplemented("LZO codec not implemented");
     case Compression::BROTLI:
 #ifdef ARROW_WITH_BROTLI
       result->reset(new BrotliCodec());
+      break;
 #else
       return Status::NotImplemented("Brotli codec support not built");
 #endif
-      break;
     case Compression::LZ4:
 #ifdef ARROW_WITH_LZ4
       result->reset(new Lz4Codec());
+      break;
 #else
       return Status::NotImplemented("LZ4 codec support not built");
 #endif
-      break;
     case Compression::ZSTD:
 #ifdef ARROW_WITH_ZSTD
       result->reset(new ZSTDCodec());
+      break;
 #else
       return Status::NotImplemented("ZSTD codec support not built");
 #endif
-      break;
     case Compression::BZ2:
 #ifdef ARROW_WITH_BZ2
       result->reset(new BZ2Codec());
+      break;
 #else
       return Status::NotImplemented("BZ2 codec support not built");
 #endif
-      break;
     default:
       return Status::Invalid("Unrecognized codec");
   }