You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by em...@apache.org on 2019/06/11 05:23:50 UTC

[arrow] branch master updated: ARROW-5528: [C++] Fixed a bug when Concatenate() arrays with no value buffers.

This is an automated email from the ASF dual-hosted git repository.

emkornfield pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/master by this push:
     new 7b0b796  ARROW-5528: [C++] Fixed a bug when Concatenate() arrays with no value buffers.
7b0b796 is described below

commit 7b0b79627a71be4d8dfa68e3477725a911c58030
Author: Zhuo Peng <18...@users.noreply.github.com>
AuthorDate: Sat Jun 8 23:24:35 2019 -0700

    ARROW-5528: [C++] Fixed a bug when Concatenate() arrays with no value buffers.
    
    An empty BinaryArray or a BinaryArray contains only nulls has no value
    buffer.
    
    Author: Zhuo Peng <18...@users.noreply.github.com>
    
    Closes #4498 from brills/conc-bugfix and squashes the following commits:
    
    89882f022 <Zhuo Peng> Use range-based for loop where it applies
    5bf18bb57 <Zhuo Peng> fixed lint
    eab98f56c <Zhuo Peng> Fixed lint
    42063bb52 <Zhuo Peng> Fixed a bug when Concatenate() arrays with no value buffers.
---
 cpp/src/arrow/array/concatenate-test.cc | 19 +++++++++++++++++
 cpp/src/arrow/array/concatenate.cc      | 38 +++++++++++++++++++++++++--------
 2 files changed, 48 insertions(+), 9 deletions(-)

diff --git a/cpp/src/arrow/array/concatenate-test.cc b/cpp/src/arrow/array/concatenate-test.cc
index 90f55bf..cf105ce 100644
--- a/cpp/src/arrow/array/concatenate-test.cc
+++ b/cpp/src/arrow/array/concatenate-test.cc
@@ -32,6 +32,7 @@
 #include "arrow/array.h"
 #include "arrow/array/concatenate.h"
 #include "arrow/buffer.h"
+#include "arrow/builder.h"
 #include "arrow/status.h"
 #include "arrow/testing/gtest_common.h"
 #include "arrow/testing/random.h"
@@ -109,6 +110,24 @@ class ConcatenateTest : public ::testing::Test {
   std::vector<double> null_probabilities_;
 };
 
+TEST(ConcatenateEmptyArraysTest, TestValueBuffersNullPtr) {
+  ArrayVector inputs;
+
+  std::shared_ptr<Array> binary_array;
+  BinaryBuilder builder;
+  ASSERT_OK(builder.Finish(&binary_array));
+  inputs.push_back(std::move(binary_array));
+
+  builder.Reset();
+  ASSERT_OK(builder.AppendNull());
+  ASSERT_OK(builder.Finish(&binary_array));
+  inputs.push_back(std::move(binary_array));
+
+  std::shared_ptr<Array> actual;
+  ASSERT_OK(Concatenate(inputs, default_memory_pool(), &actual));
+  AssertArraysEqual(*actual, *inputs[1]);
+}
+
 template <typename PrimitiveType>
 class PrimitiveConcatenateTest : public ConcatenateTest {
  public:
diff --git a/cpp/src/arrow/array/concatenate.cc b/cpp/src/arrow/array/concatenate.cc
index ff6ead3..60da0d3 100644
--- a/cpp/src/arrow/array/concatenate.cc
+++ b/cpp/src/arrow/array/concatenate.cc
@@ -243,21 +243,35 @@ class ConcatenateImpl {
  private:
   // Gather the index-th buffer of each input into a vector.
   // Bytes are sliced with that input's offset and length.
+  // Note that BufferVector will not contain the buffer of in_[i] if it's
+  // nullptr.
   BufferVector Buffers(size_t index) {
-    BufferVector buffers(in_.size());
-    for (size_t i = 0; i < in_.size(); ++i) {
-      buffers[i] = SliceBuffer(in_[i].buffers[index], in_[i].offset, in_[i].length);
+    BufferVector buffers;
+    buffers.reserve(in_.size());
+    for (const ArrayData& array_data : in_) {
+      const auto& buffer = array_data.buffers[index];
+      if (buffer != nullptr) {
+        buffers.push_back(SliceBuffer(buffer, array_data.offset, array_data.length));
+      }
     }
     return buffers;
   }
 
   // Gather the index-th buffer of each input into a vector.
   // Bytes are sliced with the explicitly passed ranges.
+  // Note that BufferVector will not contain the buffer of in_[i] if it's
+  // nullptr.
   BufferVector Buffers(size_t index, const std::vector<Range>& ranges) {
     DCHECK_EQ(in_.size(), ranges.size());
-    BufferVector buffers(in_.size());
+    BufferVector buffers;
+    buffers.reserve(in_.size());
     for (size_t i = 0; i < in_.size(); ++i) {
-      buffers[i] = SliceBuffer(in_[i].buffers[index], ranges[i].offset, ranges[i].length);
+      const auto& buffer = in_[i].buffers[index];
+      if (buffer != nullptr) {
+        buffers.push_back(SliceBuffer(buffer, ranges[i].offset, ranges[i].length));
+      } else {
+        DCHECK_EQ(ranges[i].length, 0);
+      }
     }
     return buffers;
   }
@@ -265,13 +279,19 @@ class ConcatenateImpl {
   // Gather the index-th buffer of each input into a vector.
   // Buffers are assumed to contain elements of fixed.bit_width(),
   // those elements are sliced with that input's offset and length.
+  // Note that BufferVector will not contain the buffer of in_[i] if it's
+  // nullptr.
   BufferVector Buffers(size_t index, const FixedWidthType& fixed) {
     DCHECK_EQ(fixed.bit_width() % 8, 0);
     auto byte_width = fixed.bit_width() / 8;
-    BufferVector buffers(in_.size());
-    for (size_t i = 0; i < in_.size(); ++i) {
-      buffers[i] = SliceBuffer(in_[i].buffers[index], in_[i].offset * byte_width,
-                               in_[i].length * byte_width);
+    BufferVector buffers;
+    buffers.reserve(in_.size());
+    for (const ArrayData& array_data : in_) {
+      const auto& buffer = array_data.buffers[index];
+      if (buffer != nullptr) {
+        buffers.push_back(SliceBuffer(buffer, array_data.offset * byte_width,
+                                      array_data.length * byte_width));
+      }
     }
     return buffers;
   }