You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by we...@apache.org on 2017/02/10 18:15:53 UTC
arrow git commit: ARROW-544: [C++] Test writing zero-length record
batches, zero-length BinaryArray fixes
Repository: arrow
Updated Branches:
refs/heads/master 0ab425245 -> 42b55d98c
ARROW-544: [C++] Test writing zero-length record batches, zero-length BinaryArray fixes
I believe this should fix the failure reported in the Spark integration work. We'll need to upgrade the conda test packages to verify. cc @BryanCutler
Author: Wes McKinney <we...@twosigma.com>
Closes #333 from wesm/ARROW-544 and squashes the following commits:
f80d58f [Wes McKinney] Protect zero-length record batches from incomplete buffer metadata
f876dce [Wes McKinney] Test with null value_offsets too
1dc7733 [Wes McKinney] Test writing zero-length record batches, misc zero-length fixes
Project: http://git-wip-us.apache.org/repos/asf/arrow/repo
Commit: http://git-wip-us.apache.org/repos/asf/arrow/commit/42b55d98
Tree: http://git-wip-us.apache.org/repos/asf/arrow/tree/42b55d98
Diff: http://git-wip-us.apache.org/repos/asf/arrow/diff/42b55d98
Branch: refs/heads/master
Commit: 42b55d98c151d87e5a7c6442800f3e5b9316499b
Parents: 0ab4252
Author: Wes McKinney <we...@twosigma.com>
Authored: Fri Feb 10 13:15:39 2017 -0500
Committer: Wes McKinney <we...@twosigma.com>
Committed: Fri Feb 10 13:15:39 2017 -0500
----------------------------------------------------------------------
cpp/src/arrow/array-string-test.cc | 4 ++
cpp/src/arrow/array.cc | 11 +++-
cpp/src/arrow/ipc/adapter.cc | 27 ++++++---
cpp/src/arrow/ipc/ipc-adapter-test.cc | 89 +++++++++++++++++++++---------
4 files changed, 94 insertions(+), 37 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/arrow/blob/42b55d98/cpp/src/arrow/array-string-test.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/array-string-test.cc b/cpp/src/arrow/array-string-test.cc
index c4d9bf4..d8a3585 100644
--- a/cpp/src/arrow/array-string-test.cc
+++ b/cpp/src/arrow/array-string-test.cc
@@ -470,4 +470,8 @@ TEST_F(TestStringArray, TestSliceEquality) {
CheckSliceEquality<BinaryType>();
}
+TEST_F(TestBinaryArray, LengthZeroCtor) {
+ BinaryArray array(0, nullptr, nullptr);
+}
+
} // namespace arrow
http://git-wip-us.apache.org/repos/asf/arrow/blob/42b55d98/cpp/src/arrow/array.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/array.cc b/cpp/src/arrow/array.cc
index bf368d9..81678e3 100644
--- a/cpp/src/arrow/array.cc
+++ b/cpp/src/arrow/array.cc
@@ -270,9 +270,12 @@ BinaryArray::BinaryArray(const std::shared_ptr<DataType>& type, int32_t length,
const std::shared_ptr<Buffer>& null_bitmap, int32_t null_count, int32_t offset)
: Array(type, length, null_bitmap, null_count, offset),
value_offsets_(value_offsets),
- raw_value_offsets_(reinterpret_cast<const int32_t*>(value_offsets_->data())),
+ raw_value_offsets_(nullptr),
data_(data),
raw_data_(nullptr) {
+ if (value_offsets_ != nullptr) {
+ raw_value_offsets_ = reinterpret_cast<const int32_t*>(value_offsets_->data());
+ }
if (data_ != nullptr) { raw_data_ = data_->data(); }
}
@@ -384,8 +387,10 @@ UnionArray::UnionArray(const std::shared_ptr<DataType>& type, int32_t length,
: Array(type, length, null_bitmap, null_count, offset),
children_(children),
type_ids_(type_ids),
- value_offsets_(value_offsets) {
- raw_type_ids_ = reinterpret_cast<const uint8_t*>(type_ids->data());
+ raw_type_ids_(nullptr),
+ value_offsets_(value_offsets),
+ raw_value_offsets_(nullptr) {
+ if (type_ids) { raw_type_ids_ = reinterpret_cast<const uint8_t*>(type_ids->data()); }
if (value_offsets) {
raw_value_offsets_ = reinterpret_cast<const int32_t*>(value_offsets->data());
}
http://git-wip-us.apache.org/repos/asf/arrow/blob/42b55d98/cpp/src/arrow/ipc/adapter.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/ipc/adapter.cc b/cpp/src/arrow/ipc/adapter.cc
index 3613ccb..f36ff37 100644
--- a/cpp/src/arrow/ipc/adapter.cc
+++ b/cpp/src/arrow/ipc/adapter.cc
@@ -602,8 +602,13 @@ class ArrayLoader : public TypeVisitor {
std::shared_ptr<Buffer> offsets;
std::shared_ptr<Buffer> values;
- RETURN_NOT_OK(GetBuffer(context_->buffer_index++, &offsets));
- RETURN_NOT_OK(GetBuffer(context_->buffer_index++, &values));
+ if (field_meta.length > 0) {
+ RETURN_NOT_OK(GetBuffer(context_->buffer_index++, &offsets));
+ RETURN_NOT_OK(GetBuffer(context_->buffer_index++, &values));
+ } else {
+ context_->buffer_index += 2;
+ offsets = values = nullptr;
+ }
result_ = std::make_shared<CONTAINER>(
field_meta.length, offsets, values, null_bitmap, field_meta.null_count);
@@ -661,7 +666,12 @@ class ArrayLoader : public TypeVisitor {
RETURN_NOT_OK(LoadCommon(&field_meta, &null_bitmap));
std::shared_ptr<Buffer> offsets;
- RETURN_NOT_OK(GetBuffer(context_->buffer_index++, &offsets));
+ if (field_meta.length > 0) {
+ RETURN_NOT_OK(GetBuffer(context_->buffer_index, &offsets));
+ } else {
+ offsets = nullptr;
+ }
+ ++context_->buffer_index;
const int num_children = type.num_children();
if (num_children != 1) {
@@ -708,13 +718,16 @@ class ArrayLoader : public TypeVisitor {
std::shared_ptr<Buffer> null_bitmap;
RETURN_NOT_OK(LoadCommon(&field_meta, &null_bitmap));
- std::shared_ptr<Buffer> type_ids;
+ std::shared_ptr<Buffer> type_ids = nullptr;
std::shared_ptr<Buffer> offsets = nullptr;
- RETURN_NOT_OK(GetBuffer(context_->buffer_index++, &type_ids));
- if (type.mode == UnionMode::DENSE) {
- RETURN_NOT_OK(GetBuffer(context_->buffer_index++, &offsets));
+ if (field_meta.length > 0) {
+ RETURN_NOT_OK(GetBuffer(context_->buffer_index, &type_ids));
+ if (type.mode == UnionMode::DENSE) {
+ RETURN_NOT_OK(GetBuffer(context_->buffer_index + 1, &offsets));
+ }
}
+ context_->buffer_index += type.mode == UnionMode::DENSE? 2 : 1;
std::vector<std::shared_ptr<Array>> fields;
RETURN_NOT_OK(LoadChildren(type.children(), &fields));
http://git-wip-us.apache.org/repos/asf/arrow/blob/42b55d98/cpp/src/arrow/ipc/ipc-adapter-test.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/ipc/ipc-adapter-test.cc b/cpp/src/arrow/ipc/ipc-adapter-test.cc
index bae6578..d11b95b 100644
--- a/cpp/src/arrow/ipc/ipc-adapter-test.cc
+++ b/cpp/src/arrow/ipc/ipc-adapter-test.cc
@@ -71,6 +71,42 @@ class TestWriteRecordBatch : public ::testing::TestWithParam<MakeRecordBatch*>,
return ReadRecordBatch(metadata, batch.schema(), &buffer_reader, batch_result);
}
+ void CheckRoundtrip(const RecordBatch& batch, int64_t buffer_size) {
+ std::shared_ptr<RecordBatch> batch_result;
+
+ ASSERT_OK(RoundTripHelper(batch, 1 << 16, &batch_result));
+ EXPECT_EQ(batch.num_rows(), batch_result->num_rows());
+
+ ASSERT_TRUE(batch.schema()->Equals(batch_result->schema()));
+ ASSERT_EQ(batch.num_columns(), batch_result->num_columns())
+ << batch.schema()->ToString()
+ << " result: " << batch_result->schema()->ToString();
+
+ for (int i = 0; i < batch.num_columns(); ++i) {
+ const auto& left = *batch.column(i);
+ const auto& right = *batch_result->column(i);
+ if (!left.Equals(right)) {
+ std::stringstream pp_result;
+ std::stringstream pp_expected;
+
+ ASSERT_OK(PrettyPrint(left, 0, &pp_expected));
+ ASSERT_OK(PrettyPrint(right, 0, &pp_result));
+
+ FAIL() << "Index: " << i << " Expected: " << pp_expected.str()
+ << "\nGot: " << pp_result.str();
+ }
+ }
+ }
+
+ void CheckRoundtrip(const std::shared_ptr<Array>& array, int64_t buffer_size) {
+ auto f0 = arrow::field("f0", array->type());
+ std::vector<std::shared_ptr<Field>> fields = {f0};
+ auto schema = std::make_shared<Schema>(fields);
+
+ RecordBatch batch(schema, 0, {array});
+ CheckRoundtrip(batch, buffer_size);
+ }
+
protected:
std::shared_ptr<io::MemoryMappedFile> mmap_;
MemoryPool* pool_;
@@ -79,48 +115,47 @@ class TestWriteRecordBatch : public ::testing::TestWithParam<MakeRecordBatch*>,
TEST_P(TestWriteRecordBatch, RoundTrip) {
std::shared_ptr<RecordBatch> batch;
ASSERT_OK((*GetParam())(&batch)); // NOLINT clang-tidy gtest issue
- std::shared_ptr<RecordBatch> batch_result;
- ASSERT_OK(RoundTripHelper(*batch, 1 << 16, &batch_result));
-
- // do checks
- ASSERT_TRUE(batch->schema()->Equals(batch_result->schema()));
- ASSERT_EQ(batch->num_columns(), batch_result->num_columns())
- << batch->schema()->ToString() << " result: " << batch_result->schema()->ToString();
- EXPECT_EQ(batch->num_rows(), batch_result->num_rows());
- for (int i = 0; i < batch->num_columns(); ++i) {
- EXPECT_TRUE(batch->column(i)->Equals(batch_result->column(i)))
- << "Idx: " << i << " Name: " << batch->column_name(i);
- }
+
+ CheckRoundtrip(*batch, 1 << 20);
}
TEST_P(TestWriteRecordBatch, SliceRoundTrip) {
std::shared_ptr<RecordBatch> batch;
ASSERT_OK((*GetParam())(&batch)); // NOLINT clang-tidy gtest issue
- std::shared_ptr<RecordBatch> batch_result;
// Skip the zero-length case
if (batch->num_rows() < 2) { return; }
auto sliced_batch = batch->Slice(2, 10);
+ CheckRoundtrip(*sliced_batch, 1 << 20);
+}
+
+TEST_P(TestWriteRecordBatch, ZeroLengthArrays) {
+ std::shared_ptr<RecordBatch> batch;
+ ASSERT_OK((*GetParam())(&batch)); // NOLINT clang-tidy gtest issue
- ASSERT_OK(RoundTripHelper(*sliced_batch, 1 << 16, &batch_result));
+ std::shared_ptr<RecordBatch> zero_length_batch;
+ if (batch->num_rows() > 2) {
+ zero_length_batch = batch->Slice(2, 0);
+ } else {
+ zero_length_batch = batch->Slice(0, 0);
+ }
- EXPECT_EQ(sliced_batch->num_rows(), batch_result->num_rows());
+ CheckRoundtrip(*zero_length_batch, 1 << 20);
- for (int i = 0; i < sliced_batch->num_columns(); ++i) {
- const auto& left = *sliced_batch->column(i);
- const auto& right = *batch_result->column(i);
- if (!left.Equals(right)) {
- std::stringstream pp_result;
- std::stringstream pp_expected;
+ // ARROW-544: check binary array
+ std::shared_ptr<MutableBuffer> value_offsets;
+ ASSERT_OK(AllocateBuffer(pool_, sizeof(int32_t), &value_offsets));
+ *reinterpret_cast<int32_t*>(value_offsets->mutable_data()) = 0;
- ASSERT_OK(PrettyPrint(left, 0, &pp_expected));
- ASSERT_OK(PrettyPrint(right, 0, &pp_result));
+ std::shared_ptr<Array> bin_array = std::make_shared<BinaryArray>(0, value_offsets,
+ std::make_shared<Buffer>(nullptr, 0), std::make_shared<Buffer>(nullptr, 0));
- FAIL() << "Index: " << i << " Expected: " << pp_expected.str()
- << "\nGot: " << pp_result.str();
- }
- }
+ // null value_offsets
+ std::shared_ptr<Array> bin_array2 = std::make_shared<BinaryArray>(0, nullptr, nullptr);
+
+ CheckRoundtrip(bin_array, 1 << 20);
+ CheckRoundtrip(bin_array2, 1 << 20);
}
INSTANTIATE_TEST_CASE_P(