You are viewing a plain text version of this content. The canonical link for it is here.
Posted to dev@arrow.apache.org by "Antoine Pitrou (JIRA)" <ji...@apache.org> on 2019/08/08 13:53:00 UTC
[jira] [Created] (ARROW-6174) [C++] Parquet tests produce invalid array
Antoine Pitrou created ARROW-6174:
-------------------------------------
Summary: [C++] Parquet tests produce invalid array
Key: ARROW-6174
URL: https://issues.apache.org/jira/browse/ARROW-6174
Project: Apache Arrow
Issue Type: Bug
Components: C++
Reporter: Antoine Pitrou
If I patch {{Table::Validate()}} to also validate the underlying arrays:
{code:c++}
diff --git a/cpp/src/arrow/table.cc b/cpp/src/arrow/table.cc
index 446010f93..e617470b5 100644
--- a/cpp/src/arrow/table.cc
+++ b/cpp/src/arrow/table.cc
@@ -21,6 +21,7 @@
#include <cstdlib>
#include <limits>
#include <memory>
+#include <sstream>
#include <utility>
#include "arrow/array.h"
@@ -184,10 +185,18 @@ Status ChunkedArray::Validate() const {
}
const auto& type = *chunks_[0]->type();
+ // Make sure chunks all have the same type, and validate them
for (size_t i = 1; i < chunks_.size(); ++i) {
- if (!chunks_[i]->type()->Equals(type)) {
+ const Array& chunk = *chunks_[i];
+ if (!chunk.type()->Equals(type)) {
return Status::Invalid("In chunk ", i, " expected type ", type.ToString(),
- " but saw ", chunks_[i]->type()->ToString());
+ " but saw ", chunk.type()->ToString());
+ }
+ Status st = ValidateArray(chunk);
+ if (!st.ok()) {
+ std::stringstream ss;
+ ss << "Chunk " << i << ": " << st.message();
+ return st.WithMessage(ss.str());
}
}
return Status::OK();
@@ -343,7 +352,7 @@ class SimpleTable : public Table {
}
}
- // Make sure columns are all the same length
+ // Make sure columns are all the same length, and validate them
for (int i = 0; i < num_columns(); ++i) {
const ChunkedArray* col = columns_[i].get();
if (col->length() != num_rows_) {
@@ -351,6 +360,12 @@ class SimpleTable : public Table {
" expected length ", num_rows_, " but got length ",
col->length());
}
+ Status st = col->Validate();
+ if (!st.ok()) {
+ std::stringstream ss;
+ ss << "Column " << i << ": " << st.message();
+ return st.WithMessage(ss.str());
+ }
}
return Status::OK();
}
{code}
... then {{parquet-arrow-test}} first fails with a validation error and then crashes with a segmentation fault:
{code}
[...]
[ RUN ] TestArrowReadWrite.TableWithChunkedColumns
../src/parquet/arrow/arrow-reader-writer-test.cc:347: Failure
Failed
'WriteTable(*table, ::arrow::default_memory_pool(), sink, row_group_size, default_writer_properties(), arrow_properties)' failed with Invalid: Column 0: Chunk 1: Final offset invariant not equal to values length: 210!=733
In ../src/arrow/array.cc, line 1229, code: ValidateListArray(array)
In ../src/parquet/arrow/writer.cc, line 1210, code: table.Validate()
In ../src/parquet/arrow/writer.cc, line 1252, code: writer->WriteTable(table, chunk_size)
../src/parquet/arrow/arrow-reader-writer-test.cc:419: Failure
Expected: WriteTableToBuffer(table, row_group_size, arrow_properties, &buffer) doesn't generate new fatal failures in the current thread.
Actual: it does.
/home/antoine/arrow/dev/cpp/build-support/run-test.sh : ligne 97 : 28927 Erreur de segmentation $TEST_EXECUTABLE "$@" 2>&1
28930 Fini | $ROOT/build-support/asan_symbolize.py
28933 Fini | ${CXXFILT:-c++filt}
28936 Fini | $ROOT/build-support/stacktrace_addr2line.pl $TEST_EXECUTABLE
28939 Fini | $pipe_cmd 2>&1
28941 Fini | tee $LOGFILE
~/arrow/dev/cpp/build-test/src/parquet
{code}
--
This message was sent by Atlassian JIRA
(v7.6.14#76016)