You are viewing a plain text version of this content. The canonical link for it is here.
Posted to dev@arrow.apache.org by "Antoine Pitrou (JIRA)" <ji...@apache.org> on 2019/08/08 13:53:00 UTC

[jira] [Created] (ARROW-6174) [C++] Parquet tests produce invalid array

Antoine Pitrou created ARROW-6174:
-------------------------------------

             Summary: [C++] Parquet tests produce invalid array
                 Key: ARROW-6174
                 URL: https://issues.apache.org/jira/browse/ARROW-6174
             Project: Apache Arrow
          Issue Type: Bug
          Components: C++
            Reporter: Antoine Pitrou


If I patch {{Table::Validate()}} (together with {{ChunkedArray::Validate()}}) to also validate the underlying arrays:
{code:c++}
diff --git a/cpp/src/arrow/table.cc b/cpp/src/arrow/table.cc
index 446010f93..e617470b5 100644
--- a/cpp/src/arrow/table.cc
+++ b/cpp/src/arrow/table.cc
@@ -21,6 +21,7 @@
 #include <cstdlib>
 #include <limits>
 #include <memory>
+#include <sstream>
 #include <utility>
 
 #include "arrow/array.h"
@@ -184,10 +185,18 @@ Status ChunkedArray::Validate() const {
   }
 
   const auto& type = *chunks_[0]->type();
+  // Make sure chunks all have the same type, and validate them
   for (size_t i = 1; i < chunks_.size(); ++i) {
-    if (!chunks_[i]->type()->Equals(type)) {
+    const Array& chunk = *chunks_[i];
+    if (!chunk.type()->Equals(type)) {
       return Status::Invalid("In chunk ", i, " expected type ", type.ToString(),
-                             " but saw ", chunks_[i]->type()->ToString());
+                             " but saw ", chunk.type()->ToString());
+    }
+    Status st = ValidateArray(chunk);
+    if (!st.ok()) {
+      std::stringstream ss;
+      ss << "Chunk " << i << ": " << st.message();
+      return st.WithMessage(ss.str());
     }
   }
   return Status::OK();
@@ -343,7 +352,7 @@ class SimpleTable : public Table {
       }
     }
 
-    // Make sure columns are all the same length
+    // Make sure columns are all the same length, and validate them
     for (int i = 0; i < num_columns(); ++i) {
       const ChunkedArray* col = columns_[i].get();
       if (col->length() != num_rows_) {
@@ -351,6 +360,12 @@ class SimpleTable : public Table {
                                " expected length ", num_rows_, " but got length ",
                                col->length());
       }
+      Status st = col->Validate();
+      if (!st.ok()) {
+        std::stringstream ss;
+        ss << "Column " << i << ": " << st.message();
+        return st.WithMessage(ss.str());
+      }
     }
     return Status::OK();
   }
{code}

... then {{parquet-arrow-test}} fails with a validation error and then crashes with a segmentation fault:
{code}
[...]
[ RUN      ] TestArrowReadWrite.TableWithChunkedColumns
../src/parquet/arrow/arrow-reader-writer-test.cc:347: Failure
Failed
'WriteTable(*table, ::arrow::default_memory_pool(), sink, row_group_size, default_writer_properties(), arrow_properties)' failed with Invalid: Column 0: Chunk 1: Final offset invariant not equal to values length: 210!=733
In ../src/arrow/array.cc, line 1229, code: ValidateListArray(array)
In ../src/parquet/arrow/writer.cc, line 1210, code: table.Validate()
In ../src/parquet/arrow/writer.cc, line 1252, code: writer->WriteTable(table, chunk_size)
../src/parquet/arrow/arrow-reader-writer-test.cc:419: Failure
Expected: WriteTableToBuffer(table, row_group_size, arrow_properties, &buffer) doesn't generate new fatal failures in the current thread.
  Actual: it does.
/home/antoine/arrow/dev/cpp/build-support/run-test.sh : ligne 97 : 28927 Erreur de segmentation  $TEST_EXECUTABLE "$@" 2>&1
     28930 Fini                    | $ROOT/build-support/asan_symbolize.py
     28933 Fini                    | ${CXXFILT:-c++filt}
     28936 Fini                    | $ROOT/build-support/stacktrace_addr2line.pl $TEST_EXECUTABLE
     28939 Fini                    | $pipe_cmd 2>&1
     28941 Fini                    | tee $LOGFILE
~/arrow/dev/cpp/build-test/src/parquet

{code}



--
This message was sent by Atlassian JIRA
(v7.6.14#76016)