You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by we...@apache.org on 2018/09/15 19:01:57 UTC

[arrow] branch master updated: ARROW-3236: [C++] Fix stream accounting bug causing garbled schema message when writing IPC file format

This is an automated email from the ASF dual-hosted git repository.

wesm pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/master by this push:
     new 362ba74  ARROW-3236: [C++] Fix stream accounting bug causing garbled schema message when writing IPC file format
362ba74 is described below

commit 362ba7442d4574cd42494fc6dc728ff80f0f727f
Author: Wes McKinney <we...@apache.org>
AuthorDate: Sat Sep 15 15:01:48 2018 -0400

    ARROW-3236: [C++] Fix stream accounting bug causing garbled schema message when writing IPC file format
    
    We were never reading this message (because the schema is contained in the footer), and the fact that it was malformed was not impacting the other messages in the file (obviously, since we have many unit tests verifying this).
    
    Fixes #2559
    
    Author: Wes McKinney <we...@apache.org>
    
    Closes #2560 from wesm/ARROW-3236 and squashes the following commits:
    
    bf0856f1e <Wes McKinney> Fix stream accounting bug causing garbled schema message when writing IPC file format. Clean code
---
 cpp/src/arrow/ipc/writer.cc | 26 +++++++++++++++++++-------
 1 file changed, 19 insertions(+), 7 deletions(-)

diff --git a/cpp/src/arrow/ipc/writer.cc b/cpp/src/arrow/ipc/writer.cc
index 80c824c..ec6c5d0 100644
--- a/cpp/src/arrow/ipc/writer.cc
+++ b/cpp/src/arrow/ipc/writer.cc
@@ -735,6 +735,12 @@ class StreamBookKeeper {
 
   Status UpdatePosition() { return sink_->Tell(&position_); }
 
+  Status UpdatePositionCheckAligned() {
+    RETURN_NOT_OK(UpdatePosition());
+    DCHECK_EQ(0, position_ % 8) << "Stream is not aligned";
+    return Status::OK();
+  }
+
   Status Align(int64_t alignment = kArrowIpcAlignment) {
     // Adds padding bytes if necessary to ensure all memory blocks are written on
     // 8-byte (or other alignment) boundaries.
@@ -764,13 +770,17 @@ class SchemaWriter : public StreamBookKeeper {
       : StreamBookKeeper(sink), schema_(schema), dictionary_memo_(dictionary_memo) {}
 
   Status WriteSchema() {
+#ifndef NDEBUG
+    // Catch bug fixed in ARROW-3236
+    RETURN_NOT_OK(UpdatePositionCheckAligned());
+#endif
+
     std::shared_ptr<Buffer> schema_fb;
     RETURN_NOT_OK(internal::WriteSchemaMessage(schema_, dictionary_memo_, &schema_fb));
 
     int32_t metadata_length = 0;
     RETURN_NOT_OK(internal::WriteMessage(*schema_fb, sink_, &metadata_length));
-    RETURN_NOT_OK(UpdatePosition());
-    DCHECK_EQ(0, position_ % 8) << "WriteSchema did not perform an aligned write";
+    RETURN_NOT_OK(UpdatePositionCheckAligned());
     return Status::OK();
   }
 
@@ -790,8 +800,7 @@ class SchemaWriter : public StreamBookKeeper {
       const int64_t buffer_start_offset = 0;
       RETURN_NOT_OK(WriteDictionary(entry.first, entry.second, buffer_start_offset, sink_,
                                     &block->metadata_length, &block->body_length, pool_));
-      RETURN_NOT_OK(UpdatePosition());
-      DCHECK(position_ % 8 == 0) << "WriteDictionary did not perform aligned writes";
+      RETURN_NOT_OK(UpdatePositionCheckAligned());
     }
 
     return Status::OK();
@@ -856,9 +865,7 @@ class RecordBatchStreamWriter::RecordBatchStreamWriterImpl : public StreamBookKe
     RETURN_NOT_OK(arrow::ipc::WriteRecordBatch(
         batch, buffer_start_offset, sink_, &block->metadata_length, &block->body_length,
         pool_, kMaxNestingDepth, allow_64bit));
-    RETURN_NOT_OK(UpdatePosition());
-
-    DCHECK(position_ % 8 == 0) << "WriteRecordBatch did not perform aligned writes";
+    RETURN_NOT_OK(UpdatePositionCheckAligned());
 
     return Status::OK();
   }
@@ -922,6 +929,11 @@ class RecordBatchFileWriter::RecordBatchFileWriterImpl
       : BASE(sink, schema) {}
 
   Status Start() override {
+    // ARROW-3236: The initial position -1 needs to be updated to the stream's
+    // current position otherwise an incorrect amount of padding will be
+    // written to new files.
+    RETURN_NOT_OK(UpdatePosition());
+
     // It is only necessary to align to 8-byte boundary at the start of the file
     RETURN_NOT_OK(Write(kArrowMagicBytes, strlen(kArrowMagicBytes)));
     RETURN_NOT_OK(Align());