You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by we...@apache.org on 2018/12/05 16:58:50 UTC

[arrow] branch master updated: ARROW-3894: [C++] Ensure that IPC file is properly initialized even if no record batches are written

This is an automated email from the ASF dual-hosted git repository.

wesm pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/master by this push:
     new 8152916  ARROW-3894: [C++] Ensure that IPC file is properly initialized even if no record batches are written
8152916 is described below

commit 8152916386a7f62a6c6cc7581a487856d9500b1a
Author: Wes McKinney <we...@apache.org>
AuthorDate: Wed Dec 5 10:55:45 2018 -0600

    ARROW-3894: [C++] Ensure that IPC file is properly initialized even if no record batches are written
    
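    Without invoking `Start()`, the file cannot be read. `Close()` now calls
    `CheckStarted()` first, so the schema is written even if no record batches
    were written.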
    
    Author: Wes McKinney <we...@apache.org>
    
    Closes #3095 from wesm/ARROW-3894 and squashes the following commits:
    
    5fc65282d <Wes McKinney> Also compare schemas
    1074a690a <Wes McKinney> Ensure that IPC file is properly initialized even if no record batches are written
---
 cpp/src/arrow/ipc/writer.cc      |  4 ++++
 python/pyarrow/tests/test_ipc.py | 16 ++++++++++++++++
 2 files changed, 20 insertions(+)

diff --git a/cpp/src/arrow/ipc/writer.cc b/cpp/src/arrow/ipc/writer.cc
index 8225cce..3d3355d 100644
--- a/cpp/src/arrow/ipc/writer.cc
+++ b/cpp/src/arrow/ipc/writer.cc
@@ -949,6 +949,10 @@ class RecordBatchFileWriter::RecordBatchFileWriterImpl
   }
 
   Status Close() override {
+    // Write the schema if not already written
+    // User is responsible for closing the OutputStream
+    RETURN_NOT_OK(CheckStarted());
+
     // Write metadata
     RETURN_NOT_OK(UpdatePosition());
 
diff --git a/python/pyarrow/tests/test_ipc.py b/python/pyarrow/tests/test_ipc.py
index b1fa06f..0fb66f8 100644
--- a/python/pyarrow/tests/test_ipc.py
+++ b/python/pyarrow/tests/test_ipc.py
@@ -639,3 +639,19 @@ def read_file(source):
     reader = pa.open_file(source)
     return [reader.get_batch(i)
             for i in range(reader.num_record_batches)]
+
+
+def test_write_empty_ipc_file():
+    # ARROW-3894: IPC file was not being properly initialized when no record
+    # batches were written
+    schema = pa.schema([('field', pa.int64())])
+
+    sink = pa.BufferOutputStream()
+    writer = pa.RecordBatchFileWriter(sink, schema)
+    writer.close()
+
+    buf = sink.getvalue()
+    reader = pa.RecordBatchFileReader(pa.BufferReader(buf))
+    table = reader.read_all()
+    assert len(table) == 0
+    assert table.schema.equals(schema)