You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by we...@apache.org on 2018/12/05 16:58:50 UTC
[arrow] branch master updated: ARROW-3894: [C++] Ensure that IPC
file is properly initialized even if no record batches are written
This is an automated email from the ASF dual-hosted git repository.
wesm pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/master by this push:
new 8152916 ARROW-3894: [C++] Ensure that IPC file is properly initialized even if no record batches are written
8152916 is described below
commit 8152916386a7f62a6c6cc7581a487856d9500b1a
Author: Wes McKinney <we...@apache.org>
AuthorDate: Wed Dec 5 10:55:45 2018 -0600
ARROW-3894: [C++] Ensure that IPC file is properly initialized even if no record batches are written
Without invoking `Start()`, the schema is never written, so the resulting file cannot be read.
Author: Wes McKinney <we...@apache.org>
Closes #3095 from wesm/ARROW-3894 and squashes the following commits:
5fc65282d <Wes McKinney> Also compare schemas
1074a690a <Wes McKinney> Ensure that IPC file is properly initialized even if no record batches are written
---
cpp/src/arrow/ipc/writer.cc | 4 ++++
python/pyarrow/tests/test_ipc.py | 16 ++++++++++++++++
2 files changed, 20 insertions(+)
diff --git a/cpp/src/arrow/ipc/writer.cc b/cpp/src/arrow/ipc/writer.cc
index 8225cce..3d3355d 100644
--- a/cpp/src/arrow/ipc/writer.cc
+++ b/cpp/src/arrow/ipc/writer.cc
@@ -949,6 +949,10 @@ class RecordBatchFileWriter::RecordBatchFileWriterImpl
}
Status Close() override {
+ // Write the schema if not already written
+ // User is responsible for closing the OutputStream
+ RETURN_NOT_OK(CheckStarted());
+
// Write metadata
RETURN_NOT_OK(UpdatePosition());
diff --git a/python/pyarrow/tests/test_ipc.py b/python/pyarrow/tests/test_ipc.py
index b1fa06f..0fb66f8 100644
--- a/python/pyarrow/tests/test_ipc.py
+++ b/python/pyarrow/tests/test_ipc.py
@@ -639,3 +639,19 @@ def read_file(source):
reader = pa.open_file(source)
return [reader.get_batch(i)
for i in range(reader.num_record_batches)]
+
+
+def test_write_empty_ipc_file():
+ # ARROW-3894: IPC file was not being properly initialized when no record
+ # batches were written
+ schema = pa.schema([('field', pa.int64())])
+
+ sink = pa.BufferOutputStream()
+ writer = pa.RecordBatchFileWriter(sink, schema)
+ writer.close()
+
+ buf = sink.getvalue()
+ reader = pa.RecordBatchFileReader(pa.BufferReader(buf))
+ table = reader.read_all()
+ assert len(table) == 0
+ assert table.schema.equals(schema)