You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by ap...@apache.org on 2020/05/04 09:48:02 UTC
[arrow] branch master updated: ARROW-8669: [C++] Add IpcWriteOptions argument to GetRecordBatchSize()
This is an automated email from the ASF dual-hosted git repository.
apitrou pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/master by this push:
new a26f163 ARROW-8669: [C++] Add IpcWriteOptions argument to GetRecordBatchSize()
a26f163 is described below
commit a26f16317d12ed756b84ad1d323baaa229a11877
Author: Sutou Kouhei <ko...@clear-code.com>
AuthorDate: Mon May 4 11:47:33 2020 +0200
ARROW-8669: [C++] Add IpcWriteOptions argument to GetRecordBatchSize()
Closes #7087 from kou/cpp-ipc-get-record-batch-size-options
Authored-by: Sutou Kouhei <ko...@clear-code.com>
Signed-off-by: Antoine Pitrou <an...@python.org>
---
cpp/src/arrow/ipc/read_write_test.cc | 2 +-
cpp/src/arrow/ipc/writer.cc | 12 ++++++++----
cpp/src/arrow/ipc/writer.h | 10 ++++++++++
3 files changed, 19 insertions(+), 5 deletions(-)
diff --git a/cpp/src/arrow/ipc/read_write_test.cc b/cpp/src/arrow/ipc/read_write_test.cc
index 1b5073e..455d147 100644
--- a/cpp/src/arrow/ipc/read_write_test.cc
+++ b/cpp/src/arrow/ipc/read_write_test.cc
@@ -649,7 +649,7 @@ void TestGetRecordBatchSize(const IpcWriteOptions& options,
int64_t size = -1;
ASSERT_OK(WriteRecordBatch(*batch, 0, &mock, &mock_metadata_length, &mock_body_length,
options));
- ASSERT_OK(GetRecordBatchSize(*batch, &size));
+ ASSERT_OK(GetRecordBatchSize(*batch, options, &size));
ASSERT_EQ(mock.GetExtentBytesWritten(), size);
}
diff --git a/cpp/src/arrow/ipc/writer.cc b/cpp/src/arrow/ipc/writer.cc
index 6579d47..4adfe8a 100644
--- a/cpp/src/arrow/ipc/writer.cc
+++ b/cpp/src/arrow/ipc/writer.cc
@@ -867,8 +867,12 @@ Result<std::unique_ptr<Message>> GetSparseTensorMessage(const SparseTensor& spar
}
Status GetRecordBatchSize(const RecordBatch& batch, int64_t* size) {
+ return GetRecordBatchSize(batch, IpcWriteOptions::Defaults(), size);
+}
+
+Status GetRecordBatchSize(const RecordBatch& batch, const IpcWriteOptions& options,
+ int64_t* size) {
// emulates the behavior of Write without actually writing
- auto options = IpcWriteOptions::Defaults();
int32_t metadata_length = 0;
int64_t body_length = 0;
io::MockOutputStream dst;
@@ -1198,12 +1202,12 @@ Result<std::unique_ptr<RecordBatchWriter>> OpenRecordBatchWriter(
Result<std::shared_ptr<Buffer>> SerializeRecordBatch(const RecordBatch& batch,
std::shared_ptr<MemoryManager> mm) {
+ auto options = IpcWriteOptions::Defaults();
int64_t size = 0;
- RETURN_NOT_OK(GetRecordBatchSize(batch, &size));
+ RETURN_NOT_OK(GetRecordBatchSize(batch, options, &size));
ARROW_ASSIGN_OR_RAISE(auto buffer, mm->AllocateBuffer(size));
ARROW_ASSIGN_OR_RAISE(auto writer, Buffer::GetWriter(buffer));
- IpcWriteOptions options;
// XXX Should we have a helper function for getting a MemoryPool
// for any MemoryManager (not only CPU)?
if (mm->is_cpu()) {
@@ -1217,7 +1221,7 @@ Result<std::shared_ptr<Buffer>> SerializeRecordBatch(const RecordBatch& batch,
Result<std::shared_ptr<Buffer>> SerializeRecordBatch(const RecordBatch& batch,
const IpcWriteOptions& options) {
int64_t size = 0;
- RETURN_NOT_OK(GetRecordBatchSize(batch, &size));
+ RETURN_NOT_OK(GetRecordBatchSize(batch, options, &size));
ARROW_ASSIGN_OR_RAISE(std::shared_ptr<Buffer> buffer,
AllocateBuffer(size, options.memory_pool));
diff --git a/cpp/src/arrow/ipc/writer.h b/cpp/src/arrow/ipc/writer.h
index da8c7ed..37c4997 100644
--- a/cpp/src/arrow/ipc/writer.h
+++ b/cpp/src/arrow/ipc/writer.h
@@ -185,6 +185,16 @@ Status WriteRecordBatchStream(const std::vector<std::shared_ptr<RecordBatch>>& b
ARROW_EXPORT
Status GetRecordBatchSize(const RecordBatch& batch, int64_t* size);
+/// \brief Compute the number of bytes needed to write a record batch including metadata
+///
+/// \param[in] batch the record batch to write
+/// \param[in] options options for serialization
+/// \param[out] size the size of the complete encapsulated message
+/// \return Status
+ARROW_EXPORT
+Status GetRecordBatchSize(const RecordBatch& batch, const IpcWriteOptions& options,
+ int64_t* size);
+
/// \brief Compute the number of bytes needed to write a tensor including metadata
///
/// \param[in] tensor the tensor to write