You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by ap...@apache.org on 2020/05/04 09:48:02 UTC

[arrow] branch master updated: ARROW-8669: [C++] Add IpcWriteOptions argument to GetRecordBatchSize()

This is an automated email from the ASF dual-hosted git repository.

apitrou pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/master by this push:
     new a26f163  ARROW-8669: [C++] Add IpcWriteOptions argument to GetRecordBatchSize()
a26f163 is described below

commit a26f16317d12ed756b84ad1d323baaa229a11877
Author: Sutou Kouhei <ko...@clear-code.com>
AuthorDate: Mon May 4 11:47:33 2020 +0200

    ARROW-8669: [C++] Add IpcWriteOptions argument to GetRecordBatchSize()
    
    Closes #7087 from kou/cpp-ipc-get-record-batch-size-options
    
    Authored-by: Sutou Kouhei <ko...@clear-code.com>
    Signed-off-by: Antoine Pitrou <an...@python.org>
---
 cpp/src/arrow/ipc/read_write_test.cc |  2 +-
 cpp/src/arrow/ipc/writer.cc          | 12 ++++++++----
 cpp/src/arrow/ipc/writer.h           | 10 ++++++++++
 3 files changed, 19 insertions(+), 5 deletions(-)

diff --git a/cpp/src/arrow/ipc/read_write_test.cc b/cpp/src/arrow/ipc/read_write_test.cc
index 1b5073e..455d147 100644
--- a/cpp/src/arrow/ipc/read_write_test.cc
+++ b/cpp/src/arrow/ipc/read_write_test.cc
@@ -649,7 +649,7 @@ void TestGetRecordBatchSize(const IpcWriteOptions& options,
   int64_t size = -1;
   ASSERT_OK(WriteRecordBatch(*batch, 0, &mock, &mock_metadata_length, &mock_body_length,
                              options));
-  ASSERT_OK(GetRecordBatchSize(*batch, &size));
+  ASSERT_OK(GetRecordBatchSize(*batch, options, &size));
   ASSERT_EQ(mock.GetExtentBytesWritten(), size);
 }
 
diff --git a/cpp/src/arrow/ipc/writer.cc b/cpp/src/arrow/ipc/writer.cc
index 6579d47..4adfe8a 100644
--- a/cpp/src/arrow/ipc/writer.cc
+++ b/cpp/src/arrow/ipc/writer.cc
@@ -867,8 +867,12 @@ Result<std::unique_ptr<Message>> GetSparseTensorMessage(const SparseTensor& spar
 }
 
 Status GetRecordBatchSize(const RecordBatch& batch, int64_t* size) {
+  return GetRecordBatchSize(batch, IpcWriteOptions::Defaults(), size);
+}
+
+Status GetRecordBatchSize(const RecordBatch& batch, const IpcWriteOptions& options,
+                          int64_t* size) {
   // emulates the behavior of Write without actually writing
-  auto options = IpcWriteOptions::Defaults();
   int32_t metadata_length = 0;
   int64_t body_length = 0;
   io::MockOutputStream dst;
@@ -1198,12 +1202,12 @@ Result<std::unique_ptr<RecordBatchWriter>> OpenRecordBatchWriter(
 
 Result<std::shared_ptr<Buffer>> SerializeRecordBatch(const RecordBatch& batch,
                                                      std::shared_ptr<MemoryManager> mm) {
+  auto options = IpcWriteOptions::Defaults();
   int64_t size = 0;
-  RETURN_NOT_OK(GetRecordBatchSize(batch, &size));
+  RETURN_NOT_OK(GetRecordBatchSize(batch, options, &size));
   ARROW_ASSIGN_OR_RAISE(auto buffer, mm->AllocateBuffer(size));
   ARROW_ASSIGN_OR_RAISE(auto writer, Buffer::GetWriter(buffer));
 
-  IpcWriteOptions options;
   // XXX Should we have a helper function for getting a MemoryPool
   // for any MemoryManager (not only CPU)?
   if (mm->is_cpu()) {
@@ -1217,7 +1221,7 @@ Result<std::shared_ptr<Buffer>> SerializeRecordBatch(const RecordBatch& batch,
 Result<std::shared_ptr<Buffer>> SerializeRecordBatch(const RecordBatch& batch,
                                                      const IpcWriteOptions& options) {
   int64_t size = 0;
-  RETURN_NOT_OK(GetRecordBatchSize(batch, &size));
+  RETURN_NOT_OK(GetRecordBatchSize(batch, options, &size));
   ARROW_ASSIGN_OR_RAISE(std::shared_ptr<Buffer> buffer,
                         AllocateBuffer(size, options.memory_pool));
 
diff --git a/cpp/src/arrow/ipc/writer.h b/cpp/src/arrow/ipc/writer.h
index da8c7ed..37c4997 100644
--- a/cpp/src/arrow/ipc/writer.h
+++ b/cpp/src/arrow/ipc/writer.h
@@ -185,6 +185,16 @@ Status WriteRecordBatchStream(const std::vector<std::shared_ptr<RecordBatch>>& b
 ARROW_EXPORT
 Status GetRecordBatchSize(const RecordBatch& batch, int64_t* size);
 
+/// \brief Compute the number of bytes needed to write a record batch including metadata
+///
+/// \param[in] batch the record batch to write
+/// \param[in] options options for serialization
+/// \param[out] size the size of the complete encapsulated message
+/// \return Status
+ARROW_EXPORT
+Status GetRecordBatchSize(const RecordBatch& batch, const IpcWriteOptions& options,
+                          int64_t* size);
+
 /// \brief Compute the number of bytes needed to write a tensor including metadata
 ///
 /// \param[in] tensor the tensor to write