You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by li...@apache.org on 2022/06/21 19:46:41 UTC

[arrow] branch master updated: ARROW-16800: [C++] RecordBatchBuilder deprecate Status APIs, add Result APIs (#13356)

This is an automated email from the ASF dual-hosted git repository.

lidavidm pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/master by this push:
     new ac0949bd5f ARROW-16800: [C++] RecordBatchBuilder deprecate Status APIs, add Result APIs (#13356)
ac0949bd5f is described below

commit ac0949bd5fb627e57785799b76232688f1a348f5
Author: Will Jones <wi...@gmail.com>
AuthorDate: Tue Jun 21 12:46:34 2022 -0700

    ARROW-16800: [C++] RecordBatchBuilder deprecate Status APIs, add Result APIs (#13356)
    
    Also added the class to the C++ API docs.
    
    Authored-by: Will Jones <wi...@gmail.com>
    Signed-off-by: David Li <li...@gmail.com>
---
 c_glib/arrow-glib/table-builder.cpp   | 16 ++++++------
 cpp/src/arrow/adapters/orc/adapter.cc | 12 ++++-----
 cpp/src/arrow/table_builder.cc        | 46 ++++++++++++++++++++++++++---------
 cpp/src/arrow/table_builder.h         | 28 +++++++++++++++++++++
 cpp/src/arrow/table_builder_test.cc   | 16 ++++++------
 docs/source/cpp/api/builder.rst       |  7 ++++++
 6 files changed, 91 insertions(+), 34 deletions(-)

diff --git a/c_glib/arrow-glib/table-builder.cpp b/c_glib/arrow-glib/table-builder.cpp
index 27839565cd..0ea41281db 100644
--- a/c_glib/arrow-glib/table-builder.cpp
+++ b/c_glib/arrow-glib/table-builder.cpp
@@ -155,11 +155,10 @@ garrow_record_batch_builder_new(GArrowSchema *schema, GError **error)
 {
   auto arrow_schema = garrow_schema_get_raw(schema);
   auto memory_pool = arrow::default_memory_pool();
-  std::unique_ptr<arrow::RecordBatchBuilder> arrow_builder;
-  auto status = arrow::RecordBatchBuilder::Make(arrow_schema,
-                                                memory_pool,
-                                                &arrow_builder);
-  if (garrow_error_check(error, status, "[record-batch-builder][new]")) {
+  auto builder_result = arrow::RecordBatchBuilder::Make(arrow_schema, memory_pool);
+
+  if (garrow::check(error, builder_result, "[record-batch-builder][new]")) {
+    std::unique_ptr<arrow::RecordBatchBuilder> arrow_builder = std::move(builder_result).ValueOrDie();
     return garrow_record_batch_builder_new_raw(arrow_builder.release());
   } else {
     return NULL;
@@ -309,9 +308,10 @@ garrow_record_batch_builder_flush(GArrowRecordBatchBuilder *builder,
                                   GError **error)
 {
   auto arrow_builder = garrow_record_batch_builder_get_raw(builder);
-  std::shared_ptr<arrow::RecordBatch> arrow_record_batch;
-  auto status = arrow_builder->Flush(&arrow_record_batch);
-  if (garrow_error_check(error, status, "[record-batch-builder][flush]")) {
+  auto batch_result = arrow_builder->Flush();
+
+  if (garrow::check(error, batch_result, "[record-batch-builder][flush]")) {
+    std::shared_ptr<arrow::RecordBatch> arrow_record_batch = *batch_result;
     return garrow_record_batch_new_raw(&arrow_record_batch);
   } else {
     return NULL;
diff --git a/cpp/src/arrow/adapters/orc/adapter.cc b/cpp/src/arrow/adapters/orc/adapter.cc
index a95b8325f5..5af5ebccc8 100644
--- a/cpp/src/arrow/adapters/orc/adapter.cc
+++ b/cpp/src/arrow/adapters/orc/adapter.cc
@@ -158,7 +158,8 @@ class OrcStripeReader : public RecordBatchReader {
     }
 
     std::unique_ptr<RecordBatchBuilder> builder;
-    RETURN_NOT_OK(RecordBatchBuilder::Make(schema_, pool_, batch->numElements, &builder));
+    ARROW_ASSIGN_OR_RAISE(builder,
+                          RecordBatchBuilder::Make(schema_, pool_, batch->numElements));
 
     // The top-level type must be a struct to read into an arrow table
     const auto& struct_batch = checked_cast<liborc::StructVectorBatch&>(*batch);
@@ -168,7 +169,7 @@ class OrcStripeReader : public RecordBatchReader {
                                 batch->numElements, builder->GetField(i)));
     }
 
-    RETURN_NOT_OK(builder->Flush(out));
+    ARROW_ASSIGN_OR_RAISE(*out, builder->Flush());
     return Status::OK();
   }
 
@@ -445,7 +446,7 @@ class ORCFileReader::Impl {
     ORC_END_CATCH_NOT_OK
 
     std::unique_ptr<RecordBatchBuilder> builder;
-    RETURN_NOT_OK(RecordBatchBuilder::Make(schema, pool_, nrows, &builder));
+    ARROW_ASSIGN_OR_RAISE(builder, RecordBatchBuilder::Make(schema, pool_, nrows));
 
     // The top-level type must be a struct to read into an arrow table
     const auto& struct_batch = checked_cast<liborc::StructVectorBatch&>(*batch);
@@ -457,9 +458,8 @@ class ORCFileReader::Impl {
                                   batch->numElements, builder->GetField(i)));
       }
     }
-    std::shared_ptr<RecordBatch> out;
-    RETURN_NOT_OK(builder->Flush(&out));
-    return out;
+
+    return builder->Flush();
   }
 
   Status Seek(int64_t row_number) {
diff --git a/cpp/src/arrow/table_builder.cc b/cpp/src/arrow/table_builder.cc
index c026c35575..414aa263cc 100644
--- a/cpp/src/arrow/table_builder.cc
+++ b/cpp/src/arrow/table_builder.cc
@@ -38,19 +38,43 @@ RecordBatchBuilder::RecordBatchBuilder(const std::shared_ptr<Schema>& schema,
 
 Status RecordBatchBuilder::Make(const std::shared_ptr<Schema>& schema, MemoryPool* pool,
                                 std::unique_ptr<RecordBatchBuilder>* builder) {
-  return Make(schema, pool, kMinBuilderCapacity, builder);
+  ARROW_ASSIGN_OR_RAISE(*builder, Make(schema, pool, kMinBuilderCapacity))
+  return Status::OK();
 }
 
 Status RecordBatchBuilder::Make(const std::shared_ptr<Schema>& schema, MemoryPool* pool,
                                 int64_t initial_capacity,
                                 std::unique_ptr<RecordBatchBuilder>* builder) {
-  builder->reset(new RecordBatchBuilder(schema, pool, initial_capacity));
-  RETURN_NOT_OK((*builder)->CreateBuilders());
-  return (*builder)->InitBuilders();
+  ARROW_ASSIGN_OR_RAISE(*builder, Make(schema, pool, initial_capacity))
+  return Status::OK();
+}
+
+Result<std::unique_ptr<RecordBatchBuilder>> RecordBatchBuilder::Make(
+    const std::shared_ptr<Schema>& schema, MemoryPool* pool) {
+  return Make(schema, pool, kMinBuilderCapacity);
+}
+
+Result<std::unique_ptr<RecordBatchBuilder>> RecordBatchBuilder::Make(
+    const std::shared_ptr<Schema>& schema, MemoryPool* pool, int64_t initial_capacity) {
+  auto builder = std::unique_ptr<RecordBatchBuilder>(
+      new RecordBatchBuilder(schema, pool, initial_capacity));
+  RETURN_NOT_OK(builder->CreateBuilders());
+  RETURN_NOT_OK(builder->InitBuilders());
+  return std::move(builder);
 }
 
 Status RecordBatchBuilder::Flush(bool reset_builders,
                                  std::shared_ptr<RecordBatch>* batch) {
+  ARROW_ASSIGN_OR_RAISE(*batch, Flush(reset_builders));
+  return Status::OK();
+}
+
+Status RecordBatchBuilder::Flush(std::shared_ptr<RecordBatch>* batch) {
+  ARROW_ASSIGN_OR_RAISE(*batch, Flush(true));
+  return Status::OK();
+}
+
+Result<std::shared_ptr<RecordBatch>> RecordBatchBuilder::Flush(bool reset_builders) {
   std::vector<std::shared_ptr<Array>> fields;
   fields.resize(this->num_fields());
 
@@ -76,18 +100,18 @@ Status RecordBatchBuilder::Flush(bool reset_builders,
   std::shared_ptr<Schema> schema =
       std::make_shared<Schema>(std::move(schema_fields), schema_->metadata());
 
-  *batch = RecordBatch::Make(std::move(schema), length, std::move(fields));
+  std::shared_ptr<RecordBatch> batch =
+      RecordBatch::Make(std::move(schema), length, std::move(fields));
+
   if (reset_builders) {
-    return InitBuilders();
-  } else {
-    return Status::OK();
+    ARROW_RETURN_NOT_OK(InitBuilders());
   }
-}
 
-Status RecordBatchBuilder::Flush(std::shared_ptr<RecordBatch>* batch) {
-  return Flush(true, batch);
+  return batch;
 }
 
+Result<std::shared_ptr<RecordBatch>> RecordBatchBuilder::Flush() { return Flush(true); }
+
 void RecordBatchBuilder::SetInitialCapacity(int64_t capacity) {
   ARROW_CHECK_GT(capacity, 0) << "Initial capacity must be positive";
   initial_capacity_ = capacity;
diff --git a/cpp/src/arrow/table_builder.h b/cpp/src/arrow/table_builder.h
index db130d3895..f2e030fff0 100644
--- a/cpp/src/arrow/table_builder.h
+++ b/cpp/src/arrow/table_builder.h
@@ -42,6 +42,7 @@ class ARROW_EXPORT RecordBatchBuilder {
   /// \param[in] schema The schema for the record batch
   /// \param[in] pool A MemoryPool to use for allocations
   /// \param[in] builder the created builder instance
+  ARROW_DEPRECATED("Deprecated in 9.0.0. Use Result-returning variant.")
   static Status Make(const std::shared_ptr<Schema>& schema, MemoryPool* pool,
                      std::unique_ptr<RecordBatchBuilder>* builder);
 
@@ -50,10 +51,26 @@ class ARROW_EXPORT RecordBatchBuilder {
   /// \param[in] pool A MemoryPool to use for allocations
   /// \param[in] initial_capacity The initial capacity for the builders
   /// \param[in] builder the created builder instance
+  ARROW_DEPRECATED("Deprecated in 9.0.0. Use Result-returning variant.")
   static Status Make(const std::shared_ptr<Schema>& schema, MemoryPool* pool,
                      int64_t initial_capacity,
                      std::unique_ptr<RecordBatchBuilder>* builder);
 
+  /// \brief Create and initialize a RecordBatchBuilder
+  /// \param[in] schema The schema for the record batch
+  /// \param[in] pool A MemoryPool to use for allocations
+  /// \return the created builder instance
+  static Result<std::unique_ptr<RecordBatchBuilder>> Make(
+      const std::shared_ptr<Schema>& schema, MemoryPool* pool);
+
+  /// \brief Create and initialize a RecordBatchBuilder
+  /// \param[in] schema The schema for the record batch
+  /// \param[in] pool A MemoryPool to use for allocations
+  /// \param[in] initial_capacity The initial capacity for the builders
+  /// \return the created builder instance
+  static Result<std::unique_ptr<RecordBatchBuilder>> Make(
+      const std::shared_ptr<Schema>& schema, MemoryPool* pool, int64_t initial_capacity);
+
   /// \brief Get base pointer to field builder
   /// \param i the field index
   /// \return pointer to ArrayBuilder
@@ -71,13 +88,24 @@ class ARROW_EXPORT RecordBatchBuilder {
   /// \param[in] reset_builders the resulting RecordBatch
   /// \param[out] batch the resulting RecordBatch
   /// \return Status
+  ARROW_DEPRECATED("Deprecated in 9.0.0. Use Result-returning variant.")
   Status Flush(bool reset_builders, std::shared_ptr<RecordBatch>* batch);
 
   /// \brief Finish current batch and reset
   /// \param[out] batch the resulting RecordBatch
   /// \return Status
+  ARROW_DEPRECATED("Deprecated in 9.0.0. Use Result-returning variant.")
   Status Flush(std::shared_ptr<RecordBatch>* batch);
 
+  /// \brief Finish current batch and optionally reset
+  /// \param[in] reset_builders the resulting RecordBatch
+  /// \return the resulting RecordBatch
+  Result<std::shared_ptr<RecordBatch>> Flush(bool reset_builders);
+
+  /// \brief Finish current batch and reset
+  /// \return the resulting RecordBatch
+  Result<std::shared_ptr<RecordBatch>> Flush();
+
   /// \brief Set the initial capacity for new builders
   void SetInitialCapacity(int64_t capacity);
 
diff --git a/cpp/src/arrow/table_builder_test.cc b/cpp/src/arrow/table_builder_test.cc
index e04a4a4478..ea56ea4110 100644
--- a/cpp/src/arrow/table_builder_test.cc
+++ b/cpp/src/arrow/table_builder_test.cc
@@ -83,7 +83,7 @@ TEST_F(TestRecordBatchBuilder, Basics) {
   auto schema = ExampleSchema1();
 
   std::unique_ptr<RecordBatchBuilder> builder;
-  ASSERT_OK(RecordBatchBuilder::Make(schema, pool_, &builder));
+  ASSERT_OK_AND_ASSIGN(builder, RecordBatchBuilder::Make(schema, pool_));
 
   std::vector<bool> is_valid = {false, true, true, true};
   std::vector<int32_t> f0_values = {0, 1, 2, 3};
@@ -124,9 +124,9 @@ TEST_F(TestRecordBatchBuilder, Basics) {
 
     if (i == kIter - 1) {
       // Do not flush in last iteration
-      ASSERT_OK(builder->Flush(false, &batch));
+      ASSERT_OK_AND_ASSIGN(batch, builder->Flush(false));
     } else {
-      ASSERT_OK(builder->Flush(&batch));
+      ASSERT_OK_AND_ASSIGN(batch, builder->Flush());
     }
 
     ASSERT_BATCHES_EQUAL(*expected, *batch);
@@ -141,7 +141,7 @@ TEST_F(TestRecordBatchBuilder, InvalidFieldLength) {
   auto schema = ExampleSchema1();
 
   std::unique_ptr<RecordBatchBuilder> builder;
-  ASSERT_OK(RecordBatchBuilder::Make(schema, pool_, &builder));
+  ASSERT_OK_AND_ASSIGN(builder, RecordBatchBuilder::Make(schema, pool_));
 
   std::vector<bool> is_valid = {false, true, true, true};
   std::vector<int32_t> f0_values = {0, 1, 2, 3};
@@ -149,8 +149,7 @@ TEST_F(TestRecordBatchBuilder, InvalidFieldLength) {
   AppendValues<Int32Builder, int32_t>(builder->GetFieldAs<Int32Builder>(0), f0_values,
                                       is_valid);
 
-  std::shared_ptr<RecordBatch> dummy;
-  ASSERT_RAISES(Invalid, builder->Flush(&dummy));
+  ASSERT_RAISES(Invalid, builder->Flush());
 }
 
 // In #ARROW-9969 dictionary types were not updated
@@ -168,14 +167,13 @@ TEST_F(TestRecordBatchBuilder, DictionaryTypes) {
   auto schema = ::arrow::schema({f0});
 
   std::unique_ptr<RecordBatchBuilder> builder;
-  ASSERT_OK(RecordBatchBuilder::Make(schema, pool_, &builder));
+  ASSERT_OK_AND_ASSIGN(builder, RecordBatchBuilder::Make(schema, pool_));
 
   auto b0 = builder->GetFieldAs<StringDictionaryBuilder>(0);
 
   AppendValues<StringDictionaryBuilder, std::string>(b0, f0_values, is_valid);
 
-  std::shared_ptr<RecordBatch> batch;
-  ASSERT_OK(builder->Flush(&batch));
+  ASSERT_OK_AND_ASSIGN(std::shared_ptr<RecordBatch> batch, builder->Flush());
 
   AssertTypeEqual(batch->column(0)->type(), batch->schema()->field(0)->type());
 }
diff --git a/docs/source/cpp/api/builder.rst b/docs/source/cpp/api/builder.rst
index 17f201b84f..1342ba2655 100644
--- a/docs/source/cpp/api/builder.rst
+++ b/docs/source/cpp/api/builder.rst
@@ -64,3 +64,10 @@ Dictionary-encoded
 
 .. doxygenclass:: arrow::DictionaryBuilder
    :members:
+
+
+Record Batch Builder
+====================
+
+.. doxygenclass:: arrow::RecordBatchBuilder
+   :members: