You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by li...@apache.org on 2022/06/21 19:46:41 UTC
[arrow] branch master updated: ARROW-16800: [C++] RecordBatchBuilder deprecate Status APIs, add Result APIs (#13356)
This is an automated email from the ASF dual-hosted git repository.
lidavidm pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/master by this push:
new ac0949bd5f ARROW-16800: [C++] RecordBatchBuilder deprecate Status APIs, add Result APIs (#13356)
ac0949bd5f is described below
commit ac0949bd5fb627e57785799b76232688f1a348f5
Author: Will Jones <wi...@gmail.com>
AuthorDate: Tue Jun 21 12:46:34 2022 -0700
ARROW-16800: [C++] RecordBatchBuilder deprecate Status APIs, add Result APIs (#13356)
Also added the class to the C++ API docs.
Authored-by: Will Jones <wi...@gmail.com>
Signed-off-by: David Li <li...@gmail.com>
---
c_glib/arrow-glib/table-builder.cpp | 16 ++++++------
cpp/src/arrow/adapters/orc/adapter.cc | 12 ++++-----
cpp/src/arrow/table_builder.cc | 46 ++++++++++++++++++++++++++---------
cpp/src/arrow/table_builder.h | 28 +++++++++++++++++++++
cpp/src/arrow/table_builder_test.cc | 16 ++++++------
docs/source/cpp/api/builder.rst | 7 ++++++
6 files changed, 91 insertions(+), 34 deletions(-)
diff --git a/c_glib/arrow-glib/table-builder.cpp b/c_glib/arrow-glib/table-builder.cpp
index 27839565cd..0ea41281db 100644
--- a/c_glib/arrow-glib/table-builder.cpp
+++ b/c_glib/arrow-glib/table-builder.cpp
@@ -155,11 +155,10 @@ garrow_record_batch_builder_new(GArrowSchema *schema, GError **error)
{
auto arrow_schema = garrow_schema_get_raw(schema);
auto memory_pool = arrow::default_memory_pool();
- std::unique_ptr<arrow::RecordBatchBuilder> arrow_builder;
- auto status = arrow::RecordBatchBuilder::Make(arrow_schema,
- memory_pool,
- &arrow_builder);
- if (garrow_error_check(error, status, "[record-batch-builder][new]")) {
+ auto builder_result = arrow::RecordBatchBuilder::Make(arrow_schema, memory_pool);
+
+ if (garrow::check(error, builder_result, "[record-batch-builder][new]")) {
+ std::unique_ptr<arrow::RecordBatchBuilder> arrow_builder = std::move(builder_result).ValueOrDie();
return garrow_record_batch_builder_new_raw(arrow_builder.release());
} else {
return NULL;
@@ -309,9 +308,10 @@ garrow_record_batch_builder_flush(GArrowRecordBatchBuilder *builder,
GError **error)
{
auto arrow_builder = garrow_record_batch_builder_get_raw(builder);
- std::shared_ptr<arrow::RecordBatch> arrow_record_batch;
- auto status = arrow_builder->Flush(&arrow_record_batch);
- if (garrow_error_check(error, status, "[record-batch-builder][flush]")) {
+ auto batch_result = arrow_builder->Flush();
+
+ if (garrow::check(error, batch_result, "[record-batch-builder][flush]")) {
+ std::shared_ptr<arrow::RecordBatch> arrow_record_batch = *batch_result;
return garrow_record_batch_new_raw(&arrow_record_batch);
} else {
return NULL;
diff --git a/cpp/src/arrow/adapters/orc/adapter.cc b/cpp/src/arrow/adapters/orc/adapter.cc
index a95b8325f5..5af5ebccc8 100644
--- a/cpp/src/arrow/adapters/orc/adapter.cc
+++ b/cpp/src/arrow/adapters/orc/adapter.cc
@@ -158,7 +158,8 @@ class OrcStripeReader : public RecordBatchReader {
}
std::unique_ptr<RecordBatchBuilder> builder;
- RETURN_NOT_OK(RecordBatchBuilder::Make(schema_, pool_, batch->numElements, &builder));
+ ARROW_ASSIGN_OR_RAISE(builder,
+ RecordBatchBuilder::Make(schema_, pool_, batch->numElements));
// The top-level type must be a struct to read into an arrow table
const auto& struct_batch = checked_cast<liborc::StructVectorBatch&>(*batch);
@@ -168,7 +169,7 @@ class OrcStripeReader : public RecordBatchReader {
batch->numElements, builder->GetField(i)));
}
- RETURN_NOT_OK(builder->Flush(out));
+ ARROW_ASSIGN_OR_RAISE(*out, builder->Flush());
return Status::OK();
}
@@ -445,7 +446,7 @@ class ORCFileReader::Impl {
ORC_END_CATCH_NOT_OK
std::unique_ptr<RecordBatchBuilder> builder;
- RETURN_NOT_OK(RecordBatchBuilder::Make(schema, pool_, nrows, &builder));
+ ARROW_ASSIGN_OR_RAISE(builder, RecordBatchBuilder::Make(schema, pool_, nrows));
// The top-level type must be a struct to read into an arrow table
const auto& struct_batch = checked_cast<liborc::StructVectorBatch&>(*batch);
@@ -457,9 +458,8 @@ class ORCFileReader::Impl {
batch->numElements, builder->GetField(i)));
}
}
- std::shared_ptr<RecordBatch> out;
- RETURN_NOT_OK(builder->Flush(&out));
- return out;
+
+ return builder->Flush();
}
Status Seek(int64_t row_number) {
diff --git a/cpp/src/arrow/table_builder.cc b/cpp/src/arrow/table_builder.cc
index c026c35575..414aa263cc 100644
--- a/cpp/src/arrow/table_builder.cc
+++ b/cpp/src/arrow/table_builder.cc
@@ -38,19 +38,43 @@ RecordBatchBuilder::RecordBatchBuilder(const std::shared_ptr<Schema>& schema,
Status RecordBatchBuilder::Make(const std::shared_ptr<Schema>& schema, MemoryPool* pool,
std::unique_ptr<RecordBatchBuilder>* builder) {
- return Make(schema, pool, kMinBuilderCapacity, builder);
+ ARROW_ASSIGN_OR_RAISE(*builder, Make(schema, pool, kMinBuilderCapacity))
+ return Status::OK();
}
Status RecordBatchBuilder::Make(const std::shared_ptr<Schema>& schema, MemoryPool* pool,
int64_t initial_capacity,
std::unique_ptr<RecordBatchBuilder>* builder) {
- builder->reset(new RecordBatchBuilder(schema, pool, initial_capacity));
- RETURN_NOT_OK((*builder)->CreateBuilders());
- return (*builder)->InitBuilders();
+ ARROW_ASSIGN_OR_RAISE(*builder, Make(schema, pool, initial_capacity))
+ return Status::OK();
+}
+
+Result<std::unique_ptr<RecordBatchBuilder>> RecordBatchBuilder::Make(
+ const std::shared_ptr<Schema>& schema, MemoryPool* pool) {
+ return Make(schema, pool, kMinBuilderCapacity);
+}
+
+Result<std::unique_ptr<RecordBatchBuilder>> RecordBatchBuilder::Make(
+ const std::shared_ptr<Schema>& schema, MemoryPool* pool, int64_t initial_capacity) {
+ auto builder = std::unique_ptr<RecordBatchBuilder>(
+ new RecordBatchBuilder(schema, pool, initial_capacity));
+ RETURN_NOT_OK(builder->CreateBuilders());
+ RETURN_NOT_OK(builder->InitBuilders());
+ return std::move(builder);
}
Status RecordBatchBuilder::Flush(bool reset_builders,
std::shared_ptr<RecordBatch>* batch) {
+ ARROW_ASSIGN_OR_RAISE(*batch, Flush(reset_builders));
+ return Status::OK();
+}
+
+Status RecordBatchBuilder::Flush(std::shared_ptr<RecordBatch>* batch) {
+ ARROW_ASSIGN_OR_RAISE(*batch, Flush(true));
+ return Status::OK();
+}
+
+Result<std::shared_ptr<RecordBatch>> RecordBatchBuilder::Flush(bool reset_builders) {
std::vector<std::shared_ptr<Array>> fields;
fields.resize(this->num_fields());
@@ -76,18 +100,18 @@ Status RecordBatchBuilder::Flush(bool reset_builders,
std::shared_ptr<Schema> schema =
std::make_shared<Schema>(std::move(schema_fields), schema_->metadata());
- *batch = RecordBatch::Make(std::move(schema), length, std::move(fields));
+ std::shared_ptr<RecordBatch> batch =
+ RecordBatch::Make(std::move(schema), length, std::move(fields));
+
if (reset_builders) {
- return InitBuilders();
- } else {
- return Status::OK();
+ ARROW_RETURN_NOT_OK(InitBuilders());
}
-}
-Status RecordBatchBuilder::Flush(std::shared_ptr<RecordBatch>* batch) {
- return Flush(true, batch);
+ return batch;
}
+Result<std::shared_ptr<RecordBatch>> RecordBatchBuilder::Flush() { return Flush(true); }
+
void RecordBatchBuilder::SetInitialCapacity(int64_t capacity) {
ARROW_CHECK_GT(capacity, 0) << "Initial capacity must be positive";
initial_capacity_ = capacity;
diff --git a/cpp/src/arrow/table_builder.h b/cpp/src/arrow/table_builder.h
index db130d3895..f2e030fff0 100644
--- a/cpp/src/arrow/table_builder.h
+++ b/cpp/src/arrow/table_builder.h
@@ -42,6 +42,7 @@ class ARROW_EXPORT RecordBatchBuilder {
/// \param[in] schema The schema for the record batch
/// \param[in] pool A MemoryPool to use for allocations
/// \param[in] builder the created builder instance
+ ARROW_DEPRECATED("Deprecated in 9.0.0. Use Result-returning variant.")
static Status Make(const std::shared_ptr<Schema>& schema, MemoryPool* pool,
std::unique_ptr<RecordBatchBuilder>* builder);
@@ -50,10 +51,26 @@ class ARROW_EXPORT RecordBatchBuilder {
/// \param[in] pool A MemoryPool to use for allocations
/// \param[in] initial_capacity The initial capacity for the builders
/// \param[in] builder the created builder instance
+ ARROW_DEPRECATED("Deprecated in 9.0.0. Use Result-returning variant.")
static Status Make(const std::shared_ptr<Schema>& schema, MemoryPool* pool,
int64_t initial_capacity,
std::unique_ptr<RecordBatchBuilder>* builder);
+ /// \brief Create and initialize a RecordBatchBuilder
+ /// \param[in] schema The schema for the record batch
+ /// \param[in] pool A MemoryPool to use for allocations
+ /// \return the created builder instance
+ static Result<std::unique_ptr<RecordBatchBuilder>> Make(
+ const std::shared_ptr<Schema>& schema, MemoryPool* pool);
+
+ /// \brief Create and initialize a RecordBatchBuilder
+ /// \param[in] schema The schema for the record batch
+ /// \param[in] pool A MemoryPool to use for allocations
+ /// \param[in] initial_capacity The initial capacity for the builders
+ /// \return the created builder instance
+ static Result<std::unique_ptr<RecordBatchBuilder>> Make(
+ const std::shared_ptr<Schema>& schema, MemoryPool* pool, int64_t initial_capacity);
+
/// \brief Get base pointer to field builder
/// \param i the field index
/// \return pointer to ArrayBuilder
@@ -71,13 +88,24 @@ class ARROW_EXPORT RecordBatchBuilder {
/// \param[in] reset_builders the resulting RecordBatch
/// \param[out] batch the resulting RecordBatch
/// \return Status
+ ARROW_DEPRECATED("Deprecated in 9.0.0. Use Result-returning variant.")
Status Flush(bool reset_builders, std::shared_ptr<RecordBatch>* batch);
/// \brief Finish current batch and reset
/// \param[out] batch the resulting RecordBatch
/// \return Status
+ ARROW_DEPRECATED("Deprecated in 9.0.0. Use Result-returning variant.")
Status Flush(std::shared_ptr<RecordBatch>* batch);
+ /// \brief Finish current batch and optionally reset
+ /// \param[in] reset_builders the resulting RecordBatch
+ /// \return the resulting RecordBatch
+ Result<std::shared_ptr<RecordBatch>> Flush(bool reset_builders);
+
+ /// \brief Finish current batch and reset
+ /// \return the resulting RecordBatch
+ Result<std::shared_ptr<RecordBatch>> Flush();
+
/// \brief Set the initial capacity for new builders
void SetInitialCapacity(int64_t capacity);
diff --git a/cpp/src/arrow/table_builder_test.cc b/cpp/src/arrow/table_builder_test.cc
index e04a4a4478..ea56ea4110 100644
--- a/cpp/src/arrow/table_builder_test.cc
+++ b/cpp/src/arrow/table_builder_test.cc
@@ -83,7 +83,7 @@ TEST_F(TestRecordBatchBuilder, Basics) {
auto schema = ExampleSchema1();
std::unique_ptr<RecordBatchBuilder> builder;
- ASSERT_OK(RecordBatchBuilder::Make(schema, pool_, &builder));
+ ASSERT_OK_AND_ASSIGN(builder, RecordBatchBuilder::Make(schema, pool_));
std::vector<bool> is_valid = {false, true, true, true};
std::vector<int32_t> f0_values = {0, 1, 2, 3};
@@ -124,9 +124,9 @@ TEST_F(TestRecordBatchBuilder, Basics) {
if (i == kIter - 1) {
// Do not flush in last iteration
- ASSERT_OK(builder->Flush(false, &batch));
+ ASSERT_OK_AND_ASSIGN(batch, builder->Flush(false));
} else {
- ASSERT_OK(builder->Flush(&batch));
+ ASSERT_OK_AND_ASSIGN(batch, builder->Flush());
}
ASSERT_BATCHES_EQUAL(*expected, *batch);
@@ -141,7 +141,7 @@ TEST_F(TestRecordBatchBuilder, InvalidFieldLength) {
auto schema = ExampleSchema1();
std::unique_ptr<RecordBatchBuilder> builder;
- ASSERT_OK(RecordBatchBuilder::Make(schema, pool_, &builder));
+ ASSERT_OK_AND_ASSIGN(builder, RecordBatchBuilder::Make(schema, pool_));
std::vector<bool> is_valid = {false, true, true, true};
std::vector<int32_t> f0_values = {0, 1, 2, 3};
@@ -149,8 +149,7 @@ TEST_F(TestRecordBatchBuilder, InvalidFieldLength) {
AppendValues<Int32Builder, int32_t>(builder->GetFieldAs<Int32Builder>(0), f0_values,
is_valid);
- std::shared_ptr<RecordBatch> dummy;
- ASSERT_RAISES(Invalid, builder->Flush(&dummy));
+ ASSERT_RAISES(Invalid, builder->Flush());
}
// In #ARROW-9969 dictionary types were not updated
@@ -168,14 +167,13 @@ TEST_F(TestRecordBatchBuilder, DictionaryTypes) {
auto schema = ::arrow::schema({f0});
std::unique_ptr<RecordBatchBuilder> builder;
- ASSERT_OK(RecordBatchBuilder::Make(schema, pool_, &builder));
+ ASSERT_OK_AND_ASSIGN(builder, RecordBatchBuilder::Make(schema, pool_));
auto b0 = builder->GetFieldAs<StringDictionaryBuilder>(0);
AppendValues<StringDictionaryBuilder, std::string>(b0, f0_values, is_valid);
- std::shared_ptr<RecordBatch> batch;
- ASSERT_OK(builder->Flush(&batch));
+ ASSERT_OK_AND_ASSIGN(std::shared_ptr<RecordBatch> batch, builder->Flush());
AssertTypeEqual(batch->column(0)->type(), batch->schema()->field(0)->type());
}
diff --git a/docs/source/cpp/api/builder.rst b/docs/source/cpp/api/builder.rst
index 17f201b84f..1342ba2655 100644
--- a/docs/source/cpp/api/builder.rst
+++ b/docs/source/cpp/api/builder.rst
@@ -64,3 +64,10 @@ Dictionary-encoded
.. doxygenclass:: arrow::DictionaryBuilder
:members:
+
+
+Record Batch Builder
+====================
+
+.. doxygenclass:: arrow::RecordBatchBuilder
+ :members: