You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by we...@apache.org on 2018/11/04 19:08:10 UTC
[arrow] branch master updated: ARROW-3696: [C++] Add feather::TableWriter::Write(table)
This is an automated email from the ASF dual-hosted git repository.
wesm pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/master by this push:
new b1d490d ARROW-3696: [C++] Add feather::TableWriter::Write(table)
b1d490d is described below
commit b1d490daf4ca78c8e57634c76ec35483b609fcfc
Author: Kouhei Sutou <ko...@clear-code.com>
AuthorDate: Sun Nov 4 14:07:59 2018 -0500
ARROW-3696: [C++] Add feather::TableWriter::Write(table)
Author: Kouhei Sutou <ko...@clear-code.com>
Closes #2898 from kou/cpp-feather-write-table and squashes the following commits:
21699b588 <Kouhei Sutou> Simplify
8d4cb96b2 <Kouhei Sutou> Add feather::TableWriter::Write(table)
---
cpp/src/arrow/ipc/feather-test.cc | 46 +++++++++++----------------------------
cpp/src/arrow/ipc/feather.cc | 15 +++++++++++++
cpp/src/arrow/ipc/feather.h | 6 +++++
3 files changed, 34 insertions(+), 33 deletions(-)
diff --git a/cpp/src/arrow/ipc/feather-test.cc b/cpp/src/arrow/ipc/feather-test.cc
index 2e44257..d032710 100644
--- a/cpp/src/arrow/ipc/feather-test.cc
+++ b/cpp/src/arrow/ipc/feather-test.cc
@@ -276,24 +276,6 @@ void CheckBatches(const RecordBatch& expected, const RecordBatch& result) {
}
}
-std::shared_ptr<RecordBatch> TableToBatch(const Table& table) {
- TableBatchReader reader(table);
- std::shared_ptr<RecordBatch> batch;
- EXPECT_OK(reader.ReadNext(&batch));
- return batch;
-}
-
-void CheckTables(const Table& expected, const Table& result) {
- if (!result.Equals(expected)) {
- std::stringstream pp_result;
- std::stringstream pp_expected;
-
- EXPECT_OK(PrettyPrint(*TableToBatch(result), 0, &pp_result));
- EXPECT_OK(PrettyPrint(*TableToBatch(expected), 0, &pp_expected));
- FAIL() << "Got: " << pp_result.str() << "\nExpected: " << pp_expected.str();
- }
-}
-
class TestTableReader : public ::testing::Test {
public:
void SetUp() {
@@ -341,7 +323,7 @@ TEST_F(TestTableReader, ReadIndices) {
fields.push_back(std::make_shared<Field>("f3", int32()));
arrays.push_back(batch2->column(1));
auto expected = Table::Make(std::make_shared<Schema>(fields), arrays);
- CheckTables(*expected, *result);
+ AssertTablesEqual(*expected, *result);
}
TEST_F(TestTableReader, ReadNames) {
@@ -366,7 +348,7 @@ TEST_F(TestTableReader, ReadNames) {
fields.push_back(std::make_shared<Field>("f3", int32()));
arrays.push_back(batch2->column(1));
auto expected = Table::Make(std::make_shared<Schema>(fields), arrays);
- CheckTables(*expected, *result);
+ AssertTablesEqual(*expected, *result);
}
class TestTableWriter : public ::testing::Test {
@@ -386,18 +368,16 @@ class TestTableWriter : public ::testing::Test {
ASSERT_OK(TableReader::Open(buffer, &reader_));
}
- void CheckBatch(const RecordBatch& batch) {
- for (int i = 0; i < batch.num_columns(); ++i) {
- ASSERT_OK(writer_->Append(batch.column_name(i), *batch.column(i)));
- }
+ void CheckBatch(std::shared_ptr<RecordBatch> batch) {
+ std::shared_ptr<Table> table;
+ std::vector<std::shared_ptr<RecordBatch>> batches = {batch};
+ ASSERT_OK(Table::FromRecordBatches(batches, &table));
+ ASSERT_OK(writer_->Write(*table));
Finish();
- std::shared_ptr<Table> table;
- ASSERT_OK(reader_->Read(&table));
- TableBatchReader table_batch_reader(*table);
- std::shared_ptr<RecordBatch> result;
- ASSERT_OK(table_batch_reader.ReadNext(&result));
- CheckBatches(batch, *result);
+ std::shared_ptr<Table> read_table;
+ ASSERT_OK(reader_->Read(&read_table));
+ AssertTablesEqual(*table, *read_table);
}
protected:
@@ -457,7 +437,7 @@ TEST_F(TestTableWriter, PrimitiveRoundTrip) {
TEST_F(TestTableWriter, CategoryRoundtrip) {
std::shared_ptr<RecordBatch> batch;
ASSERT_OK(MakeDictionaryFlat(&batch));
- CheckBatch(*batch);
+ CheckBatch(batch);
}
TEST_F(TestTableWriter, TimeTypes) {
@@ -503,13 +483,13 @@ TEST_F(TestTableWriter, TimeTypes) {
}
auto batch = RecordBatch::Make(schema, 7, std::move(arrays));
- CheckBatch(*batch);
+ CheckBatch(batch);
}
TEST_F(TestTableWriter, VLenPrimitiveRoundTrip) {
std::shared_ptr<RecordBatch> batch;
ASSERT_OK(MakeStringTypesRecordBatch(&batch));
- CheckBatch(*batch);
+ CheckBatch(batch);
}
TEST_F(TestTableWriter, PrimitiveNullRoundTrip) {
diff --git a/cpp/src/arrow/ipc/feather.cc b/cpp/src/arrow/ipc/feather.cc
index 749b3f9..ebdb335 100644
--- a/cpp/src/arrow/ipc/feather.cc
+++ b/cpp/src/arrow/ipc/feather.cc
@@ -810,6 +810,19 @@ class TableWriter::TableWriterImpl : public ArrayVisitor {
return current_column_->Finish();
}
+ Status Write(const Table& table) {
+ for (int i = 0; i < table.num_columns(); ++i) {
+ auto column = table.column(i);
+ current_column_ = metadata_.AddColumn(column->name());
+ auto chunked_array = column->data();
+ for (const auto chunk : chunked_array->chunks()) {
+ RETURN_NOT_OK(chunk->Accept(this));
+ }
+ RETURN_NOT_OK(current_column_->Finish());
+ }
+ return Status::OK();
+ }
+
private:
Status CheckStarted() {
if (!initialized_stream_) {
@@ -850,6 +863,8 @@ Status TableWriter::Append(const std::string& name, const Array& values) {
return impl_->Append(name, values);
}
+Status TableWriter::Write(const Table& table) { return impl_->Write(table); }
+
Status TableWriter::Finalize() { return impl_->Finalize(); }
} // namespace feather
diff --git a/cpp/src/arrow/ipc/feather.h b/cpp/src/arrow/ipc/feather.h
index 579392a..b6bd4ff 100644
--- a/cpp/src/arrow/ipc/feather.h
+++ b/cpp/src/arrow/ipc/feather.h
@@ -150,6 +150,12 @@ class ARROW_EXPORT TableWriter {
/// \return Status
Status Append(const std::string& name, const Array& values);
+ /// \brief Write a table to the file
+ ///
+ /// \param[in] table the table to be written
+ /// \return Status
+ Status Write(const Table& table);
+
/// \brief Finalize the file by writing the file metadata and footer
/// \return Status
Status Finalize();