You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by we...@apache.org on 2019/06/26 01:18:54 UTC
[arrow] branch master updated: ARROW-5702: [C++]
parquet::arrow::FileReader::GetSchema()
This is an automated email from the ASF dual-hosted git repository.
wesm pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/master by this push:
new 58c8902 ARROW-5702: [C++] parquet::arrow::FileReader::GetSchema()
58c8902 is described below
commit 58c89028e5fb31507d973e80a155e903309a8ebe
Author: Wes McKinney <we...@apache.org>
AuthorDate: Tue Jun 25 20:18:46 2019 -0500
ARROW-5702: [C++] parquet::arrow::FileReader::GetSchema()
This adds the following method to `parquet::arrow::FileReader`:
```cpp
/// \brief Return arrow schema for all the columns.
::arrow::Status GetSchema(std::shared_ptr<::arrow::Schema>* out);
```
This might be useful for, e.g., #4627.
Author: Wes McKinney <we...@apache.org>
Author: Romain Francois <ro...@rstudio.com>
Closes #4668 from romainfrancois/ARROW-5702/FileReader_GetSchema and squashes the following commits:
66dac6fe5 <Wes McKinney> Fix compilation, use GetSchema in a unit test
c0f0655f7 <Romain Francois> + parquet::arrow::FileReader::GetSchema() method
---
cpp/src/parquet/arrow/arrow-reader-writer-test.cc | 5 +++++
cpp/src/parquet/arrow/reader.cc | 14 +++++++++++---
cpp/src/parquet/arrow/reader.h | 3 +++
3 files changed, 19 insertions(+), 3 deletions(-)
diff --git a/cpp/src/parquet/arrow/arrow-reader-writer-test.cc b/cpp/src/parquet/arrow/arrow-reader-writer-test.cc
index 5781ad5..2c5c5df 100644
--- a/cpp/src/parquet/arrow/arrow-reader-writer-test.cc
+++ b/cpp/src/parquet/arrow/arrow-reader-writer-test.cc
@@ -2686,6 +2686,11 @@ TEST_P(TestArrowReaderAdHocSparkAndHvr, ReadDecimals) {
std::shared_ptr<::arrow::Table> table;
ASSERT_OK_NO_THROW(arrow_reader->ReadTable(&table));
+ std::shared_ptr<::arrow::Schema> schema;
+ ASSERT_OK_NO_THROW(arrow_reader->GetSchema(&schema));
+ ASSERT_EQ(1, schema->num_fields());
+ ASSERT_TRUE(schema->field(0)->type()->Equals(*decimal_type));
+
ASSERT_EQ(1, table->num_columns());
constexpr int32_t expected_length = 24;
diff --git a/cpp/src/parquet/arrow/reader.cc b/cpp/src/parquet/arrow/reader.cc
index 484719e..3fe37b0 100644
--- a/cpp/src/parquet/arrow/reader.cc
+++ b/cpp/src/parquet/arrow/reader.cc
@@ -485,9 +485,13 @@ Status FileReader::Impl::ReadColumn(int i, std::shared_ptr<ChunkedArray>* out) {
Status FileReader::Impl::GetSchema(const std::vector<int>& indices,
std::shared_ptr<::arrow::Schema>* out) {
- auto descr = reader_->metadata()->schema();
- auto parquet_key_value_metadata = reader_->metadata()->key_value_metadata();
- return FromParquetSchema(descr, indices, parquet_key_value_metadata, out);
+ return FromParquetSchema(reader_->metadata()->schema(), indices,
+ reader_->metadata()->key_value_metadata(), out);
+}
+
+Status FileReader::Impl::GetSchema(std::shared_ptr<::arrow::Schema>* out) {
+ return FromParquetSchema(reader_->metadata()->schema(),
+ reader_->metadata()->key_value_metadata(), out);
}
Status FileReader::Impl::ReadColumnChunk(int column_index, int row_group_index,
@@ -739,6 +743,10 @@ Status FileReader::GetColumn(int i, std::unique_ptr<ColumnReader>* out) {
return impl_->GetColumn(i, iterator_factory, out);
}
+Status FileReader::GetSchema(std::shared_ptr<::arrow::Schema>* out) {
+ return impl_->GetSchema(out);
+}
+
Status FileReader::GetSchema(const std::vector<int>& indices,
std::shared_ptr<::arrow::Schema>* out) {
return impl_->GetSchema(indices, out);
diff --git a/cpp/src/parquet/arrow/reader.h b/cpp/src/parquet/arrow/reader.h
index 48c9237..97e93b9 100644
--- a/cpp/src/parquet/arrow/reader.h
+++ b/cpp/src/parquet/arrow/reader.h
@@ -168,6 +168,9 @@ class PARQUET_EXPORT FileReader {
// Returns error status if the column of interest is not flat.
::arrow::Status GetColumn(int i, std::unique_ptr<ColumnReader>* out);
+ /// \brief Return arrow schema for all the columns.
+ ::arrow::Status GetSchema(std::shared_ptr<::arrow::Schema>* out);
+
/// \brief Return arrow schema by apply selection of column indices.
/// \returns error status if passed wrong indices.
::arrow::Status GetSchema(const std::vector<int>& indices,