You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by we...@apache.org on 2019/06/26 01:18:54 UTC

[arrow] branch master updated: ARROW-5702: [C++] parquet::arrow::FileReader::GetSchema()

This is an automated email from the ASF dual-hosted git repository.

wesm pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/master by this push:
     new 58c8902  ARROW-5702: [C++] parquet::arrow::FileReader::GetSchema()
58c8902 is described below

commit 58c89028e5fb31507d973e80a155e903309a8ebe
Author: Wes McKinney <we...@apache.org>
AuthorDate: Tue Jun 25 20:18:46 2019 -0500

    ARROW-5702: [C++] parquet::arrow::FileReader::GetSchema()
    
    This adds the following method to `parquet::arrow::FileReader`:
    
    ```cpp
    /// \brief Return arrow schema for all the columns.
    ::arrow::Status GetSchema(std::shared_ptr<::arrow::Schema>* out);
    ```
    
    This might be useful for, e.g., #4627.
    
    Author: Wes McKinney <we...@apache.org>
    Author: Romain Francois <ro...@rstudio.com>
    
    Closes #4668 from romainfrancois/ARROW-5702/FileReader_GetSchema and squashes the following commits:
    
    66dac6fe5 <Wes McKinney> Fix compilation, use GetSchema in a unit test
    c0f0655f7 <Romain Francois> + parquet::arrow::FileReader::GetSchema() method
---
 cpp/src/parquet/arrow/arrow-reader-writer-test.cc |  5 +++++
 cpp/src/parquet/arrow/reader.cc                   | 14 +++++++++++---
 cpp/src/parquet/arrow/reader.h                    |  3 +++
 3 files changed, 19 insertions(+), 3 deletions(-)

diff --git a/cpp/src/parquet/arrow/arrow-reader-writer-test.cc b/cpp/src/parquet/arrow/arrow-reader-writer-test.cc
index 5781ad5..2c5c5df 100644
--- a/cpp/src/parquet/arrow/arrow-reader-writer-test.cc
+++ b/cpp/src/parquet/arrow/arrow-reader-writer-test.cc
@@ -2686,6 +2686,11 @@ TEST_P(TestArrowReaderAdHocSparkAndHvr, ReadDecimals) {
   std::shared_ptr<::arrow::Table> table;
   ASSERT_OK_NO_THROW(arrow_reader->ReadTable(&table));
 
+  std::shared_ptr<::arrow::Schema> schema;
+  ASSERT_OK_NO_THROW(arrow_reader->GetSchema(&schema));
+  ASSERT_EQ(1, schema->num_fields());
+  ASSERT_TRUE(schema->field(0)->type()->Equals(*decimal_type));
+
   ASSERT_EQ(1, table->num_columns());
 
   constexpr int32_t expected_length = 24;
diff --git a/cpp/src/parquet/arrow/reader.cc b/cpp/src/parquet/arrow/reader.cc
index 484719e..3fe37b0 100644
--- a/cpp/src/parquet/arrow/reader.cc
+++ b/cpp/src/parquet/arrow/reader.cc
@@ -485,9 +485,13 @@ Status FileReader::Impl::ReadColumn(int i, std::shared_ptr<ChunkedArray>* out) {
 
 Status FileReader::Impl::GetSchema(const std::vector<int>& indices,
                                    std::shared_ptr<::arrow::Schema>* out) {
-  auto descr = reader_->metadata()->schema();
-  auto parquet_key_value_metadata = reader_->metadata()->key_value_metadata();
-  return FromParquetSchema(descr, indices, parquet_key_value_metadata, out);
+  return FromParquetSchema(reader_->metadata()->schema(), indices,
+                           reader_->metadata()->key_value_metadata(), out);
+}
+
+Status FileReader::Impl::GetSchema(std::shared_ptr<::arrow::Schema>* out) {
+  return FromParquetSchema(reader_->metadata()->schema(),
+                           reader_->metadata()->key_value_metadata(), out);
 }
 
 Status FileReader::Impl::ReadColumnChunk(int column_index, int row_group_index,
@@ -739,6 +743,10 @@ Status FileReader::GetColumn(int i, std::unique_ptr<ColumnReader>* out) {
   return impl_->GetColumn(i, iterator_factory, out);
 }
 
+Status FileReader::GetSchema(std::shared_ptr<::arrow::Schema>* out) {
+  return impl_->GetSchema(out);
+}
+
 Status FileReader::GetSchema(const std::vector<int>& indices,
                              std::shared_ptr<::arrow::Schema>* out) {
   return impl_->GetSchema(indices, out);
diff --git a/cpp/src/parquet/arrow/reader.h b/cpp/src/parquet/arrow/reader.h
index 48c9237..97e93b9 100644
--- a/cpp/src/parquet/arrow/reader.h
+++ b/cpp/src/parquet/arrow/reader.h
@@ -168,6 +168,9 @@ class PARQUET_EXPORT FileReader {
   // Returns error status if the column of interest is not flat.
   ::arrow::Status GetColumn(int i, std::unique_ptr<ColumnReader>* out);
 
+  /// \brief Return arrow schema for all the columns.
+  ::arrow::Status GetSchema(std::shared_ptr<::arrow::Schema>* out);
+
   /// \brief Return arrow schema by apply selection of column indices.
   /// \returns error status if passed wrong indices.
   ::arrow::Status GetSchema(const std::vector<int>& indices,