You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@parquet.apache.org by we...@apache.org on 2016/09/06 20:26:46 UTC

parquet-cpp git commit: PARQUET-710: Remove unneeded private member variables from RowGroupReader ABI

Repository: parquet-cpp
Updated Branches:
  refs/heads/master 708507f3e -> 441d85b43


PARQUET-710: Remove unneeded private member variables from RowGroupReader ABI

The injection capability is retained.
 The private members of the `RowGroupReader` class are removed.

Author: Deepak Majeti <de...@hpe.com>

Closes #154 from majetideepak/PARQUET-710 and squashes the following commits:

11bfaa6 [Deepak Majeti] Modify comments that mention PIMPL
6dfd40d [Deepak Majeti] clang-format
4d58dfa [Deepak Majeti] Removed private members in Rowgroup


Project: http://git-wip-us.apache.org/repos/asf/parquet-cpp/repo
Commit: http://git-wip-us.apache.org/repos/asf/parquet-cpp/commit/441d85b4
Tree: http://git-wip-us.apache.org/repos/asf/parquet-cpp/tree/441d85b4
Diff: http://git-wip-us.apache.org/repos/asf/parquet-cpp/diff/441d85b4

Branch: refs/heads/master
Commit: 441d85b433ecc5f103ba3da68be34256599adf11
Parents: 708507f
Author: Deepak Majeti <de...@hpe.com>
Authored: Tue Sep 6 16:26:38 2016 -0400
Committer: Wes McKinney <we...@twosigma.com>
Committed: Tue Sep 6 16:26:38 2016 -0400

----------------------------------------------------------------------
 src/parquet/file/metadata.cc        | 24 +++++++++++++++++-------
 src/parquet/file/metadata.h         |  7 +++++--
 src/parquet/file/reader-internal.cc |  7 +++++--
 src/parquet/file/reader-internal.h  |  2 ++
 src/parquet/file/reader.cc          | 15 ++++++++-------
 src/parquet/file/reader.h           | 22 ++++++++++------------
 src/parquet/file/writer.h           | 12 ++++++++----
 7 files changed, 55 insertions(+), 34 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/441d85b4/src/parquet/file/metadata.cc
----------------------------------------------------------------------
diff --git a/src/parquet/file/metadata.cc b/src/parquet/file/metadata.cc
index c1fd767..4e298a8 100644
--- a/src/parquet/file/metadata.cc
+++ b/src/parquet/file/metadata.cc
@@ -166,8 +166,9 @@ int64_t ColumnChunkMetaData::total_compressed_size() const {
 // row-group metadata
 class RowGroupMetaData::RowGroupMetaDataImpl {
  public:
-  explicit RowGroupMetaDataImpl(const format::RowGroup* row_group)
-      : row_group_(row_group) {}
+  explicit RowGroupMetaDataImpl(
+      const format::RowGroup* row_group, const SchemaDescriptor* schema)
+      : row_group_(row_group), schema_(schema) {}
   ~RowGroupMetaDataImpl() {}
 
   inline int num_columns() const { return row_group_->columns.size(); }
@@ -176,6 +177,8 @@ class RowGroupMetaData::RowGroupMetaDataImpl {
 
   inline int64_t total_byte_size() const { return row_group_->total_byte_size; }
 
+  inline const SchemaDescriptor* schema() const { return schema_; }
+
   std::unique_ptr<ColumnChunkMetaData> ColumnChunk(int i) {
     DCHECK(i < num_columns()) << "The file only has " << num_columns()
                               << " columns, requested metadata for column: " << i;
@@ -185,15 +188,18 @@ class RowGroupMetaData::RowGroupMetaDataImpl {
 
  private:
   const format::RowGroup* row_group_;
+  const SchemaDescriptor* schema_;
 };
 
-std::unique_ptr<RowGroupMetaData> RowGroupMetaData::Make(const uint8_t* metadata) {
-  return std::unique_ptr<RowGroupMetaData>(new RowGroupMetaData(metadata));
+std::unique_ptr<RowGroupMetaData> RowGroupMetaData::Make(
+    const uint8_t* metadata, const SchemaDescriptor* schema) {
+  return std::unique_ptr<RowGroupMetaData>(new RowGroupMetaData(metadata, schema));
 }
 
-RowGroupMetaData::RowGroupMetaData(const uint8_t* metadata)
+RowGroupMetaData::RowGroupMetaData(
+    const uint8_t* metadata, const SchemaDescriptor* schema)
     : impl_{std::unique_ptr<RowGroupMetaDataImpl>(new RowGroupMetaDataImpl(
-          reinterpret_cast<const format::RowGroup*>(metadata)))} {}
+          reinterpret_cast<const format::RowGroup*>(metadata), schema))} {}
 RowGroupMetaData::~RowGroupMetaData() {}
 
 int RowGroupMetaData::num_columns() const {
@@ -208,6 +214,10 @@ int64_t RowGroupMetaData::total_byte_size() const {
   return impl_->total_byte_size();
 }
 
+const SchemaDescriptor* RowGroupMetaData::schema() const {
+  return impl_->schema();
+}
+
 std::unique_ptr<ColumnChunkMetaData> RowGroupMetaData::ColumnChunk(int i) const {
   return impl_->ColumnChunk(i);
 }
@@ -238,7 +248,7 @@ class FileMetaData::FileMetaDataImpl {
         << "The file only has " << num_row_groups()
         << " row groups, requested metadata for row group: " << i;
     return RowGroupMetaData::Make(
-        reinterpret_cast<const uint8_t*>(&metadata_->row_groups[i]));
+        reinterpret_cast<const uint8_t*>(&metadata_->row_groups[i]), &schema_);
   }
 
   const SchemaDescriptor* schema_descriptor() const { return &schema_; }

http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/441d85b4/src/parquet/file/metadata.h
----------------------------------------------------------------------
diff --git a/src/parquet/file/metadata.h b/src/parquet/file/metadata.h
index c35f82f..78ea53b 100644
--- a/src/parquet/file/metadata.h
+++ b/src/parquet/file/metadata.h
@@ -75,7 +75,8 @@ class PARQUET_EXPORT ColumnChunkMetaData {
 class PARQUET_EXPORT RowGroupMetaData {
  public:
   // API convenience to get a MetaData accessor
-  static std::unique_ptr<RowGroupMetaData> Make(const uint8_t* metadata);
+  static std::unique_ptr<RowGroupMetaData> Make(
+      const uint8_t* metadata, const SchemaDescriptor* schema);
 
   ~RowGroupMetaData();
 
@@ -83,10 +84,12 @@ class PARQUET_EXPORT RowGroupMetaData {
   int num_columns() const;
   int64_t num_rows() const;
   int64_t total_byte_size() const;
+  // Return const-pointer to make it clear that this object is not to be copied
+  const SchemaDescriptor* schema() const;
   std::unique_ptr<ColumnChunkMetaData> ColumnChunk(int i) const;
 
  private:
-  explicit RowGroupMetaData(const uint8_t* metadata);
+  explicit RowGroupMetaData(const uint8_t* metadata, const SchemaDescriptor* schema);
   // PIMPL Idiom
   class RowGroupMetaDataImpl;
   std::unique_ptr<RowGroupMetaDataImpl> impl_;

http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/441d85b4/src/parquet/file/reader-internal.cc
----------------------------------------------------------------------
diff --git a/src/parquet/file/reader-internal.cc b/src/parquet/file/reader-internal.cc
index 5c1bc37..1cb91f0 100644
--- a/src/parquet/file/reader-internal.cc
+++ b/src/parquet/file/reader-internal.cc
@@ -145,6 +145,10 @@ const RowGroupMetaData* SerializedRowGroup::metadata() const {
   return row_group_metadata_.get();
 }
 
+const ReaderProperties* SerializedRowGroup::properties() const {
+  return &properties_;
+}
+
 std::unique_ptr<PageReader> SerializedRowGroup::GetColumnPageReader(int i) {
   // Read column chunk from the file
   auto col = row_group_metadata_->ColumnChunk(i);
@@ -195,8 +199,7 @@ std::shared_ptr<RowGroupReader> SerializedFile::GetRowGroup(int i) {
   std::unique_ptr<SerializedRowGroup> contents(new SerializedRowGroup(
       source_.get(), std::move(file_metadata_->RowGroup(i)), properties_));
 
-  return std::make_shared<RowGroupReader>(
-      file_metadata_->schema_descriptor(), std::move(contents), properties_.allocator());
+  return std::make_shared<RowGroupReader>(std::move(contents));
 }
 
 const FileMetaData* SerializedFile::metadata() const {

http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/441d85b4/src/parquet/file/reader-internal.h
----------------------------------------------------------------------
diff --git a/src/parquet/file/reader-internal.h b/src/parquet/file/reader-internal.h
index 48e2daf..faedda0 100644
--- a/src/parquet/file/reader-internal.h
+++ b/src/parquet/file/reader-internal.h
@@ -76,6 +76,8 @@ class SerializedRowGroup : public RowGroupReader::Contents {
 
   virtual const RowGroupMetaData* metadata() const;
 
+  virtual const ReaderProperties* properties() const;
+
   virtual std::unique_ptr<PageReader> GetColumnPageReader(int i);
 
  private:

http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/441d85b4/src/parquet/file/reader.cc
----------------------------------------------------------------------
diff --git a/src/parquet/file/reader.cc b/src/parquet/file/reader.cc
index 62481f7..b593ea0 100644
--- a/src/parquet/file/reader.cc
+++ b/src/parquet/file/reader.cc
@@ -41,17 +41,18 @@ namespace parquet {
 // ----------------------------------------------------------------------
 // RowGroupReader public API
 
-RowGroupReader::RowGroupReader(const SchemaDescriptor* schema,
-    std::unique_ptr<Contents> contents, MemoryAllocator* allocator)
-    : schema_(schema), contents_(std::move(contents)), allocator_(allocator) {}
+RowGroupReader::RowGroupReader(std::unique_ptr<Contents> contents)
+    : contents_(std::move(contents)) {}
 
 std::shared_ptr<ColumnReader> RowGroupReader::Column(int i) {
-  DCHECK(i < schema_->num_columns()) << "The RowGroup only has " << schema_->num_columns()
-                                     << "columns, requested column: " << i;
-  const ColumnDescriptor* descr = schema_->Column(i);
+  DCHECK(i < metadata()->num_columns()) << "The RowGroup only has "
+                                        << metadata()->num_columns()
+                                        << "columns, requested column: " << i;
+  const ColumnDescriptor* descr = metadata()->schema()->Column(i);
 
   std::unique_ptr<PageReader> page_reader = contents_->GetColumnPageReader(i);
-  return ColumnReader::Make(descr, std::move(page_reader), allocator_);
+  return ColumnReader::Make(descr, std::move(page_reader),
+      const_cast<ReaderProperties*>(contents_->properties())->allocator());
 }
 
 // Returns the rowgroup metadata

http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/441d85b4/src/parquet/file/reader.h
----------------------------------------------------------------------
diff --git a/src/parquet/file/reader.h b/src/parquet/file/reader.h
index baa3e30..c1ee9d4 100644
--- a/src/parquet/file/reader.h
+++ b/src/parquet/file/reader.h
@@ -38,15 +38,17 @@ class RandomAccessSource;
 
 class PARQUET_EXPORT RowGroupReader {
  public:
-  // Forward declare the PIMPL
+  // Forward declare a virtual class 'Contents' to aid dependency injection and more
+  // easily create test fixtures
+  // An implementation of the Contents class is defined in the .cc file
   struct Contents {
     virtual ~Contents() {}
     virtual std::unique_ptr<PageReader> GetColumnPageReader(int i) = 0;
     virtual const RowGroupMetaData* metadata() const = 0;
+    virtual const ReaderProperties* properties() const = 0;
   };
 
-  RowGroupReader(const SchemaDescriptor* schema, std::unique_ptr<Contents> contents,
-      MemoryAllocator* allocator);
+  explicit RowGroupReader(std::unique_ptr<Contents> contents);
 
   // Returns the rowgroup metadata
   const RowGroupMetaData* metadata() const;
@@ -56,17 +58,15 @@ class PARQUET_EXPORT RowGroupReader {
   std::shared_ptr<ColumnReader> Column(int i);
 
  private:
-  const SchemaDescriptor* schema_;
-  // PIMPL idiom
-  // This is declared in the .cc file so that we can hide compiled Thrift
-  // headers from the public API and also more easily create test fixtures.
+  // Holds a pointer to an instance of Contents implementation
   std::unique_ptr<Contents> contents_;
-  MemoryAllocator* allocator_;
 };
 
 class PARQUET_EXPORT ParquetFileReader {
  public:
-  // Forward declare the PIMPL
+  // Forward declare a virtual class 'Contents' to aid dependency injection and more
+  // easily create test fixtures
+  // An implementation of the Contents class is defined in the .cc file
   struct Contents {
     virtual ~Contents() {}
     // Perform any cleanup associated with the file contents
@@ -99,9 +99,7 @@ class PARQUET_EXPORT ParquetFileReader {
       std::ostream& stream, std::list<int> selected_columns, bool print_values = true);
 
  private:
-  // PIMPL idiom
-  // This is declared in the .cc file so that we can hide compiled Thrift
-  // headers from the public API and also more easily create test fixtures.
+  // Holds a pointer to an instance of Contents implementation
   std::unique_ptr<Contents> contents_;
 };
 

http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/441d85b4/src/parquet/file/writer.h
----------------------------------------------------------------------
diff --git a/src/parquet/file/writer.h b/src/parquet/file/writer.h
index a5e6e99..0e64961 100644
--- a/src/parquet/file/writer.h
+++ b/src/parquet/file/writer.h
@@ -35,6 +35,9 @@ class OutputStream;
 
 class PARQUET_EXPORT RowGroupWriter {
  public:
+  // Forward declare a virtual class 'Contents' to aid dependency injection and more
+  // easily create test fixtures
+  // An implementation of the Contents class is defined in the .cc file
   struct Contents {
     virtual int num_columns() const = 0;
     virtual int64_t num_rows() const = 0;
@@ -75,8 +78,7 @@ class PARQUET_EXPORT RowGroupWriter {
   // Owned by the parent ParquetFileWriter
   const SchemaDescriptor* schema_;
 
-  // This is declared in the .cc file so that we can hide compiled Thrift
-  // headers from the public API and also more easily create test fixtures.
+  // Holds a pointer to an instance of Contents implementation
   std::unique_ptr<Contents> contents_;
 
   MemoryAllocator* allocator_;
@@ -84,6 +86,9 @@ class PARQUET_EXPORT RowGroupWriter {
 
 class PARQUET_EXPORT ParquetFileWriter {
  public:
+  // Forward declare a virtual class 'Contents' to aid dependency injection and more
+  // easily create test fixtures
+  // An implementation of the Contents class is defined in the .cc file
   struct Contents {
     virtual ~Contents() {}
     // Perform any cleanup associated with the file contents
@@ -155,8 +160,7 @@ class PARQUET_EXPORT ParquetFileWriter {
   const ColumnDescriptor* column_schema(int i) const { return schema_->Column(i); }
 
  private:
-  // This is declared in the .cc file so that we can hide compiled Thrift
-  // headers from the public API and also more easily create test fixtures.
+  // Holds a pointer to an instance of Contents implementation
   std::unique_ptr<Contents> contents_;
 
   // The SchemaDescriptor is provided by the Contents impl