You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@parquet.apache.org by we...@apache.org on 2016/09/06 20:26:46 UTC
parquet-cpp git commit: PARQUET-710: Remove unneeded private member
variables from RowGroupReader ABI
Repository: parquet-cpp
Updated Branches:
refs/heads/master 708507f3e -> 441d85b43
PARQUET-710: Remove unneeded private member variables from RowGroupReader ABI
The injection capability is retained.
The private members of the `RowGroupReader` class are removed.
Author: Deepak Majeti <de...@hpe.com>
Closes #154 from majetideepak/PARQUET-710 and squashes the following commits:
11bfaa6 [Deepak Majeti] Modify comments that mention PIMPL
6dfd40d [Deepak Majeti] clang-format
4d58dfa [Deepak Majeti] Removed private members in Rowgroup
Project: http://git-wip-us.apache.org/repos/asf/parquet-cpp/repo
Commit: http://git-wip-us.apache.org/repos/asf/parquet-cpp/commit/441d85b4
Tree: http://git-wip-us.apache.org/repos/asf/parquet-cpp/tree/441d85b4
Diff: http://git-wip-us.apache.org/repos/asf/parquet-cpp/diff/441d85b4
Branch: refs/heads/master
Commit: 441d85b433ecc5f103ba3da68be34256599adf11
Parents: 708507f
Author: Deepak Majeti <de...@hpe.com>
Authored: Tue Sep 6 16:26:38 2016 -0400
Committer: Wes McKinney <we...@twosigma.com>
Committed: Tue Sep 6 16:26:38 2016 -0400
----------------------------------------------------------------------
src/parquet/file/metadata.cc | 24 +++++++++++++++++-------
src/parquet/file/metadata.h | 7 +++++--
src/parquet/file/reader-internal.cc | 7 +++++--
src/parquet/file/reader-internal.h | 2 ++
src/parquet/file/reader.cc | 15 ++++++++-------
src/parquet/file/reader.h | 22 ++++++++++------------
src/parquet/file/writer.h | 12 ++++++++----
7 files changed, 55 insertions(+), 34 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/441d85b4/src/parquet/file/metadata.cc
----------------------------------------------------------------------
diff --git a/src/parquet/file/metadata.cc b/src/parquet/file/metadata.cc
index c1fd767..4e298a8 100644
--- a/src/parquet/file/metadata.cc
+++ b/src/parquet/file/metadata.cc
@@ -166,8 +166,9 @@ int64_t ColumnChunkMetaData::total_compressed_size() const {
// row-group metadata
class RowGroupMetaData::RowGroupMetaDataImpl {
public:
- explicit RowGroupMetaDataImpl(const format::RowGroup* row_group)
- : row_group_(row_group) {}
+ explicit RowGroupMetaDataImpl(
+ const format::RowGroup* row_group, const SchemaDescriptor* schema)
+ : row_group_(row_group), schema_(schema) {}
~RowGroupMetaDataImpl() {}
inline int num_columns() const { return row_group_->columns.size(); }
@@ -176,6 +177,8 @@ class RowGroupMetaData::RowGroupMetaDataImpl {
inline int64_t total_byte_size() const { return row_group_->total_byte_size; }
+ inline const SchemaDescriptor* schema() const { return schema_; }
+
std::unique_ptr<ColumnChunkMetaData> ColumnChunk(int i) {
DCHECK(i < num_columns()) << "The file only has " << num_columns()
<< " columns, requested metadata for column: " << i;
@@ -185,15 +188,18 @@ class RowGroupMetaData::RowGroupMetaDataImpl {
private:
const format::RowGroup* row_group_;
+ const SchemaDescriptor* schema_;
};
-std::unique_ptr<RowGroupMetaData> RowGroupMetaData::Make(const uint8_t* metadata) {
- return std::unique_ptr<RowGroupMetaData>(new RowGroupMetaData(metadata));
+std::unique_ptr<RowGroupMetaData> RowGroupMetaData::Make(
+ const uint8_t* metadata, const SchemaDescriptor* schema) {
+ return std::unique_ptr<RowGroupMetaData>(new RowGroupMetaData(metadata, schema));
}
-RowGroupMetaData::RowGroupMetaData(const uint8_t* metadata)
+RowGroupMetaData::RowGroupMetaData(
+ const uint8_t* metadata, const SchemaDescriptor* schema)
: impl_{std::unique_ptr<RowGroupMetaDataImpl>(new RowGroupMetaDataImpl(
- reinterpret_cast<const format::RowGroup*>(metadata)))} {}
+ reinterpret_cast<const format::RowGroup*>(metadata), schema))} {}
RowGroupMetaData::~RowGroupMetaData() {}
int RowGroupMetaData::num_columns() const {
@@ -208,6 +214,10 @@ int64_t RowGroupMetaData::total_byte_size() const {
return impl_->total_byte_size();
}
+const SchemaDescriptor* RowGroupMetaData::schema() const {
+ return impl_->schema();
+}
+
std::unique_ptr<ColumnChunkMetaData> RowGroupMetaData::ColumnChunk(int i) const {
return impl_->ColumnChunk(i);
}
@@ -238,7 +248,7 @@ class FileMetaData::FileMetaDataImpl {
<< "The file only has " << num_row_groups()
<< " row groups, requested metadata for row group: " << i;
return RowGroupMetaData::Make(
- reinterpret_cast<const uint8_t*>(&metadata_->row_groups[i]));
+ reinterpret_cast<const uint8_t*>(&metadata_->row_groups[i]), &schema_);
}
const SchemaDescriptor* schema_descriptor() const { return &schema_; }
http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/441d85b4/src/parquet/file/metadata.h
----------------------------------------------------------------------
diff --git a/src/parquet/file/metadata.h b/src/parquet/file/metadata.h
index c35f82f..78ea53b 100644
--- a/src/parquet/file/metadata.h
+++ b/src/parquet/file/metadata.h
@@ -75,7 +75,8 @@ class PARQUET_EXPORT ColumnChunkMetaData {
class PARQUET_EXPORT RowGroupMetaData {
public:
// API convenience to get a MetaData accessor
- static std::unique_ptr<RowGroupMetaData> Make(const uint8_t* metadata);
+ static std::unique_ptr<RowGroupMetaData> Make(
+ const uint8_t* metadata, const SchemaDescriptor* schema);
~RowGroupMetaData();
@@ -83,10 +84,12 @@ class PARQUET_EXPORT RowGroupMetaData {
int num_columns() const;
int64_t num_rows() const;
int64_t total_byte_size() const;
+ // Return const-pointer to make it clear that this object is not to be copied
+ const SchemaDescriptor* schema() const;
std::unique_ptr<ColumnChunkMetaData> ColumnChunk(int i) const;
private:
- explicit RowGroupMetaData(const uint8_t* metadata);
+ explicit RowGroupMetaData(const uint8_t* metadata, const SchemaDescriptor* schema);
// PIMPL Idiom
class RowGroupMetaDataImpl;
std::unique_ptr<RowGroupMetaDataImpl> impl_;
http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/441d85b4/src/parquet/file/reader-internal.cc
----------------------------------------------------------------------
diff --git a/src/parquet/file/reader-internal.cc b/src/parquet/file/reader-internal.cc
index 5c1bc37..1cb91f0 100644
--- a/src/parquet/file/reader-internal.cc
+++ b/src/parquet/file/reader-internal.cc
@@ -145,6 +145,10 @@ const RowGroupMetaData* SerializedRowGroup::metadata() const {
return row_group_metadata_.get();
}
+const ReaderProperties* SerializedRowGroup::properties() const {
+ return &properties_;
+}
+
std::unique_ptr<PageReader> SerializedRowGroup::GetColumnPageReader(int i) {
// Read column chunk from the file
auto col = row_group_metadata_->ColumnChunk(i);
@@ -195,8 +199,7 @@ std::shared_ptr<RowGroupReader> SerializedFile::GetRowGroup(int i) {
std::unique_ptr<SerializedRowGroup> contents(new SerializedRowGroup(
source_.get(), std::move(file_metadata_->RowGroup(i)), properties_));
- return std::make_shared<RowGroupReader>(
- file_metadata_->schema_descriptor(), std::move(contents), properties_.allocator());
+ return std::make_shared<RowGroupReader>(std::move(contents));
}
const FileMetaData* SerializedFile::metadata() const {
http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/441d85b4/src/parquet/file/reader-internal.h
----------------------------------------------------------------------
diff --git a/src/parquet/file/reader-internal.h b/src/parquet/file/reader-internal.h
index 48e2daf..faedda0 100644
--- a/src/parquet/file/reader-internal.h
+++ b/src/parquet/file/reader-internal.h
@@ -76,6 +76,8 @@ class SerializedRowGroup : public RowGroupReader::Contents {
virtual const RowGroupMetaData* metadata() const;
+ virtual const ReaderProperties* properties() const;
+
virtual std::unique_ptr<PageReader> GetColumnPageReader(int i);
private:
http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/441d85b4/src/parquet/file/reader.cc
----------------------------------------------------------------------
diff --git a/src/parquet/file/reader.cc b/src/parquet/file/reader.cc
index 62481f7..b593ea0 100644
--- a/src/parquet/file/reader.cc
+++ b/src/parquet/file/reader.cc
@@ -41,17 +41,18 @@ namespace parquet {
// ----------------------------------------------------------------------
// RowGroupReader public API
-RowGroupReader::RowGroupReader(const SchemaDescriptor* schema,
- std::unique_ptr<Contents> contents, MemoryAllocator* allocator)
- : schema_(schema), contents_(std::move(contents)), allocator_(allocator) {}
+RowGroupReader::RowGroupReader(std::unique_ptr<Contents> contents)
+ : contents_(std::move(contents)) {}
std::shared_ptr<ColumnReader> RowGroupReader::Column(int i) {
- DCHECK(i < schema_->num_columns()) << "The RowGroup only has " << schema_->num_columns()
- << "columns, requested column: " << i;
- const ColumnDescriptor* descr = schema_->Column(i);
+ DCHECK(i < metadata()->num_columns()) << "The RowGroup only has "
+ << metadata()->num_columns()
+ << "columns, requested column: " << i;
+ const ColumnDescriptor* descr = metadata()->schema()->Column(i);
std::unique_ptr<PageReader> page_reader = contents_->GetColumnPageReader(i);
- return ColumnReader::Make(descr, std::move(page_reader), allocator_);
+ return ColumnReader::Make(descr, std::move(page_reader),
+ const_cast<ReaderProperties*>(contents_->properties())->allocator());
}
// Returns the rowgroup metadata
http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/441d85b4/src/parquet/file/reader.h
----------------------------------------------------------------------
diff --git a/src/parquet/file/reader.h b/src/parquet/file/reader.h
index baa3e30..c1ee9d4 100644
--- a/src/parquet/file/reader.h
+++ b/src/parquet/file/reader.h
@@ -38,15 +38,17 @@ class RandomAccessSource;
class PARQUET_EXPORT RowGroupReader {
public:
- // Forward declare the PIMPL
+ // Forward declare a virtual class 'Contents' to aid dependency injection and more
+ // easily create test fixtures
+ // An implementation of the Contents class is defined in the .cc file
struct Contents {
virtual ~Contents() {}
virtual std::unique_ptr<PageReader> GetColumnPageReader(int i) = 0;
virtual const RowGroupMetaData* metadata() const = 0;
+ virtual const ReaderProperties* properties() const = 0;
};
- RowGroupReader(const SchemaDescriptor* schema, std::unique_ptr<Contents> contents,
- MemoryAllocator* allocator);
+ explicit RowGroupReader(std::unique_ptr<Contents> contents);
// Returns the rowgroup metadata
const RowGroupMetaData* metadata() const;
@@ -56,17 +58,15 @@ class PARQUET_EXPORT RowGroupReader {
std::shared_ptr<ColumnReader> Column(int i);
private:
- const SchemaDescriptor* schema_;
- // PIMPL idiom
- // This is declared in the .cc file so that we can hide compiled Thrift
- // headers from the public API and also more easily create test fixtures.
+ // Holds a pointer to an instance of Contents implementation
std::unique_ptr<Contents> contents_;
- MemoryAllocator* allocator_;
};
class PARQUET_EXPORT ParquetFileReader {
public:
- // Forward declare the PIMPL
+ // Forward declare a virtual class 'Contents' to aid dependency injection and more
+ // easily create test fixtures
+ // An implementation of the Contents class is defined in the .cc file
struct Contents {
virtual ~Contents() {}
// Perform any cleanup associated with the file contents
@@ -99,9 +99,7 @@ class PARQUET_EXPORT ParquetFileReader {
std::ostream& stream, std::list<int> selected_columns, bool print_values = true);
private:
- // PIMPL idiom
- // This is declared in the .cc file so that we can hide compiled Thrift
- // headers from the public API and also more easily create test fixtures.
+ // Holds a pointer to an instance of Contents implementation
std::unique_ptr<Contents> contents_;
};
http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/441d85b4/src/parquet/file/writer.h
----------------------------------------------------------------------
diff --git a/src/parquet/file/writer.h b/src/parquet/file/writer.h
index a5e6e99..0e64961 100644
--- a/src/parquet/file/writer.h
+++ b/src/parquet/file/writer.h
@@ -35,6 +35,9 @@ class OutputStream;
class PARQUET_EXPORT RowGroupWriter {
public:
+ // Forward declare a virtual class 'Contents' to aid dependency injection and more
+ // easily create test fixtures
+ // An implementation of the Contents class is defined in the .cc file
struct Contents {
virtual int num_columns() const = 0;
virtual int64_t num_rows() const = 0;
@@ -75,8 +78,7 @@ class PARQUET_EXPORT RowGroupWriter {
// Owned by the parent ParquetFileWriter
const SchemaDescriptor* schema_;
- // This is declared in the .cc file so that we can hide compiled Thrift
- // headers from the public API and also more easily create test fixtures.
+ // Holds a pointer to an instance of Contents implementation
std::unique_ptr<Contents> contents_;
MemoryAllocator* allocator_;
@@ -84,6 +86,9 @@ class PARQUET_EXPORT RowGroupWriter {
class PARQUET_EXPORT ParquetFileWriter {
public:
+ // Forward declare a virtual class 'Contents' to aid dependency injection and more
+ // easily create test fixtures
+ // An implementation of the Contents class is defined in the .cc file
struct Contents {
virtual ~Contents() {}
// Perform any cleanup associated with the file contents
@@ -155,8 +160,7 @@ class PARQUET_EXPORT ParquetFileWriter {
const ColumnDescriptor* column_schema(int i) const { return schema_->Column(i); }
private:
- // This is declared in the .cc file so that we can hide compiled Thrift
- // headers from the public API and also more easily create test fixtures.
+ // Holds a pointer to an instance of Contents implementation
std::unique_ptr<Contents> contents_;
// The SchemaDescriptor is provided by the Contents impl