You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@parquet.apache.org by we...@apache.org on 2017/07/31 15:14:59 UTC
[2/5] parquet-cpp git commit: PARQUET-1068: Modify .clang-format to
use straight Google format with 90-character line width
http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/b6f3caeb/src/parquet/encoding-internal.h
----------------------------------------------------------------------
diff --git a/src/parquet/encoding-internal.h b/src/parquet/encoding-internal.h
index 88d781f..69bac32 100644
--- a/src/parquet/encoding-internal.h
+++ b/src/parquet/encoding-internal.h
@@ -78,10 +78,12 @@ class PlainDecoder : public Decoder<DType> {
// Decode routine templated on C++ type rather than type enum
template <typename T>
-inline int DecodePlain(
- const uint8_t* data, int64_t data_size, int num_values, int type_length, T* out) {
+inline int DecodePlain(const uint8_t* data, int64_t data_size, int num_values,
+ int type_length, T* out) {
int bytes_to_decode = num_values * sizeof(T);
- if (data_size < bytes_to_decode) { ParquetException::EofException(); }
+ if (data_size < bytes_to_decode) {
+ ParquetException::EofException();
+ }
memcpy(out, data, bytes_to_decode);
return bytes_to_decode;
}
@@ -90,7 +92,7 @@ inline int DecodePlain(
// own data.
template <>
inline int DecodePlain<ByteArray>(const uint8_t* data, int64_t data_size, int num_values,
- int type_length, ByteArray* out) {
+ int type_length, ByteArray* out) {
int bytes_decoded = 0;
int increment;
for (int i = 0; i < num_values; ++i) {
@@ -109,9 +111,12 @@ inline int DecodePlain<ByteArray>(const uint8_t* data, int64_t data_size, int nu
// own their own data.
template <>
inline int DecodePlain<FixedLenByteArray>(const uint8_t* data, int64_t data_size,
- int num_values, int type_length, FixedLenByteArray* out) {
+ int num_values, int type_length,
+ FixedLenByteArray* out) {
int bytes_to_decode = type_length * num_values;
- if (data_size < bytes_to_decode) { ParquetException::EofException(); }
+ if (data_size < bytes_to_decode) {
+ ParquetException::EofException();
+ }
for (int i = 0; i < num_values; ++i) {
out[i].ptr = data;
data += type_length;
@@ -146,7 +151,9 @@ class PlainDecoder<BooleanType> : public Decoder<BooleanType> {
max_values = std::min(max_values, num_values_);
bool val;
for (int i = 0; i < max_values; ++i) {
- if (!bit_reader_.GetValue(1, &val)) { ParquetException::EofException(); }
+ if (!bit_reader_.GetValue(1, &val)) {
+ ParquetException::EofException();
+ }
BitUtil::SetArrayBit(buffer, i, val);
}
num_values_ -= max_values;
@@ -175,7 +182,7 @@ class PlainEncoder : public Encoder<DType> {
typedef typename DType::c_type T;
explicit PlainEncoder(const ColumnDescriptor* descr,
- ::arrow::MemoryPool* pool = ::arrow::default_memory_pool())
+ ::arrow::MemoryPool* pool = ::arrow::default_memory_pool())
: Encoder<DType>(descr, Encoding::PLAIN, pool) {
values_sink_.reset(new InMemoryOutputStream(pool));
}
@@ -193,13 +200,13 @@ template <>
class PlainEncoder<BooleanType> : public Encoder<BooleanType> {
public:
explicit PlainEncoder(const ColumnDescriptor* descr,
- ::arrow::MemoryPool* pool = ::arrow::default_memory_pool())
+ ::arrow::MemoryPool* pool = ::arrow::default_memory_pool())
: Encoder<BooleanType>(descr, Encoding::PLAIN, pool),
bits_available_(kInMemoryDefaultCapacity * 8),
bits_buffer_(AllocateBuffer(pool, kInMemoryDefaultCapacity)),
values_sink_(new InMemoryOutputStream(pool)) {
- bit_writer_.reset(new ::arrow::BitWriter(
- bits_buffer_->mutable_data(), static_cast<int>(bits_buffer_->size())));
+ bit_writer_.reset(new ::arrow::BitWriter(bits_buffer_->mutable_data(),
+ static_cast<int>(bits_buffer_->size())));
}
int64_t EstimatedDataEncodedSize() override {
@@ -284,7 +291,9 @@ inline void PlainEncoder<ByteArrayType>::Put(const ByteArray* src, int num_value
for (int i = 0; i < num_values; ++i) {
// Write the result to the output stream
values_sink_->Write(reinterpret_cast<const uint8_t*>(&src[i].len), sizeof(uint32_t));
- if (src[i].len > 0) { DCHECK(nullptr != src[i].ptr) << "Value ptr cannot be NULL"; }
+ if (src[i].len > 0) {
+ DCHECK(nullptr != src[i].ptr) << "Value ptr cannot be NULL";
+ }
values_sink_->Write(reinterpret_cast<const uint8_t*>(src[i].ptr), src[i].len);
}
}
@@ -296,8 +305,8 @@ inline void PlainEncoder<FLBAType>::Put(const FixedLenByteArray* src, int num_va
if (descr_->type_length() > 0) {
DCHECK(nullptr != src[i].ptr) << "Value ptr cannot be NULL";
}
- values_sink_->Write(
- reinterpret_cast<const uint8_t*>(src[i].ptr), descr_->type_length());
+ values_sink_->Write(reinterpret_cast<const uint8_t*>(src[i].ptr),
+ descr_->type_length());
}
}
@@ -313,7 +322,7 @@ class DictionaryDecoder : public Decoder<Type> {
// dictionary is not guaranteed to persist in memory after this call so the
// dictionary decoder needs to copy the data out if necessary.
explicit DictionaryDecoder(const ColumnDescriptor* descr,
- ::arrow::MemoryPool* pool = ::arrow::default_memory_pool())
+ ::arrow::MemoryPool* pool = ::arrow::default_memory_pool())
: Decoder<Type>(descr, Encoding::RLE_DICTIONARY),
dictionary_(0, pool),
byte_array_data_(AllocateBuffer(pool, 0)) {}
@@ -334,16 +343,21 @@ class DictionaryDecoder : public Decoder<Type> {
max_values = std::min(max_values, num_values_);
int decoded_values =
idx_decoder_.GetBatchWithDict(dictionary_.data(), buffer, max_values);
- if (decoded_values != max_values) { ParquetException::EofException(); }
+ if (decoded_values != max_values) {
+ ParquetException::EofException();
+ }
num_values_ -= max_values;
return max_values;
}
int DecodeSpaced(T* buffer, int num_values, int null_count, const uint8_t* valid_bits,
- int64_t valid_bits_offset) override {
- int decoded_values = idx_decoder_.GetBatchWithDictSpaced(dictionary_.data(), buffer,
- num_values, null_count, valid_bits, valid_bits_offset);
- if (decoded_values != num_values) { ParquetException::EofException(); }
+ int64_t valid_bits_offset) override {
+ int decoded_values =
+ idx_decoder_.GetBatchWithDictSpaced(dictionary_.data(), buffer, num_values,
+ null_count, valid_bits, valid_bits_offset);
+ if (decoded_values != num_values) {
+ ParquetException::EofException();
+ }
return decoded_values;
}
@@ -439,7 +453,7 @@ class DictEncoder : public Encoder<DType> {
typedef typename DType::c_type T;
explicit DictEncoder(const ColumnDescriptor* desc, ChunkedAllocator* pool = nullptr,
- ::arrow::MemoryPool* allocator = ::arrow::default_memory_pool())
+ ::arrow::MemoryPool* allocator = ::arrow::default_memory_pool())
: Encoder<DType>(desc, Encoding::PLAIN_DICTIONARY, allocator),
allocator_(allocator),
pool_(pool),
@@ -449,7 +463,9 @@ class DictEncoder : public Encoder<DType> {
dict_encoded_size_(0),
type_length_(desc->type_length()) {
hash_slots_.Assign(hash_table_size_, HASH_SLOT_EMPTY);
- if (!::arrow::CpuInfo::initialized()) { ::arrow::CpuInfo::Init(); }
+ if (!::arrow::CpuInfo::initialized()) {
+ ::arrow::CpuInfo::Init();
+ }
}
virtual ~DictEncoder() { DCHECK(buffered_indices_.empty()); }
@@ -498,8 +514,8 @@ class DictEncoder : public Encoder<DType> {
std::shared_ptr<Buffer> FlushValues() override {
std::shared_ptr<PoolBuffer> buffer =
AllocateBuffer(this->allocator_, EstimatedDataEncodedSize());
- int result_size = WriteIndices(
- buffer->mutable_data(), static_cast<int>(EstimatedDataEncodedSize()));
+ int result_size = WriteIndices(buffer->mutable_data(),
+ static_cast<int>(EstimatedDataEncodedSize()));
ClearIndices();
PARQUET_THROW_NOT_OK(buffer->Resize(result_size, false));
return buffer;
@@ -512,10 +528,12 @@ class DictEncoder : public Encoder<DType> {
}
void PutSpaced(const T* src, int num_values, const uint8_t* valid_bits,
- int64_t valid_bits_offset) override {
+ int64_t valid_bits_offset) override {
INIT_BITSET(valid_bits, static_cast<int>(valid_bits_offset));
for (int32_t i = 0; i < num_values; i++) {
- if (bitset_valid_bits & (1 << bit_offset_valid_bits)) { Put(src[i]); }
+ if (bitset_valid_bits & (1 << bit_offset_valid_bits)) {
+ Put(src[i]);
+ }
READ_NEXT_BITSET(valid_bits);
}
}
@@ -576,25 +594,29 @@ inline int DictEncoder<DType>::Hash(const typename DType::c_type& value) const {
template <>
inline int DictEncoder<ByteArrayType>::Hash(const ByteArray& value) const {
- if (value.len > 0) { DCHECK(nullptr != value.ptr) << "Value ptr cannot be NULL"; }
+ if (value.len > 0) {
+ DCHECK(nullptr != value.ptr) << "Value ptr cannot be NULL";
+ }
return HashUtil::Hash(value.ptr, value.len, 0);
}
template <>
inline int DictEncoder<FLBAType>::Hash(const FixedLenByteArray& value) const {
- if (type_length_ > 0) { DCHECK(nullptr != value.ptr) << "Value ptr cannot be NULL"; }
+ if (type_length_ > 0) {
+ DCHECK(nullptr != value.ptr) << "Value ptr cannot be NULL";
+ }
return HashUtil::Hash(value.ptr, type_length_, 0);
}
template <typename DType>
-inline bool DictEncoder<DType>::SlotDifferent(
- const typename DType::c_type& v, hash_slot_t slot) {
+inline bool DictEncoder<DType>::SlotDifferent(const typename DType::c_type& v,
+ hash_slot_t slot) {
return v != uniques_[slot];
}
template <>
-inline bool DictEncoder<FLBAType>::SlotDifferent(
- const FixedLenByteArray& v, hash_slot_t slot) {
+inline bool DictEncoder<FLBAType>::SlotDifferent(const FixedLenByteArray& v,
+ hash_slot_t slot) {
return 0 != memcmp(v.ptr, uniques_[slot].ptr, type_length_);
}
@@ -635,7 +657,9 @@ inline void DictEncoder<DType>::DoubleTableSize() {
for (int i = 0; i < hash_table_size_; ++i) {
index = hash_slots_[i];
- if (index == HASH_SLOT_EMPTY) { continue; }
+ if (index == HASH_SLOT_EMPTY) {
+ continue;
+ }
// Compute the hash value mod the new table size to start looking for an
// empty slot
@@ -669,7 +693,9 @@ inline void DictEncoder<DType>::AddDictKey(const typename DType::c_type& v) {
template <>
inline void DictEncoder<ByteArrayType>::AddDictKey(const ByteArray& v) {
uint8_t* heap = pool_->Allocate(v.len);
- if (UNLIKELY(v.len > 0 && heap == nullptr)) { throw ParquetException("out of memory"); }
+ if (UNLIKELY(v.len > 0 && heap == nullptr)) {
+ throw ParquetException("out of memory");
+ }
memcpy(heap, v.ptr, v.len);
uniques_.push_back(ByteArray(v.len, heap));
dict_encoded_size_ += v.len + sizeof(uint32_t);
@@ -708,7 +734,9 @@ inline void DictEncoder<ByteArrayType>::WriteDict(uint8_t* buffer) {
for (const ByteArray& v : uniques_) {
memcpy(buffer, reinterpret_cast<const void*>(&v.len), sizeof(uint32_t));
buffer += sizeof(uint32_t);
- if (v.len > 0) { DCHECK(nullptr != v.ptr) << "Value ptr cannot be NULL"; }
+ if (v.len > 0) {
+ DCHECK(nullptr != v.ptr) << "Value ptr cannot be NULL";
+ }
memcpy(buffer, v.ptr, v.len);
buffer += v.len;
}
@@ -717,7 +745,9 @@ inline void DictEncoder<ByteArrayType>::WriteDict(uint8_t* buffer) {
template <>
inline void DictEncoder<FLBAType>::WriteDict(uint8_t* buffer) {
for (const FixedLenByteArray& v : uniques_) {
- if (type_length_ > 0) { DCHECK(nullptr != v.ptr) << "Value ptr cannot be NULL"; }
+ if (type_length_ > 0) {
+ DCHECK(nullptr != v.ptr) << "Value ptr cannot be NULL";
+ }
memcpy(buffer, v.ptr, type_length_);
buffer += type_length_;
}
@@ -749,7 +779,7 @@ class DeltaBitPackDecoder : public Decoder<DType> {
typedef typename DType::c_type T;
explicit DeltaBitPackDecoder(const ColumnDescriptor* descr,
- ::arrow::MemoryPool* pool = ::arrow::default_memory_pool())
+ ::arrow::MemoryPool* pool = ::arrow::default_memory_pool())
: Decoder<DType>(descr, Encoding::DELTA_BINARY_PACKED),
delta_bit_widths_(new PoolBuffer(pool)) {
if (DType::type_num != Type::INT32 && DType::type_num != Type::INT64) {
@@ -775,7 +805,9 @@ class DeltaBitPackDecoder : public Decoder<DType> {
int32_t block_size;
if (!decoder_.GetVlqInt(&block_size)) ParquetException::EofException();
if (!decoder_.GetVlqInt(&num_mini_blocks_)) ParquetException::EofException();
- if (!decoder_.GetVlqInt(&values_current_block_)) { ParquetException::EofException(); }
+ if (!decoder_.GetVlqInt(&values_current_block_)) {
+ ParquetException::EofException();
+ }
if (!decoder_.GetZigZagVlqInt(&last_value_)) ParquetException::EofException();
PARQUET_THROW_NOT_OK(delta_bit_widths_->Resize(num_mini_blocks_, false));
@@ -841,7 +873,8 @@ class DeltaBitPackDecoder : public Decoder<DType> {
class DeltaLengthByteArrayDecoder : public Decoder<ByteArrayType> {
public:
- explicit DeltaLengthByteArrayDecoder(const ColumnDescriptor* descr,
+ explicit DeltaLengthByteArrayDecoder(
+ const ColumnDescriptor* descr,
::arrow::MemoryPool* pool = ::arrow::default_memory_pool())
: Decoder<ByteArrayType>(descr, Encoding::DELTA_LENGTH_BYTE_ARRAY),
len_decoder_(nullptr, pool) {}
@@ -882,7 +915,8 @@ class DeltaLengthByteArrayDecoder : public Decoder<ByteArrayType> {
class DeltaByteArrayDecoder : public Decoder<ByteArrayType> {
public:
- explicit DeltaByteArrayDecoder(const ColumnDescriptor* descr,
+ explicit DeltaByteArrayDecoder(
+ const ColumnDescriptor* descr,
::arrow::MemoryPool* pool = ::arrow::default_memory_pool())
: Decoder<ByteArrayType>(descr, Encoding::DELTA_BYTE_ARRAY),
prefix_len_decoder_(nullptr, pool),
http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/b6f3caeb/src/parquet/encoding-test.cc
----------------------------------------------------------------------
diff --git a/src/parquet/encoding-test.cc b/src/parquet/encoding-test.cc
index dcd813d..b0ca050 100644
--- a/src/parquet/encoding-test.cc
+++ b/src/parquet/encoding-test.cc
@@ -15,10 +15,10 @@
// specific language governing permissions and limitations
// under the License.
+#include <gtest/gtest.h>
#include <cstdint>
#include <cstdlib>
#include <cstring>
-#include <gtest/gtest.h>
#include <string>
#include <vector>
@@ -59,8 +59,8 @@ TEST(VectorBooleanTest, TestEncodeDecode) {
vector<uint8_t> decode_buffer(nbytes);
const uint8_t* decode_data = &decode_buffer[0];
- decoder.SetData(
- nvalues, encode_buffer->data(), static_cast<int>(encode_buffer->size()));
+ decoder.SetData(nvalues, encode_buffer->data(),
+ static_cast<int>(encode_buffer->size()));
int values_decoded = decoder.Decode(&decode_buffer[0], nvalues);
ASSERT_EQ(nvalues, values_decoded);
@@ -75,8 +75,8 @@ TEST(VectorBooleanTest, TestEncodeDecode) {
template <typename T>
void GenerateData(int num_values, T* out, vector<uint8_t>* heap) {
// seed the prng so failure is deterministic
- random_numbers(
- num_values, 0, std::numeric_limits<T>::min(), std::numeric_limits<T>::max(), out);
+ random_numbers(num_values, 0, std::numeric_limits<T>::min(),
+ std::numeric_limits<T>::max(), out);
}
template <>
@@ -89,7 +89,7 @@ template <>
void GenerateData<Int96>(int num_values, Int96* out, vector<uint8_t>* heap) {
// seed the prng so failure is deterministic
random_Int96_numbers(num_values, 0, std::numeric_limits<int32_t>::min(),
- std::numeric_limits<int32_t>::max(), out);
+ std::numeric_limits<int32_t>::max(), out);
}
template <>
@@ -135,7 +135,8 @@ std::shared_ptr<ColumnDescriptor> ExampleDescr() {
template <>
std::shared_ptr<ColumnDescriptor> ExampleDescr<FLBAType>() {
auto node = schema::PrimitiveNode::Make("name", Repetition::OPTIONAL,
- Type::FIXED_LEN_BYTE_ARRAY, LogicalType::DECIMAL, flba_length, 10, 2);
+ Type::FIXED_LEN_BYTE_ARRAY,
+ LogicalType::DECIMAL, flba_length, 10, 2);
return std::make_shared<ColumnDescriptor>(node, 0, 0);
}
@@ -220,8 +221,8 @@ class TestPlainEncoding : public TestEncodingBase<Type> {
encoder.Put(draws_, num_values_);
encode_buffer_ = encoder.FlushValues();
- decoder.SetData(
- num_values_, encode_buffer_->data(), static_cast<int>(encode_buffer_->size()));
+ decoder.SetData(num_values_, encode_buffer_->data(),
+ static_cast<int>(encode_buffer_->size()));
int values_decoded = decoder.Decode(decode_buf_, num_values_);
ASSERT_EQ(num_values_, values_decoded);
VerifyResults<T>(decode_buf_, draws_, num_values_);
@@ -233,15 +234,13 @@ class TestPlainEncoding : public TestEncodingBase<Type> {
TYPED_TEST_CASE(TestPlainEncoding, ParquetTypes);
-TYPED_TEST(TestPlainEncoding, BasicRoundTrip) {
- this->Execute(10000, 1);
-}
+TYPED_TEST(TestPlainEncoding, BasicRoundTrip) { this->Execute(10000, 1); }
// ----------------------------------------------------------------------
// Dictionary encoding tests
typedef ::testing::Types<Int32Type, Int64Type, Int96Type, FloatType, DoubleType,
- ByteArrayType, FLBAType>
+ ByteArrayType, FLBAType>
DictEncodedTypes;
template <typename Type>
@@ -267,7 +266,7 @@ class TestDictionaryEncoding : public TestEncodingBase<Type> {
PlainDecoder<Type> dict_decoder(descr_.get());
dict_decoder.SetData(encoder.num_entries(), dict_buffer_->data(),
- static_cast<int>(dict_buffer_->size()));
+ static_cast<int>(dict_buffer_->size()));
DictionaryDecoder<Type> decoder(descr_.get());
decoder.SetDict(&dict_decoder);
@@ -296,9 +295,7 @@ class TestDictionaryEncoding : public TestEncodingBase<Type> {
TYPED_TEST_CASE(TestDictionaryEncoding, DictEncodedTypes);
-TYPED_TEST(TestDictionaryEncoding, BasicRoundTrip) {
- this->Execute(2500, 2);
-}
+TYPED_TEST(TestDictionaryEncoding, BasicRoundTrip) { this->Execute(2500, 2); }
TEST(TestDictionaryEncoding, CannotDictDecodeBoolean) {
PlainDecoder<BooleanType> dict_decoder(nullptr);
http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/b6f3caeb/src/parquet/encoding.h
----------------------------------------------------------------------
diff --git a/src/parquet/encoding.h b/src/parquet/encoding.h
index ecf3940..339eb35 100644
--- a/src/parquet/encoding.h
+++ b/src/parquet/encoding.h
@@ -22,8 +22,8 @@
#include <memory>
#include <sstream>
-#include "arrow/util/bit-util.h"
#include "arrow/status.h"
+#include "arrow/util/bit-util.h"
#include "parquet/exception.h"
#include "parquet/schema.h"
@@ -49,13 +49,13 @@ class Encoder {
virtual std::shared_ptr<Buffer> FlushValues() = 0;
virtual void Put(const T* src, int num_values) = 0;
virtual void PutSpaced(const T* src, int num_values, const uint8_t* valid_bits,
- int64_t valid_bits_offset) {
+ int64_t valid_bits_offset) {
PoolBuffer buffer(pool_);
::arrow::Status status = buffer.Resize(num_values * sizeof(T));
if (!status.ok()) {
std::ostringstream ss;
- ss << "buffer.Resize failed in Encoder.PutSpaced in " <<
- __FILE__ << ", on line " << __LINE__;
+ ss << "buffer.Resize failed in Encoder.PutSpaced in " << __FILE__ << ", on line "
+ << __LINE__;
throw ParquetException(ss.str());
}
int32_t num_valid_values = 0;
@@ -73,8 +73,8 @@ class Encoder {
Encoding::type encoding() const { return encoding_; }
protected:
- explicit Encoder(
- const ColumnDescriptor* descr, Encoding::type encoding, ::arrow::MemoryPool* pool)
+ explicit Encoder(const ColumnDescriptor* descr, Encoding::type encoding,
+ ::arrow::MemoryPool* pool)
: descr_(descr), encoding_(encoding), pool_(pool) {}
// For accessing type-specific metadata, like FIXED_LEN_BYTE_ARRAY
@@ -106,7 +106,7 @@ class Decoder {
// num_values is the size of the def_levels and buffer arrays including the number of
// null values.
virtual int DecodeSpaced(T* buffer, int num_values, int null_count,
- const uint8_t* valid_bits, int64_t valid_bits_offset) {
+ const uint8_t* valid_bits, int64_t valid_bits_offset) {
int values_to_read = num_values - null_count;
int values_read = Decode(buffer, values_to_read);
if (values_read != values_to_read) {
http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/b6f3caeb/src/parquet/exception.cc
----------------------------------------------------------------------
diff --git a/src/parquet/exception.cc b/src/parquet/exception.cc
index 96bbc4b..480eecd 100644
--- a/src/parquet/exception.cc
+++ b/src/parquet/exception.cc
@@ -33,9 +33,7 @@ void ParquetException::NYI(const std::string& msg) {
throw ParquetException(ss.str());
}
-void ParquetException::Throw(const std::string& msg) {
- throw ParquetException(msg);
-}
+void ParquetException::Throw(const std::string& msg) { throw ParquetException(msg); }
ParquetException::ParquetException(const char* msg) : msg_(msg) {}
@@ -45,8 +43,6 @@ ParquetException::ParquetException(const char* msg, std::exception& e) : msg_(ms
ParquetException::~ParquetException() throw() {}
-const char* ParquetException::what() const throw() {
- return msg_.c_str();
-}
+const char* ParquetException::what() const throw() { return msg_.c_str(); }
} // namespace parquet
http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/b6f3caeb/src/parquet/file/file-deserialize-test.cc
----------------------------------------------------------------------
diff --git a/src/parquet/file/file-deserialize-test.cc b/src/parquet/file/file-deserialize-test.cc
index 59d2051..39ea1d9 100644
--- a/src/parquet/file/file-deserialize-test.cc
+++ b/src/parquet/file/file-deserialize-test.cc
@@ -37,10 +37,12 @@
namespace parquet {
-#define ASSERT_OK(expr) \
- do { \
- ::arrow::Status s = (expr); \
- if (!s.ok()) { FAIL() << s.ToString(); } \
+#define ASSERT_OK(expr) \
+ do { \
+ ::arrow::Status s = (expr); \
+ if (!s.ok()) { \
+ FAIL() << s.ToString(); \
+ } \
} while (0)
using ::arrow::io::BufferReader;
@@ -66,8 +68,8 @@ class TestPageSerde : public ::testing::Test {
ResetStream();
}
- void InitSerializedPageReader(
- int64_t num_rows, Compression::type codec = Compression::UNCOMPRESSED) {
+ void InitSerializedPageReader(int64_t num_rows,
+ Compression::type codec = Compression::UNCOMPRESSED) {
EndStream();
std::unique_ptr<InputStream> stream;
stream.reset(new InMemoryInputStream(out_buffer_));
@@ -75,7 +77,7 @@ class TestPageSerde : public ::testing::Test {
}
void WriteDataPageHeader(int max_serialized_len = 1024, int32_t uncompressed_size = 0,
- int32_t compressed_size = 0) {
+ int32_t compressed_size = 0) {
// Simplifying writing serialized data page headers which may or may not
// have meaningful data associated with them
@@ -176,8 +178,8 @@ TEST_F(TestPageSerde, TestFailLargePageHeaders) {
}
TEST_F(TestPageSerde, Compression) {
- Compression::type codec_types[3] = {
- Compression::GZIP, Compression::SNAPPY, Compression::BROTLI};
+ Compression::type codec_types[3] = {Compression::GZIP, Compression::SNAPPY,
+ Compression::BROTLI};
const int32_t num_rows = 32; // dummy value
data_page_header_.num_values = num_rows;
@@ -203,8 +205,8 @@ TEST_F(TestPageSerde, Compression) {
buffer.resize(max_compressed_size);
int64_t actual_size;
- ASSERT_OK(codec->Compress(
- data_size, data, max_compressed_size, &buffer[0], &actual_size));
+ ASSERT_OK(codec->Compress(data_size, data, max_compressed_size, &buffer[0],
+ &actual_size));
WriteDataPageHeader(1024, data_size, static_cast<int32_t>(actual_size));
out_stream_->Write(buffer.data(), actual_size);
@@ -246,8 +248,8 @@ class TestParquetFileReader : public ::testing::Test {
auto reader = std::make_shared<BufferReader>(buffer);
auto wrapper = std::unique_ptr<ArrowInputFile>(new ArrowInputFile(reader));
- ASSERT_THROW(
- reader_->Open(SerializedFile::Open(std::move(wrapper))), ParquetException);
+ ASSERT_THROW(reader_->Open(SerializedFile::Open(std::move(wrapper))),
+ ParquetException);
}
protected:
@@ -257,22 +259,22 @@ class TestParquetFileReader : public ::testing::Test {
TEST_F(TestParquetFileReader, InvalidHeader) {
const char* bad_header = "PAR2";
- auto buffer = std::make_shared<Buffer>(
- reinterpret_cast<const uint8_t*>(bad_header), strlen(bad_header));
+ auto buffer = std::make_shared<Buffer>(reinterpret_cast<const uint8_t*>(bad_header),
+ strlen(bad_header));
AssertInvalidFileThrows(buffer);
}
TEST_F(TestParquetFileReader, InvalidFooter) {
// File is smaller than FOOTER_SIZE
const char* bad_file = "PAR1PAR";
- auto buffer = std::make_shared<Buffer>(
- reinterpret_cast<const uint8_t*>(bad_file), strlen(bad_file));
+ auto buffer = std::make_shared<Buffer>(reinterpret_cast<const uint8_t*>(bad_file),
+ strlen(bad_file));
AssertInvalidFileThrows(buffer);
// Magic number incorrect
const char* bad_file2 = "PAR1PAR2";
- buffer = std::make_shared<Buffer>(
- reinterpret_cast<const uint8_t*>(bad_file2), strlen(bad_file2));
+ buffer = std::make_shared<Buffer>(reinterpret_cast<const uint8_t*>(bad_file2),
+ strlen(bad_file2));
AssertInvalidFileThrows(buffer);
}
http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/b6f3caeb/src/parquet/file/file-metadata-test.cc
----------------------------------------------------------------------
diff --git a/src/parquet/file/file-metadata-test.cc b/src/parquet/file/file-metadata-test.cc
index 10ce40c..a7c438c 100644
--- a/src/parquet/file/file-metadata-test.cc
+++ b/src/parquet/file/file-metadata-test.cc
@@ -15,11 +15,11 @@
// specific language governing permissions and limitations
// under the License.
+#include <gtest/gtest.h>
#include "parquet/file/metadata.h"
#include "parquet/schema.h"
#include "parquet/statistics.h"
#include "parquet/types.h"
-#include <gtest/gtest.h>
namespace parquet {
http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/b6f3caeb/src/parquet/file/file-serialize-test.cc
----------------------------------------------------------------------
diff --git a/src/parquet/file/file-serialize-test.cc b/src/parquet/file/file-serialize-test.cc
index 5736fa1..059df0b 100644
--- a/src/parquet/file/file-serialize-test.cc
+++ b/src/parquet/file/file-serialize-test.cc
@@ -65,8 +65,8 @@ class TestSerialize : public PrimitiveTypedTest<TestType> {
for (int i = 0; i < num_columns_; ++i) {
auto column_writer =
static_cast<TypedColumnWriter<TestType>*>(row_group_writer->NextColumn());
- column_writer->WriteBatch(
- 100, this->def_levels_.data(), nullptr, this->values_ptr_);
+ column_writer->WriteBatch(100, this->def_levels_.data(), nullptr,
+ this->values_ptr_);
column_writer->Close();
}
@@ -96,7 +96,7 @@ class TestSerialize : public PrimitiveTypedTest<TestType> {
std::static_pointer_cast<TypedColumnReader<TestType>>(rg_reader->Column(i));
this->SetupValuesOut(100);
col_reader->ReadBatch(100, def_levels_out.data(), rep_levels_out.data(),
- this->values_out_ptr_, &values_read);
+ this->values_out_ptr_, &values_read);
this->SyncValuesOut();
ASSERT_EQ(100, values_read);
ASSERT_EQ(this->values_, this->values_out_);
@@ -106,7 +106,7 @@ class TestSerialize : public PrimitiveTypedTest<TestType> {
};
typedef ::testing::Types<Int32Type, Int64Type, Int96Type, FloatType, DoubleType,
- BooleanType, ByteArrayType, FLBAType>
+ BooleanType, ByteArrayType, FLBAType>
TestTypes;
TYPED_TEST_CASE(TestSerialize, TestTypes);
@@ -123,9 +123,7 @@ TYPED_TEST(TestSerialize, SmallFileBrotli) {
this->FileSerializeTest(Compression::BROTLI);
}
-TYPED_TEST(TestSerialize, SmallFileGzip) {
- this->FileSerializeTest(Compression::GZIP);
-}
+TYPED_TEST(TestSerialize, SmallFileGzip) { this->FileSerializeTest(Compression::GZIP); }
} // namespace test
http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/b6f3caeb/src/parquet/file/metadata.cc
----------------------------------------------------------------------
diff --git a/src/parquet/file/metadata.cc b/src/parquet/file/metadata.cc
index b37ef4f..d5a96f3 100644
--- a/src/parquet/file/metadata.cc
+++ b/src/parquet/file/metadata.cc
@@ -91,8 +91,9 @@ SortOrder get_sort_order(LogicalType::type converted, Type::type primitive) {
template <typename DType>
static std::shared_ptr<RowGroupStatistics> MakeTypedColumnStats(
const format::ColumnMetaData& metadata, const ColumnDescriptor* descr) {
- return std::make_shared<TypedRowGroupStatistics<DType>>(descr, metadata.statistics.min,
- metadata.statistics.max, metadata.num_values - metadata.statistics.null_count,
+ return std::make_shared<TypedRowGroupStatistics<DType>>(
+ descr, metadata.statistics.min, metadata.statistics.max,
+ metadata.num_values - metadata.statistics.null_count,
metadata.statistics.null_count, metadata.statistics.distinct_count,
metadata.statistics.__isset.max || metadata.statistics.__isset.min);
}
@@ -125,7 +126,8 @@ std::shared_ptr<RowGroupStatistics> MakeColumnStats(
class ColumnChunkMetaData::ColumnChunkMetaDataImpl {
public:
explicit ColumnChunkMetaDataImpl(const format::ColumnChunk* column,
- const ColumnDescriptor* descr, const ApplicationVersion* writer_version)
+ const ColumnDescriptor* descr,
+ const ApplicationVersion* writer_version)
: column_(column), descr_(descr), writer_version_(writer_version) {
const format::ColumnMetaData& meta_data = column->meta_data;
for (auto encoding : meta_data.encodings) {
@@ -205,36 +207,30 @@ class ColumnChunkMetaData::ColumnChunkMetaDataImpl {
const ApplicationVersion* writer_version_;
};
-std::unique_ptr<ColumnChunkMetaData> ColumnChunkMetaData::Make(const uint8_t* metadata,
- const ColumnDescriptor* descr, const ApplicationVersion* writer_version) {
+std::unique_ptr<ColumnChunkMetaData> ColumnChunkMetaData::Make(
+ const uint8_t* metadata, const ColumnDescriptor* descr,
+ const ApplicationVersion* writer_version) {
return std::unique_ptr<ColumnChunkMetaData>(
new ColumnChunkMetaData(metadata, descr, writer_version));
}
ColumnChunkMetaData::ColumnChunkMetaData(const uint8_t* metadata,
- const ColumnDescriptor* descr, const ApplicationVersion* writer_version)
+ const ColumnDescriptor* descr,
+ const ApplicationVersion* writer_version)
: impl_{std::unique_ptr<ColumnChunkMetaDataImpl>(new ColumnChunkMetaDataImpl(
reinterpret_cast<const format::ColumnChunk*>(metadata), descr,
writer_version))} {}
ColumnChunkMetaData::~ColumnChunkMetaData() {}
// column chunk
-int64_t ColumnChunkMetaData::file_offset() const {
- return impl_->file_offset();
-}
+int64_t ColumnChunkMetaData::file_offset() const { return impl_->file_offset(); }
-const std::string& ColumnChunkMetaData::file_path() const {
- return impl_->file_path();
-}
+const std::string& ColumnChunkMetaData::file_path() const { return impl_->file_path(); }
// column metadata
-Type::type ColumnChunkMetaData::type() const {
- return impl_->type();
-}
+Type::type ColumnChunkMetaData::type() const { return impl_->type(); }
-int64_t ColumnChunkMetaData::num_values() const {
- return impl_->num_values();
-}
+int64_t ColumnChunkMetaData::num_values() const { return impl_->num_values(); }
std::shared_ptr<schema::ColumnPath> ColumnChunkMetaData::path_in_schema() const {
return impl_->path_in_schema();
@@ -244,9 +240,7 @@ std::shared_ptr<RowGroupStatistics> ColumnChunkMetaData::statistics() const {
return impl_->statistics();
}
-bool ColumnChunkMetaData::is_stats_set() const {
- return impl_->is_stats_set();
-}
+bool ColumnChunkMetaData::is_stats_set() const { return impl_->is_stats_set(); }
int64_t ColumnChunkMetaData::has_dictionary_page() const {
return impl_->has_dictionary_page();
@@ -284,7 +278,8 @@ int64_t ColumnChunkMetaData::total_compressed_size() const {
class RowGroupMetaData::RowGroupMetaDataImpl {
public:
explicit RowGroupMetaDataImpl(const format::RowGroup* row_group,
- const SchemaDescriptor* schema, const ApplicationVersion* writer_version)
+ const SchemaDescriptor* schema,
+ const ApplicationVersion* writer_version)
: row_group_(row_group), schema_(schema), writer_version_(writer_version) {}
~RowGroupMetaDataImpl() {}
@@ -314,34 +309,28 @@ class RowGroupMetaData::RowGroupMetaDataImpl {
const ApplicationVersion* writer_version_;
};
-std::unique_ptr<RowGroupMetaData> RowGroupMetaData::Make(const uint8_t* metadata,
- const SchemaDescriptor* schema, const ApplicationVersion* writer_version) {
+std::unique_ptr<RowGroupMetaData> RowGroupMetaData::Make(
+ const uint8_t* metadata, const SchemaDescriptor* schema,
+ const ApplicationVersion* writer_version) {
return std::unique_ptr<RowGroupMetaData>(
new RowGroupMetaData(metadata, schema, writer_version));
}
RowGroupMetaData::RowGroupMetaData(const uint8_t* metadata,
- const SchemaDescriptor* schema, const ApplicationVersion* writer_version)
+ const SchemaDescriptor* schema,
+ const ApplicationVersion* writer_version)
: impl_{std::unique_ptr<RowGroupMetaDataImpl>(new RowGroupMetaDataImpl(
reinterpret_cast<const format::RowGroup*>(metadata), schema, writer_version))} {
}
RowGroupMetaData::~RowGroupMetaData() {}
-int RowGroupMetaData::num_columns() const {
- return impl_->num_columns();
-}
+int RowGroupMetaData::num_columns() const { return impl_->num_columns(); }
-int64_t RowGroupMetaData::num_rows() const {
- return impl_->num_rows();
-}
+int64_t RowGroupMetaData::num_rows() const { return impl_->num_rows(); }
-int64_t RowGroupMetaData::total_byte_size() const {
- return impl_->total_byte_size();
-}
+int64_t RowGroupMetaData::total_byte_size() const { return impl_->total_byte_size(); }
-const SchemaDescriptor* RowGroupMetaData::schema() const {
- return impl_->schema();
-}
+const SchemaDescriptor* RowGroupMetaData::schema() const { return impl_->schema(); }
std::unique_ptr<ColumnChunkMetaData> RowGroupMetaData::ColumnChunk(int i) const {
return impl_->ColumnChunk(i);
@@ -408,8 +397,8 @@ class FileMetaData::FileMetaDataImpl {
uint32_t metadata_len_;
std::unique_ptr<format::FileMetaData> metadata_;
void InitSchema() {
- schema::FlatSchemaConverter converter(
- &metadata_->schema[0], static_cast<int>(metadata_->schema.size()));
+ schema::FlatSchemaConverter converter(&metadata_->schema[0],
+ static_cast<int>(metadata_->schema.size()));
schema_.Init(converter.Convert());
}
SchemaDescriptor schema_;
@@ -429,8 +418,8 @@ class FileMetaData::FileMetaDataImpl {
std::shared_ptr<const KeyValueMetadata> key_value_metadata_;
};
-std::shared_ptr<FileMetaData> FileMetaData::Make(
- const uint8_t* metadata, uint32_t* metadata_len) {
+std::shared_ptr<FileMetaData> FileMetaData::Make(const uint8_t* metadata,
+ uint32_t* metadata_len) {
// This FileMetaData ctor is private, not compatible with std::make_shared
return std::shared_ptr<FileMetaData>(new FileMetaData(metadata, metadata_len));
}
@@ -448,21 +437,13 @@ std::unique_ptr<RowGroupMetaData> FileMetaData::RowGroup(int i) const {
return impl_->RowGroup(i);
}
-uint32_t FileMetaData::size() const {
- return impl_->size();
-}
+uint32_t FileMetaData::size() const { return impl_->size(); }
-int FileMetaData::num_columns() const {
- return impl_->num_columns();
-}
+int FileMetaData::num_columns() const { return impl_->num_columns(); }
-int64_t FileMetaData::num_rows() const {
- return impl_->num_rows();
-}
+int64_t FileMetaData::num_rows() const { return impl_->num_rows(); }
-int FileMetaData::num_row_groups() const {
- return impl_->num_row_groups();
-}
+int FileMetaData::num_row_groups() const { return impl_->num_row_groups(); }
ParquetVersion::type FileMetaData::version() const {
switch (impl_->version()) {
@@ -481,25 +462,17 @@ const ApplicationVersion& FileMetaData::writer_version() const {
return impl_->writer_version();
}
-const std::string& FileMetaData::created_by() const {
- return impl_->created_by();
-}
+const std::string& FileMetaData::created_by() const { return impl_->created_by(); }
-int FileMetaData::num_schema_elements() const {
- return impl_->num_schema_elements();
-}
+int FileMetaData::num_schema_elements() const { return impl_->num_schema_elements(); }
-const SchemaDescriptor* FileMetaData::schema() const {
- return impl_->schema();
-}
+const SchemaDescriptor* FileMetaData::schema() const { return impl_->schema(); }
std::shared_ptr<const KeyValueMetadata> FileMetaData::key_value_metadata() const {
return impl_->key_value_metadata();
}
-void FileMetaData::WriteTo(OutputStream* dst) {
- return impl_->WriteTo(dst);
-}
+void FileMetaData::WriteTo(OutputStream* dst) { return impl_->WriteTo(dst); }
ApplicationVersion::ApplicationVersion(const std::string& created_by) {
boost::regex app_regex{ApplicationVersion::APPLICATION_FORMAT};
@@ -509,7 +482,7 @@ ApplicationVersion::ApplicationVersion(const std::string& created_by) {
std::string created_by_lower = created_by;
std::transform(created_by_lower.begin(), created_by_lower.end(),
- created_by_lower.begin(), ::tolower);
+ created_by_lower.begin(), ::tolower);
bool app_success = boost::regex_match(created_by_lower, app_matches, app_regex);
bool ver_success = false;
@@ -572,10 +545,14 @@ bool ApplicationVersion::HasCorrectStatistics(Type::type col_type) const {
// created_by is not populated, which could have been caused by
// parquet-mr during the same time as PARQUET-251, see PARQUET-297
- if (application_ == "unknown") { return true; }
+ if (application_ == "unknown") {
+ return true;
+ }
// PARQUET-251
- if (VersionLt(PARQUET_251_FIXED_VERSION)) { return false; }
+ if (VersionLt(PARQUET_251_FIXED_VERSION)) {
+ return false;
+ }
return true;
}
@@ -585,7 +562,8 @@ bool ApplicationVersion::HasCorrectStatistics(Type::type col_type) const {
class ColumnChunkMetaDataBuilder::ColumnChunkMetaDataBuilderImpl {
public:
explicit ColumnChunkMetaDataBuilderImpl(const std::shared_ptr<WriterProperties>& props,
- const ColumnDescriptor* column, uint8_t* contents)
+ const ColumnDescriptor* column,
+ uint8_t* contents)
: properties_(props), column_(column) {
column_chunk_ = reinterpret_cast<format::ColumnChunk*>(contents);
column_chunk_->meta_data.__set_type(ToThrift(column->physical_type()));
@@ -614,8 +592,9 @@ class ColumnChunkMetaDataBuilder::ColumnChunkMetaDataBuilderImpl {
}
void Finish(int64_t num_values, int64_t dictionary_page_offset,
- int64_t index_page_offset, int64_t data_page_offset, int64_t compressed_size,
- int64_t uncompressed_size, bool has_dictionary, bool dictionary_fallback) {
+ int64_t index_page_offset, int64_t data_page_offset,
+ int64_t compressed_size, int64_t uncompressed_size, bool has_dictionary,
+ bool dictionary_fallback) {
if (dictionary_page_offset > 0) {
column_chunk_->meta_data.__set_dictionary_page_offset(dictionary_page_offset);
column_chunk_->__set_file_offset(dictionary_page_offset + compressed_size);
@@ -642,7 +621,9 @@ class ColumnChunkMetaDataBuilder::ColumnChunkMetaDataBuilderImpl {
thrift_encodings.push_back(ToThrift(Encoding::RLE));
// Only PLAIN encoding is supported for fallback in V1
// TODO(majetideepak): Use user specified encoding for V2
- if (dictionary_fallback) { thrift_encodings.push_back(ToThrift(Encoding::PLAIN)); }
+ if (dictionary_fallback) {
+ thrift_encodings.push_back(ToThrift(Encoding::PLAIN));
+ }
column_chunk_->meta_data.__set_encodings(thrift_encodings);
}
@@ -678,16 +659,16 @@ void ColumnChunkMetaDataBuilder::set_file_path(const std::string& path) {
}
void ColumnChunkMetaDataBuilder::Finish(int64_t num_values,
- int64_t dictionary_page_offset, int64_t index_page_offset, int64_t data_page_offset,
- int64_t compressed_size, int64_t uncompressed_size, bool has_dictionary,
- bool dictionary_fallback) {
+ int64_t dictionary_page_offset,
+ int64_t index_page_offset,
+ int64_t data_page_offset, int64_t compressed_size,
+ int64_t uncompressed_size, bool has_dictionary,
+ bool dictionary_fallback) {
impl_->Finish(num_values, dictionary_page_offset, index_page_offset, data_page_offset,
- compressed_size, uncompressed_size, has_dictionary, dictionary_fallback);
+ compressed_size, uncompressed_size, has_dictionary, dictionary_fallback);
}
-void ColumnChunkMetaDataBuilder::WriteTo(OutputStream* sink) {
- impl_->WriteTo(sink);
-}
+void ColumnChunkMetaDataBuilder::WriteTo(OutputStream* sink) { impl_->WriteTo(sink); }
const ColumnDescriptor* ColumnChunkMetaDataBuilder::descr() const {
return impl_->descr();
@@ -700,8 +681,8 @@ void ColumnChunkMetaDataBuilder::SetStatistics(const EncodedStatistics& result)
class RowGroupMetaDataBuilder::RowGroupMetaDataBuilderImpl {
public:
explicit RowGroupMetaDataBuilderImpl(int64_t num_rows,
- const std::shared_ptr<WriterProperties>& props, const SchemaDescriptor* schema,
- uint8_t* contents)
+ const std::shared_ptr<WriterProperties>& props,
+ const SchemaDescriptor* schema, uint8_t* contents)
: properties_(props), schema_(schema), current_column_(0) {
row_group_ = reinterpret_cast<format::RowGroup*>(contents);
InitializeColumns(schema->num_columns());
@@ -717,7 +698,8 @@ class RowGroupMetaDataBuilder::RowGroupMetaDataBuilderImpl {
throw ParquetException(ss.str());
}
auto column = schema_->Column(current_column_);
- auto column_builder = ColumnChunkMetaDataBuilder::Make(properties_, column,
+ auto column_builder = ColumnChunkMetaDataBuilder::Make(
+ properties_, column,
reinterpret_cast<uint8_t*>(&row_group_->columns[current_column_++]));
auto column_builder_ptr = column_builder.get();
column_builders_.push_back(std::move(column_builder));
@@ -761,16 +743,16 @@ class RowGroupMetaDataBuilder::RowGroupMetaDataBuilderImpl {
int current_column_;
};
-std::unique_ptr<RowGroupMetaDataBuilder> RowGroupMetaDataBuilder::Make(int64_t num_rows,
- const std::shared_ptr<WriterProperties>& props, const SchemaDescriptor* schema_,
- uint8_t* contents) {
+std::unique_ptr<RowGroupMetaDataBuilder> RowGroupMetaDataBuilder::Make(
+ int64_t num_rows, const std::shared_ptr<WriterProperties>& props,
+ const SchemaDescriptor* schema_, uint8_t* contents) {
return std::unique_ptr<RowGroupMetaDataBuilder>(
new RowGroupMetaDataBuilder(num_rows, props, schema_, contents));
}
-RowGroupMetaDataBuilder::RowGroupMetaDataBuilder(int64_t num_rows,
- const std::shared_ptr<WriterProperties>& props, const SchemaDescriptor* schema_,
- uint8_t* contents)
+RowGroupMetaDataBuilder::RowGroupMetaDataBuilder(
+ int64_t num_rows, const std::shared_ptr<WriterProperties>& props,
+ const SchemaDescriptor* schema_, uint8_t* contents)
: impl_{std::unique_ptr<RowGroupMetaDataBuilderImpl>(
new RowGroupMetaDataBuilderImpl(num_rows, props, schema_, contents))} {}
@@ -780,13 +762,9 @@ ColumnChunkMetaDataBuilder* RowGroupMetaDataBuilder::NextColumnChunk() {
return impl_->NextColumnChunk();
}
-int RowGroupMetaDataBuilder::current_column() const {
- return impl_->current_column();
-}
+int RowGroupMetaDataBuilder::current_column() const { return impl_->current_column(); }
-int RowGroupMetaDataBuilder::num_columns() {
- return impl_->num_columns();
-}
+int RowGroupMetaDataBuilder::num_columns() { return impl_->num_columns(); }
void RowGroupMetaDataBuilder::Finish(int64_t total_bytes_written) {
impl_->Finish(total_bytes_written);
@@ -796,8 +774,8 @@ void RowGroupMetaDataBuilder::Finish(int64_t total_bytes_written) {
// TODO(PARQUET-595) Support key_value_metadata
class FileMetaDataBuilder::FileMetaDataBuilderImpl {
public:
- explicit FileMetaDataBuilderImpl(const SchemaDescriptor* schema,
- const std::shared_ptr<WriterProperties>& props,
+ explicit FileMetaDataBuilderImpl(
+ const SchemaDescriptor* schema, const std::shared_ptr<WriterProperties>& props,
const std::shared_ptr<const KeyValueMetadata>& key_value_metadata)
: properties_(props), schema_(schema), key_value_metadata_(key_value_metadata) {
metadata_.reset(new format::FileMetaData());
@@ -879,8 +857,8 @@ std::unique_ptr<FileMetaDataBuilder> FileMetaDataBuilder::Make(
new FileMetaDataBuilder(schema, props, key_value_metadata));
}
-FileMetaDataBuilder::FileMetaDataBuilder(const SchemaDescriptor* schema,
- const std::shared_ptr<WriterProperties>& props,
+FileMetaDataBuilder::FileMetaDataBuilder(
+ const SchemaDescriptor* schema, const std::shared_ptr<WriterProperties>& props,
const std::shared_ptr<const KeyValueMetadata>& key_value_metadata)
: impl_{std::unique_ptr<FileMetaDataBuilderImpl>(
new FileMetaDataBuilderImpl(schema, props, key_value_metadata))} {}
@@ -891,8 +869,6 @@ RowGroupMetaDataBuilder* FileMetaDataBuilder::AppendRowGroup(int64_t num_rows) {
return impl_->AppendRowGroup(num_rows);
}
-std::unique_ptr<FileMetaData> FileMetaDataBuilder::Finish() {
- return impl_->Finish();
-}
+std::unique_ptr<FileMetaData> FileMetaDataBuilder::Finish() { return impl_->Finish(); }
} // namespace parquet
http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/b6f3caeb/src/parquet/file/metadata.h
----------------------------------------------------------------------
diff --git a/src/parquet/file/metadata.h b/src/parquet/file/metadata.h
index 2dc50d1..4250f6b 100644
--- a/src/parquet/file/metadata.h
+++ b/src/parquet/file/metadata.h
@@ -98,8 +98,9 @@ class ApplicationVersion {
class PARQUET_EXPORT ColumnChunkMetaData {
public:
// API convenience to get a MetaData accessor
- static std::unique_ptr<ColumnChunkMetaData> Make(const uint8_t* metadata,
- const ColumnDescriptor* descr, const ApplicationVersion* writer_version = NULL);
+ static std::unique_ptr<ColumnChunkMetaData> Make(
+ const uint8_t* metadata, const ColumnDescriptor* descr,
+ const ApplicationVersion* writer_version = NULL);
~ColumnChunkMetaData();
@@ -124,7 +125,7 @@ class PARQUET_EXPORT ColumnChunkMetaData {
private:
explicit ColumnChunkMetaData(const uint8_t* metadata, const ColumnDescriptor* descr,
- const ApplicationVersion* writer_version = NULL);
+ const ApplicationVersion* writer_version = NULL);
// PIMPL Idiom
class ColumnChunkMetaDataImpl;
std::unique_ptr<ColumnChunkMetaDataImpl> impl_;
@@ -133,8 +134,9 @@ class PARQUET_EXPORT ColumnChunkMetaData {
class PARQUET_EXPORT RowGroupMetaData {
public:
// API convenience to get a MetaData accessor
- static std::unique_ptr<RowGroupMetaData> Make(const uint8_t* metadata,
- const SchemaDescriptor* schema, const ApplicationVersion* writer_version = NULL);
+ static std::unique_ptr<RowGroupMetaData> Make(
+ const uint8_t* metadata, const SchemaDescriptor* schema,
+ const ApplicationVersion* writer_version = NULL);
~RowGroupMetaData();
@@ -148,7 +150,7 @@ class PARQUET_EXPORT RowGroupMetaData {
private:
explicit RowGroupMetaData(const uint8_t* metadata, const SchemaDescriptor* schema,
- const ApplicationVersion* writer_version = NULL);
+ const ApplicationVersion* writer_version = NULL);
// PIMPL Idiom
class RowGroupMetaDataImpl;
std::unique_ptr<RowGroupMetaDataImpl> impl_;
@@ -159,8 +161,8 @@ class FileMetaDataBuilder;
class PARQUET_EXPORT FileMetaData {
public:
// API convenience to get a MetaData accessor
- static std::shared_ptr<FileMetaData> Make(
- const uint8_t* serialized_metadata, uint32_t* metadata_len);
+ static std::shared_ptr<FileMetaData> Make(const uint8_t* serialized_metadata,
+ uint32_t* metadata_len);
~FileMetaData();
@@ -212,15 +214,16 @@ class PARQUET_EXPORT ColumnChunkMetaDataBuilder {
const ColumnDescriptor* descr() const;
// commit the metadata
void Finish(int64_t num_values, int64_t dictonary_page_offset,
- int64_t index_page_offset, int64_t data_page_offset, int64_t compressed_size,
- int64_t uncompressed_size, bool has_dictionary, bool dictionary_fallback);
+ int64_t index_page_offset, int64_t data_page_offset,
+ int64_t compressed_size, int64_t uncompressed_size, bool has_dictionary,
+ bool dictionary_fallback);
// For writing metadata at end of column chunk
void WriteTo(OutputStream* sink);
private:
explicit ColumnChunkMetaDataBuilder(const std::shared_ptr<WriterProperties>& props,
- const ColumnDescriptor* column, uint8_t* contents);
+ const ColumnDescriptor* column, uint8_t* contents);
// PIMPL Idiom
class ColumnChunkMetaDataBuilderImpl;
std::unique_ptr<ColumnChunkMetaDataBuilderImpl> impl_;
@@ -229,9 +232,9 @@ class PARQUET_EXPORT ColumnChunkMetaDataBuilder {
class PARQUET_EXPORT RowGroupMetaDataBuilder {
public:
// API convenience to get a MetaData reader
- static std::unique_ptr<RowGroupMetaDataBuilder> Make(int64_t num_rows,
- const std::shared_ptr<WriterProperties>& props, const SchemaDescriptor* schema_,
- uint8_t* contents);
+ static std::unique_ptr<RowGroupMetaDataBuilder> Make(
+ int64_t num_rows, const std::shared_ptr<WriterProperties>& props,
+ const SchemaDescriptor* schema_, uint8_t* contents);
~RowGroupMetaDataBuilder();
@@ -244,8 +247,8 @@ class PARQUET_EXPORT RowGroupMetaDataBuilder {
private:
explicit RowGroupMetaDataBuilder(int64_t num_rows,
- const std::shared_ptr<WriterProperties>& props, const SchemaDescriptor* schema_,
- uint8_t* contents);
+ const std::shared_ptr<WriterProperties>& props,
+ const SchemaDescriptor* schema_, uint8_t* contents);
// PIMPL Idiom
class RowGroupMetaDataBuilderImpl;
std::unique_ptr<RowGroupMetaDataBuilderImpl> impl_;
@@ -254,8 +257,8 @@ class PARQUET_EXPORT RowGroupMetaDataBuilder {
class PARQUET_EXPORT FileMetaDataBuilder {
public:
// API convenience to get a MetaData reader
- static std::unique_ptr<FileMetaDataBuilder> Make(const SchemaDescriptor* schema,
- const std::shared_ptr<WriterProperties>& props,
+ static std::unique_ptr<FileMetaDataBuilder> Make(
+ const SchemaDescriptor* schema, const std::shared_ptr<WriterProperties>& props,
const std::shared_ptr<const KeyValueMetadata>& key_value_metadata = nullptr);
~FileMetaDataBuilder();
@@ -266,8 +269,8 @@ class PARQUET_EXPORT FileMetaDataBuilder {
std::unique_ptr<FileMetaData> Finish();
private:
- explicit FileMetaDataBuilder(const SchemaDescriptor* schema,
- const std::shared_ptr<WriterProperties>& props,
+ explicit FileMetaDataBuilder(
+ const SchemaDescriptor* schema, const std::shared_ptr<WriterProperties>& props,
const std::shared_ptr<const KeyValueMetadata>& key_value_metadata = nullptr);
// PIMPL Idiom
class FileMetaDataBuilderImpl;
http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/b6f3caeb/src/parquet/file/printer.cc
----------------------------------------------------------------------
diff --git a/src/parquet/file/printer.cc b/src/parquet/file/printer.cc
index 52b2598..2ba9474 100644
--- a/src/parquet/file/printer.cc
+++ b/src/parquet/file/printer.cc
@@ -33,7 +33,7 @@ namespace parquet {
#define COL_WIDTH "30"
void ParquetFilePrinter::DebugPrint(std::ostream& stream, std::list<int> selected_columns,
- bool print_values, const char* filename) {
+ bool print_values, const char* filename) {
const FileMetaData* file_metadata = fileReader->metadata().get();
stream << "File Name: " << filename << "\n";
@@ -101,7 +101,9 @@ void ParquetFilePrinter::DebugPrint(std::ostream& stream, std::list<int> selecte
<< std::endl;
}
- if (!print_values) { continue; }
+ if (!print_values) {
+ continue;
+ }
static constexpr int bufsize = 25;
char buffer[bufsize];
@@ -117,7 +119,7 @@ void ParquetFilePrinter::DebugPrint(std::ostream& stream, std::list<int> selecte
std::string fmt = ss.str();
snprintf(buffer, bufsize, fmt.c_str(),
- file_metadata->schema()->Column(i)->name().c_str());
+ file_metadata->schema()->Column(i)->name().c_str());
stream << buffer;
// This is OK in this method as long as the RowGroupReader does not get
@@ -140,8 +142,8 @@ void ParquetFilePrinter::DebugPrint(std::ostream& stream, std::list<int> selecte
}
}
-void ParquetFilePrinter::JSONPrint(
- std::ostream& stream, std::list<int> selected_columns, const char* filename) {
+void ParquetFilePrinter::JSONPrint(std::ostream& stream, std::list<int> selected_columns,
+ const char* filename) {
const FileMetaData* file_metadata = fileReader->metadata().get();
stream << "{\n";
stream << " \"FileName\": \"" << filename << "\",\n";
@@ -174,7 +176,9 @@ void ParquetFilePrinter::JSONPrint(
<< " \"LogicalType\": \"" << LogicalTypeToString(descr->logical_type())
<< "\" }";
c++;
- if (c != static_cast<int>(selected_columns.size())) { stream << ",\n"; }
+ if (c != static_cast<int>(selected_columns.size())) {
+ stream << ",\n";
+ }
}
stream << "\n ],\n \"RowGroups\": [\n";
@@ -223,11 +227,15 @@ void ParquetFilePrinter::JSONPrint(
// end of a ColumnChunk
stream << "\" }";
c1++;
- if (c1 != static_cast<int>(selected_columns.size())) { stream << ",\n"; }
+ if (c1 != static_cast<int>(selected_columns.size())) {
+ stream << ",\n";
+ }
}
stream << "\n ]\n }";
- if ((r + 1) != static_cast<int>(file_metadata->num_row_groups())) { stream << ",\n"; }
+ if ((r + 1) != static_cast<int>(file_metadata->num_row_groups())) {
+ stream << ",\n";
+ }
}
stream << "\n ]\n}\n";
}
http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/b6f3caeb/src/parquet/file/printer.h
----------------------------------------------------------------------
diff --git a/src/parquet/file/printer.h b/src/parquet/file/printer.h
index a72c17d..a18af4a 100644
--- a/src/parquet/file/printer.h
+++ b/src/parquet/file/printer.h
@@ -38,10 +38,10 @@ class PARQUET_EXPORT ParquetFilePrinter {
~ParquetFilePrinter() {}
void DebugPrint(std::ostream& stream, std::list<int> selected_columns,
- bool print_values = true, const char* fileame = "No Name");
+ bool print_values = true, const char* fileame = "No Name");
void JSONPrint(std::ostream& stream, std::list<int> selected_columns,
- const char* filename = "No Name");
+ const char* filename = "No Name");
};
} // namespace parquet
http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/b6f3caeb/src/parquet/file/reader-internal.cc
----------------------------------------------------------------------
diff --git a/src/parquet/file/reader-internal.cc b/src/parquet/file/reader-internal.cc
index c39d3eb..5ff7398 100644
--- a/src/parquet/file/reader-internal.cc
+++ b/src/parquet/file/reader-internal.cc
@@ -17,10 +17,10 @@
#include "parquet/file/reader-internal.h"
+#include <string.h>
#include <algorithm>
#include <exception>
#include <ostream>
-#include <string.h>
#include <string>
#include <vector>
@@ -42,7 +42,8 @@ namespace parquet {
// assembled in a serialized stream for storing in a Parquet files
SerializedPageReader::SerializedPageReader(std::unique_ptr<InputStream> stream,
- int64_t total_num_rows, Compression::type codec, MemoryPool* pool)
+ int64_t total_num_rows,
+ Compression::type codec, MemoryPool* pool)
: stream_(std::move(stream)),
decompression_buffer_(AllocateBuffer(pool, 0)),
seen_num_rows_(0),
@@ -66,7 +67,9 @@ std::shared_ptr<Page> SerializedPageReader::NextPage() {
// until a maximum allowed header limit
while (true) {
buffer = stream_->Peek(allowed_page_size, &bytes_available);
- if (bytes_available == 0) { return std::shared_ptr<Page>(nullptr); }
+ if (bytes_available == 0) {
+ return std::shared_ptr<Page>(nullptr);
+ }
// This gets used, then set by DeserializeThriftMsg
header_size = static_cast<uint32_t>(bytes_available);
@@ -92,7 +95,9 @@ std::shared_ptr<Page> SerializedPageReader::NextPage() {
// Read the compressed data page.
buffer = stream_->Read(compressed_len, &bytes_read);
- if (bytes_read != compressed_len) { ParquetException::EofException(); }
+ if (bytes_read != compressed_len) {
+ ParquetException::EofException();
+ }
// Uncompress it if we need to
if (decompressor_ != NULL) {
@@ -100,8 +105,9 @@ std::shared_ptr<Page> SerializedPageReader::NextPage() {
if (uncompressed_len > static_cast<int>(decompression_buffer_->size())) {
PARQUET_THROW_NOT_OK(decompression_buffer_->Resize(uncompressed_len, false));
}
- PARQUET_THROW_NOT_OK(decompressor_->Decompress(compressed_len, buffer,
- uncompressed_len, decompression_buffer_->mutable_data()));
+ PARQUET_THROW_NOT_OK(
+ decompressor_->Decompress(compressed_len, buffer, uncompressed_len,
+ decompression_buffer_->mutable_data()));
buffer = decompression_buffer_->data();
}
@@ -114,15 +120,20 @@ std::shared_ptr<Page> SerializedPageReader::NextPage() {
bool is_sorted = dict_header.__isset.is_sorted ? dict_header.is_sorted : false;
return std::make_shared<DictionaryPage>(page_buffer, dict_header.num_values,
- FromThrift(dict_header.encoding), is_sorted);
+ FromThrift(dict_header.encoding),
+ is_sorted);
} else if (current_page_header_.type == format::PageType::DATA_PAGE) {
const format::DataPageHeader& header = current_page_header_.data_page_header;
EncodedStatistics page_statistics;
if (header.__isset.statistics) {
const format::Statistics& stats = header.statistics;
- if (stats.__isset.max) { page_statistics.set_max(stats.max); }
- if (stats.__isset.min) { page_statistics.set_min(stats.min); }
+ if (stats.__isset.max) {
+ page_statistics.set_max(stats.max);
+ }
+ if (stats.__isset.min) {
+ page_statistics.set_min(stats.min);
+ }
if (stats.__isset.null_count) {
page_statistics.set_null_count(stats.null_count);
}
@@ -133,8 +144,9 @@ std::shared_ptr<Page> SerializedPageReader::NextPage() {
seen_num_rows_ += header.num_values;
- return std::make_shared<DataPage>(page_buffer, header.num_values,
- FromThrift(header.encoding), FromThrift(header.definition_level_encoding),
+ return std::make_shared<DataPage>(
+ page_buffer, header.num_values, FromThrift(header.encoding),
+ FromThrift(header.definition_level_encoding),
FromThrift(header.repetition_level_encoding), page_statistics);
} else if (current_page_header_.type == format::PageType::DATA_PAGE_V2) {
const format::DataPageHeaderV2& header = current_page_header_.data_page_header_v2;
@@ -142,10 +154,10 @@ std::shared_ptr<Page> SerializedPageReader::NextPage() {
seen_num_rows_ += header.num_values;
- return std::make_shared<DataPageV2>(page_buffer, header.num_values,
- header.num_nulls, header.num_rows, FromThrift(header.encoding),
- header.definition_levels_byte_length, header.repetition_levels_byte_length,
- is_compressed);
+ return std::make_shared<DataPageV2>(
+ page_buffer, header.num_values, header.num_nulls, header.num_rows,
+ FromThrift(header.encoding), header.definition_levels_byte_length,
+ header.repetition_levels_byte_length, is_compressed);
} else {
// We don't know what this page type is. We're allowed to skip non-data
// pages.
@@ -156,7 +168,8 @@ std::shared_ptr<Page> SerializedPageReader::NextPage() {
}
SerializedRowGroup::SerializedRowGroup(RandomAccessSource* source,
- FileMetaData* file_metadata, int row_group_number, const ReaderProperties& props)
+ FileMetaData* file_metadata, int row_group_number,
+ const ReaderProperties& props)
: source_(source), file_metadata_(file_metadata), properties_(props) {
row_group_metadata_ = file_metadata->RowGroup(row_group_number);
}
@@ -164,9 +177,7 @@ const RowGroupMetaData* SerializedRowGroup::metadata() const {
return row_group_metadata_.get();
}
-const ReaderProperties* SerializedRowGroup::properties() const {
- return &properties_;
-}
+const ReaderProperties* SerializedRowGroup::properties() const { return &properties_; }
// For PARQUET-816
static constexpr int64_t kMaxDictHeaderSize = 100;
@@ -196,8 +207,9 @@ std::unique_ptr<PageReader> SerializedRowGroup::GetColumnPageReader(int i) {
stream = properties_.GetStream(source_, col_start, col_length);
- return std::unique_ptr<PageReader>(new SerializedPageReader(std::move(stream),
- col->num_values(), col->compression(), properties_.memory_pool()));
+ return std::unique_ptr<PageReader>(
+ new SerializedPageReader(std::move(stream), col->num_values(), col->compression(),
+ properties_.memory_pool()));
}
// ----------------------------------------------------------------------
@@ -227,14 +239,13 @@ std::unique_ptr<ParquetFileReader::Contents> SerializedFile::Open(
return result;
}
-void SerializedFile::Close() {
- source_->Close();
-}
+void SerializedFile::Close() { source_->Close(); }
SerializedFile::~SerializedFile() {
try {
Close();
- } catch (...) {}
+ } catch (...) {
+ }
}
std::shared_ptr<RowGroupReader> SerializedFile::GetRowGroup(int i) {
@@ -243,11 +254,10 @@ std::shared_ptr<RowGroupReader> SerializedFile::GetRowGroup(int i) {
return std::make_shared<RowGroupReader>(std::move(contents));
}
-std::shared_ptr<FileMetaData> SerializedFile::metadata() const {
- return file_metadata_;
-}
+std::shared_ptr<FileMetaData> SerializedFile::metadata() const { return file_metadata_; }
-SerializedFile::SerializedFile(std::unique_ptr<RandomAccessSource> source,
+SerializedFile::SerializedFile(
+ std::unique_ptr<RandomAccessSource> source,
const ReaderProperties& props = default_reader_properties())
: source_(std::move(source)), properties_(props) {}
@@ -284,7 +294,7 @@ void SerializedFile::ParseMetaData() {
// Check if the footer_buffer contains the entire metadata
if (footer_read_size >= (metadata_len + FOOTER_SIZE)) {
memcpy(metadata_buffer->mutable_data(),
- footer_buffer + (footer_read_size - metadata_len - FOOTER_SIZE), metadata_len);
+ footer_buffer + (footer_read_size - metadata_len - FOOTER_SIZE), metadata_len);
} else {
bytes_read =
source_->ReadAt(metadata_start, metadata_len, metadata_buffer->mutable_data());
http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/b6f3caeb/src/parquet/file/reader-internal.h
----------------------------------------------------------------------
diff --git a/src/parquet/file/reader-internal.h b/src/parquet/file/reader-internal.h
index 2667fa8..282c534 100644
--- a/src/parquet/file/reader-internal.h
+++ b/src/parquet/file/reader-internal.h
@@ -50,8 +50,8 @@ static constexpr uint32_t DEFAULT_PAGE_HEADER_SIZE = 16 * 1024;
class PARQUET_EXPORT SerializedPageReader : public PageReader {
public:
SerializedPageReader(std::unique_ptr<InputStream> stream, int64_t num_rows,
- Compression::type codec,
- ::arrow::MemoryPool* pool = ::arrow::default_memory_pool());
+ Compression::type codec,
+ ::arrow::MemoryPool* pool = ::arrow::default_memory_pool());
virtual ~SerializedPageReader() {}
@@ -84,7 +84,7 @@ class PARQUET_EXPORT SerializedPageReader : public PageReader {
class PARQUET_EXPORT SerializedRowGroup : public RowGroupReader::Contents {
public:
SerializedRowGroup(RandomAccessSource* source, FileMetaData* file_metadata,
- int row_group_number, const ReaderProperties& props);
+ int row_group_number, const ReaderProperties& props);
virtual const RowGroupMetaData* metadata() const;
@@ -118,8 +118,8 @@ class PARQUET_EXPORT SerializedFile : public ParquetFileReader::Contents {
private:
// This class takes ownership of the provided data source
- explicit SerializedFile(
- std::unique_ptr<RandomAccessSource> source, const ReaderProperties& props);
+ explicit SerializedFile(std::unique_ptr<RandomAccessSource> source,
+ const ReaderProperties& props);
std::unique_ptr<RandomAccessSource> source_;
std::shared_ptr<FileMetaData> file_metadata_;
http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/b6f3caeb/src/parquet/file/reader.cc
----------------------------------------------------------------------
diff --git a/src/parquet/file/reader.cc b/src/parquet/file/reader.cc
index d3247cb..6e78fa4 100644
--- a/src/parquet/file/reader.cc
+++ b/src/parquet/file/reader.cc
@@ -51,14 +51,13 @@ std::shared_ptr<ColumnReader> RowGroupReader::Column(int i) {
const ColumnDescriptor* descr = metadata()->schema()->Column(i);
std::unique_ptr<PageReader> page_reader = contents_->GetColumnPageReader(i);
- return ColumnReader::Make(descr, std::move(page_reader),
+ return ColumnReader::Make(
+ descr, std::move(page_reader),
const_cast<ReaderProperties*>(contents_->properties())->memory_pool());
}
// Returns the rowgroup metadata
-const RowGroupMetaData* RowGroupReader::metadata() const {
- return contents_->metadata();
-}
+const RowGroupMetaData* RowGroupReader::metadata() const { return contents_->metadata(); }
// ----------------------------------------------------------------------
// ParquetFileReader public API
@@ -67,7 +66,8 @@ ParquetFileReader::ParquetFileReader() {}
ParquetFileReader::~ParquetFileReader() {
try {
Close();
- } catch (...) {}
+ } catch (...) {
+ }
}
std::unique_ptr<ParquetFileReader> ParquetFileReader::Open(
@@ -86,8 +86,8 @@ std::unique_ptr<ParquetFileReader> ParquetFileReader::Open(
return result;
}
-std::unique_ptr<ParquetFileReader> ParquetFileReader::OpenFile(const std::string& path,
- bool memory_map, const ReaderProperties& props,
+std::unique_ptr<ParquetFileReader> ParquetFileReader::OpenFile(
+ const std::string& path, bool memory_map, const ReaderProperties& props,
const std::shared_ptr<FileMetaData>& metadata) {
std::shared_ptr<::arrow::io::ReadableFileInterface> source;
if (memory_map) {
@@ -110,7 +110,9 @@ void ParquetFileReader::Open(std::unique_ptr<ParquetFileReader::Contents> conten
}
void ParquetFileReader::Close() {
- if (contents_) { contents_->Close(); }
+ if (contents_) {
+ contents_->Close();
+ }
}
std::shared_ptr<FileMetaData> ParquetFileReader::metadata() const {
http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/b6f3caeb/src/parquet/file/reader.h
----------------------------------------------------------------------
diff --git a/src/parquet/file/reader.h b/src/parquet/file/reader.h
index 1cd287c..eb85235 100644
--- a/src/parquet/file/reader.h
+++ b/src/parquet/file/reader.h
@@ -98,8 +98,9 @@ class PARQUET_EXPORT ParquetFileReader {
// API Convenience to open a serialized Parquet file on disk, using Arrow IO
// interfaces.
- static std::unique_ptr<ParquetFileReader> OpenFile(const std::string& path,
- bool memory_map = true, const ReaderProperties& props = default_reader_properties(),
+ static std::unique_ptr<ParquetFileReader> OpenFile(
+ const std::string& path, bool memory_map = true,
+ const ReaderProperties& props = default_reader_properties(),
const std::shared_ptr<FileMetaData>& metadata = nullptr);
void Open(std::unique_ptr<Contents> contents);
@@ -117,8 +118,8 @@ class PARQUET_EXPORT ParquetFileReader {
};
// Read only Parquet file metadata
-std::shared_ptr<FileMetaData> PARQUET_EXPORT ReadMetaData(
- const std::shared_ptr<::arrow::io::ReadableFileInterface>& source);
+std::shared_ptr<FileMetaData> PARQUET_EXPORT
+ReadMetaData(const std::shared_ptr<::arrow::io::ReadableFileInterface>& source);
} // namespace parquet
http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/b6f3caeb/src/parquet/file/writer-internal.cc
----------------------------------------------------------------------
diff --git a/src/parquet/file/writer-internal.cc b/src/parquet/file/writer-internal.cc
index 1cceb95..5702d2c 100644
--- a/src/parquet/file/writer-internal.cc
+++ b/src/parquet/file/writer-internal.cc
@@ -42,7 +42,8 @@ static constexpr uint8_t PARQUET_MAGIC[4] = {'P', 'A', 'R', '1'};
// SerializedPageWriter
SerializedPageWriter::SerializedPageWriter(OutputStream* sink, Compression::type codec,
- ColumnChunkMetaDataBuilder* metadata, MemoryPool* pool)
+ ColumnChunkMetaDataBuilder* metadata,
+ MemoryPool* pool)
: sink_(sink),
metadata_(metadata),
pool_(pool),
@@ -68,14 +69,15 @@ static format::Statistics ToThrift(const EncodedStatistics& row_group_statistics
void SerializedPageWriter::Close(bool has_dictionary, bool fallback) {
// index_page_offset = 0 since they are not supported
metadata_->Finish(num_values_, dictionary_page_offset_, 0, data_page_offset_,
- total_compressed_size_, total_uncompressed_size_, has_dictionary, fallback);
+ total_compressed_size_, total_uncompressed_size_, has_dictionary,
+ fallback);
// Write metadata at end of column chunk
metadata_->WriteTo(sink_);
}
-void SerializedPageWriter::Compress(
- const Buffer& src_buffer, ResizableBuffer* dest_buffer) {
+void SerializedPageWriter::Compress(const Buffer& src_buffer,
+ ResizableBuffer* dest_buffer) {
DCHECK(compressor_ != nullptr);
// Compress the data
@@ -87,8 +89,9 @@ void SerializedPageWriter::Compress(
PARQUET_THROW_NOT_OK(dest_buffer->Resize(max_compressed_size, false));
int64_t compressed_size;
- PARQUET_THROW_NOT_OK(compressor_->Compress(src_buffer.size(), src_buffer.data(),
- max_compressed_size, dest_buffer->mutable_data(), &compressed_size));
+ PARQUET_THROW_NOT_OK(
+ compressor_->Compress(src_buffer.size(), src_buffer.data(), max_compressed_size,
+ dest_buffer->mutable_data(), &compressed_size));
PARQUET_THROW_NOT_OK(dest_buffer->Resize(compressed_size, false));
}
@@ -113,7 +116,9 @@ int64_t SerializedPageWriter::WriteDataPage(const CompressedDataPage& page) {
// TODO(PARQUET-594) crc checksum
int64_t start_pos = sink_->Tell();
- if (data_page_offset_ == 0) { data_page_offset_ = start_pos; }
+ if (data_page_offset_ == 0) {
+ data_page_offset_ = start_pos;
+ }
int64_t header_size =
SerializeThriftMsg(&page_header, sizeof(format::PageHeader), sink_);
@@ -151,7 +156,9 @@ int64_t SerializedPageWriter::WriteDictionaryPage(const DictionaryPage& page) {
// TODO(PARQUET-594) crc checksum
int64_t start_pos = sink_->Tell();
- if (dictionary_page_offset_ == 0) { dictionary_page_offset_ = start_pos; }
+ if (dictionary_page_offset_ == 0) {
+ dictionary_page_offset_ = start_pos;
+ }
int64_t header_size =
SerializeThriftMsg(&page_header, sizeof(format::PageHeader), sink_);
sink_->Write(compressed_data->data(), compressed_data->size());
@@ -165,32 +172,28 @@ int64_t SerializedPageWriter::WriteDictionaryPage(const DictionaryPage& page) {
// ----------------------------------------------------------------------
// RowGroupSerializer
-int RowGroupSerializer::num_columns() const {
- return metadata_->num_columns();
-}
+int RowGroupSerializer::num_columns() const { return metadata_->num_columns(); }
-int64_t RowGroupSerializer::num_rows() const {
- return num_rows_;
-}
+int64_t RowGroupSerializer::num_rows() const { return num_rows_; }
ColumnWriter* RowGroupSerializer::NextColumn() {
// Throws an error if more columns are being written
auto col_meta = metadata_->NextColumnChunk();
- if (current_column_writer_) { total_bytes_written_ += current_column_writer_->Close(); }
+ if (current_column_writer_) {
+ total_bytes_written_ += current_column_writer_->Close();
+ }
const ColumnDescriptor* column_descr = col_meta->descr();
std::unique_ptr<PageWriter> pager(
new SerializedPageWriter(sink_, properties_->compression(column_descr->path()),
- col_meta, properties_->memory_pool()));
+ col_meta, properties_->memory_pool()));
current_column_writer_ =
ColumnWriter::Make(col_meta, std::move(pager), num_rows_, properties_);
return current_column_writer_.get();
}
-int RowGroupSerializer::current_column() const {
- return metadata_->current_column();
-}
+int RowGroupSerializer::current_column() const { return metadata_->current_column(); }
void RowGroupSerializer::Close() {
if (!closed_) {
@@ -220,7 +223,9 @@ std::unique_ptr<ParquetFileWriter::Contents> FileSerializer::Open(
void FileSerializer::Close() {
if (is_open_) {
- if (row_group_writer_) { row_group_writer_->Close(); }
+ if (row_group_writer_) {
+ row_group_writer_->Close();
+ }
row_group_writer_.reset();
// Write magic bytes and metadata
@@ -231,24 +236,20 @@ void FileSerializer::Close() {
}
}
-int FileSerializer::num_columns() const {
- return schema_.num_columns();
-}
+int FileSerializer::num_columns() const { return schema_.num_columns(); }
-int FileSerializer::num_row_groups() const {
- return num_row_groups_;
-}
+int FileSerializer::num_row_groups() const { return num_row_groups_; }
-int64_t FileSerializer::num_rows() const {
- return num_rows_;
-}
+int64_t FileSerializer::num_rows() const { return num_rows_; }
const std::shared_ptr<WriterProperties>& FileSerializer::properties() const {
return properties_;
}
RowGroupWriter* FileSerializer::AppendRowGroup(int64_t num_rows) {
- if (row_group_writer_) { row_group_writer_->Close(); }
+ if (row_group_writer_) {
+ row_group_writer_->Close();
+ }
num_rows_ += num_rows;
num_row_groups_++;
auto rg_metadata = metadata_->AppendRowGroup(num_rows);
@@ -261,7 +262,8 @@ RowGroupWriter* FileSerializer::AppendRowGroup(int64_t num_rows) {
FileSerializer::~FileSerializer() {
try {
Close();
- } catch (...) {}
+ } catch (...) {
+ }
}
void FileSerializer::WriteMetaData() {
@@ -278,8 +280,8 @@ void FileSerializer::WriteMetaData() {
sink_->Write(PARQUET_MAGIC, 4);
}
-FileSerializer::FileSerializer(const std::shared_ptr<OutputStream>& sink,
- const std::shared_ptr<GroupNode>& schema,
+FileSerializer::FileSerializer(
+ const std::shared_ptr<OutputStream>& sink, const std::shared_ptr<GroupNode>& schema,
const std::shared_ptr<WriterProperties>& properties,
const std::shared_ptr<const KeyValueMetadata>& key_value_metadata)
: ParquetFileWriter::Contents(schema, key_value_metadata),
http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/b6f3caeb/src/parquet/file/writer-internal.h
----------------------------------------------------------------------
diff --git a/src/parquet/file/writer-internal.h b/src/parquet/file/writer-internal.h
index 447579a..5aba994 100644
--- a/src/parquet/file/writer-internal.h
+++ b/src/parquet/file/writer-internal.h
@@ -40,8 +40,8 @@ namespace parquet {
class SerializedPageWriter : public PageWriter {
public:
SerializedPageWriter(OutputStream* sink, Compression::type codec,
- ColumnChunkMetaDataBuilder* metadata,
- ::arrow::MemoryPool* pool = ::arrow::default_memory_pool());
+ ColumnChunkMetaDataBuilder* metadata,
+ ::arrow::MemoryPool* pool = ::arrow::default_memory_pool());
virtual ~SerializedPageWriter() {}
@@ -76,7 +76,8 @@ class SerializedPageWriter : public PageWriter {
class RowGroupSerializer : public RowGroupWriter::Contents {
public:
RowGroupSerializer(int64_t num_rows, OutputStream* sink,
- RowGroupMetaDataBuilder* metadata, const WriterProperties* properties)
+ RowGroupMetaDataBuilder* metadata,
+ const WriterProperties* properties)
: num_rows_(num_rows),
sink_(sink),
metadata_(metadata),
@@ -126,7 +127,8 @@ class FileSerializer : public ParquetFileWriter::Contents {
virtual ~FileSerializer();
private:
- explicit FileSerializer(const std::shared_ptr<OutputStream>& sink,
+ explicit FileSerializer(
+ const std::shared_ptr<OutputStream>& sink,
const std::shared_ptr<schema::GroupNode>& schema,
const std::shared_ptr<WriterProperties>& properties,
const std::shared_ptr<const KeyValueMetadata>& key_value_metadata);
http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/b6f3caeb/src/parquet/file/writer.cc
----------------------------------------------------------------------
diff --git a/src/parquet/file/writer.cc b/src/parquet/file/writer.cc
index d52c25c..a1b9227 100644
--- a/src/parquet/file/writer.cc
+++ b/src/parquet/file/writer.cc
@@ -37,21 +37,13 @@ void RowGroupWriter::Close() {
}
}
-ColumnWriter* RowGroupWriter::NextColumn() {
- return contents_->NextColumn();
-}
+ColumnWriter* RowGroupWriter::NextColumn() { return contents_->NextColumn(); }
-int RowGroupWriter::current_column() {
- return contents_->current_column();
-}
+int RowGroupWriter::current_column() { return contents_->current_column(); }
-int RowGroupWriter::num_columns() const {
- return contents_->num_columns();
-}
+int RowGroupWriter::num_columns() const { return contents_->num_columns(); }
-int64_t RowGroupWriter::num_rows() const {
- return contents_->num_rows();
-}
+int64_t RowGroupWriter::num_rows() const { return contents_->num_rows(); }
// ----------------------------------------------------------------------
// ParquetFileWriter public API
@@ -61,7 +53,8 @@ ParquetFileWriter::ParquetFileWriter() {}
ParquetFileWriter::~ParquetFileWriter() {
try {
Close();
- } catch (...) {}
+ } catch (...) {
+ }
}
std::unique_ptr<ParquetFileWriter> ParquetFileWriter::Open(
@@ -69,8 +62,8 @@ std::unique_ptr<ParquetFileWriter> ParquetFileWriter::Open(
const std::shared_ptr<GroupNode>& schema,
const std::shared_ptr<WriterProperties>& properties,
const std::shared_ptr<const KeyValueMetadata>& key_value_metadata) {
- return Open(
- std::make_shared<ArrowOutputStream>(sink), schema, properties, key_value_metadata);
+ return Open(std::make_shared<ArrowOutputStream>(sink), schema, properties,
+ key_value_metadata);
}
std::unique_ptr<ParquetFileWriter> ParquetFileWriter::Open(
@@ -84,25 +77,17 @@ std::unique_ptr<ParquetFileWriter> ParquetFileWriter::Open(
return result;
}
-const SchemaDescriptor* ParquetFileWriter::schema() const {
- return contents_->schema();
-}
+const SchemaDescriptor* ParquetFileWriter::schema() const { return contents_->schema(); }
const ColumnDescriptor* ParquetFileWriter::descr(int i) const {
return contents_->schema()->Column(i);
}
-int ParquetFileWriter::num_columns() const {
- return contents_->num_columns();
-}
+int ParquetFileWriter::num_columns() const { return contents_->num_columns(); }
-int64_t ParquetFileWriter::num_rows() const {
- return contents_->num_rows();
-}
+int64_t ParquetFileWriter::num_rows() const { return contents_->num_rows(); }
-int ParquetFileWriter::num_row_groups() const {
- return contents_->num_row_groups();
-}
+int ParquetFileWriter::num_row_groups() const { return contents_->num_row_groups(); }
const std::shared_ptr<const KeyValueMetadata>& ParquetFileWriter::key_value_metadata()
const {
http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/b6f3caeb/src/parquet/file/writer.h
----------------------------------------------------------------------
diff --git a/src/parquet/file/writer.h b/src/parquet/file/writer.h
index b22281a..c2b3f91 100644
--- a/src/parquet/file/writer.h
+++ b/src/parquet/file/writer.h
@@ -88,7 +88,7 @@ class PARQUET_EXPORT ParquetFileWriter {
// An implementation of the Contents class is defined in the .cc file
struct Contents {
Contents(const std::shared_ptr<::parquet::schema::GroupNode>& schema,
- const std::shared_ptr<const KeyValueMetadata>& key_value_metadata)
+ const std::shared_ptr<const KeyValueMetadata>& key_value_metadata)
: schema_(), key_value_metadata_(key_value_metadata) {
schema_.Init(schema);
}
http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/b6f3caeb/src/parquet/properties-test.cc
----------------------------------------------------------------------
diff --git a/src/parquet/properties-test.cc b/src/parquet/properties-test.cc
index 0e6d725..c48fc34 100644
--- a/src/parquet/properties-test.cc
+++ b/src/parquet/properties-test.cc
@@ -52,12 +52,12 @@ TEST(TestWriterProperties, AdvancedHandling) {
std::shared_ptr<WriterProperties> props = builder.build();
ASSERT_EQ(Compression::GZIP, props->compression(ColumnPath::FromDotString("gzip")));
- ASSERT_EQ(
- Compression::SNAPPY, props->compression(ColumnPath::FromDotString("delta-length")));
- ASSERT_EQ(
- Encoding::DELTA_BINARY_PACKED, props->encoding(ColumnPath::FromDotString("gzip")));
+ ASSERT_EQ(Compression::SNAPPY,
+ props->compression(ColumnPath::FromDotString("delta-length")));
+ ASSERT_EQ(Encoding::DELTA_BINARY_PACKED,
+ props->encoding(ColumnPath::FromDotString("gzip")));
ASSERT_EQ(Encoding::DELTA_LENGTH_BYTE_ARRAY,
- props->encoding(ColumnPath::FromDotString("delta-length")));
+ props->encoding(ColumnPath::FromDotString("delta-length")));
}
} // namespace test
http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/b6f3caeb/src/parquet/properties.h
----------------------------------------------------------------------
diff --git a/src/parquet/properties.h b/src/parquet/properties.h
index 3ebc3b7..77b0305 100644
--- a/src/parquet/properties.h
+++ b/src/parquet/properties.h
@@ -48,8 +48,8 @@ class PARQUET_EXPORT ReaderProperties {
::arrow::MemoryPool* memory_pool() const { return pool_; }
- std::unique_ptr<InputStream> GetStream(
- RandomAccessSource* source, int64_t start, int64_t num_bytes) {
+ std::unique_ptr<InputStream> GetStream(RandomAccessSource* source, int64_t start,
+ int64_t num_bytes) {
std::unique_ptr<InputStream> stream;
if (buffered_stream_enabled_) {
stream.reset(
@@ -92,9 +92,9 @@ static constexpr Compression::type DEFAULT_COMPRESSION_TYPE = Compression::UNCOM
class PARQUET_EXPORT ColumnProperties {
public:
ColumnProperties(Encoding::type encoding = DEFAULT_ENCODING,
- Compression::type codec = DEFAULT_COMPRESSION_TYPE,
- bool dictionary_enabled = DEFAULT_IS_DICTIONARY_ENABLED,
- bool statistics_enabled = DEFAULT_ARE_STATISTICS_ENABLED)
+ Compression::type codec = DEFAULT_COMPRESSION_TYPE,
+ bool dictionary_enabled = DEFAULT_IS_DICTIONARY_ENABLED,
+ bool statistics_enabled = DEFAULT_ARE_STATISTICS_ENABLED)
: encoding(encoding),
codec(codec),
dictionary_enabled(dictionary_enabled),
@@ -215,8 +215,8 @@ class PARQUET_EXPORT WriterProperties {
* This either apply if dictionary encoding is disabled or if we fallback
* as the dictionary grew too large.
*/
- Builder* encoding(
- const std::shared_ptr<schema::ColumnPath>& path, Encoding::type encoding_type) {
+ Builder* encoding(const std::shared_ptr<schema::ColumnPath>& path,
+ Encoding::type encoding_type) {
return this->encoding(path->ToDotString(), encoding_type);
}
@@ -230,8 +230,8 @@ class PARQUET_EXPORT WriterProperties {
return this;
}
- Builder* compression(
- const std::shared_ptr<schema::ColumnPath>& path, Compression::type codec) {
+ Builder* compression(const std::shared_ptr<schema::ColumnPath>& path,
+ Compression::type codec) {
return this->compression(path->ToDotString(), codec);
}
@@ -273,18 +273,16 @@ class PARQUET_EXPORT WriterProperties {
return it->second;
};
- for (const auto& item : encodings_)
- get(item.first).encoding = item.second;
- for (const auto& item : codecs_)
- get(item.first).codec = item.second;
+ for (const auto& item : encodings_) get(item.first).encoding = item.second;
+ for (const auto& item : codecs_) get(item.first).codec = item.second;
for (const auto& item : dictionary_enabled_)
get(item.first).dictionary_enabled = item.second;
for (const auto& item : statistics_enabled_)
get(item.first).statistics_enabled = item.second;
- return std::shared_ptr<WriterProperties>(new WriterProperties(pool_,
- dictionary_pagesize_limit_, write_batch_size_, pagesize_, version_, created_by_,
- default_column_properties_, column_properties));
+ return std::shared_ptr<WriterProperties>(new WriterProperties(
+ pool_, dictionary_pagesize_limit_, write_batch_size_, pagesize_, version_,
+ created_by_, default_column_properties_, column_properties));
}
private:
@@ -355,7 +353,8 @@ class PARQUET_EXPORT WriterProperties {
}
private:
- explicit WriterProperties(::arrow::MemoryPool* pool, int64_t dictionary_pagesize_limit,
+ explicit WriterProperties(
+ ::arrow::MemoryPool* pool, int64_t dictionary_pagesize_limit,
int64_t write_batch_size, int64_t pagesize, ParquetVersion::type version,
const std::string& created_by, const ColumnProperties& default_column_properties,
const std::unordered_map<std::string, ColumnProperties>& column_properties)
http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/b6f3caeb/src/parquet/public-api-test.cc
----------------------------------------------------------------------
diff --git a/src/parquet/public-api-test.cc b/src/parquet/public-api-test.cc
index 4d6f675..09d399b 100644
--- a/src/parquet/public-api-test.cc
+++ b/src/parquet/public-api-test.cc
@@ -40,9 +40,7 @@ TEST(TestPublicAPI, DoesNotIncludeZlib) {
#endif
}
-void ThrowsParquetException() {
- throw parquet::ParquetException("This function throws");
-}
+void ThrowsParquetException() { throw parquet::ParquetException("This function throws"); }
TEST(TestPublicAPI, CanThrowParquetException) {
ASSERT_THROW(ThrowsParquetException(), parquet::ParquetException);