You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@parquet.apache.org by we...@apache.org on 2017/07/31 15:15:02 UTC
[5/5] parquet-cpp git commit: PARQUET-1068: Modify .clang-format to use straight Google format with 90-character line width
PARQUET-1068: Modify .clang-format to use straight Google format with 90-character line width
The main change is horizontal alignment. We should also do a clang-tidy pass at some point to do some further scrubbing.
Author: Wes McKinney <we...@twosigma.com>
Closes #375 from wesm/PARQUET-1068 and squashes the following commits:
b81145d [Wes McKinney] Modify .clang-format to use straight Google format with 90-character line width
Project: http://git-wip-us.apache.org/repos/asf/parquet-cpp/repo
Commit: http://git-wip-us.apache.org/repos/asf/parquet-cpp/commit/b6f3caeb
Tree: http://git-wip-us.apache.org/repos/asf/parquet-cpp/tree/b6f3caeb
Diff: http://git-wip-us.apache.org/repos/asf/parquet-cpp/diff/b6f3caeb
Branch: refs/heads/master
Commit: b6f3caeb0776889310fe4d6a0e677cc3626cb389
Parents: af96ff0
Author: Wes McKinney <we...@twosigma.com>
Authored: Mon Jul 31 11:14:52 2017 -0400
Committer: Wes McKinney <we...@twosigma.com>
Committed: Mon Jul 31 11:14:52 2017 -0400
----------------------------------------------------------------------
.clang-format | 83 ++----
benchmarks/decode_benchmark.cc | 43 +--
examples/reader-writer.cc | 35 +--
.../arrow/arrow-reader-writer-benchmark.cc | 4 +-
src/parquet/arrow/arrow-reader-writer-test.cc | 210 +++++++-------
src/parquet/arrow/arrow-schema-test.cc | 130 +++++----
src/parquet/arrow/reader.cc | 276 +++++++++++--------
src/parquet/arrow/reader.h | 19 +-
src/parquet/arrow/schema.cc | 82 ++++--
src/parquet/arrow/schema.h | 32 ++-
src/parquet/arrow/test-util.h | 65 +++--
src/parquet/arrow/writer.cc | 177 ++++++------
src/parquet/arrow/writer.h | 38 +--
src/parquet/column-io-benchmark.cc | 35 +--
src/parquet/column_page.h | 21 +-
src/parquet/column_reader-test.cc | 95 ++++---
src/parquet/column_reader.cc | 27 +-
src/parquet/column_reader.h | 72 +++--
src/parquet/column_scanner-test.cc | 65 ++---
src/parquet/column_scanner.cc | 39 +--
src/parquet/column_scanner.h | 63 +++--
src/parquet/column_writer-test.cc | 111 ++++----
src/parquet/column_writer.cc | 198 +++++++------
src/parquet/column_writer.h | 38 +--
src/parquet/encoding-benchmark.cc | 8 +-
src/parquet/encoding-internal.h | 114 +++++---
src/parquet/encoding-test.cc | 31 +--
src/parquet/encoding.h | 14 +-
src/parquet/exception.cc | 8 +-
src/parquet/file/file-deserialize-test.cc | 40 +--
src/parquet/file/file-metadata-test.cc | 2 +-
src/parquet/file/file-serialize-test.cc | 12 +-
src/parquet/file/metadata.cc | 178 ++++++------
src/parquet/file/metadata.h | 43 +--
src/parquet/file/printer.cc | 24 +-
src/parquet/file/printer.h | 4 +-
src/parquet/file/reader-internal.cc | 70 +++--
src/parquet/file/reader-internal.h | 10 +-
src/parquet/file/reader.cc | 18 +-
src/parquet/file/reader.h | 9 +-
src/parquet/file/writer-internal.cc | 68 ++---
src/parquet/file/writer-internal.h | 10 +-
src/parquet/file/writer.cc | 39 +--
src/parquet/file/writer.h | 2 +-
src/parquet/properties-test.cc | 10 +-
src/parquet/properties.h | 33 ++-
src/parquet/public-api-test.cc | 4 +-
src/parquet/reader-test.cc | 8 +-
src/parquet/schema-test.cc | 121 ++++----
src/parquet/schema.cc | 109 ++++----
src/parquet/schema.h | 43 +--
src/parquet/statistics-test.cc | 54 ++--
src/parquet/statistics.cc | 43 +--
src/parquet/statistics.h | 21 +-
src/parquet/test-specialization.h | 10 +-
src/parquet/test-util.h | 99 ++++---
src/parquet/thrift.h | 2 +-
src/parquet/types-test.cc | 16 +-
src/parquet/util/buffer-builder.h | 2 +-
src/parquet/util/comparison-test.cc | 5 +-
src/parquet/util/comparison.h | 4 +-
src/parquet/util/memory.cc | 43 ++-
src/parquet/util/memory.h | 10 +-
src/parquet/util/schema-util.h | 15 +-
src/parquet/util/test-common.h | 22 +-
tools/parquet-scan.cc | 9 +-
66 files changed, 1802 insertions(+), 1543 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/b6f3caeb/.clang-format
----------------------------------------------------------------------
diff --git a/.clang-format b/.clang-format
index 7d5b3cf..06453df 100644
--- a/.clang-format
+++ b/.clang-format
@@ -1,65 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
---
-Language: Cpp
-# BasedOnStyle: Google
-AccessModifierOffset: -1
-AlignAfterOpenBracket: false
-AlignConsecutiveAssignments: false
-AlignEscapedNewlinesLeft: true
-AlignOperands: true
-AlignTrailingComments: true
-AllowAllParametersOfDeclarationOnNextLine: true
-AllowShortBlocksOnASingleLine: true
-AllowShortCaseLabelsOnASingleLine: false
-AllowShortFunctionsOnASingleLine: Inline
-AllowShortIfStatementsOnASingleLine: true
-AllowShortLoopsOnASingleLine: false
-AlwaysBreakAfterDefinitionReturnType: None
-AlwaysBreakBeforeMultilineStrings: true
-AlwaysBreakTemplateDeclarations: true
-BinPackArguments: true
-BinPackParameters: true
-BreakBeforeBinaryOperators: None
-BreakBeforeBraces: Attach
-BreakBeforeTernaryOperators: true
-BreakConstructorInitializersBeforeComma: false
-ColumnLimit: 90
-CommentPragmas: '^ IWYU pragma:'
-ConstructorInitializerAllOnOneLineOrOnePerLine: true
-ConstructorInitializerIndentWidth: 4
-ContinuationIndentWidth: 4
-Cpp11BracedListStyle: true
-DerivePointerAlignment: false
-DisableFormat: false
-ExperimentalAutoDetectBinPacking: false
-ForEachMacros: [ foreach, Q_FOREACH, BOOST_FOREACH ]
-IndentCaseLabels: true
-IndentWidth: 2
-IndentWrappedFunctionNames: false
-KeepEmptyLinesAtTheStartOfBlocks: false
-MacroBlockBegin: ''
-MacroBlockEnd: ''
-MaxEmptyLinesToKeep: 1
-NamespaceIndentation: None
-ObjCBlockIndentWidth: 2
-ObjCSpaceAfterProperty: false
-ObjCSpaceBeforeProtocolList: false
-PenaltyBreakBeforeFirstCallParameter: 1000
-PenaltyBreakComment: 300
-PenaltyBreakFirstLessLess: 120
-PenaltyBreakString: 1000
-PenaltyExcessCharacter: 1000000
-PenaltyReturnTypeOnItsOwnLine: 200
-PointerAlignment: Left
-SpaceAfterCStyleCast: false
-SpaceBeforeAssignmentOperators: true
-SpaceBeforeParens: ControlStatements
-SpaceInEmptyParentheses: false
-SpacesBeforeTrailingComments: 2
-SpacesInAngles: false
-SpacesInContainerLiterals: true
-SpacesInCStyleCastParentheses: false
-SpacesInParentheses: false
-SpacesInSquareBrackets: false
-Standard: Cpp11
-TabWidth: 8
-UseTab: Never
+BasedOnStyle: Google
+DerivePointerAlignment: false
+ColumnLimit: 90
http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/b6f3caeb/benchmarks/decode_benchmark.cc
----------------------------------------------------------------------
diff --git a/benchmarks/decode_benchmark.cc b/benchmarks/decode_benchmark.cc
index 57279d0..8df45f7 100644
--- a/benchmarks/decode_benchmark.cc
+++ b/benchmarks/decode_benchmark.cc
@@ -15,9 +15,9 @@
// specific language governing permissions and limitations
// under the License.
+#include <stdio.h>
#include <iostream>
#include <random>
-#include <stdio.h>
#include "arrow/util/compression.h"
#include "arrow/util/compression_snappy.h"
@@ -165,8 +165,8 @@ class DeltaByteArrayEncoder {
}
}
prefix_len_encoder_.Add(prefix_len);
- suffix_encoder_.Add(
- reinterpret_cast<const uint8_t*>(s.data()) + prefix_len, s.size() - prefix_len);
+ suffix_encoder_.Add(reinterpret_cast<const uint8_t*>(s.data()) + prefix_len,
+ s.size() - prefix_len);
last_value_ = s;
}
@@ -210,7 +210,8 @@ uint64_t TestPlainIntEncoding(const uint8_t* data, int num_values, int batch_siz
}
uint64_t TestBinaryPackedEncoding(const char* name, const std::vector<int64_t>& values,
- int benchmark_iters = -1, int benchmark_batch_size = 1) {
+ int benchmark_iters = -1,
+ int benchmark_batch_size = 1) {
int mini_block_size;
if (values.size() < 8) {
mini_block_size = 8;
@@ -266,7 +267,7 @@ uint64_t TestBinaryPackedEncoding(const char* name, const std::vector<int64_t>&
uint64_t elapsed = sw.Stop();
double num_ints = values.size() * benchmark_iters * 1000.;
printf("%s rate (batch size = %2d): %0.3fM per second.\n", name, benchmark_batch_size,
- num_ints / elapsed);
+ num_ints / elapsed);
return result;
}
}
@@ -278,10 +279,10 @@ uint64_t TestBinaryPackedEncoding(const char* name, const std::vector<int64_t>&
} \
elapsed = sw.Stop(); \
printf("%s rate (batch size = %2d): %0.3fM per second.\n", NAME, BATCH_SIZE, \
- mult / elapsed);
+ mult / elapsed);
void TestPlainIntCompressed(::arrow::Codec* codec, const std::vector<int64_t>& data,
- int num_iters, int batch_size) {
+ int num_iters, int batch_size) {
const uint8_t* raw_data = reinterpret_cast<const uint8_t*>(&data[0]);
int uncompressed_len = data.size() * sizeof(int64_t);
uint8_t* decompressed_data = new uint8_t[uncompressed_len];
@@ -291,24 +292,24 @@ void TestPlainIntCompressed(::arrow::Codec* codec, const std::vector<int64_t>& d
int64_t compressed_len;
DCHECK(codec
->Compress(uncompressed_len, raw_data, max_compressed_size, compressed_data,
- &compressed_len)
+ &compressed_len)
.ok());
printf("\n%s:\n Uncompressed len: %d\n Compressed len: %d\n", codec->name(),
- uncompressed_len, static_cast<int>(compressed_len));
+ uncompressed_len, static_cast<int>(compressed_len));
double mult = num_iters * data.size() * 1000.;
parquet::StopWatch sw;
sw.Start();
uint64_t r = 0;
for (int i = 0; i < num_iters; ++i) {
- codec->Decompress(
- compressed_len, compressed_data, uncompressed_len, decompressed_data);
+ codec->Decompress(compressed_len, compressed_data, uncompressed_len,
+ decompressed_data);
r += TestPlainIntEncoding(decompressed_data, data.size(), batch_size);
}
int64_t elapsed = sw.Stop();
printf("Compressed(%s) plain int rate (batch size = %2d): %0.3fM per second.\n",
- codec->name(), batch_size, mult / elapsed);
+ codec->name(), batch_size, mult / elapsed);
delete[] compressed_data;
delete[] decompressed_data;
@@ -317,13 +318,11 @@ void TestPlainIntCompressed(::arrow::Codec* codec, const std::vector<int64_t>& d
void TestBinaryPacking() {
std::vector<int64_t> values;
values.clear();
- for (int i = 0; i < 100; ++i)
- values.push_back(0);
+ for (int i = 0; i < 100; ++i) values.push_back(0);
TestBinaryPackedEncoding("Zeros", values);
values.clear();
- for (int i = 1; i <= 5; ++i)
- values.push_back(i);
+ for (int i = 1; i <= 5; ++i) values.push_back(i);
TestBinaryPackedEncoding("Example 1", values);
values.clear();
@@ -373,13 +372,15 @@ void TestDeltaLengthByteArray() {
int len = 0;
uint8_t* buffer = encoder.Encode(&len);
printf("DeltaLengthByteArray\n Raw len: %d\n Encoded len: %d\n",
- encoder.plain_encoded_len(), len);
+ encoder.plain_encoded_len(), len);
decoder.SetData(encoder.num_values(), buffer, len);
for (int i = 0; i < encoder.num_values(); ++i) {
parquet::ByteArray v = {0, NULL};
decoder.Decode(&v, 1);
std::string r = std::string(reinterpret_cast<const char*>(v.ptr), v.len);
- if (r != values[i]) { std::cout << "Bad " << r << " != " << values[i] << std::endl; }
+ if (r != values[i]) {
+ std::cout << "Bad " << r << " != " << values[i] << std::endl;
+ }
}
}
@@ -409,13 +410,15 @@ void TestDeltaByteArray() {
int len = 0;
uint8_t* buffer = encoder.Encode(&len);
printf("DeltaLengthByteArray\n Raw len: %d\n Encoded len: %d\n",
- encoder.plain_encoded_len(), len);
+ encoder.plain_encoded_len(), len);
decoder.SetData(encoder.num_values(), buffer, len);
for (int i = 0; i < encoder.num_values(); ++i) {
parquet::ByteArray v;
decoder.Decode(&v, 1);
std::string r = std::string(reinterpret_cast<const char*>(v.ptr), v.len);
- if (r != values[i]) { std::cout << "Bad " << r << " != " << values[i] << std::endl; }
+ if (r != values[i]) {
+ std::cout << "Bad " << r << " != " << values[i] << std::endl;
+ }
}
}
http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/b6f3caeb/examples/reader-writer.cc
----------------------------------------------------------------------
diff --git a/examples/reader-writer.cc b/examples/reader-writer.cc
index 210968c..7136b28 100644
--- a/examples/reader-writer.cc
+++ b/examples/reader-writer.cc
@@ -59,35 +59,36 @@ static std::shared_ptr<GroupNode> SetupSchema() {
parquet::schema::NodeVector fields;
// Create a primitive node named 'boolean_field' with type:BOOLEAN,
// repetition:REQUIRED
- fields.push_back(PrimitiveNode::Make(
- "boolean_field", Repetition::REQUIRED, Type::BOOLEAN, LogicalType::NONE));
+ fields.push_back(PrimitiveNode::Make("boolean_field", Repetition::REQUIRED,
+ Type::BOOLEAN, LogicalType::NONE));
// Create a primitive node named 'int32_field' with type:INT32, repetition:REQUIRED,
// logical type:TIME_MILLIS
- fields.push_back(PrimitiveNode::Make(
- "int32_field", Repetition::REQUIRED, Type::INT32, LogicalType::TIME_MILLIS));
+ fields.push_back(PrimitiveNode::Make("int32_field", Repetition::REQUIRED, Type::INT32,
+ LogicalType::TIME_MILLIS));
// Create a primitive node named 'int64_field' with type:INT64, repetition:REPEATED
- fields.push_back(PrimitiveNode::Make(
- "int64_field", Repetition::REPEATED, Type::INT64, LogicalType::NONE));
+ fields.push_back(PrimitiveNode::Make("int64_field", Repetition::REPEATED, Type::INT64,
+ LogicalType::NONE));
- fields.push_back(PrimitiveNode::Make(
- "int96_field", Repetition::REQUIRED, Type::INT96, LogicalType::NONE));
+ fields.push_back(PrimitiveNode::Make("int96_field", Repetition::REQUIRED, Type::INT96,
+ LogicalType::NONE));
- fields.push_back(PrimitiveNode::Make(
- "float_field", Repetition::REQUIRED, Type::FLOAT, LogicalType::NONE));
+ fields.push_back(PrimitiveNode::Make("float_field", Repetition::REQUIRED, Type::FLOAT,
+ LogicalType::NONE));
- fields.push_back(PrimitiveNode::Make(
- "double_field", Repetition::REQUIRED, Type::DOUBLE, LogicalType::NONE));
+ fields.push_back(PrimitiveNode::Make("double_field", Repetition::REQUIRED, Type::DOUBLE,
+ LogicalType::NONE));
// Create a primitive node named 'ba_field' with type:BYTE_ARRAY, repetition:OPTIONAL
- fields.push_back(PrimitiveNode::Make(
- "ba_field", Repetition::OPTIONAL, Type::BYTE_ARRAY, LogicalType::NONE));
+ fields.push_back(PrimitiveNode::Make("ba_field", Repetition::OPTIONAL, Type::BYTE_ARRAY,
+ LogicalType::NONE));
// Create a primitive node named 'flba_field' with type:FIXED_LEN_BYTE_ARRAY,
// repetition:REQUIRED, field_length = FIXED_LENGTH
fields.push_back(PrimitiveNode::Make("flba_field", Repetition::REQUIRED,
- Type::FIXED_LEN_BYTE_ARRAY, LogicalType::NONE, FIXED_LENGTH));
+ Type::FIXED_LEN_BYTE_ARRAY, LogicalType::NONE,
+ FIXED_LENGTH));
// Create a GroupNode named 'schema' using the primitive nodes defined above
// This GroupNode is the root node of the schema tree
@@ -308,8 +309,8 @@ int main(int argc, char** argv) {
int64_t value;
// Read one value at a time. The number of rows read is returned. values_read
// contains the number of non-null rows
- rows_read = int64_reader->ReadBatch(
- 1, &definition_level, &repetition_level, &value, &values_read);
+ rows_read = int64_reader->ReadBatch(1, &definition_level, &repetition_level,
+ &value, &values_read);
// Ensure only one value is read
assert(rows_read == 1);
// There are no NULL values in the rows written
http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/b6f3caeb/src/parquet/arrow/arrow-reader-writer-benchmark.cc
----------------------------------------------------------------------
diff --git a/src/parquet/arrow/arrow-reader-writer-benchmark.cc b/src/parquet/arrow/arrow-reader-writer-benchmark.cc
index 677e437..149cc1a 100644
--- a/src/parquet/arrow/arrow-reader-writer-benchmark.cc
+++ b/src/parquet/arrow/arrow-reader-writer-benchmark.cc
@@ -64,8 +64,8 @@ using ArrowType = typename benchmark_traits<ParquetType>::arrow_type;
template <typename ParquetType>
std::shared_ptr<ColumnDescriptor> MakeSchema(Repetition::type repetition) {
auto node = PrimitiveNode::Make("int64", repetition, ParquetType::type_num);
- return std::make_shared<ColumnDescriptor>(
- node, repetition != Repetition::REQUIRED, repetition == Repetition::REPEATED);
+ return std::make_shared<ColumnDescriptor>(node, repetition != Repetition::REQUIRED,
+ repetition == Repetition::REPEATED);
}
template <bool nullable, typename ParquetType>
http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/b6f3caeb/src/parquet/arrow/arrow-reader-writer-test.cc
----------------------------------------------------------------------
diff --git a/src/parquet/arrow/arrow-reader-writer-test.cc b/src/parquet/arrow/arrow-reader-writer-test.cc
index 4424ea6..69c4991 100644
--- a/src/parquet/arrow/arrow-reader-writer-test.cc
+++ b/src/parquet/arrow/arrow-reader-writer-test.cc
@@ -290,28 +290,29 @@ template <typename T>
using ParquetWriter = TypedColumnWriter<ParquetDataType<T>>;
void WriteTableToBuffer(const std::shared_ptr<Table>& table, int num_threads,
- int64_t row_group_size,
- const std::shared_ptr<ArrowWriterProperties>& arrow_properties,
- std::shared_ptr<Buffer>* out) {
+ int64_t row_group_size,
+ const std::shared_ptr<ArrowWriterProperties>& arrow_properties,
+ std::shared_ptr<Buffer>* out) {
auto sink = std::make_shared<InMemoryOutputStream>();
ASSERT_OK_NO_THROW(WriteTable(*table, ::arrow::default_memory_pool(), sink,
- row_group_size, default_writer_properties(), arrow_properties));
+ row_group_size, default_writer_properties(),
+ arrow_properties));
*out = sink->GetBuffer();
}
void DoSimpleRoundtrip(const std::shared_ptr<Table>& table, int num_threads,
- int64_t row_group_size, const std::vector<int>& column_subset,
- std::shared_ptr<Table>* out,
- const std::shared_ptr<ArrowWriterProperties>& arrow_properties =
- default_arrow_writer_properties()) {
+ int64_t row_group_size, const std::vector<int>& column_subset,
+ std::shared_ptr<Table>* out,
+ const std::shared_ptr<ArrowWriterProperties>& arrow_properties =
+ default_arrow_writer_properties()) {
std::shared_ptr<Buffer> buffer;
WriteTableToBuffer(table, num_threads, row_group_size, arrow_properties, &buffer);
std::unique_ptr<FileReader> reader;
- ASSERT_OK_NO_THROW(
- OpenFile(std::make_shared<BufferReader>(buffer), ::arrow::default_memory_pool(),
- ::parquet::default_reader_properties(), nullptr, &reader));
+ ASSERT_OK_NO_THROW(OpenFile(std::make_shared<BufferReader>(buffer),
+ ::arrow::default_memory_pool(),
+ ::parquet::default_reader_properties(), nullptr, &reader));
reader->set_num_threads(num_threads);
@@ -323,8 +324,8 @@ void DoSimpleRoundtrip(const std::shared_ptr<Table>& table, int num_threads,
}
}
-static std::shared_ptr<GroupNode> MakeSimpleSchema(
- const ::arrow::DataType& type, Repetition::type repetition) {
+static std::shared_ptr<GroupNode> MakeSimpleSchema(const ::arrow::DataType& type,
+ Repetition::type repetition) {
int byte_width;
// Decimal is not implemented yet.
switch (type.id()) {
@@ -334,8 +335,8 @@ static std::shared_ptr<GroupNode> MakeSimpleSchema(
default:
byte_width = -1;
}
- auto pnode = PrimitiveNode::Make(
- "column1", repetition, get_physical_type(type), get_logical_type(type), byte_width);
+ auto pnode = PrimitiveNode::Make("column1", repetition, get_physical_type(type),
+ get_logical_type(type), byte_width);
NodePtr node_ =
GroupNode::Make("schema", Repetition::REQUIRED, std::vector<NodePtr>({pnode}));
return std::static_pointer_cast<GroupNode>(node_);
@@ -354,13 +355,13 @@ class TestParquetIO : public ::testing::Test {
void ReaderFromSink(std::unique_ptr<FileReader>* out) {
std::shared_ptr<Buffer> buffer = sink_->GetBuffer();
- ASSERT_OK_NO_THROW(
- OpenFile(std::make_shared<BufferReader>(buffer), ::arrow::default_memory_pool(),
- ::parquet::default_reader_properties(), nullptr, out));
+ ASSERT_OK_NO_THROW(OpenFile(std::make_shared<BufferReader>(buffer),
+ ::arrow::default_memory_pool(),
+ ::parquet::default_reader_properties(), nullptr, out));
}
- void ReadSingleColumnFile(
- std::unique_ptr<FileReader> file_reader, std::shared_ptr<Array>* out) {
+ void ReadSingleColumnFile(std::unique_ptr<FileReader> file_reader,
+ std::shared_ptr<Array>* out) {
std::unique_ptr<ColumnReader> column_reader;
ASSERT_OK_NO_THROW(file_reader->GetColumn(0, &column_reader));
ASSERT_NE(nullptr, column_reader.get());
@@ -378,8 +379,8 @@ class TestParquetIO : public ::testing::Test {
ASSERT_TRUE(values->Equals(out));
}
- void ReadTableFromFile(
- std::unique_ptr<FileReader> reader, std::shared_ptr<Table>* out) {
+ void ReadTableFromFile(std::unique_ptr<FileReader> reader,
+ std::shared_ptr<Table>* out) {
ASSERT_OK_NO_THROW(reader->ReadTable(out));
auto key_value_metadata =
reader->parquet_reader()->metadata()->key_value_metadata().get();
@@ -388,30 +389,30 @@ class TestParquetIO : public ::testing::Test {
}
void PrepareListTable(int64_t size, bool nullable_lists, bool nullable_elements,
- int64_t null_count, std::shared_ptr<Table>* out) {
+ int64_t null_count, std::shared_ptr<Table>* out) {
std::shared_ptr<Array> values;
- ASSERT_OK(NullableArray<TestType>(
- size * size, nullable_elements ? null_count : 0, kDefaultSeed, &values));
+ ASSERT_OK(NullableArray<TestType>(size * size, nullable_elements ? null_count : 0,
+ kDefaultSeed, &values));
// Also test that slice offsets are respected
values = values->Slice(5, values->length() - 5);
std::shared_ptr<ListArray> lists;
- ASSERT_OK(MakeListArray(
- values, size, nullable_lists ? null_count : 0, nullable_elements, &lists));
+ ASSERT_OK(MakeListArray(values, size, nullable_lists ? null_count : 0,
+ nullable_elements, &lists));
*out = MakeSimpleTable(lists->Slice(3, size - 6), nullable_lists);
}
void PrepareListOfListTable(int64_t size, bool nullable_parent_lists,
- bool nullable_lists, bool nullable_elements, int64_t null_count,
- std::shared_ptr<Table>* out) {
+ bool nullable_lists, bool nullable_elements,
+ int64_t null_count, std::shared_ptr<Table>* out) {
std::shared_ptr<Array> values;
- ASSERT_OK(NullableArray<TestType>(
- size * 6, nullable_elements ? null_count : 0, kDefaultSeed, &values));
+ ASSERT_OK(NullableArray<TestType>(size * 6, nullable_elements ? null_count : 0,
+ kDefaultSeed, &values));
std::shared_ptr<ListArray> lists;
- ASSERT_OK(MakeListArray(
- values, size * 3, nullable_lists ? null_count : 0, nullable_elements, &lists));
+ ASSERT_OK(MakeListArray(values, size * 3, nullable_lists ? null_count : 0,
+ nullable_elements, &lists));
std::shared_ptr<ListArray> parent_lists;
ASSERT_OK(MakeListArray(lists, size, nullable_parent_lists ? null_count : 0,
- nullable_lists, &parent_lists));
+ nullable_lists, &parent_lists));
*out = MakeSimpleTable(parent_lists, nullable_parent_lists);
}
@@ -438,7 +439,7 @@ class TestParquetIO : public ::testing::Test {
template <typename ArrayType>
void WriteColumn(const std::shared_ptr<GroupNode>& schema,
- const std::shared_ptr<ArrayType>& values) {
+ const std::shared_ptr<ArrayType>& values) {
FileWriter writer(::arrow::default_memory_pool(), MakeWriter(schema));
ASSERT_OK_NO_THROW(writer.NewRowGroup(values->length()));
ASSERT_OK_NO_THROW(writer.WriteColumnChunk(*values));
@@ -454,9 +455,10 @@ class TestParquetIO : public ::testing::Test {
// Parquet version 1.0.
typedef ::testing::Types<::arrow::BooleanType, ::arrow::UInt8Type, ::arrow::Int8Type,
- ::arrow::UInt16Type, ::arrow::Int16Type, ::arrow::Int32Type, ::arrow::UInt64Type,
- ::arrow::Int64Type, ::arrow::Date32Type, ::arrow::FloatType, ::arrow::DoubleType,
- ::arrow::StringType, ::arrow::BinaryType, ::arrow::FixedSizeBinaryType>
+ ::arrow::UInt16Type, ::arrow::Int16Type, ::arrow::Int32Type,
+ ::arrow::UInt64Type, ::arrow::Int64Type, ::arrow::Date32Type,
+ ::arrow::FloatType, ::arrow::DoubleType, ::arrow::StringType,
+ ::arrow::BinaryType, ::arrow::FixedSizeBinaryType>
TestTypes;
TYPED_TEST_CASE(TestParquetIO, TestTypes);
@@ -478,7 +480,7 @@ TYPED_TEST(TestParquetIO, SingleColumnTableRequiredWrite) {
std::shared_ptr<Table> table = MakeSimpleTable(values, false);
this->sink_ = std::make_shared<InMemoryOutputStream>();
ASSERT_OK_NO_THROW(WriteTable(*table, ::arrow::default_memory_pool(), this->sink_,
- values->length(), default_writer_properties()));
+ values->length(), default_writer_properties()));
std::shared_ptr<Table> out;
std::unique_ptr<FileReader> reader;
@@ -599,8 +601,8 @@ TYPED_TEST(TestParquetIO, SingleColumnTableRequiredChunkedWrite) {
ASSERT_OK(NonNullArray<TypeParam>(LARGE_SIZE, &values));
std::shared_ptr<Table> table = MakeSimpleTable(values, false);
this->sink_ = std::make_shared<InMemoryOutputStream>();
- ASSERT_OK_NO_THROW(WriteTable(
- *table, default_memory_pool(), this->sink_, 512, default_writer_properties()));
+ ASSERT_OK_NO_THROW(WriteTable(*table, default_memory_pool(), this->sink_, 512,
+ default_writer_properties()));
this->ReadAndCheckSingleColumnTable(values);
}
@@ -615,8 +617,8 @@ TYPED_TEST(TestParquetIO, SingleColumnTableRequiredChunkedWriteArrowIO) {
{
// BufferOutputStream closed on gc
auto arrow_sink_ = std::make_shared<::arrow::io::BufferOutputStream>(buffer);
- ASSERT_OK_NO_THROW(WriteTable(
- *table, default_memory_pool(), arrow_sink_, 512, default_writer_properties()));
+ ASSERT_OK_NO_THROW(WriteTable(*table, default_memory_pool(), arrow_sink_, 512,
+ default_writer_properties()));
// XXX: Remove this after ARROW-455 completed
ASSERT_OK(arrow_sink_->Close());
@@ -664,7 +666,7 @@ TYPED_TEST(TestParquetIO, SingleColumnTableOptionalChunkedWrite) {
std::shared_ptr<Table> table = MakeSimpleTable(values, true);
this->sink_ = std::make_shared<InMemoryOutputStream>();
ASSERT_OK_NO_THROW(WriteTable(*table, ::arrow::default_memory_pool(), this->sink_, 512,
- default_writer_properties()));
+ default_writer_properties()));
this->ReadAndCheckSingleColumnTable(values);
}
@@ -713,8 +715,8 @@ TEST_F(TestInt96ParquetIO, ReadIntoTimestamp) {
rg_writer->Close();
writer->Close();
- ::arrow::TimestampBuilder builder(
- default_memory_pool(), ::arrow::timestamp(TimeUnit::NANO));
+ ::arrow::TimestampBuilder builder(default_memory_pool(),
+ ::arrow::timestamp(TimeUnit::NANO));
ASSERT_OK(builder.Append(val));
std::shared_ptr<Array> values;
ASSERT_OK(builder.Finish(&values));
@@ -777,8 +779,8 @@ TEST_F(TestUInt32ParquetIO, Parquet_1_0_Compability) {
const int32_t kOffset = 0;
ASSERT_OK(MakePrimitiveArray(std::make_shared<::arrow::Int64Type>(), values->length(),
- int64_data, values->null_bitmap(), values->null_count(), kOffset,
- &expected_values));
+ int64_data, values->null_bitmap(), values->null_count(),
+ kOffset, &expected_values));
this->ReadAndCheckSingleColumnTable(expected_values);
}
@@ -794,7 +796,7 @@ TEST_F(TestStringParquetIO, EmptyStringColumnRequiredWrite) {
std::shared_ptr<Table> table = MakeSimpleTable(values, false);
this->sink_ = std::make_shared<InMemoryOutputStream>();
ASSERT_OK_NO_THROW(WriteTable(*table, ::arrow::default_memory_pool(), this->sink_,
- values->length(), default_writer_properties()));
+ values->length(), default_writer_properties()));
std::shared_ptr<Table> out;
std::unique_ptr<FileReader> reader;
@@ -815,7 +817,7 @@ TEST_F(TestNullParquetIO, NullColumn) {
std::shared_ptr<Table> table = MakeSimpleTable(values, true);
this->sink_ = std::make_shared<InMemoryOutputStream>();
ASSERT_OK_NO_THROW(WriteTable(*table, ::arrow::default_memory_pool(), this->sink_,
- values->length(), default_writer_properties()));
+ values->length(), default_writer_properties()));
std::shared_ptr<Table> out;
std::unique_ptr<FileReader> reader;
@@ -847,16 +849,16 @@ class TestPrimitiveParquetIO : public TestParquetIO<TestType> {
public:
typedef typename c_type_trait<TestType>::ArrowCType T;
- void MakeTestFile(
- std::vector<T>& values, int num_chunks, std::unique_ptr<FileReader>* reader) {
+ void MakeTestFile(std::vector<T>& values, int num_chunks,
+ std::unique_ptr<FileReader>* reader) {
TestType dummy;
std::shared_ptr<GroupNode> schema = MakeSimpleSchema(dummy, Repetition::REQUIRED);
std::unique_ptr<ParquetFileWriter> file_writer = this->MakeWriter(schema);
size_t chunk_size = values.size() / num_chunks;
// Convert to Parquet's expected physical type
- std::vector<uint8_t> values_buffer(
- sizeof(ParquetCDataType<TestType>) * values.size());
+ std::vector<uint8_t> values_buffer(sizeof(ParquetCDataType<TestType>) *
+ values.size());
auto values_parquet =
reinterpret_cast<ParquetCDataType<TestType>*>(values_buffer.data());
std::copy(values.cbegin(), values.cend(), values_parquet);
@@ -901,8 +903,9 @@ class TestPrimitiveParquetIO : public TestParquetIO<TestType> {
};
typedef ::testing::Types<::arrow::BooleanType, ::arrow::UInt8Type, ::arrow::Int8Type,
- ::arrow::UInt16Type, ::arrow::Int16Type, ::arrow::UInt32Type, ::arrow::Int32Type,
- ::arrow::UInt64Type, ::arrow::Int64Type, ::arrow::FloatType, ::arrow::DoubleType>
+ ::arrow::UInt16Type, ::arrow::Int16Type, ::arrow::UInt32Type,
+ ::arrow::Int32Type, ::arrow::UInt64Type, ::arrow::Int64Type,
+ ::arrow::FloatType, ::arrow::DoubleType>
PrimitiveTestTypes;
TYPED_TEST_CASE(TestPrimitiveParquetIO, PrimitiveTestTypes);
@@ -942,23 +945,23 @@ void MakeDateTimeTypesTable(std::shared_ptr<Table>* out, bool nanos_as_micros =
auto f5 = field("f5", ::arrow::time64(TimeUnit::MICRO));
std::shared_ptr<::arrow::Schema> schema(new ::arrow::Schema({f0, f1, f2, f3, f4, f5}));
- std::vector<int32_t> t32_values = {
- 1489269000, 1489270000, 1489271000, 1489272000, 1489272000, 1489273000};
+ std::vector<int32_t> t32_values = {1489269000, 1489270000, 1489271000,
+ 1489272000, 1489272000, 1489273000};
std::vector<int64_t> t64_values = {1489269000000, 1489270000000, 1489271000000,
- 1489272000000, 1489272000000, 1489273000000};
- std::vector<int64_t> t64_us_values = {
- 1489269000, 1489270000, 1489271000, 1489272000, 1489272000, 1489273000};
+ 1489272000000, 1489272000000, 1489273000000};
+ std::vector<int64_t> t64_us_values = {1489269000, 1489270000, 1489271000,
+ 1489272000, 1489272000, 1489273000};
std::shared_ptr<Array> a0, a1, a2, a3, a4, a5;
ArrayFromVector<::arrow::Date32Type, int32_t>(f0->type(), is_valid, t32_values, &a0);
ArrayFromVector<::arrow::TimestampType, int64_t>(f1->type(), is_valid, t64_values, &a1);
ArrayFromVector<::arrow::TimestampType, int64_t>(f2->type(), is_valid, t64_values, &a2);
if (nanos_as_micros) {
- ArrayFromVector<::arrow::TimestampType, int64_t>(
- f3->type(), is_valid, t64_us_values, &a3);
+ ArrayFromVector<::arrow::TimestampType, int64_t>(f3->type(), is_valid, t64_us_values,
+ &a3);
} else {
- ArrayFromVector<::arrow::TimestampType, int64_t>(
- f3->type(), is_valid, t64_values, &a3);
+ ArrayFromVector<::arrow::TimestampType, int64_t>(f3->type(), is_valid, t64_values,
+ &a3);
}
ArrayFromVector<::arrow::Time32Type, int32_t>(f4->type(), is_valid, t32_values, &a4);
ArrayFromVector<::arrow::Time64Type, int64_t>(f5->type(), is_valid, t64_values, &a5);
@@ -976,7 +979,8 @@ TEST(TestArrowReadWrite, DateTimeTypes) {
// Use deprecated INT96 type
std::shared_ptr<Table> result;
- DoSimpleRoundtrip(table, 1, table->num_rows(), {}, &result,
+ DoSimpleRoundtrip(
+ table, 1, table->num_rows(), {}, &result,
ArrowWriterProperties::Builder().enable_deprecated_int96_timestamps()->build());
ASSERT_TRUE(table->Equals(*result));
@@ -999,7 +1003,7 @@ TEST(TestArrowReadWrite, ConvertedDateTimeTypes) {
std::shared_ptr<::arrow::Schema> schema(new ::arrow::Schema({f0, f1}));
std::vector<int64_t> a0_values = {1489190400000, 1489276800000, 1489363200000,
- 1489449600000, 1489536000000, 1489622400000};
+ 1489449600000, 1489536000000, 1489622400000};
std::vector<int32_t> a1_values = {0, 1, 2, 3, 4, 5};
std::shared_ptr<Array> a0, a1, x0, x1;
@@ -1030,8 +1034,8 @@ TEST(TestArrowReadWrite, ConvertedDateTimeTypes) {
ASSERT_TRUE(result->Equals(*ex_table));
}
-void MakeDoubleTable(
- int num_columns, int num_rows, int nchunks, std::shared_ptr<Table>* out) {
+void MakeDoubleTable(int num_columns, int num_rows, int nchunks,
+ std::shared_ptr<Table>* out) {
std::shared_ptr<::arrow::Column> column;
std::vector<std::shared_ptr<::arrow::Column>> columns(num_columns);
std::vector<std::shared_ptr<::arrow::Field>> fields(num_columns);
@@ -1039,8 +1043,8 @@ void MakeDoubleTable(
for (int i = 0; i < num_columns; ++i) {
std::vector<std::shared_ptr<Array>> arrays;
std::shared_ptr<Array> values;
- ASSERT_OK(NullableArray<::arrow::DoubleType>(
- num_rows, num_rows / 10, static_cast<uint32_t>(i), &values));
+ ASSERT_OK(NullableArray<::arrow::DoubleType>(num_rows, num_rows / 10,
+ static_cast<uint32_t>(i), &values));
std::stringstream ss;
ss << "col" << i;
@@ -1081,9 +1085,9 @@ TEST(TestArrowReadWrite, ReadSingleRowGroup) {
WriteTableToBuffer(table, 1, num_rows / 2, default_arrow_writer_properties(), &buffer);
std::unique_ptr<FileReader> reader;
- ASSERT_OK_NO_THROW(
- OpenFile(std::make_shared<BufferReader>(buffer), ::arrow::default_memory_pool(),
- ::parquet::default_reader_properties(), nullptr, &reader));
+ ASSERT_OK_NO_THROW(OpenFile(std::make_shared<BufferReader>(buffer),
+ ::arrow::default_memory_pool(),
+ ::parquet::default_reader_properties(), nullptr, &reader));
ASSERT_EQ(2, reader->num_row_groups());
@@ -1131,8 +1135,8 @@ TEST(TestArrowWrite, CheckChunkSize) {
auto sink = std::make_shared<InMemoryOutputStream>();
- ASSERT_RAISES(
- Invalid, WriteTable(*table, ::arrow::default_memory_pool(), sink, chunk_size));
+ ASSERT_RAISES(Invalid,
+ WriteTable(*table, ::arrow::default_memory_pool(), sink, chunk_size));
}
class TestNestedSchemaRead : public ::testing::TestWithParam<Repetition::type> {
@@ -1145,13 +1149,13 @@ class TestNestedSchemaRead : public ::testing::TestWithParam<Repetition::type> {
std::shared_ptr<Buffer> buffer = nested_parquet_->GetBuffer();
ASSERT_OK_NO_THROW(
OpenFile(std::make_shared<BufferReader>(buffer), ::arrow::default_memory_pool(),
- ::parquet::default_reader_properties(), nullptr, &reader_));
+ ::parquet::default_reader_properties(), nullptr, &reader_));
}
void InitNewParquetFile(const std::shared_ptr<GroupNode>& schema, int num_rows) {
nested_parquet_ = std::make_shared<InMemoryOutputStream>();
- writer_ = parquet::ParquetFileWriter::Open(
- nested_parquet_, schema, default_writer_properties());
+ writer_ = parquet::ParquetFileWriter::Open(nested_parquet_, schema,
+ default_writer_properties());
row_group_writer_ = writer_->AppendRowGroup(num_rows);
}
@@ -1166,8 +1170,8 @@ class TestNestedSchemaRead : public ::testing::TestWithParam<Repetition::type> {
values_array_ = std::dynamic_pointer_cast<::arrow::Int32Array>(arr);
}
- void WriteColumnData(
- size_t num_rows, int16_t* def_levels, int16_t* rep_levels, int32_t* values) {
+ void WriteColumnData(size_t num_rows, int16_t* def_levels, int16_t* rep_levels,
+ int32_t* values) {
auto typed_writer =
static_cast<TypedColumnWriter<Int32Type>*>(row_group_writer_->NextColumn());
typed_writer->WriteBatch(num_rows, def_levels, rep_levels, values);
@@ -1179,7 +1183,9 @@ class TestNestedSchemaRead : public ::testing::TestWithParam<Repetition::type> {
// Also independently count the nulls
auto local_null_count = 0;
for (int i = 0; i < array.length(); i++) {
- if (array.IsNull(i)) { local_null_count++; }
+ if (array.IsNull(i)) {
+ local_null_count++;
+ }
}
ASSERT_EQ(local_null_count, expected_nulls);
}
@@ -1189,7 +1195,9 @@ class TestNestedSchemaRead : public ::testing::TestWithParam<Repetition::type> {
int j = 0;
for (int i = 0; i < values_array_->length(); i++) {
- if (array.IsNull(i)) { continue; }
+ if (array.IsNull(i)) {
+ continue;
+ }
ASSERT_EQ(array.Value(i), values_array_->Value(j));
j++;
}
@@ -1219,9 +1227,10 @@ class TestNestedSchemaRead : public ::testing::TestWithParam<Repetition::type> {
// }
// required int32 leaf3;
- parquet_fields.push_back(GroupNode::Make("group1", struct_repetition,
+ parquet_fields.push_back(GroupNode::Make(
+ "group1", struct_repetition,
{PrimitiveNode::Make("leaf1", Repetition::REQUIRED, ParquetType::INT32),
- PrimitiveNode::Make("leaf2", Repetition::OPTIONAL, ParquetType::INT32)}));
+ PrimitiveNode::Make("leaf2", Repetition::OPTIONAL, ParquetType::INT32)}));
parquet_fields.push_back(
PrimitiveNode::Make("leaf3", Repetition::REQUIRED, ParquetType::INT32));
@@ -1252,33 +1261,34 @@ class TestNestedSchemaRead : public ::testing::TestWithParam<Repetition::type> {
int32_t* values = reinterpret_cast<int32_t*>(values_array_->values()->mutable_data());
// Create the actual parquet file
- InitNewParquetFile(
- std::static_pointer_cast<GroupNode>(schema_node), NUM_SIMPLE_TEST_ROWS);
+ InitNewParquetFile(std::static_pointer_cast<GroupNode>(schema_node),
+ NUM_SIMPLE_TEST_ROWS);
// leaf1 column
- WriteColumnData(
- NUM_SIMPLE_TEST_ROWS, leaf1_def_levels.data(), rep_levels.data(), values);
+ WriteColumnData(NUM_SIMPLE_TEST_ROWS, leaf1_def_levels.data(), rep_levels.data(),
+ values);
// leaf2 column
- WriteColumnData(
- NUM_SIMPLE_TEST_ROWS, leaf2_def_levels.data(), rep_levels.data(), values);
+ WriteColumnData(NUM_SIMPLE_TEST_ROWS, leaf2_def_levels.data(), rep_levels.data(),
+ values);
// leaf3 column
- WriteColumnData(
- NUM_SIMPLE_TEST_ROWS, leaf3_def_levels.data(), rep_levels.data(), values);
+ WriteColumnData(NUM_SIMPLE_TEST_ROWS, leaf3_def_levels.data(), rep_levels.data(),
+ values);
FinalizeParquetFile();
InitReader();
}
NodePtr CreateSingleTypedNestedGroup(int index, int depth, int num_children,
- Repetition::type node_repetition, ParquetType::type leaf_type) {
+ Repetition::type node_repetition,
+ ParquetType::type leaf_type) {
std::vector<NodePtr> children;
for (int i = 0; i < num_children; i++) {
if (depth <= 1) {
children.push_back(PrimitiveNode::Make("leaf", node_repetition, leaf_type));
} else {
- children.push_back(CreateSingleTypedNestedGroup(
- i, depth - 1, num_children, node_repetition, leaf_type));
+ children.push_back(CreateSingleTypedNestedGroup(i, depth - 1, num_children,
+ node_repetition, leaf_type));
}
}
@@ -1289,7 +1299,7 @@ class TestNestedSchemaRead : public ::testing::TestWithParam<Repetition::type> {
// A deeply nested schema
void CreateMultiLevelNestedParquet(int num_trees, int tree_depth, int num_children,
- int num_rows, Repetition::type node_repetition) {
+ int num_rows, Repetition::type node_repetition) {
// Create the schema
std::vector<NodePtr> parquet_fields;
for (int i = 0; i < num_trees; i++) {
@@ -1327,8 +1337,8 @@ class TestNestedSchemaRead : public ::testing::TestWithParam<Repetition::type> {
class DeepParquetTestVisitor : public ArrayVisitor {
public:
- DeepParquetTestVisitor(
- Repetition::type node_repetition, std::shared_ptr<::arrow::Int32Array> expected)
+ DeepParquetTestVisitor(Repetition::type node_repetition,
+ std::shared_ptr<::arrow::Int32Array> expected)
: node_repetition_(node_repetition), expected_(expected) {}
Status Validate(std::shared_ptr<Array> tree) { return tree->Accept(this); }
@@ -1475,7 +1485,7 @@ TEST_P(TestNestedSchemaRead, DeepNestedSchemaRead) {
}
INSTANTIATE_TEST_CASE_P(Repetition_type, TestNestedSchemaRead,
- ::testing::Values(Repetition::REQUIRED, Repetition::OPTIONAL));
+ ::testing::Values(Repetition::REQUIRED, Repetition::OPTIONAL));
TEST(TestImpalaConversion, NanosecondToImpala) {
// June 20, 2017 16:32:56 and 123456789 nanoseconds
http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/b6f3caeb/src/parquet/arrow/arrow-schema-test.cc
----------------------------------------------------------------------
diff --git a/src/parquet/arrow/arrow-schema-test.cc b/src/parquet/arrow/arrow-schema-test.cc
index 34d4813..22e3adb 100644
--- a/src/parquet/arrow/arrow-schema-test.cc
+++ b/src/parquet/arrow/arrow-schema-test.cc
@@ -72,14 +72,15 @@ class TestConvertParquetSchema : public ::testing::Test {
return FromParquetSchema(&descr_, &result_schema_);
}
- ::arrow::Status ConvertSchema(
- const std::vector<NodePtr>& nodes, const std::vector<int>& column_indices) {
+ ::arrow::Status ConvertSchema(const std::vector<NodePtr>& nodes,
+ const std::vector<int>& column_indices) {
NodePtr schema = GroupNode::Make("schema", Repetition::REPEATED, nodes);
descr_.Init(schema);
return FromParquetSchema(&descr_, column_indices, &result_schema_);
}
- ::arrow::Status ConvertSchema(const std::vector<NodePtr>& nodes,
+ ::arrow::Status ConvertSchema(
+ const std::vector<NodePtr>& nodes,
const std::shared_ptr<const KeyValueMetadata>& key_value_metadata) {
NodePtr schema = GroupNode::Make("schema", Repetition::REPEATED, nodes);
descr_.Init(schema);
@@ -108,15 +109,17 @@ TEST_F(TestConvertParquetSchema, ParquetFlatPrimitives) {
arrow_fields.push_back(std::make_shared<Field>("int64", INT64, false));
parquet_fields.push_back(PrimitiveNode::Make("timestamp", Repetition::REQUIRED,
- ParquetType::INT64, LogicalType::TIMESTAMP_MILLIS));
+ ParquetType::INT64,
+ LogicalType::TIMESTAMP_MILLIS));
arrow_fields.push_back(std::make_shared<Field>("timestamp", TIMESTAMP_MS, false));
parquet_fields.push_back(PrimitiveNode::Make("timestamp[us]", Repetition::REQUIRED,
- ParquetType::INT64, LogicalType::TIMESTAMP_MICROS));
+ ParquetType::INT64,
+ LogicalType::TIMESTAMP_MICROS));
arrow_fields.push_back(std::make_shared<Field>("timestamp[us]", TIMESTAMP_US, false));
- parquet_fields.push_back(PrimitiveNode::Make(
- "date", Repetition::REQUIRED, ParquetType::INT32, LogicalType::DATE));
+ parquet_fields.push_back(PrimitiveNode::Make("date", Repetition::REQUIRED,
+ ParquetType::INT32, LogicalType::DATE));
arrow_fields.push_back(std::make_shared<Field>("date", ::arrow::date32(), false));
parquet_fields.push_back(PrimitiveNode::Make(
@@ -150,7 +153,8 @@ TEST_F(TestConvertParquetSchema, ParquetFlatPrimitives) {
arrow_fields.push_back(std::make_shared<Field>("string", UTF8));
parquet_fields.push_back(PrimitiveNode::Make("flba-binary", Repetition::OPTIONAL,
- ParquetType::FIXED_LEN_BYTE_ARRAY, LogicalType::NONE, 12));
+ ParquetType::FIXED_LEN_BYTE_ARRAY,
+ LogicalType::NONE, 12));
arrow_fields.push_back(
std::make_shared<Field>("flba-binary", ::arrow::fixed_size_binary(12)));
@@ -204,19 +208,23 @@ TEST_F(TestConvertParquetSchema, ParquetFlatDecimals) {
std::vector<std::shared_ptr<Field>> arrow_fields;
parquet_fields.push_back(PrimitiveNode::Make("flba-decimal", Repetition::OPTIONAL,
- ParquetType::FIXED_LEN_BYTE_ARRAY, LogicalType::DECIMAL, 4, 8, 4));
+ ParquetType::FIXED_LEN_BYTE_ARRAY,
+ LogicalType::DECIMAL, 4, 8, 4));
arrow_fields.push_back(std::make_shared<Field>("flba-decimal", DECIMAL_8_4));
parquet_fields.push_back(PrimitiveNode::Make("binary-decimal", Repetition::OPTIONAL,
- ParquetType::BYTE_ARRAY, LogicalType::DECIMAL, -1, 8, 4));
+ ParquetType::BYTE_ARRAY,
+ LogicalType::DECIMAL, -1, 8, 4));
arrow_fields.push_back(std::make_shared<Field>("binary-decimal", DECIMAL_8_4));
parquet_fields.push_back(PrimitiveNode::Make("int32-decimal", Repetition::OPTIONAL,
- ParquetType::INT32, LogicalType::DECIMAL, -1, 8, 4));
+ ParquetType::INT32, LogicalType::DECIMAL,
+ -1, 8, 4));
arrow_fields.push_back(std::make_shared<Field>("int32-decimal", DECIMAL_8_4));
parquet_fields.push_back(PrimitiveNode::Make("int64-decimal", Repetition::OPTIONAL,
- ParquetType::INT64, LogicalType::DECIMAL, -1, 8, 4));
+ ParquetType::INT64, LogicalType::DECIMAL,
+ -1, 8, 4));
arrow_fields.push_back(std::make_shared<Field>("int64-decimal", DECIMAL_8_4));
auto arrow_schema = std::make_shared<::arrow::Schema>(arrow_fields);
@@ -238,8 +246,8 @@ TEST_F(TestConvertParquetSchema, ParquetLists) {
// }
// }
{
- auto element = PrimitiveNode::Make(
- "string", Repetition::OPTIONAL, ParquetType::BYTE_ARRAY, LogicalType::UTF8);
+ auto element = PrimitiveNode::Make("string", Repetition::OPTIONAL,
+ ParquetType::BYTE_ARRAY, LogicalType::UTF8);
auto list = GroupNode::Make("list", Repetition::REPEATED, {element});
parquet_fields.push_back(
GroupNode::Make("my_list", Repetition::REQUIRED, {list}, LogicalType::LIST));
@@ -255,8 +263,8 @@ TEST_F(TestConvertParquetSchema, ParquetLists) {
// }
// }
{
- auto element = PrimitiveNode::Make(
- "string", Repetition::REQUIRED, ParquetType::BYTE_ARRAY, LogicalType::UTF8);
+ auto element = PrimitiveNode::Make("string", Repetition::REQUIRED,
+ ParquetType::BYTE_ARRAY, LogicalType::UTF8);
auto list = GroupNode::Make("list", Repetition::REPEATED, {element});
parquet_fields.push_back(
GroupNode::Make("my_list", Repetition::OPTIONAL, {list}, LogicalType::LIST));
@@ -284,8 +292,8 @@ TEST_F(TestConvertParquetSchema, ParquetLists) {
auto element =
GroupNode::Make("element", Repetition::REQUIRED, {inner_list}, LogicalType::LIST);
auto list = GroupNode::Make("list", Repetition::REPEATED, {element});
- parquet_fields.push_back(GroupNode::Make(
- "array_of_arrays", Repetition::OPTIONAL, {list}, LogicalType::LIST));
+ parquet_fields.push_back(GroupNode::Make("array_of_arrays", Repetition::OPTIONAL,
+ {list}, LogicalType::LIST));
auto arrow_inner_element = std::make_shared<Field>("int32", INT32, false);
auto arrow_inner_list = std::make_shared<::arrow::ListType>(arrow_inner_element);
auto arrow_element = std::make_shared<Field>("element", arrow_inner_list, false);
@@ -300,8 +308,8 @@ TEST_F(TestConvertParquetSchema, ParquetLists) {
// };
// }
{
- auto element = PrimitiveNode::Make(
- "str", Repetition::REQUIRED, ParquetType::BYTE_ARRAY, LogicalType::UTF8);
+ auto element = PrimitiveNode::Make("str", Repetition::REQUIRED,
+ ParquetType::BYTE_ARRAY, LogicalType::UTF8);
auto list = GroupNode::Make("element", Repetition::REPEATED, {element});
parquet_fields.push_back(
GroupNode::Make("my_list", Repetition::OPTIONAL, {list}, LogicalType::LIST));
@@ -332,8 +340,8 @@ TEST_F(TestConvertParquetSchema, ParquetLists) {
// };
// }
{
- auto str_element = PrimitiveNode::Make(
- "str", Repetition::REQUIRED, ParquetType::BYTE_ARRAY, LogicalType::UTF8);
+ auto str_element = PrimitiveNode::Make("str", Repetition::REQUIRED,
+ ParquetType::BYTE_ARRAY, LogicalType::UTF8);
auto num_element =
PrimitiveNode::Make("num", Repetition::REQUIRED, ParquetType::INT32);
auto element =
@@ -357,8 +365,8 @@ TEST_F(TestConvertParquetSchema, ParquetLists) {
// }
// Special case: group is named array
{
- auto element = PrimitiveNode::Make(
- "str", Repetition::REQUIRED, ParquetType::BYTE_ARRAY, LogicalType::UTF8);
+ auto element = PrimitiveNode::Make("str", Repetition::REQUIRED,
+ ParquetType::BYTE_ARRAY, LogicalType::UTF8);
auto array = GroupNode::Make("array", Repetition::REPEATED, {element});
parquet_fields.push_back(
GroupNode::Make("my_list", Repetition::OPTIONAL, {array}, LogicalType::LIST));
@@ -378,8 +386,8 @@ TEST_F(TestConvertParquetSchema, ParquetLists) {
// }
// Special case: group named ends in _tuple
{
- auto element = PrimitiveNode::Make(
- "str", Repetition::REQUIRED, ParquetType::BYTE_ARRAY, LogicalType::UTF8);
+ auto element = PrimitiveNode::Make("str", Repetition::REQUIRED,
+ ParquetType::BYTE_ARRAY, LogicalType::UTF8);
auto array = GroupNode::Make("my_list_tuple", Repetition::REPEATED, {element});
parquet_fields.push_back(
GroupNode::Make("my_list", Repetition::OPTIONAL, {array}, LogicalType::LIST));
@@ -425,14 +433,15 @@ TEST_F(TestConvertParquetSchema, ParquetNestedSchema) {
// }
// required int64 leaf3;
{
- parquet_fields.push_back(GroupNode::Make("group1", Repetition::REQUIRED,
+ parquet_fields.push_back(GroupNode::Make(
+ "group1", Repetition::REQUIRED,
{PrimitiveNode::Make("leaf1", Repetition::REQUIRED, ParquetType::BOOLEAN),
- PrimitiveNode::Make("leaf2", Repetition::REQUIRED, ParquetType::INT32)}));
+ PrimitiveNode::Make("leaf2", Repetition::REQUIRED, ParquetType::INT32)}));
parquet_fields.push_back(
PrimitiveNode::Make("leaf3", Repetition::REQUIRED, ParquetType::INT64));
auto group1_fields = {std::make_shared<Field>("leaf1", BOOL, false),
- std::make_shared<Field>("leaf2", INT32, false)};
+ std::make_shared<Field>("leaf2", INT32, false)};
auto arrow_group1_type = std::make_shared<::arrow::StructType>(group1_fields);
arrow_fields.push_back(std::make_shared<Field>("group1", arrow_group1_type, false));
arrow_fields.push_back(std::make_shared<Field>("leaf3", INT64, false));
@@ -468,12 +477,14 @@ TEST_F(TestConvertParquetSchema, ParquetNestedSchemaPartial) {
// }
// required int64 leaf5;
{
- parquet_fields.push_back(GroupNode::Make("group1", Repetition::REQUIRED,
+ parquet_fields.push_back(GroupNode::Make(
+ "group1", Repetition::REQUIRED,
{PrimitiveNode::Make("leaf1", Repetition::REQUIRED, ParquetType::INT64),
- PrimitiveNode::Make("leaf2", Repetition::REQUIRED, ParquetType::INT64)}));
- parquet_fields.push_back(GroupNode::Make("group2", Repetition::REQUIRED,
+ PrimitiveNode::Make("leaf2", Repetition::REQUIRED, ParquetType::INT64)}));
+ parquet_fields.push_back(GroupNode::Make(
+ "group2", Repetition::REQUIRED,
{PrimitiveNode::Make("leaf3", Repetition::REQUIRED, ParquetType::INT64),
- PrimitiveNode::Make("leaf4", Repetition::REQUIRED, ParquetType::INT64)}));
+ PrimitiveNode::Make("leaf4", Repetition::REQUIRED, ParquetType::INT64)}));
parquet_fields.push_back(
PrimitiveNode::Make("leaf5", Repetition::REQUIRED, ParquetType::INT64));
@@ -517,12 +528,14 @@ TEST_F(TestConvertParquetSchema, ParquetNestedSchemaPartialOrdering) {
// required int64 leaf1;
// }
{
- parquet_fields.push_back(GroupNode::Make("group1", Repetition::REQUIRED,
+ parquet_fields.push_back(GroupNode::Make(
+ "group1", Repetition::REQUIRED,
{PrimitiveNode::Make("leaf1", Repetition::REQUIRED, ParquetType::INT64),
- PrimitiveNode::Make("leaf2", Repetition::REQUIRED, ParquetType::INT64)}));
- parquet_fields.push_back(GroupNode::Make("group2", Repetition::REQUIRED,
+ PrimitiveNode::Make("leaf2", Repetition::REQUIRED, ParquetType::INT64)}));
+ parquet_fields.push_back(GroupNode::Make(
+ "group2", Repetition::REQUIRED,
{PrimitiveNode::Make("leaf3", Repetition::REQUIRED, ParquetType::INT64),
- PrimitiveNode::Make("leaf4", Repetition::REQUIRED, ParquetType::INT64)}));
+ PrimitiveNode::Make("leaf4", Repetition::REQUIRED, ParquetType::INT64)}));
parquet_fields.push_back(
PrimitiveNode::Make("leaf5", Repetition::REQUIRED, ParquetType::INT64));
@@ -554,22 +567,25 @@ TEST_F(TestConvertParquetSchema, ParquetRepeatedNestedSchema) {
// }
parquet_fields.push_back(
PrimitiveNode::Make("leaf1", Repetition::OPTIONAL, ParquetType::INT32));
- parquet_fields.push_back(GroupNode::Make("outerGroup", Repetition::REPEATED,
+ parquet_fields.push_back(GroupNode::Make(
+ "outerGroup", Repetition::REPEATED,
{PrimitiveNode::Make("leaf2", Repetition::OPTIONAL, ParquetType::INT32),
- GroupNode::Make("innerGroup", Repetition::REPEATED,
- {PrimitiveNode::Make(
- "leaf3", Repetition::OPTIONAL, ParquetType::INT32)})}));
+ GroupNode::Make(
+ "innerGroup", Repetition::REPEATED,
+ {PrimitiveNode::Make("leaf3", Repetition::OPTIONAL, ParquetType::INT32)})}));
auto inner_group_fields = {std::make_shared<Field>("leaf3", INT32, true)};
auto inner_group_type = std::make_shared<::arrow::StructType>(inner_group_fields);
- auto outer_group_fields = {std::make_shared<Field>("leaf2", INT32, true),
- std::make_shared<Field>("innerGroup",
- ::arrow::list(std::make_shared<Field>("innerGroup", inner_group_type, false)),
- false)};
+ auto outer_group_fields = {
+ std::make_shared<Field>("leaf2", INT32, true),
+ std::make_shared<Field>("innerGroup", ::arrow::list(std::make_shared<Field>(
+ "innerGroup", inner_group_type, false)),
+ false)};
auto outer_group_type = std::make_shared<::arrow::StructType>(outer_group_fields);
arrow_fields.push_back(std::make_shared<Field>("leaf1", INT32, true));
- arrow_fields.push_back(std::make_shared<Field>("outerGroup",
+ arrow_fields.push_back(std::make_shared<Field>(
+ "outerGroup",
::arrow::list(std::make_shared<Field>("outerGroup", outer_group_type, false)),
false));
}
@@ -626,20 +642,22 @@ TEST_F(TestConvertArrowSchema, ParquetFlatPrimitives) {
PrimitiveNode::Make("int64", Repetition::REQUIRED, ParquetType::INT64));
arrow_fields.push_back(std::make_shared<Field>("int64", INT64, false));
- parquet_fields.push_back(PrimitiveNode::Make(
- "date", Repetition::REQUIRED, ParquetType::INT32, LogicalType::DATE));
+ parquet_fields.push_back(PrimitiveNode::Make("date", Repetition::REQUIRED,
+ ParquetType::INT32, LogicalType::DATE));
arrow_fields.push_back(std::make_shared<Field>("date", ::arrow::date32(), false));
- parquet_fields.push_back(PrimitiveNode::Make(
- "date64", Repetition::REQUIRED, ParquetType::INT32, LogicalType::DATE));
+ parquet_fields.push_back(PrimitiveNode::Make("date64", Repetition::REQUIRED,
+ ParquetType::INT32, LogicalType::DATE));
arrow_fields.push_back(std::make_shared<Field>("date64", ::arrow::date64(), false));
parquet_fields.push_back(PrimitiveNode::Make("timestamp", Repetition::REQUIRED,
- ParquetType::INT64, LogicalType::TIMESTAMP_MILLIS));
+ ParquetType::INT64,
+ LogicalType::TIMESTAMP_MILLIS));
arrow_fields.push_back(std::make_shared<Field>("timestamp", TIMESTAMP_MS, false));
parquet_fields.push_back(PrimitiveNode::Make("timestamp[us]", Repetition::REQUIRED,
- ParquetType::INT64, LogicalType::TIMESTAMP_MICROS));
+ ParquetType::INT64,
+ LogicalType::TIMESTAMP_MICROS));
arrow_fields.push_back(std::make_shared<Field>("timestamp[us]", TIMESTAMP_US, false));
parquet_fields.push_back(
@@ -676,8 +694,8 @@ TEST_F(TestConvertArrowSchema, ParquetLists) {
// }
// }
{
- auto element = PrimitiveNode::Make(
- "string", Repetition::OPTIONAL, ParquetType::BYTE_ARRAY, LogicalType::UTF8);
+ auto element = PrimitiveNode::Make("string", Repetition::OPTIONAL,
+ ParquetType::BYTE_ARRAY, LogicalType::UTF8);
auto list = GroupNode::Make("list", Repetition::REPEATED, {element});
parquet_fields.push_back(
GroupNode::Make("my_list", Repetition::REQUIRED, {list}, LogicalType::LIST));
@@ -693,8 +711,8 @@ TEST_F(TestConvertArrowSchema, ParquetLists) {
// }
// }
{
- auto element = PrimitiveNode::Make(
- "string", Repetition::REQUIRED, ParquetType::BYTE_ARRAY, LogicalType::UTF8);
+ auto element = PrimitiveNode::Make("string", Repetition::REQUIRED,
+ ParquetType::BYTE_ARRAY, LogicalType::UTF8);
auto list = GroupNode::Make("list", Repetition::REPEATED, {element});
parquet_fields.push_back(
GroupNode::Make("my_list", Repetition::OPTIONAL, {list}, LogicalType::LIST));