You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@parquet.apache.org by we...@apache.org on 2017/07/31 15:14:58 UTC
[1/5] parquet-cpp git commit: PARQUET-1068: Modify .clang-format to
use straight Google format with 90-character line width
Repository: parquet-cpp
Updated Branches:
refs/heads/master af96ff0fa -> b6f3caeb0
http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/b6f3caeb/src/parquet/reader-test.cc
----------------------------------------------------------------------
diff --git a/src/parquet/reader-test.cc b/src/parquet/reader-test.cc
index cb40abb..cefa452 100644
--- a/src/parquet/reader-test.cc
+++ b/src/parquet/reader-test.cc
@@ -15,10 +15,10 @@
// specific language governing permissions and limitations
// under the License.
-#include <cstdint>
-#include <cstdlib>
#include <fcntl.h>
#include <gtest/gtest.h>
+#include <cstdint>
+#include <cstdlib>
#include <iostream>
#include <memory>
#include <string>
@@ -227,8 +227,8 @@ TEST_F(TestLocalFile, OpenWithMetadata) {
printer.DebugPrint(ss, columns, true);
// Make sure OpenFile passes on the external metadata, too
- auto reader2 = ParquetFileReader::OpenFile(
- alltypes_plain(), false, default_reader_properties(), metadata);
+ auto reader2 = ParquetFileReader::OpenFile(alltypes_plain(), false,
+ default_reader_properties(), metadata);
// Compare pointers
ASSERT_EQ(metadata.get(), reader2->metadata().get());
http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/b6f3caeb/src/parquet/schema-test.cc
----------------------------------------------------------------------
diff --git a/src/parquet/schema-test.cc b/src/parquet/schema-test.cc
index 203a312..faacb76 100644
--- a/src/parquet/schema-test.cc
+++ b/src/parquet/schema-test.cc
@@ -41,7 +41,8 @@ using format::SchemaElement;
namespace schema {
static inline SchemaElement NewPrimitive(const std::string& name,
- FieldRepetitionType::type repetition, format::Type::type type, int id = 0) {
+ FieldRepetitionType::type repetition,
+ format::Type::type type, int id = 0) {
SchemaElement result;
result.__set_name(name);
result.__set_repetition_type(repetition);
@@ -52,7 +53,8 @@ static inline SchemaElement NewPrimitive(const std::string& name,
}
static inline SchemaElement NewGroup(const std::string& name,
- FieldRepetitionType::type repetition, int num_children, int id = 0) {
+ FieldRepetitionType::type repetition,
+ int num_children, int id = 0) {
SchemaElement result;
result.__set_name(name);
result.__set_repetition_type(repetition);
@@ -156,8 +158,8 @@ TEST_F(TestPrimitiveNode, FromParquet) {
ASSERT_EQ(LogicalType::UTF8, prim_node_->logical_type());
// FIXED_LEN_BYTE_ARRAY
- elt = NewPrimitive(
- name_, FieldRepetitionType::OPTIONAL, format::Type::FIXED_LEN_BYTE_ARRAY, 0);
+ elt = NewPrimitive(name_, FieldRepetitionType::OPTIONAL,
+ format::Type::FIXED_LEN_BYTE_ARRAY, 0);
elt.__set_type_length(16);
Convert(&elt);
@@ -168,8 +170,8 @@ TEST_F(TestPrimitiveNode, FromParquet) {
ASSERT_EQ(16, prim_node_->type_length());
// ConvertedType::Decimal
- elt = NewPrimitive(
- name_, FieldRepetitionType::OPTIONAL, format::Type::FIXED_LEN_BYTE_ARRAY, 0);
+ elt = NewPrimitive(name_, FieldRepetitionType::OPTIONAL,
+ format::Type::FIXED_LEN_BYTE_ARRAY, 0);
elt.__set_converted_type(ConvertedType::DECIMAL);
elt.__set_type_length(6);
elt.__set_scale(2);
@@ -197,21 +199,21 @@ TEST_F(TestPrimitiveNode, Equals) {
ASSERT_TRUE(node1.Equals(&node5));
PrimitiveNode flba1("foo", Repetition::REQUIRED, Type::FIXED_LEN_BYTE_ARRAY,
- LogicalType::DECIMAL, 12, 4, 2);
+ LogicalType::DECIMAL, 12, 4, 2);
PrimitiveNode flba2("foo", Repetition::REQUIRED, Type::FIXED_LEN_BYTE_ARRAY,
- LogicalType::DECIMAL, 1, 4, 2);
+ LogicalType::DECIMAL, 1, 4, 2);
flba2.SetTypeLength(12);
PrimitiveNode flba3("foo", Repetition::REQUIRED, Type::FIXED_LEN_BYTE_ARRAY,
- LogicalType::DECIMAL, 1, 4, 2);
+ LogicalType::DECIMAL, 1, 4, 2);
flba3.SetTypeLength(16);
PrimitiveNode flba4("foo", Repetition::REQUIRED, Type::FIXED_LEN_BYTE_ARRAY,
- LogicalType::DECIMAL, 12, 4, 0);
+ LogicalType::DECIMAL, 12, 4, 0);
PrimitiveNode flba5("foo", Repetition::REQUIRED, Type::FIXED_LEN_BYTE_ARRAY,
- LogicalType::NONE, 12, 4, 0);
+ LogicalType::NONE, 12, 4, 0);
ASSERT_TRUE(flba1.Equals(&flba2));
ASSERT_FALSE(flba1.Equals(&flba3));
@@ -222,52 +224,59 @@ TEST_F(TestPrimitiveNode, Equals) {
TEST_F(TestPrimitiveNode, PhysicalLogicalMapping) {
ASSERT_NO_THROW(
PrimitiveNode::Make("foo", Repetition::REQUIRED, Type::INT32, LogicalType::INT_32));
- ASSERT_NO_THROW(PrimitiveNode::Make(
- "foo", Repetition::REQUIRED, Type::BYTE_ARRAY, LogicalType::JSON));
+ ASSERT_NO_THROW(PrimitiveNode::Make("foo", Repetition::REQUIRED, Type::BYTE_ARRAY,
+ LogicalType::JSON));
ASSERT_THROW(
PrimitiveNode::Make("foo", Repetition::REQUIRED, Type::INT32, LogicalType::JSON),
ParquetException);
- ASSERT_NO_THROW(PrimitiveNode::Make(
- "foo", Repetition::REQUIRED, Type::INT64, LogicalType::TIMESTAMP_MILLIS));
+ ASSERT_NO_THROW(PrimitiveNode::Make("foo", Repetition::REQUIRED, Type::INT64,
+ LogicalType::TIMESTAMP_MILLIS));
ASSERT_THROW(
PrimitiveNode::Make("foo", Repetition::REQUIRED, Type::INT32, LogicalType::INT_64),
ParquetException);
- ASSERT_THROW(PrimitiveNode::Make(
- "foo", Repetition::REQUIRED, Type::BYTE_ARRAY, LogicalType::INT_8),
- ParquetException);
- ASSERT_THROW(PrimitiveNode::Make(
- "foo", Repetition::REQUIRED, Type::BYTE_ARRAY, LogicalType::INTERVAL),
- ParquetException);
- ASSERT_THROW(PrimitiveNode::Make("foo", Repetition::REQUIRED,
- Type::FIXED_LEN_BYTE_ARRAY, LogicalType::ENUM),
- ParquetException);
- ASSERT_NO_THROW(PrimitiveNode::Make(
- "foo", Repetition::REQUIRED, Type::BYTE_ARRAY, LogicalType::ENUM));
+ ASSERT_THROW(PrimitiveNode::Make("foo", Repetition::REQUIRED, Type::BYTE_ARRAY,
+ LogicalType::INT_8),
+ ParquetException);
+ ASSERT_THROW(PrimitiveNode::Make("foo", Repetition::REQUIRED, Type::BYTE_ARRAY,
+ LogicalType::INTERVAL),
+ ParquetException);
ASSERT_THROW(PrimitiveNode::Make("foo", Repetition::REQUIRED,
- Type::FIXED_LEN_BYTE_ARRAY, LogicalType::DECIMAL, 0, 2, 4),
+ Type::FIXED_LEN_BYTE_ARRAY, LogicalType::ENUM),
+ ParquetException);
+ ASSERT_NO_THROW(PrimitiveNode::Make("foo", Repetition::REQUIRED, Type::BYTE_ARRAY,
+ LogicalType::ENUM));
+ ASSERT_THROW(
+ PrimitiveNode::Make("foo", Repetition::REQUIRED, Type::FIXED_LEN_BYTE_ARRAY,
+ LogicalType::DECIMAL, 0, 2, 4),
ParquetException);
ASSERT_THROW(PrimitiveNode::Make("foo", Repetition::REQUIRED, Type::FLOAT,
- LogicalType::DECIMAL, 0, 2, 4),
- ParquetException);
- ASSERT_THROW(PrimitiveNode::Make("foo", Repetition::REQUIRED,
- Type::FIXED_LEN_BYTE_ARRAY, LogicalType::DECIMAL, 0, 4, 0),
+ LogicalType::DECIMAL, 0, 2, 4),
+ ParquetException);
+ ASSERT_THROW(
+ PrimitiveNode::Make("foo", Repetition::REQUIRED, Type::FIXED_LEN_BYTE_ARRAY,
+ LogicalType::DECIMAL, 0, 4, 0),
ParquetException);
- ASSERT_THROW(PrimitiveNode::Make("foo", Repetition::REQUIRED,
- Type::FIXED_LEN_BYTE_ARRAY, LogicalType::DECIMAL, 10, 0, 4),
+ ASSERT_THROW(
+ PrimitiveNode::Make("foo", Repetition::REQUIRED, Type::FIXED_LEN_BYTE_ARRAY,
+ LogicalType::DECIMAL, 10, 0, 4),
ParquetException);
- ASSERT_THROW(PrimitiveNode::Make("foo", Repetition::REQUIRED,
- Type::FIXED_LEN_BYTE_ARRAY, LogicalType::DECIMAL, 10, 4, -1),
+ ASSERT_THROW(
+ PrimitiveNode::Make("foo", Repetition::REQUIRED, Type::FIXED_LEN_BYTE_ARRAY,
+ LogicalType::DECIMAL, 10, 4, -1),
ParquetException);
- ASSERT_THROW(PrimitiveNode::Make("foo", Repetition::REQUIRED,
- Type::FIXED_LEN_BYTE_ARRAY, LogicalType::DECIMAL, 10, 2, 4),
+ ASSERT_THROW(
+ PrimitiveNode::Make("foo", Repetition::REQUIRED, Type::FIXED_LEN_BYTE_ARRAY,
+ LogicalType::DECIMAL, 10, 2, 4),
ParquetException);
ASSERT_NO_THROW(PrimitiveNode::Make("foo", Repetition::REQUIRED,
- Type::FIXED_LEN_BYTE_ARRAY, LogicalType::DECIMAL, 10, 6, 4));
+ Type::FIXED_LEN_BYTE_ARRAY, LogicalType::DECIMAL,
+ 10, 6, 4));
ASSERT_NO_THROW(PrimitiveNode::Make("foo", Repetition::REQUIRED,
- Type::FIXED_LEN_BYTE_ARRAY, LogicalType::INTERVAL, 12));
+ Type::FIXED_LEN_BYTE_ARRAY, LogicalType::INTERVAL,
+ 12));
ASSERT_THROW(PrimitiveNode::Make("foo", Repetition::REQUIRED,
- Type::FIXED_LEN_BYTE_ARRAY, LogicalType::INTERVAL, 10),
- ParquetException);
+ Type::FIXED_LEN_BYTE_ARRAY, LogicalType::INTERVAL, 10),
+ ParquetException);
}
// ----------------------------------------------------------------------
@@ -372,10 +381,14 @@ bool check_for_parent_consistency(const GroupNode* node) {
// Each node should have the group as parent
for (int i = 0; i < node->field_count(); i++) {
const NodePtr& field = node->field(i);
- if (field->parent() != node) { return false; }
+ if (field->parent() != node) {
+ return false;
+ }
if (field->is_group()) {
const GroupNode* group = static_cast<GroupNode*>(field.get());
- if (!check_for_parent_consistency(group)) { return false; }
+ if (!check_for_parent_consistency(group)) {
+ return false;
+ }
}
}
return true;
@@ -468,8 +481,8 @@ class TestSchemaFlatten : public ::testing::Test {
TEST_F(TestSchemaFlatten, DecimalMetadata) {
// Checks that DecimalMetadata is only set for DecimalTypes
- NodePtr node = PrimitiveNode::Make(
- "decimal", Repetition::REQUIRED, Type::INT64, LogicalType::DECIMAL, -1, 8, 4);
+ NodePtr node = PrimitiveNode::Make("decimal", Repetition::REQUIRED, Type::INT64,
+ LogicalType::DECIMAL, -1, 8, 4);
NodePtr group =
GroupNode::Make("group", Repetition::REPEATED, {node}, LogicalType::LIST);
Flatten(reinterpret_cast<GroupNode*>(group.get()));
@@ -526,8 +539,8 @@ TEST_F(TestSchemaFlatten, NestedExample) {
}
TEST(TestColumnDescriptor, TestAttrs) {
- NodePtr node = PrimitiveNode::Make(
- "name", Repetition::OPTIONAL, Type::BYTE_ARRAY, LogicalType::UTF8);
+ NodePtr node = PrimitiveNode::Make("name", Repetition::OPTIONAL, Type::BYTE_ARRAY,
+ LogicalType::UTF8);
ColumnDescriptor descr(node, 4, 1);
ASSERT_EQ("name", descr.name());
@@ -540,7 +553,7 @@ TEST(TestColumnDescriptor, TestAttrs) {
// Test FIXED_LEN_BYTE_ARRAY
node = PrimitiveNode::Make("name", Repetition::OPTIONAL, Type::FIXED_LEN_BYTE_ARRAY,
- LogicalType::DECIMAL, 12, 10, 4);
+ LogicalType::DECIMAL, 12, 10, 4);
descr = ColumnDescriptor(node, 4, 1);
ASSERT_EQ(Type::FIXED_LEN_BYTE_ARRAY, descr.physical_type());
@@ -572,8 +585,8 @@ TEST_F(TestSchemaDescriptor, Equals) {
NodePtr item1 = Int64("item1", Repetition::REQUIRED);
NodePtr item2 = Boolean("item2", Repetition::OPTIONAL);
NodePtr item3 = Int32("item3", Repetition::REPEATED);
- NodePtr list(GroupNode::Make(
- "records", Repetition::REPEATED, {item1, item2, item3}, LogicalType::LIST));
+ NodePtr list(GroupNode::Make("records", Repetition::REPEATED, {item1, item2, item3},
+ LogicalType::LIST));
NodePtr bag(GroupNode::Make("bag", Repetition::OPTIONAL, {list}));
NodePtr bag2(GroupNode::Make("bag", Repetition::REQUIRED, {list}));
@@ -624,8 +637,8 @@ TEST_F(TestSchemaDescriptor, BuildTree) {
NodePtr item1 = Int64("item1", Repetition::REQUIRED);
NodePtr item2 = Boolean("item2", Repetition::OPTIONAL);
NodePtr item3 = Int32("item3", Repetition::REPEATED);
- NodePtr list(GroupNode::Make(
- "records", Repetition::REPEATED, {item1, item2, item3}, LogicalType::LIST));
+ NodePtr list(GroupNode::Make("records", Repetition::REPEATED, {item1, item2, item3},
+ LogicalType::LIST));
NodePtr bag(GroupNode::Make("bag", Repetition::OPTIONAL, {list}));
fields.push_back(bag);
@@ -705,8 +718,8 @@ TEST(TestSchemaPrinter, Examples) {
NodePtr bag(GroupNode::Make("bag", Repetition::OPTIONAL, {list}));
fields.push_back(bag);
- fields.push_back(PrimitiveNode::Make(
- "c", Repetition::REQUIRED, Type::INT32, LogicalType::DECIMAL, -1, 3, 2));
+ fields.push_back(PrimitiveNode::Make("c", Repetition::REQUIRED, Type::INT32,
+ LogicalType::DECIMAL, -1, 3, 2));
NodePtr schema = GroupNode::Make("schema", Repetition::REPEATED, fields);
http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/b6f3caeb/src/parquet/schema.cc
----------------------------------------------------------------------
diff --git a/src/parquet/schema.cc b/src/parquet/schema.cc
index 5fc51fe..ddd8ac1 100644
--- a/src/parquet/schema.cc
+++ b/src/parquet/schema.cc
@@ -72,15 +72,15 @@ std::shared_ptr<ColumnPath> ColumnPath::extend(const std::string& node_name) con
std::string ColumnPath::ToDotString() const {
std::stringstream ss;
for (auto it = path_.cbegin(); it != path_.cend(); ++it) {
- if (it != path_.cbegin()) { ss << "."; }
+ if (it != path_.cbegin()) {
+ ss << ".";
+ }
ss << *it;
}
return ss.str();
}
-const std::vector<std::string>& ColumnPath::ToDotVector() const {
- return path_;
-}
+const std::vector<std::string>& ColumnPath::ToDotVector() const { return path_; }
// ----------------------------------------------------------------------
// Base node
@@ -96,16 +96,14 @@ bool Node::EqualsInternal(const Node* other) const {
repetition_ == other->repetition_ && logical_type_ == other->logical_type_;
}
-void Node::SetParent(const Node* parent) {
- parent_ = parent;
-}
+void Node::SetParent(const Node* parent) { parent_ = parent; }
// ----------------------------------------------------------------------
// Primitive node
PrimitiveNode::PrimitiveNode(const std::string& name, Repetition::type repetition,
- Type::type type, LogicalType::type logical_type, int length, int precision, int scale,
- int id)
+ Type::type type, LogicalType::type logical_type, int length,
+ int precision, int scale, int id)
: Node(Node::PRIMITIVE, name, repetition, logical_type, id),
physical_type_(type),
type_length_(length) {
@@ -224,13 +222,13 @@ bool PrimitiveNode::EqualsInternal(const PrimitiveNode* other) const {
}
bool PrimitiveNode::Equals(const Node* other) const {
- if (!Node::EqualsInternal(other)) { return false; }
+ if (!Node::EqualsInternal(other)) {
+ return false;
+ }
return EqualsInternal(static_cast<const PrimitiveNode*>(other));
}
-void PrimitiveNode::Visit(Node::Visitor* visitor) {
- visitor->Visit(this);
-}
+void PrimitiveNode::Visit(Node::Visitor* visitor) { visitor->Visit(this); }
void PrimitiveNode::VisitConst(Node::ConstVisitor* visitor) const {
visitor->Visit(this);
@@ -240,16 +238,24 @@ void PrimitiveNode::VisitConst(Node::ConstVisitor* visitor) const {
// Group node
bool GroupNode::EqualsInternal(const GroupNode* other) const {
- if (this == other) { return true; }
- if (this->field_count() != other->field_count()) { return false; }
+ if (this == other) {
+ return true;
+ }
+ if (this->field_count() != other->field_count()) {
+ return false;
+ }
for (int i = 0; i < this->field_count(); ++i) {
- if (!this->field(i)->Equals(other->field(i).get())) { return false; }
+ if (!this->field(i)->Equals(other->field(i).get())) {
+ return false;
+ }
}
return true;
}
bool GroupNode::Equals(const Node* other) const {
- if (!Node::EqualsInternal(other)) { return false; }
+ if (!Node::EqualsInternal(other)) {
+ return false;
+ }
return EqualsInternal(static_cast<const GroupNode*>(other));
}
@@ -264,7 +270,9 @@ int GroupNode::FieldIndex(const std::string& name) const {
int GroupNode::FieldIndex(const Node& node) const {
int result = FieldIndex(node.name());
- if (result < 0) { return -1; }
+ if (result < 0) {
+ return -1;
+ }
DCHECK(result < field_count());
if (!node.Equals(field(result).get())) {
// Same name but not the same node
@@ -273,13 +281,9 @@ int GroupNode::FieldIndex(const Node& node) const {
return result;
}
-void GroupNode::Visit(Node::Visitor* visitor) {
- visitor->Visit(this);
-}
+void GroupNode::Visit(Node::Visitor* visitor) { visitor->Visit(this); }
-void GroupNode::VisitConst(Node::ConstVisitor* visitor) const {
- visitor->Visit(this);
-}
+void GroupNode::VisitConst(Node::ConstVisitor* visitor) const { visitor->Visit(this); }
// ----------------------------------------------------------------------
// Node construction from Parquet metadata
@@ -304,25 +308,25 @@ static inline NodeParams GetNodeParams(const format::SchemaElement* element) {
return params;
}
-std::unique_ptr<Node> GroupNode::FromParquet(
- const void* opaque_element, int node_id, const NodeVector& fields) {
+std::unique_ptr<Node> GroupNode::FromParquet(const void* opaque_element, int node_id,
+ const NodeVector& fields) {
const format::SchemaElement* element =
static_cast<const format::SchemaElement*>(opaque_element);
NodeParams params = GetNodeParams(element);
- return std::unique_ptr<Node>(new GroupNode(
- params.name, params.repetition, fields, params.logical_type, node_id));
+ return std::unique_ptr<Node>(new GroupNode(params.name, params.repetition, fields,
+ params.logical_type, node_id));
}
-std::unique_ptr<Node> PrimitiveNode::FromParquet(
- const void* opaque_element, int node_id) {
+std::unique_ptr<Node> PrimitiveNode::FromParquet(const void* opaque_element,
+ int node_id) {
const format::SchemaElement* element =
static_cast<const format::SchemaElement*>(opaque_element);
NodeParams params = GetNodeParams(element);
std::unique_ptr<PrimitiveNode> result =
- std::unique_ptr<PrimitiveNode>(new PrimitiveNode(params.name, params.repetition,
- FromThrift(element->type), params.logical_type, element->type_length,
- element->precision, element->scale, node_id));
+ std::unique_ptr<PrimitiveNode>(new PrimitiveNode(
+ params.name, params.repetition, FromThrift(element->type), params.logical_type,
+ element->type_length, element->precision, element->scale, node_id));
// Return as unique_ptr to the base type
return std::unique_ptr<Node>(result.release());
@@ -442,8 +446,8 @@ class SchemaVisitor : public Node::ConstVisitor {
std::vector<format::SchemaElement>* elements_;
};
-SchemaFlattener::SchemaFlattener(
- const GroupNode* schema, std::vector<format::SchemaElement>* out)
+SchemaFlattener::SchemaFlattener(const GroupNode* schema,
+ std::vector<format::SchemaElement>* out)
: root_(schema), elements_(out) {}
void SchemaFlattener::Flatten() {
@@ -546,7 +550,9 @@ void SchemaPrinter::Visit(const GroupNode* node) {
PrintRepLevel(node->repetition(), stream_);
stream_ << " group " << node->name();
auto lt = node->logical_type();
- if (lt != LogicalType::NONE) { stream_ << " (" << LogicalTypeToString(lt) << ")"; }
+ if (lt != LogicalType::NONE) {
+ stream_ << " (" << LogicalTypeToString(lt) << ")";
+ }
stream_ << " {" << std::endl;
}
@@ -609,17 +615,21 @@ void SchemaDescriptor::Init(const NodePtr& schema) {
}
bool SchemaDescriptor::Equals(const SchemaDescriptor& other) const {
- if (this->num_columns() != other.num_columns()) { return false; }
+ if (this->num_columns() != other.num_columns()) {
+ return false;
+ }
for (int i = 0; i < this->num_columns(); ++i) {
- if (!this->Column(i)->Equals(*other.Column(i))) { return false; }
+ if (!this->Column(i)->Equals(*other.Column(i))) {
+ return false;
+ }
}
return true;
}
void SchemaDescriptor::BuildTree(const NodePtr& node, int16_t max_def_level,
- int16_t max_rep_level, const NodePtr& base) {
+ int16_t max_rep_level, const NodePtr& base) {
if (node->is_optional()) {
++max_def_level;
} else if (node->is_repeated()) {
@@ -639,19 +649,22 @@ void SchemaDescriptor::BuildTree(const NodePtr& node, int16_t max_def_level,
// Primitive node, append to leaves
leaves_.push_back(ColumnDescriptor(node, max_def_level, max_rep_level, this));
leaf_to_base_.emplace(static_cast<int>(leaves_.size()) - 1, base);
- leaf_to_idx_.emplace(
- node->path()->ToDotString(), static_cast<int>(leaves_.size()) - 1);
+ leaf_to_idx_.emplace(node->path()->ToDotString(),
+ static_cast<int>(leaves_.size()) - 1);
}
}
ColumnDescriptor::ColumnDescriptor(const schema::NodePtr& node,
- int16_t max_definition_level, int16_t max_repetition_level,
- const SchemaDescriptor* schema_descr)
+ int16_t max_definition_level,
+ int16_t max_repetition_level,
+ const SchemaDescriptor* schema_descr)
: node_(node),
max_definition_level_(max_definition_level),
max_repetition_level_(max_repetition_level),
schema_descr_(schema_descr) {
- if (!node_->is_primitive()) { throw ParquetException("Must be a primitive type"); }
+ if (!node_->is_primitive()) {
+ throw ParquetException("Must be a primitive type");
+ }
primitive_node_ = static_cast<const PrimitiveNode*>(node_.get());
}
@@ -677,7 +690,9 @@ int SchemaDescriptor::ColumnIndex(const std::string& node_path) const {
int SchemaDescriptor::ColumnIndex(const Node& node) const {
int result = ColumnIndex(node.path()->ToDotString());
- if (result < 0) { return -1; }
+ if (result < 0) {
+ return -1;
+ }
DCHECK(result < num_columns());
if (!node.Equals(Column(result)->schema_node().get())) {
// Same path but not the same node
@@ -699,9 +714,7 @@ int ColumnDescriptor::type_precision() const {
return primitive_node_->decimal_metadata().precision;
}
-int ColumnDescriptor::type_length() const {
- return primitive_node_->type_length();
-}
+int ColumnDescriptor::type_length() const { return primitive_node_->type_length(); }
const std::shared_ptr<ColumnPath> ColumnDescriptor::path() const {
return primitive_node_->path();
http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/b6f3caeb/src/parquet/schema.h
----------------------------------------------------------------------
diff --git a/src/parquet/schema.h b/src/parquet/schema.h
index 856f72d..e240b82 100644
--- a/src/parquet/schema.h
+++ b/src/parquet/schema.h
@@ -108,7 +108,7 @@ class PARQUET_EXPORT Node {
enum type { PRIMITIVE, GROUP };
Node(Node::type type, const std::string& name, Repetition::type repetition,
- LogicalType::type logical_type = LogicalType::NONE, int id = -1)
+ LogicalType::type logical_type = LogicalType::NONE, int id = -1)
: type_(type),
name_(name),
repetition_(repetition),
@@ -195,10 +195,11 @@ class PARQUET_EXPORT PrimitiveNode : public Node {
static std::unique_ptr<Node> FromParquet(const void* opaque_element, int id);
static inline NodePtr Make(const std::string& name, Repetition::type repetition,
- Type::type type, LogicalType::type logical_type = LogicalType::NONE,
- int length = -1, int precision = -1, int scale = -1) {
- return NodePtr(new PrimitiveNode(
- name, repetition, type, logical_type, length, precision, scale));
+ Type::type type,
+ LogicalType::type logical_type = LogicalType::NONE,
+ int length = -1, int precision = -1, int scale = -1) {
+ return NodePtr(new PrimitiveNode(name, repetition, type, logical_type, length,
+ precision, scale));
}
bool Equals(const Node* other) const override;
@@ -215,8 +216,8 @@ class PARQUET_EXPORT PrimitiveNode : public Node {
private:
PrimitiveNode(const std::string& name, Repetition::type repetition, Type::type type,
- LogicalType::type logical_type = LogicalType::NONE, int length = -1,
- int precision = -1, int scale = -1, int id = -1);
+ LogicalType::type logical_type = LogicalType::NONE, int length = -1,
+ int precision = -1, int scale = -1, int id = -1);
Type::type physical_type_;
int32_t type_length_;
@@ -243,11 +244,12 @@ class PARQUET_EXPORT GroupNode : public Node {
public:
// Like PrimitiveNode, GroupNode::FromParquet accepts an opaque void* to avoid exporting
// parquet::SchemaElement into the public API
- static std::unique_ptr<Node> FromParquet(
- const void* opaque_element, int id, const NodeVector& fields);
+ static std::unique_ptr<Node> FromParquet(const void* opaque_element, int id,
+ const NodeVector& fields);
static inline NodePtr Make(const std::string& name, Repetition::type repetition,
- const NodeVector& fields, LogicalType::type logical_type = LogicalType::NONE) {
+ const NodeVector& fields,
+ LogicalType::type logical_type = LogicalType::NONE) {
return NodePtr(new GroupNode(name, repetition, fields, logical_type));
}
@@ -265,8 +267,8 @@ class PARQUET_EXPORT GroupNode : public Node {
private:
GroupNode(const std::string& name, Repetition::type repetition,
- const NodeVector& fields, LogicalType::type logical_type = LogicalType::NONE,
- int id = -1)
+ const NodeVector& fields, LogicalType::type logical_type = LogicalType::NONE,
+ int id = -1)
: Node(Node::GROUP, name, repetition, logical_type, id), fields_(fields) {
field_name_to_idx_.clear();
auto field_idx = 0;
@@ -290,10 +292,10 @@ class PARQUET_EXPORT GroupNode : public Node {
// ----------------------------------------------------------------------
// Convenience primitive type factory functions
-#define PRIMITIVE_FACTORY(FuncName, TYPE) \
- static inline NodePtr FuncName( \
- const std::string& name, Repetition::type repetition = Repetition::OPTIONAL) { \
- return PrimitiveNode::Make(name, repetition, Type::TYPE); \
+#define PRIMITIVE_FACTORY(FuncName, TYPE) \
+ static inline NodePtr FuncName(const std::string& name, \
+ Repetition::type repetition = Repetition::OPTIONAL) { \
+ return PrimitiveNode::Make(name, repetition, Type::TYPE); \
}
PRIMITIVE_FACTORY(Boolean, BOOLEAN);
@@ -304,8 +306,8 @@ PRIMITIVE_FACTORY(Float, FLOAT);
PRIMITIVE_FACTORY(Double, DOUBLE);
PRIMITIVE_FACTORY(ByteArray, BYTE_ARRAY);
-void PARQUET_EXPORT PrintSchema(
- const schema::Node* schema, std::ostream& stream, int indent_width = 2);
+void PARQUET_EXPORT PrintSchema(const schema::Node* schema, std::ostream& stream,
+ int indent_width = 2);
} // namespace schema
@@ -317,7 +319,8 @@ void PARQUET_EXPORT PrintSchema(
class PARQUET_EXPORT ColumnDescriptor {
public:
ColumnDescriptor(const schema::NodePtr& node, int16_t max_definition_level,
- int16_t max_repetition_level, const SchemaDescriptor* schema_descr = nullptr);
+ int16_t max_repetition_level,
+ const SchemaDescriptor* schema_descr = nullptr);
bool Equals(const ColumnDescriptor& other) const;
@@ -402,7 +405,7 @@ class PARQUET_EXPORT SchemaDescriptor {
const schema::GroupNode* group_node_;
void BuildTree(const schema::NodePtr& node, int16_t max_def_level,
- int16_t max_rep_level, const schema::NodePtr& base);
+ int16_t max_rep_level, const schema::NodePtr& base);
// Result of leaf node / tree analysis
std::vector<ColumnDescriptor> leaves_;
http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/b6f3caeb/src/parquet/statistics-test.cc
----------------------------------------------------------------------
diff --git a/src/parquet/statistics-test.cc b/src/parquet/statistics-test.cc
index cbc761d..26352c1 100644
--- a/src/parquet/statistics-test.cc
+++ b/src/parquet/statistics-test.cc
@@ -68,13 +68,13 @@ class TestRowGroupStatistics : public PrimitiveTypedTest<TestType> {
std::string encoded_max = statistics1.EncodeMax();
TypedStats statistics2(this->schema_.Column(0), encoded_min, encoded_max,
- this->values_.size(), 0, 0, true);
+ this->values_.size(), 0, 0, true);
TypedStats statistics3(this->schema_.Column(0));
std::vector<uint8_t> valid_bits(
BitUtil::RoundUpNumBytes(static_cast<uint32_t>(this->values_.size())) + 1, 255);
- statistics3.UpdateSpaced(
- this->values_ptr_, valid_bits.data(), 0, this->values_.size(), 0);
+ statistics3.UpdateSpaced(this->values_ptr_, valid_bits.data(), 0,
+ this->values_.size(), 0);
std::string encoded_min_spaced = statistics3.EncodeMin();
std::string encoded_max_spaced = statistics3.EncodeMax();
@@ -108,13 +108,13 @@ class TestRowGroupStatistics : public PrimitiveTypedTest<TestType> {
TypedStats statistics1(this->schema_.Column(0));
this->GenerateData(1000);
- statistics1.Update(
- this->values_ptr_, this->values_.size() - num_null[0], num_null[0]);
+ statistics1.Update(this->values_ptr_, this->values_.size() - num_null[0],
+ num_null[0]);
TypedStats statistics2(this->schema_.Column(0));
this->GenerateData(1000);
- statistics2.Update(
- this->values_ptr_, this->values_.size() - num_null[1], num_null[1]);
+ statistics2.Update(this->values_ptr_, this->values_.size() - num_null[1],
+ num_null[1]);
TypedStats total(this->schema_.Column(0));
total.Merge(statistics1);
@@ -149,14 +149,14 @@ class TestRowGroupStatistics : public PrimitiveTypedTest<TestType> {
int64_t batch_null_count = i ? null_count : 0;
DCHECK(null_count <= num_values); // avoid too much headache
std::vector<int16_t> definition_levels(batch_null_count, 0);
- definition_levels.insert(
- definition_levels.end(), batch_num_values - batch_null_count, 1);
+ definition_levels.insert(definition_levels.end(),
+ batch_num_values - batch_null_count, 1);
auto beg = this->values_.begin() + i * num_values / 2;
auto end = beg + batch_num_values;
std::vector<T> batch = GetDeepCopy(std::vector<T>(beg, end));
T* batch_values_ptr = GetValuesPointer(batch);
- column_writer->WriteBatch(
- batch_num_values, definition_levels.data(), nullptr, batch_values_ptr);
+ column_writer->WriteBatch(batch_num_values, definition_levels.data(), nullptr,
+ batch_values_ptr);
DeepFree(batch);
}
column_writer->Close();
@@ -263,12 +263,13 @@ void TestRowGroupStatistics<ByteArrayType>::TestMinMaxEncode() {
// encoded is same as unencoded
ASSERT_EQ(encoded_min,
- std::string((const char*)statistics1.min().ptr, statistics1.min().len));
+ std::string((const char*)statistics1.min().ptr, statistics1.min().len));
ASSERT_EQ(encoded_max,
- std::string((const char*)statistics1.max().ptr, statistics1.max().len));
+ std::string((const char*)statistics1.max().ptr, statistics1.max().len));
TypedRowGroupStatistics<ByteArrayType> statistics2(this->schema_.Column(0), encoded_min,
- encoded_max, this->values_.size(), 0, 0, true);
+ encoded_max, this->values_.size(), 0,
+ 0, true);
ASSERT_EQ(encoded_min, statistics2.EncodeMin());
ASSERT_EQ(encoded_max, statistics2.EncodeMax());
@@ -277,7 +278,7 @@ void TestRowGroupStatistics<ByteArrayType>::TestMinMaxEncode() {
}
using TestTypes = ::testing::Types<Int32Type, Int64Type, Int96Type, FloatType, DoubleType,
- ByteArrayType, FLBAType, BooleanType>;
+ ByteArrayType, FLBAType, BooleanType>;
TYPED_TEST_CASE(TestRowGroupStatistics, TestTypes);
@@ -316,19 +317,20 @@ TEST(CorruptStatistics, Basics) {
schema::NodePtr node;
std::vector<schema::NodePtr> fields;
// Test Physical Types
- fields.push_back(schema::PrimitiveNode::Make(
- "col1", Repetition::OPTIONAL, Type::INT32, LogicalType::NONE));
- fields.push_back(schema::PrimitiveNode::Make(
- "col2", Repetition::OPTIONAL, Type::BYTE_ARRAY, LogicalType::NONE));
+ fields.push_back(schema::PrimitiveNode::Make("col1", Repetition::OPTIONAL, Type::INT32,
+ LogicalType::NONE));
+ fields.push_back(schema::PrimitiveNode::Make("col2", Repetition::OPTIONAL,
+ Type::BYTE_ARRAY, LogicalType::NONE));
// Test Logical Types
- fields.push_back(schema::PrimitiveNode::Make(
- "col3", Repetition::OPTIONAL, Type::INT32, LogicalType::DATE));
- fields.push_back(schema::PrimitiveNode::Make(
- "col4", Repetition::OPTIONAL, Type::INT32, LogicalType::UINT_32));
+ fields.push_back(schema::PrimitiveNode::Make("col3", Repetition::OPTIONAL, Type::INT32,
+ LogicalType::DATE));
+ fields.push_back(schema::PrimitiveNode::Make("col4", Repetition::OPTIONAL, Type::INT32,
+ LogicalType::UINT_32));
fields.push_back(schema::PrimitiveNode::Make("col5", Repetition::OPTIONAL,
- Type::FIXED_LEN_BYTE_ARRAY, LogicalType::INTERVAL, 12));
- fields.push_back(schema::PrimitiveNode::Make(
- "col6", Repetition::OPTIONAL, Type::BYTE_ARRAY, LogicalType::UTF8));
+ Type::FIXED_LEN_BYTE_ARRAY,
+ LogicalType::INTERVAL, 12));
+ fields.push_back(schema::PrimitiveNode::Make("col6", Repetition::OPTIONAL,
+ Type::BYTE_ARRAY, LogicalType::UTF8));
node = schema::GroupNode::Make("schema", Repetition::REQUIRED, fields);
schema.Init(node);
http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/b6f3caeb/src/parquet/statistics.cc
----------------------------------------------------------------------
diff --git a/src/parquet/statistics.cc b/src/parquet/statistics.cc
index d99140e..12d1f5b 100644
--- a/src/parquet/statistics.cc
+++ b/src/parquet/statistics.cc
@@ -30,8 +30,8 @@ using arrow::MemoryPool;
namespace parquet {
template <typename DType>
-TypedRowGroupStatistics<DType>::TypedRowGroupStatistics(
- const ColumnDescriptor* schema, MemoryPool* pool)
+TypedRowGroupStatistics<DType>::TypedRowGroupStatistics(const ColumnDescriptor* schema,
+ MemoryPool* pool)
: pool_(pool),
min_buffer_(AllocateBuffer(pool_, 0)),
max_buffer_(AllocateBuffer(pool_, 0)) {
@@ -41,8 +41,10 @@ TypedRowGroupStatistics<DType>::TypedRowGroupStatistics(
template <typename DType>
TypedRowGroupStatistics<DType>::TypedRowGroupStatistics(const typename DType::c_type& min,
- const typename DType::c_type& max, int64_t num_values, int64_t null_count,
- int64_t distinct_count)
+ const typename DType::c_type& max,
+ int64_t num_values,
+ int64_t null_count,
+ int64_t distinct_count)
: pool_(default_memory_pool()),
min_buffer_(AllocateBuffer(pool_, 0)),
max_buffer_(AllocateBuffer(pool_, 0)) {
@@ -56,9 +58,10 @@ TypedRowGroupStatistics<DType>::TypedRowGroupStatistics(const typename DType::c_
}
template <typename DType>
-TypedRowGroupStatistics<DType>::TypedRowGroupStatistics(const ColumnDescriptor* schema,
- const std::string& encoded_min, const std::string& encoded_max, int64_t num_values,
- int64_t null_count, int64_t distinct_count, bool has_min_max, MemoryPool* pool)
+TypedRowGroupStatistics<DType>::TypedRowGroupStatistics(
+ const ColumnDescriptor* schema, const std::string& encoded_min,
+ const std::string& encoded_max, int64_t num_values, int64_t null_count,
+ int64_t distinct_count, bool has_min_max, MemoryPool* pool)
: pool_(pool),
min_buffer_(AllocateBuffer(pool_, 0)),
max_buffer_(AllocateBuffer(pool_, 0)) {
@@ -68,8 +71,12 @@ TypedRowGroupStatistics<DType>::TypedRowGroupStatistics(const ColumnDescriptor*
SetDescr(schema);
- if (!encoded_min.empty()) { PlainDecode(encoded_min, &min_); }
- if (!encoded_max.empty()) { PlainDecode(encoded_max, &max_); }
+ if (!encoded_min.empty()) {
+ PlainDecode(encoded_min, &min_);
+ }
+ if (!encoded_max.empty()) {
+ PlainDecode(encoded_max, &max_);
+ }
has_min_max_ = has_min_max;
}
@@ -85,8 +92,8 @@ void TypedRowGroupStatistics<DType>::Reset() {
}
template <typename DType>
-void TypedRowGroupStatistics<DType>::Update(
- const T* values, int64_t num_not_null, int64_t num_null) {
+void TypedRowGroupStatistics<DType>::Update(const T* values, int64_t num_not_null,
+ int64_t num_null) {
DCHECK(num_not_null >= 0);
DCHECK(num_null >= 0);
@@ -109,8 +116,10 @@ void TypedRowGroupStatistics<DType>::Update(
template <typename DType>
void TypedRowGroupStatistics<DType>::UpdateSpaced(const T* values,
- const uint8_t* valid_bits, int64_t valid_bits_offset, int64_t num_not_null,
- int64_t num_null) {
+ const uint8_t* valid_bits,
+ int64_t valid_bits_offset,
+ int64_t num_not_null,
+ int64_t num_null) {
DCHECK(num_not_null >= 0);
DCHECK(num_null >= 0);
@@ -126,7 +135,9 @@ void TypedRowGroupStatistics<DType>::UpdateSpaced(const T* values,
int64_t length = num_null + num_not_null;
int64_t i = 0;
for (; i < length; i++) {
- if (bitset_valid_bits & (1 << bit_offset_valid_bits)) { break; }
+ if (bitset_valid_bits & (1 << bit_offset_valid_bits)) {
+ break;
+ }
READ_NEXT_BITSET(valid_bits);
}
T min = values[i];
@@ -216,8 +227,8 @@ void TypedRowGroupStatistics<DType>::PlainEncode(const T& src, std::string* dst)
template <typename DType>
void TypedRowGroupStatistics<DType>::PlainDecode(const std::string& src, T* dst) {
PlainDecoder<DType> decoder(descr());
- decoder.SetData(
- 1, reinterpret_cast<const uint8_t*>(src.c_str()), static_cast<int>(src.size()));
+ decoder.SetData(1, reinterpret_cast<const uint8_t*>(src.c_str()),
+ static_cast<int>(src.size()));
decoder.Decode(dst, 1);
}
http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/b6f3caeb/src/parquet/statistics.h
----------------------------------------------------------------------
diff --git a/src/parquet/statistics.h b/src/parquet/statistics.h
index c6a2487..12d0555 100644
--- a/src/parquet/statistics.h
+++ b/src/parquet/statistics.h
@@ -134,15 +134,15 @@ class TypedRowGroupStatistics : public RowGroupStatistics {
using T = typename DType::c_type;
TypedRowGroupStatistics(const ColumnDescriptor* schema,
- ::arrow::MemoryPool* pool = ::arrow::default_memory_pool());
+ ::arrow::MemoryPool* pool = ::arrow::default_memory_pool());
TypedRowGroupStatistics(const T& min, const T& max, int64_t num_values,
- int64_t null_count, int64_t distinct_count);
+ int64_t null_count, int64_t distinct_count);
TypedRowGroupStatistics(const ColumnDescriptor* schema, const std::string& encoded_min,
- const std::string& encoded_max, int64_t num_values, int64_t null_count,
- int64_t distinct_count, bool has_min_max,
- ::arrow::MemoryPool* pool = ::arrow::default_memory_pool());
+ const std::string& encoded_max, int64_t num_values,
+ int64_t null_count, int64_t distinct_count, bool has_min_max,
+ ::arrow::MemoryPool* pool = ::arrow::default_memory_pool());
bool HasMinMax() const override;
void Reset() override;
@@ -150,7 +150,7 @@ class TypedRowGroupStatistics : public RowGroupStatistics {
void Update(const T* values, int64_t num_not_null, int64_t num_null);
void UpdateSpaced(const T* values, const uint8_t* valid_bits, int64_t valid_bits_spaced,
- int64_t num_not_null, int64_t num_null);
+ int64_t num_not_null, int64_t num_null);
const T& min() const;
const T& max() const;
@@ -178,8 +178,8 @@ inline void TypedRowGroupStatistics<DType>::Copy(const T& src, T* dst, PoolBuffe
}
template <>
-inline void TypedRowGroupStatistics<FLBAType>::Copy(
- const FLBA& src, FLBA* dst, PoolBuffer* buffer) {
+inline void TypedRowGroupStatistics<FLBAType>::Copy(const FLBA& src, FLBA* dst,
+ PoolBuffer* buffer) {
if (dst->ptr == src.ptr) return;
uint32_t len = descr_->type_length();
PARQUET_THROW_NOT_OK(buffer->Resize(len, false));
@@ -188,8 +188,9 @@ inline void TypedRowGroupStatistics<FLBAType>::Copy(
}
template <>
-inline void TypedRowGroupStatistics<ByteArrayType>::Copy(
- const ByteArray& src, ByteArray* dst, PoolBuffer* buffer) {
+inline void TypedRowGroupStatistics<ByteArrayType>::Copy(const ByteArray& src,
+ ByteArray* dst,
+ PoolBuffer* buffer) {
if (dst->ptr == src.ptr) return;
PARQUET_THROW_NOT_OK(buffer->Resize(src.len, false));
std::memcpy(buffer->mutable_data(), src.ptr, src.len);
http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/b6f3caeb/src/parquet/test-specialization.h
----------------------------------------------------------------------
diff --git a/src/parquet/test-specialization.h b/src/parquet/test-specialization.h
index 6bd1dee..4719fdc 100644
--- a/src/parquet/test-specialization.h
+++ b/src/parquet/test-specialization.h
@@ -40,8 +40,8 @@ void InitValues<bool>(int num_values, vector<bool>& values, vector<uint8_t>& buf
}
template <>
-void InitValues<ByteArray>(
- int num_values, vector<ByteArray>& values, vector<uint8_t>& buffer) {
+void InitValues<ByteArray>(int num_values, vector<ByteArray>& values,
+ vector<uint8_t>& buffer) {
int max_byte_array_len = 12;
int num_bytes = max_byte_array_len + sizeof(uint32_t);
size_t nbytes = num_values * num_bytes;
@@ -59,7 +59,7 @@ void InitValues<FLBA>(int num_values, vector<FLBA>& values, vector<uint8_t>& buf
template <>
void InitValues<Int96>(int num_values, vector<Int96>& values, vector<uint8_t>& buffer) {
random_Int96_numbers(num_values, 0, std::numeric_limits<int32_t>::min(),
- std::numeric_limits<int32_t>::max(), values.data());
+ std::numeric_limits<int32_t>::max(), values.data());
}
inline std::string TestColumnName(int i) {
@@ -79,8 +79,8 @@ class PrimitiveTypedTest : public ::testing::Test {
for (int i = 0; i < num_columns; ++i) {
std::string name = TestColumnName(i);
- fields.push_back(schema::PrimitiveNode::Make(
- name, repetition, TestType::type_num, LogicalType::NONE, FLBA_LENGTH));
+ fields.push_back(schema::PrimitiveNode::Make(name, repetition, TestType::type_num,
+ LogicalType::NONE, FLBA_LENGTH));
}
node_ = schema::GroupNode::Make("schema", Repetition::REQUIRED, fields);
schema_.Init(node_);
http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/b6f3caeb/src/parquet/test-util.h
----------------------------------------------------------------------
diff --git a/src/parquet/test-util.h b/src/parquet/test-util.h
index 8657a7f..356486b 100644
--- a/src/parquet/test-util.h
+++ b/src/parquet/test-util.h
@@ -53,12 +53,12 @@ namespace test {
template <typename T>
static void InitValues(int num_values, vector<T>& values, vector<uint8_t>& buffer) {
random_numbers(num_values, 0, std::numeric_limits<T>::min(),
- std::numeric_limits<T>::max(), values.data());
+ std::numeric_limits<T>::max(), values.data());
}
template <typename T>
-static void InitDictValues(
- int num_values, int num_dicts, vector<T>& values, vector<uint8_t>& buffer) {
+static void InitDictValues(int num_values, int num_dicts, vector<T>& values,
+ vector<uint8_t>& buffer) {
int repeat_factor = num_values / num_dicts;
InitValues<T>(num_dicts, values, buffer);
// add some repeated values
@@ -112,7 +112,7 @@ class DataPageBuilder {
have_values_(false) {}
void AppendDefLevels(const vector<int16_t>& levels, int16_t max_level,
- Encoding::type encoding = Encoding::RLE) {
+ Encoding::type encoding = Encoding::RLE) {
AppendLevels(levels, max_level, encoding);
num_values_ = std::max(static_cast<int32_t>(levels.size()), num_values_);
@@ -121,7 +121,7 @@ class DataPageBuilder {
}
void AppendRepLevels(const vector<int16_t>& levels, int16_t max_level,
- Encoding::type encoding = Encoding::RLE) {
+ Encoding::type encoding = Encoding::RLE) {
AppendLevels(levels, max_level, encoding);
num_values_ = std::max(static_cast<int32_t>(levels.size()), num_values_);
@@ -130,7 +130,7 @@ class DataPageBuilder {
}
void AppendValues(const ColumnDescriptor* d, const vector<T>& values,
- Encoding::type encoding = Encoding::PLAIN) {
+ Encoding::type encoding = Encoding::PLAIN) {
PlainEncoder<Type> encoder(d);
encoder.Put(&values[0], static_cast<int>(values.size()));
std::shared_ptr<Buffer> values_sink = encoder.FlushValues();
@@ -162,8 +162,8 @@ class DataPageBuilder {
bool have_values_;
// Used internally for both repetition and definition levels
- void AppendLevels(
- const vector<int16_t>& levels, int16_t max_level, Encoding::type encoding) {
+ void AppendLevels(const vector<int16_t>& levels, int16_t max_level,
+ Encoding::type encoding) {
if (encoding != Encoding::RLE) {
ParquetException::NYI("only rle encoding currently implemented");
}
@@ -176,7 +176,7 @@ class DataPageBuilder {
// size.
LevelEncoder encoder;
encoder.Init(encoding, max_level, static_cast<int>(levels.size()),
- encode_buffer.data(), static_cast<int>(encode_buffer.size()));
+ encode_buffer.data(), static_cast<int>(encode_buffer.size()));
encoder.Encode(static_cast<int>(levels.size()), levels.data());
@@ -187,8 +187,9 @@ class DataPageBuilder {
};
template <>
-void DataPageBuilder<BooleanType>::AppendValues(
- const ColumnDescriptor* d, const vector<bool>& values, Encoding::type encoding) {
+void DataPageBuilder<BooleanType>::AppendValues(const ColumnDescriptor* d,
+ const vector<bool>& values,
+ Encoding::type encoding) {
if (encoding != Encoding::PLAIN) {
ParquetException::NYI("only plain encoding currently implemented");
}
@@ -203,17 +204,22 @@ void DataPageBuilder<BooleanType>::AppendValues(
}
template <typename Type>
-static shared_ptr<DataPage> MakeDataPage(const ColumnDescriptor* d,
- const vector<typename Type::c_type>& values, int num_vals, Encoding::type encoding,
- const uint8_t* indices, int indices_size, const vector<int16_t>& def_levels,
- int16_t max_def_level, const vector<int16_t>& rep_levels, int16_t max_rep_level) {
+static shared_ptr<DataPage> MakeDataPage(
+ const ColumnDescriptor* d, const vector<typename Type::c_type>& values, int num_vals,
+ Encoding::type encoding, const uint8_t* indices, int indices_size,
+ const vector<int16_t>& def_levels, int16_t max_def_level,
+ const vector<int16_t>& rep_levels, int16_t max_rep_level) {
int num_values = 0;
InMemoryOutputStream page_stream;
test::DataPageBuilder<Type> page_builder(&page_stream);
- if (!rep_levels.empty()) { page_builder.AppendRepLevels(rep_levels, max_rep_level); }
- if (!def_levels.empty()) { page_builder.AppendDefLevels(def_levels, max_def_level); }
+ if (!rep_levels.empty()) {
+ page_builder.AppendRepLevels(rep_levels, max_rep_level);
+ }
+ if (!def_levels.empty()) {
+ page_builder.AppendDefLevels(def_levels, max_def_level);
+ }
if (encoding == Encoding::PLAIN) {
page_builder.AppendValues(d, values, encoding);
@@ -226,7 +232,8 @@ static shared_ptr<DataPage> MakeDataPage(const ColumnDescriptor* d,
auto buffer = page_stream.GetBuffer();
return std::make_shared<DataPage>(buffer, num_values, encoding,
- page_builder.def_level_encoding(), page_builder.rep_level_encoding());
+ page_builder.def_level_encoding(),
+ page_builder.rep_level_encoding());
}
template <typename TYPE>
@@ -287,9 +294,10 @@ shared_ptr<Buffer> DictionaryPageBuilder<BooleanType>::AppendValues(
}
template <typename Type>
-static shared_ptr<DictionaryPage> MakeDictPage(const ColumnDescriptor* d,
- const vector<typename Type::c_type>& values, const vector<int>& values_per_page,
- Encoding::type encoding, vector<shared_ptr<Buffer>>& rle_indices) {
+static shared_ptr<DictionaryPage> MakeDictPage(
+ const ColumnDescriptor* d, const vector<typename Type::c_type>& values,
+ const vector<int>& values_per_page, Encoding::type encoding,
+ vector<shared_ptr<Buffer>>& rle_indices) {
InMemoryOutputStream page_stream;
test::DictionaryPageBuilder<Type> page_builder(d);
int num_pages = static_cast<int>(values_per_page.size());
@@ -303,17 +311,19 @@ static shared_ptr<DictionaryPage> MakeDictPage(const ColumnDescriptor* d,
auto buffer = page_builder.WriteDict();
- return std::make_shared<DictionaryPage>(
- buffer, page_builder.num_values(), Encoding::PLAIN);
+ return std::make_shared<DictionaryPage>(buffer, page_builder.num_values(),
+ Encoding::PLAIN);
}
// Given def/rep levels and values create multiple dict pages
template <typename Type>
static void PaginateDict(const ColumnDescriptor* d,
- const vector<typename Type::c_type>& values, const vector<int16_t>& def_levels,
- int16_t max_def_level, const vector<int16_t>& rep_levels, int16_t max_rep_level,
- int num_levels_per_page, const vector<int>& values_per_page,
- vector<shared_ptr<Page>>& pages, Encoding::type encoding = Encoding::RLE_DICTIONARY) {
+ const vector<typename Type::c_type>& values,
+ const vector<int16_t>& def_levels, int16_t max_def_level,
+ const vector<int16_t>& rep_levels, int16_t max_rep_level,
+ int num_levels_per_page, const vector<int>& values_per_page,
+ vector<shared_ptr<Page>>& pages,
+ Encoding::type encoding = Encoding::RLE_DICTIONARY) {
int num_pages = static_cast<int>(values_per_page.size());
vector<shared_ptr<Buffer>> rle_indices;
shared_ptr<DictionaryPage> dict_page =
@@ -332,8 +342,9 @@ static void PaginateDict(const ColumnDescriptor* d,
rep_level_start = i * num_levels_per_page;
rep_level_end = (i + 1) * num_levels_per_page;
}
- shared_ptr<DataPage> data_page = MakeDataPage<Int32Type>(d, {}, values_per_page[i],
- encoding, rle_indices[i]->data(), static_cast<int>(rle_indices[i]->size()),
+ shared_ptr<DataPage> data_page = MakeDataPage<Int32Type>(
+ d, {}, values_per_page[i], encoding, rle_indices[i]->data(),
+ static_cast<int>(rle_indices[i]->size()),
slice(def_levels, def_level_start, def_level_end), max_def_level,
slice(rep_levels, rep_level_start, rep_level_end), max_rep_level);
pages.push_back(data_page);
@@ -343,10 +354,12 @@ static void PaginateDict(const ColumnDescriptor* d,
// Given def/rep levels and values create multiple plain pages
template <typename Type>
static void PaginatePlain(const ColumnDescriptor* d,
- const vector<typename Type::c_type>& values, const vector<int16_t>& def_levels,
- int16_t max_def_level, const vector<int16_t>& rep_levels, int16_t max_rep_level,
- int num_levels_per_page, const vector<int>& values_per_page,
- vector<shared_ptr<Page>>& pages, Encoding::type encoding = Encoding::PLAIN) {
+ const vector<typename Type::c_type>& values,
+ const vector<int16_t>& def_levels, int16_t max_def_level,
+ const vector<int16_t>& rep_levels, int16_t max_rep_level,
+ int num_levels_per_page, const vector<int>& values_per_page,
+ vector<shared_ptr<Page>>& pages,
+ Encoding::type encoding = Encoding::PLAIN) {
int num_pages = static_cast<int>(values_per_page.size());
int def_level_start = 0;
int def_level_end = 0;
@@ -362,10 +375,11 @@ static void PaginatePlain(const ColumnDescriptor* d,
rep_level_start = i * num_levels_per_page;
rep_level_end = (i + 1) * num_levels_per_page;
}
- shared_ptr<DataPage> page = MakeDataPage<Type>(d,
- slice(values, value_start, value_start + values_per_page[i]), values_per_page[i],
- encoding, NULL, 0, slice(def_levels, def_level_start, def_level_end),
- max_def_level, slice(rep_levels, rep_level_start, rep_level_end), max_rep_level);
+ shared_ptr<DataPage> page = MakeDataPage<Type>(
+ d, slice(values, value_start, value_start + values_per_page[i]),
+ values_per_page[i], encoding, NULL, 0,
+ slice(def_levels, def_level_start, def_level_end), max_def_level,
+ slice(rep_levels, rep_level_start, rep_level_end), max_rep_level);
pages.push_back(page);
value_start += values_per_page[i];
}
@@ -374,9 +388,10 @@ static void PaginatePlain(const ColumnDescriptor* d,
// Generates pages from randomly generated data
template <typename Type>
static int MakePages(const ColumnDescriptor* d, int num_pages, int levels_per_page,
- vector<int16_t>& def_levels, vector<int16_t>& rep_levels,
- vector<typename Type::c_type>& values, vector<uint8_t>& buffer,
- vector<shared_ptr<Page>>& pages, Encoding::type encoding = Encoding::PLAIN) {
+ vector<int16_t>& def_levels, vector<int16_t>& rep_levels,
+ vector<typename Type::c_type>& values, vector<uint8_t>& buffer,
+ vector<shared_ptr<Page>>& pages,
+ Encoding::type encoding = Encoding::PLAIN) {
int num_levels = levels_per_page * num_pages;
int num_values = 0;
uint32_t seed = 0;
@@ -411,13 +426,13 @@ static int MakePages(const ColumnDescriptor* d, int num_pages, int levels_per_pa
if (encoding == Encoding::PLAIN) {
InitValues<typename Type::c_type>(num_values, values, buffer);
PaginatePlain<Type>(d, values, def_levels, max_def_level, rep_levels, max_rep_level,
- levels_per_page, values_per_page, pages);
+ levels_per_page, values_per_page, pages);
} else if (encoding == Encoding::RLE_DICTIONARY ||
encoding == Encoding::PLAIN_DICTIONARY) {
// Calls InitValues and repeats the data
InitDictValues<typename Type::c_type>(num_values, levels_per_page, values, buffer);
PaginateDict<Type>(d, values, def_levels, max_def_level, rep_levels, max_rep_level,
- levels_per_page, values_per_page, pages);
+ levels_per_page, values_per_page, pages);
}
return num_values;
http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/b6f3caeb/src/parquet/thrift.h
----------------------------------------------------------------------
diff --git a/src/parquet/thrift.h b/src/parquet/thrift.h
index 7fa0de3..7a23e41 100644
--- a/src/parquet/thrift.h
+++ b/src/parquet/thrift.h
@@ -30,9 +30,9 @@
#include <thrift/protocol/TCompactProtocol.h>
#include <thrift/protocol/TDebugProtocol.h>
-#include <sstream>
#include <thrift/protocol/TBinaryProtocol.h>
#include <thrift/transport/TBufferTransports.h>
+#include <sstream>
#include "parquet/exception.h"
#include "parquet/parquet_types.h"
http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/b6f3caeb/src/parquet/types-test.cc
----------------------------------------------------------------------
diff --git a/src/parquet/types-test.cc b/src/parquet/types-test.cc
index ba3fe69..eedeaae 100644
--- a/src/parquet/types-test.cc
+++ b/src/parquet/types-test.cc
@@ -44,10 +44,10 @@ TEST(TestLogicalTypeToString, LogicalTypes) {
ASSERT_STREQ("DATE", LogicalTypeToString(LogicalType::DATE).c_str());
ASSERT_STREQ("TIME_MILLIS", LogicalTypeToString(LogicalType::TIME_MILLIS).c_str());
ASSERT_STREQ("TIME_MICROS", LogicalTypeToString(LogicalType::TIME_MICROS).c_str());
- ASSERT_STREQ(
- "TIMESTAMP_MILLIS", LogicalTypeToString(LogicalType::TIMESTAMP_MILLIS).c_str());
- ASSERT_STREQ(
- "TIMESTAMP_MICROS", LogicalTypeToString(LogicalType::TIMESTAMP_MICROS).c_str());
+ ASSERT_STREQ("TIMESTAMP_MILLIS",
+ LogicalTypeToString(LogicalType::TIMESTAMP_MILLIS).c_str());
+ ASSERT_STREQ("TIMESTAMP_MICROS",
+ LogicalTypeToString(LogicalType::TIMESTAMP_MICROS).c_str());
ASSERT_STREQ("UINT_8", LogicalTypeToString(LogicalType::UINT_8).c_str());
ASSERT_STREQ("UINT_16", LogicalTypeToString(LogicalType::UINT_16).c_str());
ASSERT_STREQ("UINT_32", LogicalTypeToString(LogicalType::UINT_32).c_str());
@@ -106,10 +106,10 @@ TEST(TypePrinter, StatisticsTypes) {
smin = std::string("abcdefgh");
smax = std::string("ijklmnop");
- ASSERT_STREQ(
- "abcdefgh ", FormatStatValue(Type::FIXED_LEN_BYTE_ARRAY, smin.c_str()).c_str());
- ASSERT_STREQ(
- "ijklmnop ", FormatStatValue(Type::FIXED_LEN_BYTE_ARRAY, smax.c_str()).c_str());
+ ASSERT_STREQ("abcdefgh ",
+ FormatStatValue(Type::FIXED_LEN_BYTE_ARRAY, smin.c_str()).c_str());
+ ASSERT_STREQ("ijklmnop ",
+ FormatStatValue(Type::FIXED_LEN_BYTE_ARRAY, smax.c_str()).c_str());
}
} // namespace parquet
http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/b6f3caeb/src/parquet/util/buffer-builder.h
----------------------------------------------------------------------
diff --git a/src/parquet/util/buffer-builder.h b/src/parquet/util/buffer-builder.h
index b72e70d..26f134e 100644
--- a/src/parquet/util/buffer-builder.h
+++ b/src/parquet/util/buffer-builder.h
@@ -20,8 +20,8 @@
#ifndef PARQUET_UTIL_BUFFER_BUILDER_H
#define PARQUET_UTIL_BUFFER_BUILDER_H
-#include <cstdint>
#include <stdlib.h>
+#include <cstdint>
namespace parquet {
http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/b6f3caeb/src/parquet/util/comparison-test.cc
----------------------------------------------------------------------
diff --git a/src/parquet/util/comparison-test.cc b/src/parquet/util/comparison-test.cc
index 7ef0338..8401983 100644
--- a/src/parquet/util/comparison-test.cc
+++ b/src/parquet/util/comparison-test.cc
@@ -67,8 +67,9 @@ TEST(Comparison, FLBA) {
auto arr1 = FLBAFromString(a);
auto arr2 = FLBAFromString(b);
- NodePtr node = PrimitiveNode::Make("FLBA", Repetition::REQUIRED,
- Type::FIXED_LEN_BYTE_ARRAY, LogicalType::NONE, static_cast<int>(a.size()));
+ NodePtr node =
+ PrimitiveNode::Make("FLBA", Repetition::REQUIRED, Type::FIXED_LEN_BYTE_ARRAY,
+ LogicalType::NONE, static_cast<int>(a.size()));
ColumnDescriptor descr(node, 0, 0);
Compare<parquet::FixedLenByteArray> less(&descr);
ASSERT_TRUE(less(arr1, arr2));
http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/b6f3caeb/src/parquet/util/comparison.h
----------------------------------------------------------------------
diff --git a/src/parquet/util/comparison.h b/src/parquet/util/comparison.h
index 103f4c5..edd3df1 100644
--- a/src/parquet/util/comparison.h
+++ b/src/parquet/util/comparison.h
@@ -51,8 +51,8 @@ template <>
inline bool Compare<FLBA>::operator()(const FLBA& a, const FLBA& b) {
auto aptr = reinterpret_cast<const int8_t*>(a.ptr);
auto bptr = reinterpret_cast<const int8_t*>(b.ptr);
- return std::lexicographical_compare(
- aptr, aptr + type_length_, bptr, bptr + type_length_);
+ return std::lexicographical_compare(aptr, aptr + type_length_, bptr,
+ bptr + type_length_);
}
} // namespace parquet
http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/b6f3caeb/src/parquet/util/memory.cc
----------------------------------------------------------------------
diff --git a/src/parquet/util/memory.cc b/src/parquet/util/memory.cc
index 39c43fb..5051c7b 100644
--- a/src/parquet/util/memory.cc
+++ b/src/parquet/util/memory.cc
@@ -140,9 +140,7 @@ uint8_t* ChunkedAllocator::Allocate(int size) {
return result;
}
-uint8_t* ChunkedAllocator::Allocate(int size) {
- return Allocate<false>(size);
-}
+uint8_t* ChunkedAllocator::Allocate(int size) { return Allocate<false>(size); }
void ChunkedAllocator::Clear() {
current_chunk_idx_ = -1;
@@ -335,9 +333,7 @@ ArrowInputFile::ArrowInputFile(
const std::shared_ptr<::arrow::io::ReadableFileInterface>& file)
: file_(file) {}
-::arrow::io::FileInterface* ArrowInputFile::file_interface() {
- return file_.get();
-}
+::arrow::io::FileInterface* ArrowInputFile::file_interface() { return file_.get(); }
int64_t ArrowInputFile::Size() const {
int64_t size;
@@ -374,9 +370,7 @@ ArrowOutputStream::ArrowOutputStream(
const std::shared_ptr<::arrow::io::OutputStream> file)
: file_(file) {}
-::arrow::io::FileInterface* ArrowOutputStream::file_interface() {
- return file_.get();
-}
+::arrow::io::FileInterface* ArrowOutputStream::file_interface() { return file_.get(); }
// Copy bytes into the output stream
void ArrowOutputStream::Write(const uint8_t* data, int64_t length) {
@@ -391,8 +385,8 @@ InMemoryInputStream::InMemoryInputStream(const std::shared_ptr<Buffer>& buffer)
len_ = buffer_->size();
}
-InMemoryInputStream::InMemoryInputStream(
- RandomAccessSource* source, int64_t start, int64_t num_bytes)
+InMemoryInputStream::InMemoryInputStream(RandomAccessSource* source, int64_t start,
+ int64_t num_bytes)
: offset_(0) {
buffer_ = source->ReadAt(start, num_bytes);
if (buffer_->size() < num_bytes) {
@@ -412,24 +406,22 @@ const uint8_t* InMemoryInputStream::Read(int64_t num_to_read, int64_t* num_bytes
return result;
}
-void InMemoryInputStream::Advance(int64_t num_bytes) {
- offset_ += num_bytes;
-}
+void InMemoryInputStream::Advance(int64_t num_bytes) { offset_ += num_bytes; }
// ----------------------------------------------------------------------
// In-memory output stream
InMemoryOutputStream::InMemoryOutputStream(MemoryPool* pool, int64_t initial_capacity)
: size_(0), capacity_(initial_capacity) {
- if (initial_capacity == 0) { initial_capacity = kInMemoryDefaultCapacity; }
+ if (initial_capacity == 0) {
+ initial_capacity = kInMemoryDefaultCapacity;
+ }
buffer_ = AllocateBuffer(pool, initial_capacity);
}
InMemoryOutputStream::~InMemoryOutputStream() {}
-uint8_t* InMemoryOutputStream::Head() {
- return buffer_->mutable_data() + size_;
-}
+uint8_t* InMemoryOutputStream::Head() { return buffer_->mutable_data() + size_; }
void InMemoryOutputStream::Write(const uint8_t* data, int64_t length) {
if (size_ + length > capacity_) {
@@ -444,9 +436,7 @@ void InMemoryOutputStream::Write(const uint8_t* data, int64_t length) {
size_ += length;
}
-int64_t InMemoryOutputStream::Tell() {
- return size_;
-}
+int64_t InMemoryOutputStream::Tell() { return size_; }
std::shared_ptr<Buffer> InMemoryOutputStream::GetBuffer() {
PARQUET_THROW_NOT_OK(buffer_->Resize(size_));
@@ -459,7 +449,8 @@ std::shared_ptr<Buffer> InMemoryOutputStream::GetBuffer() {
// BufferedInputStream
BufferedInputStream::BufferedInputStream(MemoryPool* pool, int64_t buffer_size,
- RandomAccessSource* source, int64_t start, int64_t num_bytes)
+ RandomAccessSource* source, int64_t start,
+ int64_t num_bytes)
: source_(source), stream_offset_(start), stream_end_(start + num_bytes) {
buffer_ = AllocateBuffer(pool, buffer_size);
buffer_size_ = buffer_->size();
@@ -502,13 +493,17 @@ void BufferedInputStream::Advance(int64_t num_bytes) {
std::shared_ptr<PoolBuffer> AllocateBuffer(MemoryPool* pool, int64_t size) {
auto result = std::make_shared<PoolBuffer>(pool);
- if (size > 0) { PARQUET_THROW_NOT_OK(result->Resize(size)); }
+ if (size > 0) {
+ PARQUET_THROW_NOT_OK(result->Resize(size));
+ }
return result;
}
std::unique_ptr<PoolBuffer> AllocateUniqueBuffer(MemoryPool* pool, int64_t size) {
std::unique_ptr<PoolBuffer> result(new PoolBuffer(pool));
- if (size > 0) { PARQUET_THROW_NOT_OK(result->Resize(size)); }
+ if (size > 0) {
+ PARQUET_THROW_NOT_OK(result->Resize(size));
+ }
return result;
}
http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/b6f3caeb/src/parquet/util/memory.h
----------------------------------------------------------------------
diff --git a/src/parquet/util/memory.h b/src/parquet/util/memory.h
index d73a3de..04dcca4 100644
--- a/src/parquet/util/memory.h
+++ b/src/parquet/util/memory.h
@@ -414,7 +414,7 @@ class PARQUET_EXPORT InMemoryInputStream : public InputStream {
class PARQUET_EXPORT BufferedInputStream : public InputStream {
public:
BufferedInputStream(::arrow::MemoryPool* pool, int64_t buffer_size,
- RandomAccessSource* source, int64_t start, int64_t end);
+ RandomAccessSource* source, int64_t start, int64_t end);
virtual const uint8_t* Peek(int64_t num_to_peek, int64_t* num_bytes);
virtual const uint8_t* Read(int64_t num_to_read, int64_t* num_bytes);
@@ -429,11 +429,11 @@ class PARQUET_EXPORT BufferedInputStream : public InputStream {
int64_t buffer_size_;
};
-std::shared_ptr<PoolBuffer> PARQUET_EXPORT AllocateBuffer(
- ::arrow::MemoryPool* pool, int64_t size = 0);
+std::shared_ptr<PoolBuffer> PARQUET_EXPORT AllocateBuffer(::arrow::MemoryPool* pool,
+ int64_t size = 0);
-std::unique_ptr<PoolBuffer> PARQUET_EXPORT AllocateUniqueBuffer(
- ::arrow::MemoryPool* pool, int64_t size = 0);
+std::unique_ptr<PoolBuffer> PARQUET_EXPORT AllocateUniqueBuffer(::arrow::MemoryPool* pool,
+ int64_t size = 0);
} // namespace parquet
http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/b6f3caeb/src/parquet/util/schema-util.h
----------------------------------------------------------------------
diff --git a/src/parquet/util/schema-util.h b/src/parquet/util/schema-util.h
index e199c21..9187962 100644
--- a/src/parquet/util/schema-util.h
+++ b/src/parquet/util/schema-util.h
@@ -35,7 +35,9 @@ using parquet::schema::Node;
using parquet::LogicalType;
inline bool str_endswith_tuple(const std::string& str) {
- if (str.size() >= 6) { return str.substr(str.size() - 6, 6) == "_tuple"; }
+ if (str.size() >= 6) {
+ return str.substr(str.size() - 6, 6) == "_tuple";
+ }
return false;
}
@@ -63,16 +65,21 @@ inline bool IsSimpleStruct(const NodePtr& node) {
// Coalesce a list of schema fields indices which are the roots of the
// columns referred by a list of column indices
inline bool ColumnIndicesToFieldIndices(const SchemaDescriptor& descr,
- const std::vector<int>& column_indices, std::vector<int>* out) {
+ const std::vector<int>& column_indices,
+ std::vector<int>* out) {
const GroupNode* group = descr.group_node();
std::unordered_set<int> already_added;
out->clear();
for (auto& column_idx : column_indices) {
auto field_node = descr.GetColumnRoot(column_idx);
auto field_idx = group->FieldIndex(field_node->name());
- if (field_idx < 0) { return false; }
+ if (field_idx < 0) {
+ return false;
+ }
auto insertion = already_added.insert(field_idx);
- if (insertion.second) { out->push_back(field_idx); }
+ if (insertion.second) {
+ out->push_back(field_idx);
+ }
}
return true;
http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/b6f3caeb/src/parquet/util/test-common.h
----------------------------------------------------------------------
diff --git a/src/parquet/util/test-common.h b/src/parquet/util/test-common.h
index 2327aeb..1043378 100644
--- a/src/parquet/util/test-common.h
+++ b/src/parquet/util/test-common.h
@@ -32,7 +32,7 @@ namespace parquet {
namespace test {
typedef ::testing::Types<BooleanType, Int32Type, Int64Type, Int96Type, FloatType,
- DoubleType, ByteArrayType, FLBAType>
+ DoubleType, ByteArrayType, FLBAType>
ParquetTypes;
template <typename T>
@@ -46,7 +46,9 @@ static inline void assert_vector_equal(const vector<T>& left, const vector<T>& r
template <typename T>
static inline bool vector_equal(const vector<T>& left, const vector<T>& right) {
- if (left.size() != right.size()) { return false; }
+ if (left.size() != right.size()) {
+ return false;
+ }
for (size_t i = 0; i < left.size(); ++i) {
if (left[i] != right[i]) {
@@ -61,7 +63,9 @@ static inline bool vector_equal(const vector<T>& left, const vector<T>& right) {
template <typename T>
static vector<T> slice(const vector<T>& values, int start, int end) {
- if (end < start) { return vector<T>(0); }
+ if (end < start) {
+ return vector<T>(0);
+ }
vector<T> out(end - start);
for (int i = start; i < end; ++i) {
@@ -130,8 +134,8 @@ void random_numbers(int n, uint32_t seed, float min_value, float max_value, floa
}
template <>
-void random_numbers(
- int n, uint32_t seed, double min_value, double max_value, double* out) {
+void random_numbers(int n, uint32_t seed, double min_value, double max_value,
+ double* out) {
std::mt19937 gen(seed);
std::uniform_real_distribution<double> d(min_value, max_value);
for (int i = 0; i < n; ++i) {
@@ -139,8 +143,8 @@ void random_numbers(
}
}
-void random_Int96_numbers(
- int n, uint32_t seed, int32_t min_value, int32_t max_value, Int96* out) {
+void random_Int96_numbers(int n, uint32_t seed, int32_t min_value, int32_t max_value,
+ Int96* out) {
std::mt19937 gen(seed);
std::uniform_int_distribution<int32_t> d(min_value, max_value);
for (int i = 0; i < n; ++i) {
@@ -162,8 +166,8 @@ void random_fixed_byte_array(int n, uint32_t seed, uint8_t* buf, int len, FLBA*
}
}
-void random_byte_array(
- int n, uint32_t seed, uint8_t* buf, ByteArray* out, int min_size, int max_size) {
+void random_byte_array(int n, uint32_t seed, uint8_t* buf, ByteArray* out, int min_size,
+ int max_size) {
std::mt19937 gen(seed);
std::uniform_int_distribution<int> d1(min_size, max_size);
std::uniform_int_distribution<int> d2(0, 255);
http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/b6f3caeb/tools/parquet-scan.cc
----------------------------------------------------------------------
diff --git a/tools/parquet-scan.cc b/tools/parquet-scan.cc
index 8ab15a4..5bf2b18 100644
--- a/tools/parquet-scan.cc
+++ b/tools/parquet-scan.cc
@@ -49,7 +49,9 @@ int main(int argc, char** argv) {
}
} else if ((param = std::strstr(argv[i], BATCH_SIZE_PREFIX.c_str()))) {
value = std::strtok(param + BATCH_SIZE_PREFIX.length(), " ");
- if (value) { batch_size = std::atoi(value); }
+ if (value) {
+ batch_size = std::atoi(value);
+ }
} else {
filename = argv[i];
}
@@ -84,8 +86,9 @@ int main(int argc, char** argv) {
int64_t values_read = 0;
while (col_reader->HasNext()) {
- total_rows[col] += ScanAllValues(batch_size, def_levels.data(),
- rep_levels.data(), values.data(), &values_read, col_reader.get());
+ total_rows[col] +=
+ ScanAllValues(batch_size, def_levels.data(), rep_levels.data(),
+ values.data(), &values_read, col_reader.get());
}
col++;
}
[3/5] parquet-cpp git commit: PARQUET-1068: Modify .clang-format to
use straight Google format with 90-character line width
Posted by we...@apache.org.
http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/b6f3caeb/src/parquet/column_page.h
----------------------------------------------------------------------
diff --git a/src/parquet/column_page.h b/src/parquet/column_page.h
index 7840612..85e3bb5 100644
--- a/src/parquet/column_page.h
+++ b/src/parquet/column_page.h
@@ -62,9 +62,9 @@ class Page {
class DataPage : public Page {
public:
DataPage(const std::shared_ptr<Buffer>& buffer, int32_t num_values,
- Encoding::type encoding, Encoding::type definition_level_encoding,
- Encoding::type repetition_level_encoding,
- const EncodedStatistics& statistics = EncodedStatistics())
+ Encoding::type encoding, Encoding::type definition_level_encoding,
+ Encoding::type repetition_level_encoding,
+ const EncodedStatistics& statistics = EncodedStatistics())
: Page(buffer, PageType::DATA_PAGE),
num_values_(num_values),
encoding_(encoding),
@@ -93,11 +93,11 @@ class DataPage : public Page {
class CompressedDataPage : public DataPage {
public:
CompressedDataPage(const std::shared_ptr<Buffer>& buffer, int32_t num_values,
- Encoding::type encoding, Encoding::type definition_level_encoding,
- Encoding::type repetition_level_encoding, int64_t uncompressed_size,
- const EncodedStatistics& statistics = EncodedStatistics())
+ Encoding::type encoding, Encoding::type definition_level_encoding,
+ Encoding::type repetition_level_encoding, int64_t uncompressed_size,
+ const EncodedStatistics& statistics = EncodedStatistics())
: DataPage(buffer, num_values, encoding, definition_level_encoding,
- repetition_level_encoding, statistics),
+ repetition_level_encoding, statistics),
uncompressed_size_(uncompressed_size) {}
int64_t uncompressed_size() const { return uncompressed_size_; }
@@ -109,8 +109,9 @@ class CompressedDataPage : public DataPage {
class DataPageV2 : public Page {
public:
DataPageV2(const std::shared_ptr<Buffer>& buffer, int32_t num_values, int32_t num_nulls,
- int32_t num_rows, Encoding::type encoding, int32_t definition_levels_byte_length,
- int32_t repetition_levels_byte_length, bool is_compressed = false)
+ int32_t num_rows, Encoding::type encoding,
+ int32_t definition_levels_byte_length, int32_t repetition_levels_byte_length,
+ bool is_compressed = false)
: Page(buffer, PageType::DATA_PAGE_V2),
num_values_(num_values),
num_nulls_(num_nulls),
@@ -149,7 +150,7 @@ class DataPageV2 : public Page {
class DictionaryPage : public Page {
public:
DictionaryPage(const std::shared_ptr<Buffer>& buffer, int32_t num_values,
- Encoding::type encoding, bool is_sorted = false)
+ Encoding::type encoding, bool is_sorted = false)
: Page(buffer, PageType::DICTIONARY_PAGE),
num_values_(num_values),
encoding_(encoding),
http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/b6f3caeb/src/parquet/column_reader-test.cc
----------------------------------------------------------------------
diff --git a/src/parquet/column_reader-test.cc b/src/parquet/column_reader-test.cc
index 84d1e37..2599325 100644
--- a/src/parquet/column_reader-test.cc
+++ b/src/parquet/column_reader-test.cc
@@ -44,8 +44,10 @@ namespace test {
template <typename T>
static inline bool vector_equal_with_def_levels(const vector<T>& left,
- const vector<int16_t>& def_levels, int16_t max_def_levels, int16_t max_rep_levels,
- const vector<T>& right) {
+ const vector<int16_t>& def_levels,
+ int16_t max_def_levels,
+ int16_t max_rep_levels,
+ const vector<T>& right) {
size_t i_left = 0;
size_t i_right = 0;
for (size_t i = 0; i < def_levels.size(); i++) {
@@ -63,7 +65,9 @@ static inline bool vector_equal_with_def_levels(const vector<T>& left,
i_right++;
} else if (def_levels[i] < (max_def_levels - 1)) {
// Null entry on a higher nesting level, only supported for non-repeating data
- if (max_rep_levels == 0) { i_right++; }
+ if (max_rep_levels == 0) {
+ i_right++;
+ }
}
}
@@ -93,8 +97,9 @@ class TestPrimitiveReader : public ::testing::Test {
// 1) batch_size < page_size (multiple ReadBatch from a single page)
// 2) batch_size > page_size (BatchRead limits to a single page)
do {
- batch = static_cast<int>(reader->ReadBatch(batch_size, &dresult[0] + batch_actual,
- &rresult[0] + batch_actual, &vresult[0] + total_values_read, &values_read));
+ batch = static_cast<int>(reader->ReadBatch(
+ batch_size, &dresult[0] + batch_actual, &rresult[0] + batch_actual,
+ &vresult[0] + total_values_read, &values_read));
total_values_read += static_cast<int>(values_read);
batch_actual += batch;
batch_size = std::max(batch_size * 2, 4096);
@@ -103,8 +108,12 @@ class TestPrimitiveReader : public ::testing::Test {
ASSERT_EQ(num_levels_, batch_actual);
ASSERT_EQ(num_values_, total_values_read);
ASSERT_TRUE(vector_equal(values_, vresult));
- if (max_def_level_ > 0) { ASSERT_TRUE(vector_equal(def_levels_, dresult)); }
- if (max_rep_level_ > 0) { ASSERT_TRUE(vector_equal(rep_levels_, rresult)); }
+ if (max_def_level_ > 0) {
+ ASSERT_TRUE(vector_equal(def_levels_, dresult));
+ }
+ if (max_rep_level_ > 0) {
+ ASSERT_TRUE(vector_equal(rep_levels_, rresult));
+ }
// catch improper writes at EOS
batch_actual =
static_cast<int>(reader->ReadBatch(5, nullptr, nullptr, nullptr, &values_read));
@@ -131,8 +140,8 @@ class TestPrimitiveReader : public ::testing::Test {
// 1) batch_size < page_size (multiple ReadBatch from a single page)
// 2) batch_size > page_size (BatchRead limits to a single page)
do {
- batch = static_cast<int>(reader->ReadBatchSpaced(batch_size,
- dresult.data() + levels_actual, rresult.data() + levels_actual,
+ batch = static_cast<int>(reader->ReadBatchSpaced(
+ batch_size, dresult.data() + levels_actual, rresult.data() + levels_actual,
vresult.data() + batch_actual, valid_bits.data() + batch_actual, 0,
&levels_read, &values_read, &null_count));
total_values_read += batch - static_cast<int>(null_count);
@@ -145,15 +154,18 @@ class TestPrimitiveReader : public ::testing::Test {
ASSERT_EQ(num_values_, total_values_read);
if (max_def_level_ > 0) {
ASSERT_TRUE(vector_equal(def_levels_, dresult));
- ASSERT_TRUE(vector_equal_with_def_levels(
- values_, dresult, max_def_level_, max_rep_level_, vresult));
+ ASSERT_TRUE(vector_equal_with_def_levels(values_, dresult, max_def_level_,
+ max_rep_level_, vresult));
} else {
ASSERT_TRUE(vector_equal(values_, vresult));
}
- if (max_rep_level_ > 0) { ASSERT_TRUE(vector_equal(rep_levels_, rresult)); }
+ if (max_rep_level_ > 0) {
+ ASSERT_TRUE(vector_equal(rep_levels_, rresult));
+ }
// catch improper writes at EOS
- batch_actual = static_cast<int>(reader->ReadBatchSpaced(5, nullptr, nullptr, nullptr,
- valid_bits.data(), 0, &levels_read, &values_read, &null_count));
+ batch_actual = static_cast<int>(
+ reader->ReadBatchSpaced(5, nullptr, nullptr, nullptr, valid_bits.data(), 0,
+ &levels_read, &values_read, &null_count));
ASSERT_EQ(0, batch_actual);
ASSERT_EQ(0, null_count);
}
@@ -167,15 +179,17 @@ class TestPrimitiveReader : public ::testing::Test {
}
void ExecutePlain(int num_pages, int levels_per_page, const ColumnDescriptor* d) {
- num_values_ = MakePages<Int32Type>(d, num_pages, levels_per_page, def_levels_,
- rep_levels_, values_, data_buffer_, pages_, Encoding::PLAIN);
+ num_values_ =
+ MakePages<Int32Type>(d, num_pages, levels_per_page, def_levels_, rep_levels_,
+ values_, data_buffer_, pages_, Encoding::PLAIN);
num_levels_ = num_pages * levels_per_page;
InitReader(d);
CheckResults();
Clear();
- num_values_ = MakePages<Int32Type>(d, num_pages, levels_per_page, def_levels_,
- rep_levels_, values_, data_buffer_, pages_, Encoding::PLAIN);
+ num_values_ =
+ MakePages<Int32Type>(d, num_pages, levels_per_page, def_levels_, rep_levels_,
+ values_, data_buffer_, pages_, Encoding::PLAIN);
num_levels_ = num_pages * levels_per_page;
InitReader(d);
CheckResultsSpaced();
@@ -183,15 +197,17 @@ class TestPrimitiveReader : public ::testing::Test {
}
void ExecuteDict(int num_pages, int levels_per_page, const ColumnDescriptor* d) {
- num_values_ = MakePages<Int32Type>(d, num_pages, levels_per_page, def_levels_,
- rep_levels_, values_, data_buffer_, pages_, Encoding::RLE_DICTIONARY);
+ num_values_ =
+ MakePages<Int32Type>(d, num_pages, levels_per_page, def_levels_, rep_levels_,
+ values_, data_buffer_, pages_, Encoding::RLE_DICTIONARY);
num_levels_ = num_pages * levels_per_page;
InitReader(d);
CheckResults();
Clear();
- num_values_ = MakePages<Int32Type>(d, num_pages, levels_per_page, def_levels_,
- rep_levels_, values_, data_buffer_, pages_, Encoding::RLE_DICTIONARY);
+ num_values_ =
+ MakePages<Int32Type>(d, num_pages, levels_per_page, def_levels_, rep_levels_,
+ values_, data_buffer_, pages_, Encoding::RLE_DICTIONARY);
num_levels_ = num_pages * levels_per_page;
InitReader(d);
CheckResultsSpaced();
@@ -252,7 +268,7 @@ TEST_F(TestPrimitiveReader, TestInt32FlatRequiredSkip) {
NodePtr type = schema::Int32("b", Repetition::REQUIRED);
const ColumnDescriptor descr(type, max_def_level_, max_rep_level_);
MakePages<Int32Type>(&descr, num_pages, levels_per_page, def_levels_, rep_levels_,
- values_, data_buffer_, pages_, Encoding::PLAIN);
+ values_, data_buffer_, pages_, Encoding::PLAIN);
InitReader(&descr);
vector<int32_t> vresult(levels_per_page / 2, -1);
vector<int16_t> dresult(levels_per_page / 2, -1);
@@ -266,9 +282,10 @@ TEST_F(TestPrimitiveReader, TestInt32FlatRequiredSkip) {
int64_t levels_skipped = reader->Skip(2 * levels_per_page);
ASSERT_EQ(2 * levels_per_page, levels_skipped);
// Read half a page
- reader->ReadBatch(
- levels_per_page / 2, dresult.data(), rresult.data(), vresult.data(), &values_read);
- vector<int32_t> sub_values(values_.begin() + 2 * levels_per_page,
+ reader->ReadBatch(levels_per_page / 2, dresult.data(), rresult.data(), vresult.data(),
+ &values_read);
+ vector<int32_t> sub_values(
+ values_.begin() + 2 * levels_per_page,
values_.begin() + static_cast<int>(2.5 * static_cast<double>(levels_per_page)));
ASSERT_TRUE(vector_equal(sub_values, vresult));
@@ -276,10 +293,11 @@ TEST_F(TestPrimitiveReader, TestInt32FlatRequiredSkip) {
levels_skipped = reader->Skip(levels_per_page);
ASSERT_EQ(levels_per_page, levels_skipped);
// Read half a page
- reader->ReadBatch(
- levels_per_page / 2, dresult.data(), rresult.data(), vresult.data(), &values_read);
+ reader->ReadBatch(levels_per_page / 2, dresult.data(), rresult.data(), vresult.data(),
+ &values_read);
sub_values.clear();
- sub_values.insert(sub_values.end(),
+ sub_values.insert(
+ sub_values.end(),
values_.begin() + static_cast<int>(3.5 * static_cast<double>(levels_per_page)),
values_.begin() + 4 * levels_per_page);
ASSERT_TRUE(vector_equal(sub_values, vresult));
@@ -289,10 +307,11 @@ TEST_F(TestPrimitiveReader, TestInt32FlatRequiredSkip) {
levels_skipped = reader->Skip(levels_per_page / 2);
ASSERT_EQ(0.5 * levels_per_page, levels_skipped);
// Read half a page
- reader->ReadBatch(
- levels_per_page / 2, dresult.data(), rresult.data(), vresult.data(), &values_read);
+ reader->ReadBatch(levels_per_page / 2, dresult.data(), rresult.data(), vresult.data(),
+ &values_read);
sub_values.clear();
- sub_values.insert(sub_values.end(),
+ sub_values.insert(
+ sub_values.end(),
values_.begin() + static_cast<int>(4.5 * static_cast<double>(levels_per_page)),
values_.end());
ASSERT_TRUE(vector_equal(sub_values, vresult));
@@ -323,8 +342,8 @@ TEST_F(TestPrimitiveReader, TestDictionaryEncodedPages) {
pages_.clear();
dict_page = std::make_shared<DictionaryPage>(dummy, 0, Encoding::PLAIN_DICTIONARY);
- data_page = MakeDataPage<Int32Type>(
- &descr, {}, 0, Encoding::PLAIN_DICTIONARY, {}, 0, {}, 0, {}, 0);
+ data_page = MakeDataPage<Int32Type>(&descr, {}, 0, Encoding::PLAIN_DICTIONARY, {}, 0,
+ {}, 0, {}, 0);
pages_.push_back(dict_page);
pages_.push_back(data_page);
InitReader(&descr);
@@ -332,8 +351,8 @@ TEST_F(TestPrimitiveReader, TestDictionaryEncodedPages) {
ASSERT_NO_THROW(reader_->HasNext());
pages_.clear();
- data_page = MakeDataPage<Int32Type>(
- &descr, {}, 0, Encoding::RLE_DICTIONARY, {}, 0, {}, 0, {}, 0);
+ data_page = MakeDataPage<Int32Type>(&descr, {}, 0, Encoding::RLE_DICTIONARY, {}, 0, {},
+ 0, {}, 0);
pages_.push_back(data_page);
InitReader(&descr);
// Tests dictionary page must occur before data page
@@ -358,8 +377,8 @@ TEST_F(TestPrimitiveReader, TestDictionaryEncodedPages) {
ASSERT_THROW(reader_->HasNext(), ParquetException);
pages_.clear();
- data_page = MakeDataPage<Int32Type>(
- &descr, {}, 0, Encoding::DELTA_BYTE_ARRAY, {}, 0, {}, 0, {}, 0);
+ data_page = MakeDataPage<Int32Type>(&descr, {}, 0, Encoding::DELTA_BYTE_ARRAY, {}, 0,
+ {}, 0, {}, 0);
pages_.push_back(data_page);
InitReader(&descr);
// unsupported encoding
http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/b6f3caeb/src/parquet/column_reader.cc
----------------------------------------------------------------------
diff --git a/src/parquet/column_reader.cc b/src/parquet/column_reader.cc
index ce6936d..5f6259f 100644
--- a/src/parquet/column_reader.cc
+++ b/src/parquet/column_reader.cc
@@ -36,7 +36,7 @@ LevelDecoder::LevelDecoder() : num_values_remaining_(0) {}
LevelDecoder::~LevelDecoder() {}
int LevelDecoder::SetData(Encoding::type encoding, int16_t max_level,
- int num_buffered_values, const uint8_t* data) {
+ int num_buffered_values, const uint8_t* data) {
int32_t num_bytes = 0;
encoding_ = encoding;
num_values_remaining_ = num_buffered_values;
@@ -86,8 +86,8 @@ ReaderProperties default_reader_properties() {
return default_reader_properties;
}
-ColumnReader::ColumnReader(
- const ColumnDescriptor* descr, std::unique_ptr<PageReader> pager, MemoryPool* pool)
+ColumnReader::ColumnReader(const ColumnDescriptor* descr,
+ std::unique_ptr<PageReader> pager, MemoryPool* pool)
: descr_(descr),
pager_(std::move(pager)),
num_buffered_values_(0),
@@ -193,7 +193,9 @@ bool TypedColumnReader<DType>::ReadNewPage() {
// first page with this encoding.
Encoding::type encoding = page->encoding();
- if (IsDictionaryIndexEncoding(encoding)) { encoding = Encoding::RLE_DICTIONARY; }
+ if (IsDictionaryIndexEncoding(encoding)) {
+ encoding = Encoding::RLE_DICTIONARY;
+ }
auto it = decoders_.find(static_cast<int>(encoding));
if (it != decoders_.end()) {
@@ -221,8 +223,8 @@ bool TypedColumnReader<DType>::ReadNewPage() {
throw ParquetException("Unknown encoding type.");
}
}
- current_decoder_->SetData(
- static_cast<int>(num_buffered_values_), buffer, static_cast<int>(data_size));
+ current_decoder_->SetData(static_cast<int>(num_buffered_values_), buffer,
+ static_cast<int>(data_size));
return true;
} else {
// We don't know what this page type is. We're allowed to skip non-data
@@ -237,20 +239,25 @@ bool TypedColumnReader<DType>::ReadNewPage() {
// Batch read APIs
int64_t ColumnReader::ReadDefinitionLevels(int64_t batch_size, int16_t* levels) {
- if (descr_->max_definition_level() == 0) { return 0; }
+ if (descr_->max_definition_level() == 0) {
+ return 0;
+ }
return definition_level_decoder_.Decode(static_cast<int>(batch_size), levels);
}
int64_t ColumnReader::ReadRepetitionLevels(int64_t batch_size, int16_t* levels) {
- if (descr_->max_repetition_level() == 0) { return 0; }
+ if (descr_->max_repetition_level() == 0) {
+ return 0;
+ }
return repetition_level_decoder_.Decode(static_cast<int>(batch_size), levels);
}
// ----------------------------------------------------------------------
// Dynamic column reader constructor
-std::shared_ptr<ColumnReader> ColumnReader::Make(
- const ColumnDescriptor* descr, std::unique_ptr<PageReader> pager, MemoryPool* pool) {
+std::shared_ptr<ColumnReader> ColumnReader::Make(const ColumnDescriptor* descr,
+ std::unique_ptr<PageReader> pager,
+ MemoryPool* pool) {
switch (descr->physical_type()) {
case Type::BOOLEAN:
return std::make_shared<BoolReader>(descr, std::move(pager), pool);
http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/b6f3caeb/src/parquet/column_reader.h
----------------------------------------------------------------------
diff --git a/src/parquet/column_reader.h b/src/parquet/column_reader.h
index e733d67..df7deb8 100644
--- a/src/parquet/column_reader.h
+++ b/src/parquet/column_reader.h
@@ -53,7 +53,7 @@ class PARQUET_EXPORT LevelDecoder {
// Initialize the LevelDecoder state with new data
// and return the number of bytes consumed
int SetData(Encoding::type encoding, int16_t max_level, int num_buffered_values,
- const uint8_t* data);
+ const uint8_t* data);
// Decodes a batch of levels into an array and returns the number of levels decoded
int Decode(int batch_size, int16_t* levels);
@@ -69,11 +69,11 @@ class PARQUET_EXPORT LevelDecoder {
class PARQUET_EXPORT ColumnReader {
public:
ColumnReader(const ColumnDescriptor*, std::unique_ptr<PageReader>,
- ::arrow::MemoryPool* pool = ::arrow::default_memory_pool());
+ ::arrow::MemoryPool* pool = ::arrow::default_memory_pool());
virtual ~ColumnReader();
- static std::shared_ptr<ColumnReader> Make(const ColumnDescriptor* descr,
- std::unique_ptr<PageReader> pager,
+ static std::shared_ptr<ColumnReader> Make(
+ const ColumnDescriptor* descr, std::unique_ptr<PageReader> pager,
::arrow::MemoryPool* pool = ::arrow::default_memory_pool());
// Returns true if there are still values in this column.
@@ -81,7 +81,9 @@ class PARQUET_EXPORT ColumnReader {
// Either there is no data page available yet, or the data page has been
// exhausted
if (num_buffered_values_ == 0 || num_decoded_values_ == num_buffered_values_) {
- if (!ReadNewPage() || num_buffered_values_ == 0) { return false; }
+ if (!ReadNewPage() || num_buffered_values_ == 0) {
+ return false;
+ }
}
return true;
}
@@ -135,7 +137,7 @@ class PARQUET_EXPORT TypedColumnReader : public ColumnReader {
typedef typename DType::c_type T;
TypedColumnReader(const ColumnDescriptor* schema, std::unique_ptr<PageReader> pager,
- ::arrow::MemoryPool* pool = ::arrow::default_memory_pool())
+ ::arrow::MemoryPool* pool = ::arrow::default_memory_pool())
: ColumnReader(schema, std::move(pager), pool), current_decoder_(NULL) {}
virtual ~TypedColumnReader() {}
@@ -157,7 +159,7 @@ class PARQUET_EXPORT TypedColumnReader : public ColumnReader {
//
// @returns: actual number of levels read (see values_read for number of values read)
int64_t ReadBatch(int64_t batch_size, int16_t* def_levels, int16_t* rep_levels,
- T* values, int64_t* values_read);
+ T* values, int64_t* values_read);
/// Read a batch of repetition levels, definition levels, and values from the
/// column and leave spaces for null entries on the lowest level in the values
@@ -194,8 +196,9 @@ class PARQUET_EXPORT TypedColumnReader : public ColumnReader {
/// @param[out] null_count The number of nulls on the lowest levels.
/// (i.e. (values_read - null_count) is total number of non-null entries)
int64_t ReadBatchSpaced(int64_t batch_size, int16_t* def_levels, int16_t* rep_levels,
- T* values, uint8_t* valid_bits, int64_t valid_bits_offset, int64_t* levels_read,
- int64_t* values_read, int64_t* null_count);
+ T* values, uint8_t* valid_bits, int64_t valid_bits_offset,
+ int64_t* levels_read, int64_t* values_read,
+ int64_t* null_count);
// Skip reading levels
// Returns the number of levels skipped
@@ -219,7 +222,7 @@ class PARQUET_EXPORT TypedColumnReader : public ColumnReader {
//
// @returns: the number of values read into the out buffer
int64_t ReadValuesSpaced(int64_t batch_size, T* out, int null_count,
- uint8_t* valid_bits, int64_t valid_bits_offset);
+ uint8_t* valid_bits, int64_t valid_bits_offset);
// Map of encoding type to the respective decoder object. For example, a
// column chunk's data pages may include both dictionary-encoded and
@@ -239,14 +242,18 @@ inline int64_t TypedColumnReader<DType>::ReadValues(int64_t batch_size, T* out)
template <typename DType>
inline int64_t TypedColumnReader<DType>::ReadValuesSpaced(int64_t batch_size, T* out,
- int null_count, uint8_t* valid_bits, int64_t valid_bits_offset) {
- return current_decoder_->DecodeSpaced(
- out, static_cast<int>(batch_size), null_count, valid_bits, valid_bits_offset);
+ int null_count,
+ uint8_t* valid_bits,
+ int64_t valid_bits_offset) {
+ return current_decoder_->DecodeSpaced(out, static_cast<int>(batch_size), null_count,
+ valid_bits, valid_bits_offset);
}
template <typename DType>
inline int64_t TypedColumnReader<DType>::ReadBatch(int64_t batch_size,
- int16_t* def_levels, int16_t* rep_levels, T* values, int64_t* values_read) {
+ int16_t* def_levels,
+ int16_t* rep_levels, T* values,
+ int64_t* values_read) {
// HasNext invokes ReadNewPage
if (!HasNext()) {
*values_read = 0;
@@ -268,7 +275,9 @@ inline int64_t TypedColumnReader<DType>::ReadBatch(int64_t batch_size,
// TODO(wesm): this tallying of values-to-decode can be performed with better
// cache-efficiency if fused with the level decoding.
for (int64_t i = 0; i < num_def_levels; ++i) {
- if (def_levels[i] == descr_->max_definition_level()) { ++values_to_read; }
+ if (def_levels[i] == descr_->max_definition_level()) {
+ ++values_to_read;
+ }
}
} else {
// Required field, read all values
@@ -291,8 +300,10 @@ inline int64_t TypedColumnReader<DType>::ReadBatch(int64_t batch_size,
}
inline void DefinitionLevelsToBitmap(const int16_t* def_levels, int64_t num_def_levels,
- int16_t max_definition_level, int16_t max_repetition_level, int64_t* values_read,
- int64_t* null_count, uint8_t* valid_bits, int64_t valid_bits_offset) {
+ int16_t max_definition_level,
+ int16_t max_repetition_level, int64_t* values_read,
+ int64_t* null_count, uint8_t* valid_bits,
+ int64_t valid_bits_offset) {
int byte_offset = static_cast<int>(valid_bits_offset) / 8;
int bit_offset = static_cast<int>(valid_bits_offset) % 8;
uint8_t bitset = valid_bits[byte_offset];
@@ -330,15 +341,17 @@ inline void DefinitionLevelsToBitmap(const int16_t* def_levels, int64_t num_def_
bitset = valid_bits[byte_offset];
}
}
- if (bit_offset != 0) { valid_bits[byte_offset] = bitset; }
+ if (bit_offset != 0) {
+ valid_bits[byte_offset] = bitset;
+ }
*values_read = (bit_offset + byte_offset * 8 - valid_bits_offset);
}
template <typename DType>
-inline int64_t TypedColumnReader<DType>::ReadBatchSpaced(int64_t batch_size,
- int16_t* def_levels, int16_t* rep_levels, T* values, uint8_t* valid_bits,
- int64_t valid_bits_offset, int64_t* levels_read, int64_t* values_read,
- int64_t* null_count_out) {
+inline int64_t TypedColumnReader<DType>::ReadBatchSpaced(
+ int64_t batch_size, int16_t* def_levels, int16_t* rep_levels, T* values,
+ uint8_t* valid_bits, int64_t valid_bits_offset, int64_t* levels_read,
+ int64_t* values_read, int64_t* null_count_out) {
// HasNext invokes ReadNewPage
if (!HasNext()) {
*levels_read = 0;
@@ -388,7 +401,9 @@ inline int64_t TypedColumnReader<DType>::ReadBatchSpaced(int64_t batch_size,
if (!has_spaced_values) {
int values_to_read = 0;
for (int64_t i = 0; i < num_def_levels; ++i) {
- if (def_levels[i] == descr_->max_definition_level()) { ++values_to_read; }
+ if (def_levels[i] == descr_->max_definition_level()) {
+ ++values_to_read;
+ }
}
total_values = ReadValues(values_to_read, values);
for (int64_t i = 0; i < total_values; i++) {
@@ -399,9 +414,10 @@ inline int64_t TypedColumnReader<DType>::ReadBatchSpaced(int64_t batch_size,
int16_t max_definition_level = descr_->max_definition_level();
int16_t max_repetition_level = descr_->max_repetition_level();
DefinitionLevelsToBitmap(def_levels, num_def_levels, max_definition_level,
- max_repetition_level, values_read, &null_count, valid_bits, valid_bits_offset);
+ max_repetition_level, values_read, &null_count, valid_bits,
+ valid_bits_offset);
total_values = ReadValuesSpaced(*values_read, values, static_cast<int>(null_count),
- valid_bits, valid_bits_offset);
+ valid_bits, valid_bits_offset);
}
*levels_read = num_def_levels;
*null_count_out = null_count;
@@ -446,9 +462,9 @@ inline int64_t TypedColumnReader<DType>::Skip(int64_t num_rows_to_skip) {
do {
batch_size = std::min(batch_size, rows_to_skip);
values_read = ReadBatch(static_cast<int>(batch_size),
- reinterpret_cast<int16_t*>(def_levels->mutable_data()),
- reinterpret_cast<int16_t*>(rep_levels->mutable_data()),
- reinterpret_cast<T*>(vals->mutable_data()), &values_read);
+ reinterpret_cast<int16_t*>(def_levels->mutable_data()),
+ reinterpret_cast<int16_t*>(rep_levels->mutable_data()),
+ reinterpret_cast<T*>(vals->mutable_data()), &values_read);
rows_to_skip -= values_read;
} while (values_read > 0 && rows_to_skip > 0);
}
http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/b6f3caeb/src/parquet/column_scanner-test.cc
----------------------------------------------------------------------
diff --git a/src/parquet/column_scanner-test.cc b/src/parquet/column_scanner-test.cc
index 086722b..0cebdc0 100644
--- a/src/parquet/column_scanner-test.cc
+++ b/src/parquet/column_scanner-test.cc
@@ -43,8 +43,8 @@ using schema::NodePtr;
namespace test {
template <>
-void InitDictValues<bool>(
- int num_values, int dict_per_page, vector<bool>& values, vector<uint8_t>& buffer) {
+void InitDictValues<bool>(int num_values, int dict_per_page, vector<bool>& values,
+ vector<uint8_t>& buffer) {
// No op for bool
}
@@ -91,9 +91,9 @@ class TestFlatScanner : public ::testing::Test {
}
void Execute(int num_pages, int levels_per_page, int batch_size,
- const ColumnDescriptor* d, Encoding::type encoding) {
+ const ColumnDescriptor* d, Encoding::type encoding) {
num_values_ = MakePages<Type>(d, num_pages, levels_per_page, def_levels_, rep_levels_,
- values_, data_buffer_, pages_, encoding);
+ values_, data_buffer_, pages_, encoding);
num_levels_ = num_pages * levels_per_page;
InitScanner(d);
CheckResults(batch_size, d);
@@ -101,22 +101,22 @@ class TestFlatScanner : public ::testing::Test {
}
void InitDescriptors(std::shared_ptr<ColumnDescriptor>& d1,
- std::shared_ptr<ColumnDescriptor>& d2, std::shared_ptr<ColumnDescriptor>& d3,
- int length) {
+ std::shared_ptr<ColumnDescriptor>& d2,
+ std::shared_ptr<ColumnDescriptor>& d3, int length) {
NodePtr type;
- type = schema::PrimitiveNode::Make(
- "c1", Repetition::REQUIRED, Type::type_num, LogicalType::NONE, length);
+ type = schema::PrimitiveNode::Make("c1", Repetition::REQUIRED, Type::type_num,
+ LogicalType::NONE, length);
d1.reset(new ColumnDescriptor(type, 0, 0));
- type = schema::PrimitiveNode::Make(
- "c2", Repetition::OPTIONAL, Type::type_num, LogicalType::NONE, length);
+ type = schema::PrimitiveNode::Make("c2", Repetition::OPTIONAL, Type::type_num,
+ LogicalType::NONE, length);
d2.reset(new ColumnDescriptor(type, 4, 0));
- type = schema::PrimitiveNode::Make(
- "c3", Repetition::REPEATED, Type::type_num, LogicalType::NONE, length);
+ type = schema::PrimitiveNode::Make("c3", Repetition::REPEATED, Type::type_num,
+ LogicalType::NONE, length);
d3.reset(new ColumnDescriptor(type, 4, 2));
}
void ExecuteAll(int num_pages, int num_levels, int batch_size, int type_length,
- Encoding::type encoding = Encoding::PLAIN) {
+ Encoding::type encoding = Encoding::PLAIN) {
std::shared_ptr<ColumnDescriptor> d1;
std::shared_ptr<ColumnDescriptor> d2;
std::shared_ptr<ColumnDescriptor> d3;
@@ -145,7 +145,7 @@ static int num_pages = 20;
static int batch_size = 32;
typedef ::testing::Types<Int32Type, Int64Type, Int96Type, FloatType, DoubleType,
- ByteArrayType>
+ ByteArrayType>
TestTypes;
using TestBooleanFlatScanner = TestFlatScanner<BooleanType>;
@@ -158,8 +158,8 @@ TYPED_TEST(TestFlatScanner, TestPlainScanner) {
}
TYPED_TEST(TestFlatScanner, TestDictScanner) {
- this->ExecuteAll(
- num_pages, num_levels_per_page, batch_size, 0, Encoding::RLE_DICTIONARY);
+ this->ExecuteAll(num_pages, num_levels_per_page, batch_size, 0,
+ Encoding::RLE_DICTIONARY);
}
TEST_F(TestBooleanFlatScanner, TestPlainScanner) {
@@ -171,33 +171,35 @@ TEST_F(TestFLBAFlatScanner, TestPlainScanner) {
}
TEST_F(TestFLBAFlatScanner, TestDictScanner) {
- this->ExecuteAll(
- num_pages, num_levels_per_page, batch_size, FLBA_LENGTH, Encoding::RLE_DICTIONARY);
+ this->ExecuteAll(num_pages, num_levels_per_page, batch_size, FLBA_LENGTH,
+ Encoding::RLE_DICTIONARY);
}
TEST_F(TestFLBAFlatScanner, TestPlainDictScanner) {
this->ExecuteAll(num_pages, num_levels_per_page, batch_size, FLBA_LENGTH,
- Encoding::PLAIN_DICTIONARY);
+ Encoding::PLAIN_DICTIONARY);
}
// PARQUET 502
TEST_F(TestFLBAFlatScanner, TestSmallBatch) {
- NodePtr type = schema::PrimitiveNode::Make("c1", Repetition::REQUIRED,
- Type::FIXED_LEN_BYTE_ARRAY, LogicalType::DECIMAL, FLBA_LENGTH, 10, 2);
+ NodePtr type =
+ schema::PrimitiveNode::Make("c1", Repetition::REQUIRED, Type::FIXED_LEN_BYTE_ARRAY,
+ LogicalType::DECIMAL, FLBA_LENGTH, 10, 2);
const ColumnDescriptor d(type, 0, 0);
- num_values_ = MakePages<FLBAType>(
- &d, 1, 100, def_levels_, rep_levels_, values_, data_buffer_, pages_);
+ num_values_ = MakePages<FLBAType>(&d, 1, 100, def_levels_, rep_levels_, values_,
+ data_buffer_, pages_);
num_levels_ = 1 * 100;
InitScanner(&d);
CheckResults(1, &d);
}
TEST_F(TestFLBAFlatScanner, TestDescriptorAPI) {
- NodePtr type = schema::PrimitiveNode::Make("c1", Repetition::OPTIONAL,
- Type::FIXED_LEN_BYTE_ARRAY, LogicalType::DECIMAL, FLBA_LENGTH, 10, 2);
+ NodePtr type =
+ schema::PrimitiveNode::Make("c1", Repetition::OPTIONAL, Type::FIXED_LEN_BYTE_ARRAY,
+ LogicalType::DECIMAL, FLBA_LENGTH, 10, 2);
const ColumnDescriptor d(type, 4, 0);
- num_values_ = MakePages<FLBAType>(
- &d, 1, 100, def_levels_, rep_levels_, values_, data_buffer_, pages_);
+ num_values_ = MakePages<FLBAType>(&d, 1, 100, def_levels_, rep_levels_, values_,
+ data_buffer_, pages_);
num_levels_ = 1 * 100;
InitScanner(&d);
TypedScanner<FLBAType>* scanner =
@@ -208,11 +210,12 @@ TEST_F(TestFLBAFlatScanner, TestDescriptorAPI) {
}
TEST_F(TestFLBAFlatScanner, TestFLBAPrinterNext) {
- NodePtr type = schema::PrimitiveNode::Make("c1", Repetition::OPTIONAL,
- Type::FIXED_LEN_BYTE_ARRAY, LogicalType::DECIMAL, FLBA_LENGTH, 10, 2);
+ NodePtr type =
+ schema::PrimitiveNode::Make("c1", Repetition::OPTIONAL, Type::FIXED_LEN_BYTE_ARRAY,
+ LogicalType::DECIMAL, FLBA_LENGTH, 10, 2);
const ColumnDescriptor d(type, 4, 0);
- num_values_ = MakePages<FLBAType>(
- &d, 1, 100, def_levels_, rep_levels_, values_, data_buffer_, pages_);
+ num_values_ = MakePages<FLBAType>(&d, 1, 100, def_levels_, rep_levels_, values_,
+ data_buffer_, pages_);
num_levels_ = 1 * 100;
InitScanner(&d);
TypedScanner<FLBAType>* scanner =
http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/b6f3caeb/src/parquet/column_scanner.cc
----------------------------------------------------------------------
diff --git a/src/parquet/column_scanner.cc b/src/parquet/column_scanner.cc
index a67af71..51c8773 100644
--- a/src/parquet/column_scanner.cc
+++ b/src/parquet/column_scanner.cc
@@ -27,8 +27,8 @@ using arrow::MemoryPool;
namespace parquet {
-std::shared_ptr<Scanner> Scanner::Make(
- std::shared_ptr<ColumnReader> col_reader, int64_t batch_size, MemoryPool* pool) {
+std::shared_ptr<Scanner> Scanner::Make(std::shared_ptr<ColumnReader> col_reader,
+ int64_t batch_size, MemoryPool* pool) {
switch (col_reader->type()) {
case Type::BOOLEAN:
return std::make_shared<BoolScanner>(col_reader, batch_size, pool);
@@ -54,32 +54,33 @@ std::shared_ptr<Scanner> Scanner::Make(
}
int64_t ScanAllValues(int32_t batch_size, int16_t* def_levels, int16_t* rep_levels,
- uint8_t* values, int64_t* values_buffered, parquet::ColumnReader* reader) {
+ uint8_t* values, int64_t* values_buffered,
+ parquet::ColumnReader* reader) {
switch (reader->type()) {
case parquet::Type::BOOLEAN:
- return ScanAll<parquet::BoolReader>(
- batch_size, def_levels, rep_levels, values, values_buffered, reader);
+ return ScanAll<parquet::BoolReader>(batch_size, def_levels, rep_levels, values,
+ values_buffered, reader);
case parquet::Type::INT32:
- return ScanAll<parquet::Int32Reader>(
- batch_size, def_levels, rep_levels, values, values_buffered, reader);
+ return ScanAll<parquet::Int32Reader>(batch_size, def_levels, rep_levels, values,
+ values_buffered, reader);
case parquet::Type::INT64:
- return ScanAll<parquet::Int64Reader>(
- batch_size, def_levels, rep_levels, values, values_buffered, reader);
+ return ScanAll<parquet::Int64Reader>(batch_size, def_levels, rep_levels, values,
+ values_buffered, reader);
case parquet::Type::INT96:
- return ScanAll<parquet::Int96Reader>(
- batch_size, def_levels, rep_levels, values, values_buffered, reader);
+ return ScanAll<parquet::Int96Reader>(batch_size, def_levels, rep_levels, values,
+ values_buffered, reader);
case parquet::Type::FLOAT:
- return ScanAll<parquet::FloatReader>(
- batch_size, def_levels, rep_levels, values, values_buffered, reader);
+ return ScanAll<parquet::FloatReader>(batch_size, def_levels, rep_levels, values,
+ values_buffered, reader);
case parquet::Type::DOUBLE:
- return ScanAll<parquet::DoubleReader>(
- batch_size, def_levels, rep_levels, values, values_buffered, reader);
+ return ScanAll<parquet::DoubleReader>(batch_size, def_levels, rep_levels, values,
+ values_buffered, reader);
case parquet::Type::BYTE_ARRAY:
- return ScanAll<parquet::ByteArrayReader>(
- batch_size, def_levels, rep_levels, values, values_buffered, reader);
+ return ScanAll<parquet::ByteArrayReader>(batch_size, def_levels, rep_levels, values,
+ values_buffered, reader);
case parquet::Type::FIXED_LEN_BYTE_ARRAY:
- return ScanAll<parquet::FixedLenByteArrayReader>(
- batch_size, def_levels, rep_levels, values, values_buffered, reader);
+ return ScanAll<parquet::FixedLenByteArrayReader>(batch_size, def_levels, rep_levels,
+ values, values_buffered, reader);
default:
parquet::ParquetException::NYI("type reader not implemented");
}
http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/b6f3caeb/src/parquet/column_scanner.h
----------------------------------------------------------------------
diff --git a/src/parquet/column_scanner.h b/src/parquet/column_scanner.h
index 4be0b7f..2917201 100644
--- a/src/parquet/column_scanner.h
+++ b/src/parquet/column_scanner.h
@@ -18,10 +18,10 @@
#ifndef PARQUET_COLUMN_SCANNER_H
#define PARQUET_COLUMN_SCANNER_H
+#include <stdio.h>
#include <cstdint>
#include <memory>
#include <ostream>
-#include <stdio.h>
#include <string>
#include <vector>
@@ -39,8 +39,8 @@ static constexpr int64_t DEFAULT_SCANNER_BATCH_SIZE = 128;
class PARQUET_EXPORT Scanner {
public:
explicit Scanner(std::shared_ptr<ColumnReader> reader,
- int64_t batch_size = DEFAULT_SCANNER_BATCH_SIZE,
- ::arrow::MemoryPool* pool = ::arrow::default_memory_pool())
+ int64_t batch_size = DEFAULT_SCANNER_BATCH_SIZE,
+ ::arrow::MemoryPool* pool = ::arrow::default_memory_pool())
: batch_size_(batch_size),
level_offset_(0),
levels_buffered_(0),
@@ -54,7 +54,8 @@ class PARQUET_EXPORT Scanner {
virtual ~Scanner() {}
- static std::shared_ptr<Scanner> Make(std::shared_ptr<ColumnReader> col_reader,
+ static std::shared_ptr<Scanner> Make(
+ std::shared_ptr<ColumnReader> col_reader,
int64_t batch_size = DEFAULT_SCANNER_BATCH_SIZE,
::arrow::MemoryPool* pool = ::arrow::default_memory_pool());
@@ -90,8 +91,8 @@ class PARQUET_EXPORT TypedScanner : public Scanner {
typedef typename DType::c_type T;
explicit TypedScanner(std::shared_ptr<ColumnReader> reader,
- int64_t batch_size = DEFAULT_SCANNER_BATCH_SIZE,
- ::arrow::MemoryPool* pool = ::arrow::default_memory_pool())
+ int64_t batch_size = DEFAULT_SCANNER_BATCH_SIZE,
+ ::arrow::MemoryPool* pool = ::arrow::default_memory_pool())
: Scanner(reader, batch_size, pool) {
typed_reader_ = static_cast<TypedColumnReader<DType>*>(reader.get());
int value_byte_size = type_traits<DType::type_num>::value_byte_size;
@@ -103,13 +104,15 @@ class PARQUET_EXPORT TypedScanner : public Scanner {
bool NextLevels(int16_t* def_level, int16_t* rep_level) {
if (level_offset_ == levels_buffered_) {
- levels_buffered_ =
- static_cast<int>(typed_reader_->ReadBatch(static_cast<int>(batch_size_),
- def_levels_.data(), rep_levels_.data(), values_, &values_buffered_));
+ levels_buffered_ = static_cast<int>(
+ typed_reader_->ReadBatch(static_cast<int>(batch_size_), def_levels_.data(),
+ rep_levels_.data(), values_, &values_buffered_));
value_offset_ = 0;
level_offset_ = 0;
- if (!levels_buffered_) { return false; }
+ if (!levels_buffered_) {
+ return false;
+ }
}
*def_level = descr()->max_definition_level() > 0 ? def_levels_[level_offset_] : 0;
*rep_level = descr()->max_repetition_level() > 0 ? rep_levels_[level_offset_] : 0;
@@ -128,7 +131,9 @@ class PARQUET_EXPORT TypedScanner : public Scanner {
NextLevels(def_level, rep_level);
*is_null = *def_level < descr()->max_definition_level();
- if (*is_null) { return true; }
+ if (*is_null) {
+ return true;
+ }
if (value_offset_ == values_buffered_) {
throw ParquetException("Value was non-null, but has not been buffered");
@@ -152,7 +157,9 @@ class PARQUET_EXPORT TypedScanner : public Scanner {
NextLevels(&def_level, &rep_level);
*is_null = def_level < descr()->max_definition_level();
- if (*is_null) { return true; }
+ if (*is_null) {
+ return true;
+ }
if (value_offset_ == values_buffered_) {
throw ParquetException("Value was non-null, but has not been buffered");
@@ -166,7 +173,9 @@ class PARQUET_EXPORT TypedScanner : public Scanner {
bool is_null = false;
char buffer[25];
- if (!NextValue(&val, &is_null)) { throw ParquetException("No more values buffered"); }
+ if (!NextValue(&val, &is_null)) {
+ throw ParquetException("No more values buffered");
+ }
if (is_null) {
std::string null_fmt = format_fwf<ByteArrayType>(width);
@@ -187,31 +196,31 @@ class PARQUET_EXPORT TypedScanner : public Scanner {
};
template <typename DType>
-inline void TypedScanner<DType>::FormatValue(
- void* val, char* buffer, int bufsize, int width) {
+inline void TypedScanner<DType>::FormatValue(void* val, char* buffer, int bufsize,
+ int width) {
std::string fmt = format_fwf<DType>(width);
snprintf(buffer, bufsize, fmt.c_str(), *reinterpret_cast<T*>(val));
}
template <>
-inline void TypedScanner<Int96Type>::FormatValue(
- void* val, char* buffer, int bufsize, int width) {
+inline void TypedScanner<Int96Type>::FormatValue(void* val, char* buffer, int bufsize,
+ int width) {
std::string fmt = format_fwf<Int96Type>(width);
std::string result = Int96ToString(*reinterpret_cast<Int96*>(val));
snprintf(buffer, bufsize, fmt.c_str(), result.c_str());
}
template <>
-inline void TypedScanner<ByteArrayType>::FormatValue(
- void* val, char* buffer, int bufsize, int width) {
+inline void TypedScanner<ByteArrayType>::FormatValue(void* val, char* buffer, int bufsize,
+ int width) {
std::string fmt = format_fwf<ByteArrayType>(width);
std::string result = ByteArrayToString(*reinterpret_cast<ByteArray*>(val));
snprintf(buffer, bufsize, fmt.c_str(), result.c_str());
}
template <>
-inline void TypedScanner<FLBAType>::FormatValue(
- void* val, char* buffer, int bufsize, int width) {
+inline void TypedScanner<FLBAType>::FormatValue(void* val, char* buffer, int bufsize,
+ int width) {
std::string fmt = format_fwf<FLBAType>(width);
std::string result = FixedLenByteArrayToString(
*reinterpret_cast<FixedLenByteArray*>(val), descr()->type_length());
@@ -229,17 +238,19 @@ typedef TypedScanner<FLBAType> FixedLenByteArrayScanner;
template <typename RType>
int64_t ScanAll(int32_t batch_size, int16_t* def_levels, int16_t* rep_levels,
- uint8_t* values, int64_t* values_buffered, parquet::ColumnReader* reader) {
+ uint8_t* values, int64_t* values_buffered,
+ parquet::ColumnReader* reader) {
typedef typename RType::T Type;
auto typed_reader = static_cast<RType*>(reader);
auto vals = reinterpret_cast<Type*>(&values[0]);
- return typed_reader->ReadBatch(
- batch_size, def_levels, rep_levels, vals, values_buffered);
+ return typed_reader->ReadBatch(batch_size, def_levels, rep_levels, vals,
+ values_buffered);
}
int64_t PARQUET_EXPORT ScanAllValues(int32_t batch_size, int16_t* def_levels,
- int16_t* rep_levels, uint8_t* values, int64_t* values_buffered,
- parquet::ColumnReader* reader);
+ int16_t* rep_levels, uint8_t* values,
+ int64_t* values_buffered,
+ parquet::ColumnReader* reader);
} // namespace parquet
http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/b6f3caeb/src/parquet/column_writer-test.cc
----------------------------------------------------------------------
diff --git a/src/parquet/column_writer-test.cc b/src/parquet/column_writer-test.cc
index 798c7ba..3ec3663 100644
--- a/src/parquet/column_writer-test.cc
+++ b/src/parquet/column_writer-test.cc
@@ -59,8 +59,8 @@ class TestPrimitiveWriter : public PrimitiveTypedTest<TestType> {
Type::type type_num() { return TestType::type_num; }
- void BuildReader(
- int64_t num_rows, Compression::type compression = Compression::UNCOMPRESSED) {
+ void BuildReader(int64_t num_rows,
+ Compression::type compression = Compression::UNCOMPRESSED) {
auto buffer = sink_->GetBuffer();
std::unique_ptr<InMemoryInputStream> source(new InMemoryInputStream(buffer));
std::unique_ptr<SerializedPageReader> page_reader(
@@ -93,8 +93,8 @@ class TestPrimitiveWriter : public PrimitiveTypedTest<TestType> {
void ReadColumn(Compression::type compression = Compression::UNCOMPRESSED) {
BuildReader(static_cast<int64_t>(this->values_out_.size()), compression);
reader_->ReadBatch(static_cast<int>(this->values_out_.size()),
- definition_levels_out_.data(), repetition_levels_out_.data(),
- this->values_out_ptr_, &values_read_);
+ definition_levels_out_.data(), repetition_levels_out_.data(),
+ this->values_out_ptr_, &values_read_);
this->SyncValuesOut();
}
@@ -105,22 +105,24 @@ class TestPrimitiveWriter : public PrimitiveTypedTest<TestType> {
}
void TestRequiredWithSettings(Encoding::type encoding, Compression::type compression,
- bool enable_dictionary, bool enable_statistics, int64_t num_rows = SMALL_SIZE) {
+ bool enable_dictionary, bool enable_statistics,
+ int64_t num_rows = SMALL_SIZE) {
this->GenerateData(num_rows);
- this->WriteRequiredWithSettings(
- encoding, compression, enable_dictionary, enable_statistics, num_rows);
+ this->WriteRequiredWithSettings(encoding, compression, enable_dictionary,
+ enable_statistics, num_rows);
this->ReadAndCompare(compression, num_rows);
- this->WriteRequiredWithSettingsSpaced(
- encoding, compression, enable_dictionary, enable_statistics, num_rows);
+ this->WriteRequiredWithSettingsSpaced(encoding, compression, enable_dictionary,
+ enable_statistics, num_rows);
this->ReadAndCompare(compression, num_rows);
}
void WriteRequiredWithSettings(Encoding::type encoding, Compression::type compression,
- bool enable_dictionary, bool enable_statistics, int64_t num_rows) {
- ColumnProperties column_properties(
- encoding, compression, enable_dictionary, enable_statistics);
+ bool enable_dictionary, bool enable_statistics,
+ int64_t num_rows) {
+ ColumnProperties column_properties(encoding, compression, enable_dictionary,
+ enable_statistics);
std::shared_ptr<TypedColumnWriter<TestType>> writer =
this->BuildWriter(num_rows, column_properties);
writer->WriteBatch(this->values_.size(), nullptr, nullptr, this->values_ptr_);
@@ -130,16 +132,17 @@ class TestPrimitiveWriter : public PrimitiveTypedTest<TestType> {
}
void WriteRequiredWithSettingsSpaced(Encoding::type encoding,
- Compression::type compression, bool enable_dictionary, bool enable_statistics,
- int64_t num_rows) {
+ Compression::type compression,
+ bool enable_dictionary, bool enable_statistics,
+ int64_t num_rows) {
std::vector<uint8_t> valid_bits(
BitUtil::RoundUpNumBytes(static_cast<uint32_t>(this->values_.size())) + 1, 255);
- ColumnProperties column_properties(
- encoding, compression, enable_dictionary, enable_statistics);
+ ColumnProperties column_properties(encoding, compression, enable_dictionary,
+ enable_statistics);
std::shared_ptr<TypedColumnWriter<TestType>> writer =
this->BuildWriter(num_rows, column_properties);
- writer->WriteBatchSpaced(
- this->values_.size(), nullptr, nullptr, valid_bits.data(), 0, this->values_ptr_);
+ writer->WriteBatchSpaced(this->values_.size(), nullptr, nullptr, valid_bits.data(), 0,
+ this->values_ptr_);
// The behaviour should be independent from the number of Close() calls
writer->Close();
writer->Close();
@@ -234,7 +237,7 @@ void TestPrimitiveWriter<FLBAType>::ReadColumnFully(Compression::type compressio
uint8_t* data_ptr = data.data();
for (int64_t i = 0; i < values_read_recently; i++) {
memcpy(data_ptr + this->descr_->type_length() * i,
- this->values_out_[i + values_read_].ptr, this->descr_->type_length());
+ this->values_out_[i + values_read_].ptr, this->descr_->type_length());
this->values_out_[i + values_read_].ptr =
data_ptr + this->descr_->type_length() * i;
}
@@ -246,7 +249,7 @@ void TestPrimitiveWriter<FLBAType>::ReadColumnFully(Compression::type compressio
}
typedef ::testing::Types<Int32Type, Int64Type, Int96Type, FloatType, DoubleType,
- BooleanType, ByteArrayType, FLBAType>
+ BooleanType, ByteArrayType, FLBAType>
TestTypes;
TYPED_TEST_CASE(TestPrimitiveWriter, TestTypes);
@@ -288,38 +291,38 @@ TYPED_TEST(TestPrimitiveWriter, RequiredRLEDictionary) {
*/
TYPED_TEST(TestPrimitiveWriter, RequiredPlainWithSnappyCompression) {
- this->TestRequiredWithSettings(
- Encoding::PLAIN, Compression::SNAPPY, false, false, LARGE_SIZE);
+ this->TestRequiredWithSettings(Encoding::PLAIN, Compression::SNAPPY, false, false,
+ LARGE_SIZE);
}
TYPED_TEST(TestPrimitiveWriter, RequiredPlainWithBrotliCompression) {
- this->TestRequiredWithSettings(
- Encoding::PLAIN, Compression::BROTLI, false, false, LARGE_SIZE);
+ this->TestRequiredWithSettings(Encoding::PLAIN, Compression::BROTLI, false, false,
+ LARGE_SIZE);
}
TYPED_TEST(TestPrimitiveWriter, RequiredPlainWithGzipCompression) {
- this->TestRequiredWithSettings(
- Encoding::PLAIN, Compression::GZIP, false, false, LARGE_SIZE);
+ this->TestRequiredWithSettings(Encoding::PLAIN, Compression::GZIP, false, false,
+ LARGE_SIZE);
}
TYPED_TEST(TestPrimitiveWriter, RequiredPlainWithStats) {
- this->TestRequiredWithSettings(
- Encoding::PLAIN, Compression::UNCOMPRESSED, false, true, LARGE_SIZE);
+ this->TestRequiredWithSettings(Encoding::PLAIN, Compression::UNCOMPRESSED, false, true,
+ LARGE_SIZE);
}
TYPED_TEST(TestPrimitiveWriter, RequiredPlainWithStatsAndSnappyCompression) {
- this->TestRequiredWithSettings(
- Encoding::PLAIN, Compression::SNAPPY, false, true, LARGE_SIZE);
+ this->TestRequiredWithSettings(Encoding::PLAIN, Compression::SNAPPY, false, true,
+ LARGE_SIZE);
}
TYPED_TEST(TestPrimitiveWriter, RequiredPlainWithStatsAndBrotliCompression) {
- this->TestRequiredWithSettings(
- Encoding::PLAIN, Compression::BROTLI, false, true, LARGE_SIZE);
+ this->TestRequiredWithSettings(Encoding::PLAIN, Compression::BROTLI, false, true,
+ LARGE_SIZE);
}
TYPED_TEST(TestPrimitiveWriter, RequiredPlainWithStatsAndGzipCompression) {
- this->TestRequiredWithSettings(
- Encoding::PLAIN, Compression::GZIP, false, true, LARGE_SIZE);
+ this->TestRequiredWithSettings(Encoding::PLAIN, Compression::GZIP, false, true,
+ LARGE_SIZE);
}
TYPED_TEST(TestPrimitiveWriter, Optional) {
@@ -332,8 +335,8 @@ TYPED_TEST(TestPrimitiveWriter, Optional) {
definition_levels[1] = 0;
auto writer = this->BuildWriter();
- writer->WriteBatch(
- this->values_.size(), definition_levels.data(), nullptr, this->values_ptr_);
+ writer->WriteBatch(this->values_.size(), definition_levels.data(), nullptr,
+ this->values_ptr_);
writer->Close();
// PARQUET-703
@@ -362,7 +365,7 @@ TYPED_TEST(TestPrimitiveWriter, OptionalSpaced) {
auto writer = this->BuildWriter();
writer->WriteBatchSpaced(this->values_.size(), definition_levels.data(), nullptr,
- valid_bits.data(), 0, this->values_ptr_);
+ valid_bits.data(), 0, this->values_ptr_);
writer->Close();
// PARQUET-703
@@ -387,7 +390,7 @@ TYPED_TEST(TestPrimitiveWriter, Repeated) {
auto writer = this->BuildWriter();
writer->WriteBatch(this->values_.size(), definition_levels.data(),
- repetition_levels.data(), this->values_ptr_);
+ repetition_levels.data(), this->values_ptr_);
writer->Close();
this->ReadColumn();
@@ -426,7 +429,7 @@ TYPED_TEST(TestPrimitiveWriter, RepeatedTooFewRows) {
auto writer = this->BuildWriter();
writer->WriteBatch(this->values_.size(), definition_levels.data(),
- repetition_levels.data(), this->values_ptr_);
+ repetition_levels.data(), this->values_ptr_);
ASSERT_THROW(writer->Close(), ParquetException);
}
@@ -485,8 +488,8 @@ TEST_F(TestNullValuesWriter, OptionalNullValueChunk) {
auto writer = this->BuildWriter(LARGE_SIZE);
// All values being written are NULL
- writer->WriteBatch(
- this->values_.size(), definition_levels.data(), repetition_levels.data(), NULL);
+ writer->WriteBatch(this->values_.size(), definition_levels.data(),
+ repetition_levels.data(), NULL);
writer->Close();
// Just read the first SMALL_SIZE rows to ensure we could read it back in
@@ -512,7 +515,7 @@ TEST_F(TestBooleanValuesWriter, AlternateBooleanValues) {
}
void GenerateLevels(int min_repeat_factor, int max_repeat_factor, int max_level,
- std::vector<int16_t>& input_levels) {
+ std::vector<int16_t>& input_levels) {
// for each repetition count upto max_repeat_factor
for (int repeat = min_repeat_factor; repeat <= max_repeat_factor; repeat++) {
// repeat count increases by a factor of 2 for every iteration
@@ -531,7 +534,7 @@ void GenerateLevels(int min_repeat_factor, int max_repeat_factor, int max_level,
}
void EncodeLevels(Encoding::type encoding, int max_level, int num_levels,
- const int16_t* input_levels, std::vector<uint8_t>& bytes) {
+ const int16_t* input_levels, std::vector<uint8_t>& bytes) {
LevelEncoder encoder;
int levels_count = 0;
bytes.resize(2 * num_levels);
@@ -540,20 +543,21 @@ void EncodeLevels(Encoding::type encoding, int max_level, int num_levels,
if (encoding == Encoding::RLE) {
// leave space to write the rle length value
encoder.Init(encoding, max_level, num_levels, bytes.data() + sizeof(int32_t),
- static_cast<int>(bytes.size()));
+ static_cast<int>(bytes.size()));
levels_count = encoder.Encode(num_levels, input_levels);
(reinterpret_cast<int32_t*>(bytes.data()))[0] = encoder.len();
} else {
- encoder.Init(
- encoding, max_level, num_levels, bytes.data(), static_cast<int>(bytes.size()));
+ encoder.Init(encoding, max_level, num_levels, bytes.data(),
+ static_cast<int>(bytes.size()));
levels_count = encoder.Encode(num_levels, input_levels);
}
ASSERT_EQ(num_levels, levels_count);
}
void VerifyDecodingLevels(Encoding::type encoding, int max_level,
- std::vector<int16_t>& input_levels, std::vector<uint8_t>& bytes) {
+ std::vector<int16_t>& input_levels,
+ std::vector<uint8_t>& bytes) {
LevelDecoder decoder;
int levels_count = 0;
std::vector<int16_t> output_levels;
@@ -590,7 +594,8 @@ void VerifyDecodingLevels(Encoding::type encoding, int max_level,
}
void VerifyDecodingMultipleSetData(Encoding::type encoding, int max_level,
- std::vector<int16_t>& input_levels, std::vector<std::vector<uint8_t>>& bytes) {
+ std::vector<int16_t>& input_levels,
+ std::vector<std::vector<uint8_t>>& bytes) {
LevelDecoder decoder;
int levels_count = 0;
std::vector<int16_t> output_levels;
@@ -634,7 +639,7 @@ TEST(TestLevels, TestLevelsDecodeMultipleBitWidth) {
// Generate levels
GenerateLevels(min_repeat_factor, max_repeat_factor, max_level, input_levels);
EncodeLevels(encoding, max_level, static_cast<int>(input_levels.size()),
- input_levels.data(), bytes);
+ input_levels.data(), bytes);
VerifyDecodingLevels(encoding, max_level, input_levels, bytes);
input_levels.clear();
}
@@ -662,7 +667,7 @@ TEST(TestLevels, TestLevelsDecodeMultipleSetData) {
for (int rf = 0; rf < setdata_factor; rf++) {
int offset = rf * split_level_size;
EncodeLevels(encoding, max_level, split_level_size,
- reinterpret_cast<int16_t*>(input_levels.data()) + offset, bytes[rf]);
+ reinterpret_cast<int16_t*>(input_levels.data()) + offset, bytes[rf]);
}
VerifyDecodingMultipleSetData(encoding, max_level, input_levels, bytes);
}
@@ -685,8 +690,8 @@ TEST(TestLevelEncoder, MinimumBufferSize) {
LevelEncoder::MaxBufferSize(Encoding::RLE, 1, kNumToEncode));
LevelEncoder encoder;
- encoder.Init(
- Encoding::RLE, 1, kNumToEncode, output.data(), static_cast<int>(output.size()));
+ encoder.Init(Encoding::RLE, 1, kNumToEncode, output.data(),
+ static_cast<int>(output.size()));
int encode_count = encoder.Encode(kNumToEncode, levels.data());
ASSERT_EQ(kNumToEncode, encode_count);
@@ -718,7 +723,7 @@ TEST(TestLevelEncoder, MinimumBufferSize2) {
LevelEncoder encoder;
encoder.Init(Encoding::RLE, bit_width, kNumToEncode, output.data(),
- static_cast<int>(output.size()));
+ static_cast<int>(output.size()));
int encode_count = encoder.Encode(kNumToEncode, levels.data());
ASSERT_EQ(kNumToEncode, encode_count);
http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/b6f3caeb/src/parquet/column_writer.cc
----------------------------------------------------------------------
diff --git a/src/parquet/column_writer.cc b/src/parquet/column_writer.cc
index 21550da..b36f395 100644
--- a/src/parquet/column_writer.cc
+++ b/src/parquet/column_writer.cc
@@ -35,7 +35,7 @@ LevelEncoder::LevelEncoder() {}
LevelEncoder::~LevelEncoder() {}
void LevelEncoder::Init(Encoding::type encoding, int16_t max_level,
- int num_buffered_values, uint8_t* data, int data_size) {
+ int num_buffered_values, uint8_t* data, int data_size) {
bit_width_ = BitUtil::Log2(max_level + 1);
encoding_ = encoding;
switch (encoding) {
@@ -54,8 +54,8 @@ void LevelEncoder::Init(Encoding::type encoding, int16_t max_level,
}
}
-int LevelEncoder::MaxBufferSize(
- Encoding::type encoding, int16_t max_level, int num_buffered_values) {
+int LevelEncoder::MaxBufferSize(Encoding::type encoding, int16_t max_level,
+ int num_buffered_values) {
int bit_width = BitUtil::Log2(max_level + 1);
int num_bytes = 0;
switch (encoding) {
@@ -84,14 +84,18 @@ int LevelEncoder::Encode(int batch_size, const int16_t* levels) {
if (encoding_ == Encoding::RLE) {
for (int i = 0; i < batch_size; ++i) {
- if (!rle_encoder_->Put(*(levels + i))) { break; }
+ if (!rle_encoder_->Put(*(levels + i))) {
+ break;
+ }
++num_encoded;
}
rle_encoder_->Flush();
rle_length_ = rle_encoder_->len();
} else {
for (int i = 0; i < batch_size; ++i) {
- if (!bit_packed_encoder_->PutValue(*(levels + i), bit_width_)) { break; }
+ if (!bit_packed_encoder_->PutValue(*(levels + i), bit_width_)) {
+ break;
+ }
++num_encoded;
}
bit_packed_encoder_->Flush();
@@ -109,8 +113,9 @@ std::shared_ptr<WriterProperties> default_writer_properties() {
}
ColumnWriter::ColumnWriter(ColumnChunkMetaDataBuilder* metadata,
- std::unique_ptr<PageWriter> pager, int64_t expected_rows, bool has_dictionary,
- Encoding::type encoding, const WriterProperties* properties)
+ std::unique_ptr<PageWriter> pager, int64_t expected_rows,
+ bool has_dictionary, Encoding::type encoding,
+ const WriterProperties* properties)
: metadata_(metadata),
descr_(metadata->descr()),
pager_(std::move(pager)),
@@ -147,22 +152,22 @@ void ColumnWriter::InitSinks() {
void ColumnWriter::WriteDefinitionLevels(int64_t num_levels, const int16_t* levels) {
DCHECK(!closed_);
- definition_levels_sink_->Write(
- reinterpret_cast<const uint8_t*>(levels), sizeof(int16_t) * num_levels);
+ definition_levels_sink_->Write(reinterpret_cast<const uint8_t*>(levels),
+ sizeof(int16_t) * num_levels);
}
void ColumnWriter::WriteRepetitionLevels(int64_t num_levels, const int16_t* levels) {
DCHECK(!closed_);
- repetition_levels_sink_->Write(
- reinterpret_cast<const uint8_t*>(levels), sizeof(int16_t) * num_levels);
+ repetition_levels_sink_->Write(reinterpret_cast<const uint8_t*>(levels),
+ sizeof(int16_t) * num_levels);
}
// return the size of the encoded buffer
-int64_t ColumnWriter::RleEncodeLevels(
- const Buffer& src_buffer, ResizableBuffer* dest_buffer, int16_t max_level) {
+int64_t ColumnWriter::RleEncodeLevels(const Buffer& src_buffer,
+ ResizableBuffer* dest_buffer, int16_t max_level) {
// TODO: This only works with due to some RLE specifics
int64_t rle_size = LevelEncoder::MaxBufferSize(Encoding::RLE, max_level,
- static_cast<int>(num_buffered_values_)) +
+ static_cast<int>(num_buffered_values_)) +
sizeof(int32_t);
// Use Arrow::Buffer::shrink_to_fit = false
@@ -170,10 +175,11 @@ int64_t ColumnWriter::RleEncodeLevels(
PARQUET_THROW_NOT_OK(dest_buffer->Resize(rle_size, false));
level_encoder_.Init(Encoding::RLE, max_level, static_cast<int>(num_buffered_values_),
- dest_buffer->mutable_data() + sizeof(int32_t),
- static_cast<int>(dest_buffer->size()) - sizeof(int32_t));
- int encoded = level_encoder_.Encode(static_cast<int>(num_buffered_values_),
- reinterpret_cast<const int16_t*>(src_buffer.data()));
+ dest_buffer->mutable_data() + sizeof(int32_t),
+ static_cast<int>(dest_buffer->size()) - sizeof(int32_t));
+ int encoded =
+ level_encoder_.Encode(static_cast<int>(num_buffered_values_),
+ reinterpret_cast<const int16_t*>(src_buffer.data()));
DCHECK_EQ(encoded, num_buffered_values_);
reinterpret_cast<int32_t*>(dest_buffer->mutable_data())[0] = level_encoder_.len();
int64_t encoded_size = level_encoder_.len() + sizeof(int32_t);
@@ -187,13 +193,15 @@ void ColumnWriter::AddDataPage() {
std::shared_ptr<Buffer> values = GetValuesBuffer();
if (descr_->max_definition_level() > 0) {
- definition_levels_rle_size = RleEncodeLevels(definition_levels_sink_->GetBufferRef(),
- definition_levels_rle_.get(), descr_->max_definition_level());
+ definition_levels_rle_size =
+ RleEncodeLevels(definition_levels_sink_->GetBufferRef(),
+ definition_levels_rle_.get(), descr_->max_definition_level());
}
if (descr_->max_repetition_level() > 0) {
- repetition_levels_rle_size = RleEncodeLevels(repetition_levels_sink_->GetBufferRef(),
- repetition_levels_rle_.get(), descr_->max_repetition_level());
+ repetition_levels_rle_size =
+ RleEncodeLevels(repetition_levels_sink_->GetBufferRef(),
+ repetition_levels_rle_.get(), descr_->max_repetition_level());
}
int64_t uncompressed_size =
@@ -226,15 +234,16 @@ void ColumnWriter::AddDataPage() {
// if dictionary encoding has fallen back to PLAIN
if (has_dictionary_ && !fallback_) { // Save pages until end of dictionary encoding
std::shared_ptr<Buffer> compressed_data_copy;
- PARQUET_THROW_NOT_OK(compressed_data->Copy(
- 0, compressed_data->size(), allocator_, &compressed_data_copy));
+ PARQUET_THROW_NOT_OK(compressed_data->Copy(0, compressed_data->size(), allocator_,
+ &compressed_data_copy));
CompressedDataPage page(compressed_data_copy,
- static_cast<int32_t>(num_buffered_values_), encoding_, Encoding::RLE,
- Encoding::RLE, uncompressed_size, page_stats);
+ static_cast<int32_t>(num_buffered_values_), encoding_,
+ Encoding::RLE, Encoding::RLE, uncompressed_size, page_stats);
data_pages_.push_back(std::move(page));
} else { // Eagerly write pages
CompressedDataPage page(compressed_data, static_cast<int32_t>(num_buffered_values_),
- encoding_, Encoding::RLE, Encoding::RLE, uncompressed_size, page_stats);
+ encoding_, Encoding::RLE, Encoding::RLE, uncompressed_size,
+ page_stats);
WriteDataPage(page);
}
@@ -251,7 +260,9 @@ void ColumnWriter::WriteDataPage(const CompressedDataPage& page) {
int64_t ColumnWriter::Close() {
if (!closed_) {
closed_ = true;
- if (has_dictionary_ && !fallback_) { WriteDictionaryPage(); }
+ if (has_dictionary_ && !fallback_) {
+ WriteDictionaryPage();
+ }
FlushBufferedDataPages();
@@ -272,7 +283,9 @@ int64_t ColumnWriter::Close() {
void ColumnWriter::FlushBufferedDataPages() {
// Write all outstanding data to a new page
- if (num_buffered_values_ > 0) { AddDataPage(); }
+ if (num_buffered_values_ > 0) {
+ AddDataPage();
+ }
for (size_t i = 0; i < data_pages_.size(); i++) {
WriteDataPage(data_pages_[i]);
}
@@ -284,12 +297,13 @@ void ColumnWriter::FlushBufferedDataPages() {
template <typename Type>
TypedColumnWriter<Type>::TypedColumnWriter(ColumnChunkMetaDataBuilder* metadata,
- std::unique_ptr<PageWriter> pager, int64_t expected_rows, Encoding::type encoding,
- const WriterProperties* properties)
+ std::unique_ptr<PageWriter> pager,
+ int64_t expected_rows, Encoding::type encoding,
+ const WriterProperties* properties)
: ColumnWriter(metadata, std::move(pager), expected_rows,
- (encoding == Encoding::PLAIN_DICTIONARY ||
- encoding == Encoding::RLE_DICTIONARY),
- encoding, properties) {
+ (encoding == Encoding::PLAIN_DICTIONARY ||
+ encoding == Encoding::RLE_DICTIONARY),
+ encoding, properties) {
switch (encoding) {
case Encoding::PLAIN:
current_encoder_.reset(new PlainEncoder<Type>(descr_, properties->memory_pool()));
@@ -334,8 +348,8 @@ void TypedColumnWriter<Type>::WriteDictionaryPage() {
// TODO Get rid of this deep call
dict_encoder->mem_pool()->FreeAll();
- DictionaryPage page(
- buffer, dict_encoder->num_entries(), properties_->dictionary_index_encoding());
+ DictionaryPage page(buffer, dict_encoder->num_entries(),
+ properties_->dictionary_index_encoding());
total_bytes_written_ += pager_->WriteDictionaryPage(page);
}
@@ -365,8 +379,9 @@ void TypedColumnWriter<Type>::ResetPageStatistics() {
// Dynamic column writer constructor
std::shared_ptr<ColumnWriter> ColumnWriter::Make(ColumnChunkMetaDataBuilder* metadata,
- std::unique_ptr<PageWriter> pager, int64_t expected_rows,
- const WriterProperties* properties) {
+ std::unique_ptr<PageWriter> pager,
+ int64_t expected_rows,
+ const WriterProperties* properties) {
const ColumnDescriptor* descr = metadata->descr();
Encoding::type encoding = properties->encoding(descr->path());
if (properties->dictionary_enabled(descr->path()) &&
@@ -375,26 +390,26 @@ std::shared_ptr<ColumnWriter> ColumnWriter::Make(ColumnChunkMetaDataBuilder* met
}
switch (descr->physical_type()) {
case Type::BOOLEAN:
- return std::make_shared<BoolWriter>(
- metadata, std::move(pager), expected_rows, encoding, properties);
+ return std::make_shared<BoolWriter>(metadata, std::move(pager), expected_rows,
+ encoding, properties);
case Type::INT32:
- return std::make_shared<Int32Writer>(
- metadata, std::move(pager), expected_rows, encoding, properties);
+ return std::make_shared<Int32Writer>(metadata, std::move(pager), expected_rows,
+ encoding, properties);
case Type::INT64:
- return std::make_shared<Int64Writer>(
- metadata, std::move(pager), expected_rows, encoding, properties);
+ return std::make_shared<Int64Writer>(metadata, std::move(pager), expected_rows,
+ encoding, properties);
case Type::INT96:
- return std::make_shared<Int96Writer>(
- metadata, std::move(pager), expected_rows, encoding, properties);
+ return std::make_shared<Int96Writer>(metadata, std::move(pager), expected_rows,
+ encoding, properties);
case Type::FLOAT:
- return std::make_shared<FloatWriter>(
- metadata, std::move(pager), expected_rows, encoding, properties);
+ return std::make_shared<FloatWriter>(metadata, std::move(pager), expected_rows,
+ encoding, properties);
case Type::DOUBLE:
- return std::make_shared<DoubleWriter>(
- metadata, std::move(pager), expected_rows, encoding, properties);
+ return std::make_shared<DoubleWriter>(metadata, std::move(pager), expected_rows,
+ encoding, properties);
case Type::BYTE_ARRAY:
- return std::make_shared<ByteArrayWriter>(
- metadata, std::move(pager), expected_rows, encoding, properties);
+ return std::make_shared<ByteArrayWriter>(metadata, std::move(pager), expected_rows,
+ encoding, properties);
case Type::FIXED_LEN_BYTE_ARRAY:
return std::make_shared<FixedLenByteArrayWriter>(
metadata, std::move(pager), expected_rows, encoding, properties);
@@ -410,12 +425,16 @@ std::shared_ptr<ColumnWriter> ColumnWriter::Make(ColumnChunkMetaDataBuilder* met
template <typename DType>
inline int64_t TypedColumnWriter<DType>::WriteMiniBatch(int64_t num_values,
- const int16_t* def_levels, const int16_t* rep_levels, const T* values) {
+ const int16_t* def_levels,
+ const int16_t* rep_levels,
+ const T* values) {
int64_t values_to_write = 0;
// If the field is required and non-repeated, there are no definition levels
if (descr_->max_definition_level() > 0) {
for (int64_t i = 0; i < num_values; ++i) {
- if (def_levels[i] == descr_->max_definition_level()) { ++values_to_write; }
+ if (def_levels[i] == descr_->max_definition_level()) {
+ ++values_to_write;
+ }
}
WriteDefinitionLevels(num_values, def_levels);
@@ -429,7 +448,9 @@ inline int64_t TypedColumnWriter<DType>::WriteMiniBatch(int64_t num_values,
// A row could include more than one value
// Count the occasions where we start a new row
for (int64_t i = 0; i < num_values; ++i) {
- if (rep_levels[i] == 0) { num_rows_++; }
+ if (rep_levels[i] == 0) {
+ num_rows_++;
+ }
}
WriteRepetitionLevels(num_values, rep_levels);
@@ -443,7 +464,9 @@ inline int64_t TypedColumnWriter<DType>::WriteMiniBatch(int64_t num_values,
}
// PARQUET-780
- if (values_to_write > 0) { DCHECK(nullptr != values) << "Values ptr cannot be NULL"; }
+ if (values_to_write > 0) {
+ DCHECK(nullptr != values) << "Values ptr cannot be NULL";
+ }
WriteValues(values_to_write, values);
@@ -457,25 +480,34 @@ inline int64_t TypedColumnWriter<DType>::WriteMiniBatch(int64_t num_values,
if (current_encoder_->EstimatedDataEncodedSize() >= properties_->data_pagesize()) {
AddDataPage();
}
- if (has_dictionary_ && !fallback_) { CheckDictionarySizeLimit(); }
+ if (has_dictionary_ && !fallback_) {
+ CheckDictionarySizeLimit();
+ }
return values_to_write;
}
template <typename DType>
-inline int64_t TypedColumnWriter<DType>::WriteMiniBatchSpaced(int64_t num_values,
- const int16_t* def_levels, const int16_t* rep_levels, const uint8_t* valid_bits,
- int64_t valid_bits_offset, const T* values, int64_t* num_spaced_written) {
+inline int64_t TypedColumnWriter<DType>::WriteMiniBatchSpaced(
+ int64_t num_values, const int16_t* def_levels, const int16_t* rep_levels,
+ const uint8_t* valid_bits, int64_t valid_bits_offset, const T* values,
+ int64_t* num_spaced_written) {
int64_t values_to_write = 0;
int64_t spaced_values_to_write = 0;
// If the field is required and non-repeated, there are no definition levels
if (descr_->max_definition_level() > 0) {
// Minimal definition level for which spaced values are written
int16_t min_spaced_def_level = descr_->max_definition_level();
- if (descr_->schema_node()->is_optional()) { min_spaced_def_level--; }
+ if (descr_->schema_node()->is_optional()) {
+ min_spaced_def_level--;
+ }
for (int64_t i = 0; i < num_values; ++i) {
- if (def_levels[i] == descr_->max_definition_level()) { ++values_to_write; }
- if (def_levels[i] >= min_spaced_def_level) { ++spaced_values_to_write; }
+ if (def_levels[i] == descr_->max_definition_level()) {
+ ++values_to_write;
+ }
+ if (def_levels[i] >= min_spaced_def_level) {
+ ++spaced_values_to_write;
+ }
}
WriteDefinitionLevels(num_values, def_levels);
@@ -490,7 +522,9 @@ inline int64_t TypedColumnWriter<DType>::WriteMiniBatchSpaced(int64_t num_values
// A row could include more than one value
// Count the occasions where we start a new row
for (int64_t i = 0; i < num_values; ++i) {
- if (rep_levels[i] == 0) { num_rows_++; }
+ if (rep_levels[i] == 0) {
+ num_rows_++;
+ }
}
WriteRepetitionLevels(num_values, rep_levels);
@@ -512,7 +546,7 @@ inline int64_t TypedColumnWriter<DType>::WriteMiniBatchSpaced(int64_t num_values
if (page_statistics_ != nullptr) {
page_statistics_->UpdateSpaced(values, valid_bits, valid_bits_offset, values_to_write,
- num_values - values_to_write);
+ num_values - values_to_write);
}
num_buffered_values_ += num_values;
@@ -521,14 +555,16 @@ inline int64_t TypedColumnWriter<DType>::WriteMiniBatchSpaced(int64_t num_values
if (current_encoder_->EstimatedDataEncodedSize() >= properties_->data_pagesize()) {
AddDataPage();
}
- if (has_dictionary_ && !fallback_) { CheckDictionarySizeLimit(); }
+ if (has_dictionary_ && !fallback_) {
+ CheckDictionarySizeLimit();
+ }
return values_to_write;
}
template <typename DType>
void TypedColumnWriter<DType>::WriteBatch(int64_t num_values, const int16_t* def_levels,
- const int16_t* rep_levels, const T* values) {
+ const int16_t* rep_levels, const T* values) {
// We check for DataPage limits only after we have inserted the values. If a user
// writes a large number of values, the DataPage size can be much above the limit.
// The purpose of this chunking is to bound this. Even if a user writes large number
@@ -541,19 +577,19 @@ void TypedColumnWriter<DType>::WriteBatch(int64_t num_values, const int16_t* def
for (int round = 0; round < num_batches; round++) {
int64_t offset = round * write_batch_size;
int64_t num_values = WriteMiniBatch(write_batch_size, &def_levels[offset],
- &rep_levels[offset], &values[value_offset]);
+ &rep_levels[offset], &values[value_offset]);
value_offset += num_values;
}
// Write the remaining values
int64_t offset = num_batches * write_batch_size;
- WriteMiniBatch(
- num_remaining, &def_levels[offset], &rep_levels[offset], &values[value_offset]);
+ WriteMiniBatch(num_remaining, &def_levels[offset], &rep_levels[offset],
+ &values[value_offset]);
}
template <typename DType>
-void TypedColumnWriter<DType>::WriteBatchSpaced(int64_t num_values,
- const int16_t* def_levels, const int16_t* rep_levels, const uint8_t* valid_bits,
- int64_t valid_bits_offset, const T* values) {
+void TypedColumnWriter<DType>::WriteBatchSpaced(
+ int64_t num_values, const int16_t* def_levels, const int16_t* rep_levels,
+ const uint8_t* valid_bits, int64_t valid_bits_offset, const T* values) {
// We check for DataPage limits only after we have inserted the values. If a user
// writes a large number of values, the DataPage size can be much above the limit.
// The purpose of this chunking is to bound this. Even if a user writes large number
@@ -567,15 +603,15 @@ void TypedColumnWriter<DType>::WriteBatchSpaced(int64_t num_values,
for (int round = 0; round < num_batches; round++) {
int64_t offset = round * write_batch_size;
WriteMiniBatchSpaced(write_batch_size, &def_levels[offset], &rep_levels[offset],
- valid_bits, valid_bits_offset + values_offset, values + values_offset,
- &num_spaced_written);
+ valid_bits, valid_bits_offset + values_offset,
+ values + values_offset, &num_spaced_written);
values_offset += num_spaced_written;
}
// Write the remaining values
int64_t offset = num_batches * write_batch_size;
WriteMiniBatchSpaced(num_remaining, &def_levels[offset], &rep_levels[offset],
- valid_bits, valid_bits_offset + values_offset, values + values_offset,
- &num_spaced_written);
+ valid_bits, valid_bits_offset + values_offset,
+ values + values_offset, &num_spaced_written);
}
template <typename DType>
@@ -585,9 +621,11 @@ void TypedColumnWriter<DType>::WriteValues(int64_t num_values, const T* values)
template <typename DType>
void TypedColumnWriter<DType>::WriteValuesSpaced(int64_t num_values,
- const uint8_t* valid_bits, int64_t valid_bits_offset, const T* values) {
- current_encoder_->PutSpaced(
- values, static_cast<int>(num_values), valid_bits, valid_bits_offset);
+ const uint8_t* valid_bits,
+ int64_t valid_bits_offset,
+ const T* values) {
+ current_encoder_->PutSpaced(values, static_cast<int>(num_values), valid_bits,
+ valid_bits_offset);
}
template class PARQUET_TEMPLATE_EXPORT TypedColumnWriter<BooleanType>;
http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/b6f3caeb/src/parquet/column_writer.h
----------------------------------------------------------------------
diff --git a/src/parquet/column_writer.h b/src/parquet/column_writer.h
index 1637780..837d2d0 100644
--- a/src/parquet/column_writer.h
+++ b/src/parquet/column_writer.h
@@ -44,12 +44,12 @@ class PARQUET_EXPORT LevelEncoder {
LevelEncoder();
~LevelEncoder();
- static int MaxBufferSize(
- Encoding::type encoding, int16_t max_level, int num_buffered_values);
+ static int MaxBufferSize(Encoding::type encoding, int16_t max_level,
+ int num_buffered_values);
// Initialize the LevelEncoder.
void Init(Encoding::type encoding, int16_t max_level, int num_buffered_values,
- uint8_t* data, int data_size);
+ uint8_t* data, int data_size);
// Encodes a batch of levels from an array and returns the number of levels encoded
int Encode(int batch_size, const int16_t* levels);
@@ -73,12 +73,13 @@ static constexpr int WRITE_BATCH_SIZE = 1000;
class PARQUET_EXPORT ColumnWriter {
public:
ColumnWriter(ColumnChunkMetaDataBuilder*, std::unique_ptr<PageWriter>,
- int64_t expected_rows, bool has_dictionary, Encoding::type encoding,
- const WriterProperties* properties);
+ int64_t expected_rows, bool has_dictionary, Encoding::type encoding,
+ const WriterProperties* properties);
static std::shared_ptr<ColumnWriter> Make(ColumnChunkMetaDataBuilder*,
- std::unique_ptr<PageWriter>, int64_t expected_rows,
- const WriterProperties* properties);
+ std::unique_ptr<PageWriter>,
+ int64_t expected_rows,
+ const WriterProperties* properties);
Type::type type() const { return descr_->physical_type(); }
@@ -126,8 +127,8 @@ class PARQUET_EXPORT ColumnWriter {
void WriteRepetitionLevels(int64_t num_levels, const int16_t* levels);
// RLE encode the src_buffer into dest_buffer and return the encoded size
- int64_t RleEncodeLevels(
- const Buffer& src_buffer, ResizableBuffer* dest_buffer, int16_t max_level);
+ int64_t RleEncodeLevels(const Buffer& src_buffer, ResizableBuffer* dest_buffer,
+ int16_t max_level);
// Serialize the buffered Data Pages
void FlushBufferedDataPages();
@@ -194,13 +195,13 @@ class PARQUET_EXPORT TypedColumnWriter : public ColumnWriter {
typedef typename DType::c_type T;
TypedColumnWriter(ColumnChunkMetaDataBuilder* metadata,
- std::unique_ptr<PageWriter> pager, int64_t expected_rows, Encoding::type encoding,
- const WriterProperties* properties);
+ std::unique_ptr<PageWriter> pager, int64_t expected_rows,
+ Encoding::type encoding, const WriterProperties* properties);
// Write a batch of repetition levels, definition levels, and values to the
// column.
void WriteBatch(int64_t num_values, const int16_t* def_levels,
- const int16_t* rep_levels, const T* values);
+ const int16_t* rep_levels, const T* values);
/// Write a batch of repetition levels, definition levels, and values to the
/// column.
@@ -229,8 +230,8 @@ class PARQUET_EXPORT TypedColumnWriter : public ColumnWriter {
/// spacing for nulls on the lowest levels; input has the length
/// of the number of rows on the lowest nesting level.
void WriteBatchSpaced(int64_t num_values, const int16_t* def_levels,
- const int16_t* rep_levels, const uint8_t* valid_bits, int64_t valid_bits_offset,
- const T* values);
+ const int16_t* rep_levels, const uint8_t* valid_bits,
+ int64_t valid_bits_offset, const T* values);
protected:
std::shared_ptr<Buffer> GetValuesBuffer() override {
@@ -244,18 +245,19 @@ class PARQUET_EXPORT TypedColumnWriter : public ColumnWriter {
private:
int64_t WriteMiniBatch(int64_t num_values, const int16_t* def_levels,
- const int16_t* rep_levels, const T* values);
+ const int16_t* rep_levels, const T* values);
int64_t WriteMiniBatchSpaced(int64_t num_values, const int16_t* def_levels,
- const int16_t* rep_levels, const uint8_t* valid_bits, int64_t valid_bits_offset,
- const T* values, int64_t* num_spaced_written);
+ const int16_t* rep_levels, const uint8_t* valid_bits,
+ int64_t valid_bits_offset, const T* values,
+ int64_t* num_spaced_written);
typedef Encoder<DType> EncoderType;
// Write values to a temporary buffer before they are encoded into pages
void WriteValues(int64_t num_values, const T* values);
void WriteValuesSpaced(int64_t num_values, const uint8_t* valid_bits,
- int64_t valid_bits_offset, const T* values);
+ int64_t valid_bits_offset, const T* values);
std::unique_ptr<EncoderType> current_encoder_;
typedef TypedRowGroupStatistics<DType> TypedStats;
http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/b6f3caeb/src/parquet/encoding-benchmark.cc
----------------------------------------------------------------------
diff --git a/src/parquet/encoding-benchmark.cc b/src/parquet/encoding-benchmark.cc
index 1e93ba7..97eeefa 100644
--- a/src/parquet/encoding-benchmark.cc
+++ b/src/parquet/encoding-benchmark.cc
@@ -33,8 +33,8 @@ namespace benchmark {
std::shared_ptr<ColumnDescriptor> Int64Schema(Repetition::type repetition) {
auto node = PrimitiveNode::Make("int64", repetition, Type::INT64);
- return std::make_shared<ColumnDescriptor>(
- node, repetition != Repetition::REQUIRED, repetition == Repetition::REPEATED);
+ return std::make_shared<ColumnDescriptor>(node, repetition != Repetition::REQUIRED,
+ repetition == Repetition::REPEATED);
}
static void BM_PlainEncodingBoolean(::benchmark::State& state) {
@@ -99,8 +99,8 @@ static void BM_PlainDecodingInt64(::benchmark::State& state) {
BENCHMARK(BM_PlainDecodingInt64)->Range(1024, 65536);
template <typename Type>
-static void DecodeDict(
- std::vector<typename Type::c_type>& values, ::benchmark::State& state) {
+static void DecodeDict(std::vector<typename Type::c_type>& values,
+ ::benchmark::State& state) {
typedef typename Type::c_type T;
int num_values = values.size();
[2/5] parquet-cpp git commit: PARQUET-1068: Modify .clang-format to
use straight Google format with 90-character line width
Posted by we...@apache.org.
http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/b6f3caeb/src/parquet/encoding-internal.h
----------------------------------------------------------------------
diff --git a/src/parquet/encoding-internal.h b/src/parquet/encoding-internal.h
index 88d781f..69bac32 100644
--- a/src/parquet/encoding-internal.h
+++ b/src/parquet/encoding-internal.h
@@ -78,10 +78,12 @@ class PlainDecoder : public Decoder<DType> {
// Decode routine templated on C++ type rather than type enum
template <typename T>
-inline int DecodePlain(
- const uint8_t* data, int64_t data_size, int num_values, int type_length, T* out) {
+inline int DecodePlain(const uint8_t* data, int64_t data_size, int num_values,
+ int type_length, T* out) {
int bytes_to_decode = num_values * sizeof(T);
- if (data_size < bytes_to_decode) { ParquetException::EofException(); }
+ if (data_size < bytes_to_decode) {
+ ParquetException::EofException();
+ }
memcpy(out, data, bytes_to_decode);
return bytes_to_decode;
}
@@ -90,7 +92,7 @@ inline int DecodePlain(
// own data.
template <>
inline int DecodePlain<ByteArray>(const uint8_t* data, int64_t data_size, int num_values,
- int type_length, ByteArray* out) {
+ int type_length, ByteArray* out) {
int bytes_decoded = 0;
int increment;
for (int i = 0; i < num_values; ++i) {
@@ -109,9 +111,12 @@ inline int DecodePlain<ByteArray>(const uint8_t* data, int64_t data_size, int nu
// own their own data.
template <>
inline int DecodePlain<FixedLenByteArray>(const uint8_t* data, int64_t data_size,
- int num_values, int type_length, FixedLenByteArray* out) {
+ int num_values, int type_length,
+ FixedLenByteArray* out) {
int bytes_to_decode = type_length * num_values;
- if (data_size < bytes_to_decode) { ParquetException::EofException(); }
+ if (data_size < bytes_to_decode) {
+ ParquetException::EofException();
+ }
for (int i = 0; i < num_values; ++i) {
out[i].ptr = data;
data += type_length;
@@ -146,7 +151,9 @@ class PlainDecoder<BooleanType> : public Decoder<BooleanType> {
max_values = std::min(max_values, num_values_);
bool val;
for (int i = 0; i < max_values; ++i) {
- if (!bit_reader_.GetValue(1, &val)) { ParquetException::EofException(); }
+ if (!bit_reader_.GetValue(1, &val)) {
+ ParquetException::EofException();
+ }
BitUtil::SetArrayBit(buffer, i, val);
}
num_values_ -= max_values;
@@ -175,7 +182,7 @@ class PlainEncoder : public Encoder<DType> {
typedef typename DType::c_type T;
explicit PlainEncoder(const ColumnDescriptor* descr,
- ::arrow::MemoryPool* pool = ::arrow::default_memory_pool())
+ ::arrow::MemoryPool* pool = ::arrow::default_memory_pool())
: Encoder<DType>(descr, Encoding::PLAIN, pool) {
values_sink_.reset(new InMemoryOutputStream(pool));
}
@@ -193,13 +200,13 @@ template <>
class PlainEncoder<BooleanType> : public Encoder<BooleanType> {
public:
explicit PlainEncoder(const ColumnDescriptor* descr,
- ::arrow::MemoryPool* pool = ::arrow::default_memory_pool())
+ ::arrow::MemoryPool* pool = ::arrow::default_memory_pool())
: Encoder<BooleanType>(descr, Encoding::PLAIN, pool),
bits_available_(kInMemoryDefaultCapacity * 8),
bits_buffer_(AllocateBuffer(pool, kInMemoryDefaultCapacity)),
values_sink_(new InMemoryOutputStream(pool)) {
- bit_writer_.reset(new ::arrow::BitWriter(
- bits_buffer_->mutable_data(), static_cast<int>(bits_buffer_->size())));
+ bit_writer_.reset(new ::arrow::BitWriter(bits_buffer_->mutable_data(),
+ static_cast<int>(bits_buffer_->size())));
}
int64_t EstimatedDataEncodedSize() override {
@@ -284,7 +291,9 @@ inline void PlainEncoder<ByteArrayType>::Put(const ByteArray* src, int num_value
for (int i = 0; i < num_values; ++i) {
// Write the result to the output stream
values_sink_->Write(reinterpret_cast<const uint8_t*>(&src[i].len), sizeof(uint32_t));
- if (src[i].len > 0) { DCHECK(nullptr != src[i].ptr) << "Value ptr cannot be NULL"; }
+ if (src[i].len > 0) {
+ DCHECK(nullptr != src[i].ptr) << "Value ptr cannot be NULL";
+ }
values_sink_->Write(reinterpret_cast<const uint8_t*>(src[i].ptr), src[i].len);
}
}
@@ -296,8 +305,8 @@ inline void PlainEncoder<FLBAType>::Put(const FixedLenByteArray* src, int num_va
if (descr_->type_length() > 0) {
DCHECK(nullptr != src[i].ptr) << "Value ptr cannot be NULL";
}
- values_sink_->Write(
- reinterpret_cast<const uint8_t*>(src[i].ptr), descr_->type_length());
+ values_sink_->Write(reinterpret_cast<const uint8_t*>(src[i].ptr),
+ descr_->type_length());
}
}
@@ -313,7 +322,7 @@ class DictionaryDecoder : public Decoder<Type> {
// dictionary is not guaranteed to persist in memory after this call so the
// dictionary decoder needs to copy the data out if necessary.
explicit DictionaryDecoder(const ColumnDescriptor* descr,
- ::arrow::MemoryPool* pool = ::arrow::default_memory_pool())
+ ::arrow::MemoryPool* pool = ::arrow::default_memory_pool())
: Decoder<Type>(descr, Encoding::RLE_DICTIONARY),
dictionary_(0, pool),
byte_array_data_(AllocateBuffer(pool, 0)) {}
@@ -334,16 +343,21 @@ class DictionaryDecoder : public Decoder<Type> {
max_values = std::min(max_values, num_values_);
int decoded_values =
idx_decoder_.GetBatchWithDict(dictionary_.data(), buffer, max_values);
- if (decoded_values != max_values) { ParquetException::EofException(); }
+ if (decoded_values != max_values) {
+ ParquetException::EofException();
+ }
num_values_ -= max_values;
return max_values;
}
int DecodeSpaced(T* buffer, int num_values, int null_count, const uint8_t* valid_bits,
- int64_t valid_bits_offset) override {
- int decoded_values = idx_decoder_.GetBatchWithDictSpaced(dictionary_.data(), buffer,
- num_values, null_count, valid_bits, valid_bits_offset);
- if (decoded_values != num_values) { ParquetException::EofException(); }
+ int64_t valid_bits_offset) override {
+ int decoded_values =
+ idx_decoder_.GetBatchWithDictSpaced(dictionary_.data(), buffer, num_values,
+ null_count, valid_bits, valid_bits_offset);
+ if (decoded_values != num_values) {
+ ParquetException::EofException();
+ }
return decoded_values;
}
@@ -439,7 +453,7 @@ class DictEncoder : public Encoder<DType> {
typedef typename DType::c_type T;
explicit DictEncoder(const ColumnDescriptor* desc, ChunkedAllocator* pool = nullptr,
- ::arrow::MemoryPool* allocator = ::arrow::default_memory_pool())
+ ::arrow::MemoryPool* allocator = ::arrow::default_memory_pool())
: Encoder<DType>(desc, Encoding::PLAIN_DICTIONARY, allocator),
allocator_(allocator),
pool_(pool),
@@ -449,7 +463,9 @@ class DictEncoder : public Encoder<DType> {
dict_encoded_size_(0),
type_length_(desc->type_length()) {
hash_slots_.Assign(hash_table_size_, HASH_SLOT_EMPTY);
- if (!::arrow::CpuInfo::initialized()) { ::arrow::CpuInfo::Init(); }
+ if (!::arrow::CpuInfo::initialized()) {
+ ::arrow::CpuInfo::Init();
+ }
}
virtual ~DictEncoder() { DCHECK(buffered_indices_.empty()); }
@@ -498,8 +514,8 @@ class DictEncoder : public Encoder<DType> {
std::shared_ptr<Buffer> FlushValues() override {
std::shared_ptr<PoolBuffer> buffer =
AllocateBuffer(this->allocator_, EstimatedDataEncodedSize());
- int result_size = WriteIndices(
- buffer->mutable_data(), static_cast<int>(EstimatedDataEncodedSize()));
+ int result_size = WriteIndices(buffer->mutable_data(),
+ static_cast<int>(EstimatedDataEncodedSize()));
ClearIndices();
PARQUET_THROW_NOT_OK(buffer->Resize(result_size, false));
return buffer;
@@ -512,10 +528,12 @@ class DictEncoder : public Encoder<DType> {
}
void PutSpaced(const T* src, int num_values, const uint8_t* valid_bits,
- int64_t valid_bits_offset) override {
+ int64_t valid_bits_offset) override {
INIT_BITSET(valid_bits, static_cast<int>(valid_bits_offset));
for (int32_t i = 0; i < num_values; i++) {
- if (bitset_valid_bits & (1 << bit_offset_valid_bits)) { Put(src[i]); }
+ if (bitset_valid_bits & (1 << bit_offset_valid_bits)) {
+ Put(src[i]);
+ }
READ_NEXT_BITSET(valid_bits);
}
}
@@ -576,25 +594,29 @@ inline int DictEncoder<DType>::Hash(const typename DType::c_type& value) const {
template <>
inline int DictEncoder<ByteArrayType>::Hash(const ByteArray& value) const {
- if (value.len > 0) { DCHECK(nullptr != value.ptr) << "Value ptr cannot be NULL"; }
+ if (value.len > 0) {
+ DCHECK(nullptr != value.ptr) << "Value ptr cannot be NULL";
+ }
return HashUtil::Hash(value.ptr, value.len, 0);
}
template <>
inline int DictEncoder<FLBAType>::Hash(const FixedLenByteArray& value) const {
- if (type_length_ > 0) { DCHECK(nullptr != value.ptr) << "Value ptr cannot be NULL"; }
+ if (type_length_ > 0) {
+ DCHECK(nullptr != value.ptr) << "Value ptr cannot be NULL";
+ }
return HashUtil::Hash(value.ptr, type_length_, 0);
}
template <typename DType>
-inline bool DictEncoder<DType>::SlotDifferent(
- const typename DType::c_type& v, hash_slot_t slot) {
+inline bool DictEncoder<DType>::SlotDifferent(const typename DType::c_type& v,
+ hash_slot_t slot) {
return v != uniques_[slot];
}
template <>
-inline bool DictEncoder<FLBAType>::SlotDifferent(
- const FixedLenByteArray& v, hash_slot_t slot) {
+inline bool DictEncoder<FLBAType>::SlotDifferent(const FixedLenByteArray& v,
+ hash_slot_t slot) {
return 0 != memcmp(v.ptr, uniques_[slot].ptr, type_length_);
}
@@ -635,7 +657,9 @@ inline void DictEncoder<DType>::DoubleTableSize() {
for (int i = 0; i < hash_table_size_; ++i) {
index = hash_slots_[i];
- if (index == HASH_SLOT_EMPTY) { continue; }
+ if (index == HASH_SLOT_EMPTY) {
+ continue;
+ }
// Compute the hash value mod the new table size to start looking for an
// empty slot
@@ -669,7 +693,9 @@ inline void DictEncoder<DType>::AddDictKey(const typename DType::c_type& v) {
template <>
inline void DictEncoder<ByteArrayType>::AddDictKey(const ByteArray& v) {
uint8_t* heap = pool_->Allocate(v.len);
- if (UNLIKELY(v.len > 0 && heap == nullptr)) { throw ParquetException("out of memory"); }
+ if (UNLIKELY(v.len > 0 && heap == nullptr)) {
+ throw ParquetException("out of memory");
+ }
memcpy(heap, v.ptr, v.len);
uniques_.push_back(ByteArray(v.len, heap));
dict_encoded_size_ += v.len + sizeof(uint32_t);
@@ -708,7 +734,9 @@ inline void DictEncoder<ByteArrayType>::WriteDict(uint8_t* buffer) {
for (const ByteArray& v : uniques_) {
memcpy(buffer, reinterpret_cast<const void*>(&v.len), sizeof(uint32_t));
buffer += sizeof(uint32_t);
- if (v.len > 0) { DCHECK(nullptr != v.ptr) << "Value ptr cannot be NULL"; }
+ if (v.len > 0) {
+ DCHECK(nullptr != v.ptr) << "Value ptr cannot be NULL";
+ }
memcpy(buffer, v.ptr, v.len);
buffer += v.len;
}
@@ -717,7 +745,9 @@ inline void DictEncoder<ByteArrayType>::WriteDict(uint8_t* buffer) {
template <>
inline void DictEncoder<FLBAType>::WriteDict(uint8_t* buffer) {
for (const FixedLenByteArray& v : uniques_) {
- if (type_length_ > 0) { DCHECK(nullptr != v.ptr) << "Value ptr cannot be NULL"; }
+ if (type_length_ > 0) {
+ DCHECK(nullptr != v.ptr) << "Value ptr cannot be NULL";
+ }
memcpy(buffer, v.ptr, type_length_);
buffer += type_length_;
}
@@ -749,7 +779,7 @@ class DeltaBitPackDecoder : public Decoder<DType> {
typedef typename DType::c_type T;
explicit DeltaBitPackDecoder(const ColumnDescriptor* descr,
- ::arrow::MemoryPool* pool = ::arrow::default_memory_pool())
+ ::arrow::MemoryPool* pool = ::arrow::default_memory_pool())
: Decoder<DType>(descr, Encoding::DELTA_BINARY_PACKED),
delta_bit_widths_(new PoolBuffer(pool)) {
if (DType::type_num != Type::INT32 && DType::type_num != Type::INT64) {
@@ -775,7 +805,9 @@ class DeltaBitPackDecoder : public Decoder<DType> {
int32_t block_size;
if (!decoder_.GetVlqInt(&block_size)) ParquetException::EofException();
if (!decoder_.GetVlqInt(&num_mini_blocks_)) ParquetException::EofException();
- if (!decoder_.GetVlqInt(&values_current_block_)) { ParquetException::EofException(); }
+ if (!decoder_.GetVlqInt(&values_current_block_)) {
+ ParquetException::EofException();
+ }
if (!decoder_.GetZigZagVlqInt(&last_value_)) ParquetException::EofException();
PARQUET_THROW_NOT_OK(delta_bit_widths_->Resize(num_mini_blocks_, false));
@@ -841,7 +873,8 @@ class DeltaBitPackDecoder : public Decoder<DType> {
class DeltaLengthByteArrayDecoder : public Decoder<ByteArrayType> {
public:
- explicit DeltaLengthByteArrayDecoder(const ColumnDescriptor* descr,
+ explicit DeltaLengthByteArrayDecoder(
+ const ColumnDescriptor* descr,
::arrow::MemoryPool* pool = ::arrow::default_memory_pool())
: Decoder<ByteArrayType>(descr, Encoding::DELTA_LENGTH_BYTE_ARRAY),
len_decoder_(nullptr, pool) {}
@@ -882,7 +915,8 @@ class DeltaLengthByteArrayDecoder : public Decoder<ByteArrayType> {
class DeltaByteArrayDecoder : public Decoder<ByteArrayType> {
public:
- explicit DeltaByteArrayDecoder(const ColumnDescriptor* descr,
+ explicit DeltaByteArrayDecoder(
+ const ColumnDescriptor* descr,
::arrow::MemoryPool* pool = ::arrow::default_memory_pool())
: Decoder<ByteArrayType>(descr, Encoding::DELTA_BYTE_ARRAY),
prefix_len_decoder_(nullptr, pool),
http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/b6f3caeb/src/parquet/encoding-test.cc
----------------------------------------------------------------------
diff --git a/src/parquet/encoding-test.cc b/src/parquet/encoding-test.cc
index dcd813d..b0ca050 100644
--- a/src/parquet/encoding-test.cc
+++ b/src/parquet/encoding-test.cc
@@ -15,10 +15,10 @@
// specific language governing permissions and limitations
// under the License.
+#include <gtest/gtest.h>
#include <cstdint>
#include <cstdlib>
#include <cstring>
-#include <gtest/gtest.h>
#include <string>
#include <vector>
@@ -59,8 +59,8 @@ TEST(VectorBooleanTest, TestEncodeDecode) {
vector<uint8_t> decode_buffer(nbytes);
const uint8_t* decode_data = &decode_buffer[0];
- decoder.SetData(
- nvalues, encode_buffer->data(), static_cast<int>(encode_buffer->size()));
+ decoder.SetData(nvalues, encode_buffer->data(),
+ static_cast<int>(encode_buffer->size()));
int values_decoded = decoder.Decode(&decode_buffer[0], nvalues);
ASSERT_EQ(nvalues, values_decoded);
@@ -75,8 +75,8 @@ TEST(VectorBooleanTest, TestEncodeDecode) {
template <typename T>
void GenerateData(int num_values, T* out, vector<uint8_t>* heap) {
// seed the prng so failure is deterministic
- random_numbers(
- num_values, 0, std::numeric_limits<T>::min(), std::numeric_limits<T>::max(), out);
+ random_numbers(num_values, 0, std::numeric_limits<T>::min(),
+ std::numeric_limits<T>::max(), out);
}
template <>
@@ -89,7 +89,7 @@ template <>
void GenerateData<Int96>(int num_values, Int96* out, vector<uint8_t>* heap) {
// seed the prng so failure is deterministic
random_Int96_numbers(num_values, 0, std::numeric_limits<int32_t>::min(),
- std::numeric_limits<int32_t>::max(), out);
+ std::numeric_limits<int32_t>::max(), out);
}
template <>
@@ -135,7 +135,8 @@ std::shared_ptr<ColumnDescriptor> ExampleDescr() {
template <>
std::shared_ptr<ColumnDescriptor> ExampleDescr<FLBAType>() {
auto node = schema::PrimitiveNode::Make("name", Repetition::OPTIONAL,
- Type::FIXED_LEN_BYTE_ARRAY, LogicalType::DECIMAL, flba_length, 10, 2);
+ Type::FIXED_LEN_BYTE_ARRAY,
+ LogicalType::DECIMAL, flba_length, 10, 2);
return std::make_shared<ColumnDescriptor>(node, 0, 0);
}
@@ -220,8 +221,8 @@ class TestPlainEncoding : public TestEncodingBase<Type> {
encoder.Put(draws_, num_values_);
encode_buffer_ = encoder.FlushValues();
- decoder.SetData(
- num_values_, encode_buffer_->data(), static_cast<int>(encode_buffer_->size()));
+ decoder.SetData(num_values_, encode_buffer_->data(),
+ static_cast<int>(encode_buffer_->size()));
int values_decoded = decoder.Decode(decode_buf_, num_values_);
ASSERT_EQ(num_values_, values_decoded);
VerifyResults<T>(decode_buf_, draws_, num_values_);
@@ -233,15 +234,13 @@ class TestPlainEncoding : public TestEncodingBase<Type> {
TYPED_TEST_CASE(TestPlainEncoding, ParquetTypes);
-TYPED_TEST(TestPlainEncoding, BasicRoundTrip) {
- this->Execute(10000, 1);
-}
+TYPED_TEST(TestPlainEncoding, BasicRoundTrip) { this->Execute(10000, 1); }
// ----------------------------------------------------------------------
// Dictionary encoding tests
typedef ::testing::Types<Int32Type, Int64Type, Int96Type, FloatType, DoubleType,
- ByteArrayType, FLBAType>
+ ByteArrayType, FLBAType>
DictEncodedTypes;
template <typename Type>
@@ -267,7 +266,7 @@ class TestDictionaryEncoding : public TestEncodingBase<Type> {
PlainDecoder<Type> dict_decoder(descr_.get());
dict_decoder.SetData(encoder.num_entries(), dict_buffer_->data(),
- static_cast<int>(dict_buffer_->size()));
+ static_cast<int>(dict_buffer_->size()));
DictionaryDecoder<Type> decoder(descr_.get());
decoder.SetDict(&dict_decoder);
@@ -296,9 +295,7 @@ class TestDictionaryEncoding : public TestEncodingBase<Type> {
TYPED_TEST_CASE(TestDictionaryEncoding, DictEncodedTypes);
-TYPED_TEST(TestDictionaryEncoding, BasicRoundTrip) {
- this->Execute(2500, 2);
-}
+TYPED_TEST(TestDictionaryEncoding, BasicRoundTrip) { this->Execute(2500, 2); }
TEST(TestDictionaryEncoding, CannotDictDecodeBoolean) {
PlainDecoder<BooleanType> dict_decoder(nullptr);
http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/b6f3caeb/src/parquet/encoding.h
----------------------------------------------------------------------
diff --git a/src/parquet/encoding.h b/src/parquet/encoding.h
index ecf3940..339eb35 100644
--- a/src/parquet/encoding.h
+++ b/src/parquet/encoding.h
@@ -22,8 +22,8 @@
#include <memory>
#include <sstream>
-#include "arrow/util/bit-util.h"
#include "arrow/status.h"
+#include "arrow/util/bit-util.h"
#include "parquet/exception.h"
#include "parquet/schema.h"
@@ -49,13 +49,13 @@ class Encoder {
virtual std::shared_ptr<Buffer> FlushValues() = 0;
virtual void Put(const T* src, int num_values) = 0;
virtual void PutSpaced(const T* src, int num_values, const uint8_t* valid_bits,
- int64_t valid_bits_offset) {
+ int64_t valid_bits_offset) {
PoolBuffer buffer(pool_);
::arrow::Status status = buffer.Resize(num_values * sizeof(T));
if (!status.ok()) {
std::ostringstream ss;
- ss << "buffer.Resize failed in Encoder.PutSpaced in " <<
- __FILE__ << ", on line " << __LINE__;
+ ss << "buffer.Resize failed in Encoder.PutSpaced in " << __FILE__ << ", on line "
+ << __LINE__;
throw ParquetException(ss.str());
}
int32_t num_valid_values = 0;
@@ -73,8 +73,8 @@ class Encoder {
Encoding::type encoding() const { return encoding_; }
protected:
- explicit Encoder(
- const ColumnDescriptor* descr, Encoding::type encoding, ::arrow::MemoryPool* pool)
+ explicit Encoder(const ColumnDescriptor* descr, Encoding::type encoding,
+ ::arrow::MemoryPool* pool)
: descr_(descr), encoding_(encoding), pool_(pool) {}
// For accessing type-specific metadata, like FIXED_LEN_BYTE_ARRAY
@@ -106,7 +106,7 @@ class Decoder {
// num_values is the size of the def_levels and buffer arrays including the number of
// null values.
virtual int DecodeSpaced(T* buffer, int num_values, int null_count,
- const uint8_t* valid_bits, int64_t valid_bits_offset) {
+ const uint8_t* valid_bits, int64_t valid_bits_offset) {
int values_to_read = num_values - null_count;
int values_read = Decode(buffer, values_to_read);
if (values_read != values_to_read) {
http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/b6f3caeb/src/parquet/exception.cc
----------------------------------------------------------------------
diff --git a/src/parquet/exception.cc b/src/parquet/exception.cc
index 96bbc4b..480eecd 100644
--- a/src/parquet/exception.cc
+++ b/src/parquet/exception.cc
@@ -33,9 +33,7 @@ void ParquetException::NYI(const std::string& msg) {
throw ParquetException(ss.str());
}
-void ParquetException::Throw(const std::string& msg) {
- throw ParquetException(msg);
-}
+void ParquetException::Throw(const std::string& msg) { throw ParquetException(msg); }
ParquetException::ParquetException(const char* msg) : msg_(msg) {}
@@ -45,8 +43,6 @@ ParquetException::ParquetException(const char* msg, std::exception& e) : msg_(ms
ParquetException::~ParquetException() throw() {}
-const char* ParquetException::what() const throw() {
- return msg_.c_str();
-}
+const char* ParquetException::what() const throw() { return msg_.c_str(); }
} // namespace parquet
http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/b6f3caeb/src/parquet/file/file-deserialize-test.cc
----------------------------------------------------------------------
diff --git a/src/parquet/file/file-deserialize-test.cc b/src/parquet/file/file-deserialize-test.cc
index 59d2051..39ea1d9 100644
--- a/src/parquet/file/file-deserialize-test.cc
+++ b/src/parquet/file/file-deserialize-test.cc
@@ -37,10 +37,12 @@
namespace parquet {
-#define ASSERT_OK(expr) \
- do { \
- ::arrow::Status s = (expr); \
- if (!s.ok()) { FAIL() << s.ToString(); } \
+#define ASSERT_OK(expr) \
+ do { \
+ ::arrow::Status s = (expr); \
+ if (!s.ok()) { \
+ FAIL() << s.ToString(); \
+ } \
} while (0)
using ::arrow::io::BufferReader;
@@ -66,8 +68,8 @@ class TestPageSerde : public ::testing::Test {
ResetStream();
}
- void InitSerializedPageReader(
- int64_t num_rows, Compression::type codec = Compression::UNCOMPRESSED) {
+ void InitSerializedPageReader(int64_t num_rows,
+ Compression::type codec = Compression::UNCOMPRESSED) {
EndStream();
std::unique_ptr<InputStream> stream;
stream.reset(new InMemoryInputStream(out_buffer_));
@@ -75,7 +77,7 @@ class TestPageSerde : public ::testing::Test {
}
void WriteDataPageHeader(int max_serialized_len = 1024, int32_t uncompressed_size = 0,
- int32_t compressed_size = 0) {
+ int32_t compressed_size = 0) {
// Simplifying writing serialized data page headers which may or may not
// have meaningful data associated with them
@@ -176,8 +178,8 @@ TEST_F(TestPageSerde, TestFailLargePageHeaders) {
}
TEST_F(TestPageSerde, Compression) {
- Compression::type codec_types[3] = {
- Compression::GZIP, Compression::SNAPPY, Compression::BROTLI};
+ Compression::type codec_types[3] = {Compression::GZIP, Compression::SNAPPY,
+ Compression::BROTLI};
const int32_t num_rows = 32; // dummy value
data_page_header_.num_values = num_rows;
@@ -203,8 +205,8 @@ TEST_F(TestPageSerde, Compression) {
buffer.resize(max_compressed_size);
int64_t actual_size;
- ASSERT_OK(codec->Compress(
- data_size, data, max_compressed_size, &buffer[0], &actual_size));
+ ASSERT_OK(codec->Compress(data_size, data, max_compressed_size, &buffer[0],
+ &actual_size));
WriteDataPageHeader(1024, data_size, static_cast<int32_t>(actual_size));
out_stream_->Write(buffer.data(), actual_size);
@@ -246,8 +248,8 @@ class TestParquetFileReader : public ::testing::Test {
auto reader = std::make_shared<BufferReader>(buffer);
auto wrapper = std::unique_ptr<ArrowInputFile>(new ArrowInputFile(reader));
- ASSERT_THROW(
- reader_->Open(SerializedFile::Open(std::move(wrapper))), ParquetException);
+ ASSERT_THROW(reader_->Open(SerializedFile::Open(std::move(wrapper))),
+ ParquetException);
}
protected:
@@ -257,22 +259,22 @@ class TestParquetFileReader : public ::testing::Test {
TEST_F(TestParquetFileReader, InvalidHeader) {
const char* bad_header = "PAR2";
- auto buffer = std::make_shared<Buffer>(
- reinterpret_cast<const uint8_t*>(bad_header), strlen(bad_header));
+ auto buffer = std::make_shared<Buffer>(reinterpret_cast<const uint8_t*>(bad_header),
+ strlen(bad_header));
AssertInvalidFileThrows(buffer);
}
TEST_F(TestParquetFileReader, InvalidFooter) {
// File is smaller than FOOTER_SIZE
const char* bad_file = "PAR1PAR";
- auto buffer = std::make_shared<Buffer>(
- reinterpret_cast<const uint8_t*>(bad_file), strlen(bad_file));
+ auto buffer = std::make_shared<Buffer>(reinterpret_cast<const uint8_t*>(bad_file),
+ strlen(bad_file));
AssertInvalidFileThrows(buffer);
// Magic number incorrect
const char* bad_file2 = "PAR1PAR2";
- buffer = std::make_shared<Buffer>(
- reinterpret_cast<const uint8_t*>(bad_file2), strlen(bad_file2));
+ buffer = std::make_shared<Buffer>(reinterpret_cast<const uint8_t*>(bad_file2),
+ strlen(bad_file2));
AssertInvalidFileThrows(buffer);
}
http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/b6f3caeb/src/parquet/file/file-metadata-test.cc
----------------------------------------------------------------------
diff --git a/src/parquet/file/file-metadata-test.cc b/src/parquet/file/file-metadata-test.cc
index 10ce40c..a7c438c 100644
--- a/src/parquet/file/file-metadata-test.cc
+++ b/src/parquet/file/file-metadata-test.cc
@@ -15,11 +15,11 @@
// specific language governing permissions and limitations
// under the License.
+#include <gtest/gtest.h>
#include "parquet/file/metadata.h"
#include "parquet/schema.h"
#include "parquet/statistics.h"
#include "parquet/types.h"
-#include <gtest/gtest.h>
namespace parquet {
http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/b6f3caeb/src/parquet/file/file-serialize-test.cc
----------------------------------------------------------------------
diff --git a/src/parquet/file/file-serialize-test.cc b/src/parquet/file/file-serialize-test.cc
index 5736fa1..059df0b 100644
--- a/src/parquet/file/file-serialize-test.cc
+++ b/src/parquet/file/file-serialize-test.cc
@@ -65,8 +65,8 @@ class TestSerialize : public PrimitiveTypedTest<TestType> {
for (int i = 0; i < num_columns_; ++i) {
auto column_writer =
static_cast<TypedColumnWriter<TestType>*>(row_group_writer->NextColumn());
- column_writer->WriteBatch(
- 100, this->def_levels_.data(), nullptr, this->values_ptr_);
+ column_writer->WriteBatch(100, this->def_levels_.data(), nullptr,
+ this->values_ptr_);
column_writer->Close();
}
@@ -96,7 +96,7 @@ class TestSerialize : public PrimitiveTypedTest<TestType> {
std::static_pointer_cast<TypedColumnReader<TestType>>(rg_reader->Column(i));
this->SetupValuesOut(100);
col_reader->ReadBatch(100, def_levels_out.data(), rep_levels_out.data(),
- this->values_out_ptr_, &values_read);
+ this->values_out_ptr_, &values_read);
this->SyncValuesOut();
ASSERT_EQ(100, values_read);
ASSERT_EQ(this->values_, this->values_out_);
@@ -106,7 +106,7 @@ class TestSerialize : public PrimitiveTypedTest<TestType> {
};
typedef ::testing::Types<Int32Type, Int64Type, Int96Type, FloatType, DoubleType,
- BooleanType, ByteArrayType, FLBAType>
+ BooleanType, ByteArrayType, FLBAType>
TestTypes;
TYPED_TEST_CASE(TestSerialize, TestTypes);
@@ -123,9 +123,7 @@ TYPED_TEST(TestSerialize, SmallFileBrotli) {
this->FileSerializeTest(Compression::BROTLI);
}
-TYPED_TEST(TestSerialize, SmallFileGzip) {
- this->FileSerializeTest(Compression::GZIP);
-}
+TYPED_TEST(TestSerialize, SmallFileGzip) { this->FileSerializeTest(Compression::GZIP); }
} // namespace test
http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/b6f3caeb/src/parquet/file/metadata.cc
----------------------------------------------------------------------
diff --git a/src/parquet/file/metadata.cc b/src/parquet/file/metadata.cc
index b37ef4f..d5a96f3 100644
--- a/src/parquet/file/metadata.cc
+++ b/src/parquet/file/metadata.cc
@@ -91,8 +91,9 @@ SortOrder get_sort_order(LogicalType::type converted, Type::type primitive) {
template <typename DType>
static std::shared_ptr<RowGroupStatistics> MakeTypedColumnStats(
const format::ColumnMetaData& metadata, const ColumnDescriptor* descr) {
- return std::make_shared<TypedRowGroupStatistics<DType>>(descr, metadata.statistics.min,
- metadata.statistics.max, metadata.num_values - metadata.statistics.null_count,
+ return std::make_shared<TypedRowGroupStatistics<DType>>(
+ descr, metadata.statistics.min, metadata.statistics.max,
+ metadata.num_values - metadata.statistics.null_count,
metadata.statistics.null_count, metadata.statistics.distinct_count,
metadata.statistics.__isset.max || metadata.statistics.__isset.min);
}
@@ -125,7 +126,8 @@ std::shared_ptr<RowGroupStatistics> MakeColumnStats(
class ColumnChunkMetaData::ColumnChunkMetaDataImpl {
public:
explicit ColumnChunkMetaDataImpl(const format::ColumnChunk* column,
- const ColumnDescriptor* descr, const ApplicationVersion* writer_version)
+ const ColumnDescriptor* descr,
+ const ApplicationVersion* writer_version)
: column_(column), descr_(descr), writer_version_(writer_version) {
const format::ColumnMetaData& meta_data = column->meta_data;
for (auto encoding : meta_data.encodings) {
@@ -205,36 +207,30 @@ class ColumnChunkMetaData::ColumnChunkMetaDataImpl {
const ApplicationVersion* writer_version_;
};
-std::unique_ptr<ColumnChunkMetaData> ColumnChunkMetaData::Make(const uint8_t* metadata,
- const ColumnDescriptor* descr, const ApplicationVersion* writer_version) {
+std::unique_ptr<ColumnChunkMetaData> ColumnChunkMetaData::Make(
+ const uint8_t* metadata, const ColumnDescriptor* descr,
+ const ApplicationVersion* writer_version) {
return std::unique_ptr<ColumnChunkMetaData>(
new ColumnChunkMetaData(metadata, descr, writer_version));
}
ColumnChunkMetaData::ColumnChunkMetaData(const uint8_t* metadata,
- const ColumnDescriptor* descr, const ApplicationVersion* writer_version)
+ const ColumnDescriptor* descr,
+ const ApplicationVersion* writer_version)
: impl_{std::unique_ptr<ColumnChunkMetaDataImpl>(new ColumnChunkMetaDataImpl(
reinterpret_cast<const format::ColumnChunk*>(metadata), descr,
writer_version))} {}
ColumnChunkMetaData::~ColumnChunkMetaData() {}
// column chunk
-int64_t ColumnChunkMetaData::file_offset() const {
- return impl_->file_offset();
-}
+int64_t ColumnChunkMetaData::file_offset() const { return impl_->file_offset(); }
-const std::string& ColumnChunkMetaData::file_path() const {
- return impl_->file_path();
-}
+const std::string& ColumnChunkMetaData::file_path() const { return impl_->file_path(); }
// column metadata
-Type::type ColumnChunkMetaData::type() const {
- return impl_->type();
-}
+Type::type ColumnChunkMetaData::type() const { return impl_->type(); }
-int64_t ColumnChunkMetaData::num_values() const {
- return impl_->num_values();
-}
+int64_t ColumnChunkMetaData::num_values() const { return impl_->num_values(); }
std::shared_ptr<schema::ColumnPath> ColumnChunkMetaData::path_in_schema() const {
return impl_->path_in_schema();
@@ -244,9 +240,7 @@ std::shared_ptr<RowGroupStatistics> ColumnChunkMetaData::statistics() const {
return impl_->statistics();
}
-bool ColumnChunkMetaData::is_stats_set() const {
- return impl_->is_stats_set();
-}
+bool ColumnChunkMetaData::is_stats_set() const { return impl_->is_stats_set(); }
int64_t ColumnChunkMetaData::has_dictionary_page() const {
return impl_->has_dictionary_page();
@@ -284,7 +278,8 @@ int64_t ColumnChunkMetaData::total_compressed_size() const {
class RowGroupMetaData::RowGroupMetaDataImpl {
public:
explicit RowGroupMetaDataImpl(const format::RowGroup* row_group,
- const SchemaDescriptor* schema, const ApplicationVersion* writer_version)
+ const SchemaDescriptor* schema,
+ const ApplicationVersion* writer_version)
: row_group_(row_group), schema_(schema), writer_version_(writer_version) {}
~RowGroupMetaDataImpl() {}
@@ -314,34 +309,28 @@ class RowGroupMetaData::RowGroupMetaDataImpl {
const ApplicationVersion* writer_version_;
};
-std::unique_ptr<RowGroupMetaData> RowGroupMetaData::Make(const uint8_t* metadata,
- const SchemaDescriptor* schema, const ApplicationVersion* writer_version) {
+std::unique_ptr<RowGroupMetaData> RowGroupMetaData::Make(
+ const uint8_t* metadata, const SchemaDescriptor* schema,
+ const ApplicationVersion* writer_version) {
return std::unique_ptr<RowGroupMetaData>(
new RowGroupMetaData(metadata, schema, writer_version));
}
RowGroupMetaData::RowGroupMetaData(const uint8_t* metadata,
- const SchemaDescriptor* schema, const ApplicationVersion* writer_version)
+ const SchemaDescriptor* schema,
+ const ApplicationVersion* writer_version)
: impl_{std::unique_ptr<RowGroupMetaDataImpl>(new RowGroupMetaDataImpl(
reinterpret_cast<const format::RowGroup*>(metadata), schema, writer_version))} {
}
RowGroupMetaData::~RowGroupMetaData() {}
-int RowGroupMetaData::num_columns() const {
- return impl_->num_columns();
-}
+int RowGroupMetaData::num_columns() const { return impl_->num_columns(); }
-int64_t RowGroupMetaData::num_rows() const {
- return impl_->num_rows();
-}
+int64_t RowGroupMetaData::num_rows() const { return impl_->num_rows(); }
-int64_t RowGroupMetaData::total_byte_size() const {
- return impl_->total_byte_size();
-}
+int64_t RowGroupMetaData::total_byte_size() const { return impl_->total_byte_size(); }
-const SchemaDescriptor* RowGroupMetaData::schema() const {
- return impl_->schema();
-}
+const SchemaDescriptor* RowGroupMetaData::schema() const { return impl_->schema(); }
std::unique_ptr<ColumnChunkMetaData> RowGroupMetaData::ColumnChunk(int i) const {
return impl_->ColumnChunk(i);
@@ -408,8 +397,8 @@ class FileMetaData::FileMetaDataImpl {
uint32_t metadata_len_;
std::unique_ptr<format::FileMetaData> metadata_;
void InitSchema() {
- schema::FlatSchemaConverter converter(
- &metadata_->schema[0], static_cast<int>(metadata_->schema.size()));
+ schema::FlatSchemaConverter converter(&metadata_->schema[0],
+ static_cast<int>(metadata_->schema.size()));
schema_.Init(converter.Convert());
}
SchemaDescriptor schema_;
@@ -429,8 +418,8 @@ class FileMetaData::FileMetaDataImpl {
std::shared_ptr<const KeyValueMetadata> key_value_metadata_;
};
-std::shared_ptr<FileMetaData> FileMetaData::Make(
- const uint8_t* metadata, uint32_t* metadata_len) {
+std::shared_ptr<FileMetaData> FileMetaData::Make(const uint8_t* metadata,
+ uint32_t* metadata_len) {
// This FileMetaData ctor is private, not compatible with std::make_shared
return std::shared_ptr<FileMetaData>(new FileMetaData(metadata, metadata_len));
}
@@ -448,21 +437,13 @@ std::unique_ptr<RowGroupMetaData> FileMetaData::RowGroup(int i) const {
return impl_->RowGroup(i);
}
-uint32_t FileMetaData::size() const {
- return impl_->size();
-}
+uint32_t FileMetaData::size() const { return impl_->size(); }
-int FileMetaData::num_columns() const {
- return impl_->num_columns();
-}
+int FileMetaData::num_columns() const { return impl_->num_columns(); }
-int64_t FileMetaData::num_rows() const {
- return impl_->num_rows();
-}
+int64_t FileMetaData::num_rows() const { return impl_->num_rows(); }
-int FileMetaData::num_row_groups() const {
- return impl_->num_row_groups();
-}
+int FileMetaData::num_row_groups() const { return impl_->num_row_groups(); }
ParquetVersion::type FileMetaData::version() const {
switch (impl_->version()) {
@@ -481,25 +462,17 @@ const ApplicationVersion& FileMetaData::writer_version() const {
return impl_->writer_version();
}
-const std::string& FileMetaData::created_by() const {
- return impl_->created_by();
-}
+const std::string& FileMetaData::created_by() const { return impl_->created_by(); }
-int FileMetaData::num_schema_elements() const {
- return impl_->num_schema_elements();
-}
+int FileMetaData::num_schema_elements() const { return impl_->num_schema_elements(); }
-const SchemaDescriptor* FileMetaData::schema() const {
- return impl_->schema();
-}
+const SchemaDescriptor* FileMetaData::schema() const { return impl_->schema(); }
std::shared_ptr<const KeyValueMetadata> FileMetaData::key_value_metadata() const {
return impl_->key_value_metadata();
}
-void FileMetaData::WriteTo(OutputStream* dst) {
- return impl_->WriteTo(dst);
-}
+void FileMetaData::WriteTo(OutputStream* dst) { return impl_->WriteTo(dst); }
ApplicationVersion::ApplicationVersion(const std::string& created_by) {
boost::regex app_regex{ApplicationVersion::APPLICATION_FORMAT};
@@ -509,7 +482,7 @@ ApplicationVersion::ApplicationVersion(const std::string& created_by) {
std::string created_by_lower = created_by;
std::transform(created_by_lower.begin(), created_by_lower.end(),
- created_by_lower.begin(), ::tolower);
+ created_by_lower.begin(), ::tolower);
bool app_success = boost::regex_match(created_by_lower, app_matches, app_regex);
bool ver_success = false;
@@ -572,10 +545,14 @@ bool ApplicationVersion::HasCorrectStatistics(Type::type col_type) const {
// created_by is not populated, which could have been caused by
// parquet-mr during the same time as PARQUET-251, see PARQUET-297
- if (application_ == "unknown") { return true; }
+ if (application_ == "unknown") {
+ return true;
+ }
// PARQUET-251
- if (VersionLt(PARQUET_251_FIXED_VERSION)) { return false; }
+ if (VersionLt(PARQUET_251_FIXED_VERSION)) {
+ return false;
+ }
return true;
}
@@ -585,7 +562,8 @@ bool ApplicationVersion::HasCorrectStatistics(Type::type col_type) const {
class ColumnChunkMetaDataBuilder::ColumnChunkMetaDataBuilderImpl {
public:
explicit ColumnChunkMetaDataBuilderImpl(const std::shared_ptr<WriterProperties>& props,
- const ColumnDescriptor* column, uint8_t* contents)
+ const ColumnDescriptor* column,
+ uint8_t* contents)
: properties_(props), column_(column) {
column_chunk_ = reinterpret_cast<format::ColumnChunk*>(contents);
column_chunk_->meta_data.__set_type(ToThrift(column->physical_type()));
@@ -614,8 +592,9 @@ class ColumnChunkMetaDataBuilder::ColumnChunkMetaDataBuilderImpl {
}
void Finish(int64_t num_values, int64_t dictionary_page_offset,
- int64_t index_page_offset, int64_t data_page_offset, int64_t compressed_size,
- int64_t uncompressed_size, bool has_dictionary, bool dictionary_fallback) {
+ int64_t index_page_offset, int64_t data_page_offset,
+ int64_t compressed_size, int64_t uncompressed_size, bool has_dictionary,
+ bool dictionary_fallback) {
if (dictionary_page_offset > 0) {
column_chunk_->meta_data.__set_dictionary_page_offset(dictionary_page_offset);
column_chunk_->__set_file_offset(dictionary_page_offset + compressed_size);
@@ -642,7 +621,9 @@ class ColumnChunkMetaDataBuilder::ColumnChunkMetaDataBuilderImpl {
thrift_encodings.push_back(ToThrift(Encoding::RLE));
// Only PLAIN encoding is supported for fallback in V1
// TODO(majetideepak): Use user specified encoding for V2
- if (dictionary_fallback) { thrift_encodings.push_back(ToThrift(Encoding::PLAIN)); }
+ if (dictionary_fallback) {
+ thrift_encodings.push_back(ToThrift(Encoding::PLAIN));
+ }
column_chunk_->meta_data.__set_encodings(thrift_encodings);
}
@@ -678,16 +659,16 @@ void ColumnChunkMetaDataBuilder::set_file_path(const std::string& path) {
}
void ColumnChunkMetaDataBuilder::Finish(int64_t num_values,
- int64_t dictionary_page_offset, int64_t index_page_offset, int64_t data_page_offset,
- int64_t compressed_size, int64_t uncompressed_size, bool has_dictionary,
- bool dictionary_fallback) {
+ int64_t dictionary_page_offset,
+ int64_t index_page_offset,
+ int64_t data_page_offset, int64_t compressed_size,
+ int64_t uncompressed_size, bool has_dictionary,
+ bool dictionary_fallback) {
impl_->Finish(num_values, dictionary_page_offset, index_page_offset, data_page_offset,
- compressed_size, uncompressed_size, has_dictionary, dictionary_fallback);
+ compressed_size, uncompressed_size, has_dictionary, dictionary_fallback);
}
-void ColumnChunkMetaDataBuilder::WriteTo(OutputStream* sink) {
- impl_->WriteTo(sink);
-}
+void ColumnChunkMetaDataBuilder::WriteTo(OutputStream* sink) { impl_->WriteTo(sink); }
const ColumnDescriptor* ColumnChunkMetaDataBuilder::descr() const {
return impl_->descr();
@@ -700,8 +681,8 @@ void ColumnChunkMetaDataBuilder::SetStatistics(const EncodedStatistics& result)
class RowGroupMetaDataBuilder::RowGroupMetaDataBuilderImpl {
public:
explicit RowGroupMetaDataBuilderImpl(int64_t num_rows,
- const std::shared_ptr<WriterProperties>& props, const SchemaDescriptor* schema,
- uint8_t* contents)
+ const std::shared_ptr<WriterProperties>& props,
+ const SchemaDescriptor* schema, uint8_t* contents)
: properties_(props), schema_(schema), current_column_(0) {
row_group_ = reinterpret_cast<format::RowGroup*>(contents);
InitializeColumns(schema->num_columns());
@@ -717,7 +698,8 @@ class RowGroupMetaDataBuilder::RowGroupMetaDataBuilderImpl {
throw ParquetException(ss.str());
}
auto column = schema_->Column(current_column_);
- auto column_builder = ColumnChunkMetaDataBuilder::Make(properties_, column,
+ auto column_builder = ColumnChunkMetaDataBuilder::Make(
+ properties_, column,
reinterpret_cast<uint8_t*>(&row_group_->columns[current_column_++]));
auto column_builder_ptr = column_builder.get();
column_builders_.push_back(std::move(column_builder));
@@ -761,16 +743,16 @@ class RowGroupMetaDataBuilder::RowGroupMetaDataBuilderImpl {
int current_column_;
};
-std::unique_ptr<RowGroupMetaDataBuilder> RowGroupMetaDataBuilder::Make(int64_t num_rows,
- const std::shared_ptr<WriterProperties>& props, const SchemaDescriptor* schema_,
- uint8_t* contents) {
+std::unique_ptr<RowGroupMetaDataBuilder> RowGroupMetaDataBuilder::Make(
+ int64_t num_rows, const std::shared_ptr<WriterProperties>& props,
+ const SchemaDescriptor* schema_, uint8_t* contents) {
return std::unique_ptr<RowGroupMetaDataBuilder>(
new RowGroupMetaDataBuilder(num_rows, props, schema_, contents));
}
-RowGroupMetaDataBuilder::RowGroupMetaDataBuilder(int64_t num_rows,
- const std::shared_ptr<WriterProperties>& props, const SchemaDescriptor* schema_,
- uint8_t* contents)
+RowGroupMetaDataBuilder::RowGroupMetaDataBuilder(
+ int64_t num_rows, const std::shared_ptr<WriterProperties>& props,
+ const SchemaDescriptor* schema_, uint8_t* contents)
: impl_{std::unique_ptr<RowGroupMetaDataBuilderImpl>(
new RowGroupMetaDataBuilderImpl(num_rows, props, schema_, contents))} {}
@@ -780,13 +762,9 @@ ColumnChunkMetaDataBuilder* RowGroupMetaDataBuilder::NextColumnChunk() {
return impl_->NextColumnChunk();
}
-int RowGroupMetaDataBuilder::current_column() const {
- return impl_->current_column();
-}
+int RowGroupMetaDataBuilder::current_column() const { return impl_->current_column(); }
-int RowGroupMetaDataBuilder::num_columns() {
- return impl_->num_columns();
-}
+int RowGroupMetaDataBuilder::num_columns() { return impl_->num_columns(); }
void RowGroupMetaDataBuilder::Finish(int64_t total_bytes_written) {
impl_->Finish(total_bytes_written);
@@ -796,8 +774,8 @@ void RowGroupMetaDataBuilder::Finish(int64_t total_bytes_written) {
// TODO(PARQUET-595) Support key_value_metadata
class FileMetaDataBuilder::FileMetaDataBuilderImpl {
public:
- explicit FileMetaDataBuilderImpl(const SchemaDescriptor* schema,
- const std::shared_ptr<WriterProperties>& props,
+ explicit FileMetaDataBuilderImpl(
+ const SchemaDescriptor* schema, const std::shared_ptr<WriterProperties>& props,
const std::shared_ptr<const KeyValueMetadata>& key_value_metadata)
: properties_(props), schema_(schema), key_value_metadata_(key_value_metadata) {
metadata_.reset(new format::FileMetaData());
@@ -879,8 +857,8 @@ std::unique_ptr<FileMetaDataBuilder> FileMetaDataBuilder::Make(
new FileMetaDataBuilder(schema, props, key_value_metadata));
}
-FileMetaDataBuilder::FileMetaDataBuilder(const SchemaDescriptor* schema,
- const std::shared_ptr<WriterProperties>& props,
+FileMetaDataBuilder::FileMetaDataBuilder(
+ const SchemaDescriptor* schema, const std::shared_ptr<WriterProperties>& props,
const std::shared_ptr<const KeyValueMetadata>& key_value_metadata)
: impl_{std::unique_ptr<FileMetaDataBuilderImpl>(
new FileMetaDataBuilderImpl(schema, props, key_value_metadata))} {}
@@ -891,8 +869,6 @@ RowGroupMetaDataBuilder* FileMetaDataBuilder::AppendRowGroup(int64_t num_rows) {
return impl_->AppendRowGroup(num_rows);
}
-std::unique_ptr<FileMetaData> FileMetaDataBuilder::Finish() {
- return impl_->Finish();
-}
+std::unique_ptr<FileMetaData> FileMetaDataBuilder::Finish() { return impl_->Finish(); }
} // namespace parquet
http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/b6f3caeb/src/parquet/file/metadata.h
----------------------------------------------------------------------
diff --git a/src/parquet/file/metadata.h b/src/parquet/file/metadata.h
index 2dc50d1..4250f6b 100644
--- a/src/parquet/file/metadata.h
+++ b/src/parquet/file/metadata.h
@@ -98,8 +98,9 @@ class ApplicationVersion {
class PARQUET_EXPORT ColumnChunkMetaData {
public:
// API convenience to get a MetaData accessor
- static std::unique_ptr<ColumnChunkMetaData> Make(const uint8_t* metadata,
- const ColumnDescriptor* descr, const ApplicationVersion* writer_version = NULL);
+ static std::unique_ptr<ColumnChunkMetaData> Make(
+ const uint8_t* metadata, const ColumnDescriptor* descr,
+ const ApplicationVersion* writer_version = NULL);
~ColumnChunkMetaData();
@@ -124,7 +125,7 @@ class PARQUET_EXPORT ColumnChunkMetaData {
private:
explicit ColumnChunkMetaData(const uint8_t* metadata, const ColumnDescriptor* descr,
- const ApplicationVersion* writer_version = NULL);
+ const ApplicationVersion* writer_version = NULL);
// PIMPL Idiom
class ColumnChunkMetaDataImpl;
std::unique_ptr<ColumnChunkMetaDataImpl> impl_;
@@ -133,8 +134,9 @@ class PARQUET_EXPORT ColumnChunkMetaData {
class PARQUET_EXPORT RowGroupMetaData {
public:
// API convenience to get a MetaData accessor
- static std::unique_ptr<RowGroupMetaData> Make(const uint8_t* metadata,
- const SchemaDescriptor* schema, const ApplicationVersion* writer_version = NULL);
+ static std::unique_ptr<RowGroupMetaData> Make(
+ const uint8_t* metadata, const SchemaDescriptor* schema,
+ const ApplicationVersion* writer_version = NULL);
~RowGroupMetaData();
@@ -148,7 +150,7 @@ class PARQUET_EXPORT RowGroupMetaData {
private:
explicit RowGroupMetaData(const uint8_t* metadata, const SchemaDescriptor* schema,
- const ApplicationVersion* writer_version = NULL);
+ const ApplicationVersion* writer_version = NULL);
// PIMPL Idiom
class RowGroupMetaDataImpl;
std::unique_ptr<RowGroupMetaDataImpl> impl_;
@@ -159,8 +161,8 @@ class FileMetaDataBuilder;
class PARQUET_EXPORT FileMetaData {
public:
// API convenience to get a MetaData accessor
- static std::shared_ptr<FileMetaData> Make(
- const uint8_t* serialized_metadata, uint32_t* metadata_len);
+ static std::shared_ptr<FileMetaData> Make(const uint8_t* serialized_metadata,
+ uint32_t* metadata_len);
~FileMetaData();
@@ -212,15 +214,16 @@ class PARQUET_EXPORT ColumnChunkMetaDataBuilder {
const ColumnDescriptor* descr() const;
// commit the metadata
void Finish(int64_t num_values, int64_t dictonary_page_offset,
- int64_t index_page_offset, int64_t data_page_offset, int64_t compressed_size,
- int64_t uncompressed_size, bool has_dictionary, bool dictionary_fallback);
+ int64_t index_page_offset, int64_t data_page_offset,
+ int64_t compressed_size, int64_t uncompressed_size, bool has_dictionary,
+ bool dictionary_fallback);
// For writing metadata at end of column chunk
void WriteTo(OutputStream* sink);
private:
explicit ColumnChunkMetaDataBuilder(const std::shared_ptr<WriterProperties>& props,
- const ColumnDescriptor* column, uint8_t* contents);
+ const ColumnDescriptor* column, uint8_t* contents);
// PIMPL Idiom
class ColumnChunkMetaDataBuilderImpl;
std::unique_ptr<ColumnChunkMetaDataBuilderImpl> impl_;
@@ -229,9 +232,9 @@ class PARQUET_EXPORT ColumnChunkMetaDataBuilder {
class PARQUET_EXPORT RowGroupMetaDataBuilder {
public:
// API convenience to get a MetaData reader
- static std::unique_ptr<RowGroupMetaDataBuilder> Make(int64_t num_rows,
- const std::shared_ptr<WriterProperties>& props, const SchemaDescriptor* schema_,
- uint8_t* contents);
+ static std::unique_ptr<RowGroupMetaDataBuilder> Make(
+ int64_t num_rows, const std::shared_ptr<WriterProperties>& props,
+ const SchemaDescriptor* schema_, uint8_t* contents);
~RowGroupMetaDataBuilder();
@@ -244,8 +247,8 @@ class PARQUET_EXPORT RowGroupMetaDataBuilder {
private:
explicit RowGroupMetaDataBuilder(int64_t num_rows,
- const std::shared_ptr<WriterProperties>& props, const SchemaDescriptor* schema_,
- uint8_t* contents);
+ const std::shared_ptr<WriterProperties>& props,
+ const SchemaDescriptor* schema_, uint8_t* contents);
// PIMPL Idiom
class RowGroupMetaDataBuilderImpl;
std::unique_ptr<RowGroupMetaDataBuilderImpl> impl_;
@@ -254,8 +257,8 @@ class PARQUET_EXPORT RowGroupMetaDataBuilder {
class PARQUET_EXPORT FileMetaDataBuilder {
public:
// API convenience to get a MetaData reader
- static std::unique_ptr<FileMetaDataBuilder> Make(const SchemaDescriptor* schema,
- const std::shared_ptr<WriterProperties>& props,
+ static std::unique_ptr<FileMetaDataBuilder> Make(
+ const SchemaDescriptor* schema, const std::shared_ptr<WriterProperties>& props,
const std::shared_ptr<const KeyValueMetadata>& key_value_metadata = nullptr);
~FileMetaDataBuilder();
@@ -266,8 +269,8 @@ class PARQUET_EXPORT FileMetaDataBuilder {
std::unique_ptr<FileMetaData> Finish();
private:
- explicit FileMetaDataBuilder(const SchemaDescriptor* schema,
- const std::shared_ptr<WriterProperties>& props,
+ explicit FileMetaDataBuilder(
+ const SchemaDescriptor* schema, const std::shared_ptr<WriterProperties>& props,
const std::shared_ptr<const KeyValueMetadata>& key_value_metadata = nullptr);
// PIMPL Idiom
class FileMetaDataBuilderImpl;
http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/b6f3caeb/src/parquet/file/printer.cc
----------------------------------------------------------------------
diff --git a/src/parquet/file/printer.cc b/src/parquet/file/printer.cc
index 52b2598..2ba9474 100644
--- a/src/parquet/file/printer.cc
+++ b/src/parquet/file/printer.cc
@@ -33,7 +33,7 @@ namespace parquet {
#define COL_WIDTH "30"
void ParquetFilePrinter::DebugPrint(std::ostream& stream, std::list<int> selected_columns,
- bool print_values, const char* filename) {
+ bool print_values, const char* filename) {
const FileMetaData* file_metadata = fileReader->metadata().get();
stream << "File Name: " << filename << "\n";
@@ -101,7 +101,9 @@ void ParquetFilePrinter::DebugPrint(std::ostream& stream, std::list<int> selecte
<< std::endl;
}
- if (!print_values) { continue; }
+ if (!print_values) {
+ continue;
+ }
static constexpr int bufsize = 25;
char buffer[bufsize];
@@ -117,7 +119,7 @@ void ParquetFilePrinter::DebugPrint(std::ostream& stream, std::list<int> selecte
std::string fmt = ss.str();
snprintf(buffer, bufsize, fmt.c_str(),
- file_metadata->schema()->Column(i)->name().c_str());
+ file_metadata->schema()->Column(i)->name().c_str());
stream << buffer;
// This is OK in this method as long as the RowGroupReader does not get
@@ -140,8 +142,8 @@ void ParquetFilePrinter::DebugPrint(std::ostream& stream, std::list<int> selecte
}
}
-void ParquetFilePrinter::JSONPrint(
- std::ostream& stream, std::list<int> selected_columns, const char* filename) {
+void ParquetFilePrinter::JSONPrint(std::ostream& stream, std::list<int> selected_columns,
+ const char* filename) {
const FileMetaData* file_metadata = fileReader->metadata().get();
stream << "{\n";
stream << " \"FileName\": \"" << filename << "\",\n";
@@ -174,7 +176,9 @@ void ParquetFilePrinter::JSONPrint(
<< " \"LogicalType\": \"" << LogicalTypeToString(descr->logical_type())
<< "\" }";
c++;
- if (c != static_cast<int>(selected_columns.size())) { stream << ",\n"; }
+ if (c != static_cast<int>(selected_columns.size())) {
+ stream << ",\n";
+ }
}
stream << "\n ],\n \"RowGroups\": [\n";
@@ -223,11 +227,15 @@ void ParquetFilePrinter::JSONPrint(
// end of a ColumnChunk
stream << "\" }";
c1++;
- if (c1 != static_cast<int>(selected_columns.size())) { stream << ",\n"; }
+ if (c1 != static_cast<int>(selected_columns.size())) {
+ stream << ",\n";
+ }
}
stream << "\n ]\n }";
- if ((r + 1) != static_cast<int>(file_metadata->num_row_groups())) { stream << ",\n"; }
+ if ((r + 1) != static_cast<int>(file_metadata->num_row_groups())) {
+ stream << ",\n";
+ }
}
stream << "\n ]\n}\n";
}
http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/b6f3caeb/src/parquet/file/printer.h
----------------------------------------------------------------------
diff --git a/src/parquet/file/printer.h b/src/parquet/file/printer.h
index a72c17d..a18af4a 100644
--- a/src/parquet/file/printer.h
+++ b/src/parquet/file/printer.h
@@ -38,10 +38,10 @@ class PARQUET_EXPORT ParquetFilePrinter {
~ParquetFilePrinter() {}
void DebugPrint(std::ostream& stream, std::list<int> selected_columns,
- bool print_values = true, const char* fileame = "No Name");
+ bool print_values = true, const char* fileame = "No Name");
void JSONPrint(std::ostream& stream, std::list<int> selected_columns,
- const char* filename = "No Name");
+ const char* filename = "No Name");
};
} // namespace parquet
http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/b6f3caeb/src/parquet/file/reader-internal.cc
----------------------------------------------------------------------
diff --git a/src/parquet/file/reader-internal.cc b/src/parquet/file/reader-internal.cc
index c39d3eb..5ff7398 100644
--- a/src/parquet/file/reader-internal.cc
+++ b/src/parquet/file/reader-internal.cc
@@ -17,10 +17,10 @@
#include "parquet/file/reader-internal.h"
+#include <string.h>
#include <algorithm>
#include <exception>
#include <ostream>
-#include <string.h>
#include <string>
#include <vector>
@@ -42,7 +42,8 @@ namespace parquet {
// assembled in a serialized stream for storing in a Parquet files
SerializedPageReader::SerializedPageReader(std::unique_ptr<InputStream> stream,
- int64_t total_num_rows, Compression::type codec, MemoryPool* pool)
+ int64_t total_num_rows,
+ Compression::type codec, MemoryPool* pool)
: stream_(std::move(stream)),
decompression_buffer_(AllocateBuffer(pool, 0)),
seen_num_rows_(0),
@@ -66,7 +67,9 @@ std::shared_ptr<Page> SerializedPageReader::NextPage() {
// until a maximum allowed header limit
while (true) {
buffer = stream_->Peek(allowed_page_size, &bytes_available);
- if (bytes_available == 0) { return std::shared_ptr<Page>(nullptr); }
+ if (bytes_available == 0) {
+ return std::shared_ptr<Page>(nullptr);
+ }
// This gets used, then set by DeserializeThriftMsg
header_size = static_cast<uint32_t>(bytes_available);
@@ -92,7 +95,9 @@ std::shared_ptr<Page> SerializedPageReader::NextPage() {
// Read the compressed data page.
buffer = stream_->Read(compressed_len, &bytes_read);
- if (bytes_read != compressed_len) { ParquetException::EofException(); }
+ if (bytes_read != compressed_len) {
+ ParquetException::EofException();
+ }
// Uncompress it if we need to
if (decompressor_ != NULL) {
@@ -100,8 +105,9 @@ std::shared_ptr<Page> SerializedPageReader::NextPage() {
if (uncompressed_len > static_cast<int>(decompression_buffer_->size())) {
PARQUET_THROW_NOT_OK(decompression_buffer_->Resize(uncompressed_len, false));
}
- PARQUET_THROW_NOT_OK(decompressor_->Decompress(compressed_len, buffer,
- uncompressed_len, decompression_buffer_->mutable_data()));
+ PARQUET_THROW_NOT_OK(
+ decompressor_->Decompress(compressed_len, buffer, uncompressed_len,
+ decompression_buffer_->mutable_data()));
buffer = decompression_buffer_->data();
}
@@ -114,15 +120,20 @@ std::shared_ptr<Page> SerializedPageReader::NextPage() {
bool is_sorted = dict_header.__isset.is_sorted ? dict_header.is_sorted : false;
return std::make_shared<DictionaryPage>(page_buffer, dict_header.num_values,
- FromThrift(dict_header.encoding), is_sorted);
+ FromThrift(dict_header.encoding),
+ is_sorted);
} else if (current_page_header_.type == format::PageType::DATA_PAGE) {
const format::DataPageHeader& header = current_page_header_.data_page_header;
EncodedStatistics page_statistics;
if (header.__isset.statistics) {
const format::Statistics& stats = header.statistics;
- if (stats.__isset.max) { page_statistics.set_max(stats.max); }
- if (stats.__isset.min) { page_statistics.set_min(stats.min); }
+ if (stats.__isset.max) {
+ page_statistics.set_max(stats.max);
+ }
+ if (stats.__isset.min) {
+ page_statistics.set_min(stats.min);
+ }
if (stats.__isset.null_count) {
page_statistics.set_null_count(stats.null_count);
}
@@ -133,8 +144,9 @@ std::shared_ptr<Page> SerializedPageReader::NextPage() {
seen_num_rows_ += header.num_values;
- return std::make_shared<DataPage>(page_buffer, header.num_values,
- FromThrift(header.encoding), FromThrift(header.definition_level_encoding),
+ return std::make_shared<DataPage>(
+ page_buffer, header.num_values, FromThrift(header.encoding),
+ FromThrift(header.definition_level_encoding),
FromThrift(header.repetition_level_encoding), page_statistics);
} else if (current_page_header_.type == format::PageType::DATA_PAGE_V2) {
const format::DataPageHeaderV2& header = current_page_header_.data_page_header_v2;
@@ -142,10 +154,10 @@ std::shared_ptr<Page> SerializedPageReader::NextPage() {
seen_num_rows_ += header.num_values;
- return std::make_shared<DataPageV2>(page_buffer, header.num_values,
- header.num_nulls, header.num_rows, FromThrift(header.encoding),
- header.definition_levels_byte_length, header.repetition_levels_byte_length,
- is_compressed);
+ return std::make_shared<DataPageV2>(
+ page_buffer, header.num_values, header.num_nulls, header.num_rows,
+ FromThrift(header.encoding), header.definition_levels_byte_length,
+ header.repetition_levels_byte_length, is_compressed);
} else {
// We don't know what this page type is. We're allowed to skip non-data
// pages.
@@ -156,7 +168,8 @@ std::shared_ptr<Page> SerializedPageReader::NextPage() {
}
SerializedRowGroup::SerializedRowGroup(RandomAccessSource* source,
- FileMetaData* file_metadata, int row_group_number, const ReaderProperties& props)
+ FileMetaData* file_metadata, int row_group_number,
+ const ReaderProperties& props)
: source_(source), file_metadata_(file_metadata), properties_(props) {
row_group_metadata_ = file_metadata->RowGroup(row_group_number);
}
@@ -164,9 +177,7 @@ const RowGroupMetaData* SerializedRowGroup::metadata() const {
return row_group_metadata_.get();
}
-const ReaderProperties* SerializedRowGroup::properties() const {
- return &properties_;
-}
+const ReaderProperties* SerializedRowGroup::properties() const { return &properties_; }
// For PARQUET-816
static constexpr int64_t kMaxDictHeaderSize = 100;
@@ -196,8 +207,9 @@ std::unique_ptr<PageReader> SerializedRowGroup::GetColumnPageReader(int i) {
stream = properties_.GetStream(source_, col_start, col_length);
- return std::unique_ptr<PageReader>(new SerializedPageReader(std::move(stream),
- col->num_values(), col->compression(), properties_.memory_pool()));
+ return std::unique_ptr<PageReader>(
+ new SerializedPageReader(std::move(stream), col->num_values(), col->compression(),
+ properties_.memory_pool()));
}
// ----------------------------------------------------------------------
@@ -227,14 +239,13 @@ std::unique_ptr<ParquetFileReader::Contents> SerializedFile::Open(
return result;
}
-void SerializedFile::Close() {
- source_->Close();
-}
+void SerializedFile::Close() { source_->Close(); }
SerializedFile::~SerializedFile() {
try {
Close();
- } catch (...) {}
+ } catch (...) {
+ }
}
std::shared_ptr<RowGroupReader> SerializedFile::GetRowGroup(int i) {
@@ -243,11 +254,10 @@ std::shared_ptr<RowGroupReader> SerializedFile::GetRowGroup(int i) {
return std::make_shared<RowGroupReader>(std::move(contents));
}
-std::shared_ptr<FileMetaData> SerializedFile::metadata() const {
- return file_metadata_;
-}
+std::shared_ptr<FileMetaData> SerializedFile::metadata() const { return file_metadata_; }
-SerializedFile::SerializedFile(std::unique_ptr<RandomAccessSource> source,
+SerializedFile::SerializedFile(
+ std::unique_ptr<RandomAccessSource> source,
const ReaderProperties& props = default_reader_properties())
: source_(std::move(source)), properties_(props) {}
@@ -284,7 +294,7 @@ void SerializedFile::ParseMetaData() {
// Check if the footer_buffer contains the entire metadata
if (footer_read_size >= (metadata_len + FOOTER_SIZE)) {
memcpy(metadata_buffer->mutable_data(),
- footer_buffer + (footer_read_size - metadata_len - FOOTER_SIZE), metadata_len);
+ footer_buffer + (footer_read_size - metadata_len - FOOTER_SIZE), metadata_len);
} else {
bytes_read =
source_->ReadAt(metadata_start, metadata_len, metadata_buffer->mutable_data());
http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/b6f3caeb/src/parquet/file/reader-internal.h
----------------------------------------------------------------------
diff --git a/src/parquet/file/reader-internal.h b/src/parquet/file/reader-internal.h
index 2667fa8..282c534 100644
--- a/src/parquet/file/reader-internal.h
+++ b/src/parquet/file/reader-internal.h
@@ -50,8 +50,8 @@ static constexpr uint32_t DEFAULT_PAGE_HEADER_SIZE = 16 * 1024;
class PARQUET_EXPORT SerializedPageReader : public PageReader {
public:
SerializedPageReader(std::unique_ptr<InputStream> stream, int64_t num_rows,
- Compression::type codec,
- ::arrow::MemoryPool* pool = ::arrow::default_memory_pool());
+ Compression::type codec,
+ ::arrow::MemoryPool* pool = ::arrow::default_memory_pool());
virtual ~SerializedPageReader() {}
@@ -84,7 +84,7 @@ class PARQUET_EXPORT SerializedPageReader : public PageReader {
class PARQUET_EXPORT SerializedRowGroup : public RowGroupReader::Contents {
public:
SerializedRowGroup(RandomAccessSource* source, FileMetaData* file_metadata,
- int row_group_number, const ReaderProperties& props);
+ int row_group_number, const ReaderProperties& props);
virtual const RowGroupMetaData* metadata() const;
@@ -118,8 +118,8 @@ class PARQUET_EXPORT SerializedFile : public ParquetFileReader::Contents {
private:
// This class takes ownership of the provided data source
- explicit SerializedFile(
- std::unique_ptr<RandomAccessSource> source, const ReaderProperties& props);
+ explicit SerializedFile(std::unique_ptr<RandomAccessSource> source,
+ const ReaderProperties& props);
std::unique_ptr<RandomAccessSource> source_;
std::shared_ptr<FileMetaData> file_metadata_;
http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/b6f3caeb/src/parquet/file/reader.cc
----------------------------------------------------------------------
diff --git a/src/parquet/file/reader.cc b/src/parquet/file/reader.cc
index d3247cb..6e78fa4 100644
--- a/src/parquet/file/reader.cc
+++ b/src/parquet/file/reader.cc
@@ -51,14 +51,13 @@ std::shared_ptr<ColumnReader> RowGroupReader::Column(int i) {
const ColumnDescriptor* descr = metadata()->schema()->Column(i);
std::unique_ptr<PageReader> page_reader = contents_->GetColumnPageReader(i);
- return ColumnReader::Make(descr, std::move(page_reader),
+ return ColumnReader::Make(
+ descr, std::move(page_reader),
const_cast<ReaderProperties*>(contents_->properties())->memory_pool());
}
// Returns the rowgroup metadata
-const RowGroupMetaData* RowGroupReader::metadata() const {
- return contents_->metadata();
-}
+const RowGroupMetaData* RowGroupReader::metadata() const { return contents_->metadata(); }
// ----------------------------------------------------------------------
// ParquetFileReader public API
@@ -67,7 +66,8 @@ ParquetFileReader::ParquetFileReader() {}
ParquetFileReader::~ParquetFileReader() {
try {
Close();
- } catch (...) {}
+ } catch (...) {
+ }
}
std::unique_ptr<ParquetFileReader> ParquetFileReader::Open(
@@ -86,8 +86,8 @@ std::unique_ptr<ParquetFileReader> ParquetFileReader::Open(
return result;
}
-std::unique_ptr<ParquetFileReader> ParquetFileReader::OpenFile(const std::string& path,
- bool memory_map, const ReaderProperties& props,
+std::unique_ptr<ParquetFileReader> ParquetFileReader::OpenFile(
+ const std::string& path, bool memory_map, const ReaderProperties& props,
const std::shared_ptr<FileMetaData>& metadata) {
std::shared_ptr<::arrow::io::ReadableFileInterface> source;
if (memory_map) {
@@ -110,7 +110,9 @@ void ParquetFileReader::Open(std::unique_ptr<ParquetFileReader::Contents> conten
}
void ParquetFileReader::Close() {
- if (contents_) { contents_->Close(); }
+ if (contents_) {
+ contents_->Close();
+ }
}
std::shared_ptr<FileMetaData> ParquetFileReader::metadata() const {
http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/b6f3caeb/src/parquet/file/reader.h
----------------------------------------------------------------------
diff --git a/src/parquet/file/reader.h b/src/parquet/file/reader.h
index 1cd287c..eb85235 100644
--- a/src/parquet/file/reader.h
+++ b/src/parquet/file/reader.h
@@ -98,8 +98,9 @@ class PARQUET_EXPORT ParquetFileReader {
// API Convenience to open a serialized Parquet file on disk, using Arrow IO
// interfaces.
- static std::unique_ptr<ParquetFileReader> OpenFile(const std::string& path,
- bool memory_map = true, const ReaderProperties& props = default_reader_properties(),
+ static std::unique_ptr<ParquetFileReader> OpenFile(
+ const std::string& path, bool memory_map = true,
+ const ReaderProperties& props = default_reader_properties(),
const std::shared_ptr<FileMetaData>& metadata = nullptr);
void Open(std::unique_ptr<Contents> contents);
@@ -117,8 +118,8 @@ class PARQUET_EXPORT ParquetFileReader {
};
// Read only Parquet file metadata
-std::shared_ptr<FileMetaData> PARQUET_EXPORT ReadMetaData(
- const std::shared_ptr<::arrow::io::ReadableFileInterface>& source);
+std::shared_ptr<FileMetaData> PARQUET_EXPORT
+ReadMetaData(const std::shared_ptr<::arrow::io::ReadableFileInterface>& source);
} // namespace parquet
http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/b6f3caeb/src/parquet/file/writer-internal.cc
----------------------------------------------------------------------
diff --git a/src/parquet/file/writer-internal.cc b/src/parquet/file/writer-internal.cc
index 1cceb95..5702d2c 100644
--- a/src/parquet/file/writer-internal.cc
+++ b/src/parquet/file/writer-internal.cc
@@ -42,7 +42,8 @@ static constexpr uint8_t PARQUET_MAGIC[4] = {'P', 'A', 'R', '1'};
// SerializedPageWriter
SerializedPageWriter::SerializedPageWriter(OutputStream* sink, Compression::type codec,
- ColumnChunkMetaDataBuilder* metadata, MemoryPool* pool)
+ ColumnChunkMetaDataBuilder* metadata,
+ MemoryPool* pool)
: sink_(sink),
metadata_(metadata),
pool_(pool),
@@ -68,14 +69,15 @@ static format::Statistics ToThrift(const EncodedStatistics& row_group_statistics
void SerializedPageWriter::Close(bool has_dictionary, bool fallback) {
// index_page_offset = 0 since they are not supported
metadata_->Finish(num_values_, dictionary_page_offset_, 0, data_page_offset_,
- total_compressed_size_, total_uncompressed_size_, has_dictionary, fallback);
+ total_compressed_size_, total_uncompressed_size_, has_dictionary,
+ fallback);
// Write metadata at end of column chunk
metadata_->WriteTo(sink_);
}
-void SerializedPageWriter::Compress(
- const Buffer& src_buffer, ResizableBuffer* dest_buffer) {
+void SerializedPageWriter::Compress(const Buffer& src_buffer,
+ ResizableBuffer* dest_buffer) {
DCHECK(compressor_ != nullptr);
// Compress the data
@@ -87,8 +89,9 @@ void SerializedPageWriter::Compress(
PARQUET_THROW_NOT_OK(dest_buffer->Resize(max_compressed_size, false));
int64_t compressed_size;
- PARQUET_THROW_NOT_OK(compressor_->Compress(src_buffer.size(), src_buffer.data(),
- max_compressed_size, dest_buffer->mutable_data(), &compressed_size));
+ PARQUET_THROW_NOT_OK(
+ compressor_->Compress(src_buffer.size(), src_buffer.data(), max_compressed_size,
+ dest_buffer->mutable_data(), &compressed_size));
PARQUET_THROW_NOT_OK(dest_buffer->Resize(compressed_size, false));
}
@@ -113,7 +116,9 @@ int64_t SerializedPageWriter::WriteDataPage(const CompressedDataPage& page) {
// TODO(PARQUET-594) crc checksum
int64_t start_pos = sink_->Tell();
- if (data_page_offset_ == 0) { data_page_offset_ = start_pos; }
+ if (data_page_offset_ == 0) {
+ data_page_offset_ = start_pos;
+ }
int64_t header_size =
SerializeThriftMsg(&page_header, sizeof(format::PageHeader), sink_);
@@ -151,7 +156,9 @@ int64_t SerializedPageWriter::WriteDictionaryPage(const DictionaryPage& page) {
// TODO(PARQUET-594) crc checksum
int64_t start_pos = sink_->Tell();
- if (dictionary_page_offset_ == 0) { dictionary_page_offset_ = start_pos; }
+ if (dictionary_page_offset_ == 0) {
+ dictionary_page_offset_ = start_pos;
+ }
int64_t header_size =
SerializeThriftMsg(&page_header, sizeof(format::PageHeader), sink_);
sink_->Write(compressed_data->data(), compressed_data->size());
@@ -165,32 +172,28 @@ int64_t SerializedPageWriter::WriteDictionaryPage(const DictionaryPage& page) {
// ----------------------------------------------------------------------
// RowGroupSerializer
-int RowGroupSerializer::num_columns() const {
- return metadata_->num_columns();
-}
+int RowGroupSerializer::num_columns() const { return metadata_->num_columns(); }
-int64_t RowGroupSerializer::num_rows() const {
- return num_rows_;
-}
+int64_t RowGroupSerializer::num_rows() const { return num_rows_; }
ColumnWriter* RowGroupSerializer::NextColumn() {
// Throws an error if more columns are being written
auto col_meta = metadata_->NextColumnChunk();
- if (current_column_writer_) { total_bytes_written_ += current_column_writer_->Close(); }
+ if (current_column_writer_) {
+ total_bytes_written_ += current_column_writer_->Close();
+ }
const ColumnDescriptor* column_descr = col_meta->descr();
std::unique_ptr<PageWriter> pager(
new SerializedPageWriter(sink_, properties_->compression(column_descr->path()),
- col_meta, properties_->memory_pool()));
+ col_meta, properties_->memory_pool()));
current_column_writer_ =
ColumnWriter::Make(col_meta, std::move(pager), num_rows_, properties_);
return current_column_writer_.get();
}
-int RowGroupSerializer::current_column() const {
- return metadata_->current_column();
-}
+int RowGroupSerializer::current_column() const { return metadata_->current_column(); }
void RowGroupSerializer::Close() {
if (!closed_) {
@@ -220,7 +223,9 @@ std::unique_ptr<ParquetFileWriter::Contents> FileSerializer::Open(
void FileSerializer::Close() {
if (is_open_) {
- if (row_group_writer_) { row_group_writer_->Close(); }
+ if (row_group_writer_) {
+ row_group_writer_->Close();
+ }
row_group_writer_.reset();
// Write magic bytes and metadata
@@ -231,24 +236,20 @@ void FileSerializer::Close() {
}
}
-int FileSerializer::num_columns() const {
- return schema_.num_columns();
-}
+int FileSerializer::num_columns() const { return schema_.num_columns(); }
-int FileSerializer::num_row_groups() const {
- return num_row_groups_;
-}
+int FileSerializer::num_row_groups() const { return num_row_groups_; }
-int64_t FileSerializer::num_rows() const {
- return num_rows_;
-}
+int64_t FileSerializer::num_rows() const { return num_rows_; }
const std::shared_ptr<WriterProperties>& FileSerializer::properties() const {
return properties_;
}
RowGroupWriter* FileSerializer::AppendRowGroup(int64_t num_rows) {
- if (row_group_writer_) { row_group_writer_->Close(); }
+ if (row_group_writer_) {
+ row_group_writer_->Close();
+ }
num_rows_ += num_rows;
num_row_groups_++;
auto rg_metadata = metadata_->AppendRowGroup(num_rows);
@@ -261,7 +262,8 @@ RowGroupWriter* FileSerializer::AppendRowGroup(int64_t num_rows) {
FileSerializer::~FileSerializer() {
try {
Close();
- } catch (...) {}
+ } catch (...) {
+ }
}
void FileSerializer::WriteMetaData() {
@@ -278,8 +280,8 @@ void FileSerializer::WriteMetaData() {
sink_->Write(PARQUET_MAGIC, 4);
}
-FileSerializer::FileSerializer(const std::shared_ptr<OutputStream>& sink,
- const std::shared_ptr<GroupNode>& schema,
+FileSerializer::FileSerializer(
+ const std::shared_ptr<OutputStream>& sink, const std::shared_ptr<GroupNode>& schema,
const std::shared_ptr<WriterProperties>& properties,
const std::shared_ptr<const KeyValueMetadata>& key_value_metadata)
: ParquetFileWriter::Contents(schema, key_value_metadata),
http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/b6f3caeb/src/parquet/file/writer-internal.h
----------------------------------------------------------------------
diff --git a/src/parquet/file/writer-internal.h b/src/parquet/file/writer-internal.h
index 447579a..5aba994 100644
--- a/src/parquet/file/writer-internal.h
+++ b/src/parquet/file/writer-internal.h
@@ -40,8 +40,8 @@ namespace parquet {
class SerializedPageWriter : public PageWriter {
public:
SerializedPageWriter(OutputStream* sink, Compression::type codec,
- ColumnChunkMetaDataBuilder* metadata,
- ::arrow::MemoryPool* pool = ::arrow::default_memory_pool());
+ ColumnChunkMetaDataBuilder* metadata,
+ ::arrow::MemoryPool* pool = ::arrow::default_memory_pool());
virtual ~SerializedPageWriter() {}
@@ -76,7 +76,8 @@ class SerializedPageWriter : public PageWriter {
class RowGroupSerializer : public RowGroupWriter::Contents {
public:
RowGroupSerializer(int64_t num_rows, OutputStream* sink,
- RowGroupMetaDataBuilder* metadata, const WriterProperties* properties)
+ RowGroupMetaDataBuilder* metadata,
+ const WriterProperties* properties)
: num_rows_(num_rows),
sink_(sink),
metadata_(metadata),
@@ -126,7 +127,8 @@ class FileSerializer : public ParquetFileWriter::Contents {
virtual ~FileSerializer();
private:
- explicit FileSerializer(const std::shared_ptr<OutputStream>& sink,
+ explicit FileSerializer(
+ const std::shared_ptr<OutputStream>& sink,
const std::shared_ptr<schema::GroupNode>& schema,
const std::shared_ptr<WriterProperties>& properties,
const std::shared_ptr<const KeyValueMetadata>& key_value_metadata);
http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/b6f3caeb/src/parquet/file/writer.cc
----------------------------------------------------------------------
diff --git a/src/parquet/file/writer.cc b/src/parquet/file/writer.cc
index d52c25c..a1b9227 100644
--- a/src/parquet/file/writer.cc
+++ b/src/parquet/file/writer.cc
@@ -37,21 +37,13 @@ void RowGroupWriter::Close() {
}
}
-ColumnWriter* RowGroupWriter::NextColumn() {
- return contents_->NextColumn();
-}
+ColumnWriter* RowGroupWriter::NextColumn() { return contents_->NextColumn(); }
-int RowGroupWriter::current_column() {
- return contents_->current_column();
-}
+int RowGroupWriter::current_column() { return contents_->current_column(); }
-int RowGroupWriter::num_columns() const {
- return contents_->num_columns();
-}
+int RowGroupWriter::num_columns() const { return contents_->num_columns(); }
-int64_t RowGroupWriter::num_rows() const {
- return contents_->num_rows();
-}
+int64_t RowGroupWriter::num_rows() const { return contents_->num_rows(); }
// ----------------------------------------------------------------------
// ParquetFileWriter public API
@@ -61,7 +53,8 @@ ParquetFileWriter::ParquetFileWriter() {}
ParquetFileWriter::~ParquetFileWriter() {
try {
Close();
- } catch (...) {}
+ } catch (...) {
+ }
}
std::unique_ptr<ParquetFileWriter> ParquetFileWriter::Open(
@@ -69,8 +62,8 @@ std::unique_ptr<ParquetFileWriter> ParquetFileWriter::Open(
const std::shared_ptr<GroupNode>& schema,
const std::shared_ptr<WriterProperties>& properties,
const std::shared_ptr<const KeyValueMetadata>& key_value_metadata) {
- return Open(
- std::make_shared<ArrowOutputStream>(sink), schema, properties, key_value_metadata);
+ return Open(std::make_shared<ArrowOutputStream>(sink), schema, properties,
+ key_value_metadata);
}
std::unique_ptr<ParquetFileWriter> ParquetFileWriter::Open(
@@ -84,25 +77,17 @@ std::unique_ptr<ParquetFileWriter> ParquetFileWriter::Open(
return result;
}
-const SchemaDescriptor* ParquetFileWriter::schema() const {
- return contents_->schema();
-}
+const SchemaDescriptor* ParquetFileWriter::schema() const { return contents_->schema(); }
const ColumnDescriptor* ParquetFileWriter::descr(int i) const {
return contents_->schema()->Column(i);
}
-int ParquetFileWriter::num_columns() const {
- return contents_->num_columns();
-}
+int ParquetFileWriter::num_columns() const { return contents_->num_columns(); }
-int64_t ParquetFileWriter::num_rows() const {
- return contents_->num_rows();
-}
+int64_t ParquetFileWriter::num_rows() const { return contents_->num_rows(); }
-int ParquetFileWriter::num_row_groups() const {
- return contents_->num_row_groups();
-}
+int ParquetFileWriter::num_row_groups() const { return contents_->num_row_groups(); }
const std::shared_ptr<const KeyValueMetadata>& ParquetFileWriter::key_value_metadata()
const {
http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/b6f3caeb/src/parquet/file/writer.h
----------------------------------------------------------------------
diff --git a/src/parquet/file/writer.h b/src/parquet/file/writer.h
index b22281a..c2b3f91 100644
--- a/src/parquet/file/writer.h
+++ b/src/parquet/file/writer.h
@@ -88,7 +88,7 @@ class PARQUET_EXPORT ParquetFileWriter {
// An implementation of the Contents class is defined in the .cc file
struct Contents {
Contents(const std::shared_ptr<::parquet::schema::GroupNode>& schema,
- const std::shared_ptr<const KeyValueMetadata>& key_value_metadata)
+ const std::shared_ptr<const KeyValueMetadata>& key_value_metadata)
: schema_(), key_value_metadata_(key_value_metadata) {
schema_.Init(schema);
}
http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/b6f3caeb/src/parquet/properties-test.cc
----------------------------------------------------------------------
diff --git a/src/parquet/properties-test.cc b/src/parquet/properties-test.cc
index 0e6d725..c48fc34 100644
--- a/src/parquet/properties-test.cc
+++ b/src/parquet/properties-test.cc
@@ -52,12 +52,12 @@ TEST(TestWriterProperties, AdvancedHandling) {
std::shared_ptr<WriterProperties> props = builder.build();
ASSERT_EQ(Compression::GZIP, props->compression(ColumnPath::FromDotString("gzip")));
- ASSERT_EQ(
- Compression::SNAPPY, props->compression(ColumnPath::FromDotString("delta-length")));
- ASSERT_EQ(
- Encoding::DELTA_BINARY_PACKED, props->encoding(ColumnPath::FromDotString("gzip")));
+ ASSERT_EQ(Compression::SNAPPY,
+ props->compression(ColumnPath::FromDotString("delta-length")));
+ ASSERT_EQ(Encoding::DELTA_BINARY_PACKED,
+ props->encoding(ColumnPath::FromDotString("gzip")));
ASSERT_EQ(Encoding::DELTA_LENGTH_BYTE_ARRAY,
- props->encoding(ColumnPath::FromDotString("delta-length")));
+ props->encoding(ColumnPath::FromDotString("delta-length")));
}
} // namespace test
http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/b6f3caeb/src/parquet/properties.h
----------------------------------------------------------------------
diff --git a/src/parquet/properties.h b/src/parquet/properties.h
index 3ebc3b7..77b0305 100644
--- a/src/parquet/properties.h
+++ b/src/parquet/properties.h
@@ -48,8 +48,8 @@ class PARQUET_EXPORT ReaderProperties {
::arrow::MemoryPool* memory_pool() const { return pool_; }
- std::unique_ptr<InputStream> GetStream(
- RandomAccessSource* source, int64_t start, int64_t num_bytes) {
+ std::unique_ptr<InputStream> GetStream(RandomAccessSource* source, int64_t start,
+ int64_t num_bytes) {
std::unique_ptr<InputStream> stream;
if (buffered_stream_enabled_) {
stream.reset(
@@ -92,9 +92,9 @@ static constexpr Compression::type DEFAULT_COMPRESSION_TYPE = Compression::UNCOM
class PARQUET_EXPORT ColumnProperties {
public:
ColumnProperties(Encoding::type encoding = DEFAULT_ENCODING,
- Compression::type codec = DEFAULT_COMPRESSION_TYPE,
- bool dictionary_enabled = DEFAULT_IS_DICTIONARY_ENABLED,
- bool statistics_enabled = DEFAULT_ARE_STATISTICS_ENABLED)
+ Compression::type codec = DEFAULT_COMPRESSION_TYPE,
+ bool dictionary_enabled = DEFAULT_IS_DICTIONARY_ENABLED,
+ bool statistics_enabled = DEFAULT_ARE_STATISTICS_ENABLED)
: encoding(encoding),
codec(codec),
dictionary_enabled(dictionary_enabled),
@@ -215,8 +215,8 @@ class PARQUET_EXPORT WriterProperties {
* This either apply if dictionary encoding is disabled or if we fallback
* as the dictionary grew too large.
*/
- Builder* encoding(
- const std::shared_ptr<schema::ColumnPath>& path, Encoding::type encoding_type) {
+ Builder* encoding(const std::shared_ptr<schema::ColumnPath>& path,
+ Encoding::type encoding_type) {
return this->encoding(path->ToDotString(), encoding_type);
}
@@ -230,8 +230,8 @@ class PARQUET_EXPORT WriterProperties {
return this;
}
- Builder* compression(
- const std::shared_ptr<schema::ColumnPath>& path, Compression::type codec) {
+ Builder* compression(const std::shared_ptr<schema::ColumnPath>& path,
+ Compression::type codec) {
return this->compression(path->ToDotString(), codec);
}
@@ -273,18 +273,16 @@ class PARQUET_EXPORT WriterProperties {
return it->second;
};
- for (const auto& item : encodings_)
- get(item.first).encoding = item.second;
- for (const auto& item : codecs_)
- get(item.first).codec = item.second;
+ for (const auto& item : encodings_) get(item.first).encoding = item.second;
+ for (const auto& item : codecs_) get(item.first).codec = item.second;
for (const auto& item : dictionary_enabled_)
get(item.first).dictionary_enabled = item.second;
for (const auto& item : statistics_enabled_)
get(item.first).statistics_enabled = item.second;
- return std::shared_ptr<WriterProperties>(new WriterProperties(pool_,
- dictionary_pagesize_limit_, write_batch_size_, pagesize_, version_, created_by_,
- default_column_properties_, column_properties));
+ return std::shared_ptr<WriterProperties>(new WriterProperties(
+ pool_, dictionary_pagesize_limit_, write_batch_size_, pagesize_, version_,
+ created_by_, default_column_properties_, column_properties));
}
private:
@@ -355,7 +353,8 @@ class PARQUET_EXPORT WriterProperties {
}
private:
- explicit WriterProperties(::arrow::MemoryPool* pool, int64_t dictionary_pagesize_limit,
+ explicit WriterProperties(
+ ::arrow::MemoryPool* pool, int64_t dictionary_pagesize_limit,
int64_t write_batch_size, int64_t pagesize, ParquetVersion::type version,
const std::string& created_by, const ColumnProperties& default_column_properties,
const std::unordered_map<std::string, ColumnProperties>& column_properties)
http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/b6f3caeb/src/parquet/public-api-test.cc
----------------------------------------------------------------------
diff --git a/src/parquet/public-api-test.cc b/src/parquet/public-api-test.cc
index 4d6f675..09d399b 100644
--- a/src/parquet/public-api-test.cc
+++ b/src/parquet/public-api-test.cc
@@ -40,9 +40,7 @@ TEST(TestPublicAPI, DoesNotIncludeZlib) {
#endif
}
-void ThrowsParquetException() {
- throw parquet::ParquetException("This function throws");
-}
+void ThrowsParquetException() { throw parquet::ParquetException("This function throws"); }
TEST(TestPublicAPI, CanThrowParquetException) {
ASSERT_THROW(ThrowsParquetException(), parquet::ParquetException);
[5/5] parquet-cpp git commit: PARQUET-1068: Modify .clang-format to
use straight Google format with 90-character line width
Posted by we...@apache.org.
PARQUET-1068: Modify .clang-format to use straight Google format with 90-character line width
The main change is horizontal alignment. We should also do a clang-tidy pass sometime to do some further scrubbing.
Author: Wes McKinney <we...@twosigma.com>
Closes #375 from wesm/PARQUET-1068 and squashes the following commits:
b81145d [Wes McKinney] Modify .clang-format to use straight Google format with 90-character line width
Project: http://git-wip-us.apache.org/repos/asf/parquet-cpp/repo
Commit: http://git-wip-us.apache.org/repos/asf/parquet-cpp/commit/b6f3caeb
Tree: http://git-wip-us.apache.org/repos/asf/parquet-cpp/tree/b6f3caeb
Diff: http://git-wip-us.apache.org/repos/asf/parquet-cpp/diff/b6f3caeb
Branch: refs/heads/master
Commit: b6f3caeb0776889310fe4d6a0e677cc3626cb389
Parents: af96ff0
Author: Wes McKinney <we...@twosigma.com>
Authored: Mon Jul 31 11:14:52 2017 -0400
Committer: Wes McKinney <we...@twosigma.com>
Committed: Mon Jul 31 11:14:52 2017 -0400
----------------------------------------------------------------------
.clang-format | 83 ++----
benchmarks/decode_benchmark.cc | 43 +--
examples/reader-writer.cc | 35 +--
.../arrow/arrow-reader-writer-benchmark.cc | 4 +-
src/parquet/arrow/arrow-reader-writer-test.cc | 210 +++++++-------
src/parquet/arrow/arrow-schema-test.cc | 130 +++++----
src/parquet/arrow/reader.cc | 276 +++++++++++--------
src/parquet/arrow/reader.h | 19 +-
src/parquet/arrow/schema.cc | 82 ++++--
src/parquet/arrow/schema.h | 32 ++-
src/parquet/arrow/test-util.h | 65 +++--
src/parquet/arrow/writer.cc | 177 ++++++------
src/parquet/arrow/writer.h | 38 +--
src/parquet/column-io-benchmark.cc | 35 +--
src/parquet/column_page.h | 21 +-
src/parquet/column_reader-test.cc | 95 ++++---
src/parquet/column_reader.cc | 27 +-
src/parquet/column_reader.h | 72 +++--
src/parquet/column_scanner-test.cc | 65 ++---
src/parquet/column_scanner.cc | 39 +--
src/parquet/column_scanner.h | 63 +++--
src/parquet/column_writer-test.cc | 111 ++++----
src/parquet/column_writer.cc | 198 +++++++------
src/parquet/column_writer.h | 38 +--
src/parquet/encoding-benchmark.cc | 8 +-
src/parquet/encoding-internal.h | 114 +++++---
src/parquet/encoding-test.cc | 31 +--
src/parquet/encoding.h | 14 +-
src/parquet/exception.cc | 8 +-
src/parquet/file/file-deserialize-test.cc | 40 +--
src/parquet/file/file-metadata-test.cc | 2 +-
src/parquet/file/file-serialize-test.cc | 12 +-
src/parquet/file/metadata.cc | 178 ++++++------
src/parquet/file/metadata.h | 43 +--
src/parquet/file/printer.cc | 24 +-
src/parquet/file/printer.h | 4 +-
src/parquet/file/reader-internal.cc | 70 +++--
src/parquet/file/reader-internal.h | 10 +-
src/parquet/file/reader.cc | 18 +-
src/parquet/file/reader.h | 9 +-
src/parquet/file/writer-internal.cc | 68 ++---
src/parquet/file/writer-internal.h | 10 +-
src/parquet/file/writer.cc | 39 +--
src/parquet/file/writer.h | 2 +-
src/parquet/properties-test.cc | 10 +-
src/parquet/properties.h | 33 ++-
src/parquet/public-api-test.cc | 4 +-
src/parquet/reader-test.cc | 8 +-
src/parquet/schema-test.cc | 121 ++++----
src/parquet/schema.cc | 109 ++++----
src/parquet/schema.h | 43 +--
src/parquet/statistics-test.cc | 54 ++--
src/parquet/statistics.cc | 43 +--
src/parquet/statistics.h | 21 +-
src/parquet/test-specialization.h | 10 +-
src/parquet/test-util.h | 99 ++++---
src/parquet/thrift.h | 2 +-
src/parquet/types-test.cc | 16 +-
src/parquet/util/buffer-builder.h | 2 +-
src/parquet/util/comparison-test.cc | 5 +-
src/parquet/util/comparison.h | 4 +-
src/parquet/util/memory.cc | 43 ++-
src/parquet/util/memory.h | 10 +-
src/parquet/util/schema-util.h | 15 +-
src/parquet/util/test-common.h | 22 +-
tools/parquet-scan.cc | 9 +-
66 files changed, 1802 insertions(+), 1543 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/b6f3caeb/.clang-format
----------------------------------------------------------------------
diff --git a/.clang-format b/.clang-format
index 7d5b3cf..06453df 100644
--- a/.clang-format
+++ b/.clang-format
@@ -1,65 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
---
-Language: Cpp
-# BasedOnStyle: Google
-AccessModifierOffset: -1
-AlignAfterOpenBracket: false
-AlignConsecutiveAssignments: false
-AlignEscapedNewlinesLeft: true
-AlignOperands: true
-AlignTrailingComments: true
-AllowAllParametersOfDeclarationOnNextLine: true
-AllowShortBlocksOnASingleLine: true
-AllowShortCaseLabelsOnASingleLine: false
-AllowShortFunctionsOnASingleLine: Inline
-AllowShortIfStatementsOnASingleLine: true
-AllowShortLoopsOnASingleLine: false
-AlwaysBreakAfterDefinitionReturnType: None
-AlwaysBreakBeforeMultilineStrings: true
-AlwaysBreakTemplateDeclarations: true
-BinPackArguments: true
-BinPackParameters: true
-BreakBeforeBinaryOperators: None
-BreakBeforeBraces: Attach
-BreakBeforeTernaryOperators: true
-BreakConstructorInitializersBeforeComma: false
-ColumnLimit: 90
-CommentPragmas: '^ IWYU pragma:'
-ConstructorInitializerAllOnOneLineOrOnePerLine: true
-ConstructorInitializerIndentWidth: 4
-ContinuationIndentWidth: 4
-Cpp11BracedListStyle: true
-DerivePointerAlignment: false
-DisableFormat: false
-ExperimentalAutoDetectBinPacking: false
-ForEachMacros: [ foreach, Q_FOREACH, BOOST_FOREACH ]
-IndentCaseLabels: true
-IndentWidth: 2
-IndentWrappedFunctionNames: false
-KeepEmptyLinesAtTheStartOfBlocks: false
-MacroBlockBegin: ''
-MacroBlockEnd: ''
-MaxEmptyLinesToKeep: 1
-NamespaceIndentation: None
-ObjCBlockIndentWidth: 2
-ObjCSpaceAfterProperty: false
-ObjCSpaceBeforeProtocolList: false
-PenaltyBreakBeforeFirstCallParameter: 1000
-PenaltyBreakComment: 300
-PenaltyBreakFirstLessLess: 120
-PenaltyBreakString: 1000
-PenaltyExcessCharacter: 1000000
-PenaltyReturnTypeOnItsOwnLine: 200
-PointerAlignment: Left
-SpaceAfterCStyleCast: false
-SpaceBeforeAssignmentOperators: true
-SpaceBeforeParens: ControlStatements
-SpaceInEmptyParentheses: false
-SpacesBeforeTrailingComments: 2
-SpacesInAngles: false
-SpacesInContainerLiterals: true
-SpacesInCStyleCastParentheses: false
-SpacesInParentheses: false
-SpacesInSquareBrackets: false
-Standard: Cpp11
-TabWidth: 8
-UseTab: Never
+BasedOnStyle: Google
+DerivePointerAlignment: false
+ColumnLimit: 90
http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/b6f3caeb/benchmarks/decode_benchmark.cc
----------------------------------------------------------------------
diff --git a/benchmarks/decode_benchmark.cc b/benchmarks/decode_benchmark.cc
index 57279d0..8df45f7 100644
--- a/benchmarks/decode_benchmark.cc
+++ b/benchmarks/decode_benchmark.cc
@@ -15,9 +15,9 @@
// specific language governing permissions and limitations
// under the License.
+#include <stdio.h>
#include <iostream>
#include <random>
-#include <stdio.h>
#include "arrow/util/compression.h"
#include "arrow/util/compression_snappy.h"
@@ -165,8 +165,8 @@ class DeltaByteArrayEncoder {
}
}
prefix_len_encoder_.Add(prefix_len);
- suffix_encoder_.Add(
- reinterpret_cast<const uint8_t*>(s.data()) + prefix_len, s.size() - prefix_len);
+ suffix_encoder_.Add(reinterpret_cast<const uint8_t*>(s.data()) + prefix_len,
+ s.size() - prefix_len);
last_value_ = s;
}
@@ -210,7 +210,8 @@ uint64_t TestPlainIntEncoding(const uint8_t* data, int num_values, int batch_siz
}
uint64_t TestBinaryPackedEncoding(const char* name, const std::vector<int64_t>& values,
- int benchmark_iters = -1, int benchmark_batch_size = 1) {
+ int benchmark_iters = -1,
+ int benchmark_batch_size = 1) {
int mini_block_size;
if (values.size() < 8) {
mini_block_size = 8;
@@ -266,7 +267,7 @@ uint64_t TestBinaryPackedEncoding(const char* name, const std::vector<int64_t>&
uint64_t elapsed = sw.Stop();
double num_ints = values.size() * benchmark_iters * 1000.;
printf("%s rate (batch size = %2d): %0.3fM per second.\n", name, benchmark_batch_size,
- num_ints / elapsed);
+ num_ints / elapsed);
return result;
}
}
@@ -278,10 +279,10 @@ uint64_t TestBinaryPackedEncoding(const char* name, const std::vector<int64_t>&
} \
elapsed = sw.Stop(); \
printf("%s rate (batch size = %2d): %0.3fM per second.\n", NAME, BATCH_SIZE, \
- mult / elapsed);
+ mult / elapsed);
void TestPlainIntCompressed(::arrow::Codec* codec, const std::vector<int64_t>& data,
- int num_iters, int batch_size) {
+ int num_iters, int batch_size) {
const uint8_t* raw_data = reinterpret_cast<const uint8_t*>(&data[0]);
int uncompressed_len = data.size() * sizeof(int64_t);
uint8_t* decompressed_data = new uint8_t[uncompressed_len];
@@ -291,24 +292,24 @@ void TestPlainIntCompressed(::arrow::Codec* codec, const std::vector<int64_t>& d
int64_t compressed_len;
DCHECK(codec
->Compress(uncompressed_len, raw_data, max_compressed_size, compressed_data,
- &compressed_len)
+ &compressed_len)
.ok());
printf("\n%s:\n Uncompressed len: %d\n Compressed len: %d\n", codec->name(),
- uncompressed_len, static_cast<int>(compressed_len));
+ uncompressed_len, static_cast<int>(compressed_len));
double mult = num_iters * data.size() * 1000.;
parquet::StopWatch sw;
sw.Start();
uint64_t r = 0;
for (int i = 0; i < num_iters; ++i) {
- codec->Decompress(
- compressed_len, compressed_data, uncompressed_len, decompressed_data);
+ codec->Decompress(compressed_len, compressed_data, uncompressed_len,
+ decompressed_data);
r += TestPlainIntEncoding(decompressed_data, data.size(), batch_size);
}
int64_t elapsed = sw.Stop();
printf("Compressed(%s) plain int rate (batch size = %2d): %0.3fM per second.\n",
- codec->name(), batch_size, mult / elapsed);
+ codec->name(), batch_size, mult / elapsed);
delete[] compressed_data;
delete[] decompressed_data;
@@ -317,13 +318,11 @@ void TestPlainIntCompressed(::arrow::Codec* codec, const std::vector<int64_t>& d
void TestBinaryPacking() {
std::vector<int64_t> values;
values.clear();
- for (int i = 0; i < 100; ++i)
- values.push_back(0);
+ for (int i = 0; i < 100; ++i) values.push_back(0);
TestBinaryPackedEncoding("Zeros", values);
values.clear();
- for (int i = 1; i <= 5; ++i)
- values.push_back(i);
+ for (int i = 1; i <= 5; ++i) values.push_back(i);
TestBinaryPackedEncoding("Example 1", values);
values.clear();
@@ -373,13 +372,15 @@ void TestDeltaLengthByteArray() {
int len = 0;
uint8_t* buffer = encoder.Encode(&len);
printf("DeltaLengthByteArray\n Raw len: %d\n Encoded len: %d\n",
- encoder.plain_encoded_len(), len);
+ encoder.plain_encoded_len(), len);
decoder.SetData(encoder.num_values(), buffer, len);
for (int i = 0; i < encoder.num_values(); ++i) {
parquet::ByteArray v = {0, NULL};
decoder.Decode(&v, 1);
std::string r = std::string(reinterpret_cast<const char*>(v.ptr), v.len);
- if (r != values[i]) { std::cout << "Bad " << r << " != " << values[i] << std::endl; }
+ if (r != values[i]) {
+ std::cout << "Bad " << r << " != " << values[i] << std::endl;
+ }
}
}
@@ -409,13 +410,15 @@ void TestDeltaByteArray() {
int len = 0;
uint8_t* buffer = encoder.Encode(&len);
printf("DeltaLengthByteArray\n Raw len: %d\n Encoded len: %d\n",
- encoder.plain_encoded_len(), len);
+ encoder.plain_encoded_len(), len);
decoder.SetData(encoder.num_values(), buffer, len);
for (int i = 0; i < encoder.num_values(); ++i) {
parquet::ByteArray v;
decoder.Decode(&v, 1);
std::string r = std::string(reinterpret_cast<const char*>(v.ptr), v.len);
- if (r != values[i]) { std::cout << "Bad " << r << " != " << values[i] << std::endl; }
+ if (r != values[i]) {
+ std::cout << "Bad " << r << " != " << values[i] << std::endl;
+ }
}
}
http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/b6f3caeb/examples/reader-writer.cc
----------------------------------------------------------------------
diff --git a/examples/reader-writer.cc b/examples/reader-writer.cc
index 210968c..7136b28 100644
--- a/examples/reader-writer.cc
+++ b/examples/reader-writer.cc
@@ -59,35 +59,36 @@ static std::shared_ptr<GroupNode> SetupSchema() {
parquet::schema::NodeVector fields;
// Create a primitive node named 'boolean_field' with type:BOOLEAN,
// repetition:REQUIRED
- fields.push_back(PrimitiveNode::Make(
- "boolean_field", Repetition::REQUIRED, Type::BOOLEAN, LogicalType::NONE));
+ fields.push_back(PrimitiveNode::Make("boolean_field", Repetition::REQUIRED,
+ Type::BOOLEAN, LogicalType::NONE));
// Create a primitive node named 'int32_field' with type:INT32, repetition:REQUIRED,
// logical type:TIME_MILLIS
- fields.push_back(PrimitiveNode::Make(
- "int32_field", Repetition::REQUIRED, Type::INT32, LogicalType::TIME_MILLIS));
+ fields.push_back(PrimitiveNode::Make("int32_field", Repetition::REQUIRED, Type::INT32,
+ LogicalType::TIME_MILLIS));
// Create a primitive node named 'int64_field' with type:INT64, repetition:REPEATED
- fields.push_back(PrimitiveNode::Make(
- "int64_field", Repetition::REPEATED, Type::INT64, LogicalType::NONE));
+ fields.push_back(PrimitiveNode::Make("int64_field", Repetition::REPEATED, Type::INT64,
+ LogicalType::NONE));
- fields.push_back(PrimitiveNode::Make(
- "int96_field", Repetition::REQUIRED, Type::INT96, LogicalType::NONE));
+ fields.push_back(PrimitiveNode::Make("int96_field", Repetition::REQUIRED, Type::INT96,
+ LogicalType::NONE));
- fields.push_back(PrimitiveNode::Make(
- "float_field", Repetition::REQUIRED, Type::FLOAT, LogicalType::NONE));
+ fields.push_back(PrimitiveNode::Make("float_field", Repetition::REQUIRED, Type::FLOAT,
+ LogicalType::NONE));
- fields.push_back(PrimitiveNode::Make(
- "double_field", Repetition::REQUIRED, Type::DOUBLE, LogicalType::NONE));
+ fields.push_back(PrimitiveNode::Make("double_field", Repetition::REQUIRED, Type::DOUBLE,
+ LogicalType::NONE));
// Create a primitive node named 'ba_field' with type:BYTE_ARRAY, repetition:OPTIONAL
- fields.push_back(PrimitiveNode::Make(
- "ba_field", Repetition::OPTIONAL, Type::BYTE_ARRAY, LogicalType::NONE));
+ fields.push_back(PrimitiveNode::Make("ba_field", Repetition::OPTIONAL, Type::BYTE_ARRAY,
+ LogicalType::NONE));
// Create a primitive node named 'flba_field' with type:FIXED_LEN_BYTE_ARRAY,
// repetition:REQUIRED, field_length = FIXED_LENGTH
fields.push_back(PrimitiveNode::Make("flba_field", Repetition::REQUIRED,
- Type::FIXED_LEN_BYTE_ARRAY, LogicalType::NONE, FIXED_LENGTH));
+ Type::FIXED_LEN_BYTE_ARRAY, LogicalType::NONE,
+ FIXED_LENGTH));
// Create a GroupNode named 'schema' using the primitive nodes defined above
// This GroupNode is the root node of the schema tree
@@ -308,8 +309,8 @@ int main(int argc, char** argv) {
int64_t value;
// Read one value at a time. The number of rows read is returned. values_read
// contains the number of non-null rows
- rows_read = int64_reader->ReadBatch(
- 1, &definition_level, &repetition_level, &value, &values_read);
+ rows_read = int64_reader->ReadBatch(1, &definition_level, &repetition_level,
+ &value, &values_read);
// Ensure only one value is read
assert(rows_read == 1);
// There are no NULL values in the rows written
http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/b6f3caeb/src/parquet/arrow/arrow-reader-writer-benchmark.cc
----------------------------------------------------------------------
diff --git a/src/parquet/arrow/arrow-reader-writer-benchmark.cc b/src/parquet/arrow/arrow-reader-writer-benchmark.cc
index 677e437..149cc1a 100644
--- a/src/parquet/arrow/arrow-reader-writer-benchmark.cc
+++ b/src/parquet/arrow/arrow-reader-writer-benchmark.cc
@@ -64,8 +64,8 @@ using ArrowType = typename benchmark_traits<ParquetType>::arrow_type;
template <typename ParquetType>
std::shared_ptr<ColumnDescriptor> MakeSchema(Repetition::type repetition) {
auto node = PrimitiveNode::Make("int64", repetition, ParquetType::type_num);
- return std::make_shared<ColumnDescriptor>(
- node, repetition != Repetition::REQUIRED, repetition == Repetition::REPEATED);
+ return std::make_shared<ColumnDescriptor>(node, repetition != Repetition::REQUIRED,
+ repetition == Repetition::REPEATED);
}
template <bool nullable, typename ParquetType>
http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/b6f3caeb/src/parquet/arrow/arrow-reader-writer-test.cc
----------------------------------------------------------------------
diff --git a/src/parquet/arrow/arrow-reader-writer-test.cc b/src/parquet/arrow/arrow-reader-writer-test.cc
index 4424ea6..69c4991 100644
--- a/src/parquet/arrow/arrow-reader-writer-test.cc
+++ b/src/parquet/arrow/arrow-reader-writer-test.cc
@@ -290,28 +290,29 @@ template <typename T>
using ParquetWriter = TypedColumnWriter<ParquetDataType<T>>;
void WriteTableToBuffer(const std::shared_ptr<Table>& table, int num_threads,
- int64_t row_group_size,
- const std::shared_ptr<ArrowWriterProperties>& arrow_properties,
- std::shared_ptr<Buffer>* out) {
+ int64_t row_group_size,
+ const std::shared_ptr<ArrowWriterProperties>& arrow_properties,
+ std::shared_ptr<Buffer>* out) {
auto sink = std::make_shared<InMemoryOutputStream>();
ASSERT_OK_NO_THROW(WriteTable(*table, ::arrow::default_memory_pool(), sink,
- row_group_size, default_writer_properties(), arrow_properties));
+ row_group_size, default_writer_properties(),
+ arrow_properties));
*out = sink->GetBuffer();
}
void DoSimpleRoundtrip(const std::shared_ptr<Table>& table, int num_threads,
- int64_t row_group_size, const std::vector<int>& column_subset,
- std::shared_ptr<Table>* out,
- const std::shared_ptr<ArrowWriterProperties>& arrow_properties =
- default_arrow_writer_properties()) {
+ int64_t row_group_size, const std::vector<int>& column_subset,
+ std::shared_ptr<Table>* out,
+ const std::shared_ptr<ArrowWriterProperties>& arrow_properties =
+ default_arrow_writer_properties()) {
std::shared_ptr<Buffer> buffer;
WriteTableToBuffer(table, num_threads, row_group_size, arrow_properties, &buffer);
std::unique_ptr<FileReader> reader;
- ASSERT_OK_NO_THROW(
- OpenFile(std::make_shared<BufferReader>(buffer), ::arrow::default_memory_pool(),
- ::parquet::default_reader_properties(), nullptr, &reader));
+ ASSERT_OK_NO_THROW(OpenFile(std::make_shared<BufferReader>(buffer),
+ ::arrow::default_memory_pool(),
+ ::parquet::default_reader_properties(), nullptr, &reader));
reader->set_num_threads(num_threads);
@@ -323,8 +324,8 @@ void DoSimpleRoundtrip(const std::shared_ptr<Table>& table, int num_threads,
}
}
-static std::shared_ptr<GroupNode> MakeSimpleSchema(
- const ::arrow::DataType& type, Repetition::type repetition) {
+static std::shared_ptr<GroupNode> MakeSimpleSchema(const ::arrow::DataType& type,
+ Repetition::type repetition) {
int byte_width;
// Decimal is not implemented yet.
switch (type.id()) {
@@ -334,8 +335,8 @@ static std::shared_ptr<GroupNode> MakeSimpleSchema(
default:
byte_width = -1;
}
- auto pnode = PrimitiveNode::Make(
- "column1", repetition, get_physical_type(type), get_logical_type(type), byte_width);
+ auto pnode = PrimitiveNode::Make("column1", repetition, get_physical_type(type),
+ get_logical_type(type), byte_width);
NodePtr node_ =
GroupNode::Make("schema", Repetition::REQUIRED, std::vector<NodePtr>({pnode}));
return std::static_pointer_cast<GroupNode>(node_);
@@ -354,13 +355,13 @@ class TestParquetIO : public ::testing::Test {
void ReaderFromSink(std::unique_ptr<FileReader>* out) {
std::shared_ptr<Buffer> buffer = sink_->GetBuffer();
- ASSERT_OK_NO_THROW(
- OpenFile(std::make_shared<BufferReader>(buffer), ::arrow::default_memory_pool(),
- ::parquet::default_reader_properties(), nullptr, out));
+ ASSERT_OK_NO_THROW(OpenFile(std::make_shared<BufferReader>(buffer),
+ ::arrow::default_memory_pool(),
+ ::parquet::default_reader_properties(), nullptr, out));
}
- void ReadSingleColumnFile(
- std::unique_ptr<FileReader> file_reader, std::shared_ptr<Array>* out) {
+ void ReadSingleColumnFile(std::unique_ptr<FileReader> file_reader,
+ std::shared_ptr<Array>* out) {
std::unique_ptr<ColumnReader> column_reader;
ASSERT_OK_NO_THROW(file_reader->GetColumn(0, &column_reader));
ASSERT_NE(nullptr, column_reader.get());
@@ -378,8 +379,8 @@ class TestParquetIO : public ::testing::Test {
ASSERT_TRUE(values->Equals(out));
}
- void ReadTableFromFile(
- std::unique_ptr<FileReader> reader, std::shared_ptr<Table>* out) {
+ void ReadTableFromFile(std::unique_ptr<FileReader> reader,
+ std::shared_ptr<Table>* out) {
ASSERT_OK_NO_THROW(reader->ReadTable(out));
auto key_value_metadata =
reader->parquet_reader()->metadata()->key_value_metadata().get();
@@ -388,30 +389,30 @@ class TestParquetIO : public ::testing::Test {
}
void PrepareListTable(int64_t size, bool nullable_lists, bool nullable_elements,
- int64_t null_count, std::shared_ptr<Table>* out) {
+ int64_t null_count, std::shared_ptr<Table>* out) {
std::shared_ptr<Array> values;
- ASSERT_OK(NullableArray<TestType>(
- size * size, nullable_elements ? null_count : 0, kDefaultSeed, &values));
+ ASSERT_OK(NullableArray<TestType>(size * size, nullable_elements ? null_count : 0,
+ kDefaultSeed, &values));
// Also test that slice offsets are respected
values = values->Slice(5, values->length() - 5);
std::shared_ptr<ListArray> lists;
- ASSERT_OK(MakeListArray(
- values, size, nullable_lists ? null_count : 0, nullable_elements, &lists));
+ ASSERT_OK(MakeListArray(values, size, nullable_lists ? null_count : 0,
+ nullable_elements, &lists));
*out = MakeSimpleTable(lists->Slice(3, size - 6), nullable_lists);
}
void PrepareListOfListTable(int64_t size, bool nullable_parent_lists,
- bool nullable_lists, bool nullable_elements, int64_t null_count,
- std::shared_ptr<Table>* out) {
+ bool nullable_lists, bool nullable_elements,
+ int64_t null_count, std::shared_ptr<Table>* out) {
std::shared_ptr<Array> values;
- ASSERT_OK(NullableArray<TestType>(
- size * 6, nullable_elements ? null_count : 0, kDefaultSeed, &values));
+ ASSERT_OK(NullableArray<TestType>(size * 6, nullable_elements ? null_count : 0,
+ kDefaultSeed, &values));
std::shared_ptr<ListArray> lists;
- ASSERT_OK(MakeListArray(
- values, size * 3, nullable_lists ? null_count : 0, nullable_elements, &lists));
+ ASSERT_OK(MakeListArray(values, size * 3, nullable_lists ? null_count : 0,
+ nullable_elements, &lists));
std::shared_ptr<ListArray> parent_lists;
ASSERT_OK(MakeListArray(lists, size, nullable_parent_lists ? null_count : 0,
- nullable_lists, &parent_lists));
+ nullable_lists, &parent_lists));
*out = MakeSimpleTable(parent_lists, nullable_parent_lists);
}
@@ -438,7 +439,7 @@ class TestParquetIO : public ::testing::Test {
template <typename ArrayType>
void WriteColumn(const std::shared_ptr<GroupNode>& schema,
- const std::shared_ptr<ArrayType>& values) {
+ const std::shared_ptr<ArrayType>& values) {
FileWriter writer(::arrow::default_memory_pool(), MakeWriter(schema));
ASSERT_OK_NO_THROW(writer.NewRowGroup(values->length()));
ASSERT_OK_NO_THROW(writer.WriteColumnChunk(*values));
@@ -454,9 +455,10 @@ class TestParquetIO : public ::testing::Test {
// Parquet version 1.0.
typedef ::testing::Types<::arrow::BooleanType, ::arrow::UInt8Type, ::arrow::Int8Type,
- ::arrow::UInt16Type, ::arrow::Int16Type, ::arrow::Int32Type, ::arrow::UInt64Type,
- ::arrow::Int64Type, ::arrow::Date32Type, ::arrow::FloatType, ::arrow::DoubleType,
- ::arrow::StringType, ::arrow::BinaryType, ::arrow::FixedSizeBinaryType>
+ ::arrow::UInt16Type, ::arrow::Int16Type, ::arrow::Int32Type,
+ ::arrow::UInt64Type, ::arrow::Int64Type, ::arrow::Date32Type,
+ ::arrow::FloatType, ::arrow::DoubleType, ::arrow::StringType,
+ ::arrow::BinaryType, ::arrow::FixedSizeBinaryType>
TestTypes;
TYPED_TEST_CASE(TestParquetIO, TestTypes);
@@ -478,7 +480,7 @@ TYPED_TEST(TestParquetIO, SingleColumnTableRequiredWrite) {
std::shared_ptr<Table> table = MakeSimpleTable(values, false);
this->sink_ = std::make_shared<InMemoryOutputStream>();
ASSERT_OK_NO_THROW(WriteTable(*table, ::arrow::default_memory_pool(), this->sink_,
- values->length(), default_writer_properties()));
+ values->length(), default_writer_properties()));
std::shared_ptr<Table> out;
std::unique_ptr<FileReader> reader;
@@ -599,8 +601,8 @@ TYPED_TEST(TestParquetIO, SingleColumnTableRequiredChunkedWrite) {
ASSERT_OK(NonNullArray<TypeParam>(LARGE_SIZE, &values));
std::shared_ptr<Table> table = MakeSimpleTable(values, false);
this->sink_ = std::make_shared<InMemoryOutputStream>();
- ASSERT_OK_NO_THROW(WriteTable(
- *table, default_memory_pool(), this->sink_, 512, default_writer_properties()));
+ ASSERT_OK_NO_THROW(WriteTable(*table, default_memory_pool(), this->sink_, 512,
+ default_writer_properties()));
this->ReadAndCheckSingleColumnTable(values);
}
@@ -615,8 +617,8 @@ TYPED_TEST(TestParquetIO, SingleColumnTableRequiredChunkedWriteArrowIO) {
{
// BufferOutputStream closed on gc
auto arrow_sink_ = std::make_shared<::arrow::io::BufferOutputStream>(buffer);
- ASSERT_OK_NO_THROW(WriteTable(
- *table, default_memory_pool(), arrow_sink_, 512, default_writer_properties()));
+ ASSERT_OK_NO_THROW(WriteTable(*table, default_memory_pool(), arrow_sink_, 512,
+ default_writer_properties()));
// XXX: Remove this after ARROW-455 completed
ASSERT_OK(arrow_sink_->Close());
@@ -664,7 +666,7 @@ TYPED_TEST(TestParquetIO, SingleColumnTableOptionalChunkedWrite) {
std::shared_ptr<Table> table = MakeSimpleTable(values, true);
this->sink_ = std::make_shared<InMemoryOutputStream>();
ASSERT_OK_NO_THROW(WriteTable(*table, ::arrow::default_memory_pool(), this->sink_, 512,
- default_writer_properties()));
+ default_writer_properties()));
this->ReadAndCheckSingleColumnTable(values);
}
@@ -713,8 +715,8 @@ TEST_F(TestInt96ParquetIO, ReadIntoTimestamp) {
rg_writer->Close();
writer->Close();
- ::arrow::TimestampBuilder builder(
- default_memory_pool(), ::arrow::timestamp(TimeUnit::NANO));
+ ::arrow::TimestampBuilder builder(default_memory_pool(),
+ ::arrow::timestamp(TimeUnit::NANO));
ASSERT_OK(builder.Append(val));
std::shared_ptr<Array> values;
ASSERT_OK(builder.Finish(&values));
@@ -777,8 +779,8 @@ TEST_F(TestUInt32ParquetIO, Parquet_1_0_Compability) {
const int32_t kOffset = 0;
ASSERT_OK(MakePrimitiveArray(std::make_shared<::arrow::Int64Type>(), values->length(),
- int64_data, values->null_bitmap(), values->null_count(), kOffset,
- &expected_values));
+ int64_data, values->null_bitmap(), values->null_count(),
+ kOffset, &expected_values));
this->ReadAndCheckSingleColumnTable(expected_values);
}
@@ -794,7 +796,7 @@ TEST_F(TestStringParquetIO, EmptyStringColumnRequiredWrite) {
std::shared_ptr<Table> table = MakeSimpleTable(values, false);
this->sink_ = std::make_shared<InMemoryOutputStream>();
ASSERT_OK_NO_THROW(WriteTable(*table, ::arrow::default_memory_pool(), this->sink_,
- values->length(), default_writer_properties()));
+ values->length(), default_writer_properties()));
std::shared_ptr<Table> out;
std::unique_ptr<FileReader> reader;
@@ -815,7 +817,7 @@ TEST_F(TestNullParquetIO, NullColumn) {
std::shared_ptr<Table> table = MakeSimpleTable(values, true);
this->sink_ = std::make_shared<InMemoryOutputStream>();
ASSERT_OK_NO_THROW(WriteTable(*table, ::arrow::default_memory_pool(), this->sink_,
- values->length(), default_writer_properties()));
+ values->length(), default_writer_properties()));
std::shared_ptr<Table> out;
std::unique_ptr<FileReader> reader;
@@ -847,16 +849,16 @@ class TestPrimitiveParquetIO : public TestParquetIO<TestType> {
public:
typedef typename c_type_trait<TestType>::ArrowCType T;
- void MakeTestFile(
- std::vector<T>& values, int num_chunks, std::unique_ptr<FileReader>* reader) {
+ void MakeTestFile(std::vector<T>& values, int num_chunks,
+ std::unique_ptr<FileReader>* reader) {
TestType dummy;
std::shared_ptr<GroupNode> schema = MakeSimpleSchema(dummy, Repetition::REQUIRED);
std::unique_ptr<ParquetFileWriter> file_writer = this->MakeWriter(schema);
size_t chunk_size = values.size() / num_chunks;
// Convert to Parquet's expected physical type
- std::vector<uint8_t> values_buffer(
- sizeof(ParquetCDataType<TestType>) * values.size());
+ std::vector<uint8_t> values_buffer(sizeof(ParquetCDataType<TestType>) *
+ values.size());
auto values_parquet =
reinterpret_cast<ParquetCDataType<TestType>*>(values_buffer.data());
std::copy(values.cbegin(), values.cend(), values_parquet);
@@ -901,8 +903,9 @@ class TestPrimitiveParquetIO : public TestParquetIO<TestType> {
};
typedef ::testing::Types<::arrow::BooleanType, ::arrow::UInt8Type, ::arrow::Int8Type,
- ::arrow::UInt16Type, ::arrow::Int16Type, ::arrow::UInt32Type, ::arrow::Int32Type,
- ::arrow::UInt64Type, ::arrow::Int64Type, ::arrow::FloatType, ::arrow::DoubleType>
+ ::arrow::UInt16Type, ::arrow::Int16Type, ::arrow::UInt32Type,
+ ::arrow::Int32Type, ::arrow::UInt64Type, ::arrow::Int64Type,
+ ::arrow::FloatType, ::arrow::DoubleType>
PrimitiveTestTypes;
TYPED_TEST_CASE(TestPrimitiveParquetIO, PrimitiveTestTypes);
@@ -942,23 +945,23 @@ void MakeDateTimeTypesTable(std::shared_ptr<Table>* out, bool nanos_as_micros =
auto f5 = field("f5", ::arrow::time64(TimeUnit::MICRO));
std::shared_ptr<::arrow::Schema> schema(new ::arrow::Schema({f0, f1, f2, f3, f4, f5}));
- std::vector<int32_t> t32_values = {
- 1489269000, 1489270000, 1489271000, 1489272000, 1489272000, 1489273000};
+ std::vector<int32_t> t32_values = {1489269000, 1489270000, 1489271000,
+ 1489272000, 1489272000, 1489273000};
std::vector<int64_t> t64_values = {1489269000000, 1489270000000, 1489271000000,
- 1489272000000, 1489272000000, 1489273000000};
- std::vector<int64_t> t64_us_values = {
- 1489269000, 1489270000, 1489271000, 1489272000, 1489272000, 1489273000};
+ 1489272000000, 1489272000000, 1489273000000};
+ std::vector<int64_t> t64_us_values = {1489269000, 1489270000, 1489271000,
+ 1489272000, 1489272000, 1489273000};
std::shared_ptr<Array> a0, a1, a2, a3, a4, a5;
ArrayFromVector<::arrow::Date32Type, int32_t>(f0->type(), is_valid, t32_values, &a0);
ArrayFromVector<::arrow::TimestampType, int64_t>(f1->type(), is_valid, t64_values, &a1);
ArrayFromVector<::arrow::TimestampType, int64_t>(f2->type(), is_valid, t64_values, &a2);
if (nanos_as_micros) {
- ArrayFromVector<::arrow::TimestampType, int64_t>(
- f3->type(), is_valid, t64_us_values, &a3);
+ ArrayFromVector<::arrow::TimestampType, int64_t>(f3->type(), is_valid, t64_us_values,
+ &a3);
} else {
- ArrayFromVector<::arrow::TimestampType, int64_t>(
- f3->type(), is_valid, t64_values, &a3);
+ ArrayFromVector<::arrow::TimestampType, int64_t>(f3->type(), is_valid, t64_values,
+ &a3);
}
ArrayFromVector<::arrow::Time32Type, int32_t>(f4->type(), is_valid, t32_values, &a4);
ArrayFromVector<::arrow::Time64Type, int64_t>(f5->type(), is_valid, t64_values, &a5);
@@ -976,7 +979,8 @@ TEST(TestArrowReadWrite, DateTimeTypes) {
// Use deprecated INT96 type
std::shared_ptr<Table> result;
- DoSimpleRoundtrip(table, 1, table->num_rows(), {}, &result,
+ DoSimpleRoundtrip(
+ table, 1, table->num_rows(), {}, &result,
ArrowWriterProperties::Builder().enable_deprecated_int96_timestamps()->build());
ASSERT_TRUE(table->Equals(*result));
@@ -999,7 +1003,7 @@ TEST(TestArrowReadWrite, ConvertedDateTimeTypes) {
std::shared_ptr<::arrow::Schema> schema(new ::arrow::Schema({f0, f1}));
std::vector<int64_t> a0_values = {1489190400000, 1489276800000, 1489363200000,
- 1489449600000, 1489536000000, 1489622400000};
+ 1489449600000, 1489536000000, 1489622400000};
std::vector<int32_t> a1_values = {0, 1, 2, 3, 4, 5};
std::shared_ptr<Array> a0, a1, x0, x1;
@@ -1030,8 +1034,8 @@ TEST(TestArrowReadWrite, ConvertedDateTimeTypes) {
ASSERT_TRUE(result->Equals(*ex_table));
}
-void MakeDoubleTable(
- int num_columns, int num_rows, int nchunks, std::shared_ptr<Table>* out) {
+void MakeDoubleTable(int num_columns, int num_rows, int nchunks,
+ std::shared_ptr<Table>* out) {
std::shared_ptr<::arrow::Column> column;
std::vector<std::shared_ptr<::arrow::Column>> columns(num_columns);
std::vector<std::shared_ptr<::arrow::Field>> fields(num_columns);
@@ -1039,8 +1043,8 @@ void MakeDoubleTable(
for (int i = 0; i < num_columns; ++i) {
std::vector<std::shared_ptr<Array>> arrays;
std::shared_ptr<Array> values;
- ASSERT_OK(NullableArray<::arrow::DoubleType>(
- num_rows, num_rows / 10, static_cast<uint32_t>(i), &values));
+ ASSERT_OK(NullableArray<::arrow::DoubleType>(num_rows, num_rows / 10,
+ static_cast<uint32_t>(i), &values));
std::stringstream ss;
ss << "col" << i;
@@ -1081,9 +1085,9 @@ TEST(TestArrowReadWrite, ReadSingleRowGroup) {
WriteTableToBuffer(table, 1, num_rows / 2, default_arrow_writer_properties(), &buffer);
std::unique_ptr<FileReader> reader;
- ASSERT_OK_NO_THROW(
- OpenFile(std::make_shared<BufferReader>(buffer), ::arrow::default_memory_pool(),
- ::parquet::default_reader_properties(), nullptr, &reader));
+ ASSERT_OK_NO_THROW(OpenFile(std::make_shared<BufferReader>(buffer),
+ ::arrow::default_memory_pool(),
+ ::parquet::default_reader_properties(), nullptr, &reader));
ASSERT_EQ(2, reader->num_row_groups());
@@ -1131,8 +1135,8 @@ TEST(TestArrowWrite, CheckChunkSize) {
auto sink = std::make_shared<InMemoryOutputStream>();
- ASSERT_RAISES(
- Invalid, WriteTable(*table, ::arrow::default_memory_pool(), sink, chunk_size));
+ ASSERT_RAISES(Invalid,
+ WriteTable(*table, ::arrow::default_memory_pool(), sink, chunk_size));
}
class TestNestedSchemaRead : public ::testing::TestWithParam<Repetition::type> {
@@ -1145,13 +1149,13 @@ class TestNestedSchemaRead : public ::testing::TestWithParam<Repetition::type> {
std::shared_ptr<Buffer> buffer = nested_parquet_->GetBuffer();
ASSERT_OK_NO_THROW(
OpenFile(std::make_shared<BufferReader>(buffer), ::arrow::default_memory_pool(),
- ::parquet::default_reader_properties(), nullptr, &reader_));
+ ::parquet::default_reader_properties(), nullptr, &reader_));
}
void InitNewParquetFile(const std::shared_ptr<GroupNode>& schema, int num_rows) {
nested_parquet_ = std::make_shared<InMemoryOutputStream>();
- writer_ = parquet::ParquetFileWriter::Open(
- nested_parquet_, schema, default_writer_properties());
+ writer_ = parquet::ParquetFileWriter::Open(nested_parquet_, schema,
+ default_writer_properties());
row_group_writer_ = writer_->AppendRowGroup(num_rows);
}
@@ -1166,8 +1170,8 @@ class TestNestedSchemaRead : public ::testing::TestWithParam<Repetition::type> {
values_array_ = std::dynamic_pointer_cast<::arrow::Int32Array>(arr);
}
- void WriteColumnData(
- size_t num_rows, int16_t* def_levels, int16_t* rep_levels, int32_t* values) {
+ void WriteColumnData(size_t num_rows, int16_t* def_levels, int16_t* rep_levels,
+ int32_t* values) {
auto typed_writer =
static_cast<TypedColumnWriter<Int32Type>*>(row_group_writer_->NextColumn());
typed_writer->WriteBatch(num_rows, def_levels, rep_levels, values);
@@ -1179,7 +1183,9 @@ class TestNestedSchemaRead : public ::testing::TestWithParam<Repetition::type> {
// Also independently count the nulls
auto local_null_count = 0;
for (int i = 0; i < array.length(); i++) {
- if (array.IsNull(i)) { local_null_count++; }
+ if (array.IsNull(i)) {
+ local_null_count++;
+ }
}
ASSERT_EQ(local_null_count, expected_nulls);
}
@@ -1189,7 +1195,9 @@ class TestNestedSchemaRead : public ::testing::TestWithParam<Repetition::type> {
int j = 0;
for (int i = 0; i < values_array_->length(); i++) {
- if (array.IsNull(i)) { continue; }
+ if (array.IsNull(i)) {
+ continue;
+ }
ASSERT_EQ(array.Value(i), values_array_->Value(j));
j++;
}
@@ -1219,9 +1227,10 @@ class TestNestedSchemaRead : public ::testing::TestWithParam<Repetition::type> {
// }
// required int32 leaf3;
- parquet_fields.push_back(GroupNode::Make("group1", struct_repetition,
+ parquet_fields.push_back(GroupNode::Make(
+ "group1", struct_repetition,
{PrimitiveNode::Make("leaf1", Repetition::REQUIRED, ParquetType::INT32),
- PrimitiveNode::Make("leaf2", Repetition::OPTIONAL, ParquetType::INT32)}));
+ PrimitiveNode::Make("leaf2", Repetition::OPTIONAL, ParquetType::INT32)}));
parquet_fields.push_back(
PrimitiveNode::Make("leaf3", Repetition::REQUIRED, ParquetType::INT32));
@@ -1252,33 +1261,34 @@ class TestNestedSchemaRead : public ::testing::TestWithParam<Repetition::type> {
int32_t* values = reinterpret_cast<int32_t*>(values_array_->values()->mutable_data());
// Create the actual parquet file
- InitNewParquetFile(
- std::static_pointer_cast<GroupNode>(schema_node), NUM_SIMPLE_TEST_ROWS);
+ InitNewParquetFile(std::static_pointer_cast<GroupNode>(schema_node),
+ NUM_SIMPLE_TEST_ROWS);
// leaf1 column
- WriteColumnData(
- NUM_SIMPLE_TEST_ROWS, leaf1_def_levels.data(), rep_levels.data(), values);
+ WriteColumnData(NUM_SIMPLE_TEST_ROWS, leaf1_def_levels.data(), rep_levels.data(),
+ values);
// leaf2 column
- WriteColumnData(
- NUM_SIMPLE_TEST_ROWS, leaf2_def_levels.data(), rep_levels.data(), values);
+ WriteColumnData(NUM_SIMPLE_TEST_ROWS, leaf2_def_levels.data(), rep_levels.data(),
+ values);
// leaf3 column
- WriteColumnData(
- NUM_SIMPLE_TEST_ROWS, leaf3_def_levels.data(), rep_levels.data(), values);
+ WriteColumnData(NUM_SIMPLE_TEST_ROWS, leaf3_def_levels.data(), rep_levels.data(),
+ values);
FinalizeParquetFile();
InitReader();
}
NodePtr CreateSingleTypedNestedGroup(int index, int depth, int num_children,
- Repetition::type node_repetition, ParquetType::type leaf_type) {
+ Repetition::type node_repetition,
+ ParquetType::type leaf_type) {
std::vector<NodePtr> children;
for (int i = 0; i < num_children; i++) {
if (depth <= 1) {
children.push_back(PrimitiveNode::Make("leaf", node_repetition, leaf_type));
} else {
- children.push_back(CreateSingleTypedNestedGroup(
- i, depth - 1, num_children, node_repetition, leaf_type));
+ children.push_back(CreateSingleTypedNestedGroup(i, depth - 1, num_children,
+ node_repetition, leaf_type));
}
}
@@ -1289,7 +1299,7 @@ class TestNestedSchemaRead : public ::testing::TestWithParam<Repetition::type> {
// A deeply nested schema
void CreateMultiLevelNestedParquet(int num_trees, int tree_depth, int num_children,
- int num_rows, Repetition::type node_repetition) {
+ int num_rows, Repetition::type node_repetition) {
// Create the schema
std::vector<NodePtr> parquet_fields;
for (int i = 0; i < num_trees; i++) {
@@ -1327,8 +1337,8 @@ class TestNestedSchemaRead : public ::testing::TestWithParam<Repetition::type> {
class DeepParquetTestVisitor : public ArrayVisitor {
public:
- DeepParquetTestVisitor(
- Repetition::type node_repetition, std::shared_ptr<::arrow::Int32Array> expected)
+ DeepParquetTestVisitor(Repetition::type node_repetition,
+ std::shared_ptr<::arrow::Int32Array> expected)
: node_repetition_(node_repetition), expected_(expected) {}
Status Validate(std::shared_ptr<Array> tree) { return tree->Accept(this); }
@@ -1475,7 +1485,7 @@ TEST_P(TestNestedSchemaRead, DeepNestedSchemaRead) {
}
INSTANTIATE_TEST_CASE_P(Repetition_type, TestNestedSchemaRead,
- ::testing::Values(Repetition::REQUIRED, Repetition::OPTIONAL));
+ ::testing::Values(Repetition::REQUIRED, Repetition::OPTIONAL));
TEST(TestImpalaConversion, NanosecondToImpala) {
// June 20, 2017 16:32:56 and 123456789 nanoseconds
http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/b6f3caeb/src/parquet/arrow/arrow-schema-test.cc
----------------------------------------------------------------------
diff --git a/src/parquet/arrow/arrow-schema-test.cc b/src/parquet/arrow/arrow-schema-test.cc
index 34d4813..22e3adb 100644
--- a/src/parquet/arrow/arrow-schema-test.cc
+++ b/src/parquet/arrow/arrow-schema-test.cc
@@ -72,14 +72,15 @@ class TestConvertParquetSchema : public ::testing::Test {
return FromParquetSchema(&descr_, &result_schema_);
}
- ::arrow::Status ConvertSchema(
- const std::vector<NodePtr>& nodes, const std::vector<int>& column_indices) {
+ ::arrow::Status ConvertSchema(const std::vector<NodePtr>& nodes,
+ const std::vector<int>& column_indices) {
NodePtr schema = GroupNode::Make("schema", Repetition::REPEATED, nodes);
descr_.Init(schema);
return FromParquetSchema(&descr_, column_indices, &result_schema_);
}
- ::arrow::Status ConvertSchema(const std::vector<NodePtr>& nodes,
+ ::arrow::Status ConvertSchema(
+ const std::vector<NodePtr>& nodes,
const std::shared_ptr<const KeyValueMetadata>& key_value_metadata) {
NodePtr schema = GroupNode::Make("schema", Repetition::REPEATED, nodes);
descr_.Init(schema);
@@ -108,15 +109,17 @@ TEST_F(TestConvertParquetSchema, ParquetFlatPrimitives) {
arrow_fields.push_back(std::make_shared<Field>("int64", INT64, false));
parquet_fields.push_back(PrimitiveNode::Make("timestamp", Repetition::REQUIRED,
- ParquetType::INT64, LogicalType::TIMESTAMP_MILLIS));
+ ParquetType::INT64,
+ LogicalType::TIMESTAMP_MILLIS));
arrow_fields.push_back(std::make_shared<Field>("timestamp", TIMESTAMP_MS, false));
parquet_fields.push_back(PrimitiveNode::Make("timestamp[us]", Repetition::REQUIRED,
- ParquetType::INT64, LogicalType::TIMESTAMP_MICROS));
+ ParquetType::INT64,
+ LogicalType::TIMESTAMP_MICROS));
arrow_fields.push_back(std::make_shared<Field>("timestamp[us]", TIMESTAMP_US, false));
- parquet_fields.push_back(PrimitiveNode::Make(
- "date", Repetition::REQUIRED, ParquetType::INT32, LogicalType::DATE));
+ parquet_fields.push_back(PrimitiveNode::Make("date", Repetition::REQUIRED,
+ ParquetType::INT32, LogicalType::DATE));
arrow_fields.push_back(std::make_shared<Field>("date", ::arrow::date32(), false));
parquet_fields.push_back(PrimitiveNode::Make(
@@ -150,7 +153,8 @@ TEST_F(TestConvertParquetSchema, ParquetFlatPrimitives) {
arrow_fields.push_back(std::make_shared<Field>("string", UTF8));
parquet_fields.push_back(PrimitiveNode::Make("flba-binary", Repetition::OPTIONAL,
- ParquetType::FIXED_LEN_BYTE_ARRAY, LogicalType::NONE, 12));
+ ParquetType::FIXED_LEN_BYTE_ARRAY,
+ LogicalType::NONE, 12));
arrow_fields.push_back(
std::make_shared<Field>("flba-binary", ::arrow::fixed_size_binary(12)));
@@ -204,19 +208,23 @@ TEST_F(TestConvertParquetSchema, ParquetFlatDecimals) {
std::vector<std::shared_ptr<Field>> arrow_fields;
parquet_fields.push_back(PrimitiveNode::Make("flba-decimal", Repetition::OPTIONAL,
- ParquetType::FIXED_LEN_BYTE_ARRAY, LogicalType::DECIMAL, 4, 8, 4));
+ ParquetType::FIXED_LEN_BYTE_ARRAY,
+ LogicalType::DECIMAL, 4, 8, 4));
arrow_fields.push_back(std::make_shared<Field>("flba-decimal", DECIMAL_8_4));
parquet_fields.push_back(PrimitiveNode::Make("binary-decimal", Repetition::OPTIONAL,
- ParquetType::BYTE_ARRAY, LogicalType::DECIMAL, -1, 8, 4));
+ ParquetType::BYTE_ARRAY,
+ LogicalType::DECIMAL, -1, 8, 4));
arrow_fields.push_back(std::make_shared<Field>("binary-decimal", DECIMAL_8_4));
parquet_fields.push_back(PrimitiveNode::Make("int32-decimal", Repetition::OPTIONAL,
- ParquetType::INT32, LogicalType::DECIMAL, -1, 8, 4));
+ ParquetType::INT32, LogicalType::DECIMAL,
+ -1, 8, 4));
arrow_fields.push_back(std::make_shared<Field>("int32-decimal", DECIMAL_8_4));
parquet_fields.push_back(PrimitiveNode::Make("int64-decimal", Repetition::OPTIONAL,
- ParquetType::INT64, LogicalType::DECIMAL, -1, 8, 4));
+ ParquetType::INT64, LogicalType::DECIMAL,
+ -1, 8, 4));
arrow_fields.push_back(std::make_shared<Field>("int64-decimal", DECIMAL_8_4));
auto arrow_schema = std::make_shared<::arrow::Schema>(arrow_fields);
@@ -238,8 +246,8 @@ TEST_F(TestConvertParquetSchema, ParquetLists) {
// }
// }
{
- auto element = PrimitiveNode::Make(
- "string", Repetition::OPTIONAL, ParquetType::BYTE_ARRAY, LogicalType::UTF8);
+ auto element = PrimitiveNode::Make("string", Repetition::OPTIONAL,
+ ParquetType::BYTE_ARRAY, LogicalType::UTF8);
auto list = GroupNode::Make("list", Repetition::REPEATED, {element});
parquet_fields.push_back(
GroupNode::Make("my_list", Repetition::REQUIRED, {list}, LogicalType::LIST));
@@ -255,8 +263,8 @@ TEST_F(TestConvertParquetSchema, ParquetLists) {
// }
// }
{
- auto element = PrimitiveNode::Make(
- "string", Repetition::REQUIRED, ParquetType::BYTE_ARRAY, LogicalType::UTF8);
+ auto element = PrimitiveNode::Make("string", Repetition::REQUIRED,
+ ParquetType::BYTE_ARRAY, LogicalType::UTF8);
auto list = GroupNode::Make("list", Repetition::REPEATED, {element});
parquet_fields.push_back(
GroupNode::Make("my_list", Repetition::OPTIONAL, {list}, LogicalType::LIST));
@@ -284,8 +292,8 @@ TEST_F(TestConvertParquetSchema, ParquetLists) {
auto element =
GroupNode::Make("element", Repetition::REQUIRED, {inner_list}, LogicalType::LIST);
auto list = GroupNode::Make("list", Repetition::REPEATED, {element});
- parquet_fields.push_back(GroupNode::Make(
- "array_of_arrays", Repetition::OPTIONAL, {list}, LogicalType::LIST));
+ parquet_fields.push_back(GroupNode::Make("array_of_arrays", Repetition::OPTIONAL,
+ {list}, LogicalType::LIST));
auto arrow_inner_element = std::make_shared<Field>("int32", INT32, false);
auto arrow_inner_list = std::make_shared<::arrow::ListType>(arrow_inner_element);
auto arrow_element = std::make_shared<Field>("element", arrow_inner_list, false);
@@ -300,8 +308,8 @@ TEST_F(TestConvertParquetSchema, ParquetLists) {
// };
// }
{
- auto element = PrimitiveNode::Make(
- "str", Repetition::REQUIRED, ParquetType::BYTE_ARRAY, LogicalType::UTF8);
+ auto element = PrimitiveNode::Make("str", Repetition::REQUIRED,
+ ParquetType::BYTE_ARRAY, LogicalType::UTF8);
auto list = GroupNode::Make("element", Repetition::REPEATED, {element});
parquet_fields.push_back(
GroupNode::Make("my_list", Repetition::OPTIONAL, {list}, LogicalType::LIST));
@@ -332,8 +340,8 @@ TEST_F(TestConvertParquetSchema, ParquetLists) {
// };
// }
{
- auto str_element = PrimitiveNode::Make(
- "str", Repetition::REQUIRED, ParquetType::BYTE_ARRAY, LogicalType::UTF8);
+ auto str_element = PrimitiveNode::Make("str", Repetition::REQUIRED,
+ ParquetType::BYTE_ARRAY, LogicalType::UTF8);
auto num_element =
PrimitiveNode::Make("num", Repetition::REQUIRED, ParquetType::INT32);
auto element =
@@ -357,8 +365,8 @@ TEST_F(TestConvertParquetSchema, ParquetLists) {
// }
// Special case: group is named array
{
- auto element = PrimitiveNode::Make(
- "str", Repetition::REQUIRED, ParquetType::BYTE_ARRAY, LogicalType::UTF8);
+ auto element = PrimitiveNode::Make("str", Repetition::REQUIRED,
+ ParquetType::BYTE_ARRAY, LogicalType::UTF8);
auto array = GroupNode::Make("array", Repetition::REPEATED, {element});
parquet_fields.push_back(
GroupNode::Make("my_list", Repetition::OPTIONAL, {array}, LogicalType::LIST));
@@ -378,8 +386,8 @@ TEST_F(TestConvertParquetSchema, ParquetLists) {
// }
// Special case: group named ends in _tuple
{
- auto element = PrimitiveNode::Make(
- "str", Repetition::REQUIRED, ParquetType::BYTE_ARRAY, LogicalType::UTF8);
+ auto element = PrimitiveNode::Make("str", Repetition::REQUIRED,
+ ParquetType::BYTE_ARRAY, LogicalType::UTF8);
auto array = GroupNode::Make("my_list_tuple", Repetition::REPEATED, {element});
parquet_fields.push_back(
GroupNode::Make("my_list", Repetition::OPTIONAL, {array}, LogicalType::LIST));
@@ -425,14 +433,15 @@ TEST_F(TestConvertParquetSchema, ParquetNestedSchema) {
// }
// required int64 leaf3;
{
- parquet_fields.push_back(GroupNode::Make("group1", Repetition::REQUIRED,
+ parquet_fields.push_back(GroupNode::Make(
+ "group1", Repetition::REQUIRED,
{PrimitiveNode::Make("leaf1", Repetition::REQUIRED, ParquetType::BOOLEAN),
- PrimitiveNode::Make("leaf2", Repetition::REQUIRED, ParquetType::INT32)}));
+ PrimitiveNode::Make("leaf2", Repetition::REQUIRED, ParquetType::INT32)}));
parquet_fields.push_back(
PrimitiveNode::Make("leaf3", Repetition::REQUIRED, ParquetType::INT64));
auto group1_fields = {std::make_shared<Field>("leaf1", BOOL, false),
- std::make_shared<Field>("leaf2", INT32, false)};
+ std::make_shared<Field>("leaf2", INT32, false)};
auto arrow_group1_type = std::make_shared<::arrow::StructType>(group1_fields);
arrow_fields.push_back(std::make_shared<Field>("group1", arrow_group1_type, false));
arrow_fields.push_back(std::make_shared<Field>("leaf3", INT64, false));
@@ -468,12 +477,14 @@ TEST_F(TestConvertParquetSchema, ParquetNestedSchemaPartial) {
// }
// required int64 leaf5;
{
- parquet_fields.push_back(GroupNode::Make("group1", Repetition::REQUIRED,
+ parquet_fields.push_back(GroupNode::Make(
+ "group1", Repetition::REQUIRED,
{PrimitiveNode::Make("leaf1", Repetition::REQUIRED, ParquetType::INT64),
- PrimitiveNode::Make("leaf2", Repetition::REQUIRED, ParquetType::INT64)}));
- parquet_fields.push_back(GroupNode::Make("group2", Repetition::REQUIRED,
+ PrimitiveNode::Make("leaf2", Repetition::REQUIRED, ParquetType::INT64)}));
+ parquet_fields.push_back(GroupNode::Make(
+ "group2", Repetition::REQUIRED,
{PrimitiveNode::Make("leaf3", Repetition::REQUIRED, ParquetType::INT64),
- PrimitiveNode::Make("leaf4", Repetition::REQUIRED, ParquetType::INT64)}));
+ PrimitiveNode::Make("leaf4", Repetition::REQUIRED, ParquetType::INT64)}));
parquet_fields.push_back(
PrimitiveNode::Make("leaf5", Repetition::REQUIRED, ParquetType::INT64));
@@ -517,12 +528,14 @@ TEST_F(TestConvertParquetSchema, ParquetNestedSchemaPartialOrdering) {
// required int64 leaf1;
// }
{
- parquet_fields.push_back(GroupNode::Make("group1", Repetition::REQUIRED,
+ parquet_fields.push_back(GroupNode::Make(
+ "group1", Repetition::REQUIRED,
{PrimitiveNode::Make("leaf1", Repetition::REQUIRED, ParquetType::INT64),
- PrimitiveNode::Make("leaf2", Repetition::REQUIRED, ParquetType::INT64)}));
- parquet_fields.push_back(GroupNode::Make("group2", Repetition::REQUIRED,
+ PrimitiveNode::Make("leaf2", Repetition::REQUIRED, ParquetType::INT64)}));
+ parquet_fields.push_back(GroupNode::Make(
+ "group2", Repetition::REQUIRED,
{PrimitiveNode::Make("leaf3", Repetition::REQUIRED, ParquetType::INT64),
- PrimitiveNode::Make("leaf4", Repetition::REQUIRED, ParquetType::INT64)}));
+ PrimitiveNode::Make("leaf4", Repetition::REQUIRED, ParquetType::INT64)}));
parquet_fields.push_back(
PrimitiveNode::Make("leaf5", Repetition::REQUIRED, ParquetType::INT64));
@@ -554,22 +567,25 @@ TEST_F(TestConvertParquetSchema, ParquetRepeatedNestedSchema) {
// }
parquet_fields.push_back(
PrimitiveNode::Make("leaf1", Repetition::OPTIONAL, ParquetType::INT32));
- parquet_fields.push_back(GroupNode::Make("outerGroup", Repetition::REPEATED,
+ parquet_fields.push_back(GroupNode::Make(
+ "outerGroup", Repetition::REPEATED,
{PrimitiveNode::Make("leaf2", Repetition::OPTIONAL, ParquetType::INT32),
- GroupNode::Make("innerGroup", Repetition::REPEATED,
- {PrimitiveNode::Make(
- "leaf3", Repetition::OPTIONAL, ParquetType::INT32)})}));
+ GroupNode::Make(
+ "innerGroup", Repetition::REPEATED,
+ {PrimitiveNode::Make("leaf3", Repetition::OPTIONAL, ParquetType::INT32)})}));
auto inner_group_fields = {std::make_shared<Field>("leaf3", INT32, true)};
auto inner_group_type = std::make_shared<::arrow::StructType>(inner_group_fields);
- auto outer_group_fields = {std::make_shared<Field>("leaf2", INT32, true),
- std::make_shared<Field>("innerGroup",
- ::arrow::list(std::make_shared<Field>("innerGroup", inner_group_type, false)),
- false)};
+ auto outer_group_fields = {
+ std::make_shared<Field>("leaf2", INT32, true),
+ std::make_shared<Field>("innerGroup", ::arrow::list(std::make_shared<Field>(
+ "innerGroup", inner_group_type, false)),
+ false)};
auto outer_group_type = std::make_shared<::arrow::StructType>(outer_group_fields);
arrow_fields.push_back(std::make_shared<Field>("leaf1", INT32, true));
- arrow_fields.push_back(std::make_shared<Field>("outerGroup",
+ arrow_fields.push_back(std::make_shared<Field>(
+ "outerGroup",
::arrow::list(std::make_shared<Field>("outerGroup", outer_group_type, false)),
false));
}
@@ -626,20 +642,22 @@ TEST_F(TestConvertArrowSchema, ParquetFlatPrimitives) {
PrimitiveNode::Make("int64", Repetition::REQUIRED, ParquetType::INT64));
arrow_fields.push_back(std::make_shared<Field>("int64", INT64, false));
- parquet_fields.push_back(PrimitiveNode::Make(
- "date", Repetition::REQUIRED, ParquetType::INT32, LogicalType::DATE));
+ parquet_fields.push_back(PrimitiveNode::Make("date", Repetition::REQUIRED,
+ ParquetType::INT32, LogicalType::DATE));
arrow_fields.push_back(std::make_shared<Field>("date", ::arrow::date32(), false));
- parquet_fields.push_back(PrimitiveNode::Make(
- "date64", Repetition::REQUIRED, ParquetType::INT32, LogicalType::DATE));
+ parquet_fields.push_back(PrimitiveNode::Make("date64", Repetition::REQUIRED,
+ ParquetType::INT32, LogicalType::DATE));
arrow_fields.push_back(std::make_shared<Field>("date64", ::arrow::date64(), false));
parquet_fields.push_back(PrimitiveNode::Make("timestamp", Repetition::REQUIRED,
- ParquetType::INT64, LogicalType::TIMESTAMP_MILLIS));
+ ParquetType::INT64,
+ LogicalType::TIMESTAMP_MILLIS));
arrow_fields.push_back(std::make_shared<Field>("timestamp", TIMESTAMP_MS, false));
parquet_fields.push_back(PrimitiveNode::Make("timestamp[us]", Repetition::REQUIRED,
- ParquetType::INT64, LogicalType::TIMESTAMP_MICROS));
+ ParquetType::INT64,
+ LogicalType::TIMESTAMP_MICROS));
arrow_fields.push_back(std::make_shared<Field>("timestamp[us]", TIMESTAMP_US, false));
parquet_fields.push_back(
@@ -676,8 +694,8 @@ TEST_F(TestConvertArrowSchema, ParquetLists) {
// }
// }
{
- auto element = PrimitiveNode::Make(
- "string", Repetition::OPTIONAL, ParquetType::BYTE_ARRAY, LogicalType::UTF8);
+ auto element = PrimitiveNode::Make("string", Repetition::OPTIONAL,
+ ParquetType::BYTE_ARRAY, LogicalType::UTF8);
auto list = GroupNode::Make("list", Repetition::REPEATED, {element});
parquet_fields.push_back(
GroupNode::Make("my_list", Repetition::REQUIRED, {list}, LogicalType::LIST));
@@ -693,8 +711,8 @@ TEST_F(TestConvertArrowSchema, ParquetLists) {
// }
// }
{
- auto element = PrimitiveNode::Make(
- "string", Repetition::REQUIRED, ParquetType::BYTE_ARRAY, LogicalType::UTF8);
+ auto element = PrimitiveNode::Make("string", Repetition::REQUIRED,
+ ParquetType::BYTE_ARRAY, LogicalType::UTF8);
auto list = GroupNode::Make("list", Repetition::REPEATED, {element});
parquet_fields.push_back(
GroupNode::Make("my_list", Repetition::OPTIONAL, {list}, LogicalType::LIST));
[4/5] parquet-cpp git commit: PARQUET-1068: Modify .clang-format to
use straight Google format with 90-character line width
Posted by we...@apache.org.
http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/b6f3caeb/src/parquet/arrow/reader.cc
----------------------------------------------------------------------
diff --git a/src/parquet/arrow/reader.cc b/src/parquet/arrow/reader.cc
index e941c1f..a41ad57 100644
--- a/src/parquet/arrow/reader.cc
+++ b/src/parquet/arrow/reader.cc
@@ -84,7 +84,9 @@ Status ParallelFor(int nthreads, int num_tasks, FUNCTION&& func) {
int task_id;
while (!error_occurred) {
task_id = task_counter.fetch_add(1);
- if (task_id >= num_tasks) { break; }
+ if (task_id >= num_tasks) {
+ break;
+ }
Status s = func(task_id);
if (!s.ok()) {
std::lock_guard<std::mutex> lock(error_mtx);
@@ -98,7 +100,9 @@ Status ParallelFor(int nthreads, int num_tasks, FUNCTION&& func) {
for (auto&& thread : thread_pool) {
thread.join();
}
- if (error_occurred) { return error; }
+ if (error_occurred) {
+ return error;
+ }
return Status::OK();
}
@@ -154,14 +158,16 @@ class AllRowGroupsIterator : public FileColumnIterator {
class SingleRowGroupIterator : public FileColumnIterator {
public:
- explicit SingleRowGroupIterator(
- int column_index, int row_group_number, ParquetFileReader* reader)
+ explicit SingleRowGroupIterator(int column_index, int row_group_number,
+ ParquetFileReader* reader)
: FileColumnIterator(column_index, reader),
row_group_number_(row_group_number),
done_(false) {}
std::shared_ptr<::parquet::ColumnReader> Next() override {
- if (done_) { return nullptr; }
+ if (done_) {
+ return nullptr;
+ }
auto result = reader_->RowGroup(row_group_number_)->Column(column_index_);
done_ = true;
@@ -185,16 +191,16 @@ class FileReader::Impl {
Status GetColumn(int i, std::unique_ptr<ColumnReader>* out);
Status ReadSchemaField(int i, std::shared_ptr<Array>* out);
- Status ReadSchemaField(
- int i, const std::vector<int>& indices, std::shared_ptr<Array>* out);
+ Status ReadSchemaField(int i, const std::vector<int>& indices,
+ std::shared_ptr<Array>* out);
Status GetReaderForNode(int index, const NodePtr& node, const std::vector<int>& indices,
- int16_t def_level, std::unique_ptr<ColumnReader::Impl>* out);
+ int16_t def_level, std::unique_ptr<ColumnReader::Impl>* out);
Status ReadColumn(int i, std::shared_ptr<Array>* out);
Status GetSchema(std::shared_ptr<::arrow::Schema>* out);
- Status GetSchema(
- const std::vector<int>& indices, std::shared_ptr<::arrow::Schema>* out);
+ Status GetSchema(const std::vector<int>& indices,
+ std::shared_ptr<::arrow::Schema>* out);
Status ReadRowGroup(int row_group_index, const std::vector<int>& indices,
- std::shared_ptr<::arrow::Table>* out);
+ std::shared_ptr<::arrow::Table>* out);
Status ReadTable(const std::vector<int>& indices, std::shared_ptr<Table>* table);
Status ReadTable(std::shared_ptr<Table>* table);
Status ReadRowGroup(int i, std::shared_ptr<Table>* table);
@@ -258,13 +264,13 @@ class PARQUET_NO_EXPORT PrimitiveImpl : public ColumnReader::Impl {
Status InitValidBits(int batch_size);
template <typename ArrowType, typename ParquetType>
Status ReadNullableBatch(TypedColumnReader<ParquetType>* reader, int16_t* def_levels,
- int16_t* rep_levels, int64_t values_to_read, int64_t* levels_read,
- int64_t* values_read);
+ int16_t* rep_levels, int64_t values_to_read,
+ int64_t* levels_read, int64_t* values_read);
template <typename ArrowType, typename ParquetType>
Status ReadNonNullableBatch(TypedColumnReader<ParquetType>* reader,
- int64_t values_to_read, int64_t* levels_read);
+ int64_t values_to_read, int64_t* levels_read);
Status WrapIntoListArray(const int16_t* def_levels, const int16_t* rep_levels,
- int64_t total_values_read, std::shared_ptr<Array>* array);
+ int64_t total_values_read, std::shared_ptr<Array>* array);
Status GetDefLevels(ValueLevelsPtr* data, size_t* length) override;
Status GetRepLevels(ValueLevelsPtr* data, size_t* length) override;
@@ -279,7 +285,7 @@ class PARQUET_NO_EXPORT PrimitiveImpl : public ColumnReader::Impl {
static constexpr bool value =
std::is_same<InType, OutType>::value ||
(std::is_integral<InType>{} && std::is_integral<OutType>{} &&
- (sizeof(InType) == sizeof(OutType)));
+ (sizeof(InType) == sizeof(OutType)));
};
MemoryPool* pool_;
@@ -304,7 +310,7 @@ class PARQUET_NO_EXPORT PrimitiveImpl : public ColumnReader::Impl {
class PARQUET_NO_EXPORT StructImpl : public ColumnReader::Impl {
public:
explicit StructImpl(const std::vector<std::shared_ptr<Impl>>& children,
- int16_t struct_def_level, MemoryPool* pool, const NodePtr& node)
+ int16_t struct_def_level, MemoryPool* pool, const NodePtr& node)
: children_(children),
struct_def_level_(struct_def_level),
pool_(pool),
@@ -326,8 +332,8 @@ class PARQUET_NO_EXPORT StructImpl : public ColumnReader::Impl {
std::shared_ptr<Field> field_;
PoolBuffer def_levels_buffer_;
- Status DefLevelsToNullArray(
- std::shared_ptr<MutableBuffer>* null_bitmap, int64_t* null_count);
+ Status DefLevelsToNullArray(std::shared_ptr<MutableBuffer>* null_bitmap,
+ int64_t* null_count);
void InitField(const NodePtr& node, const std::vector<std::shared_ptr<Impl>>& children);
};
@@ -345,8 +351,9 @@ Status FileReader::Impl::GetColumn(int i, std::unique_ptr<ColumnReader>* out) {
}
Status FileReader::Impl::GetReaderForNode(int index, const NodePtr& node,
- const std::vector<int>& indices, int16_t def_level,
- std::unique_ptr<ColumnReader::Impl>* out) {
+ const std::vector<int>& indices,
+ int16_t def_level,
+ std::unique_ptr<ColumnReader::Impl>* out) {
*out = nullptr;
if (IsSimpleStruct(node)) {
@@ -357,9 +364,11 @@ Status FileReader::Impl::GetReaderForNode(int index, const NodePtr& node,
// TODO(itaiin): Remove the -1 index hack when all types of nested reads
// are supported. This currently just signals the lower level reader resolution
// to abort
- RETURN_NOT_OK(GetReaderForNode(
- index, group->field(i), indices, def_level + 1, &child_reader));
- if (child_reader != nullptr) { children.push_back(std::move(child_reader)); }
+ RETURN_NOT_OK(GetReaderForNode(index, group->field(i), indices, def_level + 1,
+ &child_reader));
+ if (child_reader != nullptr) {
+ children.push_back(std::move(child_reader));
+ }
}
if (children.size() > 0) {
@@ -402,8 +411,8 @@ Status FileReader::Impl::ReadSchemaField(int i, std::shared_ptr<Array>* out) {
return ReadSchemaField(i, indices, out);
}
-Status FileReader::Impl::ReadSchemaField(
- int i, const std::vector<int>& indices, std::shared_ptr<Array>* out) {
+Status FileReader::Impl::ReadSchemaField(int i, const std::vector<int>& indices,
+ std::shared_ptr<Array>* out) {
auto parquet_schema = reader_->metadata()->schema();
auto node = parquet_schema->group_node()->field(i);
@@ -437,15 +446,16 @@ Status FileReader::Impl::ReadColumn(int i, std::shared_ptr<Array>* out) {
return flat_column_reader->NextBatch(static_cast<int>(batch_size), out);
}
-Status FileReader::Impl::GetSchema(
- const std::vector<int>& indices, std::shared_ptr<::arrow::Schema>* out) {
+Status FileReader::Impl::GetSchema(const std::vector<int>& indices,
+ std::shared_ptr<::arrow::Schema>* out) {
auto descr = reader_->metadata()->schema();
auto parquet_key_value_metadata = reader_->metadata()->key_value_metadata();
return FromParquetSchema(descr, indices, parquet_key_value_metadata, out);
}
Status FileReader::Impl::ReadRowGroup(int row_group_index,
- const std::vector<int>& indices, std::shared_ptr<::arrow::Table>* out) {
+ const std::vector<int>& indices,
+ std::shared_ptr<::arrow::Table>* out) {
std::shared_ptr<::arrow::Schema> schema;
RETURN_NOT_OK(GetSchema(indices, &schema));
@@ -458,7 +468,7 @@ Status FileReader::Impl::ReadRowGroup(int row_group_index,
// TODO(wesm): Refactor to share more code with ReadTable
auto ReadColumnFunc = [&indices, &row_group_index, &schema, &columns, &rg_metadata,
- this](int i) {
+ this](int i) {
int column_index = indices[i];
int64_t batch_size = rg_metadata->ColumnChunk(column_index)->num_values();
@@ -486,16 +496,16 @@ Status FileReader::Impl::ReadRowGroup(int row_group_index,
return Status::OK();
}
-Status FileReader::Impl::ReadTable(
- const std::vector<int>& indices, std::shared_ptr<Table>* table) {
+Status FileReader::Impl::ReadTable(const std::vector<int>& indices,
+ std::shared_ptr<Table>* table) {
std::shared_ptr<::arrow::Schema> schema;
RETURN_NOT_OK(GetSchema(indices, &schema));
// We only need to read schema fields which have columns indicated
// in the indices vector
std::vector<int> field_indices;
- if (!ColumnIndicesToFieldIndices(
- *reader_->metadata()->schema(), indices, &field_indices)) {
+ if (!ColumnIndicesToFieldIndices(*reader_->metadata()->schema(), indices,
+ &field_indices)) {
return Status::Invalid("Invalid column index");
}
@@ -541,20 +551,21 @@ Status FileReader::Impl::ReadRowGroup(int i, std::shared_ptr<Table>* table) {
// Static ctor
Status OpenFile(const std::shared_ptr<::arrow::io::ReadableFileInterface>& file,
- MemoryPool* allocator, const ReaderProperties& props,
- const std::shared_ptr<FileMetaData>& metadata, std::unique_ptr<FileReader>* reader) {
+ MemoryPool* allocator, const ReaderProperties& props,
+ const std::shared_ptr<FileMetaData>& metadata,
+ std::unique_ptr<FileReader>* reader) {
std::unique_ptr<RandomAccessSource> io_wrapper(new ArrowInputFile(file));
std::unique_ptr<ParquetReader> pq_reader;
- PARQUET_CATCH_NOT_OK(
- pq_reader = ParquetReader::Open(std::move(io_wrapper), props, metadata));
+ PARQUET_CATCH_NOT_OK(pq_reader =
+ ParquetReader::Open(std::move(io_wrapper), props, metadata));
reader->reset(new FileReader(allocator, std::move(pq_reader)));
return Status::OK();
}
Status OpenFile(const std::shared_ptr<::arrow::io::ReadableFileInterface>& file,
- MemoryPool* allocator, std::unique_ptr<FileReader>* reader) {
- return OpenFile(
- file, allocator, ::parquet::default_reader_properties(), nullptr, reader);
+ MemoryPool* allocator, std::unique_ptr<FileReader>* reader) {
+ return OpenFile(file, allocator, ::parquet::default_reader_properties(), nullptr,
+ reader);
}
Status FileReader::GetColumn(int i, std::unique_ptr<ColumnReader>* out) {
@@ -585,8 +596,8 @@ Status FileReader::ReadTable(std::shared_ptr<Table>* out) {
}
}
-Status FileReader::ReadTable(
- const std::vector<int>& indices, std::shared_ptr<Table>* out) {
+Status FileReader::ReadTable(const std::vector<int>& indices,
+ std::shared_ptr<Table>* out) {
try {
return impl_->ReadTable(indices, out);
} catch (const ::parquet::ParquetException& e) {
@@ -602,8 +613,8 @@ Status FileReader::ReadRowGroup(int i, std::shared_ptr<Table>* out) {
}
}
-Status FileReader::ReadRowGroup(
- int i, const std::vector<int>& indices, std::shared_ptr<Table>* out) {
+Status FileReader::ReadRowGroup(int i, const std::vector<int>& indices,
+ std::shared_ptr<Table>* out) {
try {
return impl_->ReadRowGroup(i, indices, out);
} catch (const ::parquet::ParquetException& e) {
@@ -611,13 +622,9 @@ Status FileReader::ReadRowGroup(
}
}
-int FileReader::num_row_groups() const {
- return impl_->num_row_groups();
-}
+int FileReader::num_row_groups() const { return impl_->num_row_groups(); }
-void FileReader::set_num_threads(int num_threads) {
- impl_->set_num_threads(num_threads);
-}
+void FileReader::set_num_threads(int num_threads) { impl_->set_num_threads(num_threads); }
const ParquetFileReader* FileReader::parquet_reader() const {
return impl_->parquet_reader();
@@ -625,15 +632,16 @@ const ParquetFileReader* FileReader::parquet_reader() const {
template <typename ArrowType, typename ParquetType>
Status PrimitiveImpl::ReadNonNullableBatch(TypedColumnReader<ParquetType>* reader,
- int64_t values_to_read, int64_t* levels_read) {
+ int64_t values_to_read, int64_t* levels_read) {
using ArrowCType = typename ArrowType::c_type;
using ParquetCType = typename ParquetType::c_type;
RETURN_NOT_OK(values_buffer_.Resize(values_to_read * sizeof(ParquetCType), false));
auto values = reinterpret_cast<ParquetCType*>(values_buffer_.mutable_data());
int64_t values_read;
- PARQUET_CATCH_NOT_OK(*levels_read = reader->ReadBatch(static_cast<int>(values_to_read),
- nullptr, nullptr, values, &values_read));
+ PARQUET_CATCH_NOT_OK(*levels_read =
+ reader->ReadBatch(static_cast<int>(values_to_read), nullptr,
+ nullptr, values, &values_read));
ArrowCType* out_ptr = reinterpret_cast<ArrowCType*>(data_buffer_ptr_);
std::copy(values, values + values_read, out_ptr + valid_bits_idx_);
@@ -673,8 +681,9 @@ Status PrimitiveImpl::ReadNonNullableBatch<::arrow::TimestampType, Int96Type>(
RETURN_NOT_OK(values_buffer_.Resize(values_to_read * sizeof(Int96), false));
auto values = reinterpret_cast<Int96*>(values_buffer_.mutable_data());
int64_t values_read;
- PARQUET_CATCH_NOT_OK(*levels_read = reader->ReadBatch(static_cast<int>(values_to_read),
- nullptr, nullptr, values, &values_read));
+ PARQUET_CATCH_NOT_OK(*levels_read =
+ reader->ReadBatch(static_cast<int>(values_to_read), nullptr,
+ nullptr, values, &values_read));
int64_t* out_ptr = reinterpret_cast<int64_t*>(data_buffer_ptr_) + valid_bits_idx_;
for (int64_t i = 0; i < values_read; i++) {
@@ -691,8 +700,9 @@ Status PrimitiveImpl::ReadNonNullableBatch<::arrow::Date64Type, Int32Type>(
RETURN_NOT_OK(values_buffer_.Resize(values_to_read * sizeof(int32_t), false));
auto values = reinterpret_cast<int32_t*>(values_buffer_.mutable_data());
int64_t values_read;
- PARQUET_CATCH_NOT_OK(*levels_read = reader->ReadBatch(static_cast<int>(values_to_read),
- nullptr, nullptr, values, &values_read));
+ PARQUET_CATCH_NOT_OK(*levels_read =
+ reader->ReadBatch(static_cast<int>(values_to_read), nullptr,
+ nullptr, values, &values_read));
int64_t* out_ptr = reinterpret_cast<int64_t*>(data_buffer_ptr_) + valid_bits_idx_;
for (int64_t i = 0; i < values_read; i++) {
@@ -710,11 +720,14 @@ Status PrimitiveImpl::ReadNonNullableBatch<::arrow::BooleanType, BooleanType>(
RETURN_NOT_OK(values_buffer_.Resize(values_to_read * sizeof(bool), false));
auto values = reinterpret_cast<bool*>(values_buffer_.mutable_data());
int64_t values_read;
- PARQUET_CATCH_NOT_OK(*levels_read = reader->ReadBatch(static_cast<int>(values_to_read),
- nullptr, nullptr, values, &values_read));
+ PARQUET_CATCH_NOT_OK(*levels_read =
+ reader->ReadBatch(static_cast<int>(values_to_read), nullptr,
+ nullptr, values, &values_read));
for (int64_t i = 0; i < values_read; i++) {
- if (values[i]) { ::arrow::BitUtil::SetBit(data_buffer_ptr_, valid_bits_idx_); }
+ if (values[i]) {
+ ::arrow::BitUtil::SetBit(data_buffer_ptr_, valid_bits_idx_);
+ }
valid_bits_idx_++;
}
@@ -723,17 +736,18 @@ Status PrimitiveImpl::ReadNonNullableBatch<::arrow::BooleanType, BooleanType>(
template <typename ArrowType, typename ParquetType>
Status PrimitiveImpl::ReadNullableBatch(TypedColumnReader<ParquetType>* reader,
- int16_t* def_levels, int16_t* rep_levels, int64_t values_to_read,
- int64_t* levels_read, int64_t* values_read) {
+ int16_t* def_levels, int16_t* rep_levels,
+ int64_t values_to_read, int64_t* levels_read,
+ int64_t* values_read) {
using ArrowCType = typename ArrowType::c_type;
using ParquetCType = typename ParquetType::c_type;
RETURN_NOT_OK(values_buffer_.Resize(values_to_read * sizeof(ParquetCType), false));
auto values = reinterpret_cast<ParquetCType*>(values_buffer_.mutable_data());
int64_t null_count;
- PARQUET_CATCH_NOT_OK(reader->ReadBatchSpaced(static_cast<int>(values_to_read),
- def_levels, rep_levels, values, valid_bits_ptr_, valid_bits_idx_, levels_read,
- values_read, &null_count));
+ PARQUET_CATCH_NOT_OK(reader->ReadBatchSpaced(
+ static_cast<int>(values_to_read), def_levels, rep_levels, values, valid_bits_ptr_,
+ valid_bits_idx_, levels_read, values_read, &null_count));
auto data_ptr = reinterpret_cast<ArrowCType*>(data_buffer_ptr_);
INIT_BITSET(valid_bits_ptr_, static_cast<int>(valid_bits_idx_));
@@ -758,9 +772,10 @@ Status PrimitiveImpl::ReadNullableBatch(TypedColumnReader<ParquetType>* reader,
int64_t * values_read) { \
auto data_ptr = reinterpret_cast<CType*>(data_buffer_ptr_); \
int64_t null_count; \
- PARQUET_CATCH_NOT_OK(reader->ReadBatchSpaced(static_cast<int>(values_to_read), \
- def_levels, rep_levels, data_ptr + valid_bits_idx_, valid_bits_ptr_, \
- valid_bits_idx_, levels_read, values_read, &null_count)); \
+ PARQUET_CATCH_NOT_OK(reader->ReadBatchSpaced( \
+ static_cast<int>(values_to_read), def_levels, rep_levels, \
+ data_ptr + valid_bits_idx_, valid_bits_ptr_, valid_bits_idx_, levels_read, \
+ values_read, &null_count)); \
\
valid_bits_idx_ += *values_read; \
null_count_ += null_count; \
@@ -784,9 +799,9 @@ Status PrimitiveImpl::ReadNullableBatch<::arrow::TimestampType, Int96Type>(
RETURN_NOT_OK(values_buffer_.Resize(values_to_read * sizeof(Int96), false));
auto values = reinterpret_cast<Int96*>(values_buffer_.mutable_data());
int64_t null_count;
- PARQUET_CATCH_NOT_OK(reader->ReadBatchSpaced(static_cast<int>(values_to_read),
- def_levels, rep_levels, values, valid_bits_ptr_, valid_bits_idx_, levels_read,
- values_read, &null_count));
+ PARQUET_CATCH_NOT_OK(reader->ReadBatchSpaced(
+ static_cast<int>(values_to_read), def_levels, rep_levels, values, valid_bits_ptr_,
+ valid_bits_idx_, levels_read, values_read, &null_count));
auto data_ptr = reinterpret_cast<int64_t*>(data_buffer_ptr_);
INIT_BITSET(valid_bits_ptr_, static_cast<int>(valid_bits_idx_));
@@ -809,9 +824,9 @@ Status PrimitiveImpl::ReadNullableBatch<::arrow::Date64Type, Int32Type>(
RETURN_NOT_OK(values_buffer_.Resize(values_to_read * sizeof(int32_t), false));
auto values = reinterpret_cast<int32_t*>(values_buffer_.mutable_data());
int64_t null_count;
- PARQUET_CATCH_NOT_OK(reader->ReadBatchSpaced(static_cast<int>(values_to_read),
- def_levels, rep_levels, values, valid_bits_ptr_, valid_bits_idx_, levels_read,
- values_read, &null_count));
+ PARQUET_CATCH_NOT_OK(reader->ReadBatchSpaced(
+ static_cast<int>(values_to_read), def_levels, rep_levels, values, valid_bits_ptr_,
+ valid_bits_idx_, levels_read, values_read, &null_count));
auto data_ptr = reinterpret_cast<int64_t*>(data_buffer_ptr_);
INIT_BITSET(valid_bits_ptr_, static_cast<int>(valid_bits_idx_));
@@ -834,14 +849,16 @@ Status PrimitiveImpl::ReadNullableBatch<::arrow::BooleanType, BooleanType>(
RETURN_NOT_OK(values_buffer_.Resize(values_to_read * sizeof(bool), false));
auto values = reinterpret_cast<bool*>(values_buffer_.mutable_data());
int64_t null_count;
- PARQUET_CATCH_NOT_OK(reader->ReadBatchSpaced(static_cast<int>(values_to_read),
- def_levels, rep_levels, values, valid_bits_ptr_, valid_bits_idx_, levels_read,
- values_read, &null_count));
+ PARQUET_CATCH_NOT_OK(reader->ReadBatchSpaced(
+ static_cast<int>(values_to_read), def_levels, rep_levels, values, valid_bits_ptr_,
+ valid_bits_idx_, levels_read, values_read, &null_count));
INIT_BITSET(valid_bits_ptr_, static_cast<int>(valid_bits_idx_));
for (int64_t i = 0; i < *values_read; i++) {
if (bitset_valid_bits_ptr_ & (1 << bit_offset_valid_bits_ptr_)) {
- if (values[i]) { ::arrow::BitUtil::SetBit(data_buffer_ptr_, valid_bits_idx_ + i); }
+ if (values[i]) {
+ ::arrow::BitUtil::SetBit(data_buffer_ptr_, valid_bits_idx_ + i);
+ }
}
READ_NEXT_BITSET(valid_bits_ptr_);
}
@@ -886,10 +903,13 @@ Status PrimitiveImpl::InitValidBits(int batch_size) {
}
Status PrimitiveImpl::WrapIntoListArray(const int16_t* def_levels,
- const int16_t* rep_levels, int64_t total_levels_read, std::shared_ptr<Array>* array) {
+ const int16_t* rep_levels,
+ int64_t total_levels_read,
+ std::shared_ptr<Array>* array) {
std::shared_ptr<::arrow::Schema> arrow_schema;
RETURN_NOT_OK(FromParquetSchema(input_->schema(), {input_->column_index()},
- input_->metadata()->key_value_metadata(), &arrow_schema));
+ input_->metadata()->key_value_metadata(),
+ &arrow_schema));
std::shared_ptr<Field> current_field = arrow_schema->field(0);
if (descr_->max_repetition_level() > 0) {
@@ -920,14 +940,18 @@ Status PrimitiveImpl::WrapIntoListArray(const int16_t* def_levels,
// This describes the minimal definition that describes a level that
// reflects a value in the primitive values array.
int16_t values_def_level = descr_->max_definition_level();
- if (nullable[nullable.size() - 1]) { values_def_level--; }
+ if (nullable[nullable.size() - 1]) {
+ values_def_level--;
+ }
// The definition levels that are needed so that a list is declared
// as empty and not null.
std::vector<int16_t> empty_def_level(list_depth);
int def_level = 0;
for (int i = 0; i < list_depth; i++) {
- if (nullable[i]) { def_level++; }
+ if (nullable[i]) {
+ def_level++;
+ }
empty_def_level[i] = def_level;
def_level++;
}
@@ -951,11 +975,15 @@ Status PrimitiveImpl::WrapIntoListArray(const int16_t* def_levels,
break;
} else {
RETURN_NOT_OK(valid_bits_builders[j]->Append(true));
- if (empty_def_level[j] == def_levels[i]) { break; }
+ if (empty_def_level[j] == def_levels[i]) {
+ break;
+ }
}
}
}
- if (def_levels[i] >= values_def_level) { values_offset++; }
+ if (def_levels[i] >= values_def_level) {
+ values_offset++;
+ }
}
// Add the final offset to all lists
for (int64_t j = 0; j < list_depth; j++) {
@@ -1013,18 +1041,20 @@ Status PrimitiveImpl::TypedReadBatch(int batch_size, std::shared_ptr<Array>* out
int64_t values_read;
int64_t levels_read;
if (descr_->max_definition_level() == 0) {
- RETURN_NOT_OK((ReadNonNullableBatch<ArrowType, ParquetType>(
- reader, values_to_read, &values_read)));
+ RETURN_NOT_OK((ReadNonNullableBatch<ArrowType, ParquetType>(reader, values_to_read,
+ &values_read)));
} else {
// As per the defintion and checks for flat (list) columns:
// descr_->max_definition_level() > 0, <= 3
- RETURN_NOT_OK((ReadNullableBatch<ArrowType, ParquetType>(reader,
- def_levels + total_levels_read, rep_levels + total_levels_read, values_to_read,
- &levels_read, &values_read)));
+ RETURN_NOT_OK((ReadNullableBatch<ArrowType, ParquetType>(
+ reader, def_levels + total_levels_read, rep_levels + total_levels_read,
+ values_to_read, &levels_read, &values_read)));
total_levels_read += static_cast<int>(levels_read);
}
values_to_read -= static_cast<int>(values_read);
- if (!column_reader_->HasNext()) { NextRowGroup(); }
+ if (!column_reader_->HasNext()) {
+ NextRowGroup();
+ }
}
// Shrink arrays as they may be larger than the output.
@@ -1039,8 +1069,8 @@ Status PrimitiveImpl::TypedReadBatch(int batch_size, std::shared_ptr<Array>* out
// Relase the ownership as the Buffer is now part of a new Array
valid_bits_buffer_.reset();
} else {
- *out = std::make_shared<ArrayType<ArrowType>>(
- field_->type(), valid_bits_idx_, data_buffer_);
+ *out = std::make_shared<ArrayType<ArrowType>>(field_->type(), valid_bits_idx_,
+ data_buffer_);
}
// Relase the ownership as the Buffer is now part of a new Array
data_buffer_.reset();
@@ -1076,13 +1106,15 @@ Status PrimitiveImpl::TypedReadBatch<::arrow::BooleanType, BooleanType>(
} else {
// As per the defintion and checks for flat columns:
// descr_->max_definition_level() == 1
- RETURN_NOT_OK((ReadNullableBatch<::arrow::BooleanType, BooleanType>(reader,
- def_levels + total_levels_read, rep_levels + total_levels_read, values_to_read,
- &levels_read, &values_read)));
+ RETURN_NOT_OK((ReadNullableBatch<::arrow::BooleanType, BooleanType>(
+ reader, def_levels + total_levels_read, rep_levels + total_levels_read,
+ values_to_read, &levels_read, &values_read)));
total_levels_read += static_cast<int>(levels_read);
}
values_to_read -= static_cast<int>(values_read);
- if (!column_reader_->HasNext()) { NextRowGroup(); }
+ if (!column_reader_->HasNext()) {
+ NextRowGroup();
+ }
}
if (descr_->max_definition_level() > 0) {
@@ -1102,11 +1134,11 @@ Status PrimitiveImpl::TypedReadBatch<::arrow::BooleanType, BooleanType>(
RETURN_NOT_OK(
valid_bits_buffer->Resize(::arrow::BitUtil::CeilByte(valid_bits_idx_) / 8));
memcpy(valid_bits_buffer->mutable_data(), valid_bits_buffer_->data(),
- valid_bits_buffer->size());
+ valid_bits_buffer->size());
valid_bits_buffer_ = valid_bits_buffer;
}
- *out = std::make_shared<BooleanArray>(
- field_->type(), valid_bits_idx_, data_buffer_, valid_bits_buffer_, null_count_);
+ *out = std::make_shared<BooleanArray>(field_->type(), valid_bits_idx_, data_buffer_,
+ valid_bits_buffer_, null_count_);
// Relase the ownership
data_buffer_.reset();
valid_bits_buffer_.reset();
@@ -1141,9 +1173,9 @@ Status PrimitiveImpl::ReadByteArrayBatch(int batch_size, std::shared_ptr<Array>*
int64_t values_read;
int64_t levels_read;
auto values = reinterpret_cast<ByteArray*>(values_buffer_.mutable_data());
- PARQUET_CATCH_NOT_OK(
- levels_read = reader->ReadBatch(values_to_read, def_levels + total_levels_read,
- rep_levels + total_levels_read, values, &values_read));
+ PARQUET_CATCH_NOT_OK(levels_read = reader->ReadBatch(
+ values_to_read, def_levels + total_levels_read,
+ rep_levels + total_levels_read, values, &values_read));
values_to_read -= static_cast<int>(levels_read);
if (descr_->max_definition_level() == 0) {
for (int64_t i = 0; i < levels_read; i++) {
@@ -1161,13 +1193,15 @@ Status PrimitiveImpl::ReadByteArrayBatch(int batch_size, std::shared_ptr<Array>*
} else if (def_levels[i + total_levels_read] == descr_->max_definition_level()) {
RETURN_NOT_OK(
builder.Append(reinterpret_cast<const char*>(values[values_idx].ptr),
- values[values_idx].len));
+ values[values_idx].len));
values_idx++;
}
}
total_levels_read += static_cast<int>(levels_read);
}
- if (!column_reader_->HasNext()) { NextRowGroup(); }
+ if (!column_reader_->HasNext()) {
+ NextRowGroup();
+ }
}
RETURN_NOT_OK(builder.Finish(out));
@@ -1176,8 +1210,8 @@ Status PrimitiveImpl::ReadByteArrayBatch(int batch_size, std::shared_ptr<Array>*
}
template <typename ArrowType>
-Status PrimitiveImpl::ReadFLBABatch(
- int batch_size, int byte_width, std::shared_ptr<Array>* out) {
+Status PrimitiveImpl::ReadFLBABatch(int batch_size, int byte_width,
+ std::shared_ptr<Array>* out) {
using BuilderType = typename ::arrow::TypeTraits<ArrowType>::BuilderType;
int total_levels_read = 0;
if (descr_->max_definition_level() > 0) {
@@ -1197,9 +1231,9 @@ Status PrimitiveImpl::ReadFLBABatch(
int64_t values_read;
int64_t levels_read;
auto values = reinterpret_cast<FLBA*>(values_buffer_.mutable_data());
- PARQUET_CATCH_NOT_OK(
- levels_read = reader->ReadBatch(values_to_read, def_levels + total_levels_read,
- rep_levels + total_levels_read, values, &values_read));
+ PARQUET_CATCH_NOT_OK(levels_read = reader->ReadBatch(
+ values_to_read, def_levels + total_levels_read,
+ rep_levels + total_levels_read, values, &values_read));
values_to_read -= static_cast<int>(levels_read);
if (descr_->max_definition_level() == 0) {
for (int64_t i = 0; i < levels_read; i++) {
@@ -1219,7 +1253,9 @@ Status PrimitiveImpl::ReadFLBABatch(
}
total_levels_read += static_cast<int>(levels_read);
}
- if (!column_reader_->HasNext()) { NextRowGroup(); }
+ if (!column_reader_->HasNext()) {
+ NextRowGroup();
+ }
}
RETURN_NOT_OK(builder.Finish(out));
@@ -1304,9 +1340,7 @@ Status PrimitiveImpl::NextBatch(int batch_size, std::shared_ptr<Array>* out) {
}
}
-void PrimitiveImpl::NextRowGroup() {
- column_reader_ = input_->Next();
-}
+void PrimitiveImpl::NextRowGroup() { column_reader_ = input_->Next(); }
Status PrimitiveImpl::GetDefLevels(ValueLevelsPtr* data, size_t* length) {
*data = reinterpret_cast<ValueLevelsPtr>(def_levels_buffer_.data());
@@ -1330,8 +1364,8 @@ Status ColumnReader::NextBatch(int batch_size, std::shared_ptr<Array>* out) {
// StructImpl methods
-Status StructImpl::DefLevelsToNullArray(
- std::shared_ptr<MutableBuffer>* null_bitmap_out, int64_t* null_count_out) {
+Status StructImpl::DefLevelsToNullArray(std::shared_ptr<MutableBuffer>* null_bitmap_out,
+ int64_t* null_count_out) {
std::shared_ptr<MutableBuffer> null_bitmap;
auto null_count = 0;
ValueLevelsPtr def_levels_data;
@@ -1387,7 +1421,7 @@ Status StructImpl::GetDefLevels(ValueLevelsPtr* data, size_t* length) {
// Check that value is either uninitialized, or current
// and previous children def levels agree on the struct level
DCHECK((result_levels[i] == -1) || ((result_levels[i] >= struct_def_level_) ==
- (child_def_levels[i] >= struct_def_level_)));
+ (child_def_levels[i] >= struct_def_level_)));
result_levels[i] =
std::max(result_levels[i], std::min(child_def_levels[i], struct_def_level_));
}
@@ -1397,8 +1431,8 @@ Status StructImpl::GetDefLevels(ValueLevelsPtr* data, size_t* length) {
return Status::OK();
}
-void StructImpl::InitField(
- const NodePtr& node, const std::vector<std::shared_ptr<Impl>>& children) {
+void StructImpl::InitField(const NodePtr& node,
+ const std::vector<std::shared_ptr<Impl>>& children) {
// Make a shallow node to field conversion from the children fields
std::vector<std::shared_ptr<::arrow::Field>> fields(children.size());
for (size_t i = 0; i < children.size(); i++) {
@@ -1428,8 +1462,8 @@ Status StructImpl::NextBatch(int batch_size, std::shared_ptr<Array>* out) {
RETURN_NOT_OK(DefLevelsToNullArray(&null_bitmap, &null_count));
- *out = std::make_shared<StructArray>(
- field()->type(), batch_size, children_arrays, null_bitmap, null_count);
+ *out = std::make_shared<StructArray>(field()->type(), batch_size, children_arrays,
+ null_bitmap, null_count);
return Status::OK();
}
http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/b6f3caeb/src/parquet/arrow/reader.h
----------------------------------------------------------------------
diff --git a/src/parquet/arrow/reader.h b/src/parquet/arrow/reader.h
index 8d9aeb5..f9688fb 100644
--- a/src/parquet/arrow/reader.h
+++ b/src/parquet/arrow/reader.h
@@ -130,19 +130,19 @@ class PARQUET_EXPORT FileReader {
// i=1 indices={3} will read foo2 column
// i=1 indices={2} will result in out=nullptr
// leaf indices which are unrelated to the schema field are ignored
- ::arrow::Status ReadSchemaField(
- int i, const std::vector<int>& indices, std::shared_ptr<::arrow::Array>* out);
+ ::arrow::Status ReadSchemaField(int i, const std::vector<int>& indices,
+ std::shared_ptr<::arrow::Array>* out);
// Read a table of columns into a Table
::arrow::Status ReadTable(std::shared_ptr<::arrow::Table>* out);
// Read a table of columns into a Table. Read only the indicated column
// indices (relative to the schema)
- ::arrow::Status ReadTable(
- const std::vector<int>& column_indices, std::shared_ptr<::arrow::Table>* out);
+ ::arrow::Status ReadTable(const std::vector<int>& column_indices,
+ std::shared_ptr<::arrow::Table>* out);
::arrow::Status ReadRowGroup(int i, const std::vector<int>& column_indices,
- std::shared_ptr<::arrow::Table>* out);
+ std::shared_ptr<::arrow::Table>* out);
::arrow::Status ReadRowGroup(int i, std::shared_ptr<::arrow::Table>* out);
@@ -198,12 +198,15 @@ class PARQUET_EXPORT ColumnReader {
// metadata : separately-computed file metadata, can be nullptr
PARQUET_EXPORT
::arrow::Status OpenFile(const std::shared_ptr<::arrow::io::ReadableFileInterface>& file,
- ::arrow::MemoryPool* allocator, const ReaderProperties& properties,
- const std::shared_ptr<FileMetaData>& metadata, std::unique_ptr<FileReader>* reader);
+ ::arrow::MemoryPool* allocator,
+ const ReaderProperties& properties,
+ const std::shared_ptr<FileMetaData>& metadata,
+ std::unique_ptr<FileReader>* reader);
PARQUET_EXPORT
::arrow::Status OpenFile(const std::shared_ptr<::arrow::io::ReadableFileInterface>& file,
- ::arrow::MemoryPool* allocator, std::unique_ptr<FileReader>* reader);
+ ::arrow::MemoryPool* allocator,
+ std::unique_ptr<FileReader>* reader);
} // namespace arrow
} // namespace parquet
http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/b6f3caeb/src/parquet/arrow/schema.cc
----------------------------------------------------------------------
diff --git a/src/parquet/arrow/schema.cc b/src/parquet/arrow/schema.cc
index d14ee4f..b0cde36 100644
--- a/src/parquet/arrow/schema.cc
+++ b/src/parquet/arrow/schema.cc
@@ -202,21 +202,25 @@ Status FromPrimitive(const PrimitiveNode* primitive, TypePtr* out) {
// Forward declaration
Status NodeToFieldInternal(const NodePtr& node,
- const std::unordered_set<NodePtr>* included_leaf_nodes, std::shared_ptr<Field>* out);
+ const std::unordered_set<NodePtr>* included_leaf_nodes,
+ std::shared_ptr<Field>* out);
/*
* Auxilary function to test if a parquet schema node is a leaf node
* that should be included in a resulting arrow schema
*/
-inline bool IsIncludedLeaf(
- const NodePtr& node, const std::unordered_set<NodePtr>* included_leaf_nodes) {
- if (included_leaf_nodes == nullptr) { return true; }
+inline bool IsIncludedLeaf(const NodePtr& node,
+ const std::unordered_set<NodePtr>* included_leaf_nodes) {
+ if (included_leaf_nodes == nullptr) {
+ return true;
+ }
auto search = included_leaf_nodes->find(node);
return (search != included_leaf_nodes->end());
}
Status StructFromGroup(const GroupNode* group,
- const std::unordered_set<NodePtr>* included_leaf_nodes, TypePtr* out) {
+ const std::unordered_set<NodePtr>* included_leaf_nodes,
+ TypePtr* out) {
std::vector<std::shared_ptr<Field>> fields;
std::shared_ptr<Field> field;
@@ -224,14 +228,18 @@ Status StructFromGroup(const GroupNode* group,
for (int i = 0; i < group->field_count(); i++) {
RETURN_NOT_OK(NodeToFieldInternal(group->field(i), included_leaf_nodes, &field));
- if (field != nullptr) { fields.push_back(field); }
+ if (field != nullptr) {
+ fields.push_back(field);
+ }
+ }
+ if (fields.size() > 0) {
+ *out = std::make_shared<::arrow::StructType>(fields);
}
- if (fields.size() > 0) { *out = std::make_shared<::arrow::StructType>(fields); }
return Status::OK();
}
Status NodeToList(const GroupNode* group,
- const std::unordered_set<NodePtr>* included_leaf_nodes, TypePtr* out) {
+ const std::unordered_set<NodePtr>* included_leaf_nodes, TypePtr* out) {
*out = nullptr;
if (group->field_count() == 1) {
// This attempts to resolve the preferred 3-level list encoding.
@@ -247,7 +255,9 @@ Status NodeToList(const GroupNode* group,
RETURN_NOT_OK(
NodeToFieldInternal(list_group->field(0), included_leaf_nodes, &item_field));
- if (item_field != nullptr) { *out = ::arrow::list(item_field); }
+ if (item_field != nullptr) {
+ *out = ::arrow::list(item_field);
+ }
} else {
// List of struct
std::shared_ptr<::arrow::DataType> inner_type;
@@ -283,7 +293,8 @@ Status NodeToField(const NodePtr& node, std::shared_ptr<Field>* out) {
}
Status NodeToFieldInternal(const NodePtr& node,
- const std::unordered_set<NodePtr>* included_leaf_nodes, std::shared_ptr<Field>* out) {
+ const std::unordered_set<NodePtr>* included_leaf_nodes,
+ std::shared_ptr<Field>* out) {
std::shared_ptr<::arrow::DataType> type = nullptr;
bool nullable = !node->is_required();
@@ -318,11 +329,14 @@ Status NodeToFieldInternal(const NodePtr& node,
RETURN_NOT_OK(FromPrimitive(primitive, &type));
}
}
- if (type != nullptr) { *out = std::make_shared<Field>(node->name(), type, nullable); }
+ if (type != nullptr) {
+ *out = std::make_shared<Field>(node->name(), type, nullable);
+ }
return Status::OK();
}
-Status FromParquetSchema(const SchemaDescriptor* parquet_schema,
+Status FromParquetSchema(
+ const SchemaDescriptor* parquet_schema,
const std::shared_ptr<const KeyValueMetadata>& key_value_metadata,
std::shared_ptr<::arrow::Schema>* out) {
const GroupNode* schema_node = parquet_schema->group_node();
@@ -337,8 +351,8 @@ Status FromParquetSchema(const SchemaDescriptor* parquet_schema,
return Status::OK();
}
-Status FromParquetSchema(const SchemaDescriptor* parquet_schema,
- const std::vector<int>& column_indices,
+Status FromParquetSchema(
+ const SchemaDescriptor* parquet_schema, const std::vector<int>& column_indices,
const std::shared_ptr<const KeyValueMetadata>& key_value_metadata,
std::shared_ptr<::arrow::Schema>* out) {
// TODO(wesm): Consider adding an arrow::Schema name attribute, which comes
@@ -356,14 +370,18 @@ Status FromParquetSchema(const SchemaDescriptor* parquet_schema,
included_leaf_nodes.insert(column_desc->schema_node());
auto column_root = parquet_schema->GetColumnRoot(column_indices[i]);
auto insertion = top_nodes.insert(column_root);
- if (insertion.second) { base_nodes.push_back(column_root); }
+ if (insertion.second) {
+ base_nodes.push_back(column_root);
+ }
}
std::vector<std::shared_ptr<Field>> fields;
std::shared_ptr<Field> field;
for (auto node : base_nodes) {
RETURN_NOT_OK(NodeToFieldInternal(node, &included_leaf_nodes, &field));
- if (field != nullptr) { fields.push_back(field); }
+ if (field != nullptr) {
+ fields.push_back(field);
+ }
}
*out = std::make_shared<::arrow::Schema>(fields, key_value_metadata);
@@ -371,18 +389,19 @@ Status FromParquetSchema(const SchemaDescriptor* parquet_schema,
}
Status FromParquetSchema(const SchemaDescriptor* parquet_schema,
- const std::vector<int>& column_indices, std::shared_ptr<::arrow::Schema>* out) {
+ const std::vector<int>& column_indices,
+ std::shared_ptr<::arrow::Schema>* out) {
return FromParquetSchema(parquet_schema, column_indices, nullptr, out);
}
-Status FromParquetSchema(
- const SchemaDescriptor* parquet_schema, std::shared_ptr<::arrow::Schema>* out) {
+Status FromParquetSchema(const SchemaDescriptor* parquet_schema,
+ std::shared_ptr<::arrow::Schema>* out) {
return FromParquetSchema(parquet_schema, nullptr, out);
}
Status ListToNode(const std::shared_ptr<::arrow::ListType>& type, const std::string& name,
- bool nullable, bool support_int96_nanoseconds, const WriterProperties& properties,
- NodePtr* out) {
+ bool nullable, bool support_int96_nanoseconds,
+ const WriterProperties& properties, NodePtr* out) {
Repetition::type repetition = nullable ? Repetition::OPTIONAL : Repetition::REQUIRED;
NodePtr element;
@@ -395,8 +414,9 @@ Status ListToNode(const std::shared_ptr<::arrow::ListType>& type, const std::str
}
Status StructToNode(const std::shared_ptr<::arrow::StructType>& type,
- const std::string& name, bool nullable, bool support_int96_nanoseconds,
- const WriterProperties& properties, NodePtr* out) {
+ const std::string& name, bool nullable,
+ bool support_int96_nanoseconds, const WriterProperties& properties,
+ NodePtr* out) {
Repetition::type repetition = nullable ? Repetition::OPTIONAL : Repetition::REQUIRED;
std::vector<NodePtr> children(type->num_children());
@@ -410,7 +430,8 @@ Status StructToNode(const std::shared_ptr<::arrow::StructType>& type,
}
Status FieldToNode(const std::shared_ptr<Field>& field,
- const WriterProperties& properties, NodePtr* out, bool support_int96_nanoseconds) {
+ const WriterProperties& properties, NodePtr* out,
+ bool support_int96_nanoseconds) {
LogicalType::type logical_type = LogicalType::NONE;
ParquetType::type type;
Repetition::type repetition =
@@ -524,12 +545,12 @@ Status FieldToNode(const std::shared_ptr<Field>& field,
case ArrowType::STRUCT: {
auto struct_type = std::static_pointer_cast<::arrow::StructType>(field->type());
return StructToNode(struct_type, field->name(), field->nullable(),
- support_int96_nanoseconds, properties, out);
+ support_int96_nanoseconds, properties, out);
} break;
case ArrowType::LIST: {
auto list_type = std::static_pointer_cast<::arrow::ListType>(field->type());
return ListToNode(list_type, field->name(), field->nullable(),
- support_int96_nanoseconds, properties, out);
+ support_int96_nanoseconds, properties, out);
} break;
default:
// TODO: LIST, DENSE_UNION, SPARE_UNION, JSON_SCALAR, DECIMAL, DECIMAL_TEXT, VARCHAR
@@ -540,12 +561,13 @@ Status FieldToNode(const std::shared_ptr<Field>& field,
}
Status ToParquetSchema(const ::arrow::Schema* arrow_schema,
- const WriterProperties& properties, std::shared_ptr<SchemaDescriptor>* out,
- bool support_int96_nanoseconds) {
+ const WriterProperties& properties,
+ std::shared_ptr<SchemaDescriptor>* out,
+ bool support_int96_nanoseconds) {
std::vector<NodePtr> nodes(arrow_schema->num_fields());
for (int i = 0; i < arrow_schema->num_fields(); i++) {
- RETURN_NOT_OK(FieldToNode(
- arrow_schema->field(i), properties, &nodes[i], support_int96_nanoseconds));
+ RETURN_NOT_OK(FieldToNode(arrow_schema->field(i), properties, &nodes[i],
+ support_int96_nanoseconds));
}
NodePtr schema = GroupNode::Make("schema", Repetition::REQUIRED, nodes);
http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/b6f3caeb/src/parquet/arrow/schema.h
----------------------------------------------------------------------
diff --git a/src/parquet/arrow/schema.h b/src/parquet/arrow/schema.h
index d4f5ea3..7d1f27e 100644
--- a/src/parquet/arrow/schema.h
+++ b/src/parquet/arrow/schema.h
@@ -36,8 +36,8 @@ namespace parquet {
namespace arrow {
-::arrow::Status PARQUET_EXPORT NodeToField(
- const schema::NodePtr& node, std::shared_ptr<::arrow::Field>* out);
+::arrow::Status PARQUET_EXPORT NodeToField(const schema::NodePtr& node,
+ std::shared_ptr<::arrow::Field>* out);
/// Convert parquet schema to arrow schema with selected indices
/// \param parquet_schema to be converted
@@ -47,31 +47,35 @@ namespace arrow {
/// \param key_value_metadata optional metadata, can be nullptr
/// \param out the corresponding arrow schema
/// \return Status::OK() on a successful conversion.
-::arrow::Status PARQUET_EXPORT FromParquetSchema(const SchemaDescriptor* parquet_schema,
- const std::vector<int>& column_indices,
+::arrow::Status PARQUET_EXPORT FromParquetSchema(
+ const SchemaDescriptor* parquet_schema, const std::vector<int>& column_indices,
const std::shared_ptr<const KeyValueMetadata>& key_value_metadata,
std::shared_ptr<::arrow::Schema>* out);
// Without indices
-::arrow::Status PARQUET_EXPORT FromParquetSchema(const SchemaDescriptor* parquet_schema,
- const std::shared_ptr<const KeyValueMetadata>& key_value_metadata,
- std::shared_ptr<::arrow::Schema>* out);
+::arrow::Status PARQUET_EXPORT
+FromParquetSchema(const SchemaDescriptor* parquet_schema,
+ const std::shared_ptr<const KeyValueMetadata>& key_value_metadata,
+ std::shared_ptr<::arrow::Schema>* out);
// Without metadata
::arrow::Status PARQUET_EXPORT FromParquetSchema(const SchemaDescriptor* parquet_schema,
- const std::vector<int>& column_indices, std::shared_ptr<::arrow::Schema>* out);
+ const std::vector<int>& column_indices,
+ std::shared_ptr<::arrow::Schema>* out);
// Without metadata or indices
-::arrow::Status PARQUET_EXPORT FromParquetSchema(
- const SchemaDescriptor* parquet_schema, std::shared_ptr<::arrow::Schema>* out);
+::arrow::Status PARQUET_EXPORT FromParquetSchema(const SchemaDescriptor* parquet_schema,
+ std::shared_ptr<::arrow::Schema>* out);
::arrow::Status PARQUET_EXPORT FieldToNode(const std::shared_ptr<::arrow::Field>& field,
- const WriterProperties& properties, schema::NodePtr* out,
- bool support_int96_nanoseconds = false);
+ const WriterProperties& properties,
+ schema::NodePtr* out,
+ bool support_int96_nanoseconds = false);
::arrow::Status PARQUET_EXPORT ToParquetSchema(const ::arrow::Schema* arrow_schema,
- const WriterProperties& properties, std::shared_ptr<SchemaDescriptor>* out,
- bool support_int96_nanoseconds = false);
+ const WriterProperties& properties,
+ std::shared_ptr<SchemaDescriptor>* out,
+ bool support_int96_nanoseconds = false);
} // namespace arrow
http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/b6f3caeb/src/parquet/arrow/test-util.h
----------------------------------------------------------------------
diff --git a/src/parquet/arrow/test-util.h b/src/parquet/arrow/test-util.h
index 946afad..5980199 100644
--- a/src/parquet/arrow/test-util.h
+++ b/src/parquet/arrow/test-util.h
@@ -67,8 +67,8 @@ NonNullArray(size_t size, std::shared_ptr<Array>* out) {
::arrow::test::randint<typename ArrowType::c_type>(size, 0, 64, &values);
// Passing data type so this will work with TimestampType too
- ::arrow::NumericBuilder<ArrowType> builder(
- ::arrow::default_memory_pool(), std::make_shared<ArrowType>());
+ ::arrow::NumericBuilder<ArrowType> builder(::arrow::default_memory_pool(),
+ std::make_shared<ArrowType>());
RETURN_NOT_OK(builder.Append(values.data(), values.size()));
return builder.Finish(out);
}
@@ -83,8 +83,8 @@ typename std::enable_if<is_arrow_date<ArrowType>::value, Status>::type NonNullAr
}
// Passing data type so this will work with TimestampType too
- ::arrow::NumericBuilder<ArrowType> builder(
- ::arrow::default_memory_pool(), std::make_shared<ArrowType>());
+ ::arrow::NumericBuilder<ArrowType> builder(::arrow::default_memory_pool(),
+ std::make_shared<ArrowType>());
builder.Append(values.data(), values.size());
return builder.Finish(out);
}
@@ -129,8 +129,8 @@ template <typename ArrowType>
typename std::enable_if<is_arrow_float<ArrowType>::value, Status>::type NullableArray(
size_t size, size_t num_nulls, uint32_t seed, std::shared_ptr<Array>* out) {
std::vector<typename ArrowType::c_type> values;
- ::arrow::test::random_real<typename ArrowType::c_type>(
- size, seed, -1e10, 1e10, &values);
+ ::arrow::test::random_real<typename ArrowType::c_type>(size, seed, -1e10, 1e10,
+ &values);
std::vector<uint8_t> valid_bytes(size, 1);
for (size_t i = 0; i < num_nulls; i++) {
@@ -159,8 +159,8 @@ NullableArray(size_t size, size_t num_nulls, uint32_t seed, std::shared_ptr<Arra
}
// Passing data type so this will work with TimestampType too
- ::arrow::NumericBuilder<ArrowType> builder(
- ::arrow::default_memory_pool(), std::make_shared<ArrowType>());
+ ::arrow::NumericBuilder<ArrowType> builder(::arrow::default_memory_pool(),
+ std::make_shared<ArrowType>());
RETURN_NOT_OK(builder.Append(values.data(), values.size(), valid_bytes.data()));
return builder.Finish(out);
}
@@ -183,8 +183,8 @@ typename std::enable_if<is_arrow_date<ArrowType>::value, Status>::type NullableA
}
// Passing data type so this will work with TimestampType too
- ::arrow::NumericBuilder<ArrowType> builder(
- ::arrow::default_memory_pool(), std::make_shared<ArrowType>());
+ ::arrow::NumericBuilder<ArrowType> builder(::arrow::default_memory_pool(),
+ std::make_shared<ArrowType>());
builder.Append(values.data(), values.size(), valid_bytes.data());
return builder.Finish(out);
}
@@ -193,8 +193,8 @@ typename std::enable_if<is_arrow_date<ArrowType>::value, Status>::type NullableA
template <typename ArrowType>
typename std::enable_if<
is_arrow_string<ArrowType>::value || is_arrow_binary<ArrowType>::value, Status>::type
-NullableArray(
- size_t size, size_t num_nulls, uint32_t seed, std::shared_ptr<::arrow::Array>* out) {
+NullableArray(size_t size, size_t num_nulls, uint32_t seed,
+ std::shared_ptr<::arrow::Array>* out) {
std::vector<uint8_t> valid_bytes(size, 1);
for (size_t i = 0; i < num_nulls; i++) {
@@ -221,8 +221,8 @@ NullableArray(
// same as NullableArray<String|Binary>(..)
template <typename ArrowType>
typename std::enable_if<is_arrow_fixed_size_binary<ArrowType>::value, Status>::type
-NullableArray(
- size_t size, size_t num_nulls, uint32_t seed, std::shared_ptr<::arrow::Array>* out) {
+NullableArray(size_t size, size_t num_nulls, uint32_t seed,
+ std::shared_ptr<::arrow::Array>* out) {
std::vector<uint8_t> valid_bytes(size, 1);
for (size_t i = 0; i < num_nulls; i++) {
@@ -231,8 +231,8 @@ NullableArray(
using BuilderType = typename ::arrow::TypeTraits<ArrowType>::BuilderType;
const int byte_width = 10;
- BuilderType builder(
- ::arrow::default_memory_pool(), ::arrow::fixed_size_binary(byte_width));
+ BuilderType builder(::arrow::default_memory_pool(),
+ ::arrow::fixed_size_binary(byte_width));
const int kBufferSize = byte_width;
uint8_t buffer[kBufferSize];
@@ -272,7 +272,8 @@ typename std::enable_if<is_arrow_bool<ArrowType>::value, Status>::type NullableA
///
/// This helper function only supports (size/2) nulls.
Status MakeListArray(const std::shared_ptr<Array>& values, int64_t size,
- int64_t null_count, bool nullable_values, std::shared_ptr<::arrow::ListArray>* out) {
+ int64_t null_count, bool nullable_values,
+ std::shared_ptr<::arrow::ListArray>* out) {
// We always include an empty list
int64_t non_null_entries = size - null_count - 1;
int64_t length_per_entry = values->length() / non_null_entries;
@@ -294,33 +295,37 @@ Status MakeListArray(const std::shared_ptr<Array>& values, int64_t size,
if (!(((i % 2) == 0) && ((i / 2) < null_count))) {
// Non-null list (list with index 1 is always empty).
::arrow::BitUtil::SetBit(null_bitmap_ptr, i);
- if (i != 1) { current_offset += static_cast<int32_t>(length_per_entry); }
+ if (i != 1) {
+ current_offset += static_cast<int32_t>(length_per_entry);
+ }
}
}
offsets_ptr[size] = static_cast<int32_t>(values->length());
auto value_field =
std::make_shared<::arrow::Field>("item", values->type(), nullable_values);
- *out = std::make_shared<::arrow::ListArray>(
- ::arrow::list(value_field), size, offsets, values, null_bitmap, null_count);
+ *out = std::make_shared<::arrow::ListArray>(::arrow::list(value_field), size, offsets,
+ values, null_bitmap, null_count);
return Status::OK();
}
-static std::shared_ptr<::arrow::Column> MakeColumn(
- const std::string& name, const std::shared_ptr<Array>& array, bool nullable) {
+static std::shared_ptr<::arrow::Column> MakeColumn(const std::string& name,
+ const std::shared_ptr<Array>& array,
+ bool nullable) {
auto field = std::make_shared<::arrow::Field>(name, array->type(), nullable);
return std::make_shared<::arrow::Column>(field, array);
}
-static std::shared_ptr<::arrow::Column> MakeColumn(const std::string& name,
- const std::vector<std::shared_ptr<Array>>& arrays, bool nullable) {
+static std::shared_ptr<::arrow::Column> MakeColumn(
+ const std::string& name, const std::vector<std::shared_ptr<Array>>& arrays,
+ bool nullable) {
auto field = std::make_shared<::arrow::Field>(name, arrays[0]->type(), nullable);
return std::make_shared<::arrow::Column>(field, arrays);
}
-std::shared_ptr<::arrow::Table> MakeSimpleTable(
- const std::shared_ptr<Array>& values, bool nullable) {
+std::shared_ptr<::arrow::Table> MakeSimpleTable(const std::shared_ptr<Array>& values,
+ bool nullable) {
std::shared_ptr<::arrow::Column> column = MakeColumn("col", values, nullable);
std::vector<std::shared_ptr<::arrow::Column>> columns({column});
std::vector<std::shared_ptr<::arrow::Field>> fields({column->field()});
@@ -341,15 +346,15 @@ void ExpectArrayT(void* expected, Array* result) {
::arrow::PrimitiveArray* p_array = static_cast<::arrow::PrimitiveArray*>(result);
for (int64_t i = 0; i < result->length(); i++) {
EXPECT_EQ(reinterpret_cast<typename ArrowType::c_type*>(expected)[i],
- reinterpret_cast<const typename ArrowType::c_type*>(
- p_array->values()->data())[i]);
+ reinterpret_cast<const typename ArrowType::c_type*>(
+ p_array->values()->data())[i]);
}
}
template <>
void ExpectArrayT<::arrow::BooleanType>(void* expected, Array* result) {
- ::arrow::BooleanBuilder builder(
- ::arrow::default_memory_pool(), std::make_shared<::arrow::BooleanType>());
+ ::arrow::BooleanBuilder builder(::arrow::default_memory_pool(),
+ std::make_shared<::arrow::BooleanType>());
EXPECT_OK(builder.Append(reinterpret_cast<uint8_t*>(expected), result->length()));
std::shared_ptr<Array> expected_array;
http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/b6f3caeb/src/parquet/arrow/writer.cc
----------------------------------------------------------------------
diff --git a/src/parquet/arrow/writer.cc b/src/parquet/arrow/writer.cc
index c562b27..41c1146 100644
--- a/src/parquet/arrow/writer.cc
+++ b/src/parquet/arrow/writer.cc
@@ -104,9 +104,11 @@ class LevelBuilder {
NOT_IMPLEMENTED_VISIT(Interval)
Status GenerateLevels(const Array& array, const std::shared_ptr<Field>& field,
- int64_t* values_offset, ::arrow::Type::type* values_type, int64_t* num_values,
- int64_t* num_levels, std::shared_ptr<Buffer>* def_levels,
- std::shared_ptr<Buffer>* rep_levels, std::shared_ptr<Array>* values_array) {
+ int64_t* values_offset, ::arrow::Type::type* values_type,
+ int64_t* num_values, int64_t* num_levels,
+ std::shared_ptr<Buffer>* def_levels,
+ std::shared_ptr<Buffer>* rep_levels,
+ std::shared_ptr<Array>* values_array) {
// Work downwards to extract bitmaps and offsets
min_offset_idx_ = 0;
max_offset_idx_ = static_cast<int32_t>(array.length());
@@ -192,17 +194,21 @@ class LevelBuilder {
int32_t inner_offset = offsets_[rep_level][index];
int32_t inner_length = offsets_[rep_level][index + 1] - inner_offset;
int64_t recursion_level = rep_level + 1;
- if (inner_length == 0) { return def_levels_.Append(def_level); }
+ if (inner_length == 0) {
+ return def_levels_.Append(def_level);
+ }
if (recursion_level < static_cast<int64_t>(offsets_.size())) {
return HandleListEntries(def_level + 1, rep_level + 1, inner_offset, inner_length);
} else {
// We have reached the leaf: primitive list, handle remaining nullables
for (int64_t i = 0; i < inner_length; i++) {
- if (i > 0) { RETURN_NOT_OK(rep_levels_.Append(rep_level + 1)); }
+ if (i > 0) {
+ RETURN_NOT_OK(rep_levels_.Append(rep_level + 1));
+ }
if (nullable_[recursion_level] &&
((null_counts_[recursion_level] == 0) ||
- BitUtil::GetBit(valid_bitmaps_[recursion_level],
- inner_offset + i + array_offsets_[recursion_level]))) {
+ BitUtil::GetBit(valid_bitmaps_[recursion_level],
+ inner_offset + i + array_offsets_[recursion_level]))) {
RETURN_NOT_OK(def_levels_.Append(def_level + 2));
} else {
// This can be produced in two case:
@@ -216,10 +222,12 @@ class LevelBuilder {
}
}
- Status HandleListEntries(
- int16_t def_level, int16_t rep_level, int64_t offset, int64_t length) {
+ Status HandleListEntries(int16_t def_level, int16_t rep_level, int64_t offset,
+ int64_t length) {
for (int64_t i = 0; i < length; i++) {
- if (i > 0) { RETURN_NOT_OK(rep_levels_.Append(rep_level)); }
+ if (i > 0) {
+ RETURN_NOT_OK(rep_levels_.Append(rep_level));
+ }
RETURN_NOT_OK(HandleList(def_level, rep_level, offset + i));
}
return Status::OK();
@@ -249,28 +257,32 @@ Status LevelBuilder::VisitInline(const Array& array) {
class FileWriter::Impl {
public:
Impl(MemoryPool* pool, std::unique_ptr<ParquetFileWriter> writer,
- const std::shared_ptr<ArrowWriterProperties>& arrow_properties);
+ const std::shared_ptr<ArrowWriterProperties>& arrow_properties);
Status NewRowGroup(int64_t chunk_size);
template <typename ParquetType, typename ArrowType>
Status TypedWriteBatch(ColumnWriter* writer, const std::shared_ptr<Array>& data,
- int64_t num_levels, const int16_t* def_levels, const int16_t* rep_levels);
+ int64_t num_levels, const int16_t* def_levels,
+ const int16_t* rep_levels);
Status TypedWriteBatchConvertedNanos(ColumnWriter* writer,
- const std::shared_ptr<Array>& data, int64_t num_levels, const int16_t* def_levels,
- const int16_t* rep_levels);
+ const std::shared_ptr<Array>& data,
+ int64_t num_levels, const int16_t* def_levels,
+ const int16_t* rep_levels);
template <typename ParquetType, typename ArrowType>
Status WriteNonNullableBatch(TypedColumnWriter<ParquetType>* writer,
- const ArrowType& type, int64_t num_values, int64_t num_levels,
- const int16_t* def_levels, const int16_t* rep_levels,
- const typename ArrowType::c_type* data_ptr);
+ const ArrowType& type, int64_t num_values,
+ int64_t num_levels, const int16_t* def_levels,
+ const int16_t* rep_levels,
+ const typename ArrowType::c_type* data_ptr);
template <typename ParquetType, typename ArrowType>
Status WriteNullableBatch(TypedColumnWriter<ParquetType>* writer, const ArrowType& type,
- int64_t num_values, int64_t num_levels, const int16_t* def_levels,
- const int16_t* rep_levels, const uint8_t* valid_bits, int64_t valid_bits_offset,
- const typename ArrowType::c_type* data_ptr);
+ int64_t num_values, int64_t num_levels,
+ const int16_t* def_levels, const int16_t* rep_levels,
+ const uint8_t* valid_bits, int64_t valid_bits_offset,
+ const typename ArrowType::c_type* data_ptr);
Status WriteColumnChunk(const Array& data);
Status Close();
@@ -290,7 +302,7 @@ class FileWriter::Impl {
};
FileWriter::Impl::Impl(MemoryPool* pool, std::unique_ptr<ParquetFileWriter> writer,
- const std::shared_ptr<ArrowWriterProperties>& arrow_properties)
+ const std::shared_ptr<ArrowWriterProperties>& arrow_properties)
: pool_(pool),
data_buffer_(pool),
writer_(std::move(writer)),
@@ -298,15 +310,18 @@ FileWriter::Impl::Impl(MemoryPool* pool, std::unique_ptr<ParquetFileWriter> writ
arrow_properties_(arrow_properties) {}
Status FileWriter::Impl::NewRowGroup(int64_t chunk_size) {
- if (row_group_writer_ != nullptr) { PARQUET_CATCH_NOT_OK(row_group_writer_->Close()); }
+ if (row_group_writer_ != nullptr) {
+ PARQUET_CATCH_NOT_OK(row_group_writer_->Close());
+ }
PARQUET_CATCH_NOT_OK(row_group_writer_ = writer_->AppendRowGroup(chunk_size));
return Status::OK();
}
template <typename ParquetType, typename ArrowType>
Status FileWriter::Impl::TypedWriteBatch(ColumnWriter* column_writer,
- const std::shared_ptr<Array>& array, int64_t num_levels, const int16_t* def_levels,
- const int16_t* rep_levels) {
+ const std::shared_ptr<Array>& array,
+ int64_t num_levels, const int16_t* def_levels,
+ const int16_t* rep_levels) {
using ArrowCType = typename ArrowType::c_type;
auto data = static_cast<const PrimitiveArray*>(array.get());
@@ -315,13 +330,13 @@ Status FileWriter::Impl::TypedWriteBatch(ColumnWriter* column_writer,
if (writer->descr()->schema_node()->is_required() || (data->null_count() == 0)) {
// no nulls, just dump the data
- RETURN_NOT_OK((WriteNonNullableBatch<ParquetType, ArrowType>(writer,
- static_cast<const ArrowType&>(*array->type()), array->length(), num_levels,
- def_levels, rep_levels, data_ptr + data->offset())));
+ RETURN_NOT_OK((WriteNonNullableBatch<ParquetType, ArrowType>(
+ writer, static_cast<const ArrowType&>(*array->type()), array->length(),
+ num_levels, def_levels, rep_levels, data_ptr + data->offset())));
} else {
const uint8_t* valid_bits = data->null_bitmap_data();
- RETURN_NOT_OK((WriteNullableBatch<ParquetType, ArrowType>(writer,
- static_cast<const ArrowType&>(*array->type()), data->length(), num_levels,
+ RETURN_NOT_OK((WriteNullableBatch<ParquetType, ArrowType>(
+ writer, static_cast<const ArrowType&>(*array->type()), data->length(), num_levels,
def_levels, rep_levels, valid_bits, data->offset(), data_ptr + data->offset())));
}
PARQUET_CATCH_NOT_OK(writer->Close());
@@ -329,9 +344,9 @@ Status FileWriter::Impl::TypedWriteBatch(ColumnWriter* column_writer,
}
template <typename ParquetType, typename ArrowType>
-Status FileWriter::Impl::WriteNonNullableBatch(TypedColumnWriter<ParquetType>* writer,
- const ArrowType& type, int64_t num_values, int64_t num_levels,
- const int16_t* def_levels, const int16_t* rep_levels,
+Status FileWriter::Impl::WriteNonNullableBatch(
+ TypedColumnWriter<ParquetType>* writer, const ArrowType& type, int64_t num_values,
+ int64_t num_levels, const int16_t* def_levels, const int16_t* rep_levels,
const typename ArrowType::c_type* data_ptr) {
using ParquetCType = typename ParquetType::c_type;
RETURN_NOT_OK(data_buffer_.Resize(num_values * sizeof(ParquetCType)));
@@ -416,9 +431,12 @@ NONNULLABLE_BATCH_FAST_PATH(DoubleType, ::arrow::DoubleType, double)
template <typename ParquetType, typename ArrowType>
Status FileWriter::Impl::WriteNullableBatch(TypedColumnWriter<ParquetType>* writer,
- const ArrowType& type, int64_t num_values, int64_t num_levels,
- const int16_t* def_levels, const int16_t* rep_levels, const uint8_t* valid_bits,
- int64_t valid_bits_offset, const typename ArrowType::c_type* data_ptr) {
+ const ArrowType& type, int64_t num_values,
+ int64_t num_levels, const int16_t* def_levels,
+ const int16_t* rep_levels,
+ const uint8_t* valid_bits,
+ int64_t valid_bits_offset,
+ const typename ArrowType::c_type* data_ptr) {
using ParquetCType = typename ParquetType::c_type;
RETURN_NOT_OK(data_buffer_.Resize(num_values * sizeof(ParquetCType)));
@@ -535,9 +553,9 @@ NULLABLE_BATCH_FAST_PATH(Int64Type, ::arrow::Time64Type, int64_t)
NULLABLE_BATCH_FAST_PATH(FloatType, ::arrow::FloatType, float)
NULLABLE_BATCH_FAST_PATH(DoubleType, ::arrow::DoubleType, double)
-Status FileWriter::Impl::TypedWriteBatchConvertedNanos(ColumnWriter* column_writer,
- const std::shared_ptr<Array>& array, int64_t num_levels, const int16_t* def_levels,
- const int16_t* rep_levels) {
+Status FileWriter::Impl::TypedWriteBatchConvertedNanos(
+ ColumnWriter* column_writer, const std::shared_ptr<Array>& array, int64_t num_levels,
+ const int16_t* def_levels, const int16_t* rep_levels) {
// Note that we can only use data_buffer_ here as we write timestamps with the fast
// path.
RETURN_NOT_OK(data_buffer_.Resize(array->length() * sizeof(int64_t)));
@@ -557,13 +575,14 @@ Status FileWriter::Impl::TypedWriteBatchConvertedNanos(ColumnWriter* column_writ
::arrow::timestamp(::arrow::TimeUnit::MICRO));
if (writer->descr()->schema_node()->is_required() || (data->null_count() == 0)) {
// no nulls, just dump the data
- RETURN_NOT_OK((WriteNonNullableBatch<Int64Type, ::arrow::TimestampType>(writer, *type,
- array->length(), num_levels, def_levels, rep_levels, data_buffer_ptr)));
+ RETURN_NOT_OK((WriteNonNullableBatch<Int64Type, ::arrow::TimestampType>(
+ writer, *type, array->length(), num_levels, def_levels, rep_levels,
+ data_buffer_ptr)));
} else {
const uint8_t* valid_bits = data->null_bitmap_data();
- RETURN_NOT_OK((WriteNullableBatch<Int64Type, ::arrow::TimestampType>(writer, *type,
- array->length(), num_levels, def_levels, rep_levels, valid_bits, data->offset(),
- data_buffer_ptr)));
+ RETURN_NOT_OK((WriteNullableBatch<Int64Type, ::arrow::TimestampType>(
+ writer, *type, array->length(), num_levels, def_levels, rep_levels, valid_bits,
+ data->offset(), data_buffer_ptr)));
}
PARQUET_CATCH_NOT_OK(writer->Close());
return Status::OK();
@@ -681,7 +700,9 @@ Status FileWriter::Impl::TypedWriteBatch<FLBAType, ::arrow::FixedSizeBinaryType>
}
Status FileWriter::Impl::Close() {
- if (row_group_writer_ != nullptr) { PARQUET_CATCH_NOT_OK(row_group_writer_->Close()); }
+ if (row_group_writer_ != nullptr) {
+ PARQUET_CATCH_NOT_OK(row_group_writer_->Close());
+ }
PARQUET_CATCH_NOT_OK(writer_->Close());
return Status::OK();
}
@@ -697,7 +718,7 @@ Status FileWriter::Impl::WriteColumnChunk(const Array& data) {
int current_column_idx = row_group_writer_->current_column();
std::shared_ptr<::arrow::Schema> arrow_schema;
RETURN_NOT_OK(FromParquetSchema(writer_->schema(), {current_column_idx - 1},
- writer_->key_value_metadata(), &arrow_schema));
+ writer_->key_value_metadata(), &arrow_schema));
std::shared_ptr<Buffer> def_levels_buffer;
std::shared_ptr<Buffer> rep_levels_buffer;
int64_t values_offset;
@@ -707,9 +728,9 @@ Status FileWriter::Impl::WriteColumnChunk(const Array& data) {
std::shared_ptr<Array> _values_array;
LevelBuilder level_builder(pool_);
- RETURN_NOT_OK(level_builder.GenerateLevels(data, arrow_schema->field(0), &values_offset,
- &values_type, &num_values, &num_levels, &def_levels_buffer, &rep_levels_buffer,
- &_values_array));
+ RETURN_NOT_OK(level_builder.GenerateLevels(
+ data, arrow_schema->field(0), &values_offset, &values_type, &num_values,
+ &num_levels, &def_levels_buffer, &rep_levels_buffer, &_values_array));
const int16_t* def_levels = nullptr;
if (def_levels_buffer) {
def_levels = reinterpret_cast<const int16_t*>(def_levels_buffer->data());
@@ -747,8 +768,8 @@ Status FileWriter::Impl::WriteColumnChunk(const Array& data) {
return TypedWriteBatch<Int96Type, ::arrow::TimestampType>(
column_writer, values_array, num_levels, def_levels, rep_levels);
} else if (timestamp_type->unit() == ::arrow::TimeUnit::NANO) {
- return TypedWriteBatchConvertedNanos(
- column_writer, values_array, num_levels, def_levels, rep_levels);
+ return TypedWriteBatchConvertedNanos(column_writer, values_array, num_levels,
+ def_levels, rep_levels);
} else {
return TypedWriteBatch<Int64Type, ::arrow::TimestampType>(
column_writer, values_array, num_levels, def_levels, rep_levels);
@@ -786,35 +807,31 @@ Status FileWriter::WriteColumnChunk(const ::arrow::Array& array) {
return impl_->WriteColumnChunk(array);
}
-Status FileWriter::Close() {
- return impl_->Close();
-}
+Status FileWriter::Close() { return impl_->Close(); }
-MemoryPool* FileWriter::memory_pool() const {
- return impl_->pool_;
-}
+MemoryPool* FileWriter::memory_pool() const { return impl_->pool_; }
FileWriter::~FileWriter() {}
FileWriter::FileWriter(MemoryPool* pool, std::unique_ptr<ParquetFileWriter> writer,
- const std::shared_ptr<ArrowWriterProperties>& arrow_properties)
+ const std::shared_ptr<ArrowWriterProperties>& arrow_properties)
: impl_(new FileWriter::Impl(pool, std::move(writer), arrow_properties)) {}
Status FileWriter::Open(const ::arrow::Schema& schema, ::arrow::MemoryPool* pool,
- const std::shared_ptr<OutputStream>& sink,
- const std::shared_ptr<WriterProperties>& properties,
- std::unique_ptr<FileWriter>* writer) {
+ const std::shared_ptr<OutputStream>& sink,
+ const std::shared_ptr<WriterProperties>& properties,
+ std::unique_ptr<FileWriter>* writer) {
return Open(schema, pool, sink, properties, default_arrow_writer_properties(), writer);
}
Status FileWriter::Open(const ::arrow::Schema& schema, ::arrow::MemoryPool* pool,
- const std::shared_ptr<OutputStream>& sink,
- const std::shared_ptr<WriterProperties>& properties,
- const std::shared_ptr<ArrowWriterProperties>& arrow_properties,
- std::unique_ptr<FileWriter>* writer) {
+ const std::shared_ptr<OutputStream>& sink,
+ const std::shared_ptr<WriterProperties>& properties,
+ const std::shared_ptr<ArrowWriterProperties>& arrow_properties,
+ std::unique_ptr<FileWriter>* writer) {
std::shared_ptr<SchemaDescriptor> parquet_schema;
RETURN_NOT_OK(ToParquetSchema(&schema, *properties, &parquet_schema,
- arrow_properties->support_deprecated_int96_timestamps()));
+ arrow_properties->support_deprecated_int96_timestamps()));
auto schema_node = std::static_pointer_cast<GroupNode>(parquet_schema->schema_root());
@@ -826,18 +843,18 @@ Status FileWriter::Open(const ::arrow::Schema& schema, ::arrow::MemoryPool* pool
}
Status FileWriter::Open(const ::arrow::Schema& schema, ::arrow::MemoryPool* pool,
- const std::shared_ptr<::arrow::io::OutputStream>& sink,
- const std::shared_ptr<WriterProperties>& properties,
- std::unique_ptr<FileWriter>* writer) {
+ const std::shared_ptr<::arrow::io::OutputStream>& sink,
+ const std::shared_ptr<WriterProperties>& properties,
+ std::unique_ptr<FileWriter>* writer) {
auto wrapper = std::make_shared<ArrowOutputStream>(sink);
return Open(schema, pool, wrapper, properties, writer);
}
Status FileWriter::Open(const ::arrow::Schema& schema, ::arrow::MemoryPool* pool,
- const std::shared_ptr<::arrow::io::OutputStream>& sink,
- const std::shared_ptr<WriterProperties>& properties,
- const std::shared_ptr<ArrowWriterProperties>& arrow_properties,
- std::unique_ptr<FileWriter>* writer) {
+ const std::shared_ptr<::arrow::io::OutputStream>& sink,
+ const std::shared_ptr<WriterProperties>& properties,
+ const std::shared_ptr<ArrowWriterProperties>& arrow_properties,
+ std::unique_ptr<FileWriter>* writer) {
auto wrapper = std::make_shared<ArrowOutputStream>(sink);
return Open(schema, pool, wrapper, properties, arrow_properties, writer);
}
@@ -868,20 +885,20 @@ Status FileWriter::WriteTable(const Table& table, int64_t chunk_size) {
}
Status WriteTable(const ::arrow::Table& table, ::arrow::MemoryPool* pool,
- const std::shared_ptr<OutputStream>& sink, int64_t chunk_size,
- const std::shared_ptr<WriterProperties>& properties,
- const std::shared_ptr<ArrowWriterProperties>& arrow_properties) {
+ const std::shared_ptr<OutputStream>& sink, int64_t chunk_size,
+ const std::shared_ptr<WriterProperties>& properties,
+ const std::shared_ptr<ArrowWriterProperties>& arrow_properties) {
std::unique_ptr<FileWriter> writer;
- RETURN_NOT_OK(FileWriter::Open(
- *table.schema(), pool, sink, properties, arrow_properties, &writer));
+ RETURN_NOT_OK(FileWriter::Open(*table.schema(), pool, sink, properties,
+ arrow_properties, &writer));
RETURN_NOT_OK(writer->WriteTable(table, chunk_size));
return writer->Close();
}
Status WriteTable(const ::arrow::Table& table, ::arrow::MemoryPool* pool,
- const std::shared_ptr<::arrow::io::OutputStream>& sink, int64_t chunk_size,
- const std::shared_ptr<WriterProperties>& properties,
- const std::shared_ptr<ArrowWriterProperties>& arrow_properties) {
+ const std::shared_ptr<::arrow::io::OutputStream>& sink,
+ int64_t chunk_size, const std::shared_ptr<WriterProperties>& properties,
+ const std::shared_ptr<ArrowWriterProperties>& arrow_properties) {
auto wrapper = std::make_shared<ArrowOutputStream>(sink);
return WriteTable(table, pool, wrapper, chunk_size, properties, arrow_properties);
}
http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/b6f3caeb/src/parquet/arrow/writer.h
----------------------------------------------------------------------
diff --git a/src/parquet/arrow/writer.h b/src/parquet/arrow/writer.h
index 4f7d2b4..a74f263 100644
--- a/src/parquet/arrow/writer.h
+++ b/src/parquet/arrow/writer.h
@@ -85,26 +85,28 @@ std::shared_ptr<ArrowWriterProperties> PARQUET_EXPORT default_arrow_writer_prope
class PARQUET_EXPORT FileWriter {
public:
FileWriter(::arrow::MemoryPool* pool, std::unique_ptr<ParquetFileWriter> writer,
- const std::shared_ptr<ArrowWriterProperties>& arrow_properties =
- default_arrow_writer_properties());
+ const std::shared_ptr<ArrowWriterProperties>& arrow_properties =
+ default_arrow_writer_properties());
static ::arrow::Status Open(const ::arrow::Schema& schema, ::arrow::MemoryPool* pool,
- const std::shared_ptr<OutputStream>& sink,
- const std::shared_ptr<WriterProperties>& properties,
- std::unique_ptr<FileWriter>* writer);
+ const std::shared_ptr<OutputStream>& sink,
+ const std::shared_ptr<WriterProperties>& properties,
+ std::unique_ptr<FileWriter>* writer);
- static ::arrow::Status Open(const ::arrow::Schema& schema, ::arrow::MemoryPool* pool,
+ static ::arrow::Status Open(
+ const ::arrow::Schema& schema, ::arrow::MemoryPool* pool,
const std::shared_ptr<OutputStream>& sink,
const std::shared_ptr<WriterProperties>& properties,
const std::shared_ptr<ArrowWriterProperties>& arrow_properties,
std::unique_ptr<FileWriter>* writer);
static ::arrow::Status Open(const ::arrow::Schema& schema, ::arrow::MemoryPool* pool,
- const std::shared_ptr<::arrow::io::OutputStream>& sink,
- const std::shared_ptr<WriterProperties>& properties,
- std::unique_ptr<FileWriter>* writer);
+ const std::shared_ptr<::arrow::io::OutputStream>& sink,
+ const std::shared_ptr<WriterProperties>& properties,
+ std::unique_ptr<FileWriter>* writer);
- static ::arrow::Status Open(const ::arrow::Schema& schema, ::arrow::MemoryPool* pool,
+ static ::arrow::Status Open(
+ const ::arrow::Schema& schema, ::arrow::MemoryPool* pool,
const std::shared_ptr<::arrow::io::OutputStream>& sink,
const std::shared_ptr<WriterProperties>& properties,
const std::shared_ptr<ArrowWriterProperties>& arrow_properties,
@@ -135,16 +137,16 @@ class PARQUET_EXPORT FileWriter {
*
* The table shall only consist of columns of primitive type or of primitive lists.
*/
-::arrow::Status PARQUET_EXPORT WriteTable(const ::arrow::Table& table,
- ::arrow::MemoryPool* pool, const std::shared_ptr<OutputStream>& sink,
- int64_t chunk_size,
+::arrow::Status PARQUET_EXPORT WriteTable(
+ const ::arrow::Table& table, ::arrow::MemoryPool* pool,
+ const std::shared_ptr<OutputStream>& sink, int64_t chunk_size,
const std::shared_ptr<WriterProperties>& properties = default_writer_properties(),
const std::shared_ptr<ArrowWriterProperties>& arrow_properties =
default_arrow_writer_properties());
-::arrow::Status PARQUET_EXPORT WriteTable(const ::arrow::Table& table,
- ::arrow::MemoryPool* pool, const std::shared_ptr<::arrow::io::OutputStream>& sink,
- int64_t chunk_size,
+::arrow::Status PARQUET_EXPORT WriteTable(
+ const ::arrow::Table& table, ::arrow::MemoryPool* pool,
+ const std::shared_ptr<::arrow::io::OutputStream>& sink, int64_t chunk_size,
const std::shared_ptr<WriterProperties>& properties = default_writer_properties(),
const std::shared_ptr<ArrowWriterProperties>& arrow_properties =
default_arrow_writer_properties());
@@ -160,8 +162,8 @@ constexpr int64_t kNanosecondsPerDay = INT64_C(86400000000000);
/**
* Converts nanosecond timestamps to Impala (Int96) format
*/
-inline void NanosecondsToImpalaTimestamp(
- const int64_t nanoseconds, Int96* impala_timestamp) {
+inline void NanosecondsToImpalaTimestamp(const int64_t nanoseconds,
+ Int96* impala_timestamp) {
int64_t julian_days = (nanoseconds / kNanosecondsPerDay) + kJulianEpochOffsetDays;
(*impala_timestamp).value[2] = (uint32_t)julian_days;
http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/b6f3caeb/src/parquet/column-io-benchmark.cc
----------------------------------------------------------------------
diff --git a/src/parquet/column-io-benchmark.cc b/src/parquet/column-io-benchmark.cc
index 0a60367..2abf6fa 100644
--- a/src/parquet/column-io-benchmark.cc
+++ b/src/parquet/column-io-benchmark.cc
@@ -30,8 +30,9 @@ using schema::PrimitiveNode;
namespace benchmark {
std::unique_ptr<Int64Writer> BuildWriter(int64_t output_size, OutputStream* dst,
- ColumnChunkMetaDataBuilder* metadata, ColumnDescriptor* schema,
- const WriterProperties* properties) {
+ ColumnChunkMetaDataBuilder* metadata,
+ ColumnDescriptor* schema,
+ const WriterProperties* properties) {
std::unique_ptr<SerializedPageWriter> pager(
new SerializedPageWriter(dst, Compression::UNCOMPRESSED, metadata));
return std::unique_ptr<Int64Writer>(new Int64Writer(
@@ -40,8 +41,8 @@ std::unique_ptr<Int64Writer> BuildWriter(int64_t output_size, OutputStream* dst,
std::shared_ptr<ColumnDescriptor> Int64Schema(Repetition::type repetition) {
auto node = PrimitiveNode::Make("int64", repetition, Type::INT64);
- return std::make_shared<ColumnDescriptor>(
- node, repetition != Repetition::REQUIRED, repetition == Repetition::REPEATED);
+ return std::make_shared<ColumnDescriptor>(node, repetition != Repetition::REQUIRED,
+ repetition == Repetition::REPEATED);
}
void SetBytesProcessed(::benchmark::State& state, Repetition::type repetition) {
@@ -70,8 +71,8 @@ static void BM_WriteInt64Column(::benchmark::State& state) {
InMemoryOutputStream stream;
std::unique_ptr<Int64Writer> writer = BuildWriter(
state.range(0), &stream, metadata.get(), schema.get(), properties.get());
- writer->WriteBatch(
- values.size(), definition_levels.data(), repetition_levels.data(), values.data());
+ writer->WriteBatch(values.size(), definition_levels.data(), repetition_levels.data(),
+ values.data());
writer->Close();
}
SetBytesProcessed(state, repetition);
@@ -83,8 +84,8 @@ BENCHMARK_TEMPLATE(BM_WriteInt64Column, Repetition::OPTIONAL)->Range(1024, 65536
BENCHMARK_TEMPLATE(BM_WriteInt64Column, Repetition::REPEATED)->Range(1024, 65536);
-std::unique_ptr<Int64Reader> BuildReader(
- std::shared_ptr<Buffer>& buffer, int64_t num_values, ColumnDescriptor* schema) {
+std::unique_ptr<Int64Reader> BuildReader(std::shared_ptr<Buffer>& buffer,
+ int64_t num_values, ColumnDescriptor* schema) {
std::unique_ptr<InMemoryInputStream> source(new InMemoryInputStream(buffer));
std::unique_ptr<SerializedPageReader> page_reader(
new SerializedPageReader(std::move(source), num_values, Compression::UNCOMPRESSED));
@@ -105,8 +106,8 @@ static void BM_ReadInt64Column(::benchmark::State& state) {
InMemoryOutputStream stream;
std::unique_ptr<Int64Writer> writer = BuildWriter(
state.range(0), &stream, metadata.get(), schema.get(), properties.get());
- writer->WriteBatch(
- values.size(), definition_levels.data(), repetition_levels.data(), values.data());
+ writer->WriteBatch(values.size(), definition_levels.data(), repetition_levels.data(),
+ values.data());
writer->Close();
std::shared_ptr<Buffer> src = stream.GetBuffer();
@@ -118,7 +119,7 @@ static void BM_ReadInt64Column(::benchmark::State& state) {
int64_t values_read = 0;
for (size_t i = 0; i < values.size(); i += values_read) {
reader->ReadBatch(values_out.size(), definition_levels_out.data(),
- repetition_levels_out.data(), values_out.data(), &values_read);
+ repetition_levels_out.data(), values_out.data(), &values_read);
}
}
SetBytesProcessed(state, repetition);
@@ -136,8 +137,8 @@ BENCHMARK_TEMPLATE(BM_ReadInt64Column, Repetition::REPEATED)
static void BM_RleEncoding(::benchmark::State& state) {
std::vector<int16_t> levels(state.range(0), 0);
int64_t n = 0;
- std::generate(
- levels.begin(), levels.end(), [&state, &n] { return (n++ % state.range(1)) == 0; });
+ std::generate(levels.begin(), levels.end(),
+ [&state, &n] { return (n++ % state.range(1)) == 0; });
int16_t max_level = 1;
int64_t rle_size = LevelEncoder::MaxBufferSize(Encoding::RLE, max_level, levels.size());
auto buffer_rle = std::make_shared<PoolBuffer>();
@@ -146,7 +147,7 @@ static void BM_RleEncoding(::benchmark::State& state) {
while (state.KeepRunning()) {
LevelEncoder level_encoder;
level_encoder.Init(Encoding::RLE, max_level, levels.size(),
- buffer_rle->mutable_data(), buffer_rle->size());
+ buffer_rle->mutable_data(), buffer_rle->size());
level_encoder.Encode(levels.size(), levels.data());
}
state.SetBytesProcessed(state.iterations() * state.range(0) * sizeof(int16_t));
@@ -159,14 +160,14 @@ static void BM_RleDecoding(::benchmark::State& state) {
LevelEncoder level_encoder;
std::vector<int16_t> levels(state.range(0), 0);
int64_t n = 0;
- std::generate(
- levels.begin(), levels.end(), [&state, &n] { return (n++ % state.range(1)) == 0; });
+ std::generate(levels.begin(), levels.end(),
+ [&state, &n] { return (n++ % state.range(1)) == 0; });
int16_t max_level = 1;
int64_t rle_size = LevelEncoder::MaxBufferSize(Encoding::RLE, max_level, levels.size());
auto buffer_rle = std::make_shared<PoolBuffer>();
PARQUET_THROW_NOT_OK(buffer_rle->Resize(rle_size + sizeof(int32_t)));
level_encoder.Init(Encoding::RLE, max_level, levels.size(),
- buffer_rle->mutable_data() + sizeof(int32_t), rle_size);
+ buffer_rle->mutable_data() + sizeof(int32_t), rle_size);
level_encoder.Encode(levels.size(), levels.data());
reinterpret_cast<int32_t*>(buffer_rle->mutable_data())[0] = level_encoder.len();