You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@parquet.apache.org by no...@apache.org on 2016/01/09 00:51:52 UTC
[1/7] parquet-cpp git commit: PARQUET-416: C++11 compilation,
code reorg, libparquet and installation targets
Repository: parquet-cpp
Updated Branches:
refs/heads/master ea30decd9 -> 337cf584e
http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/337cf584/src/parquet/thrift/parquet_types.h
----------------------------------------------------------------------
diff --git a/src/parquet/thrift/parquet_types.h b/src/parquet/thrift/parquet_types.h
new file mode 100644
index 0000000..4360d02
--- /dev/null
+++ b/src/parquet/thrift/parquet_types.h
@@ -0,0 +1,1123 @@
+/**
+ * Autogenerated by Thrift Compiler (0.9.0)
+ *
+ * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING
+ * @generated
+ */
+#ifndef parquet_TYPES_H
+#define parquet_TYPES_H
+
+#include <thrift/Thrift.h>
+#include <thrift/TApplicationException.h>
+#include <thrift/protocol/TProtocol.h>
+#include <thrift/transport/TTransport.h>
+
+
+
+namespace parquet {
+
+struct Type {  // Scoping wrapper: the enum is referred to as Type::type.
+ enum type {  // Enumerator values are assigned explicitly, 0 through 7.
+ BOOLEAN = 0,
+ INT32 = 1,
+ INT64 = 2,
+ INT96 = 3,
+ FLOAT = 4,
+ DOUBLE = 5,
+ BYTE_ARRAY = 6,
+ FIXED_LEN_BYTE_ARRAY = 7
+ };
+};
+// Table keyed by int yielding a C string (presumably enumerator value -> name); only declared here, defined elsewhere.
+extern const std::map<int, const char*> _Type_VALUES_TO_NAMES;
+
+struct ConvertedType {  // Scoping wrapper: the enum is referred to as ConvertedType::type.
+ enum type {  // Enumerator values are assigned explicitly, 0 through 5.
+ UTF8 = 0,
+ MAP = 1,
+ MAP_KEY_VALUE = 2,
+ LIST = 3,
+ ENUM = 4,
+ DECIMAL = 5
+ };
+};
+// Table keyed by int yielding a C string (presumably enumerator value -> name); only declared here, defined elsewhere.
+extern const std::map<int, const char*> _ConvertedType_VALUES_TO_NAMES;
+
+struct FieldRepetitionType {  // Scoping wrapper: the enum is referred to as FieldRepetitionType::type.
+ enum type {  // Enumerator values are assigned explicitly, 0 through 2.
+ REQUIRED = 0,
+ OPTIONAL = 1,
+ REPEATED = 2
+ };
+};
+// Table keyed by int yielding a C string (presumably enumerator value -> name); only declared here, defined elsewhere.
+extern const std::map<int, const char*> _FieldRepetitionType_VALUES_TO_NAMES;
+
+struct Encoding {  // Scoping wrapper: the enum is referred to as Encoding::type.
+ enum type {  // Explicit values; note that 1 is skipped (NOTE(review): presumably reserved by the format definition - confirm in parquet.thrift).
+ PLAIN = 0,
+ PLAIN_DICTIONARY = 2,
+ RLE = 3,
+ BIT_PACKED = 4,
+ DELTA_BINARY_PACKED = 5,
+ DELTA_LENGTH_BYTE_ARRAY = 6,
+ DELTA_BYTE_ARRAY = 7,
+ RLE_DICTIONARY = 8
+ };
+};
+// Table keyed by int yielding a C string (presumably enumerator value -> name); only declared here, defined elsewhere.
+extern const std::map<int, const char*> _Encoding_VALUES_TO_NAMES;
+
+struct CompressionCodec {  // Scoping wrapper: the enum is referred to as CompressionCodec::type.
+ enum type {  // Enumerator values are assigned explicitly, 0 through 3.
+ UNCOMPRESSED = 0,
+ SNAPPY = 1,
+ GZIP = 2,
+ LZO = 3
+ };
+};
+// Table keyed by int yielding a C string (presumably enumerator value -> name); only declared here, defined elsewhere.
+extern const std::map<int, const char*> _CompressionCodec_VALUES_TO_NAMES;
+
+struct PageType {  // Scoping wrapper: the enum is referred to as PageType::type.
+ enum type {  // Enumerator values are assigned explicitly, 0 through 3.
+ DATA_PAGE = 0,
+ INDEX_PAGE = 1,
+ DICTIONARY_PAGE = 2,
+ DATA_PAGE_V2 = 3
+ };
+};
+// Table keyed by int yielding a C string (presumably enumerator value -> name); only declared here, defined elsewhere.
+extern const std::map<int, const char*> _PageType_VALUES_TO_NAMES;
+
+typedef struct _Statistics__isset {  // One presence flag per Statistics field; the __set_* setters of Statistics turn them on.
+ _Statistics__isset() : max(false), min(false), null_count(false), distinct_count(false) {}  // every flag starts false
+ bool max;
+ bool min;
+ bool null_count;
+ bool distinct_count;
+} _Statistics__isset;
+
+class Statistics {  // Generated record with four data fields, each paired with a presence flag in __isset.
+ public:
+
+ static const char* ascii_fingerprint; // = "CE004821871820DD79A8FD98BB101F6D";
+ static const uint8_t binary_fingerprint[16]; // = {0xCE,0x00,0x48,0x21,0x87,0x18,0x20,0xDD,0x79,0xA8,0xFD,0x98,0xBB,0x10,0x1F,0x6D};
+
+ Statistics() : max(), min(), null_count(0), distinct_count(0) {  // empty strings and zero counts
+ }
+
+ virtual ~Statistics() throw() {}  // virtual, declared non-throwing
+
+ std::string max;
+ std::string min;
+ int64_t null_count;
+ int64_t distinct_count;
+
+ _Statistics__isset __isset;  // presence flags; all false right after construction
+
+ void __set_max(const std::string& val) {  // stores val and marks max as present
+ max = val;
+ __isset.max = true;
+ }
+
+ void __set_min(const std::string& val) {  // stores val and marks min as present
+ min = val;
+ __isset.min = true;
+ }
+
+ void __set_null_count(const int64_t val) {  // stores val and marks null_count as present
+ null_count = val;
+ __isset.null_count = true;
+ }
+
+ void __set_distinct_count(const int64_t val) {  // stores val and marks distinct_count as present
+ distinct_count = val;
+ __isset.distinct_count = true;
+ }
+
+ bool operator == (const Statistics & rhs) const  // Equal when presence flags agree and every present field matches; values of fields unset on both sides are ignored.
+ {
+ if (__isset.max != rhs.__isset.max)
+ return false;
+ else if (__isset.max && !(max == rhs.max))
+ return false;
+ if (__isset.min != rhs.__isset.min)
+ return false;
+ else if (__isset.min && !(min == rhs.min))
+ return false;
+ if (__isset.null_count != rhs.__isset.null_count)
+ return false;
+ else if (__isset.null_count && !(null_count == rhs.null_count))
+ return false;
+ if (__isset.distinct_count != rhs.__isset.distinct_count)
+ return false;
+ else if (__isset.distinct_count && !(distinct_count == rhs.distinct_count))
+ return false;
+ return true;
+ }
+ bool operator != (const Statistics &rhs) const {  // negation of operator==
+ return !(*this == rhs);
+ }
+
+ bool operator < (const Statistics & ) const;  // declared only; no definition in this header
+
+ uint32_t read(::apache::thrift::protocol::TProtocol* iprot);  // declared only; presumably deserializes from iprot - body is outside this header
+ uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const;  // declared only; presumably serializes to oprot - body is outside this header
+
+};
+
+void swap(Statistics &a, Statistics &b);  // declared only; defined outside this header
+
+typedef struct _SchemaElement__isset {  // Presence flags for the SchemaElement fields that may be left unset (there is none for name).
+ _SchemaElement__isset() : type(false), type_length(false), repetition_type(false), num_children(false), converted_type(false), scale(false), precision(false) {}  // every flag starts false
+ bool type;
+ bool type_length;
+ bool repetition_type;
+ bool num_children;
+ bool converted_type;
+ bool scale;
+ bool precision;
+} _SchemaElement__isset;
+
+class SchemaElement {  // Generated record; name has no presence flag, every other field has one in __isset.
+ public:
+
+ static const char* ascii_fingerprint; // = "388A784401753800444CFEAC8BC1B1A1";
+ static const uint8_t binary_fingerprint[16]; // = {0x38,0x8A,0x78,0x44,0x01,0x75,0x38,0x00,0x44,0x4C,0xFE,0xAC,0x8B,0xC1,0xB1,0xA1};
+
+ SchemaElement() : type((Type::type)0), type_length(0), repetition_type((FieldRepetitionType::type)0), name(), num_children(0), converted_type((ConvertedType::type)0), scale(0), precision(0) {  // enums start at value 0, integers at 0, name empty
+ }
+
+ virtual ~SchemaElement() throw() {}  // virtual, declared non-throwing
+
+ Type::type type;
+ int32_t type_length;
+ FieldRepetitionType::type repetition_type;
+ std::string name;
+ int32_t num_children;
+ ConvertedType::type converted_type;
+ int32_t scale;
+ int32_t precision;
+
+ _SchemaElement__isset __isset;  // presence flags; all false right after construction
+
+ void __set_type(const Type::type val) {  // stores val and marks type as present
+ type = val;
+ __isset.type = true;
+ }
+
+ void __set_type_length(const int32_t val) {  // stores val and marks type_length as present
+ type_length = val;
+ __isset.type_length = true;
+ }
+
+ void __set_repetition_type(const FieldRepetitionType::type val) {  // stores val and marks repetition_type as present
+ repetition_type = val;
+ __isset.repetition_type = true;
+ }
+
+ void __set_name(const std::string& val) {  // stores val only; name has no presence flag
+ name = val;
+ }
+
+ void __set_num_children(const int32_t val) {  // stores val and marks num_children as present
+ num_children = val;
+ __isset.num_children = true;
+ }
+
+ void __set_converted_type(const ConvertedType::type val) {  // stores val and marks converted_type as present
+ converted_type = val;
+ __isset.converted_type = true;
+ }
+
+ void __set_scale(const int32_t val) {  // stores val and marks scale as present
+ scale = val;
+ __isset.scale = true;
+ }
+
+ void __set_precision(const int32_t val) {  // stores val and marks precision as present
+ precision = val;
+ __isset.precision = true;
+ }
+
+ bool operator == (const SchemaElement & rhs) const  // name is always compared; flagged fields need matching flags and are compared only when present.
+ {
+ if (__isset.type != rhs.__isset.type)
+ return false;
+ else if (__isset.type && !(type == rhs.type))
+ return false;
+ if (__isset.type_length != rhs.__isset.type_length)
+ return false;
+ else if (__isset.type_length && !(type_length == rhs.type_length))
+ return false;
+ if (__isset.repetition_type != rhs.__isset.repetition_type)
+ return false;
+ else if (__isset.repetition_type && !(repetition_type == rhs.repetition_type))
+ return false;
+ if (!(name == rhs.name))
+ return false;
+ if (__isset.num_children != rhs.__isset.num_children)
+ return false;
+ else if (__isset.num_children && !(num_children == rhs.num_children))
+ return false;
+ if (__isset.converted_type != rhs.__isset.converted_type)
+ return false;
+ else if (__isset.converted_type && !(converted_type == rhs.converted_type))
+ return false;
+ if (__isset.scale != rhs.__isset.scale)
+ return false;
+ else if (__isset.scale && !(scale == rhs.scale))
+ return false;
+ if (__isset.precision != rhs.__isset.precision)
+ return false;
+ else if (__isset.precision && !(precision == rhs.precision))
+ return false;
+ return true;
+ }
+ bool operator != (const SchemaElement &rhs) const {  // negation of operator==
+ return !(*this == rhs);
+ }
+
+ bool operator < (const SchemaElement & ) const;  // declared only; no definition in this header
+
+ uint32_t read(::apache::thrift::protocol::TProtocol* iprot);  // declared only; presumably deserializes from iprot - body is outside this header
+ uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const;  // declared only; presumably serializes to oprot - body is outside this header
+
+};
+
+void swap(SchemaElement &a, SchemaElement &b);  // declared only; defined outside this header
+
+typedef struct _DataPageHeader__isset {  // Presence flag for DataPageHeader::statistics, the only field of that class that has one.
+ _DataPageHeader__isset() : statistics(false) {}  // flag starts false
+ bool statistics;
+} _DataPageHeader__isset;
+
+class DataPageHeader {  // Generated record; only statistics has a presence flag, the other four fields are always compared.
+ public:
+
+ static const char* ascii_fingerprint; // = "5FC1792B0483E9C984475384165040B1";
+ static const uint8_t binary_fingerprint[16]; // = {0x5F,0xC1,0x79,0x2B,0x04,0x83,0xE9,0xC9,0x84,0x47,0x53,0x84,0x16,0x50,0x40,0xB1};
+
+ DataPageHeader() : num_values(0), encoding((Encoding::type)0), definition_level_encoding((Encoding::type)0), repetition_level_encoding((Encoding::type)0) {  // statistics is default-constructed
+ }
+
+ virtual ~DataPageHeader() throw() {}  // virtual, declared non-throwing
+
+ int32_t num_values;
+ Encoding::type encoding;
+ Encoding::type definition_level_encoding;
+ Encoding::type repetition_level_encoding;
+ Statistics statistics;
+
+ _DataPageHeader__isset __isset;  // presence flag for statistics; false right after construction
+
+ void __set_num_values(const int32_t val) {  // stores val; no presence flag for this field
+ num_values = val;
+ }
+
+ void __set_encoding(const Encoding::type val) {  // stores val; no presence flag for this field
+ encoding = val;
+ }
+
+ void __set_definition_level_encoding(const Encoding::type val) {  // stores val; no presence flag for this field
+ definition_level_encoding = val;
+ }
+
+ void __set_repetition_level_encoding(const Encoding::type val) {  // stores val; no presence flag for this field
+ repetition_level_encoding = val;
+ }
+
+ void __set_statistics(const Statistics& val) {  // copies val and marks statistics as present
+ statistics = val;
+ __isset.statistics = true;
+ }
+
+ bool operator == (const DataPageHeader & rhs) const  // unflagged fields must match; statistics is compared only when present on both sides.
+ {
+ if (!(num_values == rhs.num_values))
+ return false;
+ if (!(encoding == rhs.encoding))
+ return false;
+ if (!(definition_level_encoding == rhs.definition_level_encoding))
+ return false;
+ if (!(repetition_level_encoding == rhs.repetition_level_encoding))
+ return false;
+ if (__isset.statistics != rhs.__isset.statistics)
+ return false;
+ else if (__isset.statistics && !(statistics == rhs.statistics))
+ return false;
+ return true;
+ }
+ bool operator != (const DataPageHeader &rhs) const {  // negation of operator==
+ return !(*this == rhs);
+ }
+
+ bool operator < (const DataPageHeader & ) const;  // declared only; no definition in this header
+
+ uint32_t read(::apache::thrift::protocol::TProtocol* iprot);  // declared only; presumably deserializes from iprot - body is outside this header
+ uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const;  // declared only; presumably serializes to oprot - body is outside this header
+
+};
+
+void swap(DataPageHeader &a, DataPageHeader &b);  // declared only; defined outside this header
+
+
+class IndexPageHeader {  // Generated record with no data fields at all.
+ public:
+
+ static const char* ascii_fingerprint; // = "99914B932BD37A50B983C5E7C90AE93B";
+ static const uint8_t binary_fingerprint[16]; // = {0x99,0x91,0x4B,0x93,0x2B,0xD3,0x7A,0x50,0xB9,0x83,0xC5,0xE7,0xC9,0x0A,0xE9,0x3B};
+
+ IndexPageHeader() {  // nothing to initialize
+ }
+
+ virtual ~IndexPageHeader() throw() {}  // virtual, declared non-throwing
+
+
+ bool operator == (const IndexPageHeader & /* rhs */) const  // always true: there are no fields to compare
+ {
+ return true;
+ }
+ bool operator != (const IndexPageHeader &rhs) const {  // negation of operator==, hence always false
+ return !(*this == rhs);
+ }
+
+ bool operator < (const IndexPageHeader & ) const;  // declared only; no definition in this header
+
+ uint32_t read(::apache::thrift::protocol::TProtocol* iprot);  // declared only; presumably deserializes from iprot - body is outside this header
+ uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const;  // declared only; presumably serializes to oprot - body is outside this header
+
+};
+
+void swap(IndexPageHeader &a, IndexPageHeader &b);  // declared only; defined outside this header
+
+typedef struct _DictionaryPageHeader__isset {  // Presence flag for DictionaryPageHeader::is_sorted, the only field of that class that has one.
+ _DictionaryPageHeader__isset() : is_sorted(false) {}  // flag starts false
+ bool is_sorted;
+} _DictionaryPageHeader__isset;
+
+class DictionaryPageHeader {  // Generated record; only is_sorted has a presence flag.
+ public:
+
+ static const char* ascii_fingerprint; // = "B149E4528254D495610C22AE4BD539C5";
+ static const uint8_t binary_fingerprint[16]; // = {0xB1,0x49,0xE4,0x52,0x82,0x54,0xD4,0x95,0x61,0x0C,0x22,0xAE,0x4B,0xD5,0x39,0xC5};
+
+ DictionaryPageHeader() : num_values(0), encoding((Encoding::type)0), is_sorted(0) {  // is_sorted(0) initializes the bool to false
+ }
+
+ virtual ~DictionaryPageHeader() throw() {}  // virtual, declared non-throwing
+
+ int32_t num_values;
+ Encoding::type encoding;
+ bool is_sorted;
+
+ _DictionaryPageHeader__isset __isset;  // presence flag for is_sorted; false right after construction
+
+ void __set_num_values(const int32_t val) {  // stores val; no presence flag for this field
+ num_values = val;
+ }
+
+ void __set_encoding(const Encoding::type val) {  // stores val; no presence flag for this field
+ encoding = val;
+ }
+
+ void __set_is_sorted(const bool val) {  // stores val and marks is_sorted as present
+ is_sorted = val;
+ __isset.is_sorted = true;
+ }
+
+ bool operator == (const DictionaryPageHeader & rhs) const  // unflagged fields must match; is_sorted is compared only when present on both sides.
+ {
+ if (!(num_values == rhs.num_values))
+ return false;
+ if (!(encoding == rhs.encoding))
+ return false;
+ if (__isset.is_sorted != rhs.__isset.is_sorted)
+ return false;
+ else if (__isset.is_sorted && !(is_sorted == rhs.is_sorted))
+ return false;
+ return true;
+ }
+ bool operator != (const DictionaryPageHeader &rhs) const {  // negation of operator==
+ return !(*this == rhs);
+ }
+
+ bool operator < (const DictionaryPageHeader & ) const;  // declared only; no definition in this header
+
+ uint32_t read(::apache::thrift::protocol::TProtocol* iprot);  // declared only; presumably deserializes from iprot - body is outside this header
+ uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const;  // declared only; presumably serializes to oprot - body is outside this header
+
+};
+
+void swap(DictionaryPageHeader &a, DictionaryPageHeader &b);  // declared only; defined outside this header
+
+typedef struct _DataPageHeaderV2__isset {  // Presence flags for DataPageHeaderV2::is_compressed and ::statistics.
+ _DataPageHeaderV2__isset() : is_compressed(true), statistics(false) {}  // unlike the other flags in this header, is_compressed starts true
+ bool is_compressed;
+ bool statistics;
+} _DataPageHeaderV2__isset;
+
+class DataPageHeaderV2 {  // Generated record; is_compressed and statistics have presence flags, the other fields do not.
+ public:
+
+ static const char* ascii_fingerprint; // = "69FF2F6BD1A443440D5E46ABA5A3A919";
+ static const uint8_t binary_fingerprint[16]; // = {0x69,0xFF,0x2F,0x6B,0xD1,0xA4,0x43,0x44,0x0D,0x5E,0x46,0xAB,0xA5,0xA3,0xA9,0x19};
+
+ DataPageHeaderV2() : num_values(0), num_nulls(0), num_rows(0), encoding((Encoding::type)0), definition_levels_byte_length(0), repetition_levels_byte_length(0), is_compressed(true) {  // is_compressed defaults to true; statistics is default-constructed
+ }
+
+ virtual ~DataPageHeaderV2() throw() {}  // virtual, declared non-throwing
+
+ int32_t num_values;
+ int32_t num_nulls;
+ int32_t num_rows;
+ Encoding::type encoding;
+ int32_t definition_levels_byte_length;
+ int32_t repetition_levels_byte_length;
+ bool is_compressed;
+ Statistics statistics;
+
+ _DataPageHeaderV2__isset __isset;  // after construction: is_compressed flag true, statistics flag false
+
+ void __set_num_values(const int32_t val) {  // stores val; no presence flag for this field
+ num_values = val;
+ }
+
+ void __set_num_nulls(const int32_t val) {  // stores val; no presence flag for this field
+ num_nulls = val;
+ }
+
+ void __set_num_rows(const int32_t val) {  // stores val; no presence flag for this field
+ num_rows = val;
+ }
+
+ void __set_encoding(const Encoding::type val) {  // stores val; no presence flag for this field
+ encoding = val;
+ }
+
+ void __set_definition_levels_byte_length(const int32_t val) {  // stores val; no presence flag for this field
+ definition_levels_byte_length = val;
+ }
+
+ void __set_repetition_levels_byte_length(const int32_t val) {  // stores val; no presence flag for this field
+ repetition_levels_byte_length = val;
+ }
+
+ void __set_is_compressed(const bool val) {  // stores val and marks is_compressed as present
+ is_compressed = val;
+ __isset.is_compressed = true;
+ }
+
+ void __set_statistics(const Statistics& val) {  // copies val and marks statistics as present
+ statistics = val;
+ __isset.statistics = true;
+ }
+
+ bool operator == (const DataPageHeaderV2 & rhs) const  // unflagged fields must match; is_compressed and statistics are compared only when present on both sides.
+ {
+ if (!(num_values == rhs.num_values))
+ return false;
+ if (!(num_nulls == rhs.num_nulls))
+ return false;
+ if (!(num_rows == rhs.num_rows))
+ return false;
+ if (!(encoding == rhs.encoding))
+ return false;
+ if (!(definition_levels_byte_length == rhs.definition_levels_byte_length))
+ return false;
+ if (!(repetition_levels_byte_length == rhs.repetition_levels_byte_length))
+ return false;
+ if (__isset.is_compressed != rhs.__isset.is_compressed)
+ return false;
+ else if (__isset.is_compressed && !(is_compressed == rhs.is_compressed))
+ return false;
+ if (__isset.statistics != rhs.__isset.statistics)
+ return false;
+ else if (__isset.statistics && !(statistics == rhs.statistics))
+ return false;
+ return true;
+ }
+ bool operator != (const DataPageHeaderV2 &rhs) const {  // negation of operator==
+ return !(*this == rhs);
+ }
+
+ bool operator < (const DataPageHeaderV2 & ) const;  // declared only; no definition in this header
+
+ uint32_t read(::apache::thrift::protocol::TProtocol* iprot);  // declared only; presumably deserializes from iprot - body is outside this header
+ uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const;  // declared only; presumably serializes to oprot - body is outside this header
+
+};
+
+void swap(DataPageHeaderV2 &a, DataPageHeaderV2 &b);  // declared only; defined outside this header
+
+typedef struct _PageHeader__isset {  // Presence flags for PageHeader::crc and the four nested per-page-kind headers.
+ _PageHeader__isset() : crc(false), data_page_header(false), index_page_header(false), dictionary_page_header(false), data_page_header_v2(false) {}  // every flag starts false
+ bool crc;
+ bool data_page_header;
+ bool index_page_header;
+ bool dictionary_page_header;
+ bool data_page_header_v2;
+} _PageHeader__isset;
+
+class PageHeader {  // Generated record: type and the two sizes are unflagged; crc and the nested headers have presence flags.
+ public:
+
+ static const char* ascii_fingerprint; // = "B5BD2BDF3756C883A58B30B9C9F204A0";
+ static const uint8_t binary_fingerprint[16]; // = {0xB5,0xBD,0x2B,0xDF,0x37,0x56,0xC8,0x83,0xA5,0x8B,0x30,0xB9,0xC9,0xF2,0x04,0xA0};
+
+ PageHeader() : type((PageType::type)0), uncompressed_page_size(0), compressed_page_size(0), crc(0) {  // nested headers are default-constructed
+ }
+
+ virtual ~PageHeader() throw() {}  // virtual, declared non-throwing
+
+ PageType::type type;
+ int32_t uncompressed_page_size;
+ int32_t compressed_page_size;
+ int32_t crc;
+ DataPageHeader data_page_header;  // NOTE(review): presumably only the nested header matching `type` is populated - confirm against the format spec
+ IndexPageHeader index_page_header;
+ DictionaryPageHeader dictionary_page_header;
+ DataPageHeaderV2 data_page_header_v2;
+
+ _PageHeader__isset __isset;  // presence flags; all false right after construction
+
+ void __set_type(const PageType::type val) {  // stores val; no presence flag for this field
+ type = val;
+ }
+
+ void __set_uncompressed_page_size(const int32_t val) {  // stores val; no presence flag for this field
+ uncompressed_page_size = val;
+ }
+
+ void __set_compressed_page_size(const int32_t val) {  // stores val; no presence flag for this field
+ compressed_page_size = val;
+ }
+
+ void __set_crc(const int32_t val) {  // stores val and marks crc as present
+ crc = val;
+ __isset.crc = true;
+ }
+
+ void __set_data_page_header(const DataPageHeader& val) {  // copies val and marks data_page_header as present
+ data_page_header = val;
+ __isset.data_page_header = true;
+ }
+
+ void __set_index_page_header(const IndexPageHeader& val) {  // copies val and marks index_page_header as present
+ index_page_header = val;
+ __isset.index_page_header = true;
+ }
+
+ void __set_dictionary_page_header(const DictionaryPageHeader& val) {  // copies val and marks dictionary_page_header as present
+ dictionary_page_header = val;
+ __isset.dictionary_page_header = true;
+ }
+
+ void __set_data_page_header_v2(const DataPageHeaderV2& val) {  // copies val and marks data_page_header_v2 as present
+ data_page_header_v2 = val;
+ __isset.data_page_header_v2 = true;
+ }
+
+ bool operator == (const PageHeader & rhs) const  // unflagged fields must match; flagged fields need matching flags and are compared only when present.
+ {
+ if (!(type == rhs.type))
+ return false;
+ if (!(uncompressed_page_size == rhs.uncompressed_page_size))
+ return false;
+ if (!(compressed_page_size == rhs.compressed_page_size))
+ return false;
+ if (__isset.crc != rhs.__isset.crc)
+ return false;
+ else if (__isset.crc && !(crc == rhs.crc))
+ return false;
+ if (__isset.data_page_header != rhs.__isset.data_page_header)
+ return false;
+ else if (__isset.data_page_header && !(data_page_header == rhs.data_page_header))
+ return false;
+ if (__isset.index_page_header != rhs.__isset.index_page_header)
+ return false;
+ else if (__isset.index_page_header && !(index_page_header == rhs.index_page_header))
+ return false;
+ if (__isset.dictionary_page_header != rhs.__isset.dictionary_page_header)
+ return false;
+ else if (__isset.dictionary_page_header && !(dictionary_page_header == rhs.dictionary_page_header))
+ return false;
+ if (__isset.data_page_header_v2 != rhs.__isset.data_page_header_v2)
+ return false;
+ else if (__isset.data_page_header_v2 && !(data_page_header_v2 == rhs.data_page_header_v2))
+ return false;
+ return true;
+ }
+ bool operator != (const PageHeader &rhs) const {  // negation of operator==
+ return !(*this == rhs);
+ }
+
+ bool operator < (const PageHeader & ) const;  // declared only; no definition in this header
+
+ uint32_t read(::apache::thrift::protocol::TProtocol* iprot);  // declared only; presumably deserializes from iprot - body is outside this header
+ uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const;  // declared only; presumably serializes to oprot - body is outside this header
+
+};
+
+void swap(PageHeader &a, PageHeader &b);  // declared only; defined outside this header
+
+typedef struct _KeyValue__isset {  // Presence flag for KeyValue::value; key has none.
+ _KeyValue__isset() : value(false) {}  // flag starts false
+ bool value;
+} _KeyValue__isset;
+
+class KeyValue {  // Generated record holding a string key and a string value; only value has a presence flag.
+ public:
+
+ static const char* ascii_fingerprint; // = "5B708A954C550ECA9C1A49D3C5CAFAB9";
+ static const uint8_t binary_fingerprint[16]; // = {0x5B,0x70,0x8A,0x95,0x4C,0x55,0x0E,0xCA,0x9C,0x1A,0x49,0xD3,0xC5,0xCA,0xFA,0xB9};
+
+ KeyValue() : key(), value() {  // both strings start empty
+ }
+
+ virtual ~KeyValue() throw() {}  // virtual, declared non-throwing
+
+ std::string key;
+ std::string value;
+
+ _KeyValue__isset __isset;  // presence flag for value; false right after construction
+
+ void __set_key(const std::string& val) {  // stores val; no presence flag for this field
+ key = val;
+ }
+
+ void __set_value(const std::string& val) {  // stores val and marks value as present
+ value = val;
+ __isset.value = true;
+ }
+
+ bool operator == (const KeyValue & rhs) const  // key must match; value is compared only when present on both sides.
+ {
+ if (!(key == rhs.key))
+ return false;
+ if (__isset.value != rhs.__isset.value)
+ return false;
+ else if (__isset.value && !(value == rhs.value))
+ return false;
+ return true;
+ }
+ bool operator != (const KeyValue &rhs) const {  // negation of operator==
+ return !(*this == rhs);
+ }
+
+ bool operator < (const KeyValue & ) const;  // declared only; no definition in this header
+
+ uint32_t read(::apache::thrift::protocol::TProtocol* iprot);  // declared only; presumably deserializes from iprot - body is outside this header
+ uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const;  // declared only; presumably serializes to oprot - body is outside this header
+
+};
+
+void swap(KeyValue &a, KeyValue &b);  // declared only; defined outside this header
+
+
+class SortingColumn {  // Generated record with three fields and no presence flags (no __isset member).
+ public:
+
+ static const char* ascii_fingerprint; // = "F079C2D58A783AD90F9BE05D10DBBC6F";
+ static const uint8_t binary_fingerprint[16]; // = {0xF0,0x79,0xC2,0xD5,0x8A,0x78,0x3A,0xD9,0x0F,0x9B,0xE0,0x5D,0x10,0xDB,0xBC,0x6F};
+
+ SortingColumn() : column_idx(0), descending(0), nulls_first(0) {  // the two bools are initialized with 0, i.e. false
+ }
+
+ virtual ~SortingColumn() throw() {}  // virtual, declared non-throwing
+
+ int32_t column_idx;
+ bool descending;
+ bool nulls_first;
+
+ void __set_column_idx(const int32_t val) {  // plain assignment
+ column_idx = val;
+ }
+
+ void __set_descending(const bool val) {  // plain assignment
+ descending = val;
+ }
+
+ void __set_nulls_first(const bool val) {  // plain assignment
+ nulls_first = val;
+ }
+
+ bool operator == (const SortingColumn & rhs) const  // all three fields must match
+ {
+ if (!(column_idx == rhs.column_idx))
+ return false;
+ if (!(descending == rhs.descending))
+ return false;
+ if (!(nulls_first == rhs.nulls_first))
+ return false;
+ return true;
+ }
+ bool operator != (const SortingColumn &rhs) const {  // negation of operator==
+ return !(*this == rhs);
+ }
+
+ bool operator < (const SortingColumn & ) const;  // declared only; no definition in this header
+
+ uint32_t read(::apache::thrift::protocol::TProtocol* iprot);  // declared only; presumably deserializes from iprot - body is outside this header
+ uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const;  // declared only; presumably serializes to oprot - body is outside this header
+
+};
+
+void swap(SortingColumn &a, SortingColumn &b);  // declared only; defined outside this header
+
+typedef struct _ColumnMetaData__isset {  // Presence flags for the four ColumnMetaData fields that may be left unset.
+ _ColumnMetaData__isset() : key_value_metadata(false), index_page_offset(false), dictionary_page_offset(false), statistics(false) {}  // every flag starts false
+ bool key_value_metadata;
+ bool index_page_offset;
+ bool dictionary_page_offset;
+ bool statistics;
+} _ColumnMetaData__isset;
+
+class ColumnMetaData {  // Generated record; key_value_metadata, index_page_offset, dictionary_page_offset and statistics have presence flags.
+ public:
+
+ static const char* ascii_fingerprint; // = "1AF797732BCB4465C6314FB29B86638D";
+ static const uint8_t binary_fingerprint[16]; // = {0x1A,0xF7,0x97,0x73,0x2B,0xCB,0x44,0x65,0xC6,0x31,0x4F,0xB2,0x9B,0x86,0x63,0x8D};
+
+ ColumnMetaData() : type((Type::type)0), codec((CompressionCodec::type)0), num_values(0), total_uncompressed_size(0), total_compressed_size(0), data_page_offset(0), index_page_offset(0), dictionary_page_offset(0) {  // vectors and statistics are default-constructed
+ }
+
+ virtual ~ColumnMetaData() throw() {}  // virtual, declared non-throwing
+
+ Type::type type;
+ std::vector<Encoding::type> encodings;
+ std::vector<std::string> path_in_schema;
+ CompressionCodec::type codec;
+ int64_t num_values;
+ int64_t total_uncompressed_size;
+ int64_t total_compressed_size;
+ std::vector<KeyValue> key_value_metadata;
+ int64_t data_page_offset;
+ int64_t index_page_offset;
+ int64_t dictionary_page_offset;
+ Statistics statistics;
+
+ _ColumnMetaData__isset __isset;  // presence flags; all false right after construction
+
+ void __set_type(const Type::type val) {  // stores val; no presence flag for this field
+ type = val;
+ }
+
+ void __set_encodings(const std::vector<Encoding::type> & val) {  // copies val; no presence flag for this field
+ encodings = val;
+ }
+
+ void __set_path_in_schema(const std::vector<std::string> & val) {  // copies val; no presence flag for this field
+ path_in_schema = val;
+ }
+
+ void __set_codec(const CompressionCodec::type val) {  // stores val; no presence flag for this field
+ codec = val;
+ }
+
+ void __set_num_values(const int64_t val) {  // stores val; no presence flag for this field
+ num_values = val;
+ }
+
+ void __set_total_uncompressed_size(const int64_t val) {  // stores val; no presence flag for this field
+ total_uncompressed_size = val;
+ }
+
+ void __set_total_compressed_size(const int64_t val) {  // stores val; no presence flag for this field
+ total_compressed_size = val;
+ }
+
+ void __set_key_value_metadata(const std::vector<KeyValue> & val) {  // copies val and marks key_value_metadata as present
+ key_value_metadata = val;
+ __isset.key_value_metadata = true;
+ }
+
+ void __set_data_page_offset(const int64_t val) {  // stores val; no presence flag for this field
+ data_page_offset = val;
+ }
+
+ void __set_index_page_offset(const int64_t val) {  // stores val and marks index_page_offset as present
+ index_page_offset = val;
+ __isset.index_page_offset = true;
+ }
+
+ void __set_dictionary_page_offset(const int64_t val) {  // stores val and marks dictionary_page_offset as present
+ dictionary_page_offset = val;
+ __isset.dictionary_page_offset = true;
+ }
+
+ void __set_statistics(const Statistics& val) {  // copies val and marks statistics as present
+ statistics = val;
+ __isset.statistics = true;
+ }
+
+ bool operator == (const ColumnMetaData & rhs) const  // unflagged fields must match; flagged fields need matching flags and are compared only when present.
+ {
+ if (!(type == rhs.type))
+ return false;
+ if (!(encodings == rhs.encodings))
+ return false;
+ if (!(path_in_schema == rhs.path_in_schema))
+ return false;
+ if (!(codec == rhs.codec))
+ return false;
+ if (!(num_values == rhs.num_values))
+ return false;
+ if (!(total_uncompressed_size == rhs.total_uncompressed_size))
+ return false;
+ if (!(total_compressed_size == rhs.total_compressed_size))
+ return false;
+ if (__isset.key_value_metadata != rhs.__isset.key_value_metadata)
+ return false;
+ else if (__isset.key_value_metadata && !(key_value_metadata == rhs.key_value_metadata))
+ return false;
+ if (!(data_page_offset == rhs.data_page_offset))
+ return false;
+ if (__isset.index_page_offset != rhs.__isset.index_page_offset)
+ return false;
+ else if (__isset.index_page_offset && !(index_page_offset == rhs.index_page_offset))
+ return false;
+ if (__isset.dictionary_page_offset != rhs.__isset.dictionary_page_offset)
+ return false;
+ else if (__isset.dictionary_page_offset && !(dictionary_page_offset == rhs.dictionary_page_offset))
+ return false;
+ if (__isset.statistics != rhs.__isset.statistics)
+ return false;
+ else if (__isset.statistics && !(statistics == rhs.statistics))
+ return false;
+ return true;
+ }
+ bool operator != (const ColumnMetaData &rhs) const {  // negation of operator==
+ return !(*this == rhs);
+ }
+
+ bool operator < (const ColumnMetaData & ) const;  // declared only; no definition in this header
+
+ uint32_t read(::apache::thrift::protocol::TProtocol* iprot);  // declared only; presumably deserializes from iprot - body is outside this header
+ uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const;  // declared only; presumably serializes to oprot - body is outside this header
+
+};
+
+void swap(ColumnMetaData &a, ColumnMetaData &b);  // declared only; defined outside this header
+
+typedef struct _ColumnChunk__isset {  // Presence flags for ColumnChunk::file_path and ::meta_data; file_offset has none.
+ _ColumnChunk__isset() : file_path(false), meta_data(false) {}  // every flag starts false
+ bool file_path;
+ bool meta_data;
+} _ColumnChunk__isset;
+
+class ColumnChunk {  // Generated record; file_path and meta_data have presence flags, file_offset does not.
+ public:
+
+ static const char* ascii_fingerprint; // = "169FC47057EF3D82E2FACDDEC2641AE8";
+ static const uint8_t binary_fingerprint[16]; // = {0x16,0x9F,0xC4,0x70,0x57,0xEF,0x3D,0x82,0xE2,0xFA,0xCD,0xDE,0xC2,0x64,0x1A,0xE8};
+
+ ColumnChunk() : file_path(), file_offset(0) {  // meta_data is default-constructed
+ }
+
+ virtual ~ColumnChunk() throw() {}  // virtual, declared non-throwing
+
+ std::string file_path;
+ int64_t file_offset;
+ ColumnMetaData meta_data;
+
+ _ColumnChunk__isset __isset;  // presence flags; all false right after construction
+
+ void __set_file_path(const std::string& val) {  // stores val and marks file_path as present
+ file_path = val;
+ __isset.file_path = true;
+ }
+
+ void __set_file_offset(const int64_t val) {  // stores val; no presence flag for this field
+ file_offset = val;
+ }
+
+ void __set_meta_data(const ColumnMetaData& val) {  // copies val and marks meta_data as present
+ meta_data = val;
+ __isset.meta_data = true;
+ }
+
+ bool operator == (const ColumnChunk & rhs) const  // file_offset must match; flagged fields need matching flags and are compared only when present.
+ {
+ if (__isset.file_path != rhs.__isset.file_path)
+ return false;
+ else if (__isset.file_path && !(file_path == rhs.file_path))
+ return false;
+ if (!(file_offset == rhs.file_offset))
+ return false;
+ if (__isset.meta_data != rhs.__isset.meta_data)
+ return false;
+ else if (__isset.meta_data && !(meta_data == rhs.meta_data))
+ return false;
+ return true;
+ }
+ bool operator != (const ColumnChunk &rhs) const {  // negation of operator==
+ return !(*this == rhs);
+ }
+
+ bool operator < (const ColumnChunk & ) const;  // declared only; no definition in this header
+
+ uint32_t read(::apache::thrift::protocol::TProtocol* iprot);  // declared only; presumably deserializes from iprot - body is outside this header
+ uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const;  // declared only; presumably serializes to oprot - body is outside this header
+
+};
+
+void swap(ColumnChunk &a, ColumnChunk &b);  // declared only; defined outside this header
+
+typedef struct _RowGroup__isset {  // Presence flag for RowGroup::sorting_columns, the only field of that class that has one.
+ _RowGroup__isset() : sorting_columns(false) {}  // flag starts false
+ bool sorting_columns;
+} _RowGroup__isset;
+
+class RowGroup {  // Generated record holding a vector of ColumnChunk plus two counters; only sorting_columns has a presence flag.
+ public:
+
+ static const char* ascii_fingerprint; // = "DC7968627FA826DDC4C6C9BE773586C9";
+ static const uint8_t binary_fingerprint[16]; // = {0xDC,0x79,0x68,0x62,0x7F,0xA8,0x26,0xDD,0xC4,0xC6,0xC9,0xBE,0x77,0x35,0x86,0xC9};
+
+ RowGroup() : total_byte_size(0), num_rows(0) {  // both vectors start empty
+ }
+
+ virtual ~RowGroup() throw() {}  // virtual, declared non-throwing
+
+ std::vector<ColumnChunk> columns;
+ int64_t total_byte_size;
+ int64_t num_rows;
+ std::vector<SortingColumn> sorting_columns;
+
+ _RowGroup__isset __isset;  // presence flag for sorting_columns; false right after construction
+
+ void __set_columns(const std::vector<ColumnChunk> & val) {  // copies val; no presence flag for this field
+ columns = val;
+ }
+
+ void __set_total_byte_size(const int64_t val) {  // stores val; no presence flag for this field
+ total_byte_size = val;
+ }
+
+ void __set_num_rows(const int64_t val) {  // stores val; no presence flag for this field
+ num_rows = val;
+ }
+
+ void __set_sorting_columns(const std::vector<SortingColumn> & val) {  // copies val and marks sorting_columns as present
+ sorting_columns = val;
+ __isset.sorting_columns = true;
+ }
+
+ bool operator == (const RowGroup & rhs) const  // unflagged fields must match; sorting_columns is compared only when present on both sides.
+ {
+ if (!(columns == rhs.columns))
+ return false;
+ if (!(total_byte_size == rhs.total_byte_size))
+ return false;
+ if (!(num_rows == rhs.num_rows))
+ return false;
+ if (__isset.sorting_columns != rhs.__isset.sorting_columns)
+ return false;
+ else if (__isset.sorting_columns && !(sorting_columns == rhs.sorting_columns))
+ return false;
+ return true;
+ }
+ bool operator != (const RowGroup &rhs) const {  // negation of operator==
+ return !(*this == rhs);
+ }
+
+ bool operator < (const RowGroup & ) const;  // declared only; no definition in this header
+
+ uint32_t read(::apache::thrift::protocol::TProtocol* iprot);  // declared only; presumably deserializes from iprot - body is outside this header
+ uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const;  // declared only; presumably serializes to oprot - body is outside this header
+
+};
+
+void swap(RowGroup &a, RowGroup &b);  // declared only; defined outside this header
+
+typedef struct _FileMetaData__isset {  // Presence flags for FileMetaData::key_value_metadata and ::created_by.
+ _FileMetaData__isset() : key_value_metadata(false), created_by(false) {}  // every flag starts false
+ bool key_value_metadata;
+ bool created_by;
+} _FileMetaData__isset;
+
+class FileMetaData {  // Generated record; key_value_metadata and created_by have presence flags, the other fields do not.
+ public:
+
+ static const char* ascii_fingerprint; // = "44DC7D83A66D54A7B7892A985C4125C9";
+ static const uint8_t binary_fingerprint[16]; // = {0x44,0xDC,0x7D,0x83,0xA6,0x6D,0x54,0xA7,0xB7,0x89,0x2A,0x98,0x5C,0x41,0x25,0xC9};
+
+ FileMetaData() : version(0), num_rows(0), created_by() {  // the three vectors start empty
+ }
+
+ virtual ~FileMetaData() throw() {}  // virtual, declared non-throwing
+
+ int32_t version;
+ std::vector<SchemaElement> schema;
+ int64_t num_rows;
+ std::vector<RowGroup> row_groups;
+ std::vector<KeyValue> key_value_metadata;
+ std::string created_by;
+
+ _FileMetaData__isset __isset;  // presence flags; all false right after construction
+
+ void __set_version(const int32_t val) {  // stores val; no presence flag for this field
+ version = val;
+ }
+
+ void __set_schema(const std::vector<SchemaElement> & val) {  // copies val; no presence flag for this field
+ schema = val;
+ }
+
+ void __set_num_rows(const int64_t val) {  // stores val; no presence flag for this field
+ num_rows = val;
+ }
+
+ void __set_row_groups(const std::vector<RowGroup> & val) {  // copies val; no presence flag for this field
+ row_groups = val;
+ }
+
+ void __set_key_value_metadata(const std::vector<KeyValue> & val) {  // copies val and marks key_value_metadata as present
+ key_value_metadata = val;
+ __isset.key_value_metadata = true;
+ }
+
+ void __set_created_by(const std::string& val) {  // stores val and marks created_by as present
+ created_by = val;
+ __isset.created_by = true;
+ }
+
+ bool operator == (const FileMetaData & rhs) const  // unflagged fields must match; flagged fields need matching flags and are compared only when present.
+ {
+ if (!(version == rhs.version))
+ return false;
+ if (!(schema == rhs.schema))
+ return false;
+ if (!(num_rows == rhs.num_rows))
+ return false;
+ if (!(row_groups == rhs.row_groups))
+ return false;
+ if (__isset.key_value_metadata != rhs.__isset.key_value_metadata)
+ return false;
+ else if (__isset.key_value_metadata && !(key_value_metadata == rhs.key_value_metadata))
+ return false;
+ if (__isset.created_by != rhs.__isset.created_by)
+ return false;
+ else if (__isset.created_by && !(created_by == rhs.created_by))
+ return false;
+ return true;
+ }
+ bool operator != (const FileMetaData &rhs) const {  // negation of operator==
+ return !(*this == rhs);
+ }
+
+ bool operator < (const FileMetaData & ) const;  // declared only; no definition in this header
+
+ uint32_t read(::apache::thrift::protocol::TProtocol* iprot);  // declared only; presumably deserializes from iprot - body is outside this header
+ uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const;  // declared only; presumably serializes to oprot - body is outside this header
+
+};
+
+void swap(FileMetaData &a, FileMetaData &b);  // declared only; defined outside this header
+
+} // namespace
+
+#endif
http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/337cf584/src/parquet/util/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/src/parquet/util/CMakeLists.txt b/src/parquet/util/CMakeLists.txt
new file mode 100644
index 0000000..1a5de97
--- /dev/null
+++ b/src/parquet/util/CMakeLists.txt
@@ -0,0 +1,24 @@
+# Copyright 2015 Cloudera Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Headers: util - install the headers listed below into include/parquet/util (a relative DESTINATION, resolved against the install prefix).
+install(FILES
+ bit-stream-utils.h
+ bit-stream-utils.inline.h
+ bit-util.h
+ compiler-util.h
+ logging.h
+ rle-encoding.h
+ stopwatch.h
+ DESTINATION include/parquet/util)
http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/337cf584/src/parquet/util/bit-stream-utils.h
----------------------------------------------------------------------
diff --git a/src/parquet/util/bit-stream-utils.h b/src/parquet/util/bit-stream-utils.h
new file mode 100644
index 0000000..7fba30a
--- /dev/null
+++ b/src/parquet/util/bit-stream-utils.h
@@ -0,0 +1,147 @@
+// Copyright 2012 Cloudera Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+
+#ifndef PARQUET_UTIL_BIT_STREAM_UTILS_H
+#define PARQUET_UTIL_BIT_STREAM_UTILS_H
+
+#include <string.h>
+#include <algorithm>
+#include <cstdint>
+
+#include "parquet/util/compiler-util.h"
+#include "parquet/util/bit-util.h"
+#include "parquet/util/logging.h"
+
+namespace parquet_cpp {
+
+// Utility class to write bit/byte streams. This class can write data to either be
+// bit packed or byte aligned (and a single stream that has a mix of both).
+// This class does not allocate memory; it writes into the caller-provided buffer.
+class BitWriter {
+ public:
+ // buffer: buffer to write bits to. Buffer should be preallocated with
+ // 'buffer_len' bytes. Only the pointer is stored; the bytes are not copied.
+ BitWriter(uint8_t* buffer, int buffer_len) :
+ buffer_(buffer),
+ max_bytes_(buffer_len) {
+ Clear();
+ }
+
+ void Clear() {  // Rewinds to offset 0 and drops any buffered bits.
+ buffered_values_ = 0;
+ byte_offset_ = 0;
+ bit_offset_ = 0;
+ }
+
+ // The number of current bytes written, including the current byte (i.e. may include a
+ // fraction of a byte). Includes buffered values that have not been flushed yet.
+ int bytes_written() const { return byte_offset_ + BitUtil::Ceil(bit_offset_, 8); }
+ uint8_t* buffer() const { return buffer_; }
+ int buffer_len() const { return max_bytes_; }
+
+ // Writes a num_bits-wide value (bit packed) to buffered_values_, flushing to buffer_
+ // if necessary. Returns false if there was not enough space. num_bits must be <= 32.
+ bool PutValue(uint64_t v, int num_bits);
+
+ // Writes v to the next aligned byte using num_bytes. If T is larger than num_bytes, the
+ // extra high-order bytes will be ignored. Returns false if there was not enough space.
+ template<typename T>
+ bool PutAligned(T v, int num_bytes);
+
+ // Write a Vlq encoded int to the buffer. Returns false if there was not enough
+ // room. The value is written byte aligned.
+ // For more details on vlq:
+ // en.wikipedia.org/wiki/Variable-length_quantity
+ bool PutVlqInt(uint32_t v);
+ bool PutZigZagVlqInt(int32_t v);
+
+ // Get a pointer to the next aligned byte and advance the underlying buffer
+ // by num_bytes. Any buffered bits are flushed (aligned) first.
+ // Returns NULL if there was not enough space.
+ uint8_t* GetNextBytePtr(int num_bytes = 1);
+
+ // Flushes all buffered values to the buffer. Call this when done writing to the buffer.
+ // If 'align' is true, buffered_values_ is reset and any future writes will be written
+ // to the next byte boundary.
+ void Flush(bool align = false);
+
+ private:
+ uint8_t* buffer_;
+ int max_bytes_;
+
+ // Bit-packed values are initially written to this variable before being memcpy'd to
+ // buffer_. This is faster than writing values byte by byte directly to buffer_.
+ uint64_t buffered_values_;
+
+ int byte_offset_; // Offset in buffer_
+ int bit_offset_; // Offset in buffered_values_
+};
+
+// Utility class to read bit/byte stream. This class can read bits or bytes
+// that are either byte aligned or not. It also has utilities to read multiple
+// bytes in one read (e.g. encoded int).
+// The reader only stores the buffer pointer; it never copies or frees the buffer.
+class BitReader {
+ public:
+  // 'buffer' is the buffer to read from. The buffer's length is 'buffer_len'.
+  BitReader(const uint8_t* buffer, int buffer_len) :
+      buffer_(buffer),
+      max_bytes_(buffer_len),
+      buffered_values_(0),
+      byte_offset_(0),
+      bit_offset_(0) {
+    // Preload up to 8 bytes. buffered_values_ is zeroed above so that a buffer
+    // shorter than 8 bytes does not leave its upper bytes indeterminate.
+    int num_bytes = std::min(8, max_bytes_ - byte_offset_);
+    if (num_bytes > 0) {
+      memcpy(&buffered_values_, buffer_ + byte_offset_, num_bytes);
+    }
+  }
+
+  // Creates an empty reader: every member is zeroed, so all reads report
+  // "not enough bytes" instead of acting on uninitialized offsets.
+  BitReader() :
+      buffer_(NULL),
+      max_bytes_(0),
+      buffered_values_(0),
+      byte_offset_(0),
+      bit_offset_(0) {}
+
+  // Gets the next value from the buffer. Returns true if 'v' could be read or false if
+  // there are not enough bytes left. num_bits must be <= 32.
+  template<typename T>
+  bool GetValue(int num_bits, T* v);
+
+  // Reads a 'num_bytes'-sized value from the buffer and stores it in 'v'. T needs to be a
+  // little-endian native type and big enough to store 'num_bytes'. The value is assumed
+  // to be byte-aligned so the stream will be advanced to the start of the next byte
+  // before 'v' is read. Returns false if there are not enough bytes left.
+  template<typename T>
+  bool GetAligned(int num_bytes, T* v);
+
+  // Reads a vlq encoded int from the stream. The encoded int must start at the
+  // beginning of a byte. Return false if there were not enough bytes in the buffer.
+  bool GetVlqInt(uint64_t* v);
+  bool GetZigZagVlqInt(int64_t* v);
+
+  // Returns the number of bytes left in the stream, not including the current byte (i.e.,
+  // there may be an additional fraction of a byte).
+  int bytes_left() const {
+    return max_bytes_ - (byte_offset_ + BitUtil::Ceil(bit_offset_, 8));
+  }
+
+  // Maximum byte length of a vlq encoded int
+  static const int MAX_VLQ_BYTE_LEN = 5;
+
+ private:
+  const uint8_t* buffer_;
+  int max_bytes_;
+
+  // Bytes are memcpy'd from buffer_ and values are read from this variable. This is
+  // faster than reading values byte by byte directly from buffer_.
+  uint64_t buffered_values_;
+
+  int byte_offset_;  // Offset in buffer_
+  int bit_offset_;   // Offset in buffered_values_
+};
+
+} // namespace parquet_cpp
+
+#endif
http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/337cf584/src/parquet/util/bit-stream-utils.inline.h
----------------------------------------------------------------------
diff --git a/src/parquet/util/bit-stream-utils.inline.h b/src/parquet/util/bit-stream-utils.inline.h
new file mode 100644
index 0000000..8678e50
--- /dev/null
+++ b/src/parquet/util/bit-stream-utils.inline.h
@@ -0,0 +1,164 @@
+// Copyright 2012 Cloudera Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+
+#ifndef PARQUET_UTIL_BIT_STREAM_UTILS_INLINE_H
+#define PARQUET_UTIL_BIT_STREAM_UTILS_INLINE_H
+
+#include "parquet/util/bit-stream-utils.h"
+
+namespace parquet_cpp {
+
+// Appends the 'num_bits'-wide value 'v' to the 64-bit staging word and spills the
+// word to buffer_ once it fills up. Returns false, writing nothing, when the
+// value would not fit in the remaining space.
+inline bool BitWriter::PutValue(uint64_t v, int num_bits) {
+  // TODO: revisit this limit if necessary (can be raised to 64 by fixing some edge cases)
+  DCHECK_LE(num_bits, 32);
+  DCHECK_EQ(v >> num_bits, 0) << "v = " << v << ", num_bits = " << num_bits;
+
+  const int end_bit = byte_offset_ * 8 + bit_offset_ + num_bits;
+  if (UNLIKELY(end_bit > max_bytes_ * 8)) return false;
+
+  buffered_values_ |= v << bit_offset_;
+  bit_offset_ += num_bits;
+
+  if (UNLIKELY(bit_offset_ >= 64)) {
+    // The staging word is full: copy it out and advance past it.
+    memcpy(buffer_ + byte_offset_, &buffered_values_, 8);
+    byte_offset_ += 8;
+    bit_offset_ -= 64;
+    // bit_offset_ now counts the bits of v that spilled over; they become the
+    // low bits of the fresh staging word.
+    buffered_values_ = v >> (num_bits - bit_offset_);
+  }
+  DCHECK_LT(bit_offset_, 64);
+  return true;
+}
+
+// Copies the bytes currently held in the staging word to buffer_. With 'align'
+// set, the write position also moves past those bytes and the staging word is
+// reset, so the next write starts on a byte boundary.
+inline void BitWriter::Flush(bool align) {
+  const int pending_bytes = BitUtil::Ceil(bit_offset_, 8);
+  DCHECK_LE(byte_offset_ + pending_bytes, max_bytes_);
+  memcpy(buffer_ + byte_offset_, &buffered_values_, pending_bytes);
+
+  if (!align) return;
+
+  byte_offset_ += pending_bytes;
+  bit_offset_ = 0;
+  buffered_values_ = 0;
+}
+
+// Aligns the stream to a byte boundary, then reserves 'num_bytes' bytes and
+// returns a pointer to the first of them. Returns NULL (after the aligning
+// flush has already happened) when fewer than 'num_bytes' bytes remain.
+inline uint8_t* BitWriter::GetNextBytePtr(int num_bytes) {
+  Flush(/* align */ true);
+  DCHECK_LE(byte_offset_, max_bytes_);
+
+  const int end_offset = byte_offset_ + num_bytes;
+  if (end_offset > max_bytes_) return NULL;
+
+  uint8_t* const reserved = buffer_ + byte_offset_;
+  byte_offset_ = end_offset;
+  return reserved;
+}
+
+// Writes the first 'num_bytes' bytes of val's object representation at the next
+// byte boundary. Returns false if the space could not be reserved.
+template<typename T>
+inline bool BitWriter::PutAligned(T val, int num_bytes) {
+  uint8_t* const dest = GetNextBytePtr(num_bytes);
+  if (dest != NULL) {
+    memcpy(dest, &val, num_bytes);
+    return true;
+  }
+  return false;
+}
+
+// Emits 'v' as a byte-aligned VLQ: seven payload bits per byte, least significant
+// group first, with the high bit set on every byte except the last. All bytes
+// are attempted even after a failed write; the result is false if any failed.
+inline bool BitWriter::PutVlqInt(uint32_t v) {
+  bool all_written = true;
+  for (; v >= 0x80; v >>= 7) {
+    all_written &= PutAligned<uint8_t>((v & 0x7F) | 0x80, 1);
+  }
+  all_written &= PutAligned<uint8_t>(v & 0x7F, 1);
+  return all_written;
+}
+
+// Zig-zag encodes 'v' (0, -1, 1, -2, ... map to 0, 1, 2, 3, ...) and writes the
+// result as a VLQ int. Returns false if there was not enough room.
+inline bool BitWriter::PutZigZagVlqInt(int32_t v) {
+  // Shift in unsigned arithmetic: left-shifting a negative (or large positive)
+  // signed int is undefined behaviour / overflow.
+  const uint32_t doubled = static_cast<uint32_t>(v) << 1;
+  // All ones for negative input, zero otherwise (what an arithmetic v >> 31 yields).
+  const uint32_t sign_mask = v < 0 ? 0xFFFFFFFFu : 0u;
+  return PutVlqInt(doubled ^ sign_mask);
+}
+
+// Extracts the next 'num_bits' bits into '*v', refilling the 64-bit staging word
+// from buffer_ when the read crosses its end. Returns false, leaving the reader
+// untouched, when fewer than 'num_bits' bits remain.
+template<typename T>
+inline bool BitReader::GetValue(int num_bits, T* v) {
+  // TODO: revisit this limit if necessary
+  DCHECK_LE(num_bits, 32);
+  DCHECK_LE(num_bits, sizeof(T) * 8);
+
+  const int end_bit = byte_offset_ * 8 + bit_offset_ + num_bits;
+  if (UNLIKELY(end_bit > max_bytes_ * 8)) return false;
+
+  *v = BitUtil::TrailingBits(buffered_values_, bit_offset_ + num_bits) >> bit_offset_;
+  bit_offset_ += num_bits;
+
+  if (bit_offset_ >= 64) {
+    // The staging word is exhausted: step to the next one and reload it with
+    // however many bytes (at most 8) are still available.
+    byte_offset_ += 8;
+    bit_offset_ -= 64;
+
+    const int bytes_remaining = max_bytes_ - byte_offset_;
+    const int refill_bytes = LIKELY(bytes_remaining >= 8) ? 8 : bytes_remaining;
+    memcpy(&buffered_values_, buffer_ + byte_offset_, refill_bytes);
+
+    // Read bits of v that crossed into new buffered_values_
+    *v |= BitUtil::TrailingBits(buffered_values_, bit_offset_)
+        << (num_bits - bit_offset_);
+  }
+  DCHECK_LE(bit_offset_, 64);
+  return true;
+}
+
+// Skips to the next byte boundary, copies 'num_bytes' bytes into '*v' and reloads
+// the staging word from the position after them. Returns false, leaving the
+// reader untouched, when not enough bytes remain.
+template<typename T>
+inline bool BitReader::GetAligned(int num_bytes, T* v) {
+  DCHECK_LE(num_bytes, sizeof(T));
+
+  // Bytes of the staging word that have been (at least partly) consumed.
+  const int consumed_bytes = BitUtil::Ceil(bit_offset_, 8);
+  if (UNLIKELY(byte_offset_ + consumed_bytes + num_bytes > max_bytes_)) return false;
+
+  // Advance byte_offset to next unread byte and read num_bytes
+  byte_offset_ += consumed_bytes;
+  memcpy(v, buffer_ + byte_offset_, num_bytes);
+  byte_offset_ += num_bytes;
+
+  // Reset buffered_values_
+  bit_offset_ = 0;
+  const int bytes_remaining = max_bytes_ - byte_offset_;
+  const int refill_bytes = LIKELY(bytes_remaining >= 8) ? 8 : bytes_remaining;
+  memcpy(&buffered_values_, buffer_ + byte_offset_, refill_bytes);
+  return true;
+}
+
+// Reads a byte-aligned VLQ encoded int into '*v'. Returns false if the buffer
+// runs out, or if the encoding is malformed, i.e. the continuation bit is still
+// set after MAX_VLQ_BYTE_LEN bytes. On failure '*v' holds the partial value.
+inline bool BitReader::GetVlqInt(uint64_t* v) {
+  *v = 0;
+  for (int num_bytes = 0; num_bytes < MAX_VLQ_BYTE_LEN; ++num_bytes) {
+    uint8_t byte = 0;
+    if (!GetAligned<uint8_t>(1, &byte)) return false;
+    // Widen before shifting: in plain int arithmetic the fifth group (shift 28)
+    // overflows and sign-extends into the upper half of *v.
+    *v |= static_cast<uint64_t>(byte & 0x7F) << (7 * num_bytes);
+    if ((byte & 0x80) == 0) return true;
+  }
+  // Too many continuation bytes. Reject the input instead of shifting past the
+  // width of the accumulator.
+  return false;
+}
+
+// Reads a VLQ int and undoes the zig-zag mapping, storing the signed result in
+// '*v'. Returns false, leaving '*v' untouched, when the VLQ read fails.
+inline bool BitReader::GetZigZagVlqInt(int64_t* v) {
+  uint64_t encoded;
+  if (GetVlqInt(&encoded)) {
+    // The lowest bit carries the sign: all ones when set, zero otherwise.
+    const uint64_t sign_mask = -(encoded & 1);
+    *reinterpret_cast<uint64_t*>(v) = (encoded >> 1) ^ sign_mask;
+    return true;
+  }
+  return false;
+}
+
+} // namespace parquet_cpp
+
+#endif // PARQUET_UTIL_BIT_STREAM_UTILS_INLINE_H
http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/337cf584/src/parquet/util/bit-util.h
----------------------------------------------------------------------
diff --git a/src/parquet/util/bit-util.h b/src/parquet/util/bit-util.h
new file mode 100644
index 0000000..3fbdbbe
--- /dev/null
+++ b/src/parquet/util/bit-util.h
@@ -0,0 +1,174 @@
+// Copyright 2012 Cloudera Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+
+#ifndef PARQUET_UTIL_BIT_UTIL_H
+#define PARQUET_UTIL_BIT_UTIL_H
+
+#if defined(__APPLE__)
+ #include <machine/endian.h>
+#else
+ #include <endian.h>
+#endif
+
+#include "parquet/util/compiler-util.h"
+#include "parquet/util/logging.h"
+
+namespace parquet_cpp {
+
+// Utility class to do standard bit tricks
+// TODO: is this in boost or something else like that?
+class BitUtil {
+ public:
+ // Returns the ceil of value/divisor
+ static inline int Ceil(int value, int divisor) {
+ return value / divisor + (value % divisor != 0);
+ }
+
+ // Returns 'value' rounded up to the nearest multiple of 'factor'
+ static inline int RoundUp(int value, int factor) {
+ return (value + (factor - 1)) / factor * factor;
+ }
+
+ // Returns 'value' rounded down to the nearest multiple of 'factor'
+ static inline int RoundDown(int value, int factor) {
+ return (value / factor) * factor;
+ }
+
+ // Counts the bits set in x by clearing the lowest set bit until none remain.
+ static inline int Popcount(uint64_t x) {
+   int set_bits = 0;
+   while (x != 0) {
+     x &= x - 1;
+     ++set_bits;
+   }
+   return set_bits;
+ }
+
+ // Keeps only the 'num_bits' lowest bits of 'v'. A width of 0 yields 0 and a
+ // width of 64 or more yields 'v' unchanged.
+ static inline uint64_t TrailingBits(uint64_t v, int num_bits) {
+   if (UNLIKELY(num_bits == 0)) return 0;
+   if (UNLIKELY(num_bits >= 64)) return v;
+   // Push the unwanted high bits off the top, then bring the rest back down.
+   const int discarded = 64 - num_bits;
+   const uint64_t shifted_up = v << discarded;
+   return shifted_up >> discarded;
+ }
+
+ // Returns ceil(log2(x)) for x > 1. As written, this returns 0 for x == 0 and
+ // 1 for x == 1.
+ // TODO: this could be faster if we use __builtin_clz. Fix this if this ever shows up
+ // in a hot path.
+ static inline int Log2(uint64_t x) {
+   if (x == 0) return 0;
+   // ceil(log2(x)) == floor(log2(x - 1)) + 1 for x > 1, so count how many times
+   // x - 1 can be halved before it reaches zero, starting the count at 1.
+   int bits = 1;
+   for (uint64_t rest = (x - 1) >> 1; rest != 0; rest >>= 1) {
+     ++bits;
+   }
+   return bits;
+ }
+
+ // Returns the minimum number of bits needed to represent the value of 'x'
+ // (the 1-based position of the highest set bit; 0 for x == 0).
+ static inline int NumRequiredBits(uint64_t x) {
+   // Shift x itself down rather than building a mask with `1L << i`: where long
+   // is 32 bits that mask is undefined for the upper bits, and at i == 63 it
+   // shifts into the sign bit.
+   int num_bits = 0;
+   for (; x != 0; x >>= 1) {
+     ++num_bits;
+   }
+   return num_bits;
+ }
+
+ // Swaps the byte order (i.e. endianness)
+ static inline int64_t ByteSwap(int64_t value) {
+ return __builtin_bswap64(value);
+ }
+ static inline uint64_t ByteSwap(uint64_t value) {
+ return static_cast<uint64_t>(__builtin_bswap64(value));
+ }
+ static inline int32_t ByteSwap(int32_t value) {
+ return __builtin_bswap32(value);
+ }
+ static inline uint32_t ByteSwap(uint32_t value) {
+ return static_cast<uint32_t>(__builtin_bswap32(value));
+ }
+ static inline int16_t ByteSwap(int16_t value) {
+ return (((value >> 8) & 0xff) | ((value & 0xff) << 8));
+ }
+ static inline uint16_t ByteSwap(uint16_t value) {
+ return static_cast<uint16_t>(ByteSwap(static_cast<int16_t>(value)));
+ }
+
+ // Writes the byte-swapped contents of src into dst. src and dst cannot overlap.
+ static inline void ByteSwap(void* dst, const void* src, int len) {
+ switch (len) {
+ case 1:
+ *reinterpret_cast<int8_t*>(dst) = *reinterpret_cast<const int8_t*>(src);
+ return;
+ case 2:
+ *reinterpret_cast<int16_t*>(dst) =
+ ByteSwap(*reinterpret_cast<const int16_t*>(src));
+ return;
+ case 4:
+ *reinterpret_cast<int32_t*>(dst) =
+ ByteSwap(*reinterpret_cast<const int32_t*>(src));
+ return;
+ case 8:
+ *reinterpret_cast<int64_t*>(dst) =
+ ByteSwap(*reinterpret_cast<const int64_t*>(src));
+ return;
+ default: break;
+ }
+
+ uint8_t* d = reinterpret_cast<uint8_t*>(dst);
+ const uint8_t* s = reinterpret_cast<const uint8_t*>(src);
+ for (int i = 0; i < len; ++i) {
+ d[i] = s[len - i - 1];
+ }
+ }
+
+ // Converts to big endian format (if not already in big endian) from the
+ // machine's native endian format.
+#if __BYTE_ORDER == __LITTLE_ENDIAN
+ static inline int64_t ToBigEndian(int64_t value) { return ByteSwap(value); }
+ static inline uint64_t ToBigEndian(uint64_t value) { return ByteSwap(value); }
+ static inline int32_t ToBigEndian(int32_t value) { return ByteSwap(value); }
+ static inline uint32_t ToBigEndian(uint32_t value) { return ByteSwap(value); }
+ static inline int16_t ToBigEndian(int16_t value) { return ByteSwap(value); }
+ static inline uint16_t ToBigEndian(uint16_t value) { return ByteSwap(value); }
+#else
+ static inline int64_t ToBigEndian(int64_t val) { return val; }
+ static inline uint64_t ToBigEndian(uint64_t val) { return val; }
+ static inline int32_t ToBigEndian(int32_t val) { return val; }
+ static inline uint32_t ToBigEndian(uint32_t val) { return val; }
+ static inline int16_t ToBigEndian(int16_t val) { return val; }
+ static inline uint16_t ToBigEndian(uint16_t val) { return val; }
+#endif
+
+ // Converts from big endian format to the machine's native endian format.
+#if __BYTE_ORDER == __LITTLE_ENDIAN
+ static inline int64_t FromBigEndian(int64_t value) { return ByteSwap(value); }
+ static inline uint64_t FromBigEndian(uint64_t value) { return ByteSwap(value); }
+ static inline int32_t FromBigEndian(int32_t value) { return ByteSwap(value); }
+ static inline uint32_t FromBigEndian(uint32_t value) { return ByteSwap(value); }
+ static inline int16_t FromBigEndian(int16_t value) { return ByteSwap(value); }
+ static inline uint16_t FromBigEndian(uint16_t value) { return ByteSwap(value); }
+#else
+ static inline int64_t FromBigEndian(int64_t val) { return val; }
+ static inline uint64_t FromBigEndian(uint64_t val) { return val; }
+ static inline int32_t FromBigEndian(int32_t val) { return val; }
+ static inline uint32_t FromBigEndian(uint32_t val) { return val; }
+ static inline int16_t FromBigEndian(int16_t val) { return val; }
+ static inline uint16_t FromBigEndian(uint16_t val) { return val; }
+#endif
+
+};
+
+} // namespace parquet_cpp
+
+#endif // PARQUET_UTIL_BIT_UTIL_H
http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/337cf584/src/parquet/util/compiler-util.h
----------------------------------------------------------------------
diff --git a/src/parquet/util/compiler-util.h b/src/parquet/util/compiler-util.h
new file mode 100644
index 0000000..6425247
--- /dev/null
+++ b/src/parquet/util/compiler-util.h
@@ -0,0 +1,37 @@
+// Copyright 2012 Cloudera Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+
+#ifndef PARQUET_UTIL_COMPILER_UTIL_H
+#define PARQUET_UTIL_COMPILER_UTIL_H
+
+// Compiler hint that this branch is likely or unlikely to
+// be taken. Taken from the "What Every Programmer Should Know
+// About Memory" paper.
+// example: if (LIKELY(size > 0)) { ... }
+// example: if (UNLIKELY(!status.ok())) { ... }
+#ifdef LIKELY
+#undef LIKELY
+#endif
+
+#ifdef UNLIKELY
+#undef UNLIKELY
+#endif
+
+#define LIKELY(expr) __builtin_expect(!!(expr), 1)
+#define UNLIKELY(expr) __builtin_expect(!!(expr), 0)
+
+#define PREFETCH(addr) __builtin_prefetch(addr)
+
+#endif // PARQUET_UTIL_COMPILER_UTIL_H
http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/337cf584/src/parquet/util/logging.h
----------------------------------------------------------------------
diff --git a/src/parquet/util/logging.h b/src/parquet/util/logging.h
new file mode 100644
index 0000000..c6e6303
--- /dev/null
+++ b/src/parquet/util/logging.h
@@ -0,0 +1,31 @@
+// Copyright 2012 Cloudera Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+
+#ifndef PARQUET_UTIL_LOGGING_H
+#define PARQUET_UTIL_LOGGING_H
+
+#include <iostream>
+
+#define DCHECK(condition) while (false) std::cout
+#define DCHECK_EQ(a, b) while (false) std::cout
+#define DCHECK_NE(a, b) while (false) std::cout
+#define DCHECK_GT(a, b) while (false) std::cout
+#define DCHECK_LT(a, b) while (false) std::cout
+#define DCHECK_GE(a, b) while (false) std::cout
+#define DCHECK_LE(a, b) while (false) std::cout
+// No-op stubs (arguments are never evaluated), similar to how glog defines DCHECK for release.
+#define LOG(level) while (false) std::cout
+
+#endif
http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/337cf584/src/parquet/util/rle-encoding.h
----------------------------------------------------------------------
diff --git a/src/parquet/util/rle-encoding.h b/src/parquet/util/rle-encoding.h
new file mode 100644
index 0000000..b074d6d
--- /dev/null
+++ b/src/parquet/util/rle-encoding.h
@@ -0,0 +1,419 @@
+// Copyright 2012 Cloudera Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef PARQUET_UTIL_RLE_ENCODING_H
+#define PARQUET_UTIL_RLE_ENCODING_H
+
+#include <math.h>
+#include <algorithm>
+
+#include "parquet/util/compiler-util.h"
+#include "parquet/util/bit-stream-utils.inline.h"
+#include "parquet/util/bit-util.h"
+#include "parquet/util/logging.h"
+
+namespace parquet_cpp {
+
+// Utility classes to do run length encoding (RLE) for fixed bit width values. If runs
+// are sufficiently long, RLE is used, otherwise, the values are just bit-packed
+// (literal encoding).
+// For both types of runs, there is a byte-aligned indicator which encodes the length
+// of the run and the type of the run.
+// This encoding has the benefit that when there aren't any long enough runs, values
+// are always decoded at fixed (can be precomputed) bit offsets OR both the value and
+// the run length are byte aligned. This allows for very efficient decoding
+// implementations.
+// The encoding is:
+// encoded-block := run*
+// run := literal-run | repeated-run
+// literal-run := literal-indicator < literal bytes >
+// repeated-run := repeated-indicator < repeated value. padded to byte boundary >
+// literal-indicator := varint_encode( number_of_groups << 1 | 1)
+// repeated-indicator := varint_encode( number_of_repetitions << 1 )
+//
+// Each run is preceded by a varint. The varint's least significant bit is
+// used to indicate whether the run is a literal run or a repeated run. The rest
+// of the varint is used to determine the length of the run (eg how many times the
+// value repeats).
+//
+// In the case of literal runs, the run length is always a multiple of 8 (i.e. encode
+// in groups of 8), so that no matter the bit-width of the value, the sequence will end
+// on a byte boundary without padding.
+// Given that we know it is a multiple of 8, we store the number of 8-groups rather than
+// the actual number of encoded ints. (This means that the total number of encoded values
+// can not be determined from the encoded data, since the number of values in the last
+// group may not be a multiple of 8). For the last group of literal runs, we pad
+// the group to 8 with zeros. This allows for 8 at a time decoding on the read side
+// without the need for additional checks.
+//
+// There is a break-even point when it is more storage efficient to do run length
+// encoding. For 1 bit-width values, that point is 8 values. They require 2 bytes
+// for both the repeated encoding or the literal encoding. This value can always
+// be computed based on the bit-width.
+// TODO: think about how to use this for strings. The bit packing isn't quite the same.
+//
+// Examples with bit-width 1 (eg encoding booleans):
+// ----------------------------------------
+// 100 1s followed by 100 0s:
+// <varint(100 << 1)> <1, padded to 1 byte> <varint(100 << 1)> <0, padded to 1 byte>
+// - (total 4 bytes)
+//
+// alternating 1s and 0s (200 total):
+// 200 ints = 25 groups of 8
+// <varint((25 << 1) | 1)> <25 bytes of values, bitpacked>
+// (total 26 bytes, 1 byte overhead)
+//
+
+// Decoder class for RLE encoded data. Values are pulled one at a time with Get().
+class RleDecoder {
+ public:
+  // Create a decoder object. buffer/buffer_len is the encoded data.
+  // bit_width is the width of each value (before encoding).
+  RleDecoder(const uint8_t* buffer, int buffer_len, int bit_width)
+      : bit_reader_(buffer, buffer_len),
+        bit_width_(bit_width),
+        current_value_(0),
+        repeat_count_(0),
+        literal_count_(0) {
+    DCHECK_GE(bit_width_, 0);
+    DCHECK_LE(bit_width_, 64);
+  }
+
+  // Creates a decoder with no input attached. The decoder's own run state is
+  // zeroed so that it never starts out with indeterminate counts.
+  RleDecoder()
+      : bit_width_(0),
+        current_value_(0),
+        repeat_count_(0),
+        literal_count_(0) {}
+
+  // Gets the next value. Returns false if there are no more.
+  template<typename T>
+  bool Get(T* val);
+
+ private:
+  BitReader bit_reader_;
+  int bit_width_;
+  uint64_t current_value_;   // Value of the repeated run currently being replayed
+  uint32_t repeat_count_;    // Values left in the current repeated run
+  uint32_t literal_count_;   // Values left in the current literal run
+};
+
+// Class to incrementally build the rle data. This class does not allocate any memory.
+// The encoding has two modes: encoding repeated runs and literal runs.
+// If the run is sufficiently short, it is more efficient to encode as a literal run.
+// This class does so by buffering 8 values at a time. If they are not all the same
+// they are added to the literal run. If they are the same, they are added to the
+// repeated run. When we switch modes, the previous run is flushed out.
+class RleEncoder {
+ public:
+ // buffer/buffer_len: preallocated output buffer.
+ // bit_width: max number of bits for value.
+ // TODO: consider adding a min_repeated_run_length so the caller can control
+ // when values should be encoded as repeated runs. Currently this is derived
+ // based on the bit_width, which can determine a storage optimal choice.
+ // TODO: allow 0 bit_width (and have dict encoder use it)
+ RleEncoder(uint8_t* buffer, int buffer_len, int bit_width)
+ : bit_width_(bit_width),
+ bit_writer_(buffer, buffer_len) {
+ DCHECK_GE(bit_width_, 1);
+ DCHECK_LE(bit_width_, 64);
+ max_run_byte_size_ = MinBufferSize(bit_width);
+ DCHECK_GE(buffer_len, max_run_byte_size_) << "Input buffer not big enough.";
+ Clear();
+ }
+
+ // Returns the minimum buffer size needed to use the encoder for 'bit_width'
+ // This is the maximum length of a single run for 'bit_width'.
+ // It is not valid to pass a buffer less than this length.
+ static int MinBufferSize(int bit_width) {
+ // 1 indicator byte and MAX_VALUES_PER_LITERAL_RUN 'bit_width' values.
+ int max_literal_run_size = 1 +
+ BitUtil::Ceil(MAX_VALUES_PER_LITERAL_RUN * bit_width, 8);
+ // Up to MAX_VLQ_BYTE_LEN indicator and a single 'bit_width' value.
+ int max_repeated_run_size = BitReader::MAX_VLQ_BYTE_LEN + BitUtil::Ceil(bit_width, 8);
+ return std::max(max_literal_run_size, max_repeated_run_size);
+ }
+
+ // Returns the maximum byte size it could take to encode 'num_values'.
+ // The bound assumes every value lands in a literal run (one indicator byte per
+ // run of up to MAX_VALUES_PER_LITERAL_RUN values) plus one extra minimum-size
+ // run of slack.
+ static int MaxBufferSize(int bit_width, int num_values) {
+   // Integer divisor: Ceil() takes ints, so the former 8.0 was silently
+   // converted from double.
+   int bytes_per_run = BitUtil::Ceil(bit_width * MAX_VALUES_PER_LITERAL_RUN, 8);
+   int num_runs = BitUtil::Ceil(num_values, MAX_VALUES_PER_LITERAL_RUN);
+   int literal_max_size = num_runs + num_runs * bytes_per_run;
+   int min_run_size = MinBufferSize(bit_width);
+   return std::max(min_run_size, literal_max_size) + min_run_size;
+ }
+
+ // Encode value. Returns true if the value fits in buffer, false otherwise.
+ // This value must be representable with bit_width_ bits.
+ bool Put(uint64_t value);
+
+ // Flushes any pending values to the underlying buffer.
+ // Returns the total number of bytes written
+ int Flush();
+
+ // Resets all the state in the encoder.
+ void Clear();
+
+ // Returns pointer to underlying buffer
+ uint8_t* buffer() { return bit_writer_.buffer(); }
+ int32_t len() { return bit_writer_.bytes_written(); }
+
+ private:
+ // Flushes any buffered values. If this is part of a repeated run, this is largely
+ // a no-op.
+ // If it is part of a literal run, this will call FlushLiteralRun, which writes
+ // out the buffered literal values.
+ // If 'done' is true, the current run would be written even if it would normally
+ // have been buffered more. This should only be called at the end, when the
+ // encoder has received all values even if it would normally continue to be
+ // buffered.
+ void FlushBufferedValues(bool done);
+
+ // Flushes literal values to the underlying buffer. If update_indicator_byte,
+ // then the current literal run is complete and the indicator byte is updated.
+ void FlushLiteralRun(bool update_indicator_byte);
+
+ // Flushes a repeated run to the underlying buffer.
+ void FlushRepeatedRun();
+
+ // Checks and sets buffer_full_. This must be called after flushing a run to
+ // make sure there are enough bytes remaining to encode the next run.
+ void CheckBufferFull();
+
+ // The maximum number of values in a single literal run
+ // (number of groups encodable by a 1-byte indicator * 8)
+ static const int MAX_VALUES_PER_LITERAL_RUN = (1 << 6) * 8;
+
+ // Number of bits needed to encode the value.
+ const int bit_width_;
+
+ // Underlying buffer.
+ BitWriter bit_writer_;
+
+ // If true, the buffer is full and subsequent Put()'s will fail.
+ bool buffer_full_;
+
+ // The maximum byte size a single run can take.
+ int max_run_byte_size_;
+
+ // We need to buffer at most 8 values for literals. This happens when the
+ // bit_width is 1 (so 8 values fit in one byte).
+ // TODO: generalize this to other bit widths
+ int64_t buffered_values_[8];
+
+ // Number of values in buffered_values_
+ int num_buffered_values_;
+
+ // The current (also last) value that was written and the count of how
+ // many times in a row that value has been seen. This is maintained even
+ // if we are in a literal run. If the repeat_count_ gets high enough, we switch
+ // to encoding repeated runs.
+ int64_t current_value_;
+ int repeat_count_;
+
+ // Number of literals in the current run. This does not include the literals
+ // that might be in buffered_values_. Only after we've got a group big enough
+ // can we decide if they should be part of the literal_count_ or repeat_count_
+ int literal_count_;
+
+ // Pointer to a byte in the underlying buffer that stores the indicator byte.
+ // This is reserved as soon as we need a literal run but the value is written
+ // when the literal run is complete.
+ uint8_t* literal_indicator_byte_;
+};
+
+// Decodes the next value into '*val'. Returns false when the input is exhausted
+// or truncated (a run header, repeated value or literal value cannot be read).
+template<typename T>
+inline bool RleDecoder::Get(T* val) {
+  // A loop rather than a single check: a run whose encoded length is zero leaves
+  // both counters at 0, and decrementing the unsigned literal_count_ from 0
+  // would wrap around. Such empty runs are skipped instead.
+  while (UNLIKELY(literal_count_ == 0 && repeat_count_ == 0)) {
+    // Read the next run's indicator int, it could be a literal or repeated run
+    // The int is encoded as a vlq-encoded value.
+    uint64_t indicator_value = 0;
+    if (!bit_reader_.GetVlqInt(&indicator_value)) return false;
+
+    // lsb indicates if it is a literal run or repeated run
+    const bool is_literal = indicator_value & 1;
+    if (is_literal) {
+      literal_count_ = (indicator_value >> 1) * 8;
+    } else {
+      repeat_count_ = indicator_value >> 1;
+      // The repeated value follows the indicator, padded to whole bytes.
+      const bool value_read = bit_reader_.GetAligned<T>(
+          BitUtil::Ceil(bit_width_, 8), reinterpret_cast<T*>(&current_value_));
+      if (!value_read) {
+        // Truncated run: drop it so a later call does not replay a stale value.
+        repeat_count_ = 0;
+        return false;
+      }
+    }
+  }
+
+  if (LIKELY(repeat_count_ > 0)) {
+    *val = current_value_;
+    --repeat_count_;
+  } else {
+    DCHECK(literal_count_ > 0);
+    // Report a short buffer to the caller. DCHECK compiles to nothing here, so
+    // it cannot be relied on to catch a failed read.
+    if (!bit_reader_.GetValue(bit_width_, val)) return false;
+    --literal_count_;
+  }
+
+  return true;
+}
+
+// Values are staged eight at a time. Once a full group has been seen,
+// FlushBufferedValues() is called to decide whether it is written as literals
+// or folded into a repeated run.
+inline bool RleEncoder::Put(uint64_t value) {
+  DCHECK(bit_width_ == 64 || value < (1LL << bit_width_));
+  if (UNLIKELY(buffer_full_)) return false;
+
+  const bool extends_run = (current_value_ == value);
+  if (LIKELY(extends_run)) {
+    // Fast path for long repeated runs: past eight repeats the value only needs
+    // to be counted, not staged.
+    if (++repeat_count_ > 8) return true;
+  } else {
+    if (repeat_count_ >= 8) {
+      // A run that was long enough has just ended; write it out before
+      // starting to track the new value.
+      DCHECK_EQ(literal_count_, 0);
+      FlushRepeatedRun();
+    }
+    current_value_ = value;
+    repeat_count_ = 1;
+  }
+
+  buffered_values_[num_buffered_values_] = value;
+  ++num_buffered_values_;
+  if (num_buffered_values_ == 8) {
+    DCHECK_EQ(literal_count_ % 8, 0);
+    FlushBufferedValues(false);
+  }
+  return true;
+}
+
+inline void RleEncoder::FlushLiteralRun(bool update_indicator_byte) {
+ if (literal_indicator_byte_ == NULL) {
+ // The literal indicator byte has not been reserved yet, get one now.
+ literal_indicator_byte_ = bit_writer_.GetNextBytePtr();
+ DCHECK(literal_indicator_byte_ != NULL);
+ }
+
+ // Write all the buffered values as bit packed literals
+ for (int i = 0; i < num_buffered_values_; ++i) {
+ bool success = bit_writer_.PutValue(buffered_values_[i], bit_width_);
+ DCHECK(success) << "There is a bug in using CheckBufferFull()";
+ }
+ num_buffered_values_ = 0;
+
+ if (update_indicator_byte) {
+ // At this point we need to write the indicator byte for the literal run.
+ // We only reserve one byte, to allow for streaming writes of literal values.
+ // The logic makes sure we flush literal runs often enough to not overrun
+ // the 1 byte.
+ DCHECK_EQ(literal_count_ % 8, 0);
+ int num_groups = literal_count_ / 8;
+ int32_t indicator_value = (num_groups << 1) | 1;
+ DCHECK_EQ(indicator_value & 0xFFFFFF00, 0);
+ *literal_indicator_byte_ = indicator_value;
+ literal_indicator_byte_ = NULL;
+ literal_count_ = 0;
+ CheckBufferFull();
+ }
+}
+
+inline void RleEncoder::FlushRepeatedRun() {
+ DCHECK_GT(repeat_count_, 0);
+ bool result = true;
+ // The lsb of 0 indicates this is a repeated run
+ int32_t indicator_value = repeat_count_ << 1 | 0;
+ result &= bit_writer_.PutVlqInt(indicator_value);
+ result &= bit_writer_.PutAligned(current_value_, BitUtil::Ceil(bit_width_, 8));
+ DCHECK(result);
+ num_buffered_values_ = 0;
+ repeat_count_ = 0;
+ CheckBufferFull();
+}
+
+// Flush the values that have been buffered. At this point we decide whether
+// we need to switch between the run types or continue the current one.
+inline void RleEncoder::FlushBufferedValues(bool done) {
+ if (repeat_count_ >= 8) {
+ // Clear the buffered values. They are part of the repeated run now and we
+ // don't want to flush them out as literals.
+ num_buffered_values_ = 0;
+ if (literal_count_ != 0) {
+ // There was a current literal run. All the values in it have been flushed
+ // but we still need to update the indicator byte.
+ DCHECK_EQ(literal_count_ % 8, 0);
+ DCHECK_EQ(repeat_count_, 8);
+ FlushLiteralRun(true);
+ }
+ DCHECK_EQ(literal_count_, 0);
+ return;
+ }
+
+ literal_count_ += num_buffered_values_;
+ DCHECK_EQ(literal_count_ % 8, 0);
+ int num_groups = literal_count_ / 8;
+ if (num_groups + 1 >= (1 << 6)) {
+ // We need to start a new literal run because the indicator byte we've reserved
+ // cannot store more values.
+ DCHECK(literal_indicator_byte_ != NULL);
+ FlushLiteralRun(true);
+ } else {
+ FlushLiteralRun(done);
+ }
+ repeat_count_ = 0;
+}
+
+inline int RleEncoder::Flush() {
+ if (literal_count_ > 0 || repeat_count_ > 0 || num_buffered_values_ > 0) {
+ bool all_repeat = literal_count_ == 0 &&
+ (repeat_count_ == num_buffered_values_ || num_buffered_values_ == 0);
+ // There is something pending, figure out if it's a repeated or literal run
+ if (repeat_count_ > 0 && all_repeat) {
+ FlushRepeatedRun();
+ } else {
+ DCHECK_EQ(literal_count_ % 8, 0);
+ // Buffer the last group of literals to 8 by padding with 0s.
+ for (; num_buffered_values_ != 0 && num_buffered_values_ < 8;
+ ++num_buffered_values_) {
+ buffered_values_[num_buffered_values_] = 0;
+ }
+ literal_count_ += num_buffered_values_;
+ FlushLiteralRun(true);
+ repeat_count_ = 0;
+ }
+ }
+ bit_writer_.Flush();
+ DCHECK_EQ(num_buffered_values_, 0);
+ DCHECK_EQ(literal_count_, 0);
+ DCHECK_EQ(repeat_count_, 0);
+
+ return bit_writer_.bytes_written();
+}
+
+inline void RleEncoder::CheckBufferFull() {
+ int bytes_written = bit_writer_.bytes_written();
+ if (bytes_written + max_run_byte_size_ > bit_writer_.buffer_len()) {
+ buffer_full_ = true;
+ }
+}
+
+inline void RleEncoder::Clear() {
+ buffer_full_ = false;
+ current_value_ = 0;
+ repeat_count_ = 0;
+ num_buffered_values_ = 0;
+ literal_count_ = 0;
+ literal_indicator_byte_ = NULL;
+ bit_writer_.Clear();
+}
+
+} // namespace parquet_cpp
+
+#endif // PARQUET_UTIL_RLE_ENCODING_H
http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/337cf584/src/parquet/util/stopwatch.h
----------------------------------------------------------------------
diff --git a/src/parquet/util/stopwatch.h b/src/parquet/util/stopwatch.h
new file mode 100644
index 0000000..10ed9e9
--- /dev/null
+++ b/src/parquet/util/stopwatch.h
@@ -0,0 +1,49 @@
+// Copyright 2012 Cloudera Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef PARQUET_UTIL_STOPWATCH_H
+#define PARQUET_UTIL_STOPWATCH_H
+
+#include <iostream>
+#include <stdio.h>
+#include <ctime>
+#include <sys/time.h>
+
+namespace parquet_cpp {
+
+// Wall-clock interval timer built on gettimeofday(): call Start(), then Stop().
+class StopWatch {
+ public:
+ StopWatch() {}
+
+ // Records the current wall-clock time as the start of the interval.
+ void Start() {
+ gettimeofday(&start_time, 0);
+ }
+
+ // Returns the time elapsed since the last Start() call, in nanoseconds.
+ uint64_t Stop() {
+ struct timeval t_time;
+ gettimeofday(&t_time, 0);
+ // tv_sec counts seconds and tv_usec microseconds; scale both to nanoseconds.
+ return (1000L * 1000L * 1000L * (t_time.tv_sec - start_time.tv_sec)
+ + 1000L * (t_time.tv_usec - start_time.tv_usec));
+ }
+ private:
+ struct timeval start_time;
+};
+
+} // namespace parquet_cpp
+
+#endif
http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/337cf584/src/util/stopwatch.h
----------------------------------------------------------------------
diff --git a/src/util/stopwatch.h b/src/util/stopwatch.h
deleted file mode 100644
index 145f130..0000000
--- a/src/util/stopwatch.h
+++ /dev/null
@@ -1,49 +0,0 @@
-// Copyright 2012 Cloudera Inc.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#ifndef PARQUET_UTIL_STOPWATCH_H
-#define PARQUET_UTIL_STOPWATCH_H
-
-#include <iostream>
-#include <stdio.h>
-#include <ctime>
-#include <sys/time.h>
-
-namespace parquet_cpp {
-
-class StopWatch {
- public:
- StopWatch() {
- }
-
- void Start() {
- gettimeofday(&start_time, 0);
- }
-
- // Returns time in nanoseconds.
- uint64_t Stop() {
- struct timeval t_time;
- gettimeofday(&t_time, 0);
-
- return (1000L * 1000L * 1000L * (t_time.tv_sec - start_time.tv_sec)
- + (t_time.tv_usec - start_time.tv_usec));
- }
-
- private:
- struct timeval start_time;
-};
-
-}
-
-#endif
[4/7] parquet-cpp git commit: PARQUET-416: C++11 compilation,
code reorg, libparquet and installation targets
Posted by no...@apache.org.
http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/337cf584/generated/gen-cpp/parquet_types.h
----------------------------------------------------------------------
diff --git a/generated/gen-cpp/parquet_types.h b/generated/gen-cpp/parquet_types.h
deleted file mode 100644
index 4360d02..0000000
--- a/generated/gen-cpp/parquet_types.h
+++ /dev/null
@@ -1,1123 +0,0 @@
-/**
- * Autogenerated by Thrift Compiler (0.9.0)
- *
- * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING
- * @generated
- */
-#ifndef parquet_TYPES_H
-#define parquet_TYPES_H
-
-#include <thrift/Thrift.h>
-#include <thrift/TApplicationException.h>
-#include <thrift/protocol/TProtocol.h>
-#include <thrift/transport/TTransport.h>
-
-
-
-namespace parquet {
-
-struct Type {
- enum type {
- BOOLEAN = 0,
- INT32 = 1,
- INT64 = 2,
- INT96 = 3,
- FLOAT = 4,
- DOUBLE = 5,
- BYTE_ARRAY = 6,
- FIXED_LEN_BYTE_ARRAY = 7
- };
-};
-
-extern const std::map<int, const char*> _Type_VALUES_TO_NAMES;
-
-struct ConvertedType {
- enum type {
- UTF8 = 0,
- MAP = 1,
- MAP_KEY_VALUE = 2,
- LIST = 3,
- ENUM = 4,
- DECIMAL = 5
- };
-};
-
-extern const std::map<int, const char*> _ConvertedType_VALUES_TO_NAMES;
-
-struct FieldRepetitionType {
- enum type {
- REQUIRED = 0,
- OPTIONAL = 1,
- REPEATED = 2
- };
-};
-
-extern const std::map<int, const char*> _FieldRepetitionType_VALUES_TO_NAMES;
-
-struct Encoding {
- enum type {
- PLAIN = 0,
- PLAIN_DICTIONARY = 2,
- RLE = 3,
- BIT_PACKED = 4,
- DELTA_BINARY_PACKED = 5,
- DELTA_LENGTH_BYTE_ARRAY = 6,
- DELTA_BYTE_ARRAY = 7,
- RLE_DICTIONARY = 8
- };
-};
-
-extern const std::map<int, const char*> _Encoding_VALUES_TO_NAMES;
-
-struct CompressionCodec {
- enum type {
- UNCOMPRESSED = 0,
- SNAPPY = 1,
- GZIP = 2,
- LZO = 3
- };
-};
-
-extern const std::map<int, const char*> _CompressionCodec_VALUES_TO_NAMES;
-
-struct PageType {
- enum type {
- DATA_PAGE = 0,
- INDEX_PAGE = 1,
- DICTIONARY_PAGE = 2,
- DATA_PAGE_V2 = 3
- };
-};
-
-extern const std::map<int, const char*> _PageType_VALUES_TO_NAMES;
-
-typedef struct _Statistics__isset {
- _Statistics__isset() : max(false), min(false), null_count(false), distinct_count(false) {}
- bool max;
- bool min;
- bool null_count;
- bool distinct_count;
-} _Statistics__isset;
-
-class Statistics {
- public:
-
- static const char* ascii_fingerprint; // = "CE004821871820DD79A8FD98BB101F6D";
- static const uint8_t binary_fingerprint[16]; // = {0xCE,0x00,0x48,0x21,0x87,0x18,0x20,0xDD,0x79,0xA8,0xFD,0x98,0xBB,0x10,0x1F,0x6D};
-
- Statistics() : max(), min(), null_count(0), distinct_count(0) {
- }
-
- virtual ~Statistics() throw() {}
-
- std::string max;
- std::string min;
- int64_t null_count;
- int64_t distinct_count;
-
- _Statistics__isset __isset;
-
- void __set_max(const std::string& val) {
- max = val;
- __isset.max = true;
- }
-
- void __set_min(const std::string& val) {
- min = val;
- __isset.min = true;
- }
-
- void __set_null_count(const int64_t val) {
- null_count = val;
- __isset.null_count = true;
- }
-
- void __set_distinct_count(const int64_t val) {
- distinct_count = val;
- __isset.distinct_count = true;
- }
-
- bool operator == (const Statistics & rhs) const
- {
- if (__isset.max != rhs.__isset.max)
- return false;
- else if (__isset.max && !(max == rhs.max))
- return false;
- if (__isset.min != rhs.__isset.min)
- return false;
- else if (__isset.min && !(min == rhs.min))
- return false;
- if (__isset.null_count != rhs.__isset.null_count)
- return false;
- else if (__isset.null_count && !(null_count == rhs.null_count))
- return false;
- if (__isset.distinct_count != rhs.__isset.distinct_count)
- return false;
- else if (__isset.distinct_count && !(distinct_count == rhs.distinct_count))
- return false;
- return true;
- }
- bool operator != (const Statistics &rhs) const {
- return !(*this == rhs);
- }
-
- bool operator < (const Statistics & ) const;
-
- uint32_t read(::apache::thrift::protocol::TProtocol* iprot);
- uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const;
-
-};
-
-void swap(Statistics &a, Statistics &b);
-
-typedef struct _SchemaElement__isset {
- _SchemaElement__isset() : type(false), type_length(false), repetition_type(false), num_children(false), converted_type(false), scale(false), precision(false) {}
- bool type;
- bool type_length;
- bool repetition_type;
- bool num_children;
- bool converted_type;
- bool scale;
- bool precision;
-} _SchemaElement__isset;
-
-class SchemaElement {
- public:
-
- static const char* ascii_fingerprint; // = "388A784401753800444CFEAC8BC1B1A1";
- static const uint8_t binary_fingerprint[16]; // = {0x38,0x8A,0x78,0x44,0x01,0x75,0x38,0x00,0x44,0x4C,0xFE,0xAC,0x8B,0xC1,0xB1,0xA1};
-
- SchemaElement() : type((Type::type)0), type_length(0), repetition_type((FieldRepetitionType::type)0), name(), num_children(0), converted_type((ConvertedType::type)0), scale(0), precision(0) {
- }
-
- virtual ~SchemaElement() throw() {}
-
- Type::type type;
- int32_t type_length;
- FieldRepetitionType::type repetition_type;
- std::string name;
- int32_t num_children;
- ConvertedType::type converted_type;
- int32_t scale;
- int32_t precision;
-
- _SchemaElement__isset __isset;
-
- void __set_type(const Type::type val) {
- type = val;
- __isset.type = true;
- }
-
- void __set_type_length(const int32_t val) {
- type_length = val;
- __isset.type_length = true;
- }
-
- void __set_repetition_type(const FieldRepetitionType::type val) {
- repetition_type = val;
- __isset.repetition_type = true;
- }
-
- void __set_name(const std::string& val) {
- name = val;
- }
-
- void __set_num_children(const int32_t val) {
- num_children = val;
- __isset.num_children = true;
- }
-
- void __set_converted_type(const ConvertedType::type val) {
- converted_type = val;
- __isset.converted_type = true;
- }
-
- void __set_scale(const int32_t val) {
- scale = val;
- __isset.scale = true;
- }
-
- void __set_precision(const int32_t val) {
- precision = val;
- __isset.precision = true;
- }
-
- bool operator == (const SchemaElement & rhs) const
- {
- if (__isset.type != rhs.__isset.type)
- return false;
- else if (__isset.type && !(type == rhs.type))
- return false;
- if (__isset.type_length != rhs.__isset.type_length)
- return false;
- else if (__isset.type_length && !(type_length == rhs.type_length))
- return false;
- if (__isset.repetition_type != rhs.__isset.repetition_type)
- return false;
- else if (__isset.repetition_type && !(repetition_type == rhs.repetition_type))
- return false;
- if (!(name == rhs.name))
- return false;
- if (__isset.num_children != rhs.__isset.num_children)
- return false;
- else if (__isset.num_children && !(num_children == rhs.num_children))
- return false;
- if (__isset.converted_type != rhs.__isset.converted_type)
- return false;
- else if (__isset.converted_type && !(converted_type == rhs.converted_type))
- return false;
- if (__isset.scale != rhs.__isset.scale)
- return false;
- else if (__isset.scale && !(scale == rhs.scale))
- return false;
- if (__isset.precision != rhs.__isset.precision)
- return false;
- else if (__isset.precision && !(precision == rhs.precision))
- return false;
- return true;
- }
- bool operator != (const SchemaElement &rhs) const {
- return !(*this == rhs);
- }
-
- bool operator < (const SchemaElement & ) const;
-
- uint32_t read(::apache::thrift::protocol::TProtocol* iprot);
- uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const;
-
-};
-
-void swap(SchemaElement &a, SchemaElement &b);
-
-typedef struct _DataPageHeader__isset {
- _DataPageHeader__isset() : statistics(false) {}
- bool statistics;
-} _DataPageHeader__isset;
-
-class DataPageHeader {
- public:
-
- static const char* ascii_fingerprint; // = "5FC1792B0483E9C984475384165040B1";
- static const uint8_t binary_fingerprint[16]; // = {0x5F,0xC1,0x79,0x2B,0x04,0x83,0xE9,0xC9,0x84,0x47,0x53,0x84,0x16,0x50,0x40,0xB1};
-
- DataPageHeader() : num_values(0), encoding((Encoding::type)0), definition_level_encoding((Encoding::type)0), repetition_level_encoding((Encoding::type)0) {
- }
-
- virtual ~DataPageHeader() throw() {}
-
- int32_t num_values;
- Encoding::type encoding;
- Encoding::type definition_level_encoding;
- Encoding::type repetition_level_encoding;
- Statistics statistics;
-
- _DataPageHeader__isset __isset;
-
- void __set_num_values(const int32_t val) {
- num_values = val;
- }
-
- void __set_encoding(const Encoding::type val) {
- encoding = val;
- }
-
- void __set_definition_level_encoding(const Encoding::type val) {
- definition_level_encoding = val;
- }
-
- void __set_repetition_level_encoding(const Encoding::type val) {
- repetition_level_encoding = val;
- }
-
- void __set_statistics(const Statistics& val) {
- statistics = val;
- __isset.statistics = true;
- }
-
- bool operator == (const DataPageHeader & rhs) const
- {
- if (!(num_values == rhs.num_values))
- return false;
- if (!(encoding == rhs.encoding))
- return false;
- if (!(definition_level_encoding == rhs.definition_level_encoding))
- return false;
- if (!(repetition_level_encoding == rhs.repetition_level_encoding))
- return false;
- if (__isset.statistics != rhs.__isset.statistics)
- return false;
- else if (__isset.statistics && !(statistics == rhs.statistics))
- return false;
- return true;
- }
- bool operator != (const DataPageHeader &rhs) const {
- return !(*this == rhs);
- }
-
- bool operator < (const DataPageHeader & ) const;
-
- uint32_t read(::apache::thrift::protocol::TProtocol* iprot);
- uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const;
-
-};
-
-void swap(DataPageHeader &a, DataPageHeader &b);
-
-
-class IndexPageHeader {
- public:
-
- static const char* ascii_fingerprint; // = "99914B932BD37A50B983C5E7C90AE93B";
- static const uint8_t binary_fingerprint[16]; // = {0x99,0x91,0x4B,0x93,0x2B,0xD3,0x7A,0x50,0xB9,0x83,0xC5,0xE7,0xC9,0x0A,0xE9,0x3B};
-
- IndexPageHeader() {
- }
-
- virtual ~IndexPageHeader() throw() {}
-
-
- bool operator == (const IndexPageHeader & /* rhs */) const
- {
- return true;
- }
- bool operator != (const IndexPageHeader &rhs) const {
- return !(*this == rhs);
- }
-
- bool operator < (const IndexPageHeader & ) const;
-
- uint32_t read(::apache::thrift::protocol::TProtocol* iprot);
- uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const;
-
-};
-
-void swap(IndexPageHeader &a, IndexPageHeader &b);
-
-typedef struct _DictionaryPageHeader__isset {
- _DictionaryPageHeader__isset() : is_sorted(false) {}
- bool is_sorted;
-} _DictionaryPageHeader__isset;
-
-class DictionaryPageHeader {
- public:
-
- static const char* ascii_fingerprint; // = "B149E4528254D495610C22AE4BD539C5";
- static const uint8_t binary_fingerprint[16]; // = {0xB1,0x49,0xE4,0x52,0x82,0x54,0xD4,0x95,0x61,0x0C,0x22,0xAE,0x4B,0xD5,0x39,0xC5};
-
- DictionaryPageHeader() : num_values(0), encoding((Encoding::type)0), is_sorted(0) {
- }
-
- virtual ~DictionaryPageHeader() throw() {}
-
- int32_t num_values;
- Encoding::type encoding;
- bool is_sorted;
-
- _DictionaryPageHeader__isset __isset;
-
- void __set_num_values(const int32_t val) {
- num_values = val;
- }
-
- void __set_encoding(const Encoding::type val) {
- encoding = val;
- }
-
- void __set_is_sorted(const bool val) {
- is_sorted = val;
- __isset.is_sorted = true;
- }
-
- bool operator == (const DictionaryPageHeader & rhs) const
- {
- if (!(num_values == rhs.num_values))
- return false;
- if (!(encoding == rhs.encoding))
- return false;
- if (__isset.is_sorted != rhs.__isset.is_sorted)
- return false;
- else if (__isset.is_sorted && !(is_sorted == rhs.is_sorted))
- return false;
- return true;
- }
- bool operator != (const DictionaryPageHeader &rhs) const {
- return !(*this == rhs);
- }
-
- bool operator < (const DictionaryPageHeader & ) const;
-
- uint32_t read(::apache::thrift::protocol::TProtocol* iprot);
- uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const;
-
-};
-
-void swap(DictionaryPageHeader &a, DictionaryPageHeader &b);
-
-typedef struct _DataPageHeaderV2__isset {
- _DataPageHeaderV2__isset() : is_compressed(true), statistics(false) {}
- bool is_compressed;
- bool statistics;
-} _DataPageHeaderV2__isset;
-
-class DataPageHeaderV2 {
- public:
-
- static const char* ascii_fingerprint; // = "69FF2F6BD1A443440D5E46ABA5A3A919";
- static const uint8_t binary_fingerprint[16]; // = {0x69,0xFF,0x2F,0x6B,0xD1,0xA4,0x43,0x44,0x0D,0x5E,0x46,0xAB,0xA5,0xA3,0xA9,0x19};
-
- DataPageHeaderV2() : num_values(0), num_nulls(0), num_rows(0), encoding((Encoding::type)0), definition_levels_byte_length(0), repetition_levels_byte_length(0), is_compressed(true) {
- }
-
- virtual ~DataPageHeaderV2() throw() {}
-
- int32_t num_values;
- int32_t num_nulls;
- int32_t num_rows;
- Encoding::type encoding;
- int32_t definition_levels_byte_length;
- int32_t repetition_levels_byte_length;
- bool is_compressed;
- Statistics statistics;
-
- _DataPageHeaderV2__isset __isset;
-
- void __set_num_values(const int32_t val) {
- num_values = val;
- }
-
- void __set_num_nulls(const int32_t val) {
- num_nulls = val;
- }
-
- void __set_num_rows(const int32_t val) {
- num_rows = val;
- }
-
- void __set_encoding(const Encoding::type val) {
- encoding = val;
- }
-
- void __set_definition_levels_byte_length(const int32_t val) {
- definition_levels_byte_length = val;
- }
-
- void __set_repetition_levels_byte_length(const int32_t val) {
- repetition_levels_byte_length = val;
- }
-
- void __set_is_compressed(const bool val) {
- is_compressed = val;
- __isset.is_compressed = true;
- }
-
- void __set_statistics(const Statistics& val) {
- statistics = val;
- __isset.statistics = true;
- }
-
- bool operator == (const DataPageHeaderV2 & rhs) const
- {
- if (!(num_values == rhs.num_values))
- return false;
- if (!(num_nulls == rhs.num_nulls))
- return false;
- if (!(num_rows == rhs.num_rows))
- return false;
- if (!(encoding == rhs.encoding))
- return false;
- if (!(definition_levels_byte_length == rhs.definition_levels_byte_length))
- return false;
- if (!(repetition_levels_byte_length == rhs.repetition_levels_byte_length))
- return false;
- if (__isset.is_compressed != rhs.__isset.is_compressed)
- return false;
- else if (__isset.is_compressed && !(is_compressed == rhs.is_compressed))
- return false;
- if (__isset.statistics != rhs.__isset.statistics)
- return false;
- else if (__isset.statistics && !(statistics == rhs.statistics))
- return false;
- return true;
- }
- bool operator != (const DataPageHeaderV2 &rhs) const {
- return !(*this == rhs);
- }
-
- bool operator < (const DataPageHeaderV2 & ) const;
-
- uint32_t read(::apache::thrift::protocol::TProtocol* iprot);
- uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const;
-
-};
-
-void swap(DataPageHeaderV2 &a, DataPageHeaderV2 &b);
-
-typedef struct _PageHeader__isset {
- _PageHeader__isset() : crc(false), data_page_header(false), index_page_header(false), dictionary_page_header(false), data_page_header_v2(false) {}
- bool crc;
- bool data_page_header;
- bool index_page_header;
- bool dictionary_page_header;
- bool data_page_header_v2;
-} _PageHeader__isset;
-
-class PageHeader {
- public:
-
- static const char* ascii_fingerprint; // = "B5BD2BDF3756C883A58B30B9C9F204A0";
- static const uint8_t binary_fingerprint[16]; // = {0xB5,0xBD,0x2B,0xDF,0x37,0x56,0xC8,0x83,0xA5,0x8B,0x30,0xB9,0xC9,0xF2,0x04,0xA0};
-
- PageHeader() : type((PageType::type)0), uncompressed_page_size(0), compressed_page_size(0), crc(0) {
- }
-
- virtual ~PageHeader() throw() {}
-
- PageType::type type;
- int32_t uncompressed_page_size;
- int32_t compressed_page_size;
- int32_t crc;
- DataPageHeader data_page_header;
- IndexPageHeader index_page_header;
- DictionaryPageHeader dictionary_page_header;
- DataPageHeaderV2 data_page_header_v2;
-
- _PageHeader__isset __isset;
-
- void __set_type(const PageType::type val) {
- type = val;
- }
-
- void __set_uncompressed_page_size(const int32_t val) {
- uncompressed_page_size = val;
- }
-
- void __set_compressed_page_size(const int32_t val) {
- compressed_page_size = val;
- }
-
- void __set_crc(const int32_t val) {
- crc = val;
- __isset.crc = true;
- }
-
- void __set_data_page_header(const DataPageHeader& val) {
- data_page_header = val;
- __isset.data_page_header = true;
- }
-
- void __set_index_page_header(const IndexPageHeader& val) {
- index_page_header = val;
- __isset.index_page_header = true;
- }
-
- void __set_dictionary_page_header(const DictionaryPageHeader& val) {
- dictionary_page_header = val;
- __isset.dictionary_page_header = true;
- }
-
- void __set_data_page_header_v2(const DataPageHeaderV2& val) {
- data_page_header_v2 = val;
- __isset.data_page_header_v2 = true;
- }
-
- bool operator == (const PageHeader & rhs) const
- {
- if (!(type == rhs.type))
- return false;
- if (!(uncompressed_page_size == rhs.uncompressed_page_size))
- return false;
- if (!(compressed_page_size == rhs.compressed_page_size))
- return false;
- if (__isset.crc != rhs.__isset.crc)
- return false;
- else if (__isset.crc && !(crc == rhs.crc))
- return false;
- if (__isset.data_page_header != rhs.__isset.data_page_header)
- return false;
- else if (__isset.data_page_header && !(data_page_header == rhs.data_page_header))
- return false;
- if (__isset.index_page_header != rhs.__isset.index_page_header)
- return false;
- else if (__isset.index_page_header && !(index_page_header == rhs.index_page_header))
- return false;
- if (__isset.dictionary_page_header != rhs.__isset.dictionary_page_header)
- return false;
- else if (__isset.dictionary_page_header && !(dictionary_page_header == rhs.dictionary_page_header))
- return false;
- if (__isset.data_page_header_v2 != rhs.__isset.data_page_header_v2)
- return false;
- else if (__isset.data_page_header_v2 && !(data_page_header_v2 == rhs.data_page_header_v2))
- return false;
- return true;
- }
- bool operator != (const PageHeader &rhs) const {
- return !(*this == rhs);
- }
-
- bool operator < (const PageHeader & ) const;
-
- uint32_t read(::apache::thrift::protocol::TProtocol* iprot);
- uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const;
-
-};
-
-void swap(PageHeader &a, PageHeader &b);
-
-typedef struct _KeyValue__isset {
- _KeyValue__isset() : value(false) {}
- bool value;
-} _KeyValue__isset;
-
-class KeyValue {
- public:
-
- static const char* ascii_fingerprint; // = "5B708A954C550ECA9C1A49D3C5CAFAB9";
- static const uint8_t binary_fingerprint[16]; // = {0x5B,0x70,0x8A,0x95,0x4C,0x55,0x0E,0xCA,0x9C,0x1A,0x49,0xD3,0xC5,0xCA,0xFA,0xB9};
-
- KeyValue() : key(), value() {
- }
-
- virtual ~KeyValue() throw() {}
-
- std::string key;
- std::string value;
-
- _KeyValue__isset __isset;
-
- void __set_key(const std::string& val) {
- key = val;
- }
-
- void __set_value(const std::string& val) {
- value = val;
- __isset.value = true;
- }
-
- bool operator == (const KeyValue & rhs) const
- {
- if (!(key == rhs.key))
- return false;
- if (__isset.value != rhs.__isset.value)
- return false;
- else if (__isset.value && !(value == rhs.value))
- return false;
- return true;
- }
- bool operator != (const KeyValue &rhs) const {
- return !(*this == rhs);
- }
-
- bool operator < (const KeyValue & ) const;
-
- uint32_t read(::apache::thrift::protocol::TProtocol* iprot);
- uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const;
-
-};
-
-void swap(KeyValue &a, KeyValue &b);
-
-
-class SortingColumn {
- public:
-
- static const char* ascii_fingerprint; // = "F079C2D58A783AD90F9BE05D10DBBC6F";
- static const uint8_t binary_fingerprint[16]; // = {0xF0,0x79,0xC2,0xD5,0x8A,0x78,0x3A,0xD9,0x0F,0x9B,0xE0,0x5D,0x10,0xDB,0xBC,0x6F};
-
- SortingColumn() : column_idx(0), descending(0), nulls_first(0) {
- }
-
- virtual ~SortingColumn() throw() {}
-
- int32_t column_idx;
- bool descending;
- bool nulls_first;
-
- void __set_column_idx(const int32_t val) {
- column_idx = val;
- }
-
- void __set_descending(const bool val) {
- descending = val;
- }
-
- void __set_nulls_first(const bool val) {
- nulls_first = val;
- }
-
- bool operator == (const SortingColumn & rhs) const
- {
- if (!(column_idx == rhs.column_idx))
- return false;
- if (!(descending == rhs.descending))
- return false;
- if (!(nulls_first == rhs.nulls_first))
- return false;
- return true;
- }
- bool operator != (const SortingColumn &rhs) const {
- return !(*this == rhs);
- }
-
- bool operator < (const SortingColumn & ) const;
-
- uint32_t read(::apache::thrift::protocol::TProtocol* iprot);
- uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const;
-
-};
-
-void swap(SortingColumn &a, SortingColumn &b);
-
-typedef struct _ColumnMetaData__isset {
- _ColumnMetaData__isset() : key_value_metadata(false), index_page_offset(false), dictionary_page_offset(false), statistics(false) {}
- bool key_value_metadata;
- bool index_page_offset;
- bool dictionary_page_offset;
- bool statistics;
-} _ColumnMetaData__isset;
-
-class ColumnMetaData {
- public:
-
- static const char* ascii_fingerprint; // = "1AF797732BCB4465C6314FB29B86638D";
- static const uint8_t binary_fingerprint[16]; // = {0x1A,0xF7,0x97,0x73,0x2B,0xCB,0x44,0x65,0xC6,0x31,0x4F,0xB2,0x9B,0x86,0x63,0x8D};
-
- ColumnMetaData() : type((Type::type)0), codec((CompressionCodec::type)0), num_values(0), total_uncompressed_size(0), total_compressed_size(0), data_page_offset(0), index_page_offset(0), dictionary_page_offset(0) {
- }
-
- virtual ~ColumnMetaData() throw() {}
-
- Type::type type;
- std::vector<Encoding::type> encodings;
- std::vector<std::string> path_in_schema;
- CompressionCodec::type codec;
- int64_t num_values;
- int64_t total_uncompressed_size;
- int64_t total_compressed_size;
- std::vector<KeyValue> key_value_metadata;
- int64_t data_page_offset;
- int64_t index_page_offset;
- int64_t dictionary_page_offset;
- Statistics statistics;
-
- _ColumnMetaData__isset __isset;
-
- void __set_type(const Type::type val) {
- type = val;
- }
-
- void __set_encodings(const std::vector<Encoding::type> & val) {
- encodings = val;
- }
-
- void __set_path_in_schema(const std::vector<std::string> & val) {
- path_in_schema = val;
- }
-
- void __set_codec(const CompressionCodec::type val) {
- codec = val;
- }
-
- void __set_num_values(const int64_t val) {
- num_values = val;
- }
-
- void __set_total_uncompressed_size(const int64_t val) {
- total_uncompressed_size = val;
- }
-
- void __set_total_compressed_size(const int64_t val) {
- total_compressed_size = val;
- }
-
- void __set_key_value_metadata(const std::vector<KeyValue> & val) {
- key_value_metadata = val;
- __isset.key_value_metadata = true;
- }
-
- void __set_data_page_offset(const int64_t val) {
- data_page_offset = val;
- }
-
- void __set_index_page_offset(const int64_t val) {
- index_page_offset = val;
- __isset.index_page_offset = true;
- }
-
- void __set_dictionary_page_offset(const int64_t val) {
- dictionary_page_offset = val;
- __isset.dictionary_page_offset = true;
- }
-
- void __set_statistics(const Statistics& val) {
- statistics = val;
- __isset.statistics = true;
- }
-
- bool operator == (const ColumnMetaData & rhs) const
- {
- if (!(type == rhs.type))
- return false;
- if (!(encodings == rhs.encodings))
- return false;
- if (!(path_in_schema == rhs.path_in_schema))
- return false;
- if (!(codec == rhs.codec))
- return false;
- if (!(num_values == rhs.num_values))
- return false;
- if (!(total_uncompressed_size == rhs.total_uncompressed_size))
- return false;
- if (!(total_compressed_size == rhs.total_compressed_size))
- return false;
- if (__isset.key_value_metadata != rhs.__isset.key_value_metadata)
- return false;
- else if (__isset.key_value_metadata && !(key_value_metadata == rhs.key_value_metadata))
- return false;
- if (!(data_page_offset == rhs.data_page_offset))
- return false;
- if (__isset.index_page_offset != rhs.__isset.index_page_offset)
- return false;
- else if (__isset.index_page_offset && !(index_page_offset == rhs.index_page_offset))
- return false;
- if (__isset.dictionary_page_offset != rhs.__isset.dictionary_page_offset)
- return false;
- else if (__isset.dictionary_page_offset && !(dictionary_page_offset == rhs.dictionary_page_offset))
- return false;
- if (__isset.statistics != rhs.__isset.statistics)
- return false;
- else if (__isset.statistics && !(statistics == rhs.statistics))
- return false;
- return true;
- }
- bool operator != (const ColumnMetaData &rhs) const {
- return !(*this == rhs);
- }
-
- bool operator < (const ColumnMetaData & ) const;
-
- uint32_t read(::apache::thrift::protocol::TProtocol* iprot);
- uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const;
-
-};
-
-void swap(ColumnMetaData &a, ColumnMetaData &b);
-
-typedef struct _ColumnChunk__isset {
- _ColumnChunk__isset() : file_path(false), meta_data(false) {}
- bool file_path;
- bool meta_data;
-} _ColumnChunk__isset;
-
-class ColumnChunk {
- public:
-
- static const char* ascii_fingerprint; // = "169FC47057EF3D82E2FACDDEC2641AE8";
- static const uint8_t binary_fingerprint[16]; // = {0x16,0x9F,0xC4,0x70,0x57,0xEF,0x3D,0x82,0xE2,0xFA,0xCD,0xDE,0xC2,0x64,0x1A,0xE8};
-
- ColumnChunk() : file_path(), file_offset(0) {
- }
-
- virtual ~ColumnChunk() throw() {}
-
- std::string file_path;
- int64_t file_offset;
- ColumnMetaData meta_data;
-
- _ColumnChunk__isset __isset;
-
- void __set_file_path(const std::string& val) {
- file_path = val;
- __isset.file_path = true;
- }
-
- void __set_file_offset(const int64_t val) {
- file_offset = val;
- }
-
- void __set_meta_data(const ColumnMetaData& val) {
- meta_data = val;
- __isset.meta_data = true;
- }
-
- bool operator == (const ColumnChunk & rhs) const
- {
- if (__isset.file_path != rhs.__isset.file_path)
- return false;
- else if (__isset.file_path && !(file_path == rhs.file_path))
- return false;
- if (!(file_offset == rhs.file_offset))
- return false;
- if (__isset.meta_data != rhs.__isset.meta_data)
- return false;
- else if (__isset.meta_data && !(meta_data == rhs.meta_data))
- return false;
- return true;
- }
- bool operator != (const ColumnChunk &rhs) const {
- return !(*this == rhs);
- }
-
- bool operator < (const ColumnChunk & ) const;
-
- uint32_t read(::apache::thrift::protocol::TProtocol* iprot);
- uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const;
-
-};
-
-void swap(ColumnChunk &a, ColumnChunk &b);
-
-typedef struct _RowGroup__isset {
- _RowGroup__isset() : sorting_columns(false) {}
- bool sorting_columns;
-} _RowGroup__isset;
-
-class RowGroup {
- public:
-
- static const char* ascii_fingerprint; // = "DC7968627FA826DDC4C6C9BE773586C9";
- static const uint8_t binary_fingerprint[16]; // = {0xDC,0x79,0x68,0x62,0x7F,0xA8,0x26,0xDD,0xC4,0xC6,0xC9,0xBE,0x77,0x35,0x86,0xC9};
-
- RowGroup() : total_byte_size(0), num_rows(0) {
- }
-
- virtual ~RowGroup() throw() {}
-
- std::vector<ColumnChunk> columns;
- int64_t total_byte_size;
- int64_t num_rows;
- std::vector<SortingColumn> sorting_columns;
-
- _RowGroup__isset __isset;
-
- void __set_columns(const std::vector<ColumnChunk> & val) {
- columns = val;
- }
-
- void __set_total_byte_size(const int64_t val) {
- total_byte_size = val;
- }
-
- void __set_num_rows(const int64_t val) {
- num_rows = val;
- }
-
- void __set_sorting_columns(const std::vector<SortingColumn> & val) {
- sorting_columns = val;
- __isset.sorting_columns = true;
- }
-
- bool operator == (const RowGroup & rhs) const
- {
- if (!(columns == rhs.columns))
- return false;
- if (!(total_byte_size == rhs.total_byte_size))
- return false;
- if (!(num_rows == rhs.num_rows))
- return false;
- if (__isset.sorting_columns != rhs.__isset.sorting_columns)
- return false;
- else if (__isset.sorting_columns && !(sorting_columns == rhs.sorting_columns))
- return false;
- return true;
- }
- bool operator != (const RowGroup &rhs) const {
- return !(*this == rhs);
- }
-
- bool operator < (const RowGroup & ) const;
-
- uint32_t read(::apache::thrift::protocol::TProtocol* iprot);
- uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const;
-
-};
-
-void swap(RowGroup &a, RowGroup &b);
-
-typedef struct _FileMetaData__isset {
- _FileMetaData__isset() : key_value_metadata(false), created_by(false) {}
- bool key_value_metadata;
- bool created_by;
-} _FileMetaData__isset;
-
-class FileMetaData {
- public:
-
- static const char* ascii_fingerprint; // = "44DC7D83A66D54A7B7892A985C4125C9";
- static const uint8_t binary_fingerprint[16]; // = {0x44,0xDC,0x7D,0x83,0xA6,0x6D,0x54,0xA7,0xB7,0x89,0x2A,0x98,0x5C,0x41,0x25,0xC9};
-
- FileMetaData() : version(0), num_rows(0), created_by() {
- }
-
- virtual ~FileMetaData() throw() {}
-
- int32_t version;
- std::vector<SchemaElement> schema;
- int64_t num_rows;
- std::vector<RowGroup> row_groups;
- std::vector<KeyValue> key_value_metadata;
- std::string created_by;
-
- _FileMetaData__isset __isset;
-
- void __set_version(const int32_t val) {
- version = val;
- }
-
- void __set_schema(const std::vector<SchemaElement> & val) {
- schema = val;
- }
-
- void __set_num_rows(const int64_t val) {
- num_rows = val;
- }
-
- void __set_row_groups(const std::vector<RowGroup> & val) {
- row_groups = val;
- }
-
- void __set_key_value_metadata(const std::vector<KeyValue> & val) {
- key_value_metadata = val;
- __isset.key_value_metadata = true;
- }
-
- void __set_created_by(const std::string& val) {
- created_by = val;
- __isset.created_by = true;
- }
-
- bool operator == (const FileMetaData & rhs) const
- {
- if (!(version == rhs.version))
- return false;
- if (!(schema == rhs.schema))
- return false;
- if (!(num_rows == rhs.num_rows))
- return false;
- if (!(row_groups == rhs.row_groups))
- return false;
- if (__isset.key_value_metadata != rhs.__isset.key_value_metadata)
- return false;
- else if (__isset.key_value_metadata && !(key_value_metadata == rhs.key_value_metadata))
- return false;
- if (__isset.created_by != rhs.__isset.created_by)
- return false;
- else if (__isset.created_by && !(created_by == rhs.created_by))
- return false;
- return true;
- }
- bool operator != (const FileMetaData &rhs) const {
- return !(*this == rhs);
- }
-
- bool operator < (const FileMetaData & ) const;
-
- uint32_t read(::apache::thrift::protocol::TProtocol* iprot);
- uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const;
-
-};
-
-void swap(FileMetaData &a, FileMetaData &b);
-
-} // namespace
-
-#endif
http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/337cf584/src/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
deleted file mode 100644
index 82725d7..0000000
--- a/src/CMakeLists.txt
+++ /dev/null
@@ -1,19 +0,0 @@
-# Copyright 2012 Cloudera Inc.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-add_library(Parquet STATIC
- parquet.cc
-)
-
-add_subdirectory(compression)
http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/337cf584/src/compression/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/src/compression/CMakeLists.txt b/src/compression/CMakeLists.txt
deleted file mode 100644
index c8f0d2b..0000000
--- a/src/compression/CMakeLists.txt
+++ /dev/null
@@ -1,18 +0,0 @@
-# Copyright 2012 Cloudera Inc.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-add_library(ParquetCompression STATIC
- lz4-codec.cc
- snappy-codec.cc
-)
http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/337cf584/src/compression/codec.h
----------------------------------------------------------------------
diff --git a/src/compression/codec.h b/src/compression/codec.h
deleted file mode 100644
index 4ce0139..0000000
--- a/src/compression/codec.h
+++ /dev/null
@@ -1,72 +0,0 @@
-// Copyright 2012 Cloudera Inc.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#ifndef PARQUET_COMPRESSION_CODEC_H
-#define PARQUET_COMPRESSION_CODEC_H
-
-#include "parquet/parquet.h"
-
-#include <boost/cstdint.hpp>
-#include "gen-cpp/parquet_constants.h"
-#include "gen-cpp/parquet_types.h"
-
-namespace parquet_cpp {
-
-class Codec {
- public:
- virtual ~Codec() {}
- virtual void Decompress(int input_len, const uint8_t* input,
- int output_len, uint8_t* output_buffer) = 0;
-
- virtual int Compress(int input_len, const uint8_t* input,
- int output_buffer_len, uint8_t* output_buffer) = 0;
-
- virtual int MaxCompressedLen(int input_len, const uint8_t* input) = 0;
-
- virtual const char* name() const = 0;
-};
-
-
-// Snappy codec.
-class SnappyCodec : public Codec {
- public:
- virtual void Decompress(int input_len, const uint8_t* input,
- int output_len, uint8_t* output_buffer);
-
- virtual int Compress(int input_len, const uint8_t* input,
- int output_buffer_len, uint8_t* output_buffer);
-
- virtual int MaxCompressedLen(int input_len, const uint8_t* input);
-
- virtual const char* name() const { return "snappy"; }
-};
-
-// Lz4 codec.
-class Lz4Codec : public Codec {
- public:
- virtual void Decompress(int input_len, const uint8_t* input,
- int output_len, uint8_t* output_buffer);
-
- virtual int Compress(int input_len, const uint8_t* input,
- int output_buffer_len, uint8_t* output_buffer);
-
- virtual int MaxCompressedLen(int input_len, const uint8_t* input);
-
- virtual const char* name() const { return "lz4"; }
-};
-
-}
-
-#endif
-
http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/337cf584/src/compression/lz4-codec.cc
----------------------------------------------------------------------
diff --git a/src/compression/lz4-codec.cc b/src/compression/lz4-codec.cc
deleted file mode 100644
index 8b8588c..0000000
--- a/src/compression/lz4-codec.cc
+++ /dev/null
@@ -1,38 +0,0 @@
-// Copyright 2012 Cloudera Inc.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#include "codec.h"
-
-#include <lz4.h>
-
-using namespace parquet_cpp;
-
-void Lz4Codec::Decompress(int input_len, const uint8_t* input,
- int output_len, uint8_t* output_buffer) {
- int n = LZ4_uncompress(reinterpret_cast<const char*>(input),
- reinterpret_cast<char*>(output_buffer), output_len);
- if (n != input_len) {
- throw ParquetException("Corrupt lz4 compressed data.");
- }
-}
-
-int Lz4Codec::MaxCompressedLen(int input_len, const uint8_t* input) {
- return LZ4_compressBound(input_len);
-}
-
-int Lz4Codec::Compress(int input_len, const uint8_t* input,
- int output_buffer_len, uint8_t* output_buffer) {
- return LZ4_compress(reinterpret_cast<const char*>(input),
- reinterpret_cast<char*>(output_buffer), input_len);
-}
http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/337cf584/src/compression/snappy-codec.cc
----------------------------------------------------------------------
diff --git a/src/compression/snappy-codec.cc b/src/compression/snappy-codec.cc
deleted file mode 100644
index 96d6559..0000000
--- a/src/compression/snappy-codec.cc
+++ /dev/null
@@ -1,40 +0,0 @@
-// Copyright 2012 Cloudera Inc.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#include "codec.h"
-
-#include <snappy.h>
-
-using namespace parquet_cpp;
-
-void SnappyCodec::Decompress(int input_len, const uint8_t* input,
- int output_len, uint8_t* output_buffer) {
- if (!snappy::RawUncompress(reinterpret_cast<const char*>(input),
- static_cast<size_t>(input_len), reinterpret_cast<char*>(output_buffer))) {
- throw ParquetException("Corrupt snappy compressed data.");
- }
-}
-
-int SnappyCodec::MaxCompressedLen(int input_len, const uint8_t* input) {
- return snappy::MaxCompressedLength(input_len);
-}
-
-int SnappyCodec::Compress(int input_len, const uint8_t* input,
- int output_buffer_len, uint8_t* output_buffer) {
- size_t output_len;
- snappy::RawCompress(reinterpret_cast<const char*>(input),
- static_cast<size_t>(input_len), reinterpret_cast<char*>(output_buffer),
- &output_len);
- return output_len;
-}
http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/337cf584/src/encodings/bool-encoding.h
----------------------------------------------------------------------
diff --git a/src/encodings/bool-encoding.h b/src/encodings/bool-encoding.h
deleted file mode 100644
index 1cccd1d..0000000
--- a/src/encodings/bool-encoding.h
+++ /dev/null
@@ -1,47 +0,0 @@
-// Copyright 2012 Cloudera Inc.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#ifndef PARQUET_BOOL_ENCODING_H
-#define PARQUET_BOOL_ENCODING_H
-
-#include "encodings.h"
-
-namespace parquet_cpp {
-
-class BoolDecoder : public Decoder {
- public:
- BoolDecoder() : Decoder(parquet::Type::BOOLEAN, parquet::Encoding::PLAIN) { }
-
- virtual void SetData(int num_values, const uint8_t* data, int len) {
- num_values_ = num_values;
- decoder_ = impala::RleDecoder(data, len, 1);
- }
-
- virtual int GetBool(bool* buffer, int max_values) {
- max_values = std::min(max_values, num_values_);
- for (int i = 0; i < max_values; ++i) {
- if (!decoder_.Get(&buffer[i])) ParquetException::EofException();
- }
- num_values_ -= max_values;
- return max_values;
- }
-
- private:
- impala::RleDecoder decoder_;
-};
-
-}
-
-#endif
-
http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/337cf584/src/encodings/delta-bit-pack-encoding.h
----------------------------------------------------------------------
diff --git a/src/encodings/delta-bit-pack-encoding.h b/src/encodings/delta-bit-pack-encoding.h
deleted file mode 100644
index 12de07a..0000000
--- a/src/encodings/delta-bit-pack-encoding.h
+++ /dev/null
@@ -1,114 +0,0 @@
-// Copyright 2012 Cloudera Inc.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#ifndef PARQUET_DELTA_BIT_PACK_ENCODING_H
-#define PARQUET_DELTA_BIT_PACK_ENCODING_H
-
-#include "encodings.h"
-
-namespace parquet_cpp {
-
-class DeltaBitPackDecoder : public Decoder {
- public:
- DeltaBitPackDecoder(const parquet::Type::type& type)
- : Decoder(type, parquet::Encoding::DELTA_BINARY_PACKED) {
- if (type != parquet::Type::INT32 && type != parquet::Type::INT64) {
- throw ParquetException("Delta bit pack encoding should only be for integer data.");
- }
- }
-
- virtual void SetData(int num_values, const uint8_t* data, int len) {
- num_values_ = num_values;
- decoder_ = impala::BitReader(data, len);
- values_current_block_ = 0;
- values_current_mini_block_ = 0;
- }
-
- virtual int GetInt32(int32_t* buffer, int max_values) {
- return GetInternal(buffer, max_values);
- }
-
- virtual int GetInt64(int64_t* buffer, int max_values) {
- return GetInternal(buffer, max_values);
- }
-
- private:
- void InitBlock() {
- uint64_t block_size;
- if (!decoder_.GetVlqInt(&block_size)) ParquetException::EofException();
- if (!decoder_.GetVlqInt(&num_mini_blocks_)) ParquetException::EofException();
- if (!decoder_.GetVlqInt(&values_current_block_)) {
- ParquetException::EofException();
- }
- if (!decoder_.GetZigZagVlqInt(&last_value_)) ParquetException::EofException();
- delta_bit_widths_.resize(num_mini_blocks_);
-
- if (!decoder_.GetZigZagVlqInt(&min_delta_)) ParquetException::EofException();
- for (int i = 0; i < num_mini_blocks_; ++i) {
- if (!decoder_.GetAligned<uint8_t>(1, &delta_bit_widths_[i])) {
- ParquetException::EofException();
- }
- }
- values_per_mini_block_ = block_size / num_mini_blocks_;
- mini_block_idx_ = 0;
- delta_bit_width_ = delta_bit_widths_[0];
- values_current_mini_block_ = values_per_mini_block_;
- }
-
- template <typename T>
- int GetInternal(T* buffer, int max_values) {
- max_values = std::min(max_values, num_values_);
- for (int i = 0; i < max_values; ++i) {
- if (UNLIKELY(values_current_mini_block_ == 0)) {
- ++mini_block_idx_;
- if (mini_block_idx_ < delta_bit_widths_.size()) {
- delta_bit_width_ = delta_bit_widths_[mini_block_idx_];
- values_current_mini_block_ = values_per_mini_block_;
- } else {
- InitBlock();
- buffer[i] = last_value_;
- continue;
- }
- }
-
- // TODO: the key to this algorithm is to decode the entire miniblock at once.
- int64_t delta;
- if (!decoder_.GetValue(delta_bit_width_, &delta)) ParquetException::EofException();
- delta += min_delta_;
- last_value_ += delta;
- buffer[i] = last_value_;
- --values_current_mini_block_;
- }
- num_values_ -= max_values;
- return max_values;
- }
-
- impala::BitReader decoder_;
- uint64_t values_current_block_;
- uint64_t num_mini_blocks_;
- uint64_t values_per_mini_block_;
- uint64_t values_current_mini_block_;
-
- int64_t min_delta_;
- int mini_block_idx_;
- std::vector<uint8_t> delta_bit_widths_;
- int delta_bit_width_;
-
- int64_t last_value_;
-};
-
-}
-
-#endif
-
http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/337cf584/src/encodings/delta-byte-array-encoding.h
----------------------------------------------------------------------
diff --git a/src/encodings/delta-byte-array-encoding.h b/src/encodings/delta-byte-array-encoding.h
deleted file mode 100644
index cdbbfde..0000000
--- a/src/encodings/delta-byte-array-encoding.h
+++ /dev/null
@@ -1,73 +0,0 @@
-// Copyright 2012 Cloudera Inc.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#ifndef PARQUET_DELTA_BYTE_ARRAY_ENCODING_H
-#define PARQUET_DELTA_BYTE_ARRAY_ENCODING_H
-
-#include "encodings.h"
-
-namespace parquet_cpp {
-
-class DeltaByteArrayDecoder : public Decoder {
- public:
- DeltaByteArrayDecoder()
- : Decoder(parquet::Type::BYTE_ARRAY, parquet::Encoding::DELTA_BYTE_ARRAY),
- prefix_len_decoder_(parquet::Type::INT32),
- suffix_decoder_() {
- }
-
- virtual void SetData(int num_values, const uint8_t* data, int len) {
- num_values_ = num_values;
- if (len == 0) return;
- int prefix_len_length = *reinterpret_cast<const int*>(data);
- data += 4;
- len -= 4;
- prefix_len_decoder_.SetData(num_values, data, prefix_len_length);
- data += prefix_len_length;
- len -= prefix_len_length;
- suffix_decoder_.SetData(num_values, data, len);
- }
-
- // TODO: this doesn't work and requires memory management. We need to allocate
- // new strings to store the results.
- virtual int GetByteArray(ByteArray* buffer, int max_values) {
- max_values = std::min(max_values, num_values_);
- for (int i = 0; i < max_values; ++i) {
- int prefix_len = 0;
- prefix_len_decoder_.GetInt32(&prefix_len, 1);
- ByteArray suffix;
- suffix_decoder_.GetByteArray(&suffix, 1);
- buffer[i].len = prefix_len + suffix.len;
-
- uint8_t* result = reinterpret_cast<uint8_t*>(malloc(buffer[i].len));
- memcpy(result, last_value_.ptr, prefix_len);
- memcpy(result + prefix_len, suffix.ptr, suffix.len);
-
- buffer[i].ptr = result;
- last_value_ = buffer[i];
- }
- num_values_ -= max_values;
- return max_values;
- }
-
- private:
- DeltaBitPackDecoder prefix_len_decoder_;
- DeltaLengthByteArrayDecoder suffix_decoder_;
- ByteArray last_value_;
-};
-
-}
-
-#endif
-
http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/337cf584/src/encodings/delta-length-byte-array-encoding.h
----------------------------------------------------------------------
diff --git a/src/encodings/delta-length-byte-array-encoding.h b/src/encodings/delta-length-byte-array-encoding.h
deleted file mode 100644
index d6f018c..0000000
--- a/src/encodings/delta-length-byte-array-encoding.h
+++ /dev/null
@@ -1,62 +0,0 @@
-// Copyright 2012 Cloudera Inc.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#ifndef PARQUET_DELTA_LENGTH_BYTE_ARRAY_ENCODING_H
-#define PARQUET_DELTA_LENGTH_BYTE_ARRAY_ENCODING_H
-
-#include "encodings.h"
-
-namespace parquet_cpp {
-
-class DeltaLengthByteArrayDecoder : public Decoder {
- public:
- DeltaLengthByteArrayDecoder()
- : Decoder(parquet::Type::BYTE_ARRAY, parquet::Encoding::DELTA_LENGTH_BYTE_ARRAY),
- len_decoder_(parquet::Type::INT32) {
- }
-
- virtual void SetData(int num_values, const uint8_t* data, int len) {
- num_values_ = num_values;
- if (len == 0) return;
- int total_lengths_len = *reinterpret_cast<const int*>(data);
- data += 4;
- len_decoder_.SetData(num_values, data, total_lengths_len);
- data_ = data + total_lengths_len;
- len_ = len - 4 - total_lengths_len;
- }
-
- virtual int GetByteArray(ByteArray* buffer, int max_values) {
- max_values = std::min(max_values, num_values_);
- int lengths[max_values];
- len_decoder_.GetInt32(lengths, max_values);
- for (int i = 0; i < max_values; ++i) {
- buffer[i].len = lengths[i];
- buffer[i].ptr = data_;
- data_ += lengths[i];
- len_ -= lengths[i];
- }
- num_values_ -= max_values;
- return max_values;
- }
-
- private:
- DeltaBitPackDecoder len_decoder_;
- const uint8_t* data_;
- int len_;
-};
-
-}
-
-#endif
-
http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/337cf584/src/encodings/dictionary-encoding.h
----------------------------------------------------------------------
diff --git a/src/encodings/dictionary-encoding.h b/src/encodings/dictionary-encoding.h
deleted file mode 100644
index ba1e302..0000000
--- a/src/encodings/dictionary-encoding.h
+++ /dev/null
@@ -1,146 +0,0 @@
-// Copyright 2012 Cloudera Inc.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#ifndef PARQUET_DICTIONARY_ENCODING_H
-#define PARQUET_DICTIONARY_ENCODING_H
-
-#include "encodings.h"
-
-namespace parquet_cpp {
-
-class DictionaryDecoder : public Decoder {
- public:
- // Initializes the dictionary with values from 'dictionary'. The data in dictionary
- // is not guaranteed to persist in memory after this call so the dictionary decoder
- // needs to copy the data out if necessary.
- DictionaryDecoder(const parquet::Type::type& type, Decoder* dictionary)
- : Decoder(type, parquet::Encoding::RLE_DICTIONARY) {
- int num_dictionary_values = dictionary->values_left();
- switch (type) {
- case parquet::Type::BOOLEAN:
- throw ParquetException("Boolean cols should not be dictionary encoded.");
-
- case parquet::Type::INT32:
- int32_dictionary_.resize(num_dictionary_values);
- dictionary->GetInt32(&int32_dictionary_[0], num_dictionary_values);
- break;
- case parquet::Type::INT64:
- int64_dictionary_.resize(num_dictionary_values);
- dictionary->GetInt64(&int64_dictionary_[0], num_dictionary_values);
- break;
- case parquet::Type::FLOAT:
- float_dictionary_.resize(num_dictionary_values);
- dictionary->GetFloat(&float_dictionary_[0], num_dictionary_values);
- break;
- case parquet::Type::DOUBLE:
- double_dictionary_.resize(num_dictionary_values);
- dictionary->GetDouble(&double_dictionary_[0], num_dictionary_values);
- break;
- case parquet::Type::BYTE_ARRAY: {
- byte_array_dictionary_.resize(num_dictionary_values);
- dictionary->GetByteArray(&byte_array_dictionary_[0], num_dictionary_values);
- int total_size = 0;
- for (int i = 0; i < num_dictionary_values; ++i) {
- total_size += byte_array_dictionary_[i].len;
- }
- byte_array_data_.resize(total_size);
- int offset = 0;
- for (int i = 0; i < num_dictionary_values; ++i) {
- memcpy(&byte_array_data_[offset],
- byte_array_dictionary_[i].ptr, byte_array_dictionary_[i].len);
- byte_array_dictionary_[i].ptr = &byte_array_data_[offset];
- offset += byte_array_dictionary_[i].len;
- }
- break;
- }
- default:
- ParquetException::NYI("Unsupported dictionary type");
- }
- }
-
- virtual void SetData(int num_values, const uint8_t* data, int len) {
- num_values_ = num_values;
- if (len == 0) return;
- uint8_t bit_width = *data;
- ++data;
- --len;
- idx_decoder_ = impala::RleDecoder(data, len, bit_width);
- }
-
- virtual int GetInt32(int32_t* buffer, int max_values) {
- max_values = std::min(max_values, num_values_);
- for (int i = 0; i < max_values; ++i) {
- buffer[i] = int32_dictionary_[index()];
- }
- return max_values;
- }
-
- virtual int GetInt64(int64_t* buffer, int max_values) {
- max_values = std::min(max_values, num_values_);
- for (int i = 0; i < max_values; ++i) {
- buffer[i] = int64_dictionary_[index()];
- }
- return max_values;
- }
-
- virtual int GetFloat(float* buffer, int max_values) {
- max_values = std::min(max_values, num_values_);
- for (int i = 0; i < max_values; ++i) {
- buffer[i] = float_dictionary_[index()];
- }
- return max_values;
- }
-
- virtual int GetDouble(double* buffer, int max_values) {
- max_values = std::min(max_values, num_values_);
- for (int i = 0; i < max_values; ++i) {
- buffer[i] = double_dictionary_[index()];
- }
- return max_values;
- }
-
- virtual int GetByteArray(ByteArray* buffer, int max_values) {
- max_values = std::min(max_values, num_values_);
- for (int i = 0; i < max_values; ++i) {
- buffer[i] = byte_array_dictionary_[index()];
- }
- return max_values;
- }
-
- private:
- int index() {
- int idx = 0;
- if (!idx_decoder_.Get(&idx)) ParquetException::EofException();
- --num_values_;
- return idx;
- }
-
- // Only one is set.
- std::vector<int32_t> int32_dictionary_;
- std::vector<int64_t> int64_dictionary_;
- std::vector<float> float_dictionary_;
- std::vector<double> double_dictionary_;
- std::vector<ByteArray> byte_array_dictionary_;
-
- // Data that contains the byte array data (byte_array_dictionary_ just has the
- // pointers).
- std::vector<uint8_t> byte_array_data_;
-
- impala::RleDecoder idx_decoder_;
-};
-
-}
-
-#endif
-
http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/337cf584/src/encodings/encodings.h
----------------------------------------------------------------------
diff --git a/src/encodings/encodings.h b/src/encodings/encodings.h
deleted file mode 100644
index e888c1f..0000000
--- a/src/encodings/encodings.h
+++ /dev/null
@@ -1,83 +0,0 @@
-// Copyright 2012 Cloudera Inc.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#ifndef PARQUET_ENCODINGS_H
-#define PARQUET_ENCODINGS_H
-
-#include <boost/cstdint.hpp>
-#include "gen-cpp/parquet_constants.h"
-#include "gen-cpp/parquet_types.h"
-
-#include "impala/rle-encoding.h"
-#include "impala/bit-stream-utils.inline.h"
-
-namespace parquet_cpp {
-
-class Decoder {
- public:
- virtual ~Decoder() {}
-
- // Sets the data for a new page. This will be called multiple times on the same
- // decoder and should reset all internal state.
- virtual void SetData(int num_values, const uint8_t* data, int len) = 0;
-
- // Subclasses should override the ones they support. In each of these functions,
- // the decoder would decode put to 'max_values', storing the result in 'buffer'.
- // The function returns the number of values decoded, which should be max_values
- // except for end of the current data page.
- virtual int GetBool(bool* buffer, int max_values) {
- throw ParquetException("Decoder does not implement this type.");
- }
- virtual int GetInt32(int32_t* buffer, int max_values) {
- throw ParquetException("Decoder does not implement this type.");
- }
- virtual int GetInt64(int64_t* buffer, int max_values) {
- throw ParquetException("Decoder does not implement this type.");
- }
- virtual int GetFloat(float* buffer, int max_values) {
- throw ParquetException("Decoder does not implement this type.");
- }
- virtual int GetDouble(double* buffer, int max_values) {
- throw ParquetException("Decoder does not implement this type.");
- }
- virtual int GetByteArray(ByteArray* buffer, int max_values) {
- throw ParquetException("Decoder does not implement this type.");
- }
-
- // Returns the number of values left (for the last call to SetData()). This is
- // the number of values left in this page.
- int values_left() const { return num_values_; }
-
- const parquet::Encoding::type encoding() const { return encoding_; }
-
- protected:
- Decoder(const parquet::Type::type& type, const parquet::Encoding::type& encoding)
- : type_(type), encoding_(encoding), num_values_(0) {}
-
- const parquet::Type::type type_;
- const parquet::Encoding::type encoding_;
- int num_values_;
-};
-
-}
-
-#include "bool-encoding.h"
-#include "plain-encoding.h"
-#include "dictionary-encoding.h"
-#include "delta-bit-pack-encoding.h"
-#include "delta-length-byte-array-encoding.h"
-#include "delta-byte-array-encoding.h"
-
-#endif
-
http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/337cf584/src/encodings/plain-encoding.h
----------------------------------------------------------------------
diff --git a/src/encodings/plain-encoding.h b/src/encodings/plain-encoding.h
deleted file mode 100644
index 511dbdd..0000000
--- a/src/encodings/plain-encoding.h
+++ /dev/null
@@ -1,82 +0,0 @@
-// Copyright 2012 Cloudera Inc.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#ifndef PARQUET_PLAIN_ENCODING_H
-#define PARQUET_PLAIN_ENCODING_H
-
-#include "encodings.h"
-
-namespace parquet_cpp {
-
-// Decoder for PLAIN-encoded data. Fixed-width values are copied out of the
-// input byte-for-byte; a byte array is stored as a uint32_t length followed by
-// that many bytes and is handed back as a pointer into the input (no copy).
-class PlainDecoder : public Decoder {
- public:
-  PlainDecoder(const parquet::Type::type& type)
-    : Decoder(type, parquet::Encoding::PLAIN), data_(NULL), len_(0) {
-  }
-
-  // Starts decoding 'num_values' values stored in the 'len' bytes at 'data'.
-  // The bytes are not copied, so 'data' has to stay valid while values (and
-  // the ByteArray pointers returned by GetByteArray()) are in use.
-  virtual void SetData(int num_values, const uint8_t* data, int len) {
-    num_values_ = num_values;
-    data_ = data;
-    len_ = len;
-  }
-
-  // Copies up to 'max_values' values of 'byte_size' bytes each into 'buffer'
-  // and returns the number copied. Calls ParquetException::EofException() when
-  // fewer bytes remain than the values require.
-  int GetValues(void* buffer, int max_values, int byte_size) {
-    max_values = std::min(max_values, num_values_);
-    // Nothing to copy. This also keeps a negative count from reaching memcpy,
-    // where it would be converted to a huge unsigned size.
-    if (max_values <= 0) return 0;
-    // Widen before multiplying so that a large request cannot overflow 'int'
-    // and slip past the length check below.
-    const int64_t size = static_cast<int64_t>(max_values) * byte_size;
-    if (len_ < size) ParquetException::EofException();
-    memcpy(buffer, data_, size);
-    data_ += size;
-    // Fits in int as long as the check above held (EofException() is assumed
-    // not to return).
-    len_ -= static_cast<int>(size);
-    num_values_ -= max_values;
-    return max_values;
-  }
-
-  virtual int GetInt32(int32_t* buffer, int max_values) {
-    return GetValues(buffer, max_values, sizeof(int32_t));
-  }
-
-  virtual int GetInt64(int64_t* buffer, int max_values) {
-    return GetValues(buffer, max_values, sizeof(int64_t));
-  }
-
-  virtual int GetFloat(float* buffer, int max_values) {
-    return GetValues(buffer, max_values, sizeof(float));
-  }
-
-  virtual int GetDouble(double* buffer, int max_values) {
-    return GetValues(buffer, max_values, sizeof(double));
-  }
-
-  // Decodes up to 'max_values' length-prefixed byte arrays into 'buffer' and
-  // returns the number decoded. Each returned ByteArray points into the input
-  // passed to SetData(). Calls ParquetException::EofException() on truncated
-  // input.
-  virtual int GetByteArray(ByteArray* buffer, int max_values) {
-    max_values = std::min(max_values, num_values_);
-    if (max_values <= 0) return 0;
-    const int prefix_size = static_cast<int>(sizeof(uint32_t));
-    for (int i = 0; i < max_values; ++i) {
-      // The length prefix itself must be present before it may be read.
-      if (len_ < prefix_size) ParquetException::EofException();
-      // memcpy instead of a pointer cast: data_ carries no alignment guarantee.
-      uint32_t value_len = 0;
-      memcpy(&value_len, data_, sizeof(uint32_t));
-      // 64-bit sum so that a length close to UINT32_MAX cannot wrap around.
-      const int64_t consumed = static_cast<int64_t>(prefix_size) + value_len;
-      if (len_ < consumed) ParquetException::EofException();
-      buffer[i].len = value_len;
-      buffer[i].ptr = data_ + sizeof(uint32_t);
-      data_ += consumed;
-      len_ -= static_cast<int>(consumed);
-    }
-    num_values_ -= max_values;
-    return max_values;
-  }
-
- private:
-  const uint8_t* data_;  // next unread byte of the current input
-  int len_;              // number of unread bytes at data_
-};
-
-}
-
-#endif
-
http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/337cf584/src/impala/bit-stream-utils.h
----------------------------------------------------------------------
diff --git a/src/impala/bit-stream-utils.h b/src/impala/bit-stream-utils.h
deleted file mode 100644
index 5eba254..0000000
--- a/src/impala/bit-stream-utils.h
+++ /dev/null
@@ -1,145 +0,0 @@
-// Copyright 2012 Cloudera Inc.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-
-#ifndef IMPALA_UTIL_BIT_STREAM_UTILS_H
-#define IMPALA_UTIL_BIT_STREAM_UTILS_H
-
-#include <boost/cstdint.hpp>
-#include <string.h>
-#include "impala/compiler-util.h"
-#include "impala/bit-util.h"
-#include "impala/logging.h"
-
-namespace impala {
-
-// Utility class to write bit/byte streams. This class can write data to either be
-// bit packed or byte aligned (and a single stream that has a mix of both).
-// This class does not allocate memory.
-class BitWriter {
- public:
-  // Wraps the caller-provided 'buffer', which must already have room for
-  // 'buffer_len' bytes. The writer starts out empty.
-  BitWriter(uint8_t* buffer, int buffer_len)
-    : buffer_(buffer),
-      max_bytes_(buffer_len),
-      buffered_values_(0),
-      byte_offset_(0),
-      bit_offset_(0) {
-  }
-
-  // Rewinds to the start of the buffer and drops any pending bits.
-  void Clear() {
-    bit_offset_ = 0;
-    byte_offset_ = 0;
-    buffered_values_ = 0;
-  }
-
-  // Bytes written so far. A partially filled byte counts as a whole byte, and
-  // values that are still buffered are included.
-  int bytes_written() const {
-    const int pending_bytes = BitUtil::Ceil(bit_offset_, 8);
-    return byte_offset_ + pending_bytes;
-  }
-  uint8_t* buffer() const { return buffer_; }
-  int buffer_len() const { return max_bytes_; }
-
-  // Appends the low 'num_bits' bits of 'v', bit packed, going through
-  // buffered_values_ and spilling to buffer_ when it fills up.
-  // num_bits must be <= 32. Returns false if there was not enough space.
-  bool PutValue(uint64_t v, int num_bits);
-
-  // Writes 'v' at the next byte boundary using 'num_bytes' bytes. High-order
-  // bytes of T beyond 'num_bytes' are ignored. Returns false if there was not
-  // enough space.
-  template<typename T>
-  bool PutAligned(T v, int num_bytes);
-
-  // Writes 'v' as a byte-aligned variable-length quantity (vlq), see
-  // en.wikipedia.org/wiki/Variable-length_quantity
-  // Returns false if there was not enough room.
-  bool PutVlqInt(uint32_t v);
-  bool PutZigZagVlqInt(int32_t v);
-
-  // Reserves 'num_bytes' bytes starting at the next byte boundary and returns
-  // a pointer to the first of them, or NULL if there was not enough space.
-  uint8_t* GetNextBytePtr(int num_bytes = 1);
-
-  // Copies all buffered values out to the buffer; call this once writing is
-  // finished. With 'align' set, buffered_values_ is also reset so that later
-  // writes start at the next byte boundary.
-  void Flush(bool align=false);
-
- private:
-  uint8_t* buffer_;
-  int max_bytes_;
-
-  // Staging word for bit-packed values. They are accumulated here and then
-  // memcpy'd to buffer_, which is faster than writing buffer_ byte by byte.
-  uint64_t buffered_values_;
-
-  int byte_offset_;  // Offset in buffer_
-  int bit_offset_;   // Offset in buffered_values_
-};
-
-// Utility class to read bit/byte stream. This class can read bits or bytes
-// that are either byte aligned or not. It also has utilities to read multiple
-// bytes in one read (e.g. encoded int).
-class BitReader {
- public:
-  // 'buffer' is the buffer to read from. The buffer's length is 'buffer_len'.
-  BitReader(const uint8_t* buffer, int buffer_len) :
-    buffer_(buffer),
-    max_bytes_(buffer_len),
-    buffered_values_(0),
-    byte_offset_(0),
-    bit_offset_(0) {
-    // Preload up to 8 bytes. buffered_values_ was zeroed above, so for inputs
-    // shorter than 8 bytes the bytes that are not loaded read as 0 instead of
-    // being indeterminate.
-    int num_bytes = std::min(8, max_bytes_ - byte_offset_);
-    // Skip the copy for an empty (or negative-length) input: memcpy must not be
-    // handed a negative size or a NULL source.
-    if (num_bytes > 0) memcpy(&buffered_values_, buffer_ + byte_offset_, num_bytes);
-  }
-
-  // An empty reader: every read fails because there are no bytes available.
-  // All members are initialized so that the object can be inspected and copied
-  // safely before it is replaced by a real reader.
-  BitReader() :
-    buffer_(NULL),
-    max_bytes_(0),
-    buffered_values_(0),
-    byte_offset_(0),
-    bit_offset_(0) {}
-
-  // Gets the next value from the buffer. Returns true if 'v' could be read or false if
-  // there are not enough bytes left. num_bits must be <= 32.
-  template<typename T>
-  bool GetValue(int num_bits, T* v);
-
-  // Reads a 'num_bytes'-sized value from the buffer and stores it in 'v'. T needs to be a
-  // little-endian native type and big enough to store 'num_bytes'. The value is assumed
-  // to be byte-aligned so the stream will be advanced to the start of the next byte
-  // before 'v' is read. Returns false if there are not enough bytes left.
-  template<typename T>
-  bool GetAligned(int num_bytes, T* v);
-
-  // Reads a vlq encoded int from the stream. The encoded int must start at the
-  // beginning of a byte. Return false if there were not enough bytes in the buffer.
-  bool GetVlqInt(uint64_t* v);
-  bool GetZigZagVlqInt(int64_t* v);
-
-  // Returns the number of bytes left in the stream, not including the current byte (i.e.,
-  // there may be an additional fraction of a byte).
-  int bytes_left() const {
-    return max_bytes_ - (byte_offset_ + BitUtil::Ceil(bit_offset_, 8));
-  }
-
-  // Maximum byte length of a vlq encoded int
-  static const int MAX_VLQ_BYTE_LEN = 5;
-
- private:
-  const uint8_t* buffer_;
-  int max_bytes_;
-
-  // Bytes are memcpy'd from buffer_ and values are read from this variable. This is
-  // faster than reading values byte by byte directly from buffer_.
-  uint64_t buffered_values_;
-
-  int byte_offset_;  // Offset in buffer_
-  int bit_offset_;   // Offset in buffered_values_
-};
-
-}
-
-#endif
http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/337cf584/src/impala/bit-stream-utils.inline.h
----------------------------------------------------------------------
diff --git a/src/impala/bit-stream-utils.inline.h b/src/impala/bit-stream-utils.inline.h
deleted file mode 100644
index d84ef4d..0000000
--- a/src/impala/bit-stream-utils.inline.h
+++ /dev/null
@@ -1,164 +0,0 @@
-// Copyright 2012 Cloudera Inc.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-
-#ifndef IMPALA_UTIL_BIT_STREAM_UTILS_INLINE_H
-#define IMPALA_UTIL_BIT_STREAM_UTILS_INLINE_H
-
-#include "impala/bit-stream-utils.h"
-
-namespace impala {
-
-// Appends the low 'num_bits' bits of 'v' to the stream. Bits are gathered in
-// the 64-bit staging word and written to the buffer 8 bytes at a time.
-// Returns false, writing nothing, when the value does not fit in the buffer.
-inline bool BitWriter::PutValue(uint64_t v, int num_bits) {
-  // TODO: revisit this limit if necessary (can be raised to 64 by fixing some edge cases)
-  DCHECK_LE(num_bits, 32);
-  DCHECK_EQ(v >> num_bits, 0) << "v = " << v << ", num_bits = " << num_bits;
-
-  const int bits_in_use = byte_offset_ * 8 + bit_offset_;
-  const int bits_available = max_bytes_ * 8;
-  if (UNLIKELY(bits_in_use + num_bits > bits_available)) return false;
-
-  buffered_values_ |= v << bit_offset_;
-  bit_offset_ += num_bits;
-
-  // Common case: the staging word still has room.
-  if (LIKELY(bit_offset_ < 64)) return true;
-
-  // The staging word is full: write it out, then start a new word with the
-  // high bits of 'v' that did not fit into the old one.
-  memcpy(buffer_ + byte_offset_, &buffered_values_, 8);
-  byte_offset_ += 8;
-  bit_offset_ -= 64;
-  buffered_values_ = v >> (num_bits - bit_offset_);
-  DCHECK_LT(bit_offset_, 64);
-  return true;
-}
-
-// Copies the bytes currently held in the staging word into the buffer. The
-// write position only moves (to the next byte boundary) when 'align' is set;
-// otherwise the same bytes are written again by a later flush.
-inline void BitWriter::Flush(bool align) {
-  const int pending_bytes = BitUtil::Ceil(bit_offset_, 8);
-  DCHECK_LE(byte_offset_ + pending_bytes, max_bytes_);
-  memcpy(buffer_ + byte_offset_, &buffered_values_, pending_bytes);
-
-  if (!align) return;
-
-  byte_offset_ += pending_bytes;
-  bit_offset_ = 0;
-  buffered_values_ = 0;
-}
-
-// Byte-aligns the stream, then hands out the next 'num_bytes' bytes of the
-// buffer and moves the write position past them. Returns NULL, without
-// reserving anything, when fewer than 'num_bytes' bytes are left.
-inline uint8_t* BitWriter::GetNextBytePtr(int num_bytes) {
-  Flush(/* align */ true);
-  DCHECK_LE(byte_offset_, max_bytes_);
-
-  const int end_offset = byte_offset_ + num_bytes;
-  if (end_offset > max_bytes_) return NULL;
-
-  uint8_t* const reserved = buffer_ + byte_offset_;
-  byte_offset_ = end_offset;
-  return reserved;
-}
-
-// Stores the first 'num_bytes' bytes of the in-memory representation of 'val'
-// at the next byte boundary. Returns false when the buffer has no room left.
-template<typename T>
-inline bool BitWriter::PutAligned(T val, int num_bytes) {
-  uint8_t* const dest = GetNextBytePtr(num_bytes);
-  if (dest != NULL) {
-    memcpy(dest, &val, num_bytes);
-    return true;
-  }
-  return false;
-}
-
-// Writes 'v' in groups of 7 bits, least-significant group first. Every byte
-// except the last has its top bit set to signal that more bytes follow.
-// All bytes are attempted even after a failed write; the result is false if
-// any of them did not fit.
-inline bool BitWriter::PutVlqInt(uint32_t v) {
-  bool all_written = true;
-  for (; v >= 0x80; v >>= 7) {
-    const uint8_t group = (v & 0x7F) | 0x80;
-    all_written = PutAligned<uint8_t>(group, 1) && all_written;
-  }
-  const uint8_t last_group = v & 0x7F;
-  all_written = PutAligned<uint8_t>(last_group, 1) && all_written;
-  return all_written;
-}
-
-// Zig-zag encodes 'v' (0, -1, 1, -2, ... become 0, 1, 2, 3, ...) and writes
-// the result with PutVlqInt(). Returns false if there was not enough room.
-inline bool BitWriter::PutZigZagVlqInt(int32_t v) {
-  // Do the arithmetic on unsigned values: left-shifting a negative signed int
-  // is undefined behaviour and right-shifting one is implementation-defined.
-  const uint32_t bits = static_cast<uint32_t>(v);
-  // All ones for negative input, zero otherwise (what 'v >> 31' yields with an
-  // arithmetic shift).
-  const uint32_t sign_mask = (v < 0) ? 0xFFFFFFFFu : 0u;
-  return PutVlqInt((bits << 1) ^ sign_mask);
-}
-
-// Extracts the next 'num_bits' bits into '*v'. Bits come out of the 64-bit
-// staging word; when the read runs off its end, the next word is loaded from
-// the buffer and supplies the remaining high bits of the value.
-// Returns false, consuming nothing, when fewer than 'num_bits' bits are left.
-template<typename T>
-inline bool BitReader::GetValue(int num_bits, T* v) {
-  // TODO: revisit this limit if necessary
-  DCHECK_LE(num_bits, 32);
-  DCHECK_LE(num_bits, sizeof(T) * 8);
-
-  const int end_bit = bit_offset_ + num_bits;
-  if (UNLIKELY(byte_offset_ * 8 + end_bit > max_bytes_ * 8)) return false;
-
-  *v = BitUtil::TrailingBits(buffered_values_, end_bit) >> bit_offset_;
-  bit_offset_ = end_bit;
-
-  if (bit_offset_ >= 64) {
-    byte_offset_ += 8;
-    bit_offset_ -= 64;
-
-    // Load the next word; near the end of the buffer fewer than 8 bytes exist.
-    const int bytes_remaining = max_bytes_ - byte_offset_;
-    const int load_size = LIKELY(bytes_remaining >= 8) ? 8 : bytes_remaining;
-    memcpy(&buffered_values_, buffer_ + byte_offset_, load_size);
-
-    // Bits of the value that live in the freshly loaded word.
-    *v |= BitUtil::TrailingBits(buffered_values_, bit_offset_)
-        << (num_bits - bit_offset_);
-  }
-  DCHECK_LE(bit_offset_, 64);
-  return true;
-}
-
-// Skips to the next byte boundary, copies 'num_bytes' bytes from there into
-// '*v' and reloads the staging word from the position after them.
-// Returns false, consuming nothing, when not enough bytes are left.
-template<typename T>
-inline bool BitReader::GetAligned(int num_bytes, T* v) {
-  DCHECK_LE(num_bytes, sizeof(T));
-
-  // First whole byte that has not been (partially) consumed yet.
-  const int aligned_start = byte_offset_ + BitUtil::Ceil(bit_offset_, 8);
-  if (UNLIKELY(aligned_start + num_bytes > max_bytes_)) return false;
-
-  memcpy(v, buffer_ + aligned_start, num_bytes);
-  byte_offset_ = aligned_start + num_bytes;
-  bit_offset_ = 0;
-
-  // Refill the staging word; fewer than 8 bytes may remain near the end.
-  const int bytes_remaining = max_bytes_ - byte_offset_;
-  const int load_size = LIKELY(bytes_remaining >= 8) ? 8 : bytes_remaining;
-  memcpy(&buffered_values_, buffer_ + byte_offset_, load_size);
-  return true;
-}
-
-// Reads a byte-aligned vlq integer: 7 payload bits per byte, least-significant
-// group first, top bit set on every byte except the last.
-// Returns false if the stream ends before the value is complete, or if the
-// value is still unterminated after MAX_VLQ_BYTE_LEN bytes (malformed input).
-// '*v' may hold a partial result when false is returned.
-inline bool BitReader::GetVlqInt(uint64_t* v) {
-  *v = 0;
-  int shift = 0;
-  uint8_t byte = 0;
-  // Bound the loop in all builds: DCHECK may compile to nothing, and an
-  // unbounded run of continuation bytes would push 'shift' past the width of
-  // the result, which is undefined behaviour.
-  for (int num_bytes = 0; num_bytes < MAX_VLQ_BYTE_LEN; ++num_bytes) {
-    if (!GetAligned<uint8_t>(1, &byte)) return false;
-    // Widen before shifting: in 'int' arithmetic the fifth group (shift 28)
-    // would overflow and its upper bits would be lost.
-    *v |= static_cast<uint64_t>(byte & 0x7F) << shift;
-    shift += 7;
-    if ((byte & 0x80) == 0) return true;
-  }
-  return false;
-}
-
-// Reads a vlq integer and undoes the zig-zag mapping: the lowest bit selects
-// the sign and the remaining bits carry the magnitude. '*v' is left untouched
-// when the underlying read fails.
-inline bool BitReader::GetZigZagVlqInt(int64_t* v) {
-  uint64_t encoded;
-  const bool ok = GetVlqInt(&encoded);
-  if (ok) {
-    const uint64_t magnitude = encoded >> 1;
-    const uint64_t sign_mask = -(encoded & 1);  // all ones when the low bit is set
-    *reinterpret_cast<uint64_t*>(v) = magnitude ^ sign_mask;
-  }
-  return ok;
-}
-
-}
-
-#endif
http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/337cf584/src/impala/bit-util.h
----------------------------------------------------------------------
diff --git a/src/impala/bit-util.h b/src/impala/bit-util.h
deleted file mode 100644
index c2b6055..0000000
--- a/src/impala/bit-util.h
+++ /dev/null
@@ -1,174 +0,0 @@
-// Copyright 2012 Cloudera Inc.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-
-#ifndef IMPALA_BIT_UTIL_H
-#define IMPALA_BIT_UTIL_H
-
-#if defined(__APPLE__)
- #include <machine/endian.h>
-#else
- #include <endian.h>
-#endif
-
-#include "impala/compiler-util.h"
-#include "impala/logging.h"
-
-namespace impala {
-
-// Utility class to do standard bit tricks
-// TODO: is this in boost or something else like that?
-class BitUtil {
- public:
-  // Returns the ceil of value/divisor. Written for non-negative 'value' and
-  // positive 'divisor'; integer division truncates toward zero otherwise.
-  static inline int Ceil(int value, int divisor) {
-    return value / divisor + (value % divisor != 0);
-  }
-
-  // Returns 'value' rounded up to the nearest multiple of 'factor'
-  static inline int RoundUp(int value, int factor) {
-    return (value + (factor - 1)) / factor * factor;
-  }
-
-  // Returns 'value' rounded down to the nearest multiple of 'factor'
-  static inline int RoundDown(int value, int factor) {
-    return (value / factor) * factor;
-  }
-
-  // Returns the number of set bits in x
-  static inline int Popcount(uint64_t x) {
-    int count = 0;
-    // 'x &= x-1' clears the lowest set bit, so the loop runs once per set bit.
-    for (; x != 0; ++count) x &= x-1;
-    return count;
-  }
-
-  // Returns the 'num_bits' least-significant bits of 'v'.
-  static inline uint64_t TrailingBits(uint64_t v, int num_bits) {
-    // Both special cases avoid a shift by 64, which is undefined.
-    if (UNLIKELY(num_bits == 0)) return 0;
-    if (UNLIKELY(num_bits >= 64)) return v;
-    int n = 64 - num_bits;
-    return (v << n) >> n;
-  }
-
-  // Returns ceil(log2(x)); 0 is returned for x == 0.
-  // TODO: this could be faster if we use __builtin_clz. Fix this if this ever shows up
-  // in a hot path.
-  static inline int Log2(uint64_t x) {
-    // log2(1) == 0. The formula below only holds for x > 1; applied to 1 it
-    // would yield 1.
-    if (x <= 1) return 0;
-    // Compute result = ceil(log2(x))
-    //                = floor(log2(x - 1)) + 1, for x > 1
-    // by finding the position of the most significant bit (1-indexed) of x - 1
-    // (floor(log2(n)) = MSB(n) (0-indexed))
-    --x;
-    int result = 1;
-    while (x >>= 1) ++result;
-    return result;
-  }
-
-  // Returns the minimum number of bits needed to represent the value of 'x'
-  static inline int NumRequiredBits(uint64_t x) {
-    // Count how often x can be shifted right before it becomes zero. This
-    // avoids building a mask with '1L << i', which is undefined where 'long'
-    // is narrower than the shift count and overflows a signed long at bit 63.
-    int num_bits = 0;
-    for (; x != 0; x >>= 1) ++num_bits;
-    return num_bits;
-  }
-
-  // Swaps the byte order (i.e. endianess)
-  static inline int64_t ByteSwap(int64_t value) {
-    return __builtin_bswap64(value);
-  }
-  static inline uint64_t ByteSwap(uint64_t value) {
-    return static_cast<uint64_t>(__builtin_bswap64(value));
-  }
-  static inline int32_t ByteSwap(int32_t value) {
-    return __builtin_bswap32(value);
-  }
-  static inline uint32_t ByteSwap(uint32_t value) {
-    return static_cast<uint32_t>(__builtin_bswap32(value));
-  }
-  static inline int16_t ByteSwap(int16_t value) {
-    return (((value >> 8) & 0xff) | ((value & 0xff) << 8));
-  }
-  static inline uint16_t ByteSwap(uint16_t value) {
-    return static_cast<uint16_t>(ByteSwap(static_cast<int16_t>(value)));
-  }
-
-  // Write the swapped bytes into dst. Src and dst cannot overlap.
-  // Lengths 1, 2, 4 and 8 use the fixed-width overloads above; any other
-  // length is reversed byte by byte.
-  static inline void ByteSwap(void* dst, const void* src, int len) {
-    switch (len) {
-      case 1:
-        *reinterpret_cast<int8_t*>(dst) = *reinterpret_cast<const int8_t*>(src);
-        return;
-      case 2:
-        *reinterpret_cast<int16_t*>(dst) =
-            ByteSwap(*reinterpret_cast<const int16_t*>(src));
-        return;
-      case 4:
-        *reinterpret_cast<int32_t*>(dst) =
-            ByteSwap(*reinterpret_cast<const int32_t*>(src));
-        return;
-      case 8:
-        *reinterpret_cast<int64_t*>(dst) =
-            ByteSwap(*reinterpret_cast<const int64_t*>(src));
-        return;
-      default: break;
-    }
-
-    uint8_t* d = reinterpret_cast<uint8_t*>(dst);
-    const uint8_t* s = reinterpret_cast<const uint8_t*>(src);
-    for (int i = 0; i < len; ++i) {
-      d[i] = s[len - i - 1];
-    }
-  }
-
-  // Converts to big endian format (if not already in big endian) from the
-  // machine's native endian format.
-#if __BYTE_ORDER == __LITTLE_ENDIAN
-  static inline int64_t ToBigEndian(int64_t value) { return ByteSwap(value); }
-  static inline uint64_t ToBigEndian(uint64_t value) { return ByteSwap(value); }
-  static inline int32_t ToBigEndian(int32_t value) { return ByteSwap(value); }
-  static inline uint32_t ToBigEndian(uint32_t value) { return ByteSwap(value); }
-  static inline int16_t ToBigEndian(int16_t value) { return ByteSwap(value); }
-  static inline uint16_t ToBigEndian(uint16_t value) { return ByteSwap(value); }
-#else
-  static inline int64_t ToBigEndian(int64_t val) { return val; }
-  static inline uint64_t ToBigEndian(uint64_t val) { return val; }
-  static inline int32_t ToBigEndian(int32_t val) { return val; }
-  static inline uint32_t ToBigEndian(uint32_t val) { return val; }
-  static inline int16_t ToBigEndian(int16_t val) { return val; }
-  static inline uint16_t ToBigEndian(uint16_t val) { return val; }
-#endif
-
-  // Converts from big endian format to the machine's native endian format.
-#if __BYTE_ORDER == __LITTLE_ENDIAN
-  static inline int64_t FromBigEndian(int64_t value) { return ByteSwap(value); }
-  static inline uint64_t FromBigEndian(uint64_t value) { return ByteSwap(value); }
-  static inline int32_t FromBigEndian(int32_t value) { return ByteSwap(value); }
-  static inline uint32_t FromBigEndian(uint32_t value) { return ByteSwap(value); }
-  static inline int16_t FromBigEndian(int16_t value) { return ByteSwap(value); }
-  static inline uint16_t FromBigEndian(uint16_t value) { return ByteSwap(value); }
-#else
-  static inline int64_t FromBigEndian(int64_t val) { return val; }
-  static inline uint64_t FromBigEndian(uint64_t val) { return val; }
-  static inline int32_t FromBigEndian(int32_t val) { return val; }
-  static inline uint32_t FromBigEndian(uint32_t val) { return val; }
-  static inline int16_t FromBigEndian(int16_t val) { return val; }
-  static inline uint16_t FromBigEndian(uint16_t val) { return val; }
-#endif
-
-};
-
-}
-
-#endif
http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/337cf584/src/impala/compiler-util.h
----------------------------------------------------------------------
diff --git a/src/impala/compiler-util.h b/src/impala/compiler-util.h
deleted file mode 100644
index 6b25cdf..0000000
--- a/src/impala/compiler-util.h
+++ /dev/null
@@ -1,38 +0,0 @@
-// Copyright 2012 Cloudera Inc.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-
-#ifndef IMPALA_COMMON_COMPILER_UTIL_H
-#define IMPALA_COMMON_COMPILER_UTIL_H
-
-// Compiler hint that this branch is likely or unlikely to
-// be taken. Taken from the "What Every Programmer Should Know
-// About Memory" paper.
-// example: if (LIKELY(size > 0)) { ... }
-// example: if (UNLIKELY(!status.ok())) { ... }
-#ifdef LIKELY  // drop any definition made by an earlier header so the one below wins
-#undef LIKELY
-#endif
-
-#ifdef UNLIKELY  // same for UNLIKELY
-#undef UNLIKELY
-#endif
-
-#define LIKELY(expr) __builtin_expect(!!(expr), 1)  // '!!' turns expr into 0 or 1; expected value 1
-#define UNLIKELY(expr) __builtin_expect(!!(expr), 0)  // as above, expected value 0
-
-#define PREFETCH(addr) __builtin_prefetch(addr)  // forwards to the compiler builtin; not #undef'd first, unlike the two above
-
-#endif
-
http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/337cf584/src/impala/logging.h
----------------------------------------------------------------------
diff --git a/src/impala/logging.h b/src/impala/logging.h
deleted file mode 100644
index fc130f3..0000000
--- a/src/impala/logging.h
+++ /dev/null
@@ -1,31 +0,0 @@
-// Copyright 2012 Cloudera Inc.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-
-#ifndef IMPALA_COMMON_LOGGING_H
-#define IMPALA_COMMON_LOGGING_H
-
-#include <iostream>
-
-#define DCHECK(condition) while(false) std::cout  // no-op: 'condition' is discarded, never evaluated
-#define DCHECK_EQ(a, b) while(false) std::cout  // the comparison macros discard both operands too
-#define DCHECK_NE(a, b) while(false) std::cout
-#define DCHECK_GT(a, b) while(false) std::cout
-#define DCHECK_LT(a, b) while(false) std::cout
-#define DCHECK_GE(a, b) while(false) std::cout
-#define DCHECK_LE(a, b) while(false) std::cout
-// Similar to how glog defines DCHECK for release: anything streamed with '<<' is the body of a dead 'while(false)' loop.
-#define LOG(level) while(false) std::cout  // 'level' is ignored; nothing is ever printed
-
-#endif
[2/7] parquet-cpp git commit: PARQUET-416: C++11 compilation,
code reorg, libparquet and installation targets
Posted by no...@apache.org.
http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/337cf584/src/parquet/thrift/parquet_types.cpp
----------------------------------------------------------------------
diff --git a/src/parquet/thrift/parquet_types.cpp b/src/parquet/thrift/parquet_types.cpp
new file mode 100644
index 0000000..06d388c
--- /dev/null
+++ b/src/parquet/thrift/parquet_types.cpp
@@ -0,0 +1,2006 @@
+/**
+ * Autogenerated by Thrift Compiler (0.9.0)
+ *
+ * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING
+ * @generated
+ */
+#include "parquet_types.h"
+
+#include <algorithm>
+
+namespace parquet {
+
+int _kTypeValues[] = {  // Type enumerators, listed in the same order as _kTypeNames
+ Type::BOOLEAN,
+ Type::INT32,
+ Type::INT64,
+ Type::INT96,
+ Type::FLOAT,
+ Type::DOUBLE,
+ Type::BYTE_ARRAY,
+ Type::FIXED_LEN_BYTE_ARRAY
+};
+const char* _kTypeNames[] = {  // spelling of each enumerator, parallel to _kTypeValues
+ "BOOLEAN",
+ "INT32",
+ "INT64",
+ "INT96",
+ "FLOAT",
+ "DOUBLE",
+ "BYTE_ARRAY",
+ "FIXED_LEN_BYTE_ARRAY"
+};
+const std::map<int, const char*> _Type_VALUES_TO_NAMES(::apache::thrift::TEnumIterator(8, _kTypeValues, _kTypeNames), ::apache::thrift::TEnumIterator(-1, NULL, NULL));  // 8 = entries per array above; keep in sync when an enumerator is added
+
+int _kConvertedTypeValues[] = {  // ConvertedType enumerators, same order as _kConvertedTypeNames
+ ConvertedType::UTF8,
+ ConvertedType::MAP,
+ ConvertedType::MAP_KEY_VALUE,
+ ConvertedType::LIST,
+ ConvertedType::ENUM,
+ ConvertedType::DECIMAL
+};
+const char* _kConvertedTypeNames[] = {  // spelling of each enumerator, parallel to _kConvertedTypeValues
+ "UTF8",
+ "MAP",
+ "MAP_KEY_VALUE",
+ "LIST",
+ "ENUM",
+ "DECIMAL"
+};
+const std::map<int, const char*> _ConvertedType_VALUES_TO_NAMES(::apache::thrift::TEnumIterator(6, _kConvertedTypeValues, _kConvertedTypeNames), ::apache::thrift::TEnumIterator(-1, NULL, NULL));  // 6 = entries per array above
+
+int _kFieldRepetitionTypeValues[] = {  // FieldRepetitionType enumerators, same order as the names below
+ FieldRepetitionType::REQUIRED,
+ FieldRepetitionType::OPTIONAL,
+ FieldRepetitionType::REPEATED
+};
+const char* _kFieldRepetitionTypeNames[] = {  // spelling of each enumerator, parallel to the values above
+ "REQUIRED",
+ "OPTIONAL",
+ "REPEATED"
+};
+const std::map<int, const char*> _FieldRepetitionType_VALUES_TO_NAMES(::apache::thrift::TEnumIterator(3, _kFieldRepetitionTypeValues, _kFieldRepetitionTypeNames), ::apache::thrift::TEnumIterator(-1, NULL, NULL));  // 3 = entries per array above
+
+int _kEncodingValues[] = {  // Encoding enumerators, same order as _kEncodingNames
+ Encoding::PLAIN,
+ Encoding::PLAIN_DICTIONARY,
+ Encoding::RLE,
+ Encoding::BIT_PACKED,
+ Encoding::DELTA_BINARY_PACKED,
+ Encoding::DELTA_LENGTH_BYTE_ARRAY,
+ Encoding::DELTA_BYTE_ARRAY,
+ Encoding::RLE_DICTIONARY
+};
+const char* _kEncodingNames[] = {  // spelling of each enumerator, parallel to _kEncodingValues
+ "PLAIN",
+ "PLAIN_DICTIONARY",
+ "RLE",
+ "BIT_PACKED",
+ "DELTA_BINARY_PACKED",
+ "DELTA_LENGTH_BYTE_ARRAY",
+ "DELTA_BYTE_ARRAY",
+ "RLE_DICTIONARY"
+};
+const std::map<int, const char*> _Encoding_VALUES_TO_NAMES(::apache::thrift::TEnumIterator(8, _kEncodingValues, _kEncodingNames), ::apache::thrift::TEnumIterator(-1, NULL, NULL));  // 8 = entries per array above
+
+int _kCompressionCodecValues[] = {  // CompressionCodec enumerators, same order as the names below
+ CompressionCodec::UNCOMPRESSED,
+ CompressionCodec::SNAPPY,
+ CompressionCodec::GZIP,
+ CompressionCodec::LZO
+};
+const char* _kCompressionCodecNames[] = {  // spelling of each enumerator, parallel to the values above
+ "UNCOMPRESSED",
+ "SNAPPY",
+ "GZIP",
+ "LZO"
+};
+const std::map<int, const char*> _CompressionCodec_VALUES_TO_NAMES(::apache::thrift::TEnumIterator(4, _kCompressionCodecValues, _kCompressionCodecNames), ::apache::thrift::TEnumIterator(-1, NULL, NULL));  // 4 = entries per array above
+
+int _kPageTypeValues[] = {  // PageType enumerators, same order as _kPageTypeNames
+ PageType::DATA_PAGE,
+ PageType::INDEX_PAGE,
+ PageType::DICTIONARY_PAGE,
+ PageType::DATA_PAGE_V2
+};
+const char* _kPageTypeNames[] = {  // spelling of each enumerator, parallel to _kPageTypeValues
+ "DATA_PAGE",
+ "INDEX_PAGE",
+ "DICTIONARY_PAGE",
+ "DATA_PAGE_V2"
+};
+const std::map<int, const char*> _PageType_VALUES_TO_NAMES(::apache::thrift::TEnumIterator(4, _kPageTypeValues, _kPageTypeNames), ::apache::thrift::TEnumIterator(-1, NULL, NULL));  // 4 = entries per array above
+
+const char* Statistics::ascii_fingerprint = "CE004821871820DD79A8FD98BB101F6D";
+const uint8_t Statistics::binary_fingerprint[16] = {0xCE,0x00,0x48,0x21,0x87,0x18,0x20,0xDD,0x79,0xA8,0xFD,0x98,0xBB,0x10,0x1F,0x6D};
+
+// Reads a Statistics struct from 'iprot', field by field, until the stop
+// marker. Known field ids carrying the expected wire type are stored and
+// flagged in __isset; every other field is skipped.
+// Returns the sum of the values returned by the protocol calls.
+uint32_t Statistics::read(::apache::thrift::protocol::TProtocol* iprot) {
+  namespace tproto = ::apache::thrift::protocol;
+
+  std::string field_name;
+  tproto::TType field_type;
+  int16_t field_id;
+
+  uint32_t total = iprot->readStructBegin(field_name);
+
+  for (;;) {
+    total += iprot->readFieldBegin(field_name, field_type, field_id);
+    if (field_type == tproto::T_STOP) break;
+
+    if (field_id == 1 && field_type == tproto::T_STRING) {
+      total += iprot->readBinary(this->max);
+      this->__isset.max = true;
+    } else if (field_id == 2 && field_type == tproto::T_STRING) {
+      total += iprot->readBinary(this->min);
+      this->__isset.min = true;
+    } else if (field_id == 3 && field_type == tproto::T_I64) {
+      total += iprot->readI64(this->null_count);
+      this->__isset.null_count = true;
+    } else if (field_id == 4 && field_type == tproto::T_I64) {
+      total += iprot->readI64(this->distinct_count);
+      this->__isset.distinct_count = true;
+    } else {
+      // Unknown field id, or a known id with an unexpected wire type.
+      total += iprot->skip(field_type);
+    }
+    total += iprot->readFieldEnd();
+  }
+
+  total += iprot->readStructEnd();
+  return total;
+}
+
+// Writes this Statistics struct to 'oprot'. All four fields are optional: a
+// field is emitted only when its __isset flag is set. Fields are written in
+// ascending field-id order and followed by the stop marker.
+// Returns the sum of the values returned by the protocol calls.
+uint32_t Statistics::write(::apache::thrift::protocol::TProtocol* oprot) const {
+  namespace tproto = ::apache::thrift::protocol;
+
+  // The generated 'fcnt' counter was only ever incremented, never read, so it
+  // is not kept.
+  uint32_t xfer = 0;
+  xfer += oprot->writeStructBegin("Statistics");
+
+  if (this->__isset.max) {
+    xfer += oprot->writeFieldBegin("max", tproto::T_STRING, 1);
+    xfer += oprot->writeBinary(this->max);
+    xfer += oprot->writeFieldEnd();
+  }
+  if (this->__isset.min) {
+    xfer += oprot->writeFieldBegin("min", tproto::T_STRING, 2);
+    xfer += oprot->writeBinary(this->min);
+    xfer += oprot->writeFieldEnd();
+  }
+  if (this->__isset.null_count) {
+    xfer += oprot->writeFieldBegin("null_count", tproto::T_I64, 3);
+    xfer += oprot->writeI64(this->null_count);
+    xfer += oprot->writeFieldEnd();
+  }
+  if (this->__isset.distinct_count) {
+    xfer += oprot->writeFieldBegin("distinct_count", tproto::T_I64, 4);
+    xfer += oprot->writeI64(this->distinct_count);
+    xfer += oprot->writeFieldEnd();
+  }
+  xfer += oprot->writeFieldStop();
+  xfer += oprot->writeStructEnd();
+  return xfer;
+}
+
+// Exchanges the contents of two Statistics objects member by member,
+// including the __isset flags.
+void swap(Statistics &a, Statistics &b) {
+  using ::std::swap;  // fall back to std::swap where no closer overload exists
+  swap(a.__isset, b.__isset);
+  swap(a.distinct_count, b.distinct_count);
+  swap(a.null_count, b.null_count);
+  swap(a.min, b.min);
+  swap(a.max, b.max);
+}
+
+const char* SchemaElement::ascii_fingerprint = "388A784401753800444CFEAC8BC1B1A1";
+const uint8_t SchemaElement::binary_fingerprint[16] = {0x38,0x8A,0x78,0x44,0x01,0x75,0x38,0x00,0x44,0x4C,0xFE,0xAC,0x8B,0xC1,0xB1,0xA1};
+
+// Reads a SchemaElement from 'iprot', field by field, until the stop marker.
+// Known field ids carrying the expected wire type are stored; optional ones
+// are also flagged in __isset. Every other field is skipped.
+// 'name' (field 4) is required: TProtocolException(INVALID_DATA) is thrown if
+// the struct ends without it.
+// Returns the sum of the values returned by the protocol calls.
+uint32_t SchemaElement::read(::apache::thrift::protocol::TProtocol* iprot) {
+  namespace tproto = ::apache::thrift::protocol;
+
+  std::string field_name;
+  tproto::TType field_type;
+  int16_t field_id;
+  bool saw_name = false;
+
+  uint32_t total = iprot->readStructBegin(field_name);
+
+  for (;;) {
+    total += iprot->readFieldBegin(field_name, field_type, field_id);
+    if (field_type == tproto::T_STOP) break;
+
+    if (field_id == 1 && field_type == tproto::T_I32) {
+      // Enums travel as i32 and are converted after reading.
+      int32_t raw_type;
+      total += iprot->readI32(raw_type);
+      this->type = static_cast<Type::type>(raw_type);
+      this->__isset.type = true;
+    } else if (field_id == 2 && field_type == tproto::T_I32) {
+      total += iprot->readI32(this->type_length);
+      this->__isset.type_length = true;
+    } else if (field_id == 3 && field_type == tproto::T_I32) {
+      int32_t raw_repetition;
+      total += iprot->readI32(raw_repetition);
+      this->repetition_type = static_cast<FieldRepetitionType::type>(raw_repetition);
+      this->__isset.repetition_type = true;
+    } else if (field_id == 4 && field_type == tproto::T_STRING) {
+      total += iprot->readString(this->name);
+      saw_name = true;
+    } else if (field_id == 5 && field_type == tproto::T_I32) {
+      total += iprot->readI32(this->num_children);
+      this->__isset.num_children = true;
+    } else if (field_id == 6 && field_type == tproto::T_I32) {
+      int32_t raw_converted;
+      total += iprot->readI32(raw_converted);
+      this->converted_type = static_cast<ConvertedType::type>(raw_converted);
+      this->__isset.converted_type = true;
+    } else if (field_id == 7 && field_type == tproto::T_I32) {
+      total += iprot->readI32(this->scale);
+      this->__isset.scale = true;
+    } else if (field_id == 8 && field_type == tproto::T_I32) {
+      total += iprot->readI32(this->precision);
+      this->__isset.precision = true;
+    } else {
+      // Unknown field id, or a known id with an unexpected wire type.
+      total += iprot->skip(field_type);
+    }
+    total += iprot->readFieldEnd();
+  }
+
+  total += iprot->readStructEnd();
+
+  if (!saw_name)
+    throw tproto::TProtocolException(tproto::TProtocolException::INVALID_DATA);
+  return total;
+}
+
+// Writes this SchemaElement to 'oprot'. 'name' (field 4) is always written;
+// every other field is optional and emitted only when its __isset flag is
+// set. Fields are written in ascending field-id order and followed by the
+// stop marker. Enum members are written as i32.
+// Returns the sum of the values returned by the protocol calls.
+uint32_t SchemaElement::write(::apache::thrift::protocol::TProtocol* oprot) const {
+  namespace tproto = ::apache::thrift::protocol;
+
+  // The generated 'fcnt' counter was only ever incremented, never read, so it
+  // is not kept.
+  uint32_t xfer = 0;
+  xfer += oprot->writeStructBegin("SchemaElement");
+
+  if (this->__isset.type) {
+    xfer += oprot->writeFieldBegin("type", tproto::T_I32, 1);
+    xfer += oprot->writeI32(static_cast<int32_t>(this->type));
+    xfer += oprot->writeFieldEnd();
+  }
+  if (this->__isset.type_length) {
+    xfer += oprot->writeFieldBegin("type_length", tproto::T_I32, 2);
+    xfer += oprot->writeI32(this->type_length);
+    xfer += oprot->writeFieldEnd();
+  }
+  if (this->__isset.repetition_type) {
+    xfer += oprot->writeFieldBegin("repetition_type", tproto::T_I32, 3);
+    xfer += oprot->writeI32(static_cast<int32_t>(this->repetition_type));
+    xfer += oprot->writeFieldEnd();
+  }
+
+  // Required field: written unconditionally.
+  xfer += oprot->writeFieldBegin("name", tproto::T_STRING, 4);
+  xfer += oprot->writeString(this->name);
+  xfer += oprot->writeFieldEnd();
+
+  if (this->__isset.num_children) {
+    xfer += oprot->writeFieldBegin("num_children", tproto::T_I32, 5);
+    xfer += oprot->writeI32(this->num_children);
+    xfer += oprot->writeFieldEnd();
+  }
+  if (this->__isset.converted_type) {
+    xfer += oprot->writeFieldBegin("converted_type", tproto::T_I32, 6);
+    xfer += oprot->writeI32(static_cast<int32_t>(this->converted_type));
+    xfer += oprot->writeFieldEnd();
+  }
+  if (this->__isset.scale) {
+    xfer += oprot->writeFieldBegin("scale", tproto::T_I32, 7);
+    xfer += oprot->writeI32(this->scale);
+    xfer += oprot->writeFieldEnd();
+  }
+  if (this->__isset.precision) {
+    xfer += oprot->writeFieldBegin("precision", tproto::T_I32, 8);
+    xfer += oprot->writeI32(this->precision);
+    xfer += oprot->writeFieldEnd();
+  }
+  xfer += oprot->writeFieldStop();
+  xfer += oprot->writeStructEnd();
+  return xfer;
+}
+
+/** Exchanges every member of two SchemaElement objects, including `__isset`. */
+void swap(SchemaElement &a, SchemaElement &b) {
+  using ::std::swap;
+
+  // Presence flags.
+  swap(a.__isset, b.__isset);
+
+  // Enum-valued members.
+  swap(a.type, b.type);
+  swap(a.repetition_type, b.repetition_type);
+  swap(a.converted_type, b.converted_type);
+
+  // Integer members.
+  swap(a.type_length, b.type_length);
+  swap(a.num_children, b.num_children);
+  swap(a.scale, b.scale);
+  swap(a.precision, b.precision);
+
+  // String member.
+  swap(a.name, b.name);
+}
+
+const char* DataPageHeader::ascii_fingerprint = "5FC1792B0483E9C984475384165040B1";  // Fingerprint of DataPageHeader as 32 hex digits (presumably emitted by the Thrift compiler; confirm).
+const uint8_t DataPageHeader::binary_fingerprint[16] = {0x5F,0xC1,0x79,0x2B,0x04,0x83,0xE9,0xC9,0x84,0x47,0x53,0x84,0x16,0x50,0x40,0xB1};  // The same 16 bytes that the hex string above spells out.
+
+/**
+ * Populates this DataPageHeader from `iprot`.
+ *
+ * Fields are consumed until a T_STOP marker is seen. A field with an unknown
+ * id, or whose wire type is not the expected one, is skipped.
+ *
+ * @param iprot protocol to read from (dereferenced without a null check)
+ * @return running total of the values returned by the protocol read calls
+ * @throws TProtocolException (INVALID_DATA) when num_values, encoding,
+ *         definition_level_encoding or repetition_level_encoding was not read
+ */
+uint32_t DataPageHeader::read(::apache::thrift::protocol::TProtocol* iprot) {
+  namespace tproto = ::apache::thrift::protocol;
+  using tproto::TProtocolException;
+
+  std::string field_name;
+  tproto::TType field_type;
+  int16_t field_id;
+
+  // One flag per required field; all must be set by the time T_STOP is hit.
+  bool seen_num_values = false;
+  bool seen_encoding = false;
+  bool seen_definition_level_encoding = false;
+  bool seen_repetition_level_encoding = false;
+
+  uint32_t consumed = iprot->readStructBegin(field_name);
+
+  for (;;) {
+    consumed += iprot->readFieldBegin(field_name, field_type, field_id);
+    if (field_type == tproto::T_STOP) {
+      break;
+    }
+    switch (field_id) {
+      case 1:  // num_values
+        if (field_type != tproto::T_I32) {
+          consumed += iprot->skip(field_type);
+        } else {
+          consumed += iprot->readI32(this->num_values);
+          seen_num_values = true;
+        }
+        break;
+      case 2:  // encoding
+        if (field_type != tproto::T_I32) {
+          consumed += iprot->skip(field_type);
+        } else {
+          int32_t raw;
+          consumed += iprot->readI32(raw);
+          this->encoding = static_cast<Encoding::type>(raw);
+          seen_encoding = true;
+        }
+        break;
+      case 3:  // definition_level_encoding
+        if (field_type != tproto::T_I32) {
+          consumed += iprot->skip(field_type);
+        } else {
+          int32_t raw;
+          consumed += iprot->readI32(raw);
+          this->definition_level_encoding = static_cast<Encoding::type>(raw);
+          seen_definition_level_encoding = true;
+        }
+        break;
+      case 4:  // repetition_level_encoding
+        if (field_type != tproto::T_I32) {
+          consumed += iprot->skip(field_type);
+        } else {
+          int32_t raw;
+          consumed += iprot->readI32(raw);
+          this->repetition_level_encoding = static_cast<Encoding::type>(raw);
+          seen_repetition_level_encoding = true;
+        }
+        break;
+      case 5:  // statistics (optional)
+        if (field_type != tproto::T_STRUCT) {
+          consumed += iprot->skip(field_type);
+        } else {
+          consumed += this->statistics.read(iprot);
+          this->__isset.statistics = true;
+        }
+        break;
+      default:
+        consumed += iprot->skip(field_type);
+        break;
+    }
+    consumed += iprot->readFieldEnd();
+  }
+
+  consumed += iprot->readStructEnd();
+
+  const bool have_all_required = seen_num_values && seen_encoding &&
+                                 seen_definition_level_encoding &&
+                                 seen_repetition_level_encoding;
+  if (!have_all_required) {
+    throw TProtocolException(TProtocolException::INVALID_DATA);
+  }
+  return consumed;
+}
+
+/**
+ * Serializes this DataPageHeader to `oprot`.
+ *
+ * Field ids 1-4 are always emitted; `statistics` (id 5) only when its
+ * `__isset` flag is set.
+ *
+ * @param oprot protocol to write to (dereferenced without a null check)
+ * @return running total of the values returned by the protocol write calls
+ */
+uint32_t DataPageHeader::write(::apache::thrift::protocol::TProtocol* oprot) const {
+  namespace tproto = ::apache::thrift::protocol;
+
+  // The generated `fcnt` counter was incremented but never read; it is gone.
+  uint32_t xfer = oprot->writeStructBegin("DataPageHeader");
+
+  xfer += oprot->writeFieldBegin("num_values", tproto::T_I32, 1);
+  xfer += oprot->writeI32(this->num_values);
+  xfer += oprot->writeFieldEnd();
+
+  xfer += oprot->writeFieldBegin("encoding", tproto::T_I32, 2);
+  xfer += oprot->writeI32(static_cast<int32_t>(this->encoding));
+  xfer += oprot->writeFieldEnd();
+
+  xfer += oprot->writeFieldBegin("definition_level_encoding", tproto::T_I32, 3);
+  xfer += oprot->writeI32(static_cast<int32_t>(this->definition_level_encoding));
+  xfer += oprot->writeFieldEnd();
+
+  xfer += oprot->writeFieldBegin("repetition_level_encoding", tproto::T_I32, 4);
+  xfer += oprot->writeI32(static_cast<int32_t>(this->repetition_level_encoding));
+  xfer += oprot->writeFieldEnd();
+
+  if (this->__isset.statistics) {
+    xfer += oprot->writeFieldBegin("statistics", tproto::T_STRUCT, 5);
+    xfer += this->statistics.write(oprot);
+    xfer += oprot->writeFieldEnd();
+  }
+
+  xfer += oprot->writeFieldStop();
+  xfer += oprot->writeStructEnd();
+  return xfer;
+}
+
+/** Exchanges every member of two DataPageHeader objects, including `__isset`. */
+void swap(DataPageHeader &a, DataPageHeader &b) {
+  using ::std::swap;
+
+  // Presence flags and the nested struct they describe.
+  swap(a.__isset, b.__isset);
+  swap(a.statistics, b.statistics);
+
+  // The three encoding members.
+  swap(a.repetition_level_encoding, b.repetition_level_encoding);
+  swap(a.definition_level_encoding, b.definition_level_encoding);
+  swap(a.encoding, b.encoding);
+
+  // Value count.
+  swap(a.num_values, b.num_values);
+}
+
+const char* IndexPageHeader::ascii_fingerprint = "99914B932BD37A50B983C5E7C90AE93B";  // Fingerprint of IndexPageHeader as 32 hex digits (presumably emitted by the Thrift compiler; confirm).
+const uint8_t IndexPageHeader::binary_fingerprint[16] = {0x99,0x91,0x4B,0x93,0x2B,0xD3,0x7A,0x50,0xB9,0x83,0xC5,0xE7,0xC9,0x0A,0xE9,0x3B};  // The same 16 bytes that the hex string above spells out.
+
+/**
+ * Consumes an IndexPageHeader from `iprot`.
+ *
+ * No field is stored: every field encountered before T_STOP is skipped.
+ *
+ * @param iprot protocol to read from (dereferenced without a null check)
+ * @return running total of the values returned by the protocol read calls
+ */
+uint32_t IndexPageHeader::read(::apache::thrift::protocol::TProtocol* iprot) {
+  namespace tproto = ::apache::thrift::protocol;
+
+  std::string field_name;
+  tproto::TType field_type;
+  int16_t field_id;
+
+  uint32_t consumed = iprot->readStructBegin(field_name);
+
+  for (;;) {
+    consumed += iprot->readFieldBegin(field_name, field_type, field_id);
+    if (field_type == tproto::T_STOP) {
+      break;
+    }
+    // Nothing is recognised here, so each field is discarded.
+    consumed += iprot->skip(field_type);
+    consumed += iprot->readFieldEnd();
+  }
+
+  consumed += iprot->readStructEnd();
+  return consumed;
+}
+
+/**
+ * Serializes this IndexPageHeader to `oprot`.
+ *
+ * No fields are emitted: only the struct begin, the field-stop marker and
+ * the struct end.
+ *
+ * @param oprot protocol to write to (dereferenced without a null check)
+ * @return running total of the values returned by the protocol write calls
+ */
+uint32_t IndexPageHeader::write(::apache::thrift::protocol::TProtocol* oprot) const {
+  // The generated `fcnt` local was declared but never used; it is gone.
+  uint32_t xfer = oprot->writeStructBegin("IndexPageHeader");
+  xfer += oprot->writeFieldStop();
+  xfer += oprot->writeStructEnd();
+  return xfer;
+}
+
+/** Swap for IndexPageHeader: no members are exchanged. */
+void swap(IndexPageHeader &a, IndexPageHeader &b) {
+  // The casts only mark both parameters as used.
+  static_cast<void>(a);
+  static_cast<void>(b);
+}
+
+const char* DictionaryPageHeader::ascii_fingerprint = "B149E4528254D495610C22AE4BD539C5";  // Fingerprint of DictionaryPageHeader as 32 hex digits (presumably emitted by the Thrift compiler; confirm).
+const uint8_t DictionaryPageHeader::binary_fingerprint[16] = {0xB1,0x49,0xE4,0x52,0x82,0x54,0xD4,0x95,0x61,0x0C,0x22,0xAE,0x4B,0xD5,0x39,0xC5};  // The same 16 bytes that the hex string above spells out.
+
+/**
+ * Populates this DictionaryPageHeader from `iprot`.
+ *
+ * Fields are consumed until a T_STOP marker is seen. A field with an unknown
+ * id, or whose wire type is not the expected one, is skipped.
+ *
+ * @param iprot protocol to read from (dereferenced without a null check)
+ * @return running total of the values returned by the protocol read calls
+ * @throws TProtocolException (INVALID_DATA) when num_values or encoding was
+ *         not read
+ */
+uint32_t DictionaryPageHeader::read(::apache::thrift::protocol::TProtocol* iprot) {
+  namespace tproto = ::apache::thrift::protocol;
+  using tproto::TProtocolException;
+
+  std::string field_name;
+  tproto::TType field_type;
+  int16_t field_id;
+
+  // One flag per required field.
+  bool seen_num_values = false;
+  bool seen_encoding = false;
+
+  uint32_t consumed = iprot->readStructBegin(field_name);
+
+  for (;;) {
+    consumed += iprot->readFieldBegin(field_name, field_type, field_id);
+    if (field_type == tproto::T_STOP) {
+      break;
+    }
+    switch (field_id) {
+      case 1:  // num_values
+        if (field_type != tproto::T_I32) {
+          consumed += iprot->skip(field_type);
+        } else {
+          consumed += iprot->readI32(this->num_values);
+          seen_num_values = true;
+        }
+        break;
+      case 2:  // encoding
+        if (field_type != tproto::T_I32) {
+          consumed += iprot->skip(field_type);
+        } else {
+          int32_t raw;
+          consumed += iprot->readI32(raw);
+          this->encoding = static_cast<Encoding::type>(raw);
+          seen_encoding = true;
+        }
+        break;
+      case 3:  // is_sorted (optional)
+        if (field_type != tproto::T_BOOL) {
+          consumed += iprot->skip(field_type);
+        } else {
+          consumed += iprot->readBool(this->is_sorted);
+          this->__isset.is_sorted = true;
+        }
+        break;
+      default:
+        consumed += iprot->skip(field_type);
+        break;
+    }
+    consumed += iprot->readFieldEnd();
+  }
+
+  consumed += iprot->readStructEnd();
+
+  if (!(seen_num_values && seen_encoding)) {
+    throw TProtocolException(TProtocolException::INVALID_DATA);
+  }
+  return consumed;
+}
+
+/**
+ * Serializes this DictionaryPageHeader to `oprot`.
+ *
+ * `num_values` (id 1) and `encoding` (id 2) are always emitted; `is_sorted`
+ * (id 3) only when its `__isset` flag is set.
+ *
+ * @param oprot protocol to write to (dereferenced without a null check)
+ * @return running total of the values returned by the protocol write calls
+ */
+uint32_t DictionaryPageHeader::write(::apache::thrift::protocol::TProtocol* oprot) const {
+  namespace tproto = ::apache::thrift::protocol;
+
+  // The generated `fcnt` counter was incremented but never read; it is gone.
+  uint32_t xfer = oprot->writeStructBegin("DictionaryPageHeader");
+
+  xfer += oprot->writeFieldBegin("num_values", tproto::T_I32, 1);
+  xfer += oprot->writeI32(this->num_values);
+  xfer += oprot->writeFieldEnd();
+
+  xfer += oprot->writeFieldBegin("encoding", tproto::T_I32, 2);
+  xfer += oprot->writeI32(static_cast<int32_t>(this->encoding));
+  xfer += oprot->writeFieldEnd();
+
+  if (this->__isset.is_sorted) {
+    xfer += oprot->writeFieldBegin("is_sorted", tproto::T_BOOL, 3);
+    xfer += oprot->writeBool(this->is_sorted);
+    xfer += oprot->writeFieldEnd();
+  }
+
+  xfer += oprot->writeFieldStop();
+  xfer += oprot->writeStructEnd();
+  return xfer;
+}
+
+/** Exchanges every member of two DictionaryPageHeader objects, including `__isset`. */
+void swap(DictionaryPageHeader &a, DictionaryPageHeader &b) {
+  using ::std::swap;
+
+  // Presence flags together with the optional member they describe.
+  swap(a.__isset, b.__isset);
+  swap(a.is_sorted, b.is_sorted);
+
+  // Members that are always written.
+  swap(a.encoding, b.encoding);
+  swap(a.num_values, b.num_values);
+}
+
+const char* DataPageHeaderV2::ascii_fingerprint = "69FF2F6BD1A443440D5E46ABA5A3A919";  // Fingerprint of DataPageHeaderV2 as 32 hex digits (presumably emitted by the Thrift compiler; confirm).
+const uint8_t DataPageHeaderV2::binary_fingerprint[16] = {0x69,0xFF,0x2F,0x6B,0xD1,0xA4,0x43,0x44,0x0D,0x5E,0x46,0xAB,0xA5,0xA3,0xA9,0x19};  // The same 16 bytes that the hex string above spells out.
+
+/**
+ * Populates this DataPageHeaderV2 from `iprot`.
+ *
+ * Fields are consumed until a T_STOP marker is seen. A field with an unknown
+ * id, or whose wire type is not the expected one, is skipped.
+ *
+ * @param iprot protocol to read from (dereferenced without a null check)
+ * @return running total of the values returned by the protocol read calls
+ * @throws TProtocolException (INVALID_DATA) when any of num_values,
+ *         num_nulls, num_rows, encoding, definition_levels_byte_length or
+ *         repetition_levels_byte_length was not read
+ */
+uint32_t DataPageHeaderV2::read(::apache::thrift::protocol::TProtocol* iprot) {
+  namespace tproto = ::apache::thrift::protocol;
+  using tproto::TProtocolException;
+
+  std::string field_name;
+  tproto::TType field_type;
+  int16_t field_id;
+
+  // One flag per required field.
+  bool seen_num_values = false;
+  bool seen_num_nulls = false;
+  bool seen_num_rows = false;
+  bool seen_encoding = false;
+  bool seen_definition_levels_byte_length = false;
+  bool seen_repetition_levels_byte_length = false;
+
+  uint32_t consumed = iprot->readStructBegin(field_name);
+
+  for (;;) {
+    consumed += iprot->readFieldBegin(field_name, field_type, field_id);
+    if (field_type == tproto::T_STOP) {
+      break;
+    }
+    switch (field_id) {
+      case 1:  // num_values
+        if (field_type != tproto::T_I32) {
+          consumed += iprot->skip(field_type);
+        } else {
+          consumed += iprot->readI32(this->num_values);
+          seen_num_values = true;
+        }
+        break;
+      case 2:  // num_nulls
+        if (field_type != tproto::T_I32) {
+          consumed += iprot->skip(field_type);
+        } else {
+          consumed += iprot->readI32(this->num_nulls);
+          seen_num_nulls = true;
+        }
+        break;
+      case 3:  // num_rows
+        if (field_type != tproto::T_I32) {
+          consumed += iprot->skip(field_type);
+        } else {
+          consumed += iprot->readI32(this->num_rows);
+          seen_num_rows = true;
+        }
+        break;
+      case 4:  // encoding
+        if (field_type != tproto::T_I32) {
+          consumed += iprot->skip(field_type);
+        } else {
+          int32_t raw;
+          consumed += iprot->readI32(raw);
+          this->encoding = static_cast<Encoding::type>(raw);
+          seen_encoding = true;
+        }
+        break;
+      case 5:  // definition_levels_byte_length
+        if (field_type != tproto::T_I32) {
+          consumed += iprot->skip(field_type);
+        } else {
+          consumed += iprot->readI32(this->definition_levels_byte_length);
+          seen_definition_levels_byte_length = true;
+        }
+        break;
+      case 6:  // repetition_levels_byte_length
+        if (field_type != tproto::T_I32) {
+          consumed += iprot->skip(field_type);
+        } else {
+          consumed += iprot->readI32(this->repetition_levels_byte_length);
+          seen_repetition_levels_byte_length = true;
+        }
+        break;
+      case 7:  // is_compressed (optional)
+        if (field_type != tproto::T_BOOL) {
+          consumed += iprot->skip(field_type);
+        } else {
+          consumed += iprot->readBool(this->is_compressed);
+          this->__isset.is_compressed = true;
+        }
+        break;
+      case 8:  // statistics (optional)
+        if (field_type != tproto::T_STRUCT) {
+          consumed += iprot->skip(field_type);
+        } else {
+          consumed += this->statistics.read(iprot);
+          this->__isset.statistics = true;
+        }
+        break;
+      default:
+        consumed += iprot->skip(field_type);
+        break;
+    }
+    consumed += iprot->readFieldEnd();
+  }
+
+  consumed += iprot->readStructEnd();
+
+  const bool have_all_required =
+      seen_num_values && seen_num_nulls && seen_num_rows && seen_encoding &&
+      seen_definition_levels_byte_length && seen_repetition_levels_byte_length;
+  if (!have_all_required) {
+    throw TProtocolException(TProtocolException::INVALID_DATA);
+  }
+  return consumed;
+}
+
+/**
+ * Serializes this DataPageHeaderV2 to `oprot`.
+ *
+ * Field ids 1-6 are always emitted; `is_compressed` (id 7) and `statistics`
+ * (id 8) only when their `__isset` flags are set.
+ *
+ * @param oprot protocol to write to (dereferenced without a null check)
+ * @return running total of the values returned by the protocol write calls
+ */
+uint32_t DataPageHeaderV2::write(::apache::thrift::protocol::TProtocol* oprot) const {
+  namespace tproto = ::apache::thrift::protocol;
+
+  // The generated `fcnt` counter was incremented but never read; it is gone.
+  uint32_t xfer = oprot->writeStructBegin("DataPageHeaderV2");
+
+  xfer += oprot->writeFieldBegin("num_values", tproto::T_I32, 1);
+  xfer += oprot->writeI32(this->num_values);
+  xfer += oprot->writeFieldEnd();
+
+  xfer += oprot->writeFieldBegin("num_nulls", tproto::T_I32, 2);
+  xfer += oprot->writeI32(this->num_nulls);
+  xfer += oprot->writeFieldEnd();
+
+  xfer += oprot->writeFieldBegin("num_rows", tproto::T_I32, 3);
+  xfer += oprot->writeI32(this->num_rows);
+  xfer += oprot->writeFieldEnd();
+
+  xfer += oprot->writeFieldBegin("encoding", tproto::T_I32, 4);
+  xfer += oprot->writeI32(static_cast<int32_t>(this->encoding));
+  xfer += oprot->writeFieldEnd();
+
+  xfer += oprot->writeFieldBegin("definition_levels_byte_length", tproto::T_I32, 5);
+  xfer += oprot->writeI32(this->definition_levels_byte_length);
+  xfer += oprot->writeFieldEnd();
+
+  xfer += oprot->writeFieldBegin("repetition_levels_byte_length", tproto::T_I32, 6);
+  xfer += oprot->writeI32(this->repetition_levels_byte_length);
+  xfer += oprot->writeFieldEnd();
+
+  if (this->__isset.is_compressed) {
+    xfer += oprot->writeFieldBegin("is_compressed", tproto::T_BOOL, 7);
+    xfer += oprot->writeBool(this->is_compressed);
+    xfer += oprot->writeFieldEnd();
+  }
+  if (this->__isset.statistics) {
+    xfer += oprot->writeFieldBegin("statistics", tproto::T_STRUCT, 8);
+    xfer += this->statistics.write(oprot);
+    xfer += oprot->writeFieldEnd();
+  }
+
+  xfer += oprot->writeFieldStop();
+  xfer += oprot->writeStructEnd();
+  return xfer;
+}
+
+/** Exchanges every member of two DataPageHeaderV2 objects, including `__isset`. */
+void swap(DataPageHeaderV2 &a, DataPageHeaderV2 &b) {
+  using ::std::swap;
+
+  // Presence flags and the two optional members they describe.
+  swap(a.__isset, b.__isset);
+  swap(a.statistics, b.statistics);
+  swap(a.is_compressed, b.is_compressed);
+
+  // Level byte lengths.
+  swap(a.repetition_levels_byte_length, b.repetition_levels_byte_length);
+  swap(a.definition_levels_byte_length, b.definition_levels_byte_length);
+
+  // Encoding and counts.
+  swap(a.encoding, b.encoding);
+  swap(a.num_rows, b.num_rows);
+  swap(a.num_nulls, b.num_nulls);
+  swap(a.num_values, b.num_values);
+}
+
+const char* PageHeader::ascii_fingerprint = "B5BD2BDF3756C883A58B30B9C9F204A0";  // Fingerprint of PageHeader as 32 hex digits (presumably emitted by the Thrift compiler; confirm).
+const uint8_t PageHeader::binary_fingerprint[16] = {0xB5,0xBD,0x2B,0xDF,0x37,0x56,0xC8,0x83,0xA5,0x8B,0x30,0xB9,0xC9,0xF2,0x04,0xA0};  // The same 16 bytes that the hex string above spells out.
+
+/**
+ * Populates this PageHeader from `iprot`.
+ *
+ * Fields are consumed until a T_STOP marker is seen. A field with an unknown
+ * id, or whose wire type is not the expected one, is skipped. The four
+ * nested header structs are read through their own `read` methods.
+ *
+ * @param iprot protocol to read from (dereferenced without a null check)
+ * @return running total of the values returned by the protocol read calls
+ * @throws TProtocolException (INVALID_DATA) when type,
+ *         uncompressed_page_size or compressed_page_size was not read
+ */
+uint32_t PageHeader::read(::apache::thrift::protocol::TProtocol* iprot) {
+  namespace tproto = ::apache::thrift::protocol;
+  using tproto::TProtocolException;
+
+  std::string field_name;
+  tproto::TType field_type;
+  int16_t field_id;
+
+  // One flag per required field.
+  bool seen_type = false;
+  bool seen_uncompressed_page_size = false;
+  bool seen_compressed_page_size = false;
+
+  uint32_t consumed = iprot->readStructBegin(field_name);
+
+  for (;;) {
+    consumed += iprot->readFieldBegin(field_name, field_type, field_id);
+    if (field_type == tproto::T_STOP) {
+      break;
+    }
+    switch (field_id) {
+      case 1:  // type
+        if (field_type != tproto::T_I32) {
+          consumed += iprot->skip(field_type);
+        } else {
+          int32_t raw;
+          consumed += iprot->readI32(raw);
+          this->type = static_cast<PageType::type>(raw);
+          seen_type = true;
+        }
+        break;
+      case 2:  // uncompressed_page_size
+        if (field_type != tproto::T_I32) {
+          consumed += iprot->skip(field_type);
+        } else {
+          consumed += iprot->readI32(this->uncompressed_page_size);
+          seen_uncompressed_page_size = true;
+        }
+        break;
+      case 3:  // compressed_page_size
+        if (field_type != tproto::T_I32) {
+          consumed += iprot->skip(field_type);
+        } else {
+          consumed += iprot->readI32(this->compressed_page_size);
+          seen_compressed_page_size = true;
+        }
+        break;
+      case 4:  // crc (optional)
+        if (field_type != tproto::T_I32) {
+          consumed += iprot->skip(field_type);
+        } else {
+          consumed += iprot->readI32(this->crc);
+          this->__isset.crc = true;
+        }
+        break;
+      case 5:  // data_page_header (optional)
+        if (field_type != tproto::T_STRUCT) {
+          consumed += iprot->skip(field_type);
+        } else {
+          consumed += this->data_page_header.read(iprot);
+          this->__isset.data_page_header = true;
+        }
+        break;
+      case 6:  // index_page_header (optional)
+        if (field_type != tproto::T_STRUCT) {
+          consumed += iprot->skip(field_type);
+        } else {
+          consumed += this->index_page_header.read(iprot);
+          this->__isset.index_page_header = true;
+        }
+        break;
+      case 7:  // dictionary_page_header (optional)
+        if (field_type != tproto::T_STRUCT) {
+          consumed += iprot->skip(field_type);
+        } else {
+          consumed += this->dictionary_page_header.read(iprot);
+          this->__isset.dictionary_page_header = true;
+        }
+        break;
+      case 8:  // data_page_header_v2 (optional)
+        if (field_type != tproto::T_STRUCT) {
+          consumed += iprot->skip(field_type);
+        } else {
+          consumed += this->data_page_header_v2.read(iprot);
+          this->__isset.data_page_header_v2 = true;
+        }
+        break;
+      default:
+        consumed += iprot->skip(field_type);
+        break;
+    }
+    consumed += iprot->readFieldEnd();
+  }
+
+  consumed += iprot->readStructEnd();
+
+  const bool have_all_required =
+      seen_type && seen_uncompressed_page_size && seen_compressed_page_size;
+  if (!have_all_required) {
+    throw TProtocolException(TProtocolException::INVALID_DATA);
+  }
+  return consumed;
+}
+
+/**
+ * Serializes this PageHeader to `oprot`.
+ *
+ * Field ids 1-3 are always emitted. `crc` and the four nested header structs
+ * (ids 4-8) are emitted only when their `__isset` flags are set.
+ *
+ * @param oprot protocol to write to (dereferenced without a null check)
+ * @return running total of the values returned by the protocol write calls
+ */
+uint32_t PageHeader::write(::apache::thrift::protocol::TProtocol* oprot) const {
+  namespace tproto = ::apache::thrift::protocol;
+
+  // The generated `fcnt` counter was incremented but never read; it is gone.
+  uint32_t xfer = oprot->writeStructBegin("PageHeader");
+
+  xfer += oprot->writeFieldBegin("type", tproto::T_I32, 1);
+  xfer += oprot->writeI32(static_cast<int32_t>(this->type));
+  xfer += oprot->writeFieldEnd();
+
+  xfer += oprot->writeFieldBegin("uncompressed_page_size", tproto::T_I32, 2);
+  xfer += oprot->writeI32(this->uncompressed_page_size);
+  xfer += oprot->writeFieldEnd();
+
+  xfer += oprot->writeFieldBegin("compressed_page_size", tproto::T_I32, 3);
+  xfer += oprot->writeI32(this->compressed_page_size);
+  xfer += oprot->writeFieldEnd();
+
+  if (this->__isset.crc) {
+    xfer += oprot->writeFieldBegin("crc", tproto::T_I32, 4);
+    xfer += oprot->writeI32(this->crc);
+    xfer += oprot->writeFieldEnd();
+  }
+  if (this->__isset.data_page_header) {
+    xfer += oprot->writeFieldBegin("data_page_header", tproto::T_STRUCT, 5);
+    xfer += this->data_page_header.write(oprot);
+    xfer += oprot->writeFieldEnd();
+  }
+  if (this->__isset.index_page_header) {
+    xfer += oprot->writeFieldBegin("index_page_header", tproto::T_STRUCT, 6);
+    xfer += this->index_page_header.write(oprot);
+    xfer += oprot->writeFieldEnd();
+  }
+  if (this->__isset.dictionary_page_header) {
+    xfer += oprot->writeFieldBegin("dictionary_page_header", tproto::T_STRUCT, 7);
+    xfer += this->dictionary_page_header.write(oprot);
+    xfer += oprot->writeFieldEnd();
+  }
+  if (this->__isset.data_page_header_v2) {
+    xfer += oprot->writeFieldBegin("data_page_header_v2", tproto::T_STRUCT, 8);
+    xfer += this->data_page_header_v2.write(oprot);
+    xfer += oprot->writeFieldEnd();
+  }
+
+  xfer += oprot->writeFieldStop();
+  xfer += oprot->writeStructEnd();
+  return xfer;
+}
+
+/** Exchanges every member of two PageHeader objects, including `__isset`. */
+void swap(PageHeader &a, PageHeader &b) {
+  using ::std::swap;
+
+  // Presence flags.
+  swap(a.__isset, b.__isset);
+
+  // Nested header structs.
+  swap(a.data_page_header_v2, b.data_page_header_v2);
+  swap(a.dictionary_page_header, b.dictionary_page_header);
+  swap(a.index_page_header, b.index_page_header);
+  swap(a.data_page_header, b.data_page_header);
+
+  // Scalar members.
+  swap(a.crc, b.crc);
+  swap(a.compressed_page_size, b.compressed_page_size);
+  swap(a.uncompressed_page_size, b.uncompressed_page_size);
+  swap(a.type, b.type);
+}
+
+const char* KeyValue::ascii_fingerprint = "5B708A954C550ECA9C1A49D3C5CAFAB9";  // Fingerprint of KeyValue as 32 hex digits (presumably emitted by the Thrift compiler; confirm).
+const uint8_t KeyValue::binary_fingerprint[16] = {0x5B,0x70,0x8A,0x95,0x4C,0x55,0x0E,0xCA,0x9C,0x1A,0x49,0xD3,0xC5,0xCA,0xFA,0xB9};  // The same 16 bytes that the hex string above spells out.
+
+/**
+ * Populates this KeyValue from `iprot`.
+ *
+ * Fields are consumed until a T_STOP marker is seen. A field with an unknown
+ * id, or whose wire type is not T_STRING, is skipped.
+ *
+ * @param iprot protocol to read from (dereferenced without a null check)
+ * @return running total of the values returned by the protocol read calls
+ * @throws TProtocolException (INVALID_DATA) when key was not read
+ */
+uint32_t KeyValue::read(::apache::thrift::protocol::TProtocol* iprot) {
+  namespace tproto = ::apache::thrift::protocol;
+  using tproto::TProtocolException;
+
+  std::string field_name;
+  tproto::TType field_type;
+  int16_t field_id;
+
+  // `key` is the only required field.
+  bool seen_key = false;
+
+  uint32_t consumed = iprot->readStructBegin(field_name);
+
+  for (;;) {
+    consumed += iprot->readFieldBegin(field_name, field_type, field_id);
+    if (field_type == tproto::T_STOP) {
+      break;
+    }
+    switch (field_id) {
+      case 1:  // key
+        if (field_type != tproto::T_STRING) {
+          consumed += iprot->skip(field_type);
+        } else {
+          consumed += iprot->readString(this->key);
+          seen_key = true;
+        }
+        break;
+      case 2:  // value (optional)
+        if (field_type != tproto::T_STRING) {
+          consumed += iprot->skip(field_type);
+        } else {
+          consumed += iprot->readString(this->value);
+          this->__isset.value = true;
+        }
+        break;
+      default:
+        consumed += iprot->skip(field_type);
+        break;
+    }
+    consumed += iprot->readFieldEnd();
+  }
+
+  consumed += iprot->readStructEnd();
+
+  if (!seen_key) {
+    throw TProtocolException(TProtocolException::INVALID_DATA);
+  }
+  return consumed;
+}
+
+/**
+ * Serializes this KeyValue to `oprot`.
+ *
+ * `key` (id 1) is always emitted; `value` (id 2) only when its `__isset`
+ * flag is set.
+ *
+ * @param oprot protocol to write to (dereferenced without a null check)
+ * @return running total of the values returned by the protocol write calls
+ */
+uint32_t KeyValue::write(::apache::thrift::protocol::TProtocol* oprot) const {
+  namespace tproto = ::apache::thrift::protocol;
+
+  // The generated `fcnt` counter was incremented but never read; it is gone.
+  uint32_t xfer = oprot->writeStructBegin("KeyValue");
+
+  xfer += oprot->writeFieldBegin("key", tproto::T_STRING, 1);
+  xfer += oprot->writeString(this->key);
+  xfer += oprot->writeFieldEnd();
+
+  if (this->__isset.value) {
+    xfer += oprot->writeFieldBegin("value", tproto::T_STRING, 2);
+    xfer += oprot->writeString(this->value);
+    xfer += oprot->writeFieldEnd();
+  }
+
+  xfer += oprot->writeFieldStop();
+  xfer += oprot->writeStructEnd();
+  return xfer;
+}
+
+/** Exchanges every member of two KeyValue objects, including `__isset`. */
+void swap(KeyValue &a, KeyValue &b) {
+  using ::std::swap;
+
+  // Presence flags first, then the two strings.
+  swap(a.__isset, b.__isset);
+  swap(a.value, b.value);
+  swap(a.key, b.key);
+}
+
+const char* SortingColumn::ascii_fingerprint = "F079C2D58A783AD90F9BE05D10DBBC6F";  // Fingerprint of SortingColumn as 32 hex digits (presumably emitted by the Thrift compiler; confirm).
+const uint8_t SortingColumn::binary_fingerprint[16] = {0xF0,0x79,0xC2,0xD5,0x8A,0x78,0x3A,0xD9,0x0F,0x9B,0xE0,0x5D,0x10,0xDB,0xBC,0x6F};  // The same 16 bytes that the hex string above spells out.
+
+/**
+ * Populates this SortingColumn from `iprot`.
+ *
+ * Fields are consumed until a T_STOP marker is seen. A field with an unknown
+ * id, or whose wire type is not the expected one, is skipped.
+ *
+ * @param iprot protocol to read from (dereferenced without a null check)
+ * @return running total of the values returned by the protocol read calls
+ * @throws TProtocolException (INVALID_DATA) when column_idx, descending or
+ *         nulls_first was not read
+ */
+uint32_t SortingColumn::read(::apache::thrift::protocol::TProtocol* iprot) {
+  namespace tproto = ::apache::thrift::protocol;
+  using tproto::TProtocolException;
+
+  std::string field_name;
+  tproto::TType field_type;
+  int16_t field_id;
+
+  // All three fields are required.
+  bool seen_column_idx = false;
+  bool seen_descending = false;
+  bool seen_nulls_first = false;
+
+  uint32_t consumed = iprot->readStructBegin(field_name);
+
+  for (;;) {
+    consumed += iprot->readFieldBegin(field_name, field_type, field_id);
+    if (field_type == tproto::T_STOP) {
+      break;
+    }
+    switch (field_id) {
+      case 1:  // column_idx
+        if (field_type != tproto::T_I32) {
+          consumed += iprot->skip(field_type);
+        } else {
+          consumed += iprot->readI32(this->column_idx);
+          seen_column_idx = true;
+        }
+        break;
+      case 2:  // descending
+        if (field_type != tproto::T_BOOL) {
+          consumed += iprot->skip(field_type);
+        } else {
+          consumed += iprot->readBool(this->descending);
+          seen_descending = true;
+        }
+        break;
+      case 3:  // nulls_first
+        if (field_type != tproto::T_BOOL) {
+          consumed += iprot->skip(field_type);
+        } else {
+          consumed += iprot->readBool(this->nulls_first);
+          seen_nulls_first = true;
+        }
+        break;
+      default:
+        consumed += iprot->skip(field_type);
+        break;
+    }
+    consumed += iprot->readFieldEnd();
+  }
+
+  consumed += iprot->readStructEnd();
+
+  if (!(seen_column_idx && seen_descending && seen_nulls_first)) {
+    throw TProtocolException(TProtocolException::INVALID_DATA);
+  }
+  return consumed;
+}
+
+/**
+ * Serializes this SortingColumn to `oprot`.
+ *
+ * All three fields (column_idx, descending, nulls_first) are always emitted.
+ *
+ * @param oprot protocol to write to (dereferenced without a null check)
+ * @return running total of the values returned by the protocol write calls
+ */
+uint32_t SortingColumn::write(::apache::thrift::protocol::TProtocol* oprot) const {
+  namespace tproto = ::apache::thrift::protocol;
+
+  // The generated `fcnt` counter was incremented but never read; it is gone.
+  uint32_t xfer = oprot->writeStructBegin("SortingColumn");
+
+  xfer += oprot->writeFieldBegin("column_idx", tproto::T_I32, 1);
+  xfer += oprot->writeI32(this->column_idx);
+  xfer += oprot->writeFieldEnd();
+
+  xfer += oprot->writeFieldBegin("descending", tproto::T_BOOL, 2);
+  xfer += oprot->writeBool(this->descending);
+  xfer += oprot->writeFieldEnd();
+
+  xfer += oprot->writeFieldBegin("nulls_first", tproto::T_BOOL, 3);
+  xfer += oprot->writeBool(this->nulls_first);
+  xfer += oprot->writeFieldEnd();
+
+  xfer += oprot->writeFieldStop();
+  xfer += oprot->writeStructEnd();
+  return xfer;
+}
+
+/** Exchanges column_idx, descending and nulls_first between two SortingColumn objects. */
+void swap(SortingColumn &a, SortingColumn &b) {
+  using ::std::swap;
+
+  // Boolean members first, then the column index.
+  swap(a.nulls_first, b.nulls_first);
+  swap(a.descending, b.descending);
+  swap(a.column_idx, b.column_idx);
+}
+
+const char* ColumnMetaData::ascii_fingerprint = "1AF797732BCB4465C6314FB29B86638D";  // Fingerprint of ColumnMetaData as 32 hex digits (presumably emitted by the Thrift compiler; confirm).
+const uint8_t ColumnMetaData::binary_fingerprint[16] = {0x1A,0xF7,0x97,0x73,0x2B,0xCB,0x44,0x65,0xC6,0x31,0x4F,0xB2,0x9B,0x86,0x63,0x8D};  // The same 16 bytes that the hex string above spells out.
+
+/**
+ * Populates this ColumnMetaData from `iprot`.
+ *
+ * Fields are consumed until a T_STOP marker is seen. A field with an unknown
+ * id, or whose wire type is not the expected one, is skipped. List fields
+ * are cleared, resized to the element count reported by the protocol and
+ * then filled element by element.
+ *
+ * NOTE(review): the list sizes come straight from `readListBegin` and are
+ * passed to `resize()` unchecked, and the reported element type is not
+ * compared against the expected one. Confirm that the protocol/transport
+ * layer bounds these for untrusted input.
+ *
+ * @param iprot protocol to read from (dereferenced without a null check)
+ * @return running total of the values returned by the protocol read calls
+ * @throws TProtocolException (INVALID_DATA) when any of type, encodings,
+ *         path_in_schema, codec, num_values, total_uncompressed_size,
+ *         total_compressed_size or data_page_offset was not read
+ */
+uint32_t ColumnMetaData::read(::apache::thrift::protocol::TProtocol* iprot) {
+  namespace tproto = ::apache::thrift::protocol;
+  using tproto::TProtocolException;
+
+  std::string field_name;
+  tproto::TType field_type;
+  int16_t field_id;
+
+  // One flag per required field.
+  bool seen_type = false;
+  bool seen_encodings = false;
+  bool seen_path_in_schema = false;
+  bool seen_codec = false;
+  bool seen_num_values = false;
+  bool seen_total_uncompressed_size = false;
+  bool seen_total_compressed_size = false;
+  bool seen_data_page_offset = false;
+
+  uint32_t consumed = iprot->readStructBegin(field_name);
+
+  for (;;) {
+    consumed += iprot->readFieldBegin(field_name, field_type, field_id);
+    if (field_type == tproto::T_STOP) {
+      break;
+    }
+    switch (field_id) {
+      case 1:  // type
+        if (field_type != tproto::T_I32) {
+          consumed += iprot->skip(field_type);
+        } else {
+          int32_t raw;
+          consumed += iprot->readI32(raw);
+          this->type = static_cast<Type::type>(raw);
+          seen_type = true;
+        }
+        break;
+      case 2:  // encodings: list of enum values carried as i32
+        if (field_type != tproto::T_LIST) {
+          consumed += iprot->skip(field_type);
+        } else {
+          this->encodings.clear();
+          uint32_t count;
+          tproto::TType elem_type;
+          consumed += iprot->readListBegin(elem_type, count);
+          this->encodings.resize(count);
+          for (uint32_t idx = 0; idx < count; ++idx) {
+            int32_t raw;
+            consumed += iprot->readI32(raw);
+            this->encodings[idx] = static_cast<Encoding::type>(raw);
+          }
+          consumed += iprot->readListEnd();
+          seen_encodings = true;
+        }
+        break;
+      case 3:  // path_in_schema: list of strings
+        if (field_type != tproto::T_LIST) {
+          consumed += iprot->skip(field_type);
+        } else {
+          this->path_in_schema.clear();
+          uint32_t count;
+          tproto::TType elem_type;
+          consumed += iprot->readListBegin(elem_type, count);
+          this->path_in_schema.resize(count);
+          for (uint32_t idx = 0; idx < count; ++idx) {
+            consumed += iprot->readString(this->path_in_schema[idx]);
+          }
+          consumed += iprot->readListEnd();
+          seen_path_in_schema = true;
+        }
+        break;
+      case 4:  // codec
+        if (field_type != tproto::T_I32) {
+          consumed += iprot->skip(field_type);
+        } else {
+          int32_t raw;
+          consumed += iprot->readI32(raw);
+          this->codec = static_cast<CompressionCodec::type>(raw);
+          seen_codec = true;
+        }
+        break;
+      case 5:  // num_values
+        if (field_type != tproto::T_I64) {
+          consumed += iprot->skip(field_type);
+        } else {
+          consumed += iprot->readI64(this->num_values);
+          seen_num_values = true;
+        }
+        break;
+      case 6:  // total_uncompressed_size
+        if (field_type != tproto::T_I64) {
+          consumed += iprot->skip(field_type);
+        } else {
+          consumed += iprot->readI64(this->total_uncompressed_size);
+          seen_total_uncompressed_size = true;
+        }
+        break;
+      case 7:  // total_compressed_size
+        if (field_type != tproto::T_I64) {
+          consumed += iprot->skip(field_type);
+        } else {
+          consumed += iprot->readI64(this->total_compressed_size);
+          seen_total_compressed_size = true;
+        }
+        break;
+      case 8:  // key_value_metadata: list of structs (optional)
+        if (field_type != tproto::T_LIST) {
+          consumed += iprot->skip(field_type);
+        } else {
+          this->key_value_metadata.clear();
+          uint32_t count;
+          tproto::TType elem_type;
+          consumed += iprot->readListBegin(elem_type, count);
+          this->key_value_metadata.resize(count);
+          for (uint32_t idx = 0; idx < count; ++idx) {
+            consumed += this->key_value_metadata[idx].read(iprot);
+          }
+          consumed += iprot->readListEnd();
+          this->__isset.key_value_metadata = true;
+        }
+        break;
+      case 9:  // data_page_offset
+        if (field_type != tproto::T_I64) {
+          consumed += iprot->skip(field_type);
+        } else {
+          consumed += iprot->readI64(this->data_page_offset);
+          seen_data_page_offset = true;
+        }
+        break;
+      case 10:  // index_page_offset (optional)
+        if (field_type != tproto::T_I64) {
+          consumed += iprot->skip(field_type);
+        } else {
+          consumed += iprot->readI64(this->index_page_offset);
+          this->__isset.index_page_offset = true;
+        }
+        break;
+      case 11:  // dictionary_page_offset (optional)
+        if (field_type != tproto::T_I64) {
+          consumed += iprot->skip(field_type);
+        } else {
+          consumed += iprot->readI64(this->dictionary_page_offset);
+          this->__isset.dictionary_page_offset = true;
+        }
+        break;
+      case 12:  // statistics (optional)
+        if (field_type != tproto::T_STRUCT) {
+          consumed += iprot->skip(field_type);
+        } else {
+          consumed += this->statistics.read(iprot);
+          this->__isset.statistics = true;
+        }
+        break;
+      default:
+        consumed += iprot->skip(field_type);
+        break;
+    }
+    consumed += iprot->readFieldEnd();
+  }
+
+  consumed += iprot->readStructEnd();
+
+  const bool have_all_required =
+      seen_type && seen_encodings && seen_path_in_schema && seen_codec &&
+      seen_num_values && seen_total_uncompressed_size &&
+      seen_total_compressed_size && seen_data_page_offset;
+  if (!have_all_required) {
+    throw TProtocolException(TProtocolException::INVALID_DATA);
+  }
+  return consumed;
+}
+
+/**
+ * Serializes this ColumnMetaData to the given Thrift protocol.
+ *
+ * Fields 1-7 and 9 are always written. key_value_metadata (8),
+ * index_page_offset (10), dictionary_page_offset (11) and statistics (12)
+ * are written only when the matching __isset flag is set.
+ *
+ * @param oprot protocol the struct is written to
+ * @return sum of the values returned by the individual protocol write calls
+ */
+uint32_t ColumnMetaData::write(::apache::thrift::protocol::TProtocol* oprot) const {
+  namespace tproto = ::apache::thrift::protocol;
+
+  uint32_t xfer = 0;
+  xfer += oprot->writeStructBegin("ColumnMetaData");
+
+  // 1: type (enum sent as i32)
+  xfer += oprot->writeFieldBegin("type", tproto::T_I32, 1);
+  xfer += oprot->writeI32(static_cast<int32_t>(this->type));
+  xfer += oprot->writeFieldEnd();
+
+  // 2: encodings (list of enums sent as i32)
+  xfer += oprot->writeFieldBegin("encodings", tproto::T_LIST, 2);
+  xfer += oprot->writeListBegin(tproto::T_I32, static_cast<uint32_t>(this->encodings.size()));
+  for (std::vector<Encoding::type>::const_iterator it = this->encodings.begin();
+       it != this->encodings.end(); ++it) {
+    xfer += oprot->writeI32(static_cast<int32_t>(*it));
+  }
+  xfer += oprot->writeListEnd();
+  xfer += oprot->writeFieldEnd();
+
+  // 3: path_in_schema (list of strings)
+  xfer += oprot->writeFieldBegin("path_in_schema", tproto::T_LIST, 3);
+  xfer += oprot->writeListBegin(tproto::T_STRING, static_cast<uint32_t>(this->path_in_schema.size()));
+  for (std::vector<std::string>::const_iterator it = this->path_in_schema.begin();
+       it != this->path_in_schema.end(); ++it) {
+    xfer += oprot->writeString(*it);
+  }
+  xfer += oprot->writeListEnd();
+  xfer += oprot->writeFieldEnd();
+
+  // 4: codec (enum sent as i32)
+  xfer += oprot->writeFieldBegin("codec", tproto::T_I32, 4);
+  xfer += oprot->writeI32(static_cast<int32_t>(this->codec));
+  xfer += oprot->writeFieldEnd();
+
+  // 5: num_values
+  xfer += oprot->writeFieldBegin("num_values", tproto::T_I64, 5);
+  xfer += oprot->writeI64(this->num_values);
+  xfer += oprot->writeFieldEnd();
+
+  // 6: total_uncompressed_size
+  xfer += oprot->writeFieldBegin("total_uncompressed_size", tproto::T_I64, 6);
+  xfer += oprot->writeI64(this->total_uncompressed_size);
+  xfer += oprot->writeFieldEnd();
+
+  // 7: total_compressed_size
+  xfer += oprot->writeFieldBegin("total_compressed_size", tproto::T_I64, 7);
+  xfer += oprot->writeI64(this->total_compressed_size);
+  xfer += oprot->writeFieldEnd();
+
+  // 8: key_value_metadata (only when set)
+  if (this->__isset.key_value_metadata) {
+    xfer += oprot->writeFieldBegin("key_value_metadata", tproto::T_LIST, 8);
+    xfer += oprot->writeListBegin(tproto::T_STRUCT, static_cast<uint32_t>(this->key_value_metadata.size()));
+    for (std::vector<KeyValue>::const_iterator it = this->key_value_metadata.begin();
+         it != this->key_value_metadata.end(); ++it) {
+      xfer += it->write(oprot);
+    }
+    xfer += oprot->writeListEnd();
+    xfer += oprot->writeFieldEnd();
+  }
+
+  // 9: data_page_offset
+  xfer += oprot->writeFieldBegin("data_page_offset", tproto::T_I64, 9);
+  xfer += oprot->writeI64(this->data_page_offset);
+  xfer += oprot->writeFieldEnd();
+
+  // 10: index_page_offset (only when set)
+  if (this->__isset.index_page_offset) {
+    xfer += oprot->writeFieldBegin("index_page_offset", tproto::T_I64, 10);
+    xfer += oprot->writeI64(this->index_page_offset);
+    xfer += oprot->writeFieldEnd();
+  }
+
+  // 11: dictionary_page_offset (only when set)
+  if (this->__isset.dictionary_page_offset) {
+    xfer += oprot->writeFieldBegin("dictionary_page_offset", tproto::T_I64, 11);
+    xfer += oprot->writeI64(this->dictionary_page_offset);
+    xfer += oprot->writeFieldEnd();
+  }
+
+  // 12: statistics (only when set)
+  if (this->__isset.statistics) {
+    xfer += oprot->writeFieldBegin("statistics", tproto::T_STRUCT, 12);
+    xfer += this->statistics.write(oprot);
+    xfer += oprot->writeFieldEnd();
+  }
+
+  xfer += oprot->writeFieldStop();
+  xfer += oprot->writeStructEnd();
+  return xfer;
+}
+
+/**
+ * Exchanges the contents of two ColumnMetaData objects member by member,
+ * including the __isset flags.
+ */
+void swap(ColumnMetaData &a, ColumnMetaData &b) {
+  // Unqualified swap calls with std::swap made visible as the fallback.
+  using ::std::swap;
+
+  // Optional-field flags.
+  swap(a.__isset, b.__isset);
+
+  // Enum and integer members.
+  swap(a.type, b.type);
+  swap(a.codec, b.codec);
+  swap(a.num_values, b.num_values);
+  swap(a.total_uncompressed_size, b.total_uncompressed_size);
+  swap(a.total_compressed_size, b.total_compressed_size);
+  swap(a.data_page_offset, b.data_page_offset);
+  swap(a.index_page_offset, b.index_page_offset);
+  swap(a.dictionary_page_offset, b.dictionary_page_offset);
+
+  // List members and the nested statistics struct.
+  swap(a.encodings, b.encodings);
+  swap(a.path_in_schema, b.path_in_schema);
+  swap(a.key_value_metadata, b.key_value_metadata);
+  swap(a.statistics, b.statistics);
+}
+
+const char* ColumnChunk::ascii_fingerprint = "169FC47057EF3D82E2FACDDEC2641AE8"; // hex spelling of the 16 bytes in binary_fingerprint
+const uint8_t ColumnChunk::binary_fingerprint[16] = {0x16,0x9F,0xC4,0x70,0x57,0xEF,0x3D,0x82,0xE2,0xFA,0xCD,0xDE,0xC2,0x64,0x1A,0xE8};
+
+/**
+ * Populates this ColumnChunk from the given Thrift protocol.
+ *
+ * Recognised fields: 1 file_path (string), 2 file_offset (i64, required),
+ * 3 meta_data (struct). A field with an unknown id, or a known id carrying
+ * an unexpected wire type, is skipped.
+ *
+ * @param iprot protocol to read from
+ * @return sum of the values returned by the individual protocol read calls
+ * @throws TProtocolException (INVALID_DATA) if file_offset was not read
+ */
+uint32_t ColumnChunk::read(::apache::thrift::protocol::TProtocol* iprot) {
+  namespace tproto = ::apache::thrift::protocol;
+  using ::apache::thrift::protocol::TProtocolException;
+
+  std::string field_name;
+  tproto::TType field_type;
+  int16_t field_id;
+  bool have_file_offset = false;
+
+  uint32_t xfer = iprot->readStructBegin(field_name);
+
+  for (;;) {
+    xfer += iprot->readFieldBegin(field_name, field_type, field_id);
+    if (field_type == tproto::T_STOP) {
+      break;
+    }
+
+    if (field_id == 1 && field_type == tproto::T_STRING) {
+      xfer += iprot->readString(this->file_path);
+      this->__isset.file_path = true;
+    } else if (field_id == 2 && field_type == tproto::T_I64) {
+      xfer += iprot->readI64(this->file_offset);
+      have_file_offset = true;
+    } else if (field_id == 3 && field_type == tproto::T_STRUCT) {
+      xfer += this->meta_data.read(iprot);
+      this->__isset.meta_data = true;
+    } else {
+      // Unknown id or mismatched wire type.
+      xfer += iprot->skip(field_type);
+    }
+
+    xfer += iprot->readFieldEnd();
+  }
+
+  xfer += iprot->readStructEnd();
+
+  if (!have_file_offset) {
+    throw TProtocolException(TProtocolException::INVALID_DATA);
+  }
+  return xfer;
+}
+
+/**
+ * Serializes this ColumnChunk to the given Thrift protocol.
+ *
+ * file_offset (2) is always written; file_path (1) and meta_data (3) are
+ * written only when the matching __isset flag is set.
+ *
+ * @param oprot protocol the struct is written to
+ * @return sum of the values returned by the individual protocol write calls
+ */
+uint32_t ColumnChunk::write(::apache::thrift::protocol::TProtocol* oprot) const {
+  namespace tproto = ::apache::thrift::protocol;
+
+  uint32_t xfer = 0;
+  xfer += oprot->writeStructBegin("ColumnChunk");
+
+  // 1: file_path (only when set)
+  if (this->__isset.file_path) {
+    xfer += oprot->writeFieldBegin("file_path", tproto::T_STRING, 1);
+    xfer += oprot->writeString(this->file_path);
+    xfer += oprot->writeFieldEnd();
+  }
+
+  // 2: file_offset
+  xfer += oprot->writeFieldBegin("file_offset", tproto::T_I64, 2);
+  xfer += oprot->writeI64(this->file_offset);
+  xfer += oprot->writeFieldEnd();
+
+  // 3: meta_data (only when set)
+  if (this->__isset.meta_data) {
+    xfer += oprot->writeFieldBegin("meta_data", tproto::T_STRUCT, 3);
+    xfer += this->meta_data.write(oprot);
+    xfer += oprot->writeFieldEnd();
+  }
+
+  xfer += oprot->writeFieldStop();
+  xfer += oprot->writeStructEnd();
+  return xfer;
+}
+
+/**
+ * Exchanges the contents of two ColumnChunk objects member by member,
+ * including the __isset flags.
+ */
+void swap(ColumnChunk &a, ColumnChunk &b) {
+  // Unqualified swap calls with std::swap made visible as the fallback.
+  using ::std::swap;
+
+  swap(a.__isset, b.__isset);
+  swap(a.file_offset, b.file_offset);
+  swap(a.file_path, b.file_path);
+  swap(a.meta_data, b.meta_data);
+}
+
+const char* RowGroup::ascii_fingerprint = "DC7968627FA826DDC4C6C9BE773586C9"; // hex spelling of the 16 bytes in binary_fingerprint
+const uint8_t RowGroup::binary_fingerprint[16] = {0xDC,0x79,0x68,0x62,0x7F,0xA8,0x26,0xDD,0xC4,0xC6,0xC9,0xBE,0x77,0x35,0x86,0xC9};
+
+/**
+ * Populates this RowGroup from the given Thrift protocol.
+ *
+ * Recognised fields: 1 columns (list, required), 2 total_byte_size (i64,
+ * required), 3 num_rows (i64, required), 4 sorting_columns (list). A field
+ * with an unknown id, or a known id carrying an unexpected wire type, is
+ * skipped.
+ *
+ * @param iprot protocol to read from
+ * @return sum of the values returned by the individual protocol read calls
+ * @throws TProtocolException (INVALID_DATA) if any required field was not read
+ */
+uint32_t RowGroup::read(::apache::thrift::protocol::TProtocol* iprot) {
+  namespace tproto = ::apache::thrift::protocol;
+  using ::apache::thrift::protocol::TProtocolException;
+
+  std::string field_name;
+  tproto::TType field_type;
+  int16_t field_id;
+
+  bool have_columns = false;
+  bool have_total_byte_size = false;
+  bool have_num_rows = false;
+
+  uint32_t xfer = iprot->readStructBegin(field_name);
+
+  for (;;) {
+    xfer += iprot->readFieldBegin(field_name, field_type, field_id);
+    if (field_type == tproto::T_STOP) {
+      break;
+    }
+
+    if (field_id == 1 && field_type == tproto::T_LIST) {
+      // NOTE(review): count comes from readListBegin and is passed to
+      // resize() unchecked; confirm the protocol bounds container sizes.
+      tproto::TType elem_type;
+      uint32_t count;
+      this->columns.clear();
+      xfer += iprot->readListBegin(elem_type, count);
+      this->columns.resize(count);
+      for (uint32_t i = 0; i < count; ++i) {
+        xfer += this->columns[i].read(iprot);
+      }
+      xfer += iprot->readListEnd();
+      have_columns = true;
+    } else if (field_id == 2 && field_type == tproto::T_I64) {
+      xfer += iprot->readI64(this->total_byte_size);
+      have_total_byte_size = true;
+    } else if (field_id == 3 && field_type == tproto::T_I64) {
+      xfer += iprot->readI64(this->num_rows);
+      have_num_rows = true;
+    } else if (field_id == 4 && field_type == tproto::T_LIST) {
+      tproto::TType elem_type;
+      uint32_t count;
+      this->sorting_columns.clear();
+      xfer += iprot->readListBegin(elem_type, count);
+      this->sorting_columns.resize(count);
+      for (uint32_t i = 0; i < count; ++i) {
+        xfer += this->sorting_columns[i].read(iprot);
+      }
+      xfer += iprot->readListEnd();
+      this->__isset.sorting_columns = true;
+    } else {
+      // Unknown id or mismatched wire type.
+      xfer += iprot->skip(field_type);
+    }
+
+    xfer += iprot->readFieldEnd();
+  }
+
+  xfer += iprot->readStructEnd();
+
+  // Every required field raises the same exception when missing.
+  if (!(have_columns && have_total_byte_size && have_num_rows)) {
+    throw TProtocolException(TProtocolException::INVALID_DATA);
+  }
+  return xfer;
+}
+
+/**
+ * Serializes this RowGroup to the given Thrift protocol.
+ *
+ * columns (1), total_byte_size (2) and num_rows (3) are always written;
+ * sorting_columns (4) is written only when its __isset flag is set.
+ *
+ * @param oprot protocol the struct is written to
+ * @return sum of the values returned by the individual protocol write calls
+ */
+uint32_t RowGroup::write(::apache::thrift::protocol::TProtocol* oprot) const {
+  namespace tproto = ::apache::thrift::protocol;
+
+  uint32_t xfer = 0;
+  xfer += oprot->writeStructBegin("RowGroup");
+
+  // 1: columns (list of structs)
+  xfer += oprot->writeFieldBegin("columns", tproto::T_LIST, 1);
+  xfer += oprot->writeListBegin(tproto::T_STRUCT, static_cast<uint32_t>(this->columns.size()));
+  for (std::vector<ColumnChunk>::const_iterator it = this->columns.begin();
+       it != this->columns.end(); ++it) {
+    xfer += it->write(oprot);
+  }
+  xfer += oprot->writeListEnd();
+  xfer += oprot->writeFieldEnd();
+
+  // 2: total_byte_size
+  xfer += oprot->writeFieldBegin("total_byte_size", tproto::T_I64, 2);
+  xfer += oprot->writeI64(this->total_byte_size);
+  xfer += oprot->writeFieldEnd();
+
+  // 3: num_rows
+  xfer += oprot->writeFieldBegin("num_rows", tproto::T_I64, 3);
+  xfer += oprot->writeI64(this->num_rows);
+  xfer += oprot->writeFieldEnd();
+
+  // 4: sorting_columns (only when set)
+  if (this->__isset.sorting_columns) {
+    xfer += oprot->writeFieldBegin("sorting_columns", tproto::T_LIST, 4);
+    xfer += oprot->writeListBegin(tproto::T_STRUCT, static_cast<uint32_t>(this->sorting_columns.size()));
+    for (std::vector<SortingColumn>::const_iterator it = this->sorting_columns.begin();
+         it != this->sorting_columns.end(); ++it) {
+      xfer += it->write(oprot);
+    }
+    xfer += oprot->writeListEnd();
+    xfer += oprot->writeFieldEnd();
+  }
+
+  xfer += oprot->writeFieldStop();
+  xfer += oprot->writeStructEnd();
+  return xfer;
+}
+
+/**
+ * Exchanges the contents of two RowGroup objects member by member,
+ * including the __isset flags.
+ */
+void swap(RowGroup &a, RowGroup &b) {
+  // Unqualified swap calls with std::swap made visible as the fallback.
+  using ::std::swap;
+
+  swap(a.__isset, b.__isset);
+
+  // Integer members.
+  swap(a.total_byte_size, b.total_byte_size);
+  swap(a.num_rows, b.num_rows);
+
+  // List members.
+  swap(a.columns, b.columns);
+  swap(a.sorting_columns, b.sorting_columns);
+}
+
+const char* FileMetaData::ascii_fingerprint = "44DC7D83A66D54A7B7892A985C4125C9"; // hex spelling of the 16 bytes in binary_fingerprint
+const uint8_t FileMetaData::binary_fingerprint[16] = {0x44,0xDC,0x7D,0x83,0xA6,0x6D,0x54,0xA7,0xB7,0x89,0x2A,0x98,0x5C,0x41,0x25,0xC9};
+
+/**
+ * Populates this FileMetaData from the given Thrift protocol.
+ *
+ * Recognised fields: 1 version (i32, required), 2 schema (list, required),
+ * 3 num_rows (i64, required), 4 row_groups (list, required),
+ * 5 key_value_metadata (list), 6 created_by (string). A field with an
+ * unknown id, or a known id carrying an unexpected wire type, is skipped.
+ *
+ * @param iprot protocol to read from
+ * @return sum of the values returned by the individual protocol read calls
+ * @throws TProtocolException (INVALID_DATA) if any required field was not read
+ */
+uint32_t FileMetaData::read(::apache::thrift::protocol::TProtocol* iprot) {
+  namespace tproto = ::apache::thrift::protocol;
+  using ::apache::thrift::protocol::TProtocolException;
+
+  std::string field_name;
+  tproto::TType field_type;
+  int16_t field_id;
+
+  bool have_version = false;
+  bool have_schema = false;
+  bool have_num_rows = false;
+  bool have_row_groups = false;
+
+  uint32_t xfer = iprot->readStructBegin(field_name);
+
+  for (;;) {
+    xfer += iprot->readFieldBegin(field_name, field_type, field_id);
+    if (field_type == tproto::T_STOP) {
+      break;
+    }
+
+    if (field_id == 1 && field_type == tproto::T_I32) {
+      xfer += iprot->readI32(this->version);
+      have_version = true;
+    } else if (field_id == 2 && field_type == tproto::T_LIST) {
+      // NOTE(review): count comes from readListBegin and is passed to
+      // resize() unchecked; confirm the protocol bounds container sizes.
+      tproto::TType elem_type;
+      uint32_t count;
+      this->schema.clear();
+      xfer += iprot->readListBegin(elem_type, count);
+      this->schema.resize(count);
+      for (uint32_t i = 0; i < count; ++i) {
+        xfer += this->schema[i].read(iprot);
+      }
+      xfer += iprot->readListEnd();
+      have_schema = true;
+    } else if (field_id == 3 && field_type == tproto::T_I64) {
+      xfer += iprot->readI64(this->num_rows);
+      have_num_rows = true;
+    } else if (field_id == 4 && field_type == tproto::T_LIST) {
+      tproto::TType elem_type;
+      uint32_t count;
+      this->row_groups.clear();
+      xfer += iprot->readListBegin(elem_type, count);
+      this->row_groups.resize(count);
+      for (uint32_t i = 0; i < count; ++i) {
+        xfer += this->row_groups[i].read(iprot);
+      }
+      xfer += iprot->readListEnd();
+      have_row_groups = true;
+    } else if (field_id == 5 && field_type == tproto::T_LIST) {
+      tproto::TType elem_type;
+      uint32_t count;
+      this->key_value_metadata.clear();
+      xfer += iprot->readListBegin(elem_type, count);
+      this->key_value_metadata.resize(count);
+      for (uint32_t i = 0; i < count; ++i) {
+        xfer += this->key_value_metadata[i].read(iprot);
+      }
+      xfer += iprot->readListEnd();
+      this->__isset.key_value_metadata = true;
+    } else if (field_id == 6 && field_type == tproto::T_STRING) {
+      xfer += iprot->readString(this->created_by);
+      this->__isset.created_by = true;
+    } else {
+      // Unknown id or mismatched wire type.
+      xfer += iprot->skip(field_type);
+    }
+
+    xfer += iprot->readFieldEnd();
+  }
+
+  xfer += iprot->readStructEnd();
+
+  // Every required field raises the same exception when missing.
+  if (!(have_version && have_schema && have_num_rows && have_row_groups)) {
+    throw TProtocolException(TProtocolException::INVALID_DATA);
+  }
+  return xfer;
+}
+
+/**
+ * Serializes this FileMetaData to the given Thrift protocol.
+ *
+ * version (1), schema (2), num_rows (3) and row_groups (4) are always
+ * written; key_value_metadata (5) and created_by (6) are written only when
+ * the matching __isset flag is set.
+ *
+ * @param oprot protocol the struct is written to
+ * @return sum of the values returned by the individual protocol write calls
+ */
+uint32_t FileMetaData::write(::apache::thrift::protocol::TProtocol* oprot) const {
+  namespace tproto = ::apache::thrift::protocol;
+
+  uint32_t xfer = 0;
+  xfer += oprot->writeStructBegin("FileMetaData");
+
+  // 1: version
+  xfer += oprot->writeFieldBegin("version", tproto::T_I32, 1);
+  xfer += oprot->writeI32(this->version);
+  xfer += oprot->writeFieldEnd();
+
+  // 2: schema (list of structs)
+  xfer += oprot->writeFieldBegin("schema", tproto::T_LIST, 2);
+  xfer += oprot->writeListBegin(tproto::T_STRUCT, static_cast<uint32_t>(this->schema.size()));
+  for (std::vector<SchemaElement>::const_iterator it = this->schema.begin();
+       it != this->schema.end(); ++it) {
+    xfer += it->write(oprot);
+  }
+  xfer += oprot->writeListEnd();
+  xfer += oprot->writeFieldEnd();
+
+  // 3: num_rows
+  xfer += oprot->writeFieldBegin("num_rows", tproto::T_I64, 3);
+  xfer += oprot->writeI64(this->num_rows);
+  xfer += oprot->writeFieldEnd();
+
+  // 4: row_groups (list of structs)
+  xfer += oprot->writeFieldBegin("row_groups", tproto::T_LIST, 4);
+  xfer += oprot->writeListBegin(tproto::T_STRUCT, static_cast<uint32_t>(this->row_groups.size()));
+  for (std::vector<RowGroup>::const_iterator it = this->row_groups.begin();
+       it != this->row_groups.end(); ++it) {
+    xfer += it->write(oprot);
+  }
+  xfer += oprot->writeListEnd();
+  xfer += oprot->writeFieldEnd();
+
+  // 5: key_value_metadata (only when set)
+  if (this->__isset.key_value_metadata) {
+    xfer += oprot->writeFieldBegin("key_value_metadata", tproto::T_LIST, 5);
+    xfer += oprot->writeListBegin(tproto::T_STRUCT, static_cast<uint32_t>(this->key_value_metadata.size()));
+    for (std::vector<KeyValue>::const_iterator it = this->key_value_metadata.begin();
+         it != this->key_value_metadata.end(); ++it) {
+      xfer += it->write(oprot);
+    }
+    xfer += oprot->writeListEnd();
+    xfer += oprot->writeFieldEnd();
+  }
+
+  // 6: created_by (only when set)
+  if (this->__isset.created_by) {
+    xfer += oprot->writeFieldBegin("created_by", tproto::T_STRING, 6);
+    xfer += oprot->writeString(this->created_by);
+    xfer += oprot->writeFieldEnd();
+  }
+
+  xfer += oprot->writeFieldStop();
+  xfer += oprot->writeStructEnd();
+  return xfer;
+}
+
+/**
+ * Exchanges the contents of two FileMetaData objects member by member,
+ * including the __isset flags.
+ */
+void swap(FileMetaData &a, FileMetaData &b) {
+  // Unqualified swap calls with std::swap made visible as the fallback.
+  using ::std::swap;
+
+  swap(a.__isset, b.__isset);
+
+  // Integer and string members.
+  swap(a.version, b.version);
+  swap(a.num_rows, b.num_rows);
+  swap(a.created_by, b.created_by);
+
+  // List members.
+  swap(a.schema, b.schema);
+  swap(a.row_groups, b.row_groups);
+  swap(a.key_value_metadata, b.key_value_metadata);
+}
+
+} // namespace
[5/7] parquet-cpp git commit: PARQUET-416: C++11 compilation,
code reorg, libparquet and installation targets
Posted by no...@apache.org.
http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/337cf584/cmake_modules/clean-all.cmake
----------------------------------------------------------------------
diff --git a/cmake_modules/clean-all.cmake b/cmake_modules/clean-all.cmake
new file mode 100644
index 0000000..35041ea
--- /dev/null
+++ b/cmake_modules/clean-all.cmake
@@ -0,0 +1,14 @@
+# Remove the listed CMake-generated files/directories from CMAKE_BINARY_DIR.
+set(cmake_generated
+  "${CMAKE_BINARY_DIR}/CMakeCache.txt"
+  "${CMAKE_BINARY_DIR}/CTestTestfile.cmake"
+  "${CMAKE_BINARY_DIR}/cmake_install.cmake"
+  "${CMAKE_BINARY_DIR}/Makefile"
+  "${CMAKE_BINARY_DIR}/CMakeFiles"
+)
+
+foreach(file ${cmake_generated})
+  if (EXISTS "${file}")
+    file(REMOVE_RECURSE "${file}")
+  endif()
+endforeach(file)
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/337cf584/example/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/example/CMakeLists.txt b/example/CMakeLists.txt
index 8844cdd..a9f4fa3 100644
--- a/example/CMakeLists.txt
+++ b/example/CMakeLists.txt
@@ -18,13 +18,10 @@ add_library(Example STATIC
)
SET(LINK_LIBS
- Parquet
- ParquetCompression
- Example
- ThriftParquet
+ parquet
+ snappystatic
thriftstatic
- lz4static
- snappystatic)
+ Example)
add_executable(compute_stats compute_stats.cc)
target_link_libraries(compute_stats ${LINK_LIBS})
http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/337cf584/example/decode_benchmark.cc
----------------------------------------------------------------------
diff --git a/example/decode_benchmark.cc b/example/decode_benchmark.cc
index 1add399..ed4077a 100644
--- a/example/decode_benchmark.cc
+++ b/example/decode_benchmark.cc
@@ -17,11 +17,10 @@
#include <stdio.h>
#include "example_util.h"
-#include "compression/codec.h"
-#include "encodings/encodings.h"
-#include "util/stopwatch.h"
+#include "parquet/compression/codec.h"
+#include "parquet/encodings/encodings.h"
+#include "parquet/util/stopwatch.h"
-using namespace impala;
using namespace parquet;
using namespace parquet_cpp;
using namespace std;
http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/337cf584/example/parquet_reader.cc
----------------------------------------------------------------------
diff --git a/example/parquet_reader.cc b/example/parquet_reader.cc
index c02ffb0..55895ce 100644
--- a/example/parquet_reader.cc
+++ b/example/parquet_reader.cc
@@ -215,7 +215,7 @@ void* read_parquet(char* filename) {
char *str = (char*)malloc(50);
assert(str);
strcpy(str, metadata.schema[j+1].name.c_str());
- printf("%-"COL_WIDTH"s", str);
+ printf("%-" COL_WIDTH"s", str);
free(str);
}
@@ -225,22 +225,22 @@ void* read_parquet(char* filename) {
for (j = 0;j < row_group.columns.size(); ++j)
switch(type_array[j]) {
case Type::BOOLEAN:
- printf("%-"COL_WIDTH"s","BOOLEAN");
+ printf("%-" COL_WIDTH"s","BOOLEAN");
break;
case Type::INT32:
- printf("%-"COL_WIDTH"s","INT32");
+ printf("%-" COL_WIDTH"s","INT32");
break;
case Type::INT64:
- printf("%-"COL_WIDTH"s","INT64");
+ printf("%-" COL_WIDTH"s","INT64");
break;
case Type::FLOAT:
- printf("%-"COL_WIDTH"s","FLOAT");
+ printf("%-" COL_WIDTH"s","FLOAT");
break;
case Type::DOUBLE:
- printf("%-"COL_WIDTH"s","DOUBLE");
+ printf("%-" COL_WIDTH"s","DOUBLE");
break;
case Type::BYTE_ARRAY:
- printf("%-"COL_WIDTH"s","BYTE_ARRAY");
+ printf("%-" COL_WIDTH"s","BYTE_ARRAY");
break;
default:
continue;
@@ -255,26 +255,26 @@ void* read_parquet(char* filename) {
for (j = 0; j < row_group.columns.size(); ++j) {
switch(type_array[j]) {
case Type::BOOLEAN:
- printf("%-"COL_WIDTH"d",((bool*)(((bool**)column_ptr)[j]))[k]);
+ printf("%-" COL_WIDTH"d",((bool*)(((bool**)column_ptr)[j]))[k]);
break;
case Type::INT32:
- printf("%-"COL_WIDTH"d",((int32_t *)(((int32_t **)column_ptr)[j]))[k]);
+ printf("%-" COL_WIDTH"d",((int32_t *)(((int32_t **)column_ptr)[j]))[k]);
break;
case Type::INT64:
- printf("%-"COL_WIDTH"ld",((int64_t *)(((int64_t **)column_ptr)[j]))[k]);
+ printf("%-" COL_WIDTH"ld",((int64_t *)(((int64_t **)column_ptr)[j]))[k]);
break;
case Type::FLOAT:
- printf("%-"COL_WIDTH"f",((float*)(((float**)column_ptr)[j]))[k]);
+ printf("%-" COL_WIDTH"f",((float*)(((float**)column_ptr)[j]))[k]);
break;
case Type::DOUBLE:
- printf("%-"COL_WIDTH"lf",((double*)(((double**)column_ptr)[j]))[k]);
+ printf("%-" COL_WIDTH"lf",((double*)(((double**)column_ptr)[j]))[k]);
break;
case Type::BYTE_ARRAY:
result = ByteArrayToString( ((ByteArray*)(((ByteArray**)column_ptr)[j]))[k] );
str1 = (char*)malloc(result.size());
assert(str1);
strcpy(str1, result.c_str());
- printf("%-"COL_WIDTH"s", str1);
+ printf("%-" COL_WIDTH"s", str1);
free(str1);
break;
default:
http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/337cf584/generated/gen-cpp/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/generated/gen-cpp/CMakeLists.txt b/generated/gen-cpp/CMakeLists.txt
deleted file mode 100644
index 262c23f..0000000
--- a/generated/gen-cpp/CMakeLists.txt
+++ /dev/null
@@ -1,20 +0,0 @@
-# Copyright 2012 Cloudera Inc.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-
-add_library(ThriftParquet STATIC
- parquet_constants.cpp
- parquet_types.cpp
-)
-
http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/337cf584/generated/gen-cpp/parquet_constants.cpp
----------------------------------------------------------------------
diff --git a/generated/gen-cpp/parquet_constants.cpp b/generated/gen-cpp/parquet_constants.cpp
deleted file mode 100644
index caa5af6..0000000
--- a/generated/gen-cpp/parquet_constants.cpp
+++ /dev/null
@@ -1,17 +0,0 @@
-/**
- * Autogenerated by Thrift Compiler (0.9.0)
- *
- * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING
- * @generated
- */
-#include "parquet_constants.h"
-
-namespace parquet {
-
-const parquetConstants g_parquet_constants;
-
-parquetConstants::parquetConstants() {
-}
-
-} // namespace
-
http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/337cf584/generated/gen-cpp/parquet_constants.h
----------------------------------------------------------------------
diff --git a/generated/gen-cpp/parquet_constants.h b/generated/gen-cpp/parquet_constants.h
deleted file mode 100644
index 71d6f58..0000000
--- a/generated/gen-cpp/parquet_constants.h
+++ /dev/null
@@ -1,24 +0,0 @@
-/**
- * Autogenerated by Thrift Compiler (0.9.0)
- *
- * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING
- * @generated
- */
-#ifndef parquet_CONSTANTS_H
-#define parquet_CONSTANTS_H
-
-#include "parquet_types.h"
-
-namespace parquet {
-
-class parquetConstants {
- public:
- parquetConstants();
-
-};
-
-extern const parquetConstants g_parquet_constants;
-
-} // namespace
-
-#endif
http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/337cf584/generated/gen-cpp/parquet_types.cpp
----------------------------------------------------------------------
diff --git a/generated/gen-cpp/parquet_types.cpp b/generated/gen-cpp/parquet_types.cpp
deleted file mode 100644
index 06d388c..0000000
--- a/generated/gen-cpp/parquet_types.cpp
+++ /dev/null
@@ -1,2006 +0,0 @@
-/**
- * Autogenerated by Thrift Compiler (0.9.0)
- *
- * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING
- * @generated
- */
-#include "parquet_types.h"
-
-#include <algorithm>
-
-namespace parquet {
-
-int _kTypeValues[] = {
- Type::BOOLEAN,
- Type::INT32,
- Type::INT64,
- Type::INT96,
- Type::FLOAT,
- Type::DOUBLE,
- Type::BYTE_ARRAY,
- Type::FIXED_LEN_BYTE_ARRAY
-};
-const char* _kTypeNames[] = {
- "BOOLEAN",
- "INT32",
- "INT64",
- "INT96",
- "FLOAT",
- "DOUBLE",
- "BYTE_ARRAY",
- "FIXED_LEN_BYTE_ARRAY"
-};
-const std::map<int, const char*> _Type_VALUES_TO_NAMES(::apache::thrift::TEnumIterator(8, _kTypeValues, _kTypeNames), ::apache::thrift::TEnumIterator(-1, NULL, NULL));
-
-int _kConvertedTypeValues[] = {
- ConvertedType::UTF8,
- ConvertedType::MAP,
- ConvertedType::MAP_KEY_VALUE,
- ConvertedType::LIST,
- ConvertedType::ENUM,
- ConvertedType::DECIMAL
-};
-const char* _kConvertedTypeNames[] = {
- "UTF8",
- "MAP",
- "MAP_KEY_VALUE",
- "LIST",
- "ENUM",
- "DECIMAL"
-};
-const std::map<int, const char*> _ConvertedType_VALUES_TO_NAMES(::apache::thrift::TEnumIterator(6, _kConvertedTypeValues, _kConvertedTypeNames), ::apache::thrift::TEnumIterator(-1, NULL, NULL));
-
-int _kFieldRepetitionTypeValues[] = {
- FieldRepetitionType::REQUIRED,
- FieldRepetitionType::OPTIONAL,
- FieldRepetitionType::REPEATED
-};
-const char* _kFieldRepetitionTypeNames[] = {
- "REQUIRED",
- "OPTIONAL",
- "REPEATED"
-};
-const std::map<int, const char*> _FieldRepetitionType_VALUES_TO_NAMES(::apache::thrift::TEnumIterator(3, _kFieldRepetitionTypeValues, _kFieldRepetitionTypeNames), ::apache::thrift::TEnumIterator(-1, NULL, NULL));
-
-int _kEncodingValues[] = {
- Encoding::PLAIN,
- Encoding::PLAIN_DICTIONARY,
- Encoding::RLE,
- Encoding::BIT_PACKED,
- Encoding::DELTA_BINARY_PACKED,
- Encoding::DELTA_LENGTH_BYTE_ARRAY,
- Encoding::DELTA_BYTE_ARRAY,
- Encoding::RLE_DICTIONARY
-};
-const char* _kEncodingNames[] = {
- "PLAIN",
- "PLAIN_DICTIONARY",
- "RLE",
- "BIT_PACKED",
- "DELTA_BINARY_PACKED",
- "DELTA_LENGTH_BYTE_ARRAY",
- "DELTA_BYTE_ARRAY",
- "RLE_DICTIONARY"
-};
-const std::map<int, const char*> _Encoding_VALUES_TO_NAMES(::apache::thrift::TEnumIterator(8, _kEncodingValues, _kEncodingNames), ::apache::thrift::TEnumIterator(-1, NULL, NULL));
-
-int _kCompressionCodecValues[] = {
- CompressionCodec::UNCOMPRESSED,
- CompressionCodec::SNAPPY,
- CompressionCodec::GZIP,
- CompressionCodec::LZO
-};
-const char* _kCompressionCodecNames[] = {
- "UNCOMPRESSED",
- "SNAPPY",
- "GZIP",
- "LZO"
-};
-const std::map<int, const char*> _CompressionCodec_VALUES_TO_NAMES(::apache::thrift::TEnumIterator(4, _kCompressionCodecValues, _kCompressionCodecNames), ::apache::thrift::TEnumIterator(-1, NULL, NULL));
-
-int _kPageTypeValues[] = {
- PageType::DATA_PAGE,
- PageType::INDEX_PAGE,
- PageType::DICTIONARY_PAGE,
- PageType::DATA_PAGE_V2
-};
-const char* _kPageTypeNames[] = {
- "DATA_PAGE",
- "INDEX_PAGE",
- "DICTIONARY_PAGE",
- "DATA_PAGE_V2"
-};
-const std::map<int, const char*> _PageType_VALUES_TO_NAMES(::apache::thrift::TEnumIterator(4, _kPageTypeValues, _kPageTypeNames), ::apache::thrift::TEnumIterator(-1, NULL, NULL));
-
-const char* Statistics::ascii_fingerprint = "CE004821871820DD79A8FD98BB101F6D";
-const uint8_t Statistics::binary_fingerprint[16] = {0xCE,0x00,0x48,0x21,0x87,0x18,0x20,0xDD,0x79,0xA8,0xFD,0x98,0xBB,0x10,0x1F,0x6D};
-
-uint32_t Statistics::read(::apache::thrift::protocol::TProtocol* iprot) {
-
- uint32_t xfer = 0;
- std::string fname;
- ::apache::thrift::protocol::TType ftype;
- int16_t fid;
-
- xfer += iprot->readStructBegin(fname);
-
- using ::apache::thrift::protocol::TProtocolException;
-
-
- while (true)
- {
- xfer += iprot->readFieldBegin(fname, ftype, fid);
- if (ftype == ::apache::thrift::protocol::T_STOP) {
- break;
- }
- switch (fid)
- {
- case 1:
- if (ftype == ::apache::thrift::protocol::T_STRING) {
- xfer += iprot->readBinary(this->max);
- this->__isset.max = true;
- } else {
- xfer += iprot->skip(ftype);
- }
- break;
- case 2:
- if (ftype == ::apache::thrift::protocol::T_STRING) {
- xfer += iprot->readBinary(this->min);
- this->__isset.min = true;
- } else {
- xfer += iprot->skip(ftype);
- }
- break;
- case 3:
- if (ftype == ::apache::thrift::protocol::T_I64) {
- xfer += iprot->readI64(this->null_count);
- this->__isset.null_count = true;
- } else {
- xfer += iprot->skip(ftype);
- }
- break;
- case 4:
- if (ftype == ::apache::thrift::protocol::T_I64) {
- xfer += iprot->readI64(this->distinct_count);
- this->__isset.distinct_count = true;
- } else {
- xfer += iprot->skip(ftype);
- }
- break;
- default:
- xfer += iprot->skip(ftype);
- break;
- }
- xfer += iprot->readFieldEnd();
- }
-
- xfer += iprot->readStructEnd();
-
- return xfer;
-}
-
-uint32_t Statistics::write(::apache::thrift::protocol::TProtocol* oprot) const {
- uint32_t xfer = 0;
- uint32_t fcnt = 0;
- xfer += oprot->writeStructBegin("Statistics");
-
- if (this->__isset.max) {
- ++fcnt;
- xfer += oprot->writeFieldBegin("max", ::apache::thrift::protocol::T_STRING, 1);
- xfer += oprot->writeBinary(this->max);
- xfer += oprot->writeFieldEnd();
- }
- if (this->__isset.min) {
- ++fcnt;
- xfer += oprot->writeFieldBegin("min", ::apache::thrift::protocol::T_STRING, 2);
- xfer += oprot->writeBinary(this->min);
- xfer += oprot->writeFieldEnd();
- }
- if (this->__isset.null_count) {
- ++fcnt;
- xfer += oprot->writeFieldBegin("null_count", ::apache::thrift::protocol::T_I64, 3);
- xfer += oprot->writeI64(this->null_count);
- xfer += oprot->writeFieldEnd();
- }
- if (this->__isset.distinct_count) {
- ++fcnt;
- xfer += oprot->writeFieldBegin("distinct_count", ::apache::thrift::protocol::T_I64, 4);
- xfer += oprot->writeI64(this->distinct_count);
- xfer += oprot->writeFieldEnd();
- }
- xfer += oprot->writeFieldStop();
- xfer += oprot->writeStructEnd();
- return xfer;
-}
-
-void swap(Statistics &a, Statistics &b) {
- using ::std::swap;
- swap(a.max, b.max);
- swap(a.min, b.min);
- swap(a.null_count, b.null_count);
- swap(a.distinct_count, b.distinct_count);
- swap(a.__isset, b.__isset);
-}
-
-const char* SchemaElement::ascii_fingerprint = "388A784401753800444CFEAC8BC1B1A1";
-const uint8_t SchemaElement::binary_fingerprint[16] = {0x38,0x8A,0x78,0x44,0x01,0x75,0x38,0x00,0x44,0x4C,0xFE,0xAC,0x8B,0xC1,0xB1,0xA1};
-
-uint32_t SchemaElement::read(::apache::thrift::protocol::TProtocol* iprot) {
-
- uint32_t xfer = 0;
- std::string fname;
- ::apache::thrift::protocol::TType ftype;
- int16_t fid;
-
- xfer += iprot->readStructBegin(fname);
-
- using ::apache::thrift::protocol::TProtocolException;
-
- bool isset_name = false;
-
- while (true)
- {
- xfer += iprot->readFieldBegin(fname, ftype, fid);
- if (ftype == ::apache::thrift::protocol::T_STOP) {
- break;
- }
- switch (fid)
- {
- case 1:
- if (ftype == ::apache::thrift::protocol::T_I32) {
- int32_t ecast0;
- xfer += iprot->readI32(ecast0);
- this->type = (Type::type)ecast0;
- this->__isset.type = true;
- } else {
- xfer += iprot->skip(ftype);
- }
- break;
- case 2:
- if (ftype == ::apache::thrift::protocol::T_I32) {
- xfer += iprot->readI32(this->type_length);
- this->__isset.type_length = true;
- } else {
- xfer += iprot->skip(ftype);
- }
- break;
- case 3:
- if (ftype == ::apache::thrift::protocol::T_I32) {
- int32_t ecast1;
- xfer += iprot->readI32(ecast1);
- this->repetition_type = (FieldRepetitionType::type)ecast1;
- this->__isset.repetition_type = true;
- } else {
- xfer += iprot->skip(ftype);
- }
- break;
- case 4:
- if (ftype == ::apache::thrift::protocol::T_STRING) {
- xfer += iprot->readString(this->name);
- isset_name = true;
- } else {
- xfer += iprot->skip(ftype);
- }
- break;
- case 5:
- if (ftype == ::apache::thrift::protocol::T_I32) {
- xfer += iprot->readI32(this->num_children);
- this->__isset.num_children = true;
- } else {
- xfer += iprot->skip(ftype);
- }
- break;
- case 6:
- if (ftype == ::apache::thrift::protocol::T_I32) {
- int32_t ecast2;
- xfer += iprot->readI32(ecast2);
- this->converted_type = (ConvertedType::type)ecast2;
- this->__isset.converted_type = true;
- } else {
- xfer += iprot->skip(ftype);
- }
- break;
- case 7:
- if (ftype == ::apache::thrift::protocol::T_I32) {
- xfer += iprot->readI32(this->scale);
- this->__isset.scale = true;
- } else {
- xfer += iprot->skip(ftype);
- }
- break;
- case 8:
- if (ftype == ::apache::thrift::protocol::T_I32) {
- xfer += iprot->readI32(this->precision);
- this->__isset.precision = true;
- } else {
- xfer += iprot->skip(ftype);
- }
- break;
- default:
- xfer += iprot->skip(ftype);
- break;
- }
- xfer += iprot->readFieldEnd();
- }
-
- xfer += iprot->readStructEnd();
-
- if (!isset_name)
- throw TProtocolException(TProtocolException::INVALID_DATA);
- return xfer;
-}
-
-uint32_t SchemaElement::write(::apache::thrift::protocol::TProtocol* oprot) const {
- uint32_t xfer = 0;
- uint32_t fcnt = 0;
- xfer += oprot->writeStructBegin("SchemaElement");
-
- if (this->__isset.type) {
- ++fcnt;
- xfer += oprot->writeFieldBegin("type", ::apache::thrift::protocol::T_I32, 1);
- xfer += oprot->writeI32((int32_t)this->type);
- xfer += oprot->writeFieldEnd();
- }
- if (this->__isset.type_length) {
- ++fcnt;
- xfer += oprot->writeFieldBegin("type_length", ::apache::thrift::protocol::T_I32, 2);
- xfer += oprot->writeI32(this->type_length);
- xfer += oprot->writeFieldEnd();
- }
- if (this->__isset.repetition_type) {
- ++fcnt;
- xfer += oprot->writeFieldBegin("repetition_type", ::apache::thrift::protocol::T_I32, 3);
- xfer += oprot->writeI32((int32_t)this->repetition_type);
- xfer += oprot->writeFieldEnd();
- }
- ++fcnt;
- xfer += oprot->writeFieldBegin("name", ::apache::thrift::protocol::T_STRING, 4);
- xfer += oprot->writeString(this->name);
- xfer += oprot->writeFieldEnd();
-
- if (this->__isset.num_children) {
- ++fcnt;
- xfer += oprot->writeFieldBegin("num_children", ::apache::thrift::protocol::T_I32, 5);
- xfer += oprot->writeI32(this->num_children);
- xfer += oprot->writeFieldEnd();
- }
- if (this->__isset.converted_type) {
- ++fcnt;
- xfer += oprot->writeFieldBegin("converted_type", ::apache::thrift::protocol::T_I32, 6);
- xfer += oprot->writeI32((int32_t)this->converted_type);
- xfer += oprot->writeFieldEnd();
- }
- if (this->__isset.scale) {
- ++fcnt;
- xfer += oprot->writeFieldBegin("scale", ::apache::thrift::protocol::T_I32, 7);
- xfer += oprot->writeI32(this->scale);
- xfer += oprot->writeFieldEnd();
- }
- if (this->__isset.precision) {
- ++fcnt;
- xfer += oprot->writeFieldBegin("precision", ::apache::thrift::protocol::T_I32, 8);
- xfer += oprot->writeI32(this->precision);
- xfer += oprot->writeFieldEnd();
- }
- xfer += oprot->writeFieldStop();
- xfer += oprot->writeStructEnd();
- return xfer;
-}
-
-void swap(SchemaElement &a, SchemaElement &b) {
- using ::std::swap;
- swap(a.type, b.type);
- swap(a.type_length, b.type_length);
- swap(a.repetition_type, b.repetition_type);
- swap(a.name, b.name);
- swap(a.num_children, b.num_children);
- swap(a.converted_type, b.converted_type);
- swap(a.scale, b.scale);
- swap(a.precision, b.precision);
- swap(a.__isset, b.__isset);
-}
-
-const char* DataPageHeader::ascii_fingerprint = "5FC1792B0483E9C984475384165040B1";
-const uint8_t DataPageHeader::binary_fingerprint[16] = {0x5F,0xC1,0x79,0x2B,0x04,0x83,0xE9,0xC9,0x84,0x47,0x53,0x84,0x16,0x50,0x40,0xB1};
-
-uint32_t DataPageHeader::read(::apache::thrift::protocol::TProtocol* iprot) {
-
- uint32_t xfer = 0;
- std::string fname;
- ::apache::thrift::protocol::TType ftype;
- int16_t fid;
-
- xfer += iprot->readStructBegin(fname);
-
- using ::apache::thrift::protocol::TProtocolException;
-
- bool isset_num_values = false;
- bool isset_encoding = false;
- bool isset_definition_level_encoding = false;
- bool isset_repetition_level_encoding = false;
-
- while (true)
- {
- xfer += iprot->readFieldBegin(fname, ftype, fid);
- if (ftype == ::apache::thrift::protocol::T_STOP) {
- break;
- }
- switch (fid)
- {
- case 1:
- if (ftype == ::apache::thrift::protocol::T_I32) {
- xfer += iprot->readI32(this->num_values);
- isset_num_values = true;
- } else {
- xfer += iprot->skip(ftype);
- }
- break;
- case 2:
- if (ftype == ::apache::thrift::protocol::T_I32) {
- int32_t ecast3;
- xfer += iprot->readI32(ecast3);
- this->encoding = (Encoding::type)ecast3;
- isset_encoding = true;
- } else {
- xfer += iprot->skip(ftype);
- }
- break;
- case 3:
- if (ftype == ::apache::thrift::protocol::T_I32) {
- int32_t ecast4;
- xfer += iprot->readI32(ecast4);
- this->definition_level_encoding = (Encoding::type)ecast4;
- isset_definition_level_encoding = true;
- } else {
- xfer += iprot->skip(ftype);
- }
- break;
- case 4:
- if (ftype == ::apache::thrift::protocol::T_I32) {
- int32_t ecast5;
- xfer += iprot->readI32(ecast5);
- this->repetition_level_encoding = (Encoding::type)ecast5;
- isset_repetition_level_encoding = true;
- } else {
- xfer += iprot->skip(ftype);
- }
- break;
- case 5:
- if (ftype == ::apache::thrift::protocol::T_STRUCT) {
- xfer += this->statistics.read(iprot);
- this->__isset.statistics = true;
- } else {
- xfer += iprot->skip(ftype);
- }
- break;
- default:
- xfer += iprot->skip(ftype);
- break;
- }
- xfer += iprot->readFieldEnd();
- }
-
- xfer += iprot->readStructEnd();
-
- if (!isset_num_values)
- throw TProtocolException(TProtocolException::INVALID_DATA);
- if (!isset_encoding)
- throw TProtocolException(TProtocolException::INVALID_DATA);
- if (!isset_definition_level_encoding)
- throw TProtocolException(TProtocolException::INVALID_DATA);
- if (!isset_repetition_level_encoding)
- throw TProtocolException(TProtocolException::INVALID_DATA);
- return xfer;
-}
-
-uint32_t DataPageHeader::write(::apache::thrift::protocol::TProtocol* oprot) const {
- uint32_t xfer = 0;
- uint32_t fcnt = 0;
- xfer += oprot->writeStructBegin("DataPageHeader");
-
- ++fcnt;
- xfer += oprot->writeFieldBegin("num_values", ::apache::thrift::protocol::T_I32, 1);
- xfer += oprot->writeI32(this->num_values);
- xfer += oprot->writeFieldEnd();
-
- ++fcnt;
- xfer += oprot->writeFieldBegin("encoding", ::apache::thrift::protocol::T_I32, 2);
- xfer += oprot->writeI32((int32_t)this->encoding);
- xfer += oprot->writeFieldEnd();
-
- ++fcnt;
- xfer += oprot->writeFieldBegin("definition_level_encoding", ::apache::thrift::protocol::T_I32, 3);
- xfer += oprot->writeI32((int32_t)this->definition_level_encoding);
- xfer += oprot->writeFieldEnd();
-
- ++fcnt;
- xfer += oprot->writeFieldBegin("repetition_level_encoding", ::apache::thrift::protocol::T_I32, 4);
- xfer += oprot->writeI32((int32_t)this->repetition_level_encoding);
- xfer += oprot->writeFieldEnd();
-
- if (this->__isset.statistics) {
- ++fcnt;
- xfer += oprot->writeFieldBegin("statistics", ::apache::thrift::protocol::T_STRUCT, 5);
- xfer += this->statistics.write(oprot);
- xfer += oprot->writeFieldEnd();
- }
- xfer += oprot->writeFieldStop();
- xfer += oprot->writeStructEnd();
- return xfer;
-}
-
-void swap(DataPageHeader &a, DataPageHeader &b) {
- using ::std::swap;
- swap(a.num_values, b.num_values);
- swap(a.encoding, b.encoding);
- swap(a.definition_level_encoding, b.definition_level_encoding);
- swap(a.repetition_level_encoding, b.repetition_level_encoding);
- swap(a.statistics, b.statistics);
- swap(a.__isset, b.__isset);
-}
-
-const char* IndexPageHeader::ascii_fingerprint = "99914B932BD37A50B983C5E7C90AE93B";
-const uint8_t IndexPageHeader::binary_fingerprint[16] = {0x99,0x91,0x4B,0x93,0x2B,0xD3,0x7A,0x50,0xB9,0x83,0xC5,0xE7,0xC9,0x0A,0xE9,0x3B};
-
-uint32_t IndexPageHeader::read(::apache::thrift::protocol::TProtocol* iprot) {
-
- uint32_t xfer = 0;
- std::string fname;
- ::apache::thrift::protocol::TType ftype;
- int16_t fid;
-
- xfer += iprot->readStructBegin(fname);
-
- using ::apache::thrift::protocol::TProtocolException;
-
-
- while (true)
- {
- xfer += iprot->readFieldBegin(fname, ftype, fid);
- if (ftype == ::apache::thrift::protocol::T_STOP) {
- break;
- }
- xfer += iprot->skip(ftype);
- xfer += iprot->readFieldEnd();
- }
-
- xfer += iprot->readStructEnd();
-
- return xfer;
-}
-
-uint32_t IndexPageHeader::write(::apache::thrift::protocol::TProtocol* oprot) const {
- uint32_t xfer = 0;
- uint32_t fcnt = 0;
- xfer += oprot->writeStructBegin("IndexPageHeader");
-
- xfer += oprot->writeFieldStop();
- xfer += oprot->writeStructEnd();
- return xfer;
-}
-
-void swap(IndexPageHeader &a, IndexPageHeader &b) {
- using ::std::swap;
- (void) a;
- (void) b;
-}
-
-const char* DictionaryPageHeader::ascii_fingerprint = "B149E4528254D495610C22AE4BD539C5";
-const uint8_t DictionaryPageHeader::binary_fingerprint[16] = {0xB1,0x49,0xE4,0x52,0x82,0x54,0xD4,0x95,0x61,0x0C,0x22,0xAE,0x4B,0xD5,0x39,0xC5};
-
-uint32_t DictionaryPageHeader::read(::apache::thrift::protocol::TProtocol* iprot) {
-
- uint32_t xfer = 0;
- std::string fname;
- ::apache::thrift::protocol::TType ftype;
- int16_t fid;
-
- xfer += iprot->readStructBegin(fname);
-
- using ::apache::thrift::protocol::TProtocolException;
-
- bool isset_num_values = false;
- bool isset_encoding = false;
-
- while (true)
- {
- xfer += iprot->readFieldBegin(fname, ftype, fid);
- if (ftype == ::apache::thrift::protocol::T_STOP) {
- break;
- }
- switch (fid)
- {
- case 1:
- if (ftype == ::apache::thrift::protocol::T_I32) {
- xfer += iprot->readI32(this->num_values);
- isset_num_values = true;
- } else {
- xfer += iprot->skip(ftype);
- }
- break;
- case 2:
- if (ftype == ::apache::thrift::protocol::T_I32) {
- int32_t ecast6;
- xfer += iprot->readI32(ecast6);
- this->encoding = (Encoding::type)ecast6;
- isset_encoding = true;
- } else {
- xfer += iprot->skip(ftype);
- }
- break;
- case 3:
- if (ftype == ::apache::thrift::protocol::T_BOOL) {
- xfer += iprot->readBool(this->is_sorted);
- this->__isset.is_sorted = true;
- } else {
- xfer += iprot->skip(ftype);
- }
- break;
- default:
- xfer += iprot->skip(ftype);
- break;
- }
- xfer += iprot->readFieldEnd();
- }
-
- xfer += iprot->readStructEnd();
-
- if (!isset_num_values)
- throw TProtocolException(TProtocolException::INVALID_DATA);
- if (!isset_encoding)
- throw TProtocolException(TProtocolException::INVALID_DATA);
- return xfer;
-}
-
-uint32_t DictionaryPageHeader::write(::apache::thrift::protocol::TProtocol* oprot) const {
- uint32_t xfer = 0;
- uint32_t fcnt = 0;
- xfer += oprot->writeStructBegin("DictionaryPageHeader");
-
- ++fcnt;
- xfer += oprot->writeFieldBegin("num_values", ::apache::thrift::protocol::T_I32, 1);
- xfer += oprot->writeI32(this->num_values);
- xfer += oprot->writeFieldEnd();
-
- ++fcnt;
- xfer += oprot->writeFieldBegin("encoding", ::apache::thrift::protocol::T_I32, 2);
- xfer += oprot->writeI32((int32_t)this->encoding);
- xfer += oprot->writeFieldEnd();
-
- if (this->__isset.is_sorted) {
- ++fcnt;
- xfer += oprot->writeFieldBegin("is_sorted", ::apache::thrift::protocol::T_BOOL, 3);
- xfer += oprot->writeBool(this->is_sorted);
- xfer += oprot->writeFieldEnd();
- }
- xfer += oprot->writeFieldStop();
- xfer += oprot->writeStructEnd();
- return xfer;
-}
-
-void swap(DictionaryPageHeader &a, DictionaryPageHeader &b) {
- using ::std::swap;
- swap(a.num_values, b.num_values);
- swap(a.encoding, b.encoding);
- swap(a.is_sorted, b.is_sorted);
- swap(a.__isset, b.__isset);
-}
-
-const char* DataPageHeaderV2::ascii_fingerprint = "69FF2F6BD1A443440D5E46ABA5A3A919";
-const uint8_t DataPageHeaderV2::binary_fingerprint[16] = {0x69,0xFF,0x2F,0x6B,0xD1,0xA4,0x43,0x44,0x0D,0x5E,0x46,0xAB,0xA5,0xA3,0xA9,0x19};
-
-uint32_t DataPageHeaderV2::read(::apache::thrift::protocol::TProtocol* iprot) {
-
- uint32_t xfer = 0;
- std::string fname;
- ::apache::thrift::protocol::TType ftype;
- int16_t fid;
-
- xfer += iprot->readStructBegin(fname);
-
- using ::apache::thrift::protocol::TProtocolException;
-
- bool isset_num_values = false;
- bool isset_num_nulls = false;
- bool isset_num_rows = false;
- bool isset_encoding = false;
- bool isset_definition_levels_byte_length = false;
- bool isset_repetition_levels_byte_length = false;
-
- while (true)
- {
- xfer += iprot->readFieldBegin(fname, ftype, fid);
- if (ftype == ::apache::thrift::protocol::T_STOP) {
- break;
- }
- switch (fid)
- {
- case 1:
- if (ftype == ::apache::thrift::protocol::T_I32) {
- xfer += iprot->readI32(this->num_values);
- isset_num_values = true;
- } else {
- xfer += iprot->skip(ftype);
- }
- break;
- case 2:
- if (ftype == ::apache::thrift::protocol::T_I32) {
- xfer += iprot->readI32(this->num_nulls);
- isset_num_nulls = true;
- } else {
- xfer += iprot->skip(ftype);
- }
- break;
- case 3:
- if (ftype == ::apache::thrift::protocol::T_I32) {
- xfer += iprot->readI32(this->num_rows);
- isset_num_rows = true;
- } else {
- xfer += iprot->skip(ftype);
- }
- break;
- case 4:
- if (ftype == ::apache::thrift::protocol::T_I32) {
- int32_t ecast7;
- xfer += iprot->readI32(ecast7);
- this->encoding = (Encoding::type)ecast7;
- isset_encoding = true;
- } else {
- xfer += iprot->skip(ftype);
- }
- break;
- case 5:
- if (ftype == ::apache::thrift::protocol::T_I32) {
- xfer += iprot->readI32(this->definition_levels_byte_length);
- isset_definition_levels_byte_length = true;
- } else {
- xfer += iprot->skip(ftype);
- }
- break;
- case 6:
- if (ftype == ::apache::thrift::protocol::T_I32) {
- xfer += iprot->readI32(this->repetition_levels_byte_length);
- isset_repetition_levels_byte_length = true;
- } else {
- xfer += iprot->skip(ftype);
- }
- break;
- case 7:
- if (ftype == ::apache::thrift::protocol::T_BOOL) {
- xfer += iprot->readBool(this->is_compressed);
- this->__isset.is_compressed = true;
- } else {
- xfer += iprot->skip(ftype);
- }
- break;
- case 8:
- if (ftype == ::apache::thrift::protocol::T_STRUCT) {
- xfer += this->statistics.read(iprot);
- this->__isset.statistics = true;
- } else {
- xfer += iprot->skip(ftype);
- }
- break;
- default:
- xfer += iprot->skip(ftype);
- break;
- }
- xfer += iprot->readFieldEnd();
- }
-
- xfer += iprot->readStructEnd();
-
- if (!isset_num_values)
- throw TProtocolException(TProtocolException::INVALID_DATA);
- if (!isset_num_nulls)
- throw TProtocolException(TProtocolException::INVALID_DATA);
- if (!isset_num_rows)
- throw TProtocolException(TProtocolException::INVALID_DATA);
- if (!isset_encoding)
- throw TProtocolException(TProtocolException::INVALID_DATA);
- if (!isset_definition_levels_byte_length)
- throw TProtocolException(TProtocolException::INVALID_DATA);
- if (!isset_repetition_levels_byte_length)
- throw TProtocolException(TProtocolException::INVALID_DATA);
- return xfer;
-}
-
-uint32_t DataPageHeaderV2::write(::apache::thrift::protocol::TProtocol* oprot) const {
- uint32_t xfer = 0;
- uint32_t fcnt = 0;
- xfer += oprot->writeStructBegin("DataPageHeaderV2");
-
- ++fcnt;
- xfer += oprot->writeFieldBegin("num_values", ::apache::thrift::protocol::T_I32, 1);
- xfer += oprot->writeI32(this->num_values);
- xfer += oprot->writeFieldEnd();
-
- ++fcnt;
- xfer += oprot->writeFieldBegin("num_nulls", ::apache::thrift::protocol::T_I32, 2);
- xfer += oprot->writeI32(this->num_nulls);
- xfer += oprot->writeFieldEnd();
-
- ++fcnt;
- xfer += oprot->writeFieldBegin("num_rows", ::apache::thrift::protocol::T_I32, 3);
- xfer += oprot->writeI32(this->num_rows);
- xfer += oprot->writeFieldEnd();
-
- ++fcnt;
- xfer += oprot->writeFieldBegin("encoding", ::apache::thrift::protocol::T_I32, 4);
- xfer += oprot->writeI32((int32_t)this->encoding);
- xfer += oprot->writeFieldEnd();
-
- ++fcnt;
- xfer += oprot->writeFieldBegin("definition_levels_byte_length", ::apache::thrift::protocol::T_I32, 5);
- xfer += oprot->writeI32(this->definition_levels_byte_length);
- xfer += oprot->writeFieldEnd();
-
- ++fcnt;
- xfer += oprot->writeFieldBegin("repetition_levels_byte_length", ::apache::thrift::protocol::T_I32, 6);
- xfer += oprot->writeI32(this->repetition_levels_byte_length);
- xfer += oprot->writeFieldEnd();
-
- if (this->__isset.is_compressed) {
- ++fcnt;
- xfer += oprot->writeFieldBegin("is_compressed", ::apache::thrift::protocol::T_BOOL, 7);
- xfer += oprot->writeBool(this->is_compressed);
- xfer += oprot->writeFieldEnd();
- }
- if (this->__isset.statistics) {
- ++fcnt;
- xfer += oprot->writeFieldBegin("statistics", ::apache::thrift::protocol::T_STRUCT, 8);
- xfer += this->statistics.write(oprot);
- xfer += oprot->writeFieldEnd();
- }
- xfer += oprot->writeFieldStop();
- xfer += oprot->writeStructEnd();
- return xfer;
-}
-
-void swap(DataPageHeaderV2 &a, DataPageHeaderV2 &b) {
- using ::std::swap;
- swap(a.num_values, b.num_values);
- swap(a.num_nulls, b.num_nulls);
- swap(a.num_rows, b.num_rows);
- swap(a.encoding, b.encoding);
- swap(a.definition_levels_byte_length, b.definition_levels_byte_length);
- swap(a.repetition_levels_byte_length, b.repetition_levels_byte_length);
- swap(a.is_compressed, b.is_compressed);
- swap(a.statistics, b.statistics);
- swap(a.__isset, b.__isset);
-}
-
-const char* PageHeader::ascii_fingerprint = "B5BD2BDF3756C883A58B30B9C9F204A0";
-const uint8_t PageHeader::binary_fingerprint[16] = {0xB5,0xBD,0x2B,0xDF,0x37,0x56,0xC8,0x83,0xA5,0x8B,0x30,0xB9,0xC9,0xF2,0x04,0xA0};
-
-uint32_t PageHeader::read(::apache::thrift::protocol::TProtocol* iprot) {
-
- uint32_t xfer = 0;
- std::string fname;
- ::apache::thrift::protocol::TType ftype;
- int16_t fid;
-
- xfer += iprot->readStructBegin(fname);
-
- using ::apache::thrift::protocol::TProtocolException;
-
- bool isset_type = false;
- bool isset_uncompressed_page_size = false;
- bool isset_compressed_page_size = false;
-
- while (true)
- {
- xfer += iprot->readFieldBegin(fname, ftype, fid);
- if (ftype == ::apache::thrift::protocol::T_STOP) {
- break;
- }
- switch (fid)
- {
- case 1:
- if (ftype == ::apache::thrift::protocol::T_I32) {
- int32_t ecast8;
- xfer += iprot->readI32(ecast8);
- this->type = (PageType::type)ecast8;
- isset_type = true;
- } else {
- xfer += iprot->skip(ftype);
- }
- break;
- case 2:
- if (ftype == ::apache::thrift::protocol::T_I32) {
- xfer += iprot->readI32(this->uncompressed_page_size);
- isset_uncompressed_page_size = true;
- } else {
- xfer += iprot->skip(ftype);
- }
- break;
- case 3:
- if (ftype == ::apache::thrift::protocol::T_I32) {
- xfer += iprot->readI32(this->compressed_page_size);
- isset_compressed_page_size = true;
- } else {
- xfer += iprot->skip(ftype);
- }
- break;
- case 4:
- if (ftype == ::apache::thrift::protocol::T_I32) {
- xfer += iprot->readI32(this->crc);
- this->__isset.crc = true;
- } else {
- xfer += iprot->skip(ftype);
- }
- break;
- case 5:
- if (ftype == ::apache::thrift::protocol::T_STRUCT) {
- xfer += this->data_page_header.read(iprot);
- this->__isset.data_page_header = true;
- } else {
- xfer += iprot->skip(ftype);
- }
- break;
- case 6:
- if (ftype == ::apache::thrift::protocol::T_STRUCT) {
- xfer += this->index_page_header.read(iprot);
- this->__isset.index_page_header = true;
- } else {
- xfer += iprot->skip(ftype);
- }
- break;
- case 7:
- if (ftype == ::apache::thrift::protocol::T_STRUCT) {
- xfer += this->dictionary_page_header.read(iprot);
- this->__isset.dictionary_page_header = true;
- } else {
- xfer += iprot->skip(ftype);
- }
- break;
- case 8:
- if (ftype == ::apache::thrift::protocol::T_STRUCT) {
- xfer += this->data_page_header_v2.read(iprot);
- this->__isset.data_page_header_v2 = true;
- } else {
- xfer += iprot->skip(ftype);
- }
- break;
- default:
- xfer += iprot->skip(ftype);
- break;
- }
- xfer += iprot->readFieldEnd();
- }
-
- xfer += iprot->readStructEnd();
-
- if (!isset_type)
- throw TProtocolException(TProtocolException::INVALID_DATA);
- if (!isset_uncompressed_page_size)
- throw TProtocolException(TProtocolException::INVALID_DATA);
- if (!isset_compressed_page_size)
- throw TProtocolException(TProtocolException::INVALID_DATA);
- return xfer;
-}
-
-uint32_t PageHeader::write(::apache::thrift::protocol::TProtocol* oprot) const {
- uint32_t xfer = 0;
- uint32_t fcnt = 0;
- xfer += oprot->writeStructBegin("PageHeader");
-
- ++fcnt;
- xfer += oprot->writeFieldBegin("type", ::apache::thrift::protocol::T_I32, 1);
- xfer += oprot->writeI32((int32_t)this->type);
- xfer += oprot->writeFieldEnd();
-
- ++fcnt;
- xfer += oprot->writeFieldBegin("uncompressed_page_size", ::apache::thrift::protocol::T_I32, 2);
- xfer += oprot->writeI32(this->uncompressed_page_size);
- xfer += oprot->writeFieldEnd();
-
- ++fcnt;
- xfer += oprot->writeFieldBegin("compressed_page_size", ::apache::thrift::protocol::T_I32, 3);
- xfer += oprot->writeI32(this->compressed_page_size);
- xfer += oprot->writeFieldEnd();
-
- if (this->__isset.crc) {
- ++fcnt;
- xfer += oprot->writeFieldBegin("crc", ::apache::thrift::protocol::T_I32, 4);
- xfer += oprot->writeI32(this->crc);
- xfer += oprot->writeFieldEnd();
- }
- if (this->__isset.data_page_header) {
- ++fcnt;
- xfer += oprot->writeFieldBegin("data_page_header", ::apache::thrift::protocol::T_STRUCT, 5);
- xfer += this->data_page_header.write(oprot);
- xfer += oprot->writeFieldEnd();
- }
- if (this->__isset.index_page_header) {
- ++fcnt;
- xfer += oprot->writeFieldBegin("index_page_header", ::apache::thrift::protocol::T_STRUCT, 6);
- xfer += this->index_page_header.write(oprot);
- xfer += oprot->writeFieldEnd();
- }
- if (this->__isset.dictionary_page_header) {
- ++fcnt;
- xfer += oprot->writeFieldBegin("dictionary_page_header", ::apache::thrift::protocol::T_STRUCT, 7);
- xfer += this->dictionary_page_header.write(oprot);
- xfer += oprot->writeFieldEnd();
- }
- if (this->__isset.data_page_header_v2) {
- ++fcnt;
- xfer += oprot->writeFieldBegin("data_page_header_v2", ::apache::thrift::protocol::T_STRUCT, 8);
- xfer += this->data_page_header_v2.write(oprot);
- xfer += oprot->writeFieldEnd();
- }
- xfer += oprot->writeFieldStop();
- xfer += oprot->writeStructEnd();
- return xfer;
-}
-
-void swap(PageHeader &a, PageHeader &b) {
- using ::std::swap;
- swap(a.type, b.type);
- swap(a.uncompressed_page_size, b.uncompressed_page_size);
- swap(a.compressed_page_size, b.compressed_page_size);
- swap(a.crc, b.crc);
- swap(a.data_page_header, b.data_page_header);
- swap(a.index_page_header, b.index_page_header);
- swap(a.dictionary_page_header, b.dictionary_page_header);
- swap(a.data_page_header_v2, b.data_page_header_v2);
- swap(a.__isset, b.__isset);
-}
-
-const char* KeyValue::ascii_fingerprint = "5B708A954C550ECA9C1A49D3C5CAFAB9";
-const uint8_t KeyValue::binary_fingerprint[16] = {0x5B,0x70,0x8A,0x95,0x4C,0x55,0x0E,0xCA,0x9C,0x1A,0x49,0xD3,0xC5,0xCA,0xFA,0xB9};
-
-uint32_t KeyValue::read(::apache::thrift::protocol::TProtocol* iprot) {
-
- uint32_t xfer = 0;
- std::string fname;
- ::apache::thrift::protocol::TType ftype;
- int16_t fid;
-
- xfer += iprot->readStructBegin(fname);
-
- using ::apache::thrift::protocol::TProtocolException;
-
- bool isset_key = false;
-
- while (true)
- {
- xfer += iprot->readFieldBegin(fname, ftype, fid);
- if (ftype == ::apache::thrift::protocol::T_STOP) {
- break;
- }
- switch (fid)
- {
- case 1:
- if (ftype == ::apache::thrift::protocol::T_STRING) {
- xfer += iprot->readString(this->key);
- isset_key = true;
- } else {
- xfer += iprot->skip(ftype);
- }
- break;
- case 2:
- if (ftype == ::apache::thrift::protocol::T_STRING) {
- xfer += iprot->readString(this->value);
- this->__isset.value = true;
- } else {
- xfer += iprot->skip(ftype);
- }
- break;
- default:
- xfer += iprot->skip(ftype);
- break;
- }
- xfer += iprot->readFieldEnd();
- }
-
- xfer += iprot->readStructEnd();
-
- if (!isset_key)
- throw TProtocolException(TProtocolException::INVALID_DATA);
- return xfer;
-}
-
-uint32_t KeyValue::write(::apache::thrift::protocol::TProtocol* oprot) const {
- uint32_t xfer = 0;
- uint32_t fcnt = 0;
- xfer += oprot->writeStructBegin("KeyValue");
-
- ++fcnt;
- xfer += oprot->writeFieldBegin("key", ::apache::thrift::protocol::T_STRING, 1);
- xfer += oprot->writeString(this->key);
- xfer += oprot->writeFieldEnd();
-
- if (this->__isset.value) {
- ++fcnt;
- xfer += oprot->writeFieldBegin("value", ::apache::thrift::protocol::T_STRING, 2);
- xfer += oprot->writeString(this->value);
- xfer += oprot->writeFieldEnd();
- }
- xfer += oprot->writeFieldStop();
- xfer += oprot->writeStructEnd();
- return xfer;
-}
-
-void swap(KeyValue &a, KeyValue &b) {
- using ::std::swap;
- swap(a.key, b.key);
- swap(a.value, b.value);
- swap(a.__isset, b.__isset);
-}
-
-const char* SortingColumn::ascii_fingerprint = "F079C2D58A783AD90F9BE05D10DBBC6F";
-const uint8_t SortingColumn::binary_fingerprint[16] = {0xF0,0x79,0xC2,0xD5,0x8A,0x78,0x3A,0xD9,0x0F,0x9B,0xE0,0x5D,0x10,0xDB,0xBC,0x6F};
-
-uint32_t SortingColumn::read(::apache::thrift::protocol::TProtocol* iprot) {
-
- uint32_t xfer = 0;
- std::string fname;
- ::apache::thrift::protocol::TType ftype;
- int16_t fid;
-
- xfer += iprot->readStructBegin(fname);
-
- using ::apache::thrift::protocol::TProtocolException;
-
- bool isset_column_idx = false;
- bool isset_descending = false;
- bool isset_nulls_first = false;
-
- while (true)
- {
- xfer += iprot->readFieldBegin(fname, ftype, fid);
- if (ftype == ::apache::thrift::protocol::T_STOP) {
- break;
- }
- switch (fid)
- {
- case 1:
- if (ftype == ::apache::thrift::protocol::T_I32) {
- xfer += iprot->readI32(this->column_idx);
- isset_column_idx = true;
- } else {
- xfer += iprot->skip(ftype);
- }
- break;
- case 2:
- if (ftype == ::apache::thrift::protocol::T_BOOL) {
- xfer += iprot->readBool(this->descending);
- isset_descending = true;
- } else {
- xfer += iprot->skip(ftype);
- }
- break;
- case 3:
- if (ftype == ::apache::thrift::protocol::T_BOOL) {
- xfer += iprot->readBool(this->nulls_first);
- isset_nulls_first = true;
- } else {
- xfer += iprot->skip(ftype);
- }
- break;
- default:
- xfer += iprot->skip(ftype);
- break;
- }
- xfer += iprot->readFieldEnd();
- }
-
- xfer += iprot->readStructEnd();
-
- if (!isset_column_idx)
- throw TProtocolException(TProtocolException::INVALID_DATA);
- if (!isset_descending)
- throw TProtocolException(TProtocolException::INVALID_DATA);
- if (!isset_nulls_first)
- throw TProtocolException(TProtocolException::INVALID_DATA);
- return xfer;
-}
-
-uint32_t SortingColumn::write(::apache::thrift::protocol::TProtocol* oprot) const {
- uint32_t xfer = 0;
- uint32_t fcnt = 0;
- xfer += oprot->writeStructBegin("SortingColumn");
-
- ++fcnt;
- xfer += oprot->writeFieldBegin("column_idx", ::apache::thrift::protocol::T_I32, 1);
- xfer += oprot->writeI32(this->column_idx);
- xfer += oprot->writeFieldEnd();
-
- ++fcnt;
- xfer += oprot->writeFieldBegin("descending", ::apache::thrift::protocol::T_BOOL, 2);
- xfer += oprot->writeBool(this->descending);
- xfer += oprot->writeFieldEnd();
-
- ++fcnt;
- xfer += oprot->writeFieldBegin("nulls_first", ::apache::thrift::protocol::T_BOOL, 3);
- xfer += oprot->writeBool(this->nulls_first);
- xfer += oprot->writeFieldEnd();
-
- xfer += oprot->writeFieldStop();
- xfer += oprot->writeStructEnd();
- return xfer;
-}
-
-void swap(SortingColumn &a, SortingColumn &b) {
- using ::std::swap;
- swap(a.column_idx, b.column_idx);
- swap(a.descending, b.descending);
- swap(a.nulls_first, b.nulls_first);
-}
-
-const char* ColumnMetaData::ascii_fingerprint = "1AF797732BCB4465C6314FB29B86638D";
-const uint8_t ColumnMetaData::binary_fingerprint[16] = {0x1A,0xF7,0x97,0x73,0x2B,0xCB,0x44,0x65,0xC6,0x31,0x4F,0xB2,0x9B,0x86,0x63,0x8D};
-
-uint32_t ColumnMetaData::read(::apache::thrift::protocol::TProtocol* iprot) {
-
- uint32_t xfer = 0;
- std::string fname;
- ::apache::thrift::protocol::TType ftype;
- int16_t fid;
-
- xfer += iprot->readStructBegin(fname);
-
- using ::apache::thrift::protocol::TProtocolException;
-
- bool isset_type = false;
- bool isset_encodings = false;
- bool isset_path_in_schema = false;
- bool isset_codec = false;
- bool isset_num_values = false;
- bool isset_total_uncompressed_size = false;
- bool isset_total_compressed_size = false;
- bool isset_data_page_offset = false;
-
- while (true)
- {
- xfer += iprot->readFieldBegin(fname, ftype, fid);
- if (ftype == ::apache::thrift::protocol::T_STOP) {
- break;
- }
- switch (fid)
- {
- case 1:
- if (ftype == ::apache::thrift::protocol::T_I32) {
- int32_t ecast9;
- xfer += iprot->readI32(ecast9);
- this->type = (Type::type)ecast9;
- isset_type = true;
- } else {
- xfer += iprot->skip(ftype);
- }
- break;
- case 2:
- if (ftype == ::apache::thrift::protocol::T_LIST) {
- {
- this->encodings.clear();
- uint32_t _size10;
- ::apache::thrift::protocol::TType _etype13;
- xfer += iprot->readListBegin(_etype13, _size10);
- this->encodings.resize(_size10);
- uint32_t _i14;
- for (_i14 = 0; _i14 < _size10; ++_i14)
- {
- int32_t ecast15;
- xfer += iprot->readI32(ecast15);
- this->encodings[_i14] = (Encoding::type)ecast15;
- }
- xfer += iprot->readListEnd();
- }
- isset_encodings = true;
- } else {
- xfer += iprot->skip(ftype);
- }
- break;
- case 3:
- if (ftype == ::apache::thrift::protocol::T_LIST) {
- {
- this->path_in_schema.clear();
- uint32_t _size16;
- ::apache::thrift::protocol::TType _etype19;
- xfer += iprot->readListBegin(_etype19, _size16);
- this->path_in_schema.resize(_size16);
- uint32_t _i20;
- for (_i20 = 0; _i20 < _size16; ++_i20)
- {
- xfer += iprot->readString(this->path_in_schema[_i20]);
- }
- xfer += iprot->readListEnd();
- }
- isset_path_in_schema = true;
- } else {
- xfer += iprot->skip(ftype);
- }
- break;
- case 4:
- if (ftype == ::apache::thrift::protocol::T_I32) {
- int32_t ecast21;
- xfer += iprot->readI32(ecast21);
- this->codec = (CompressionCodec::type)ecast21;
- isset_codec = true;
- } else {
- xfer += iprot->skip(ftype);
- }
- break;
- case 5:
- if (ftype == ::apache::thrift::protocol::T_I64) {
- xfer += iprot->readI64(this->num_values);
- isset_num_values = true;
- } else {
- xfer += iprot->skip(ftype);
- }
- break;
- case 6:
- if (ftype == ::apache::thrift::protocol::T_I64) {
- xfer += iprot->readI64(this->total_uncompressed_size);
- isset_total_uncompressed_size = true;
- } else {
- xfer += iprot->skip(ftype);
- }
- break;
- case 7:
- if (ftype == ::apache::thrift::protocol::T_I64) {
- xfer += iprot->readI64(this->total_compressed_size);
- isset_total_compressed_size = true;
- } else {
- xfer += iprot->skip(ftype);
- }
- break;
- case 8:
- if (ftype == ::apache::thrift::protocol::T_LIST) {
- {
- this->key_value_metadata.clear();
- uint32_t _size22;
- ::apache::thrift::protocol::TType _etype25;
- xfer += iprot->readListBegin(_etype25, _size22);
- this->key_value_metadata.resize(_size22);
- uint32_t _i26;
- for (_i26 = 0; _i26 < _size22; ++_i26)
- {
- xfer += this->key_value_metadata[_i26].read(iprot);
- }
- xfer += iprot->readListEnd();
- }
- this->__isset.key_value_metadata = true;
- } else {
- xfer += iprot->skip(ftype);
- }
- break;
- case 9:
- if (ftype == ::apache::thrift::protocol::T_I64) {
- xfer += iprot->readI64(this->data_page_offset);
- isset_data_page_offset = true;
- } else {
- xfer += iprot->skip(ftype);
- }
- break;
- case 10:
- if (ftype == ::apache::thrift::protocol::T_I64) {
- xfer += iprot->readI64(this->index_page_offset);
- this->__isset.index_page_offset = true;
- } else {
- xfer += iprot->skip(ftype);
- }
- break;
- case 11:
- if (ftype == ::apache::thrift::protocol::T_I64) {
- xfer += iprot->readI64(this->dictionary_page_offset);
- this->__isset.dictionary_page_offset = true;
- } else {
- xfer += iprot->skip(ftype);
- }
- break;
- case 12:
- if (ftype == ::apache::thrift::protocol::T_STRUCT) {
- xfer += this->statistics.read(iprot);
- this->__isset.statistics = true;
- } else {
- xfer += iprot->skip(ftype);
- }
- break;
- default:
- xfer += iprot->skip(ftype);
- break;
- }
- xfer += iprot->readFieldEnd();
- }
-
- xfer += iprot->readStructEnd();
-
- if (!isset_type)
- throw TProtocolException(TProtocolException::INVALID_DATA);
- if (!isset_encodings)
- throw TProtocolException(TProtocolException::INVALID_DATA);
- if (!isset_path_in_schema)
- throw TProtocolException(TProtocolException::INVALID_DATA);
- if (!isset_codec)
- throw TProtocolException(TProtocolException::INVALID_DATA);
- if (!isset_num_values)
- throw TProtocolException(TProtocolException::INVALID_DATA);
- if (!isset_total_uncompressed_size)
- throw TProtocolException(TProtocolException::INVALID_DATA);
- if (!isset_total_compressed_size)
- throw TProtocolException(TProtocolException::INVALID_DATA);
- if (!isset_data_page_offset)
- throw TProtocolException(TProtocolException::INVALID_DATA);
- return xfer;
-}
-
-uint32_t ColumnMetaData::write(::apache::thrift::protocol::TProtocol* oprot) const {
- uint32_t xfer = 0;
- uint32_t fcnt = 0;
- xfer += oprot->writeStructBegin("ColumnMetaData");
-
- ++fcnt;
- xfer += oprot->writeFieldBegin("type", ::apache::thrift::protocol::T_I32, 1);
- xfer += oprot->writeI32((int32_t)this->type);
- xfer += oprot->writeFieldEnd();
-
- ++fcnt;
- xfer += oprot->writeFieldBegin("encodings", ::apache::thrift::protocol::T_LIST, 2);
- {
- xfer += oprot->writeListBegin(::apache::thrift::protocol::T_I32, static_cast<uint32_t>(this->encodings.size()));
- std::vector<Encoding::type> ::const_iterator _iter27;
- for (_iter27 = this->encodings.begin(); _iter27 != this->encodings.end(); ++_iter27)
- {
- xfer += oprot->writeI32((int32_t)(*_iter27));
- }
- xfer += oprot->writeListEnd();
- }
- xfer += oprot->writeFieldEnd();
-
- ++fcnt;
- xfer += oprot->writeFieldBegin("path_in_schema", ::apache::thrift::protocol::T_LIST, 3);
- {
- xfer += oprot->writeListBegin(::apache::thrift::protocol::T_STRING, static_cast<uint32_t>(this->path_in_schema.size()));
- std::vector<std::string> ::const_iterator _iter28;
- for (_iter28 = this->path_in_schema.begin(); _iter28 != this->path_in_schema.end(); ++_iter28)
- {
- xfer += oprot->writeString((*_iter28));
- }
- xfer += oprot->writeListEnd();
- }
- xfer += oprot->writeFieldEnd();
-
- ++fcnt;
- xfer += oprot->writeFieldBegin("codec", ::apache::thrift::protocol::T_I32, 4);
- xfer += oprot->writeI32((int32_t)this->codec);
- xfer += oprot->writeFieldEnd();
-
- ++fcnt;
- xfer += oprot->writeFieldBegin("num_values", ::apache::thrift::protocol::T_I64, 5);
- xfer += oprot->writeI64(this->num_values);
- xfer += oprot->writeFieldEnd();
-
- ++fcnt;
- xfer += oprot->writeFieldBegin("total_uncompressed_size", ::apache::thrift::protocol::T_I64, 6);
- xfer += oprot->writeI64(this->total_uncompressed_size);
- xfer += oprot->writeFieldEnd();
-
- ++fcnt;
- xfer += oprot->writeFieldBegin("total_compressed_size", ::apache::thrift::protocol::T_I64, 7);
- xfer += oprot->writeI64(this->total_compressed_size);
- xfer += oprot->writeFieldEnd();
-
- if (this->__isset.key_value_metadata) {
- ++fcnt;
- xfer += oprot->writeFieldBegin("key_value_metadata", ::apache::thrift::protocol::T_LIST, 8);
- {
- xfer += oprot->writeListBegin(::apache::thrift::protocol::T_STRUCT, static_cast<uint32_t>(this->key_value_metadata.size()));
- std::vector<KeyValue> ::const_iterator _iter29;
- for (_iter29 = this->key_value_metadata.begin(); _iter29 != this->key_value_metadata.end(); ++_iter29)
- {
- xfer += (*_iter29).write(oprot);
- }
- xfer += oprot->writeListEnd();
- }
- xfer += oprot->writeFieldEnd();
- }
- ++fcnt;
- xfer += oprot->writeFieldBegin("data_page_offset", ::apache::thrift::protocol::T_I64, 9);
- xfer += oprot->writeI64(this->data_page_offset);
- xfer += oprot->writeFieldEnd();
-
- if (this->__isset.index_page_offset) {
- ++fcnt;
- xfer += oprot->writeFieldBegin("index_page_offset", ::apache::thrift::protocol::T_I64, 10);
- xfer += oprot->writeI64(this->index_page_offset);
- xfer += oprot->writeFieldEnd();
- }
- if (this->__isset.dictionary_page_offset) {
- ++fcnt;
- xfer += oprot->writeFieldBegin("dictionary_page_offset", ::apache::thrift::protocol::T_I64, 11);
- xfer += oprot->writeI64(this->dictionary_page_offset);
- xfer += oprot->writeFieldEnd();
- }
- if (this->__isset.statistics) {
- ++fcnt;
- xfer += oprot->writeFieldBegin("statistics", ::apache::thrift::protocol::T_STRUCT, 12);
- xfer += this->statistics.write(oprot);
- xfer += oprot->writeFieldEnd();
- }
- xfer += oprot->writeFieldStop();
- xfer += oprot->writeStructEnd();
- return xfer;
-}
-
-void swap(ColumnMetaData &a, ColumnMetaData &b) {
- using ::std::swap;
- swap(a.type, b.type);
- swap(a.encodings, b.encodings);
- swap(a.path_in_schema, b.path_in_schema);
- swap(a.codec, b.codec);
- swap(a.num_values, b.num_values);
- swap(a.total_uncompressed_size, b.total_uncompressed_size);
- swap(a.total_compressed_size, b.total_compressed_size);
- swap(a.key_value_metadata, b.key_value_metadata);
- swap(a.data_page_offset, b.data_page_offset);
- swap(a.index_page_offset, b.index_page_offset);
- swap(a.dictionary_page_offset, b.dictionary_page_offset);
- swap(a.statistics, b.statistics);
- swap(a.__isset, b.__isset);
-}
-
-const char* ColumnChunk::ascii_fingerprint = "169FC47057EF3D82E2FACDDEC2641AE8";
-const uint8_t ColumnChunk::binary_fingerprint[16] = {0x16,0x9F,0xC4,0x70,0x57,0xEF,0x3D,0x82,0xE2,0xFA,0xCD,0xDE,0xC2,0x64,0x1A,0xE8};
-
-uint32_t ColumnChunk::read(::apache::thrift::protocol::TProtocol* iprot) {
-
- uint32_t xfer = 0;
- std::string fname;
- ::apache::thrift::protocol::TType ftype;
- int16_t fid;
-
- xfer += iprot->readStructBegin(fname);
-
- using ::apache::thrift::protocol::TProtocolException;
-
- bool isset_file_offset = false;
-
- while (true)
- {
- xfer += iprot->readFieldBegin(fname, ftype, fid);
- if (ftype == ::apache::thrift::protocol::T_STOP) {
- break;
- }
- switch (fid)
- {
- case 1:
- if (ftype == ::apache::thrift::protocol::T_STRING) {
- xfer += iprot->readString(this->file_path);
- this->__isset.file_path = true;
- } else {
- xfer += iprot->skip(ftype);
- }
- break;
- case 2:
- if (ftype == ::apache::thrift::protocol::T_I64) {
- xfer += iprot->readI64(this->file_offset);
- isset_file_offset = true;
- } else {
- xfer += iprot->skip(ftype);
- }
- break;
- case 3:
- if (ftype == ::apache::thrift::protocol::T_STRUCT) {
- xfer += this->meta_data.read(iprot);
- this->__isset.meta_data = true;
- } else {
- xfer += iprot->skip(ftype);
- }
- break;
- default:
- xfer += iprot->skip(ftype);
- break;
- }
- xfer += iprot->readFieldEnd();
- }
-
- xfer += iprot->readStructEnd();
-
- if (!isset_file_offset)
- throw TProtocolException(TProtocolException::INVALID_DATA);
- return xfer;
-}
-
-uint32_t ColumnChunk::write(::apache::thrift::protocol::TProtocol* oprot) const {
- uint32_t xfer = 0;
- uint32_t fcnt = 0;
- xfer += oprot->writeStructBegin("ColumnChunk");
-
- if (this->__isset.file_path) {
- ++fcnt;
- xfer += oprot->writeFieldBegin("file_path", ::apache::thrift::protocol::T_STRING, 1);
- xfer += oprot->writeString(this->file_path);
- xfer += oprot->writeFieldEnd();
- }
- ++fcnt;
- xfer += oprot->writeFieldBegin("file_offset", ::apache::thrift::protocol::T_I64, 2);
- xfer += oprot->writeI64(this->file_offset);
- xfer += oprot->writeFieldEnd();
-
- if (this->__isset.meta_data) {
- ++fcnt;
- xfer += oprot->writeFieldBegin("meta_data", ::apache::thrift::protocol::T_STRUCT, 3);
- xfer += this->meta_data.write(oprot);
- xfer += oprot->writeFieldEnd();
- }
- xfer += oprot->writeFieldStop();
- xfer += oprot->writeStructEnd();
- return xfer;
-}
-
-void swap(ColumnChunk &a, ColumnChunk &b) {
- using ::std::swap;
- swap(a.file_path, b.file_path);
- swap(a.file_offset, b.file_offset);
- swap(a.meta_data, b.meta_data);
- swap(a.__isset, b.__isset);
-}
-
-const char* RowGroup::ascii_fingerprint = "DC7968627FA826DDC4C6C9BE773586C9";
-const uint8_t RowGroup::binary_fingerprint[16] = {0xDC,0x79,0x68,0x62,0x7F,0xA8,0x26,0xDD,0xC4,0xC6,0xC9,0xBE,0x77,0x35,0x86,0xC9};
-
-uint32_t RowGroup::read(::apache::thrift::protocol::TProtocol* iprot) {
-
- uint32_t xfer = 0;
- std::string fname;
- ::apache::thrift::protocol::TType ftype;
- int16_t fid;
-
- xfer += iprot->readStructBegin(fname);
-
- using ::apache::thrift::protocol::TProtocolException;
-
- bool isset_columns = false;
- bool isset_total_byte_size = false;
- bool isset_num_rows = false;
-
- while (true)
- {
- xfer += iprot->readFieldBegin(fname, ftype, fid);
- if (ftype == ::apache::thrift::protocol::T_STOP) {
- break;
- }
- switch (fid)
- {
- case 1:
- if (ftype == ::apache::thrift::protocol::T_LIST) {
- {
- this->columns.clear();
- uint32_t _size30;
- ::apache::thrift::protocol::TType _etype33;
- xfer += iprot->readListBegin(_etype33, _size30);
- this->columns.resize(_size30);
- uint32_t _i34;
- for (_i34 = 0; _i34 < _size30; ++_i34)
- {
- xfer += this->columns[_i34].read(iprot);
- }
- xfer += iprot->readListEnd();
- }
- isset_columns = true;
- } else {
- xfer += iprot->skip(ftype);
- }
- break;
- case 2:
- if (ftype == ::apache::thrift::protocol::T_I64) {
- xfer += iprot->readI64(this->total_byte_size);
- isset_total_byte_size = true;
- } else {
- xfer += iprot->skip(ftype);
- }
- break;
- case 3:
- if (ftype == ::apache::thrift::protocol::T_I64) {
- xfer += iprot->readI64(this->num_rows);
- isset_num_rows = true;
- } else {
- xfer += iprot->skip(ftype);
- }
- break;
- case 4:
- if (ftype == ::apache::thrift::protocol::T_LIST) {
- {
- this->sorting_columns.clear();
- uint32_t _size35;
- ::apache::thrift::protocol::TType _etype38;
- xfer += iprot->readListBegin(_etype38, _size35);
- this->sorting_columns.resize(_size35);
- uint32_t _i39;
- for (_i39 = 0; _i39 < _size35; ++_i39)
- {
- xfer += this->sorting_columns[_i39].read(iprot);
- }
- xfer += iprot->readListEnd();
- }
- this->__isset.sorting_columns = true;
- } else {
- xfer += iprot->skip(ftype);
- }
- break;
- default:
- xfer += iprot->skip(ftype);
- break;
- }
- xfer += iprot->readFieldEnd();
- }
-
- xfer += iprot->readStructEnd();
-
- if (!isset_columns)
- throw TProtocolException(TProtocolException::INVALID_DATA);
- if (!isset_total_byte_size)
- throw TProtocolException(TProtocolException::INVALID_DATA);
- if (!isset_num_rows)
- throw TProtocolException(TProtocolException::INVALID_DATA);
- return xfer;
-}
-
-uint32_t RowGroup::write(::apache::thrift::protocol::TProtocol* oprot) const {
- uint32_t xfer = 0;
- uint32_t fcnt = 0;
- xfer += oprot->writeStructBegin("RowGroup");
-
- ++fcnt;
- xfer += oprot->writeFieldBegin("columns", ::apache::thrift::protocol::T_LIST, 1);
- {
- xfer += oprot->writeListBegin(::apache::thrift::protocol::T_STRUCT, static_cast<uint32_t>(this->columns.size()));
- std::vector<ColumnChunk> ::const_iterator _iter40;
- for (_iter40 = this->columns.begin(); _iter40 != this->columns.end(); ++_iter40)
- {
- xfer += (*_iter40).write(oprot);
- }
- xfer += oprot->writeListEnd();
- }
- xfer += oprot->writeFieldEnd();
-
- ++fcnt;
- xfer += oprot->writeFieldBegin("total_byte_size", ::apache::thrift::protocol::T_I64, 2);
- xfer += oprot->writeI64(this->total_byte_size);
- xfer += oprot->writeFieldEnd();
-
- ++fcnt;
- xfer += oprot->writeFieldBegin("num_rows", ::apache::thrift::protocol::T_I64, 3);
- xfer += oprot->writeI64(this->num_rows);
- xfer += oprot->writeFieldEnd();
-
- if (this->__isset.sorting_columns) {
- ++fcnt;
- xfer += oprot->writeFieldBegin("sorting_columns", ::apache::thrift::protocol::T_LIST, 4);
- {
- xfer += oprot->writeListBegin(::apache::thrift::protocol::T_STRUCT, static_cast<uint32_t>(this->sorting_columns.size()));
- std::vector<SortingColumn> ::const_iterator _iter41;
- for (_iter41 = this->sorting_columns.begin(); _iter41 != this->sorting_columns.end(); ++_iter41)
- {
- xfer += (*_iter41).write(oprot);
- }
- xfer += oprot->writeListEnd();
- }
- xfer += oprot->writeFieldEnd();
- }
- xfer += oprot->writeFieldStop();
- xfer += oprot->writeStructEnd();
- return xfer;
-}
-
-void swap(RowGroup &a, RowGroup &b) {
- using ::std::swap;
- swap(a.columns, b.columns);
- swap(a.total_byte_size, b.total_byte_size);
- swap(a.num_rows, b.num_rows);
- swap(a.sorting_columns, b.sorting_columns);
- swap(a.__isset, b.__isset);
-}
-
-const char* FileMetaData::ascii_fingerprint = "44DC7D83A66D54A7B7892A985C4125C9";
-const uint8_t FileMetaData::binary_fingerprint[16] = {0x44,0xDC,0x7D,0x83,0xA6,0x6D,0x54,0xA7,0xB7,0x89,0x2A,0x98,0x5C,0x41,0x25,0xC9};
-
-uint32_t FileMetaData::read(::apache::thrift::protocol::TProtocol* iprot) {
-
- uint32_t xfer = 0;
- std::string fname;
- ::apache::thrift::protocol::TType ftype;
- int16_t fid;
-
- xfer += iprot->readStructBegin(fname);
-
- using ::apache::thrift::protocol::TProtocolException;
-
- bool isset_version = false;
- bool isset_schema = false;
- bool isset_num_rows = false;
- bool isset_row_groups = false;
-
- while (true)
- {
- xfer += iprot->readFieldBegin(fname, ftype, fid);
- if (ftype == ::apache::thrift::protocol::T_STOP) {
- break;
- }
- switch (fid)
- {
- case 1:
- if (ftype == ::apache::thrift::protocol::T_I32) {
- xfer += iprot->readI32(this->version);
- isset_version = true;
- } else {
- xfer += iprot->skip(ftype);
- }
- break;
- case 2:
- if (ftype == ::apache::thrift::protocol::T_LIST) {
- {
- this->schema.clear();
- uint32_t _size42;
- ::apache::thrift::protocol::TType _etype45;
- xfer += iprot->readListBegin(_etype45, _size42);
- this->schema.resize(_size42);
- uint32_t _i46;
- for (_i46 = 0; _i46 < _size42; ++_i46)
- {
- xfer += this->schema[_i46].read(iprot);
- }
- xfer += iprot->readListEnd();
- }
- isset_schema = true;
- } else {
- xfer += iprot->skip(ftype);
- }
- break;
- case 3:
- if (ftype == ::apache::thrift::protocol::T_I64) {
- xfer += iprot->readI64(this->num_rows);
- isset_num_rows = true;
- } else {
- xfer += iprot->skip(ftype);
- }
- break;
- case 4:
- if (ftype == ::apache::thrift::protocol::T_LIST) {
- {
- this->row_groups.clear();
- uint32_t _size47;
- ::apache::thrift::protocol::TType _etype50;
- xfer += iprot->readListBegin(_etype50, _size47);
- this->row_groups.resize(_size47);
- uint32_t _i51;
- for (_i51 = 0; _i51 < _size47; ++_i51)
- {
- xfer += this->row_groups[_i51].read(iprot);
- }
- xfer += iprot->readListEnd();
- }
- isset_row_groups = true;
- } else {
- xfer += iprot->skip(ftype);
- }
- break;
- case 5:
- if (ftype == ::apache::thrift::protocol::T_LIST) {
- {
- this->key_value_metadata.clear();
- uint32_t _size52;
- ::apache::thrift::protocol::TType _etype55;
- xfer += iprot->readListBegin(_etype55, _size52);
- this->key_value_metadata.resize(_size52);
- uint32_t _i56;
- for (_i56 = 0; _i56 < _size52; ++_i56)
- {
- xfer += this->key_value_metadata[_i56].read(iprot);
- }
- xfer += iprot->readListEnd();
- }
- this->__isset.key_value_metadata = true;
- } else {
- xfer += iprot->skip(ftype);
- }
- break;
- case 6:
- if (ftype == ::apache::thrift::protocol::T_STRING) {
- xfer += iprot->readString(this->created_by);
- this->__isset.created_by = true;
- } else {
- xfer += iprot->skip(ftype);
- }
- break;
- default:
- xfer += iprot->skip(ftype);
- break;
- }
- xfer += iprot->readFieldEnd();
- }
-
- xfer += iprot->readStructEnd();
-
- if (!isset_version)
- throw TProtocolException(TProtocolException::INVALID_DATA);
- if (!isset_schema)
- throw TProtocolException(TProtocolException::INVALID_DATA);
- if (!isset_num_rows)
- throw TProtocolException(TProtocolException::INVALID_DATA);
- if (!isset_row_groups)
- throw TProtocolException(TProtocolException::INVALID_DATA);
- return xfer;
-}
-
-uint32_t FileMetaData::write(::apache::thrift::protocol::TProtocol* oprot) const {
- uint32_t xfer = 0;
- uint32_t fcnt = 0;
- xfer += oprot->writeStructBegin("FileMetaData");
-
- ++fcnt;
- xfer += oprot->writeFieldBegin("version", ::apache::thrift::protocol::T_I32, 1);
- xfer += oprot->writeI32(this->version);
- xfer += oprot->writeFieldEnd();
-
- ++fcnt;
- xfer += oprot->writeFieldBegin("schema", ::apache::thrift::protocol::T_LIST, 2);
- {
- xfer += oprot->writeListBegin(::apache::thrift::protocol::T_STRUCT, static_cast<uint32_t>(this->schema.size()));
- std::vector<SchemaElement> ::const_iterator _iter57;
- for (_iter57 = this->schema.begin(); _iter57 != this->schema.end(); ++_iter57)
- {
- xfer += (*_iter57).write(oprot);
- }
- xfer += oprot->writeListEnd();
- }
- xfer += oprot->writeFieldEnd();
-
- ++fcnt;
- xfer += oprot->writeFieldBegin("num_rows", ::apache::thrift::protocol::T_I64, 3);
- xfer += oprot->writeI64(this->num_rows);
- xfer += oprot->writeFieldEnd();
-
- ++fcnt;
- xfer += oprot->writeFieldBegin("row_groups", ::apache::thrift::protocol::T_LIST, 4);
- {
- xfer += oprot->writeListBegin(::apache::thrift::protocol::T_STRUCT, static_cast<uint32_t>(this->row_groups.size()));
- std::vector<RowGroup> ::const_iterator _iter58;
- for (_iter58 = this->row_groups.begin(); _iter58 != this->row_groups.end(); ++_iter58)
- {
- xfer += (*_iter58).write(oprot);
- }
- xfer += oprot->writeListEnd();
- }
- xfer += oprot->writeFieldEnd();
-
- if (this->__isset.key_value_metadata) {
- ++fcnt;
- xfer += oprot->writeFieldBegin("key_value_metadata", ::apache::thrift::protocol::T_LIST, 5);
- {
- xfer += oprot->writeListBegin(::apache::thrift::protocol::T_STRUCT, static_cast<uint32_t>(this->key_value_metadata.size()));
- std::vector<KeyValue> ::const_iterator _iter59;
- for (_iter59 = this->key_value_metadata.begin(); _iter59 != this->key_value_metadata.end(); ++_iter59)
- {
- xfer += (*_iter59).write(oprot);
- }
- xfer += oprot->writeListEnd();
- }
- xfer += oprot->writeFieldEnd();
- }
- if (this->__isset.created_by) {
- ++fcnt;
- xfer += oprot->writeFieldBegin("created_by", ::apache::thrift::protocol::T_STRING, 6);
- xfer += oprot->writeString(this->created_by);
- xfer += oprot->writeFieldEnd();
- }
- xfer += oprot->writeFieldStop();
- xfer += oprot->writeStructEnd();
- return xfer;
-}
-
-void swap(FileMetaData &a, FileMetaData &b) {
- using ::std::swap;
- swap(a.version, b.version);
- swap(a.schema, b.schema);
- swap(a.num_rows, b.num_rows);
- swap(a.row_groups, b.row_groups);
- swap(a.key_value_metadata, b.key_value_metadata);
- swap(a.created_by, b.created_by);
- swap(a.__isset, b.__isset);
-}
-
-} // namespace
[6/7] parquet-cpp git commit: PARQUET-416: C++11 compilation,
code reorg, libparquet and installation targets
Posted by no...@apache.org.
http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/337cf584/build-support/cpplint.py
----------------------------------------------------------------------
diff --git a/build-support/cpplint.py b/build-support/cpplint.py
new file mode 100755
index 0000000..ece520b
--- /dev/null
+++ b/build-support/cpplint.py
@@ -0,0 +1,6323 @@
+#!/usr/bin/env python
+#
+# Copyright (c) 2009 Google Inc. All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met:
+#
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above
+# copyright notice, this list of conditions and the following disclaimer
+# in the documentation and/or other materials provided with the
+# distribution.
+# * Neither the name of Google Inc. nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+"""Does google-lint on c++ files.
+
+The goal of this script is to identify places in the code that *may*
+be in non-compliance with google style. It does not attempt to fix
+up these problems -- the point is to educate. It does also not
+attempt to find all problems, or to ensure that everything it does
+find is legitimately a problem.
+
+In particular, we can get very confused by /* and // inside strings!
+We do a small hack, which is to ignore //'s with "'s after them on the
+same line, but it is far from perfect (in either direction).
+"""
+
+import codecs
+import copy
+import getopt
+import math # for log
+import os
+import re
+import sre_compile
+import string
+import sys
+import unicodedata
+
+
+_USAGE = """
+Syntax: cpplint.py [--verbose=#] [--output=vs7] [--filter=-x,+y,...]
+ [--counting=total|toplevel|detailed] [--root=subdir]
+ [--linelength=digits]
+ <file> [file] ...
+
+ The style guidelines this tries to follow are those in
+ http://google-styleguide.googlecode.com/svn/trunk/cppguide.xml
+
+ Every problem is given a confidence score from 1-5, with 5 meaning we are
+ certain of the problem, and 1 meaning it could be a legitimate construct.
+ This will miss some errors, and is not a substitute for a code review.
+
+ To suppress false-positive errors of a certain category, add a
+ 'NOLINT(category)' comment to the line. NOLINT or NOLINT(*)
+ suppresses errors of all categories on that line.
+
+ The files passed in will be linted; at least one file must be provided.
+ Default linted extensions are .cc, .cpp, .cu, .cuh and .h. Change the
+ extensions with the --extensions flag.
+
+ Flags:
+
+ output=vs7
+ By default, the output is formatted to ease emacs parsing. Visual Studio
+ compatible output (vs7) may also be used. Other formats are unsupported.
+
+ verbose=#
+ Specify a number 0-5 to restrict errors to certain verbosity levels.
+
+ filter=-x,+y,...
+ Specify a comma-separated list of category-filters to apply: only
+ error messages whose category names pass the filters will be printed.
+ (Category names are printed with the message and look like
+ "[whitespace/indent]".) Filters are evaluated left to right.
+ "-FOO" and "FOO" means "do not print categories that start with FOO".
+ "+FOO" means "do print categories that start with FOO".
+
+ Examples: --filter=-whitespace,+whitespace/braces
+ --filter=whitespace,runtime/printf,+runtime/printf_format
+ --filter=-,+build/include_what_you_use
+
+ To see a list of all the categories used in cpplint, pass no arg:
+ --filter=
+
+ counting=total|toplevel|detailed
+ The total number of errors found is always printed. If
+ 'toplevel' is provided, then the count of errors in each of
+ the top-level categories like 'build' and 'whitespace' will
+ also be printed. If 'detailed' is provided, then a count
+ is provided for each category like 'build/class'.
+
+ root=subdir
+ The root directory used for deriving header guard CPP variable.
+ By default, the header guard CPP variable is calculated as the relative
+ path to the directory that contains .git, .hg, or .svn. When this flag
+ is specified, the relative path is calculated from the specified
+ directory. If the specified directory does not exist, this flag is
+ ignored.
+
+ Examples:
+ Assuming that src/.git exists, the header guard CPP variables for
+ src/chrome/browser/ui/browser.h are:
+
+ No flag => CHROME_BROWSER_UI_BROWSER_H_
+ --root=chrome => BROWSER_UI_BROWSER_H_
+ --root=chrome/browser => UI_BROWSER_H_
+
+ linelength=digits
+ This is the allowed line length for the project. The default value is
+ 80 characters.
+
+ Examples:
+ --linelength=120
+
+ extensions=extension,extension,...
+ The allowed file extensions that cpplint will check
+
+ Examples:
+ --extensions=hpp,cpp
+
+ cpplint.py supports per-directory configurations specified in CPPLINT.cfg
+ files. CPPLINT.cfg file can contain a number of key=value pairs.
+ Currently the following options are supported:
+
+ set noparent
+ filter=+filter1,-filter2,...
+ exclude_files=regex
+ linelength=80
+
+ "set noparent" option prevents cpplint from traversing directory tree
+ upwards looking for more .cfg files in parent directories. This option
+ is usually placed in the top-level project directory.
+
+ The "filter" option is similar in function to --filter flag. It specifies
+ message filters in addition to the |_DEFAULT_FILTERS| and those specified
+ through --filter command-line flag.
+
+ "exclude_files" allows to specify a regular expression to be matched against
+ a file name. If the expression matches, the file is skipped and not run
+ through liner.
+
+ "linelength" allows to specify the allowed line length for the project.
+
+ CPPLINT.cfg has an effect on files in the same directory and all
+ sub-directories, unless overridden by a nested configuration file.
+
+ Example file:
+ filter=-build/include_order,+build/include_alpha
+ exclude_files=.*\.cc
+
+ The above example disables build/include_order warning and enables
+ build/include_alpha as well as excludes all .cc from being
+ processed by linter, in the current directory (where the .cfg
+ file is located) and all sub-directories.
+"""
+
+# We categorize each error message we print. Here are the categories.
+# We want an explicit list so we can list them all in cpplint --filter=.
+# If you add a new error message with a new category, add it to the list
+# here! cpplint_unittest.py should tell you if you forget to do this.
+_ERROR_CATEGORIES = [
+ 'build/class',
+ 'build/c++11',
+ 'build/deprecated',
+ 'build/endif_comment',
+ 'build/explicit_make_pair',
+ 'build/forward_decl',
+ 'build/header_guard',
+ 'build/include',
+ 'build/include_alpha',
+ 'build/include_order',
+ 'build/include_what_you_use',
+ 'build/namespaces',
+ 'build/printf_format',
+ 'build/storage_class',
+ 'legal/copyright',
+ 'readability/alt_tokens',
+ 'readability/braces',
+ 'readability/casting',
+ 'readability/check',
+ 'readability/constructors',
+ 'readability/fn_size',
+ 'readability/function',
+ 'readability/inheritance',
+ 'readability/multiline_comment',
+ 'readability/multiline_string',
+ 'readability/namespace',
+ 'readability/nolint',
+ 'readability/nul',
+ 'readability/strings',
+ 'readability/todo',
+ 'readability/utf8',
+ 'runtime/arrays',
+ 'runtime/casting',
+ 'runtime/explicit',
+ 'runtime/int',
+ 'runtime/init',
+ 'runtime/invalid_increment',
+ 'runtime/member_string_references',
+ 'runtime/memset',
+ 'runtime/indentation_namespace',
+ 'runtime/operator',
+ 'runtime/printf',
+ 'runtime/printf_format',
+ 'runtime/references',
+ 'runtime/string',
+ 'runtime/threadsafe_fn',
+ 'runtime/vlog',
+ 'whitespace/blank_line',
+ 'whitespace/braces',
+ 'whitespace/comma',
+ 'whitespace/comments',
+ 'whitespace/empty_conditional_body',
+ 'whitespace/empty_loop_body',
+ 'whitespace/end_of_line',
+ 'whitespace/ending_newline',
+ 'whitespace/forcolon',
+ 'whitespace/indent',
+ 'whitespace/line_length',
+ 'whitespace/newline',
+ 'whitespace/operators',
+ 'whitespace/parens',
+ 'whitespace/semicolon',
+ 'whitespace/tab',
+ 'whitespace/todo',
+ ]
+
+# These error categories are no longer enforced by cpplint, but for backwards-
+# compatibility they may still appear in NOLINT comments.
+_LEGACY_ERROR_CATEGORIES = [
+ 'readability/streams',
+ ]
+
+# The default state of the category filter. This is overridden by the --filter=
+# flag. By default all errors are on, so only add here categories that should be
+# off by default (i.e., categories that must be enabled by the --filter= flags).
+# All entries here should start with a '-' or '+', as in the --filter= flag.
+_DEFAULT_FILTERS = ['-build/include_alpha']
+
+# We used to check for high-bit characters, but after much discussion we
+# decided those were OK, as long as they were in UTF-8 and didn't represent
+# hard-coded international strings, which belong in a separate i18n file.
+
+# C++ headers
+_CPP_HEADERS = frozenset([
+ # Legacy
+ 'algobase.h',
+ 'algo.h',
+ 'alloc.h',
+ 'builtinbuf.h',
+ 'bvector.h',
+ 'complex.h',
+ 'defalloc.h',
+ 'deque.h',
+ 'editbuf.h',
+ 'fstream.h',
+ 'function.h',
+ 'hash_map',
+ 'hash_map.h',
+ 'hash_set',
+ 'hash_set.h',
+ 'hashtable.h',
+ 'heap.h',
+ 'indstream.h',
+ 'iomanip.h',
+ 'iostream.h',
+ 'istream.h',
+ 'iterator.h',
+ 'list.h',
+ 'map.h',
+ 'multimap.h',
+ 'multiset.h',
+ 'ostream.h',
+ 'pair.h',
+ 'parsestream.h',
+ 'pfstream.h',
+ 'procbuf.h',
+ 'pthread_alloc',
+ 'pthread_alloc.h',
+ 'rope',
+ 'rope.h',
+ 'ropeimpl.h',
+ 'set.h',
+ 'slist',
+ 'slist.h',
+ 'stack.h',
+ 'stdiostream.h',
+ 'stl_alloc.h',
+ 'stl_relops.h',
+ 'streambuf.h',
+ 'stream.h',
+ 'strfile.h',
+ 'strstream.h',
+ 'tempbuf.h',
+ 'tree.h',
+ 'type_traits.h',
+ 'vector.h',
+ # 17.6.1.2 C++ library headers
+ 'algorithm',
+ 'array',
+ 'atomic',
+ 'bitset',
+ 'chrono',
+ 'codecvt',
+ 'complex',
+ 'condition_variable',
+ 'deque',
+ 'exception',
+ 'forward_list',
+ 'fstream',
+ 'functional',
+ 'future',
+ 'initializer_list',
+ 'iomanip',
+ 'ios',
+ 'iosfwd',
+ 'iostream',
+ 'istream',
+ 'iterator',
+ 'limits',
+ 'list',
+ 'locale',
+ 'map',
+ 'memory',
+ 'mutex',
+ 'new',
+ 'numeric',
+ 'ostream',
+ 'queue',
+ 'random',
+ 'ratio',
+ 'regex',
+ 'set',
+ 'sstream',
+ 'stack',
+ 'stdexcept',
+ 'streambuf',
+ 'string',
+ 'strstream',
+ 'system_error',
+ 'thread',
+ 'tuple',
+ 'typeindex',
+ 'typeinfo',
+ 'type_traits',
+ 'unordered_map',
+ 'unordered_set',
+ 'utility',
+ 'valarray',
+ 'vector',
+ # 17.6.1.2 C++ headers for C library facilities
+ 'cassert',
+ 'ccomplex',
+ 'cctype',
+ 'cerrno',
+ 'cfenv',
+ 'cfloat',
+ 'cinttypes',
+ 'ciso646',
+ 'climits',
+ 'clocale',
+ 'cmath',
+ 'csetjmp',
+ 'csignal',
+ 'cstdalign',
+ 'cstdarg',
+ 'cstdbool',
+ 'cstddef',
+ 'cstdint',
+ 'cstdio',
+ 'cstdlib',
+ 'cstring',
+ 'ctgmath',
+ 'ctime',
+ 'cuchar',
+ 'cwchar',
+ 'cwctype',
+ ])
+
+
+# These headers are excluded from [build/include] and [build/include_order]
+# checks:
+# - Anything not following google file name conventions (containing an
+# uppercase character, such as Python.h or nsStringAPI.h, for example).
+# - Lua headers.
+_THIRD_PARTY_HEADERS_PATTERN = re.compile(
+ r'^(?:[^/]*[A-Z][^/]*\.h|lua\.h|lauxlib\.h|lualib\.h)$')
+
+
+# Assertion macros. These are defined in base/logging.h and
+# testing/base/gunit.h. Note that the _M versions need to come first
+# for substring matching to work.
+_CHECK_MACROS = [
+ 'DCHECK', 'CHECK',
+ 'EXPECT_TRUE_M', 'EXPECT_TRUE',
+ 'ASSERT_TRUE_M', 'ASSERT_TRUE',
+ 'EXPECT_FALSE_M', 'EXPECT_FALSE',
+ 'ASSERT_FALSE_M', 'ASSERT_FALSE',
+ ]
+
+# Replacement macros for CHECK/DCHECK/EXPECT_TRUE/EXPECT_FALSE
+_CHECK_REPLACEMENT = dict([(m, {}) for m in _CHECK_MACROS])
+
+for op, replacement in [('==', 'EQ'), ('!=', 'NE'),
+ ('>=', 'GE'), ('>', 'GT'),
+ ('<=', 'LE'), ('<', 'LT')]:
+ _CHECK_REPLACEMENT['DCHECK'][op] = 'DCHECK_%s' % replacement
+ _CHECK_REPLACEMENT['CHECK'][op] = 'CHECK_%s' % replacement
+ _CHECK_REPLACEMENT['EXPECT_TRUE'][op] = 'EXPECT_%s' % replacement
+ _CHECK_REPLACEMENT['ASSERT_TRUE'][op] = 'ASSERT_%s' % replacement
+ _CHECK_REPLACEMENT['EXPECT_TRUE_M'][op] = 'EXPECT_%s_M' % replacement
+ _CHECK_REPLACEMENT['ASSERT_TRUE_M'][op] = 'ASSERT_%s_M' % replacement
+
+for op, inv_replacement in [('==', 'NE'), ('!=', 'EQ'),
+ ('>=', 'LT'), ('>', 'LE'),
+ ('<=', 'GT'), ('<', 'GE')]:
+ _CHECK_REPLACEMENT['EXPECT_FALSE'][op] = 'EXPECT_%s' % inv_replacement
+ _CHECK_REPLACEMENT['ASSERT_FALSE'][op] = 'ASSERT_%s' % inv_replacement
+ _CHECK_REPLACEMENT['EXPECT_FALSE_M'][op] = 'EXPECT_%s_M' % inv_replacement
+ _CHECK_REPLACEMENT['ASSERT_FALSE_M'][op] = 'ASSERT_%s_M' % inv_replacement
+
+# Alternative tokens and their replacements. For full list, see section 2.5
+# Alternative tokens [lex.digraph] in the C++ standard.
+#
+# Digraphs (such as '%:') are not included here since it's a mess to
+# match those on a word boundary.
+_ALT_TOKEN_REPLACEMENT = {
+ 'and': '&&',
+ 'bitor': '|',
+ 'or': '||',
+ 'xor': '^',
+ 'compl': '~',
+ 'bitand': '&',
+ 'and_eq': '&=',
+ 'or_eq': '|=',
+ 'xor_eq': '^=',
+ 'not': '!',
+ 'not_eq': '!='
+ }
+
+# Compile regular expression that matches all the above keywords. The "[ =()]"
+# bit is meant to avoid matching these keywords outside of boolean expressions.
+#
+# False positives include C-style multi-line comments and multi-line strings
+# but those have always been troublesome for cpplint.
+_ALT_TOKEN_REPLACEMENT_PATTERN = re.compile(
+ r'[ =()](' + ('|'.join(_ALT_TOKEN_REPLACEMENT.keys())) + r')(?=[ (]|$)')
+
+
+# These constants define types of headers for use with
+# _IncludeState.CheckNextIncludeOrder().
+_C_SYS_HEADER = 1
+_CPP_SYS_HEADER = 2
+_LIKELY_MY_HEADER = 3
+_POSSIBLE_MY_HEADER = 4
+_OTHER_HEADER = 5
+
+# These constants define the current inline assembly state
+_NO_ASM = 0 # Outside of inline assembly block
+_INSIDE_ASM = 1 # Inside inline assembly block
+_END_ASM = 2 # Last line of inline assembly block
+_BLOCK_ASM = 3 # The whole block is an inline assembly block
+
+# Match start of assembly blocks: one of the asm keywords, an optional
+# volatile qualifier, then an opening brace or parenthesis.
+_MATCH_ASM = re.compile(r'^\s*(?:asm|_asm|__asm|__asm__)'
+ r'(?:\s+(volatile|__volatile__))?'
+ r'\s*[{(]')
+
+
+# {str: compiled regexp}: cache of compiled patterns keyed by the pattern
+# string; filled lazily by Match, ReplaceAll and Search.
+_regexp_compile_cache = {}
+
+# {str, set(int)}: a map from error categories to sets of linenumbers
+# on which those errors are expected and should be suppressed.
+# The key None holds the lines on which every category is suppressed.
+_error_suppressions = {}
+
+# The root directory used for deriving header guard CPP variable.
+# This is set by --root flag.
+_root = None
+
+# The allowed line length of files.
+# This is set by --linelength flag.
+_line_length = 80
+
+# The allowed extensions for file names
+# This is set by --extensions flag.
+_valid_extensions = set(['cc', 'h', 'cpp', 'cu', 'cuh'])
+
+def ParseNolintSuppressions(filename, raw_line, linenum, error):
+ """Updates the global list of error-suppressions.
+
+ Parses any NOLINT comments on the current line, updating the global
+ error_suppressions store. A NOLINTNEXTLINE comment records the
+ suppression for linenum + 1 instead of linenum. A comment with no
+ category, or with the category "(*)", suppresses every category.
+
+ Reports an error if the named category is neither a known error
+ category nor a legacy one; legacy categories are accepted silently
+ and record nothing.
+
+ Args:
+ filename: str, the name of the input file.
+ raw_line: str, the line of input text, with comments.
+ linenum: int, the number of the current line.
+ error: function, an error handler.
+ """
+ matched = Search(r'\bNOLINT(NEXTLINE)?\b(\([^)]+\))?', raw_line)
+ if matched:
+ if matched.group(1):
+ # NOLINTNEXTLINE: the suppression applies to the following line.
+ suppressed_line = linenum + 1
+ else:
+ suppressed_line = linenum
+ # group(2) still includes the surrounding parentheses, or is None.
+ category = matched.group(2)
+ if category in (None, '(*)'): # => "suppress all"
+ _error_suppressions.setdefault(None, set()).add(suppressed_line)
+ else:
+ if category.startswith('(') and category.endswith(')'):
+ category = category[1:-1]
+ if category in _ERROR_CATEGORIES:
+ _error_suppressions.setdefault(category, set()).add(suppressed_line)
+ elif category not in _LEGACY_ERROR_CATEGORIES:
+ error(filename, linenum, 'readability/nolint', 5,
+ 'Unknown NOLINT error category: %s' % category)
+
+
+def ResetNolintSuppressions():
+ """Resets the set of NOLINT suppressions to empty.
+
+ Clears the global _error_suppressions map in place.
+ """
+ _error_suppressions.clear()
+
+
+def IsErrorSuppressedByNolint(category, linenum):
+ """Returns true if the specified error category is suppressed on this line.
+
+ Consults the global error_suppressions map populated by
+ ParseNolintSuppressions/ResetNolintSuppressions.
+
+ Args:
+ category: str, the category of the error.
+ linenum: int, the current line number.
+ Returns:
+ bool, True iff the error should be suppressed due to a NOLINT comment.
+ """
+ # The None key holds lines on which all categories are suppressed.
+ return (linenum in _error_suppressions.get(category, set()) or
+ linenum in _error_suppressions.get(None, set()))
+
+
+def Match(pattern, s):
+ """Matches the string with the pattern, caching the compiled regexp.
+
+ Args:
+ pattern: regex pattern
+ s: string to match against, starting at its beginning
+
+ Returns:
+ The result of the compiled pattern's match() on s.
+ """
+ # The regexp compilation caching is inlined in both Match and Search for
+ # performance reasons; factoring it out into a separate function turns out
+ # to be noticeably expensive.
+ if pattern not in _regexp_compile_cache:
+ _regexp_compile_cache[pattern] = sre_compile.compile(pattern)
+ return _regexp_compile_cache[pattern].match(s)
+
+
+def ReplaceAll(pattern, rep, s):
+ """Replaces instances of pattern in a string with a replacement.
+
+ The compiled regex is kept in a cache shared by Match and Search.
+
+ Args:
+ pattern: regex pattern
+ rep: replacement text
+ s: search string
+
+ Returns:
+ string with replacements made (or original string if no replacements)
+ """
+ # Same inlined cache lookup as in Match and Search.
+ if pattern not in _regexp_compile_cache:
+ _regexp_compile_cache[pattern] = sre_compile.compile(pattern)
+ return _regexp_compile_cache[pattern].sub(rep, s)
+
+
+def Search(pattern, s):
+ """Searches the string for the pattern, caching the compiled regexp.
+
+ Args:
+ pattern: regex pattern
+ s: string to search
+
+ Returns:
+ The result of the compiled pattern's search() on s.
+ """
+ if pattern not in _regexp_compile_cache:
+ _regexp_compile_cache[pattern] = sre_compile.compile(pattern)
+ return _regexp_compile_cache[pattern].search(s)
+
+
+class _IncludeState(object):
+ """Tracks line numbers for includes, and the order in which includes appear.
+
+ include_list contains list of lists of (header, line number) pairs.
+ It's a list of lists rather than just one flat list to make it
+ easier to update across preprocessor boundaries.
+
+ Call CheckNextIncludeOrder() once for each header in the file, passing
+ in the type constants defined above. A header that appears out of order
+ makes CheckNextIncludeOrder() return a non-empty error message.
+
+ """
+ # self._section will move monotonically through this set. If a system
+ # header would need to move it backwards, CheckNextIncludeOrder returns
+ # an error message instead.
+ _INITIAL_SECTION = 0
+ _MY_H_SECTION = 1
+ _C_SECTION = 2
+ _CPP_SECTION = 3
+ _OTHER_H_SECTION = 4
+
+ # Human-readable names used to build the 'Found %s after %s' message.
+ _TYPE_NAMES = {
+ _C_SYS_HEADER: 'C system header',
+ _CPP_SYS_HEADER: 'C++ system header',
+ _LIKELY_MY_HEADER: 'header this file implements',
+ _POSSIBLE_MY_HEADER: 'header this file may implement',
+ _OTHER_HEADER: 'other header',
+ }
+ _SECTION_NAMES = {
+ _INITIAL_SECTION: "... nothing. (This can't be an error.)",
+ _MY_H_SECTION: 'a header this file implements',
+ _C_SECTION: 'C system header',
+ _CPP_SECTION: 'C++ system header',
+ _OTHER_H_SECTION: 'other header',
+ }
+
+ def __init__(self):
+ self.include_list = [[]]
+ # An empty directive resets the section without touching include_list.
+ self.ResetSection('')
+
+ def FindHeader(self, header):
+ """Check if a header has already been included.
+
+ Args:
+ header: header to check.
+ Returns:
+ Line number of previous occurrence, or -1 if the header has not
+ been seen before.
+ """
+ for section_list in self.include_list:
+ for f in section_list:
+ if f[0] == header:
+ return f[1]
+ return -1
+
+ def ResetSection(self, directive):
+ """Reset section checking for preprocessor directive.
+
+ Args:
+ directive: preprocessor directive (e.g. "if", "else").
+ """
+ # The name of the current section.
+ self._section = self._INITIAL_SECTION
+ # The path of last found header.
+ self._last_header = ''
+
+ # Update list of includes. Note that we never pop from the
+ # include list: an opening conditional appends a fresh sub-list,
+ # while else/elif replaces the most recent sub-list with an empty one.
+ if directive in ('if', 'ifdef', 'ifndef'):
+ self.include_list.append([])
+ elif directive in ('else', 'elif'):
+ self.include_list[-1] = []
+
+ # Records header_path as the last header seen, for IsInAlphabeticalOrder.
+ def SetLastHeader(self, header_path):
+ self._last_header = header_path
+
+ def CanonicalizeAlphabeticalOrder(self, header_path):
+ """Returns a path canonicalized for alphabetical comparison.
+
+ - replaces "-" with "_" so they both cmp the same.
+ - removes '-inl' since we don't require them to be after the main header.
+ - lowercase everything, just in case.
+
+ Args:
+ header_path: Path to be canonicalized.
+
+ Returns:
+ Canonicalized path.
+ """
+ return header_path.replace('-inl.h', '.h').replace('-', '_').lower()
+
+ def IsInAlphabeticalOrder(self, clean_lines, linenum, header_path):
+ """Check if a header is in alphabetical order with the previous header.
+
+ Args:
+ clean_lines: A CleansedLines instance containing the file.
+ linenum: The number of the line to check.
+ header_path: Canonicalized header to be checked.
+
+ Returns:
+ Returns true if the header is in alphabetical order.
+ """
+ # If previous section is different from current section, _last_header will
+ # be reset to empty string, so it's always less than current header.
+ #
+ # If previous line was a blank line, assume that the headers are
+ # intentionally sorted the way they are.
+ if (self._last_header > header_path and
+ Match(r'^\s*#\s*include\b', clean_lines.elided[linenum - 1])):
+ return False
+ return True
+
+ def CheckNextIncludeOrder(self, header_type):
+ """Returns a non-empty error message if the next header is out of order.
+
+ This function also updates the internal state to be ready to check
+ the next include.
+
+ Args:
+ header_type: One of the _XXX_HEADER constants defined above.
+
+ Returns:
+ The empty string if the header is in the right order, or an
+ error message describing what's wrong.
+
+ """
+ # Built up front, before self._section is updated below.
+ error_message = ('Found %s after %s' %
+ (self._TYPE_NAMES[header_type],
+ self._SECTION_NAMES[self._section]))
+
+ last_section = self._section
+
+ if header_type == _C_SYS_HEADER:
+ if self._section <= self._C_SECTION:
+ self._section = self._C_SECTION
+ else:
+ self._last_header = ''
+ return error_message
+ elif header_type == _CPP_SYS_HEADER:
+ if self._section <= self._CPP_SECTION:
+ self._section = self._CPP_SECTION
+ else:
+ self._last_header = ''
+ return error_message
+ elif header_type == _LIKELY_MY_HEADER:
+ # A late "my header" is not an error; it is treated as an other header.
+ if self._section <= self._MY_H_SECTION:
+ self._section = self._MY_H_SECTION
+ else:
+ self._section = self._OTHER_H_SECTION
+ elif header_type == _POSSIBLE_MY_HEADER:
+ if self._section <= self._MY_H_SECTION:
+ self._section = self._MY_H_SECTION
+ else:
+ # This will always be the fallback because we're not sure
+ # enough that the header is associated with this file.
+ self._section = self._OTHER_H_SECTION
+ else:
+ assert header_type == _OTHER_HEADER
+ self._section = self._OTHER_H_SECTION
+
+ # Entering a new section restarts the alphabetical-order comparison.
+ if last_section != self._section:
+ self._last_header = ''
+
+ return ''
+
+
+class _CppLintState(object):
+ """Maintains module-wide state."""
+
+ def __init__(self):
+ self.verbose_level = 1 # global setting.
+ self.error_count = 0 # global count of reported errors
+ # filters to apply when emitting error messages
+ self.filters = _DEFAULT_FILTERS[:]
+ # backup of filter list. Used to restore the state after each file.
+ self._filters_backup = self.filters[:]
+ self.counting = 'total' # In what way are we counting errors?
+ self.errors_by_category = {} # string to int dict storing error counts
+
+ # output format (see how Error() formats each one):
+ # "emacs" - format that emacs can parse (default)
+ # "vs7" - format that Microsoft Visual Studio 7 can parse
+ # "eclipse" - like "emacs" but with "warning:" before the message
+ self.output_format = 'emacs'
+
+ def SetOutputFormat(self, output_format):
+ """Sets the output format for errors."""
+ self.output_format = output_format
+
+ def SetVerboseLevel(self, level):
+ """Sets the module's verbosity, and returns the previous setting."""
+ last_verbose_level = self.verbose_level
+ self.verbose_level = level
+ return last_verbose_level
+
+ def SetCountingStyle(self, counting_style):
+ """Sets the module's counting options."""
+ self.counting = counting_style
+
+ def SetFilters(self, filters):
+ """Sets the error-message filters.
+
+ These filters are applied when deciding whether to emit a given
+ error message.
+
+ Args:
+ filters: A string of comma-separated filters (eg "+whitespace/indent").
+ Each filter should start with + or -; else we die.
+
+ Raises:
+ ValueError: The comma-separated filters did not all start with '+' or '-'.
+ E.g. "-,+whitespace,-whitespace/indent,whitespace/badfilter"
+ """
+ # Default filters always have less priority than the flag ones.
+ self.filters = _DEFAULT_FILTERS[:]
+ self.AddFilters(filters)
+
+ def AddFilters(self, filters):
+ """ Adds more filters to the existing list of error-message filters.
+
+ Args:
+ filters: A string of comma-separated filters; empty entries are skipped.
+
+ Raises:
+ ValueError: Some filter in the resulting list does not start with
+ '+' or '-'. The new filters have already been appended by then.
+ """
+ for filt in filters.split(','):
+ clean_filt = filt.strip()
+ if clean_filt:
+ self.filters.append(clean_filt)
+ for filt in self.filters:
+ if not (filt.startswith('+') or filt.startswith('-')):
+ raise ValueError('Every filter in --filters must start with + or -'
+ ' (%s does not)' % filt)
+
+ def BackupFilters(self):
+ """ Saves the current filter list to backup storage."""
+ self._filters_backup = self.filters[:]
+
+ def RestoreFilters(self):
+ """ Restores filters previously backed up."""
+ self.filters = self._filters_backup[:]
+
+ def ResetErrorCounts(self):
+ """Sets the module's error statistic back to zero."""
+ self.error_count = 0
+ self.errors_by_category = {}
+
+ def IncrementErrorCount(self, category):
+ """Bumps the module's error statistic."""
+ self.error_count += 1
+ # Per-category counts are only kept for the 'toplevel' and 'detailed'
+ # counting styles; 'toplevel' keeps just the text before the first '/'.
+ if self.counting in ('toplevel', 'detailed'):
+ if self.counting != 'detailed':
+ category = category.split('/')[0]
+ if category not in self.errors_by_category:
+ self.errors_by_category[category] = 0
+ self.errors_by_category[category] += 1
+
+ def PrintErrorCounts(self):
+ """Print a summary of errors by category, and the total."""
+ for category, count in self.errors_by_category.iteritems():
+ sys.stderr.write('Category \'%s\' errors found: %d\n' %
+ (category, count))
+ sys.stderr.write('Total errors found: %d\n' % self.error_count)
+
+# The single module-wide state object used by the helper functions below.
+_cpplint_state = _CppLintState()
+
+
+def _OutputFormat():
+ """Gets the module's output format."""
+ return _cpplint_state.output_format
+
+
+def _SetOutputFormat(output_format):
+ """Sets the module's output format."""
+ _cpplint_state.SetOutputFormat(output_format)
+
+
+def _VerboseLevel():
+ """Returns the module's verbosity setting."""
+ return _cpplint_state.verbose_level
+
+
+def _SetVerboseLevel(level):
+ """Sets the module's verbosity, and returns the previous setting."""
+ return _cpplint_state.SetVerboseLevel(level)
+
+
+def _SetCountingStyle(level):
+ """Sets the module's counting options.
+
+ Args:
+ level: The counting style, despite the parameter name. The styles
+ 'toplevel' and 'detailed' enable per-category counts.
+ """
+ _cpplint_state.SetCountingStyle(level)
+
+
+def _Filters():
+ """Returns the module's list of output filters, as a list."""
+ return _cpplint_state.filters
+
+
+def _SetFilters(filters):
+ """Sets the module's error-message filters.
+
+ These filters are applied when deciding whether to emit a given
+ error message.
+
+ Args:
+ filters: A string of comma-separated filters (eg "+whitespace/indent").
+ Each filter should start with + or -; else we die.
+ """
+ _cpplint_state.SetFilters(filters)
+
+def _AddFilters(filters):
+ """Adds more filter overrides.
+
+ Unlike _SetFilters, this function does not reset the current list of filters
+ available.
+
+ Args:
+ filters: A string of comma-separated filters (eg "+whitespace/indent").
+ Each filter should start with + or -; else we die.
+ """
+ _cpplint_state.AddFilters(filters)
+
+def _BackupFilters():
+ """ Saves the current filter list to backup storage."""
+ _cpplint_state.BackupFilters()
+
+def _RestoreFilters():
+ """ Restores filters previously backed up."""
+ _cpplint_state.RestoreFilters()
+
+class _FunctionState(object):
+ """Tracks current function name and the number of lines in its body."""
+
+ _NORMAL_TRIGGER = 250 # for --v=0, 500 for --v=1, etc.
+ _TEST_TRIGGER = 400 # used instead when the function name starts with TEST/Test.
+
+ def __init__(self):
+ self.in_a_function = False
+ self.lines_in_function = 0
+ self.current_function = ''
+
+ def Begin(self, function_name):
+ """Start analyzing function body.
+
+ Args:
+ function_name: The name of the function being tracked.
+ """
+ self.in_a_function = True
+ self.lines_in_function = 0
+ self.current_function = function_name
+
+ def Count(self):
+ """Count line in current function body."""
+ if self.in_a_function:
+ self.lines_in_function += 1
+
+ def Check(self, error, filename, linenum):
+ """Report if too many lines in function body.
+
+ Args:
+ error: The function to call with any errors found.
+ filename: The name of the current file.
+ linenum: The number of the line to check.
+ """
+ if Match(r'T(EST|est)', self.current_function):
+ base_trigger = self._TEST_TRIGGER
+ else:
+ base_trigger = self._NORMAL_TRIGGER
+ # The limit doubles with every verbosity level.
+ trigger = base_trigger * 2**_VerboseLevel()
+
+ if self.lines_in_function > trigger:
+ error_level = int(math.log(self.lines_in_function / base_trigger, 2))
+ # Level is log2 of lines / base_trigger, capped at 5:
+ # ratio 1 => 0, 2 => 1, 4 => 2, 8 => 3, 16 => 4, 32 => 5, ...
+ if error_level > 5:
+ error_level = 5
+ error(filename, linenum, 'readability/fn_size', error_level,
+ 'Small and focused functions are preferred:'
+ ' %s has %d non-comment lines'
+ ' (error triggered by exceeding %d lines).' % (
+ self.current_function, self.lines_in_function, trigger))
+
+ def End(self):
+ """Stop analyzing function body."""
+ self.in_a_function = False
+
+
+class _IncludeError(Exception):
+ """Indicates a problem with the include order in a file."""
+ # No extra behavior beyond Exception.
+ pass
+
+
+class FileInfo(object):
+ """Provides utility functions for filenames.
+
+ FileInfo provides easy access to the components of a file's path
+ relative to the project root.
+ """
+
+ def __init__(self, filename):
+ self._filename = filename
+
+ def FullName(self):
+ """Returns the absolute path, with Windows backslashes made Unix-like."""
+ return os.path.abspath(self._filename).replace('\\', '/')
+
+ def RepositoryName(self):
+ """FullName after removing the local path to the repository.
+
+ If we have a real absolute path name here we can try to do something smart:
+ detecting the root of the checkout and truncating /path/to/checkout from
+ the name so that we get header guards that don't include things like
+ "C:\Documents and Settings\..." or "/home/username/..." in them and thus
+ people on different computers who have checked the source out to different
+ locations won't see bogus errors.
+
+ If the file does not exist, or no .svn/.git/.hg directory is found
+ above it, the full name is returned unchanged.
+ """
+ fullname = self.FullName()
+
+ if os.path.exists(fullname):
+ project_dir = os.path.dirname(fullname)
+
+ if os.path.exists(os.path.join(project_dir, ".svn")):
+ # If there's a .svn file in the current directory, we recursively look
+ # up the directory tree for the top of the SVN checkout
+ root_dir = project_dir
+ one_up_dir = os.path.dirname(root_dir)
+ while os.path.exists(os.path.join(one_up_dir, ".svn")):
+ root_dir = os.path.dirname(root_dir)
+ one_up_dir = os.path.dirname(one_up_dir)
+
+ prefix = os.path.commonprefix([root_dir, project_dir])
+ return fullname[len(prefix) + 1:]
+
+ # Not SVN <= 1.6? Try to find a git, hg, or svn top level directory by
+ # searching up from the current path. The loop also stops at the
+ # filesystem root, where dirname() returns its own argument.
+ root_dir = os.path.dirname(fullname)
+ while (root_dir != os.path.dirname(root_dir) and
+ not os.path.exists(os.path.join(root_dir, ".git")) and
+ not os.path.exists(os.path.join(root_dir, ".hg")) and
+ not os.path.exists(os.path.join(root_dir, ".svn"))):
+ root_dir = os.path.dirname(root_dir)
+
+ if (os.path.exists(os.path.join(root_dir, ".git")) or
+ os.path.exists(os.path.join(root_dir, ".hg")) or
+ os.path.exists(os.path.join(root_dir, ".svn"))):
+ prefix = os.path.commonprefix([root_dir, project_dir])
+ return fullname[len(prefix) + 1:]
+
+ # Don't know what to do; header guard warnings may be wrong...
+ return fullname
+
+ def Split(self):
+ """Splits the file into the directory, basename, and extension.
+
+ For 'chrome/browser/browser.cc', Split() would
+ return ('chrome/browser', 'browser', '.cc')
+
+ Returns:
+ A tuple of (directory, basename, extension).
+ """
+
+ googlename = self.RepositoryName()
+ project, rest = os.path.split(googlename)
+ return (project,) + os.path.splitext(rest)
+
+ def BaseName(self):
+ """File base name - text after the final slash, before the final period."""
+ return self.Split()[1]
+
+ def Extension(self):
+ """File extension - the final period and the text following it."""
+ return self.Split()[2]
+
+ def NoExtension(self):
+ """File path without its extension: directory and base name joined by '/'."""
+ return '/'.join(self.Split()[0:2])
+
+ def IsSource(self):
+ """File has a source file extension."""
+ # [1:] drops the leading period that Extension() includes.
+ return self.Extension()[1:] in ('c', 'cc', 'cpp', 'cxx')
+
+
+def _ShouldPrintError(category, confidence, linenum):
+ """If confidence >= verbose, category passes filter and is not suppressed.
+
+ Args:
+ category: str, the category of the error.
+ confidence: int, the confidence score of the error.
+ linenum: int, the line the error was found on.
+
+ Returns:
+ bool, True if the error should be printed.
+ """
+
+ # There are three ways we might decide not to print an error message:
+ # a "NOLINT(category)" comment appears in the source,
+ # the verbosity level isn't high enough, or the filters filter it out.
+ if IsErrorSuppressedByNolint(category, linenum):
+ return False
+
+ if confidence < _cpplint_state.verbose_level:
+ return False
+
+ # Filters are prefix matches applied in order, so a later matching
+ # filter overrides an earlier one.
+ is_filtered = False
+ for one_filter in _Filters():
+ if one_filter.startswith('-'):
+ if category.startswith(one_filter[1:]):
+ is_filtered = True
+ elif one_filter.startswith('+'):
+ if category.startswith(one_filter[1:]):
+ is_filtered = False
+ else:
+ assert False # should have been checked for in AddFilters.
+ if is_filtered:
+ return False
+
+ return True
+
+
+def Error(filename, linenum, category, confidence, message):
+ """Logs the fact we've found a lint error.
+
+ We log where the error was found, and also our confidence in the error,
+ that is, how certain we are this is a legitimate style regression, and
+ not a misidentification or a use that's sometimes justified.
+
+ False positives can be suppressed by the use of
+ "NOLINT(category)" comments on the offending line. These are
+ parsed into _error_suppressions.
+
+ The error is counted and written to stderr only if _ShouldPrintError
+ accepts it; the line layout depends on the module's output format.
+
+ Args:
+ filename: The name of the file containing the error.
+ linenum: The number of the line containing the error.
+ category: A string used to describe the "category" this bug
+ falls under: "whitespace", say, or "runtime". Categories
+ may have a hierarchy separated by slashes: "whitespace/indent".
+ confidence: A number from 1-5 representing a confidence score for
+ the error, with 5 meaning that we are certain of the problem,
+ and 1 meaning that it could be a legitimate construct.
+ message: The error message.
+ """
+ if _ShouldPrintError(category, confidence, linenum):
+ _cpplint_state.IncrementErrorCount(category)
+ if _cpplint_state.output_format == 'vs7':
+ sys.stderr.write('%s(%s): %s [%s] [%d]\n' % (
+ filename, linenum, message, category, confidence))
+ elif _cpplint_state.output_format == 'eclipse':
+ sys.stderr.write('%s:%s: warning: %s [%s] [%d]\n' % (
+ filename, linenum, message, category, confidence))
+ else:
+ # Any other format, including the default 'emacs'.
+ sys.stderr.write('%s:%s: %s [%s] [%d]\n' % (
+ filename, linenum, message, category, confidence))
+
+
+# Matches standard C++ escape sequences per 2.13.2.3 of the C++ standard.
+_RE_PATTERN_CLEANSE_LINE_ESCAPES = re.compile(
+ r'\\([abfnrtv?"\\\']|\d+|x[0-9a-fA-F]+)')
+# Match a single C style comment on the same line.
+_RE_PATTERN_C_COMMENTS = r'/\*(?:[^*]|\*(?!/))*\*/'
+# Matches a single-line C style comment together with adjacent whitespace.
+# This RE is a little bit more complicated than one might expect, because we
+# also have to decide which surrounding spaces to remove so that comments
+# inside statements are handled better.
+# The current rule is: We only clear spaces from both sides when we're at the
+# end of the line. Otherwise, we try to remove spaces from the right side,
+# if this doesn't work we try on left side but only if there's a non-word
+# character on the right.
+_RE_PATTERN_CLEANSE_LINE_C_COMMENTS = re.compile(
+ r'(\s*' + _RE_PATTERN_C_COMMENTS + r'\s*$|' +
+ _RE_PATTERN_C_COMMENTS + r'\s+|' +
+ r'\s+' + _RE_PATTERN_C_COMMENTS + r'(?=\W)|' +
+ _RE_PATTERN_C_COMMENTS + r')')
+
+
+def IsCppString(line):
+ """Tells whether the text following line would be inside a string constant.
+
+ This function does not consider single-line nor multi-line comments.
+
+ Args:
+ line: is a partial line of code starting from the 0..n.
+
+ Returns:
+ True, if next character appended to 'line' is inside a
+ string constant.
+ """
+
+ line = line.replace(r'\\', 'XX') # after this, \\" does not match to \"
+ # Count double quotes, leaving out escaped ones (\") and the character
+ # literal '"'; an odd count means a string is still open.
+ return ((line.count('"') - line.count(r'\"') - line.count("'\"'")) & 1) == 1
+
+
+def CleanseRawStrings(raw_lines):
+ """Removes C++11 raw strings from lines.
+
+ Before:
+ static const char kData[] = R"(
+ multi-line string
+ )";
+
+ After:
+ static const char kData[] = ""
+ (replaced by a line holding only "")
+ "";
+
+ Args:
+ raw_lines: list of raw lines.
+
+ Returns:
+ list of lines with C++11 raw strings replaced by empty strings.
+ """
+
+ # Closing sequence of the raw string we are currently inside, or None.
+ delimiter = None
+ lines_without_raw_strings = []
+ for line in raw_lines:
+ if delimiter:
+ # Inside a raw string, look for the end
+ end = line.find(delimiter)
+ if end >= 0:
+ # Found the end of the string, match leading space for this
+ # line and resume copying the original lines, and also insert
+ # a "" on the last line.
+ leading_space = Match(r'^(\s*)\S', line)
+ line = leading_space.group(1) + '""' + line[end + len(delimiter):]
+ delimiter = None
+ else:
+ # Haven't found the end yet, replace the line with an empty string
+ # literal.
+ line = '""'
+
+ # Look for beginning of a raw string, and replace them with
+ # empty strings. This is done in a loop to handle multiple raw
+ # strings on the same line.
+ while delimiter is None:
+ # Look for beginning of a raw string.
+ # See 2.14.15 [lex.string] for syntax.
+ matched = Match(r'^(.*)\b(?:R|u8R|uR|UR|LR)"([^\s\\()]*)\((.*)$', line)
+ if matched:
+ # A raw string R"xyz(...)xyz" is closed by ')' + 'xyz' + '"'.
+ delimiter = ')' + matched.group(2) + '"'
+
+ end = matched.group(3).find(delimiter)
+ if end >= 0:
+ # Raw string ended on same line
+ line = (matched.group(1) + '""' +
+ matched.group(3)[end + len(delimiter):])
+ delimiter = None
+ else:
+ # Start of a multi-line raw string
+ line = matched.group(1) + '""'
+ else:
+ break
+
+ lines_without_raw_strings.append(line)
+
+ # TODO(unknown): if delimiter is not None here, we might want to
+ # emit a warning for unterminated string.
+ return lines_without_raw_strings
+
+
+def FindNextMultiLineCommentStart(lines, lineix):
+ """Find the beginning marker for a multiline comment.
+
+ Args:
+ lines: list of lines to scan.
+ lineix: index of the line to start scanning at.
+
+ Returns:
+ Index of the first line at or after lineix that, once stripped, starts
+ with '/*' and has no '*/' later on the same line; len(lines) if none.
+ """
+ while lineix < len(lines):
+ if lines[lineix].strip().startswith('/*'):
+ # Only return this marker if the comment goes beyond this line
+ if lines[lineix].strip().find('*/', 2) < 0:
+ return lineix
+ lineix += 1
+ return len(lines)
+
+
+def FindNextMultiLineCommentEnd(lines, lineix):
+ """We are inside a comment, find the end marker.
+
+ Returns:
+ Index of the first line at or after lineix that, once stripped, ends
+ with '*/'; len(lines) if there is none.
+ """
+ while lineix < len(lines):
+ if lines[lineix].strip().endswith('*/'):
+ return lineix
+ lineix += 1
+ return len(lines)
+
+
+def RemoveMultiLineCommentsFromRange(lines, begin, end):
+ """Clears a range of lines for multi-line comments.
+
+ Overwrites lines[begin:end] in place; end is exclusive.
+ """
+ # Having /**/ dummy comments makes the lines non-empty, so we will not get
+ # unnecessary blank line warnings later in the code.
+ for i in range(begin, end):
+ lines[i] = '/**/'
+
+
+def RemoveMultiLineComments(filename, lines, error):
+ """Removes multiline (c-style) comments from lines.
+
+ Modifies lines in place. Reports an error and stops if a comment
+ start has no matching end.
+
+ Args:
+ filename: The name of the current file, used in the error report.
+ lines: list of lines to process.
+ error: The function to call with any errors found.
+ """
+ lineix = 0
+ while lineix < len(lines):
+ lineix_begin = FindNextMultiLineCommentStart(lines, lineix)
+ if lineix_begin >= len(lines):
+ return
+ lineix_end = FindNextMultiLineCommentEnd(lines, lineix_begin)
+ if lineix_end >= len(lines):
+ # lineix_begin is a 0-based index; the report uses 1-based line numbers.
+ error(filename, lineix_begin + 1, 'readability/multiline_comment', 5,
+ 'Could not find end of multi-line comment')
+ return
+ RemoveMultiLineCommentsFromRange(lines, lineix_begin, lineix_end + 1)
+ lineix = lineix_end + 1
+
+
+def CleanseComments(line):
+ """Removes //-comments and single-line C-style /* */ comments.
+
+ Args:
+ line: A line of C++ source.
+
+ Returns:
+ The line with single-line comments removed.
+ """
+ commentpos = line.find('//')
+ # Only the first '//' is considered, and only if it is not inside a string.
+ if commentpos != -1 and not IsCppString(line[:commentpos]):
+ line = line[:commentpos].rstrip()
+ # get rid of /* ... */
+ return _RE_PATTERN_CLEANSE_LINE_C_COMMENTS.sub('', line)
+
+
+class CleansedLines(object):
+ """Holds 4 copies of all lines with different preprocessing applied to them.
+
+ 1) elided member contains lines without strings and comments.
+ 2) lines member contains lines without comments.
+ 3) raw_lines member contains all the lines without processing.
+ 4) lines_without_raw_strings member is same as raw_lines, but with C++11 raw
+ strings removed.
+ All these members are of <type 'list'>, and of the same length.
+ """
+
+ def __init__(self, lines):
+ self.elided = []
+ self.lines = []
+ self.raw_lines = lines
+ self.num_lines = len(lines)
+ self.lines_without_raw_strings = CleanseRawStrings(lines)
+ # Both lines and elided are derived from the raw-string-free copy.
+ for linenum in range(len(self.lines_without_raw_strings)):
+ self.lines.append(CleanseComments(
+ self.lines_without_raw_strings[linenum]))
+ elided = self._CollapseStrings(self.lines_without_raw_strings[linenum])
+ self.elided.append(CleanseComments(elided))
+
+ def NumLines(self):
+ """Returns the number of lines represented."""
+ return self.num_lines
+
+ @staticmethod
+ def _CollapseStrings(elided):
+ """Collapses strings and chars on a line to simple "" or '' blocks.
+
+ We nix strings first so we're not fooled by text like '"http://"'
+
+ Args:
+ elided: The line being processed.
+
+ Returns:
+ The line with collapsed strings.
+ """
+ # Lines matching the include pattern are returned unchanged.
+ if _RE_PATTERN_INCLUDE.match(elided):
+ return elided
+
+ # Remove escaped characters first to make quote/single quote collapsing
+ # basic. Things that look like escaped characters shouldn't occur
+ # outside of strings and chars.
+ elided = _RE_PATTERN_CLEANSE_LINE_ESCAPES.sub('', elided)
+
+ # Replace quoted strings and digit separators. Both single quotes
+ # and double quotes are processed in the same loop, otherwise
+ # nested quotes wouldn't work.
+ # 'collapsed' accumulates the processed prefix; 'elided' is what remains.
+ collapsed = ''
+ while True:
+ # Find the first quote character
+ match = Match(r'^([^\'"]*)([\'"])(.*)$', elided)
+ if not match:
+ collapsed += elided
+ break
+ head, quote, tail = match.groups()
+
+ if quote == '"':
+ # Collapse double quoted strings
+ second_quote = tail.find('"')
+ if second_quote >= 0:
+ collapsed += head + '""'
+ elided = tail[second_quote + 1:]
+ else:
+ # Unmatched double quote, don't bother processing the rest
+ # of the line since this is probably a multiline string.
+ collapsed += elided
+ break
+ else:
+ # Found single quote, check nearby text to eliminate digit separators.
+ #
+ # There is no special handling for floating point here, because
+ # the integer/fractional/exponent parts would all be parsed
+ # correctly as long as there are digits on both sides of the
+ # separator. So we are fine as long as we don't see something
+ # like "0.'3" (gcc 4.9.0 will not allow this literal).
+ if Search(r'\b(?:0[bBxX]?|[1-9])[0-9a-fA-F]*$', head):
+ # The quote follows a numeric literal: drop the separators and keep
+ # the digits that follow.
+ match_literal = Match(r'^((?:\'?[0-9a-zA-Z_])*)(.*)$', "'" + tail)
+ collapsed += head + match_literal.group(1).replace("'", '')
+ elided = match_literal.group(2)
+ else:
+ second_quote = tail.find('\'')
+ if second_quote >= 0:
+ collapsed += head + "''"
+ elided = tail[second_quote + 1:]
+ else:
+ # Unmatched single quote
+ collapsed += elided
+ break
+
+ return collapsed
+
+
+def FindEndOfExpressionInLine(line, startpos, stack):
+ """Find the position just after the end of current parenthesized expression.
+
+ The stack argument is modified in place while scanning.
+
+ Args:
+ line: a CleansedLines line.
+ startpos: start searching at this position.
+ stack: nesting stack at startpos.
+
+ Returns:
+ On finding matching end: (index just after matching end, None)
+ On finding an unclosed expression: (-1, None)
+ Otherwise: (-1, new stack at end of this line)
+ """
+ for i in xrange(startpos, len(line)):
+ char = line[i]
+ if char in '([{':
+ # Found start of parenthesized expression, push to expression stack
+ stack.append(char)
+ elif char == '<':
+ # Found potential start of template argument list
+ if i > 0 and line[i - 1] == '<':
+ # Left shift operator
+ if stack and stack[-1] == '<':
+ stack.pop()
+ if not stack:
+ return (-1, None)
+ elif i > 0 and Search(r'\boperator\s*$', line[0:i]):
+ # operator<, don't add to stack
+ continue
+ else:
+ # Tentative start of template argument list
+ stack.append('<')
+ elif char in ')]}':
+ # Found end of parenthesized expression.
+ #
+ # If we are currently expecting a matching '>', the pending '<'
+ # must have been an operator. Remove them from expression stack.
+ while stack and stack[-1] == '<':
+ stack.pop()
+ if not stack:
+ return (-1, None)
+ if ((stack[-1] == '(' and char == ')') or
+ (stack[-1] == '[' and char == ']') or
+ (stack[-1] == '{' and char == '}')):
+ stack.pop()
+ if not stack:
+ return (i + 1, None)
+ else:
+ # Mismatched parentheses
+ return (-1, None)
+ elif char == '>':
+ # Found potential end of template argument list.
+
+ # Ignore "->" and operator functions
+ # NOTE(review): the slice ends at i - 1, so the 'operator' test skips
+ # the character just before this '>'; confirm whether line[0:i] was
+ # intended for a plain 'operator>'.
+ if (i > 0 and
+ (line[i - 1] == '-' or Search(r'\boperator\s*$', line[0:i - 1]))):
+ continue
+
+ # Pop the stack if there is a matching '<'. Otherwise, ignore
+ # this '>' since it must be an operator.
+ if stack:
+ if stack[-1] == '<':
+ stack.pop()
+ if not stack:
+ return (i + 1, None)
+ elif char == ';':
+ # Found something that look like end of statements. If we are currently
+ # expecting a '>', the matching '<' must have been an operator, since
+ # template argument list should not contain statements.
+ while stack and stack[-1] == '<':
+ stack.pop()
+ if not stack:
+ return (-1, None)
+
+ # Did not find end of expression or unbalanced parentheses on this line
+ return (-1, stack)
+
+
+def CloseExpression(clean_lines, linenum, pos):
+ """If input points to ( or { or [ or <, finds the position that closes it.
+
+ If lines[linenum][pos] points to a '(' or '{' or '[' or '<', finds the
+ linenum/pos that correspond to the closing of the expression.
+
+ TODO(unknown): cpplint spends a fair bit of time matching parentheses.
+ Ideally we would want to index all opening and closing parentheses once
+ and have CloseExpression be just a simple lookup, but due to preprocessor
+ tricks, this is not so easy.
+
+ Args:
+ clean_lines: A CleansedLines instance containing the file.
+ linenum: The number of the line to check.
+ pos: A position on the line.
+
+ Returns:
+ A tuple (line, linenum, pos) pointer *past* the closing brace, or
+ (line, len(lines), -1) if we never find a close. Note we ignore
+ strings and comments when matching; and the line we return is the
+ 'cleansed' line at linenum.
+ """
+
+ line = clean_lines.elided[linenum]
+ # Not an opening character, or a '<' that begins '<<' or '<=': no match.
+ if (line[pos] not in '({[<') or Match(r'<[<=]', line[pos:]):
+ return (line, clean_lines.NumLines(), -1)
+
+ # Check first line
+ (end_pos, stack) = FindEndOfExpressionInLine(line, pos, [])
+ if end_pos > -1:
+ return (line, linenum, end_pos)
+
+ # Continue scanning forward. A stack of None (unclosed expression) or an
+ # empty stack ends the loop.
+ while stack and linenum < clean_lines.NumLines() - 1:
+ linenum += 1
+ line = clean_lines.elided[linenum]
+ (end_pos, stack) = FindEndOfExpressionInLine(line, 0, stack)
+ if end_pos > -1:
+ return (line, linenum, end_pos)
+
+ # Did not find end of expression before end of file, give up
+ return (line, clean_lines.NumLines(), -1)
+
+
+def FindStartOfExpressionInLine(line, endpos, stack):
+ """Find position at the matching start of current expression.
+
+ This is almost the reverse of FindEndOfExpressionInLine, but note
+ that the input position and returned position differs by 1.
+
+ The line is scanned right to left, starting at endpos. Closing brackets
+ are pushed onto stack and popped again when their opener is met; stack
+ is modified in place.
+
+ Args:
+ line: a CleansedLines line.
+ endpos: start searching at this position.
+ stack: nesting stack at endpos.
+
+ Returns:
+ On finding matching start: (index at matching start, None)
+ On finding an unclosed expression: (-1, None)
+ Otherwise: (-1, new stack at beginning of this line)
+ """
+ i = endpos
+ while i >= 0:
+ char = line[i]
+ if char in ')]}':
+ # Found end of expression, push to expression stack
+ stack.append(char)
+ elif char == '>':
+ # Found potential end of template argument list.
+ #
+ # Ignore it if it's a "->" or ">=" or "operator>"
+ # (the ">=" test only matches when whitespace surrounds it).
+ if (i > 0 and
+ (line[i - 1] == '-' or
+ Match(r'\s>=\s', line[i - 1:]) or
+ Search(r'\boperator\s*$', line[0:i]))):
+ # Extra decrement: together with the one at the bottom of the loop
+ # this also skips the character just before the '>'.
+ i -= 1
+ else:
+ stack.append('>')
+ elif char == '<':
+ # Found potential start of template argument list
+ if i > 0 and line[i - 1] == '<':
+ # Left shift operator
+ # (extra decrement skips the first '<' of the pair as well)
+ i -= 1
+ else:
+ # If there is a matching '>', we can pop the expression stack.
+ # Otherwise, ignore this '<' since it must be an operator.
+ if stack and stack[-1] == '>':
+ stack.pop()
+ if not stack:
+ return (i, None)
+ elif char in '([{':
+ # Found start of expression.
+ #
+ # If there are any unmatched '>' on the stack, they must be
+ # operators. Remove those.
+ while stack and stack[-1] == '>':
+ stack.pop()
+ if not stack:
+ return (-1, None)
+ if ((char == '(' and stack[-1] == ')') or
+ (char == '[' and stack[-1] == ']') or
+ (char == '{' and stack[-1] == '}')):
+ stack.pop()
+ if not stack:
+ return (i, None)
+ else:
+ # Mismatched parentheses
+ return (-1, None)
+ elif char == ';':
+ # Found something that look like end of statements. If we are currently
+ # expecting a '<', the matching '>' must have been an operator, since
+ # template argument list should not contain statements.
+ while stack and stack[-1] == '>':
+ stack.pop()
+ if not stack:
+ return (-1, None)
+
+ i -= 1
+
+ # Reached the start of the line with brackets still open; hand the stack
+ # back so the caller can continue on the previous line.
+ return (-1, stack)
+
+
+def ReverseCloseExpression(clean_lines, linenum, pos):
+ """If input points to ) or } or ] or >, finds the position that opens it.
+
+ If lines[linenum][pos] points to a ')' or '}' or ']' or '>', finds the
+ linenum/pos that correspond to the opening of the expression.
+
+ Args:
+ clean_lines: A CleansedLines instance containing the file.
+ linenum: The number of the line to check.
+ pos: A position on the line.
+
+ Returns:
+ A tuple (line, linenum, pos) pointer *at* the opening brace, or
+ (line, 0, -1) if we never find the matching opening brace. Note
+ we ignore strings and comments when matching; and the line we
+ return is the 'cleansed' line at linenum.
+ """
+ line = clean_lines.elided[linenum]
+ if line[pos] not in ')}]>':
+ return (line, 0, -1)
+
+ # Check last line
+ (start_pos, stack) = FindStartOfExpressionInLine(line, pos, [])
+ if start_pos > -1:
+ return (line, linenum, start_pos)
+
+ # Continue scanning backward
+ while stack and linenum > 0:
+ linenum -= 1
+ line = clean_lines.elided[linenum]
+ (start_pos, stack) = FindStartOfExpressionInLine(line, len(line) - 1, stack)
+ if start_pos > -1:
+ return (line, linenum, start_pos)
+
+ # Did not find start of expression before beginning of file, give up
+ return (line, 0, -1)
+
+
+def CheckForCopyright(filename, lines, error):
+  """Logs an error if no Copyright message appears at the top of the file.
+
+  Args:
+    filename: The name of the current file.
+    lines: An array of strings, each representing a line of the file. Index 0
+      is a dummy entry, so the search covers indices 1 through 10.
+    error: The function to call with any errors found.
+  """
+  # We'll say it should occur by line 10. Don't forget there's a
+  # dummy line at the front. Slicing (rather than xrange) keeps this
+  # working on both Python 2 and Python 3.
+  top_of_file = lines[1:11]
+  if not any(re.search(r'Copyright', text, re.I) for text in top_of_file):
+    error(filename, 0, 'legal/copyright', 5,
+          'No copyright message found. '
+          'You should have a line: "Copyright [year] <Copyright Owner>"')
+
+
+def GetIndentLevel(line):
+ """Return the number of leading spaces in line.
+
+ Args:
+ line: A string to check.
+
+ Returns:
+ An integer count of leading spaces, possibly zero.
+ """
+ indent = Match(r'^( *)\S', line)
+ if indent:
+ return len(indent.group(1))
+ else:
+ return 0
+
+
+def GetHeaderGuardCPPVariable(filename):
+ """Returns the CPP variable that should be used as a header guard.
+
+ Args:
+ filename: The name of a C++ header file.
+
+ Returns:
+ The CPP variable that should be used as a header guard in the
+ named file.
+
+ """
+
+ # Restores original filename in case that cpplint is invoked from Emacs's
+ # flymake.
+ filename = re.sub(r'_flymake\.h$', '.h', filename)
+ filename = re.sub(r'/\.flymake/([^/]*)$', r'/\1', filename)
+ # Replace 'c++' with 'cpp'.
+ filename = filename.replace('C++', 'cpp').replace('c++', 'cpp')
+
+ fileinfo = FileInfo(filename)
+ file_path_from_root = fileinfo.RepositoryName()
+ if _root:
+ file_path_from_root = re.sub('^' + _root + os.sep, '', file_path_from_root)
+ return re.sub(r'[^a-zA-Z0-9]', '_', file_path_from_root).upper() + '_'
+
+
+def CheckForHeaderGuard(filename, clean_lines, error):
+ """Checks that the file contains a header guard.
+
+ Logs an error if no #ifndef header guard is present. For other
+ headers, checks that the full pathname is used.
+
+ Args:
+ filename: The name of the C++ header file.
+ clean_lines: A CleansedLines instance containing the file.
+ error: The function to call with any errors found.
+ """
+
+ # Don't check for header guards if there are error suppression
+ # comments somewhere in this file.
+ #
+ # Because this is silencing a warning for a nonexistent line, we
+ # only support the very specific NOLINT(build/header_guard) syntax,
+ # and not the general NOLINT or NOLINT(*) syntax.
+ raw_lines = clean_lines.lines_without_raw_strings
+ for i in raw_lines:
+ if Search(r'//\s*NOLINT\(build/header_guard\)', i):
+ return
+
+ cppvar = GetHeaderGuardCPPVariable(filename)
+
+ # Single pass over the file: remember the argument of the first #ifndef
+ # and of the first #define, and the text/position of the last #endif.
+ ifndef = ''
+ ifndef_linenum = 0
+ define = ''
+ endif = ''
+ endif_linenum = 0
+ for linenum, line in enumerate(raw_lines):
+ linesplit = line.split()
+ if len(linesplit) >= 2:
+ # find the first occurrence of #ifndef and #define, save arg
+ if not ifndef and linesplit[0] == '#ifndef':
+ # set ifndef to the header guard presented on the #ifndef line.
+ ifndef = linesplit[1]
+ ifndef_linenum = linenum
+ if not define and linesplit[0] == '#define':
+ define = linesplit[1]
+ # find the last occurrence of #endif, save entire line
+ if line.startswith('#endif'):
+ endif = line
+ endif_linenum = linenum
+
+ # A guard only counts when both directives exist and name the same macro.
+ if not ifndef or not define or ifndef != define:
+ error(filename, 0, 'build/header_guard', 5,
+ 'No #ifndef header guard found, suggested CPP variable is: %s' %
+ cppvar)
+ return
+
+ # The guard should be PATH_FILE_H_, but we also allow PATH_FILE_H__
+ # for backward compatibility.
+ if ifndef != cppvar:
+ # Level 0 for the tolerated double-underscore form, 5 for anything else.
+ error_level = 0
+ if ifndef != cppvar + '_':
+ error_level = 5
+
+ # NOTE(review): presumably this lets a NOLINT comment on the #ifndef line
+ # suppress the error reported next - confirm against
+ # ParseNolintSuppressions.
+ ParseNolintSuppressions(filename, raw_lines[ifndef_linenum], ifndef_linenum,
+ error)
+ error(filename, ifndef_linenum, 'build/header_guard', error_level,
+ '#ifndef header guard has wrong style, please use: %s' % cppvar)
+
+ # Check for "//" comments on endif line.
+ ParseNolintSuppressions(filename, raw_lines[endif_linenum], endif_linenum,
+ error)
+ match = Match(r'#endif\s*//\s*' + cppvar + r'(_)?\b', endif)
+ if match:
+ if match.group(1) == '_':
+ # Issue low severity warning for deprecated double trailing underscore
+ error(filename, endif_linenum, 'build/header_guard', 0,
+ '#endif line should be "#endif // %s"' % cppvar)
+ return
+
+ # Didn't find the corresponding "//" comment. If this file does not
+ # contain any "//" comments at all, it could be that the compiler
+ # only wants "/**/" comments, look for those instead.
+ no_single_line_comments = True
+ # The first and last entries of raw_lines are not examined here.
+ for i in xrange(1, len(raw_lines) - 1):
+ line = raw_lines[i]
+ # Matches a "//" that is preceded only by complete quoted literals and
+ # unquoted characters.
+ if Match(r'^(?:(?:\'(?:\.|[^\'])*\')|(?:"(?:\.|[^"])*")|[^\'"])*//', line):
+ no_single_line_comments = False
+ break
+
+ if no_single_line_comments:
+ match = Match(r'#endif\s*/\*\s*' + cppvar + r'(_)?\s*\*/', endif)
+ if match:
+ if match.group(1) == '_':
+ # Low severity warning for double trailing underscore
+ error(filename, endif_linenum, 'build/header_guard', 0,
+ '#endif line should be "#endif /* %s */"' % cppvar)
+ return
+
+ # Didn't find anything
+ error(filename, endif_linenum, 'build/header_guard', 5,
+ '#endif line should be "#endif // %s"' % cppvar)
+
+
+def CheckHeaderFileIncluded(filename, include_state, error):
+  """Logs an error if a .cc file does not include its header.
+
+  The header path is derived by replacing the last two characters of
+  filename with 'h' (so 'foo.cc' becomes 'foo.h'). Nothing is reported for
+  *_test.cc / *_unittest.cc files, or when that header does not exist.
+
+  Args:
+    filename: The name of the current file.
+    include_state: Object whose include_list holds one list per section;
+      each entry is indexed as [0] (presumably the included path) and [1]
+      (presumably its line number).
+    error: The function to call with any errors found.
+  """
+  # Do not check test files
+  if filename.endswith(('_test.cc', '_unittest.cc')):
+    return
+
+  fileinfo = FileInfo(filename)
+  headerfile = filename[:-2] + 'h'
+  if not os.path.exists(headerfile):
+    return
+
+  headername = FileInfo(headerfile).RepositoryName()
+  first_include = 0
+  for section_list in include_state.include_list:
+    for entry in section_list:
+      included = entry[0]
+      # A substring match in either direction counts as "included".
+      if headername in included or included in headername:
+        return
+      if not first_include:
+        first_include = entry[1]
+
+  message = '%s should include its header file %s' % (
+      fileinfo.RepositoryName(), headername)
+  error(filename, first_include, 'build/include', 5, message)
+
+
+def CheckForBadCharacters(filename, lines, error):
+  """Logs an error for each line containing bad characters.
+
+  Two kinds of bad characters:
+
+  1. Unicode replacement characters: These indicate that either the file
+  contained invalid UTF-8 (likely) or Unicode replacement characters (which
+  it shouldn't). Note that it's possible for this to throw off line
+  numbering if the invalid UTF-8 occurred adjacent to a newline.
+
+  2. NUL bytes. These are problematic for some tools.
+
+  A line containing both kinds is reported twice, replacement character
+  first.
+
+  Args:
+    filename: The name of the current file.
+    lines: An array of strings, each representing a line of the file.
+    error: The function to call with any errors found.
+  """
+  # (character to look for, error category, message), in reporting order.
+  bad_characters = (
+      (u'\ufffd', 'readability/utf8',
+       'Line contains invalid UTF-8 (or Unicode replacement character).'),
+      ('\0', 'readability/nul', 'Line contains NUL byte.'),
+  )
+  for linenum, line in enumerate(lines):
+    for needle, category, message in bad_characters:
+      if needle in line:
+        error(filename, linenum, category, 5, message)
+
+
+def CheckForNewlineAtEOF(filename, lines, error):
+  """Logs an error if there is no newline char at the end of the file.
+
+  Args:
+    filename: The name of the current file.
+    lines: An array of strings, each representing a line of the file.
+    error: The function to call with any errors found.
+  """
+  # The array lines() was created by adding two newlines to the
+  # original file (go figure), then splitting on \n.
+  # To verify that the file ends in \n, we just have to make sure the
+  # last-but-two element of lines() exists and is empty.
+  line_count = len(lines)
+  ends_with_newline = line_count >= 3 and not lines[-2]
+  if ends_with_newline:
+    return
+  error(filename, line_count - 2, 'whitespace/ending_newline', 5,
+        'Could not find a newline character at the end of the file.')
+
+
+def CheckForMultilineCommentsAndStrings(filename, clean_lines, linenum, error):
+  """Logs an error if we see /* ... */ or "..." that extend past one line.
+
+  /* ... */ comments are legit inside macros, for one line.
+  Otherwise, we prefer // comments, so it's ok to warn about the
+  other. Likewise, it's ok for strings to extend across multiple
+  lines, as long as a line continuation character (backslash)
+  terminates each line. Although not currently prohibited by the C++
+  style guide, it's ugly and unnecessary. We don't do well with either
+  in this lint program, so we warn about both.
+
+  Args:
+    filename: The name of the current file.
+    clean_lines: A CleansedLines instance containing the file.
+    linenum: The number of the line to check.
+    error: The function to call with any errors found.
+  """
+  # Remove all \\ (escaped backslashes) from the line. They are OK, and the
+  # second (escaped) slash may trigger later \" detection erroneously.
+  text = clean_lines.elided[linenum].replace('\\\\', '')
+
+  # More comment openers than closers: a comment runs past this line.
+  comment_opens = text.count('/*')
+  comment_closes = text.count('*/')
+  if comment_opens > comment_closes:
+    error(filename, linenum, 'readability/multiline_comment', 5,
+          'Complex multi-line /*...*/-style comment found. '
+          'Lint may give bogus warnings. '
+          'Consider replacing these with //-style comments, '
+          'with #if 0...#endif, '
+          'or with more clearly structured multi-line comments.')
+
+  # An odd number of unescaped double quotes: a string runs past this line.
+  unescaped_quotes = text.count('"') - text.count('\\"')
+  if unescaped_quotes % 2:
+    error(filename, linenum, 'readability/multiline_string', 5,
+          'Multi-line string ("...") found. This lint script doesn\'t '
+          'do well with such strings, and may give bogus warnings. '
+          'Use C++11 raw strings or concatenation instead.')
+
+
+# (non-threadsafe name, thread-safe alternative, validation pattern)
+#
+# The validation pattern is used to eliminate false positives such as:
+# _rand(); // false positive due to substring match.
+# ->rand(); // some member function rand().
+# ACMRandom rand(seed); // some variable named rand.
+# ISAACRandom rand(); // another variable named rand.
+#
+# Basically we require the return value of these functions to be used
+# in some expression context on the same line by matching on some
+# operator before the function name. This eliminates constructors and
+# member function calls.
+# Text that must precede the function name: one of - + * / = % ^ & | ( <
+# followed by optional whitespace, or '>' followed by at least one
+# whitespace character.
+_UNSAFE_FUNC_PREFIX = r'(?:[-+*/=%^&|(<]\s*|>\s+)'
+# getlogin and rand are matched with an empty argument list; every other
+# entry requires at least one character between the parentheses.
+_THREADING_LIST = (
+ ('asctime(', 'asctime_r(', _UNSAFE_FUNC_PREFIX + r'asctime\([^)]+\)'),
+ ('ctime(', 'ctime_r(', _UNSAFE_FUNC_PREFIX + r'ctime\([^)]+\)'),
+ ('getgrgid(', 'getgrgid_r(', _UNSAFE_FUNC_PREFIX + r'getgrgid\([^)]+\)'),
+ ('getgrnam(', 'getgrnam_r(', _UNSAFE_FUNC_PREFIX + r'getgrnam\([^)]+\)'),
+ ('getlogin(', 'getlogin_r(', _UNSAFE_FUNC_PREFIX + r'getlogin\(\)'),
+ ('getpwnam(', 'getpwnam_r(', _UNSAFE_FUNC_PREFIX + r'getpwnam\([^)]+\)'),
+ ('getpwuid(', 'getpwuid_r(', _UNSAFE_FUNC_PREFIX + r'getpwuid\([^)]+\)'),
+ ('gmtime(', 'gmtime_r(', _UNSAFE_FUNC_PREFIX + r'gmtime\([^)]+\)'),
+ ('localtime(', 'localtime_r(', _UNSAFE_FUNC_PREFIX + r'localtime\([^)]+\)'),
+ ('rand(', 'rand_r(', _UNSAFE_FUNC_PREFIX + r'rand\(\)'),
+ ('strtok(', 'strtok_r(',
+ _UNSAFE_FUNC_PREFIX + r'strtok\([^)]+\)'),
+ ('ttyname(', 'ttyname_r(', _UNSAFE_FUNC_PREFIX + r'ttyname\([^)]+\)'),
+ )
+
+
+def CheckPosixThreading(filename, clean_lines, linenum, error):
+  """Checks for calls to thread-unsafe functions.
+
+  Much code has been originally written without consideration of
+  multi-threading. Also, engineers are relying on their old experience;
+  they have learned posix before threading extensions were added. These
+  tests guide the engineers to use thread-safe functions (when using
+  posix directly).
+
+  One error is reported for every _THREADING_LIST entry whose pattern is
+  found on the line.
+
+  Args:
+    filename: The name of the current file.
+    clean_lines: A CleansedLines instance containing the file.
+    linenum: The number of the line to check.
+    error: The function to call with any errors found.
+  """
+  text = clean_lines.elided[linenum]
+  for unsafe_name, safe_name, pattern in _THREADING_LIST:
+    # The pattern (not a plain substring test) decides whether this really
+    # is a call to the function we are looking for.
+    if not Search(pattern, text):
+      continue
+    message = ''.join(('Consider using ', safe_name,
+                       '...) instead of ', unsafe_name,
+                       '...) for improved thread safety.'))
+    error(filename, linenum, 'runtime/threadsafe_fn', 2, message)
+
+
+def CheckVlogArguments(filename, clean_lines, linenum, error):
+  """Checks that VLOG() is only used for defining a logging level.
+
+  For example, VLOG(2) is correct. VLOG(INFO), VLOG(WARNING), VLOG(ERROR),
+  VLOG(DFATAL) and VLOG(FATAL) are not.
+
+  Args:
+    filename: The name of the current file.
+    clean_lines: A CleansedLines instance containing the file.
+    linenum: The number of the line to check.
+    error: The function to call with any errors found.
+  """
+  symbolic_level = Search(r'\bVLOG\((INFO|ERROR|WARNING|DFATAL|FATAL)\)',
+                          clean_lines.elided[linenum])
+  if not symbolic_level:
+    return
+  error(filename, linenum, 'runtime/vlog', 5,
+        'VLOG() should be used with numeric verbosity level. '
+        'Use LOG() if you want symbolic severity levels.')
+
+# Matches a whole-statement invalid increment or decrement such as
+# "*count++;" or "*count--;", which moves the pointer instead of
+# changing the value it points to.
+_RE_PATTERN_INVALID_INCREMENT = re.compile(
+ r'^\s*\*\w+(\+\+|--);')
+
+
+def CheckInvalidIncrement(filename, clean_lines, linenum, error):
+  """Checks for invalid increment *count++.
+
+  For example following function:
+  void increment_counter(int* count) {
+    *count++;
+  }
+  is invalid, because it effectively does count++, moving pointer, and should
+  be replaced with ++*count, (*count)++ or *count += 1.
+
+  Args:
+    filename: The name of the current file.
+    clean_lines: A CleansedLines instance containing the file.
+    linenum: The number of the line to check.
+    error: The function to call with any errors found.
+  """
+  statement = clean_lines.elided[linenum]
+  if _RE_PATTERN_INVALID_INCREMENT.match(statement) is None:
+    return
+  error(filename, linenum, 'runtime/invalid_increment', 5,
+        'Changing pointer instead of value (or unused value of operator*).')
+
+
+def IsMacroDefinition(clean_lines, linenum):
+ if Search(r'^#define', clean_lines[linenum]):
+ return True
+
+ if linenum > 0 and Search(r'\\$', clean_lines[linenum - 1]):
+ return True
+
+ return False
+
+
+def IsForwardClassDeclaration(clean_lines, linenum):
+ return Match(r'^\s*(\btemplate\b)*.*class\s+\w+;\s*$', clean_lines[linenum])
+
+
+class _BlockInfo(object):
+ """Stores information about a generic block of code."""
+
+ def __init__(self, seen_open_brace):
+ self.seen_open_brace = seen_open_brace
+ self.open_parentheses = 0
+ self.inline_asm = _NO_ASM
+ self.check_namespace_indentation = False
+
+ def CheckBegin(self, filename, clean_lines, linenum, error):
+ """Run checks that applies to text up to the opening brace.
+
+ This is mostly for checking the text after the class identifier
+ and the "{", usually where the base class is specified. For other
+ blocks, there isn't much to check, so we always pass.
+
+ Args:
+ filename: The name of the current file.
+ clean_lines: A CleansedLines instance containing the file.
+ linenum: The number of the line to check.
+ error: The function to call with any errors found.
+ """
+ pass
+
+ def CheckEnd(self, filename, clean_lines, linenum, error):
+ """Run checks that applies to text after the closing brace.
+
+ This is mostly used for checking end of namespace comments.
+
+ Args:
+ filename: The name of the current file.
+ clean_lines: A CleansedLines instance containing the file.
+ linenum: The number of the line to check.
+ error: The function to call with any errors found.
+ """
+ pass
+
+ def IsBlockInfo(self):
+ """Returns true if this block is a _BlockInfo.
+
+ This is convenient for verifying that an object is an instance of
+ a _BlockInfo, but not an instance of any of the derived classes.
+
+ Returns:
+ True for this class, False for derived classes.
+ """
+ return self.__class__ == _BlockInfo
+
+
+class _ExternCInfo(_BlockInfo):
+ """Stores information about an 'extern "C"' block."""
+
+ def __init__(self):
+ _BlockInfo.__init__(self, True)
+
+
+class _ClassInfo(_BlockInfo):
+ """Stores information about a class."""
+
+ def __init__(self, name, class_or_struct, clean_lines, linenum):
+ # name: identifier of the class; class_or_struct: the keyword used to
+ # declare it; linenum: line on which the declaration starts.
+ _BlockInfo.__init__(self, False)
+ self.name = name
+ self.starting_linenum = linenum
+ self.is_derived = False
+ self.check_namespace_indentation = True
+ # Initial member access: public for a struct, private otherwise.
+ if class_or_struct == 'struct':
+ self.access = 'public'
+ self.is_struct = True
+ else:
+ self.access = 'private'
+ self.is_struct = False
+
+ # Remember initial indentation level for this class. Using raw_lines here
+ # instead of elided to account for leading comments.
+ self.class_indent = GetIndentLevel(clean_lines.raw_lines[linenum])
+
+ # Try to find the end of the class. This will be confused by things like:
+ # class A {
+ # } *x = { ...
+ #
+ # But it's still good enough for CheckSectionSpacing.
+ #
+ # The end is the first line, starting at linenum, after which the running
+ # '{' minus '}' count is zero; a declaration line with no braces at all
+ # therefore ends the search immediately.
+ self.last_line = 0
+ depth = 0
+ for i in range(linenum, clean_lines.NumLines()):
+ line = clean_lines.elided[i]
+ depth += line.count('{') - line.count('}')
+ if not depth:
+ self.last_line = i
+ break
+
+ def CheckBegin(self, filename, clean_lines, linenum, error):
+ # Look for a bare ':'
+ # (a ':' that is not part of '::'), which marks the class as derived.
+ if Search('(^|[^:]):($|[^:])', clean_lines.elided[linenum]):
+ self.is_derived = True
+
+ def CheckEnd(self, filename, clean_lines, linenum, error):
+ # If there is a DISALLOW macro, it should appear near the end of
+ # the class.
+ #
+ # Scan upwards from the line above the closing brace, stopping before
+ # the line on which the class started.
+ seen_last_thing_in_class = False
+ for i in xrange(linenum - 1, self.starting_linenum, -1):
+ match = Search(
+ r'\b(DISALLOW_COPY_AND_ASSIGN|DISALLOW_IMPLICIT_CONSTRUCTORS)\(' +
+ self.name + r'\)',
+ clean_lines.elided[i])
+ if match:
+ if seen_last_thing_in_class:
+ error(filename, i, 'readability/constructors', 3,
+ match.group(1) + ' should be the last thing in the class')
+ break
+
+ # Any non-blank line met before the macro means the macro is not last.
+ if not Match(r'^\s*$', clean_lines.elided[i]):
+ seen_last_thing_in_class = True
+
+ # Check that closing brace is aligned with beginning of the class.
+ # Only do this if the closing brace is indented by only whitespaces.
+ # This means we will not check single-line class definitions.
+ indent = Match(r'^( *)\}', clean_lines.elided[linenum])
+ if indent and len(indent.group(1)) != self.class_indent:
+ if self.is_struct:
+ parent = 'struct ' + self.name
+ else:
+ parent = 'class ' + self.name
+ error(filename, linenum, 'whitespace/indent', 3,
+ 'Closing brace should be aligned with beginning of %s' % parent)
+
+
+class _NamespaceInfo(_BlockInfo):
+ """Stores information about a namespace."""
+
+ def __init__(self, name, linenum):
+ _BlockInfo.__init__(self, False)
+ # A falsy name (anonymous namespace) is stored as the empty string.
+ self.name = name or ''
+ self.starting_linenum = linenum
+ self.check_namespace_indentation = True
+
+ def CheckEnd(self, filename, clean_lines, linenum, error):
+ """Check end of namespace comments."""
+ # raw_lines is used here, so the trailing comment is still present.
+ line = clean_lines.raw_lines[linenum]
+
+ # Check how many lines is enclosed in this namespace. Don't issue
+ # warning for missing namespace comments if there aren't enough
+ # lines. However, do apply checks if there is already an end of
+ # namespace comment and it's incorrect.
+ #
+ # TODO(unknown): We always want to check end of namespace comments
+ # if a namespace is large, but sometimes we also want to apply the
+ # check if a short namespace contained nontrivial things (something
+ # other than forward declarations). There is currently no logic on
+ # deciding what these nontrivial things are, so this check is
+ # triggered by namespace size only, which works most of the time.
+ if (linenum - self.starting_linenum < 10
+ and not Match(r'};*\s*(//|/\*).*\bnamespace\b', line)):
+ return
+
+ # Look for matching comment at end of namespace.
+ #
+ # Note that we accept C style "/* */" comments for terminating
+ # namespaces, so that code that terminate namespaces inside
+ # preprocessor macros can be cpplint clean.
+ #
+ # We also accept stuff like "// end of namespace <name>." with the
+ # period at the end.
+ #
+ # Besides these, we don't accept anything else, otherwise we might
+ # get false negatives when existing comment is a substring of the
+ # expected namespace.
+ if self.name:
+ # Named namespace
+ # (the name is escaped so it is matched literally)
+ if not Match((r'};*\s*(//|/\*).*\bnamespace\s+' + re.escape(self.name) +
+ r'[\*/\.\\\s]*$'),
+ line):
+ error(filename, linenum, 'readability/namespace', 5,
+ 'Namespace should be terminated with "// namespace %s"' %
+ self.name)
+ else:
+ # Anonymous namespace
+ if not Match(r'};*\s*(//|/\*).*\bnamespace[\*/\.\\\s]*$', line):
+ # If "// namespace anonymous" or "// anonymous namespace (more text)",
+ # mention "// anonymous namespace" as an acceptable form
+ if Match(r'}.*\b(namespace anonymous|anonymous namespace)\b', line):
+ error(filename, linenum, 'readability/namespace', 5,
+ 'Anonymous namespace should be terminated with "// namespace"'
+ ' or "// anonymous namespace"')
+ else:
+ error(filename, linenum, 'readability/namespace', 5,
+ 'Anonymous namespace should be terminated with "// namespace"')
+
+
+class _PreprocessorInfo(object):
+ """Stores checkpoints of nesting stacks when #if/#else is seen."""
+
+ def __init__(self, stack_before_if):
+ # The entire nesting stack before #if
+ self.stack_before_if = stack_before_if
+
+ # The entire nesting stack up to #else
+ self.stack_before_else = []
+
+ # Whether we have already seen #else or #elif
+ self.seen_else = False
+
+
+class NestingState(object):
+ """Holds states related to parsing braces."""
+
+ def __init__(self):
+ # Stack for tracking all braces. An object is pushed whenever we
+ # see a "{", and popped when we see a "}". Only 3 types of
+ # objects are possible:
+ # - _ClassInfo: a class or struct.
+ # - _NamespaceInfo: a namespace.
+ # - _BlockInfo: some other type of block.
+ self.stack = []
+
+ # Top of the previous stack before each Update().
+ #
+ # Because the nesting_stack is updated at the end of each line, we
+ # had to do some convoluted checks to find out what is the current
+ # scope at the beginning of the line. This check is simplified by
+ # saving the previous top of nesting stack.
+ #
+ # We could save the full stack, but we only need the top. Copying
+ # the full nesting stack would slow down cpplint by ~10%.
+ self.previous_stack_top = []
+
+ # Stack of _PreprocessorInfo objects.
+ self.pp_stack = []
+
+ def SeenOpenBrace(self):
+ """Check if we have seen the opening brace for the innermost block.
+
+ Returns:
+ True if we have seen the opening brace, False if the innermost
+ block is still expecting an opening brace.
+ """
+ return (not self.stack) or self.stack[-1].seen_open_brace
+
+ def InNamespaceBody(self):
+ """Check if we are currently one level inside a namespace body.
+
+ Returns:
+ True if top of the stack is a namespace block, False otherwise.
+ """
+ return self.stack and isinstance(self.stack[-1], _NamespaceInfo)
+
+ def InExternC(self):
+ """Check if we are currently one level inside an 'extern "C"' block.
+
+ Returns:
+ True if top of the stack is an extern block, False otherwise.
+ """
+ return self.stack and isinstance(self.stack[-1], _ExternCInfo)
+
+ def InClassDeclaration(self):
+ """Check if we are currently one level inside a class or struct declaration.
+
+ Returns:
+ True if top of the stack is a class/struct, False otherwise.
+ """
+ return self.stack and isinstance(self.stack[-1], _ClassInfo)
+
+ def InAsmBlock(self):
+ """Check if we are currently one level inside an inline ASM block.
+
+ Returns:
+ True if the top of the stack is a block containing inline ASM.
+ """
+ return self.stack and self.stack[-1].inline_asm != _NO_ASM
+
+ def InTemplateArgumentList(self, clean_lines, linenum, pos):
+ """Check if current position is inside template argument list.
+
+ Args:
+ clean_lines: A CleansedLines instance containing the file.
+ linenum: The number of the line to check.
+ pos: position just after the suspected template argument.
+ Returns:
+ True if (linenum, pos) is inside template arguments.
+ """
+ while linenum < clean_lines.NumLines():
+ # Find the earliest character that might indicate a template argument
+ line = clean_lines.elided[linenum]
+ match = Match(r'^[^{};=\[\]\.<>]*(.)', line[pos:])
+ if not match:
+ linenum += 1
+ pos = 0
+ continue
+ token = match.group(1)
+ pos += len(match.group(0))
+
+ # These things do not look like template argument list:
+ # class Suspect {
+ # class Suspect x; }
+ if token in ('{', '}', ';'): return False
+
+ # These things look like template argument list:
+ # template <class Suspect>
+ # template <class Suspect = default_value>
+ # template <class Suspect[]>
+ # template <class Suspect...>
+ if token in ('>', '=', '[', ']', '.'): return True
+
+ # Check if token is an unmatched '<'.
+ # If not, move on to the next character.
+ if token != '<':
+ pos += 1
+ if pos >= len(line):
+ linenum += 1
+ pos = 0
+ continue
+
+ # We can't be sure if we just find a single '<', and need to
+ # find the matching '>'.
+ (_, end_line, end_pos) = CloseExpression(clean_lines, linenum, pos - 1)
+ if end_pos < 0:
+ # Not sure if template argument list or syntax error in file
+ return False
+ linenum = end_line
+ pos = end_pos
+ return False
+
+ def UpdatePreprocessor(self, line):
+ """Update preprocessor stack.
+
+ We need to handle preprocessors due to classes like this:
+ #ifdef SWIG
+ struct ResultDetailsPageElementExtensionPoint {
+ #else
+ struct ResultDetailsPageElementExtensionPoint : public Extension {
+ #endif
+
+ We make the following assumptions (good enough for most files):
+ - Preprocessor condition evaluates to true from #if up to first
+ #else/#elif/#endif.
+
+ - Preprocessor condition evaluates to false from #else/#elif up
+ to #endif. We still perform lint checks on these lines, but
+ these do not affect nesting stack.
+
+ Args:
+ line: current line to check.
+ """
+ if Match(r'^\s*#\s*(if|ifdef|ifndef)\b', line):
+ # Beginning of #if block, save the nesting stack here. The saved
+ # stack will allow us to restore the parsing state in the #else case.
+ self.pp_stack.append(_PreprocessorInfo(copy.deepcopy(self.stack)))
+ elif Match(r'^\s*#\s*(else|elif)\b', line):
+ # Beginning of #else block
+ if self.pp_stack:
+ if not self.pp_stack[-1].seen_else:
+ # This is the first #else or #elif block. Remember the
+ # whole nesting stack up to this point. This is what we
+ # keep after the #endif.
+ self.pp_stack[-1].seen_else = True
+ self.pp_stack[-1].stack_before_else = copy.deepcopy(self.stack)
+
+ # Restore the stack to how it was before the #if
+ self.stack = copy.deepcopy(self.pp_stack[-1].stack_before_if)
+ else:
+ # TODO(unknown): unexpected #else, issue warning?
+ pass
+ elif Match(r'^\s*#\s*endif\b', line):
+ # End of #if or #else blocks.
+ if self.pp_stack:
+ # If we saw an #else, we will need to restore the nesting
+ # stack to its former state before the #else, otherwise we
+ # will just continue from where we left off.
+ if self.pp_stack[-1].seen_else:
+ # Here we can just use a shallow copy since we are the last
+ # reference to it.
+ self.stack = self.pp_stack[-1].stack_before_else
+ # Drop the corresponding #if
+ self.pp_stack.pop()
+ else:
+ # TODO(unknown): unexpected #endif, issue warning?
+ pass
+
+ # TODO(unknown): Update() is too long, but we will refactor later.
+ def Update(self, filename, clean_lines, linenum, error):
+ """Update nesting state with current line.
+
+ Args:
+ filename: The name of the current file.
+ clean_lines: A CleansedLines instance containing the file.
+ linenum: The number of the line to check.
+ error: The function to call with any errors found.
+ """
+ line = clean_lines.elided[linenum]
+
+ # Remember top of the previous nesting stack.
+ #
+ # The stack is always pushed/popped and not modified in place, so
+ # we can just do a shallow copy instead of copy.deepcopy. Using
+ # deepcopy would slow down cpplint by ~28%.
+ if self.stack:
+ self.previous_stack_top = self.stack[-1]
+ else:
+ self.previous_stack_top = None
+
+ # Update pp_stack
+ self.UpdatePreprocessor(line)
+
+ # Count parentheses. This is to avoid adding struct arguments to
+ # the nesting stack.
+ if self.stack:
+ inner_block = self.stack[-1]
+ depth_change = line.count('(') - line.count(')')
+ inner_block.open_parentheses += depth_change
+
+ # Also check if we are starting or ending an inline assembly block.
+ if inner_block.inline_asm in (_NO_ASM, _END_ASM):
+ if (depth_change != 0 and
+ inner_block.open_parentheses == 1 and
+ _MATCH_ASM.match(line)):
+ # Enter assembly block
+ inner_block.inline_asm = _INSIDE_ASM
+ else:
+ # Not entering assembly block. If previous line was _END_ASM,
+ # we will now shift to _NO_ASM state.
+ inner_block.inline_asm = _NO_ASM
+ elif (inner_block.inline_asm == _INSIDE_ASM and
+ inner_block.open_parentheses == 0):
+ # Exit assembly block
+ inner_block.inline_asm = _END_ASM
+
+ # Consume namespace declaration at the beginning of the line. Do
+ # this in a loop so that we catch same line declarations like this:
+ # namespace proto2 { namespace bridge { class MessageSet; } }
+ while True:
+ # Match start of namespace. The "\b\s*" below catches namespace
+ # declarations even if it weren't followed by a whitespace, this
+ # is so that we don't confuse our namespace checker. The
+ # missing spaces will be flagged by CheckSpacing.
+ namespace_decl_match = Match(r'^\s*namespace\b\s*([:\w]+)?(.*)$', line)
+ if not namespace_decl_match:
+ break
+
+ new_namespace = _NamespaceInfo(namespace_decl_match.group(1), linenum)
+ self.stack.append(new_namespace)
+
+ line = namespace_decl_match.group(2)
+ if line.find('{') != -1:
+ new_namespace.seen_open_brace = True
+ line = line[line.find('{') + 1:]
+
+ # Look for a class declaration in whatever is left of the line
+ # after parsing namespaces. The regexp accounts for decorated classes
+ # such as in:
+ # class LOCKABLE API Object {
+ # };
+ class_decl_match = Match(
+ r'^(\s*(?:template\s*<[\w\s<>,:]*>\s*)?'
+ r'(class|struct)\s+(?:[A-Z_]+\s+)*(\w+(?:::\w+)*))'
+ r'(.*)$', line)
+ if (class_decl_match and
+ (not self.stack or self.stack[-1].open_parentheses == 0)):
+ # We do not want to accept classes that are actually template arguments:
+ # template <class Ignore1,
+ # class Ignore2 = Default<Args>,
+ # template <Args> class Ignore3>
+ # void Function() {};
+ #
+ # To avoid template argument cases, we scan forward and look for
+ # an unmatched '>'. If we see one, assume we are inside a
+ # template argument list.
+ end_declaration = len(class_decl_match.group(1))
+ if not self.InTemplateArgumentList(clean_lines, linenum, end_declaration):
+ self.stack.append(_ClassInfo(
+ class_decl_match.group(3), class_decl_match.group(2),
+ clean_lines, linenum))
+ line = class_decl_match.group(4)
+
+ # If we have not yet seen the opening brace for the innermost block,
+ # run checks here.
+ if not self.SeenOpenBrace():
+ self.stack[-1].CheckBegin(filename, clean_lines, linenum, error)
+
+ # Update access control if we are inside a class/struct
+ if self.stack and isinstance(self.stack[-1], _ClassInfo):
+ classinfo = self.stack[-1]
+ access_match = Match(
+ r'^(.*)\b(public|private|protected|signals)(\s+(?:slots\s*)?)?'
+ r':(?:[^:]|$)',
+ line)
+ if access_match:
+ classinfo.access = access_match.group(2)
+
+ # Check that access keywords are indented +1 space. Skip this
+ # check if the keywords are not preceded by whitespaces.
+ indent = access_match.group(1)
+ if (len(indent) != classinfo.class_indent + 1 and
+ Match(r'^\s*$', in
<TRUNCATED>
[3/7] parquet-cpp git commit: PARQUET-416: C++11 compilation,
code reorg, libparquet and installation targets
Posted by no...@apache.org.
http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/337cf584/src/impala/rle-encoding.h
----------------------------------------------------------------------
diff --git a/src/impala/rle-encoding.h b/src/impala/rle-encoding.h
deleted file mode 100644
index 759f917..0000000
--- a/src/impala/rle-encoding.h
+++ /dev/null
@@ -1,417 +0,0 @@
-// Copyright 2012 Cloudera Inc.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#ifndef IMPALA_RLE_ENCODING_H
-#define IMPALA_RLE_ENCODING_H
-
-#include <math.h>
-
-#include "impala/compiler-util.h"
-#include "impala/bit-stream-utils.inline.h"
-#include "impala/bit-util.h"
-#include "impala/logging.h"
-
-namespace impala {
-
-// Utility classes to do run length encoding (RLE) for fixed bit width values. If runs
-// are sufficiently long, RLE is used, otherwise, the values are just bit-packed
-// (literal encoding).
-// For both types of runs, there is a byte-aligned indicator which encodes the length
-// of the run and the type of the run.
-// This encoding has the benefit that when there aren't any long enough runs, values
-// are always decoded at fixed (can be precomputed) bit offsets OR both the value and
-// the run length are byte aligned. This allows for very efficient decoding
-// implementations.
-// The encoding is:
-// encoded-block := run*
-// run := literal-run | repeated-run
-// literal-run := literal-indicator < literal bytes >
-// repeated-run := repeated-indicator < repeated value. padded to byte boundary >
-// literal-indicator := varint_encode( number_of_groups << 1 | 1)
-// repeated-indicator := varint_encode( number_of_repetitions << 1 )
-//
-// Each run is preceded by a varint. The varint's least significant bit is
-// used to indicate whether the run is a literal run or a repeated run. The rest
-// of the varint is used to determine the length of the run (eg how many times the
-// value repeats).
-//
-// In the case of literal runs, the run length is always a multiple of 8 (i.e. encode
-// in groups of 8), so that no matter the bit-width of the value, the sequence will end
-// on a byte boundary without padding.
-// Given that we know it is a multiple of 8, we store the number of 8-groups rather than
-// the actual number of encoded ints. (This means that the total number of encoded values
-// can not be determined from the encoded data, since the number of values in the last
-// group may not be a multiple of 8). For the last group of literal runs, we pad
-// the group to 8 with zeros. This allows for 8 at a time decoding on the read side
-// without the need for additional checks.
-//
-// There is a break-even point when it is more storage efficient to do run length
-// encoding. For 1 bit-width values, that point is 8 values. They require 2 bytes
-// for both the repeated encoding or the literal encoding. This value can always
-// be computed based on the bit-width.
-// TODO: think about how to use this for strings. The bit packing isn't quite the same.
-//
-// Examples with bit-width 1 (eg encoding booleans):
-// ----------------------------------------
-// 100 1s followed by 100 0s:
-// <varint(100 << 1)> <1, padded to 1 byte> <varint(100 << 1)> <0, padded to 1 byte>
-// - (total 4 bytes)
-//
-// alternating 1s and 0s (200 total):
-// 200 ints = 25 groups of 8
-// <varint((25 << 1) | 1)> <25 bytes of values, bitpacked>
-// (total 26 bytes, 1 byte overhead)
-//
-
-// Decoder class for RLE encoded data.
-class RleDecoder {
- public:
- // Create a decoder object. buffer/buffer_len is the decoded data.
- // bit_width is the width of each value (before encoding).
- RleDecoder(const uint8_t* buffer, int buffer_len, int bit_width)
- : bit_reader_(buffer, buffer_len),
- bit_width_(bit_width),
- current_value_(0),
- repeat_count_(0),
- literal_count_(0) {
- DCHECK_GE(bit_width_, 0);
- DCHECK_LE(bit_width_, 64);
- }
-
- RleDecoder() {}
-
- // Gets the next value. Returns false if there are no more.
- template<typename T>
- bool Get(T* val);
-
- private:
- BitReader bit_reader_;
- int bit_width_;
- uint64_t current_value_;
- uint32_t repeat_count_;
- uint32_t literal_count_;
-};
-
-// Class to incrementally build the rle data. This class does not allocate any memory.
-// The encoding has two modes: encoding repeated runs and literal runs.
-// If the run is sufficiently short, it is more efficient to encode as a literal run.
-// This class does so by buffering 8 values at a time. If they are not all the same
-// they are added to the literal run. If they are the same, they are added to the
-// repeated run. When we switch modes, the previous run is flushed out.
-class RleEncoder {
- public:
- // buffer/buffer_len: preallocated output buffer.
- // bit_width: max number of bits for value.
- // TODO: consider adding a min_repeated_run_length so the caller can control
- // when values should be encoded as repeated runs. Currently this is derived
- // based on the bit_width, which can determine a storage optimal choice.
- // TODO: allow 0 bit_width (and have dict encoder use it)
- RleEncoder(uint8_t* buffer, int buffer_len, int bit_width)
- : bit_width_(bit_width),
- bit_writer_(buffer, buffer_len) {
- DCHECK_GE(bit_width_, 1);
- DCHECK_LE(bit_width_, 64);
- max_run_byte_size_ = MinBufferSize(bit_width);
- DCHECK_GE(buffer_len, max_run_byte_size_) << "Input buffer not big enough.";
- Clear();
- }
-
- // Returns the minimum buffer size needed to use the encoder for 'bit_width'
- // This is the maximum length of a single run for 'bit_width'.
- // It is not valid to pass a buffer less than this length.
- static int MinBufferSize(int bit_width) {
- // 1 indicator byte and MAX_VALUES_PER_LITERAL_RUN 'bit_width' values.
- int max_literal_run_size = 1 +
- BitUtil::Ceil(MAX_VALUES_PER_LITERAL_RUN * bit_width, 8);
- // Up to MAX_VLQ_BYTE_LEN indicator and a single 'bit_width' value.
- int max_repeated_run_size = BitReader::MAX_VLQ_BYTE_LEN + BitUtil::Ceil(bit_width, 8);
- return std::max(max_literal_run_size, max_repeated_run_size);
- }
-
- // Returns the maximum byte size it could take to encode 'num_values'.
- static int MaxBufferSize(int bit_width, int num_values) {
- int bytes_per_run = BitUtil::Ceil(bit_width * MAX_VALUES_PER_LITERAL_RUN, 8.0);
- int num_runs = BitUtil::Ceil(num_values, MAX_VALUES_PER_LITERAL_RUN);
- int literal_max_size = num_runs + num_runs * bytes_per_run;
- int min_run_size = MinBufferSize(bit_width);
- return std::max(min_run_size, literal_max_size) + min_run_size;
- }
-
- // Encode value. Returns true if the value fits in buffer, false otherwise.
- // This value must be representable with bit_width_ bits.
- bool Put(uint64_t value);
-
- // Flushes any pending values to the underlying buffer.
- // Returns the total number of bytes written
- int Flush();
-
- // Resets all the state in the encoder.
- void Clear();
-
- // Returns pointer to underlying buffer
- uint8_t* buffer() { return bit_writer_.buffer(); }
- int32_t len() { return bit_writer_.bytes_written(); }
-
- private:
- // Flushes any buffered values. If this is part of a repeated run, this is largely
- // a no-op.
- // If it is part of a literal run, this will call FlushLiteralRun, which writes
- // out the buffered literal values.
- // If 'done' is true, the current run would be written even if it would normally
- // have been buffered more. This should only be called at the end, when the
- // encoder has received all values even if it would normally continue to be
- // buffered.
- void FlushBufferedValues(bool done);
-
- // Flushes literal values to the underlying buffer. If update_indicator_byte,
- // then the current literal run is complete and the indicator byte is updated.
- void FlushLiteralRun(bool update_indicator_byte);
-
- // Flushes a repeated run to the underlying buffer.
- void FlushRepeatedRun();
-
- // Checks and sets buffer_full_. This must be called after flushing a run to
- // make sure there are enough bytes remaining to encode the next run.
- void CheckBufferFull();
-
- // The maximum number of values in a single literal run
- // (number of groups encodable by a 1-byte indicator * 8)
- static const int MAX_VALUES_PER_LITERAL_RUN = (1 << 6) * 8;
-
- // Number of bits needed to encode the value.
- const int bit_width_;
-
- // Underlying buffer.
- BitWriter bit_writer_;
-
- // If true, the buffer is full and subsequent Put()'s will fail.
- bool buffer_full_;
-
- // The maximum byte size a single run can take.
- int max_run_byte_size_;
-
- // We need to buffer at most 8 values for literals. This happens when the
- // bit_width is 1 (so 8 values fit in one byte).
- // TODO: generalize this to other bit widths
- int64_t buffered_values_[8];
-
- // Number of values in buffered_values_
- int num_buffered_values_;
-
- // The current (also last) value that was written and the count of how
- // many times in a row that value has been seen. This is maintained even
- // if we are in a literal run. If the repeat_count_ get high enough, we switch
- // to encoding repeated runs.
- int64_t current_value_;
- int repeat_count_;
-
- // Number of literals in the current run. This does not include the literals
- // that might be in buffered_values_. Only after we've got a group big enough
- // can we decide if they should part of the literal_count_ or repeat_count_
- int literal_count_;
-
- // Pointer to a byte in the underlying buffer that stores the indicator byte.
- // This is reserved as soon as we need a literal run but the value is written
- // when the literal run is complete.
- uint8_t* literal_indicator_byte_;
-};
-
-template<typename T>
-inline bool RleDecoder::Get(T* val) {
- if (UNLIKELY(literal_count_ == 0 && repeat_count_ == 0)) {
- // Read the next run's indicator int, it could be a literal or repeated run
- // The int is encoded as a vlq-encoded value.
- uint64_t indicator_value = 0;
- bool result = bit_reader_.GetVlqInt(&indicator_value);
- if (!result) return false;
-
- // lsb indicates if it is a literal run or repeated run
- bool is_literal = indicator_value & 1;
- if (is_literal) {
- literal_count_ = (indicator_value >> 1) * 8;
- } else {
- repeat_count_ = indicator_value >> 1;
- bool result = bit_reader_.GetAligned<T>(
- BitUtil::Ceil(bit_width_, 8), reinterpret_cast<T*>(&current_value_));
- DCHECK(result);
- }
- }
-
- if (LIKELY(repeat_count_ > 0)) {
- *val = current_value_;
- --repeat_count_;
- } else {
- DCHECK(literal_count_ > 0);
- bool result = bit_reader_.GetValue(bit_width_, val);
- DCHECK(result);
- --literal_count_;
- }
-
- return true;
-}
-
-// This function buffers input values 8 at a time. After seeing all 8 values,
-// it decides whether they should be encoded as a literal or repeated run.
-inline bool RleEncoder::Put(uint64_t value) {
- DCHECK(bit_width_ == 64 || value < (1LL << bit_width_));
- if (UNLIKELY(buffer_full_)) return false;
-
- if (LIKELY(current_value_ == value)) {
- ++repeat_count_;
- if (repeat_count_ > 8) {
- // This is just a continuation of the current run, no need to buffer the
- // values.
- // Note that this is the fast path for long repeated runs.
- return true;
- }
- } else {
- if (repeat_count_ >= 8) {
- // We had a run that was long enough but it has ended. Flush the
- // current repeated run.
- DCHECK_EQ(literal_count_, 0);
- FlushRepeatedRun();
- }
- repeat_count_ = 1;
- current_value_ = value;
- }
-
- buffered_values_[num_buffered_values_] = value;
- if (++num_buffered_values_ == 8) {
- DCHECK_EQ(literal_count_ % 8, 0);
- FlushBufferedValues(false);
- }
- return true;
-}
-
-inline void RleEncoder::FlushLiteralRun(bool update_indicator_byte) {
- if (literal_indicator_byte_ == NULL) {
- // The literal indicator byte has not been reserved yet, get one now.
- literal_indicator_byte_ = bit_writer_.GetNextBytePtr();
- DCHECK(literal_indicator_byte_ != NULL);
- }
-
- // Write all the buffered values as bit packed literals
- for (int i = 0; i < num_buffered_values_; ++i) {
- bool success = bit_writer_.PutValue(buffered_values_[i], bit_width_);
- DCHECK(success) << "There is a bug in using CheckBufferFull()";
- }
- num_buffered_values_ = 0;
-
- if (update_indicator_byte) {
- // At this point we need to write the indicator byte for the literal run.
- // We only reserve one byte, to allow for streaming writes of literal values.
- // The logic makes sure we flush literal runs often enough to not overrun
- // the 1 byte.
- DCHECK_EQ(literal_count_ % 8, 0);
- int num_groups = literal_count_ / 8;
- int32_t indicator_value = (num_groups << 1) | 1;
- DCHECK_EQ(indicator_value & 0xFFFFFF00, 0);
- *literal_indicator_byte_ = indicator_value;
- literal_indicator_byte_ = NULL;
- literal_count_ = 0;
- CheckBufferFull();
- }
-}
-
-inline void RleEncoder::FlushRepeatedRun() {
- DCHECK_GT(repeat_count_, 0);
- bool result = true;
- // The lsb of 0 indicates this is a repeated run
- int32_t indicator_value = repeat_count_ << 1 | 0;
- result &= bit_writer_.PutVlqInt(indicator_value);
- result &= bit_writer_.PutAligned(current_value_, BitUtil::Ceil(bit_width_, 8));
- DCHECK(result);
- num_buffered_values_ = 0;
- repeat_count_ = 0;
- CheckBufferFull();
-}
-
-// Flush the values that have been buffered. At this point we decide whether
-// we need to switch between the run types or continue the current one.
-inline void RleEncoder::FlushBufferedValues(bool done) {
- if (repeat_count_ >= 8) {
- // Clear the buffered values. They are part of the repeated run now and we
- // don't want to flush them out as literals.
- num_buffered_values_ = 0;
- if (literal_count_ != 0) {
- // There was a current literal run. All the values in it have been flushed
- // but we still need to update the indicator byte.
- DCHECK_EQ(literal_count_ % 8, 0);
- DCHECK_EQ(repeat_count_, 8);
- FlushLiteralRun(true);
- }
- DCHECK_EQ(literal_count_, 0);
- return;
- }
-
- literal_count_ += num_buffered_values_;
- DCHECK_EQ(literal_count_ % 8, 0);
- int num_groups = literal_count_ / 8;
- if (num_groups + 1 >= (1 << 6)) {
- // We need to start a new literal run because the indicator byte we've reserved
- // cannot store more values.
- DCHECK(literal_indicator_byte_ != NULL);
- FlushLiteralRun(true);
- } else {
- FlushLiteralRun(done);
- }
- repeat_count_ = 0;
-}
-
-inline int RleEncoder::Flush() {
- if (literal_count_ > 0 || repeat_count_ > 0 || num_buffered_values_ > 0) {
- bool all_repeat = literal_count_ == 0 &&
- (repeat_count_ == num_buffered_values_ || num_buffered_values_ == 0);
- // There is something pending, figure out if it's a repeated or literal run
- if (repeat_count_ > 0 && all_repeat) {
- FlushRepeatedRun();
- } else {
- DCHECK_EQ(literal_count_ % 8, 0);
- // Buffer the last group of literals to 8 by padding with 0s.
- for (; num_buffered_values_ != 0 && num_buffered_values_ < 8;
- ++num_buffered_values_) {
- buffered_values_[num_buffered_values_] = 0;
- }
- literal_count_ += num_buffered_values_;
- FlushLiteralRun(true);
- repeat_count_ = 0;
- }
- }
- bit_writer_.Flush();
- DCHECK_EQ(num_buffered_values_, 0);
- DCHECK_EQ(literal_count_, 0);
- DCHECK_EQ(repeat_count_, 0);
-
- return bit_writer_.bytes_written();
-}
-
-inline void RleEncoder::CheckBufferFull() {
- int bytes_written = bit_writer_.bytes_written();
- if (bytes_written + max_run_byte_size_ > bit_writer_.buffer_len()) {
- buffer_full_ = true;
- }
-}
-
-inline void RleEncoder::Clear() {
- buffer_full_ = false;
- current_value_ = 0;
- repeat_count_ = 0;
- num_buffered_values_ = 0;
- literal_count_ = 0;
- literal_indicator_byte_ = NULL;
- bit_writer_.Clear();
-}
-
-}
-#endif
http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/337cf584/src/parquet.cc
----------------------------------------------------------------------
diff --git a/src/parquet.cc b/src/parquet.cc
index 6c939ae..f71d32b 100644
--- a/src/parquet.cc
+++ b/src/parquet.cc
@@ -13,9 +13,10 @@
// limitations under the License.
#include "parquet/parquet.h"
-#include "encodings/encodings.h"
-#include "compression/codec.h"
+#include "parquet/encodings/encodings.h"
+#include "parquet/compression/codec.h"
+#include <algorithm>
#include <string>
#include <string.h>
@@ -23,18 +24,21 @@
const int DATA_PAGE_SIZE = 64 * 1024;
-using namespace boost;
-using namespace parquet;
-using namespace std;
-
namespace parquet_cpp {
+using parquet::CompressionCodec;
+using parquet::Encoding;
+using parquet::FieldRepetitionType;
+using parquet::PageType;
+using parquet::SchemaElement;
+using parquet::Type;
+
InMemoryInputStream::InMemoryInputStream(const uint8_t* buffer, int64_t len) :
buffer_(buffer), len_(len), offset_(0) {
}
const uint8_t* InMemoryInputStream::Peek(int num_to_peek, int* num_bytes) {
- *num_bytes = ::min(static_cast<int64_t>(num_to_peek), len_ - offset_);
+ *num_bytes = std::min(static_cast<int64_t>(num_to_peek), len_ - offset_);
return buffer_ + offset_;
}
@@ -47,7 +51,7 @@ const uint8_t* InMemoryInputStream::Read(int num_to_read, int* num_bytes) {
ColumnReader::~ColumnReader() {
}
-ColumnReader::ColumnReader(const ColumnMetaData* metadata,
+ColumnReader::ColumnReader(const parquet::ColumnMetaData* metadata,
const SchemaElement* schema, InputStream* stream)
: metadata_(metadata),
schema_(schema),
@@ -96,7 +100,7 @@ ColumnReader::ColumnReader(const ColumnMetaData* metadata,
void ColumnReader::BatchDecode() {
buffered_values_offset_ = 0;
- uint8_t* buf= &values_buffer_[0];
+ uint8_t* buf = &values_buffer_[0];
int batch_size = config_.batch_size;
switch (metadata_->type) {
case parquet::Type::BOOLEAN:
@@ -164,7 +168,7 @@ bool ColumnReader::ReadNewPage() {
}
if (current_page_header_.type == PageType::DICTIONARY_PAGE) {
- boost::unordered_map<Encoding::type, boost::shared_ptr<Decoder> >::iterator it =
+ std::unordered_map<Encoding::type, std::shared_ptr<Decoder> >::iterator it =
decoders_.find(Encoding::RLE_DICTIONARY);
if (it != decoders_.end()) {
throw ParquetException("Column cannot have more than one dictionary.");
@@ -173,7 +177,7 @@ bool ColumnReader::ReadNewPage() {
PlainDecoder dictionary(schema_->type);
dictionary.SetData(current_page_header_.dictionary_page_header.num_values,
buffer, uncompressed_len);
- boost::shared_ptr<Decoder> decoder(
+ std::shared_ptr<Decoder> decoder(
new DictionaryDecoder(schema_->type, &dictionary));
decoders_[Encoding::RLE_DICTIONARY] = decoder;
current_decoder_ = decoders_[Encoding::RLE_DICTIONARY].get();
@@ -187,7 +191,7 @@ bool ColumnReader::ReadNewPage() {
int num_definition_bytes = *reinterpret_cast<const uint32_t*>(buffer);
buffer += sizeof(uint32_t);
definition_level_decoder_.reset(
- new impala::RleDecoder(buffer, num_definition_bytes, 1));
+ new RleDecoder(buffer, num_definition_bytes, 1));
buffer += num_definition_bytes;
uncompressed_len -= sizeof(uint32_t);
uncompressed_len -= num_definition_bytes;
@@ -200,14 +204,14 @@ bool ColumnReader::ReadNewPage() {
Encoding::type encoding = current_page_header_.data_page_header.encoding;
if (IsDictionaryIndexEncoding(encoding)) encoding = Encoding::RLE_DICTIONARY;
- boost::unordered_map<Encoding::type, boost::shared_ptr<Decoder> >::iterator it =
+ std::unordered_map<Encoding::type, std::shared_ptr<Decoder> >::iterator it =
decoders_.find(encoding);
if (it != decoders_.end()) {
current_decoder_ = it->second.get();
} else {
switch (encoding) {
case Encoding::PLAIN: {
- boost::shared_ptr<Decoder> decoder;
+ std::shared_ptr<Decoder> decoder;
if (schema_->type == Type::BOOLEAN) {
decoder.reset(new BoolDecoder());
} else {
@@ -239,5 +243,4 @@ bool ColumnReader::ReadNewPage() {
return true;
}
-}
-
+} // namespace parquet_cpp
http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/337cf584/src/parquet/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/src/parquet/CMakeLists.txt b/src/parquet/CMakeLists.txt
new file mode 100644
index 0000000..11eaeb6
--- /dev/null
+++ b/src/parquet/CMakeLists.txt
@@ -0,0 +1,18 @@
+# Copyright 2015 Cloudera Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Headers: top level
+install(FILES
+ parquet.h
+ DESTINATION include/parquet)
http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/337cf584/src/parquet/compression/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/src/parquet/compression/CMakeLists.txt b/src/parquet/compression/CMakeLists.txt
new file mode 100644
index 0000000..291ef03
--- /dev/null
+++ b/src/parquet/compression/CMakeLists.txt
@@ -0,0 +1,30 @@
+# Copyright 2012 Cloudera Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+add_library(parquet_compression STATIC
+ lz4-codec.cc
+ snappy-codec.cc
+)
+target_link_libraries(parquet_compression
+ lz4static
+ snappystatic)
+
+set_target_properties(parquet_compression
+ PROPERTIES
+ LIBRARY_OUTPUT_DIRECTORY "${BUILD_OUTPUT_ROOT_DIRECTORY}")
+
+# Headers: compression
+install(FILES
+ codec.h
+ DESTINATION include/parquet/compression)
http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/337cf584/src/parquet/compression/codec.h
----------------------------------------------------------------------
diff --git a/src/parquet/compression/codec.h b/src/parquet/compression/codec.h
new file mode 100644
index 0000000..8166847
--- /dev/null
+++ b/src/parquet/compression/codec.h
@@ -0,0 +1,71 @@
+// Copyright 2012 Cloudera Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef PARQUET_COMPRESSION_CODEC_H
+#define PARQUET_COMPRESSION_CODEC_H
+
+#include "parquet/parquet.h"
+
+#include <cstdint>
+#include "parquet/thrift/parquet_constants.h"
+#include "parquet/thrift/parquet_types.h"
+
+namespace parquet_cpp {
+
+class Codec {
+ public:
+ virtual ~Codec() {}
+ virtual void Decompress(int input_len, const uint8_t* input,
+ int output_len, uint8_t* output_buffer) = 0;
+
+ virtual int Compress(int input_len, const uint8_t* input,
+ int output_buffer_len, uint8_t* output_buffer) = 0;
+
+ virtual int MaxCompressedLen(int input_len, const uint8_t* input) = 0;
+
+ virtual const char* name() const = 0;
+};
+
+
+// Snappy codec.
+class SnappyCodec : public Codec {
+ public:
+ virtual void Decompress(int input_len, const uint8_t* input,
+ int output_len, uint8_t* output_buffer);
+
+ virtual int Compress(int input_len, const uint8_t* input,
+ int output_buffer_len, uint8_t* output_buffer);
+
+ virtual int MaxCompressedLen(int input_len, const uint8_t* input);
+
+ virtual const char* name() const { return "snappy"; }
+};
+
+// Lz4 codec.
+class Lz4Codec : public Codec {
+ public:
+ virtual void Decompress(int input_len, const uint8_t* input,
+ int output_len, uint8_t* output_buffer);
+
+ virtual int Compress(int input_len, const uint8_t* input,
+ int output_buffer_len, uint8_t* output_buffer);
+
+ virtual int MaxCompressedLen(int input_len, const uint8_t* input);
+
+ virtual const char* name() const { return "lz4"; }
+};
+
+} // namespace parquet_cpp
+
+#endif
http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/337cf584/src/parquet/compression/lz4-codec.cc
----------------------------------------------------------------------
diff --git a/src/parquet/compression/lz4-codec.cc b/src/parquet/compression/lz4-codec.cc
new file mode 100644
index 0000000..6655387
--- /dev/null
+++ b/src/parquet/compression/lz4-codec.cc
@@ -0,0 +1,40 @@
+// Copyright 2012 Cloudera Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "parquet/compression/codec.h"
+
+#include <lz4.h>
+
+namespace parquet_cpp {
+
+void Lz4Codec::Decompress(int input_len, const uint8_t* input,
+ int output_len, uint8_t* output_buffer) {
+ int n = LZ4_uncompress(reinterpret_cast<const char*>(input),
+ reinterpret_cast<char*>(output_buffer), output_len);
+ if (n != input_len) {
+ throw parquet_cpp::ParquetException("Corrupt lz4 compressed data.");
+ }
+}
+
+int Lz4Codec::MaxCompressedLen(int input_len, const uint8_t* input) {
+ return LZ4_compressBound(input_len);
+}
+
+int Lz4Codec::Compress(int input_len, const uint8_t* input,
+ int output_buffer_len, uint8_t* output_buffer) {
+ return LZ4_compress(reinterpret_cast<const char*>(input),
+ reinterpret_cast<char*>(output_buffer), input_len);
+}
+
+} // namespace parquet_cpp
http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/337cf584/src/parquet/compression/snappy-codec.cc
----------------------------------------------------------------------
diff --git a/src/parquet/compression/snappy-codec.cc b/src/parquet/compression/snappy-codec.cc
new file mode 100644
index 0000000..0633d47
--- /dev/null
+++ b/src/parquet/compression/snappy-codec.cc
@@ -0,0 +1,42 @@
+// Copyright 2012 Cloudera Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "parquet/compression/codec.h"
+
+#include <snappy.h>
+
+namespace parquet_cpp {
+
+void SnappyCodec::Decompress(int input_len, const uint8_t* input,
+ int output_len, uint8_t* output_buffer) {
+ if (!snappy::RawUncompress(reinterpret_cast<const char*>(input),
+ static_cast<size_t>(input_len), reinterpret_cast<char*>(output_buffer))) {
+ throw parquet_cpp::ParquetException("Corrupt snappy compressed data.");
+ }
+}
+
+int SnappyCodec::MaxCompressedLen(int input_len, const uint8_t* input) {
+ return snappy::MaxCompressedLength(input_len);
+}
+
+int SnappyCodec::Compress(int input_len, const uint8_t* input,
+ int output_buffer_len, uint8_t* output_buffer) {
+ size_t output_len;
+ snappy::RawCompress(reinterpret_cast<const char*>(input),
+ static_cast<size_t>(input_len), reinterpret_cast<char*>(output_buffer),
+ &output_len);
+ return output_len;
+}
+
+} // namespace parquet_cpp
http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/337cf584/src/parquet/encodings/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/src/parquet/encodings/CMakeLists.txt b/src/parquet/encodings/CMakeLists.txt
new file mode 100644
index 0000000..72baf48
--- /dev/null
+++ b/src/parquet/encodings/CMakeLists.txt
@@ -0,0 +1,24 @@
+# Copyright 2015 Cloudera Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Headers: encodings
+install(FILES
+ encodings.h
+ bool-encoding.h
+ delta-bit-pack-encoding.h
+ delta-byte-array-encoding.h
+ delta-length-byte-array-encoding.h
+ dictionary-encoding.h
+ plain-encoding.h
+ DESTINATION include/parquet/encodings)
http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/337cf584/src/parquet/encodings/bool-encoding.h
----------------------------------------------------------------------
diff --git a/src/parquet/encodings/bool-encoding.h b/src/parquet/encodings/bool-encoding.h
new file mode 100644
index 0000000..8eb55bc
--- /dev/null
+++ b/src/parquet/encodings/bool-encoding.h
@@ -0,0 +1,48 @@
+// Copyright 2012 Cloudera Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef PARQUET_BOOL_ENCODING_H
+#define PARQUET_BOOL_ENCODING_H
+
+#include "parquet/encodings/encodings.h"
+
+#include <algorithm>
+
+namespace parquet_cpp {
+
+class BoolDecoder : public Decoder {
+ public:
+ BoolDecoder() : Decoder(parquet::Type::BOOLEAN, parquet::Encoding::PLAIN) { }
+
+ virtual void SetData(int num_values, const uint8_t* data, int len) {
+ num_values_ = num_values;
+ decoder_ = RleDecoder(data, len, 1);
+ }
+
+ virtual int GetBool(bool* buffer, int max_values) {
+ max_values = std::min(max_values, num_values_);
+ for (int i = 0; i < max_values; ++i) {
+ if (!decoder_.Get(&buffer[i])) ParquetException::EofException();
+ }
+ num_values_ -= max_values;
+ return max_values;
+ }
+
+ private:
+ RleDecoder decoder_;
+};
+
+} // namespace parquet_cpp
+
+#endif
http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/337cf584/src/parquet/encodings/delta-bit-pack-encoding.h
----------------------------------------------------------------------
diff --git a/src/parquet/encodings/delta-bit-pack-encoding.h b/src/parquet/encodings/delta-bit-pack-encoding.h
new file mode 100644
index 0000000..77a3b26
--- /dev/null
+++ b/src/parquet/encodings/delta-bit-pack-encoding.h
@@ -0,0 +1,116 @@
+// Copyright 2012 Cloudera Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef PARQUET_DELTA_BIT_PACK_ENCODING_H
+#define PARQUET_DELTA_BIT_PACK_ENCODING_H
+
+#include "parquet/encodings/encodings.h"
+
+#include <algorithm>
+#include <vector>
+
+namespace parquet_cpp {
+
+// Decoder for DELTA_BINARY_PACKED data; only INT32 and INT64 columns are accepted.
+//
+// Each value is rebuilt by adding a bit-packed delta (plus the block's minimum
+// delta) to the previously decoded value. Deltas are grouped into miniblocks,
+// each of which has its own bit width.
+class DeltaBitPackDecoder : public Decoder {
+ public:
+  // Throws ParquetException unless 'type' is INT32 or INT64.
+  explicit DeltaBitPackDecoder(const parquet::Type::type& type)
+    : Decoder(type, parquet::Encoding::DELTA_BINARY_PACKED),
+      values_current_block_(0),
+      num_mini_blocks_(0),
+      values_per_mini_block_(0),
+      values_current_mini_block_(0),
+      min_delta_(0),
+      mini_block_idx_(0),
+      delta_bit_width_(0),
+      last_value_(0) {
+    if (type != parquet::Type::INT32 && type != parquet::Type::INT64) {
+      throw ParquetException("Delta bit pack encoding should only be for integer data.");
+    }
+  }
+
+  // Begins a new page of 'num_values' values stored in the 'len' bytes at 'data'.
+  virtual void SetData(int num_values, const uint8_t* data, int len) {
+    num_values_ = num_values;
+    decoder_ = BitReader(data, len);
+    values_current_block_ = 0;
+    values_current_mini_block_ = 0;
+    // Drop the previous page's miniblock layout. Without this, the first read on
+    // the new page would find leftover bit widths and decode with them instead
+    // of calling InitBlock() to read this page's header.
+    delta_bit_widths_.clear();
+    mini_block_idx_ = 0;
+  }
+
+  // Decodes up to 'max_values' values into 'buffer'; returns the number decoded.
+  virtual int GetInt32(int32_t* buffer, int max_values) {
+    return GetInternal(buffer, max_values);
+  }
+
+  // Decodes up to 'max_values' values into 'buffer'; returns the number decoded.
+  virtual int GetInt64(int64_t* buffer, int max_values) {
+    return GetInternal(buffer, max_values);
+  }
+
+ private:
+  // Reads a block header (block size, miniblock count, value count, first
+  // value), then the minimum delta and one bit width per miniblock, and
+  // positions the decoder on the first miniblock.
+  //
+  // NOTE(review): the Parquet format description appears to place the block
+  // size / miniblock count / value count / first value once per page rather
+  // than once per block -- confirm against the spec before relying on this for
+  // pages that span several blocks.
+  void InitBlock() {
+    uint64_t block_size;
+    if (!decoder_.GetVlqInt(&block_size)) ParquetException::EofException();
+    if (!decoder_.GetVlqInt(&num_mini_blocks_)) ParquetException::EofException();
+    if (!decoder_.GetVlqInt(&values_current_block_)) {
+      ParquetException::EofException();
+    }
+    if (!decoder_.GetZigZagVlqInt(&last_value_)) ParquetException::EofException();
+    // A zero miniblock count would divide by zero and index an empty vector
+    // below, so reject it as malformed input.
+    if (num_mini_blocks_ == 0) {
+      throw ParquetException("Delta bit pack block declares zero mini blocks.");
+    }
+    delta_bit_widths_.resize(num_mini_blocks_);
+
+    if (!decoder_.GetZigZagVlqInt(&min_delta_)) ParquetException::EofException();
+    for (uint64_t i = 0; i < num_mini_blocks_; ++i) {
+      if (!decoder_.GetAligned<uint8_t>(1, &delta_bit_widths_[i])) {
+        ParquetException::EofException();
+      }
+    }
+    values_per_mini_block_ = block_size / num_mini_blocks_;
+    mini_block_idx_ = 0;
+    delta_bit_width_ = delta_bit_widths_[0];
+    values_current_mini_block_ = values_per_mini_block_;
+  }
+
+  // Shared implementation of GetInt32()/GetInt64(). Decodes at most 'max_values'
+  // values (bounded by the values left in the page) into 'buffer' and returns
+  // the count.
+  template <typename T>
+  int GetInternal(T* buffer, int max_values) {
+    max_values = std::min(max_values, num_values_);
+    for (int i = 0; i < max_values; ++i) {
+      if (UNLIKELY(values_current_mini_block_ == 0)) {
+        // Current miniblock is exhausted: move to the next one, or start a new
+        // block when there is none left.
+        ++mini_block_idx_;
+        if (static_cast<size_t>(mini_block_idx_) < delta_bit_widths_.size()) {
+          delta_bit_width_ = delta_bit_widths_[mini_block_idx_];
+          values_current_mini_block_ = values_per_mini_block_;
+        } else {
+          InitBlock();
+          // The header carries the first value itself, so no delta is read.
+          buffer[i] = last_value_;
+          continue;
+        }
+      }
+
+      // TODO: the key to this algorithm is to decode the entire miniblock at once.
+      int64_t delta;
+      if (!decoder_.GetValue(delta_bit_width_, &delta)) ParquetException::EofException();
+      delta += min_delta_;
+      last_value_ += delta;
+      buffer[i] = last_value_;
+      --values_current_mini_block_;
+    }
+    num_values_ -= max_values;
+    return max_values;
+  }
+
+  BitReader decoder_;
+  uint64_t values_current_block_;
+  uint64_t num_mini_blocks_;
+  uint64_t values_per_mini_block_;
+  // Values still to be decoded from the current miniblock.
+  uint64_t values_current_mini_block_;
+
+  int64_t min_delta_;
+  // Index into delta_bit_widths_ of the miniblock being decoded.
+  int mini_block_idx_;
+  // Bit width of each miniblock of the current block.
+  std::vector<uint8_t> delta_bit_widths_;
+  int delta_bit_width_;
+
+  // Most recently decoded value; each delta is added to it.
+  int64_t last_value_;
+};
+
+} // namespace parquet_cpp
+
+#endif
http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/337cf584/src/parquet/encodings/delta-byte-array-encoding.h
----------------------------------------------------------------------
diff --git a/src/parquet/encodings/delta-byte-array-encoding.h b/src/parquet/encodings/delta-byte-array-encoding.h
new file mode 100644
index 0000000..3396586
--- /dev/null
+++ b/src/parquet/encodings/delta-byte-array-encoding.h
@@ -0,0 +1,74 @@
+// Copyright 2012 Cloudera Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef PARQUET_DELTA_BYTE_ARRAY_ENCODING_H
+#define PARQUET_DELTA_BYTE_ARRAY_ENCODING_H
+
+#include "parquet/encodings/encodings.h"
+
+#include <algorithm>
+
+namespace parquet_cpp {
+
+// Decoder for DELTA_BYTE_ARRAY data. Each value is stored as the length of the
+// prefix it shares with the previous value plus a suffix; prefix lengths are
+// read with a DeltaBitPackDecoder and suffixes with a
+// DeltaLengthByteArrayDecoder.
+class DeltaByteArrayDecoder : public Decoder {
+ public:
+  DeltaByteArrayDecoder()
+    : Decoder(parquet::Type::BYTE_ARRAY, parquet::Encoding::DELTA_BYTE_ARRAY),
+      prefix_len_decoder_(parquet::Type::INT32),
+      suffix_decoder_() {
+    // Start with an empty "previous value" so it is never read uninitialized.
+    last_value_.len = 0;
+    last_value_.ptr = nullptr;
+  }
+
+  // Begins a new page. The buffer starts with a 4-byte length of the
+  // prefix-length section, followed by that section, followed by the suffixes.
+  // Reports end-of-stream if the buffer is too short for what it declares.
+  virtual void SetData(int num_values, const uint8_t* data, int len) {
+    num_values_ = num_values;
+    // The previous page's last value must not be used as a prefix source here.
+    last_value_.len = 0;
+    last_value_.ptr = nullptr;
+    if (len == 0) return;
+    if (len < 4) ParquetException::EofException();
+    // memcpy instead of a pointer cast: 'data' is not guaranteed to be aligned.
+    int32_t prefix_len_length;
+    memcpy(&prefix_len_length, data, sizeof(prefix_len_length));
+    data += 4;
+    len -= 4;
+    if (prefix_len_length < 0 || prefix_len_length > len) {
+      ParquetException::EofException();
+    }
+    prefix_len_decoder_.SetData(num_values, data, prefix_len_length);
+    data += prefix_len_length;
+    len -= prefix_len_length;
+    suffix_decoder_.SetData(num_values, data, len);
+  }
+
+  // Decodes up to 'max_values' byte arrays into 'buffer' and returns the count.
+  // Every returned value points at a freshly malloc()'ed buffer.
+  //
+  // TODO: this doesn't work and requires memory management. We need to allocate
+  // new strings to store the results. (Nothing in this class frees the buffers
+  // allocated below.)
+  virtual int GetByteArray(ByteArray* buffer, int max_values) {
+    max_values = std::min(max_values, num_values_);
+    for (int i = 0; i < max_values; ++i) {
+      int32_t prefix_len = 0;
+      if (prefix_len_decoder_.GetInt32(&prefix_len, 1) != 1) {
+        ParquetException::EofException();
+      }
+      ByteArray suffix;
+      if (suffix_decoder_.GetByteArray(&suffix, 1) != 1) {
+        ParquetException::EofException();
+      }
+      // The prefix is copied out of the previous value, so it cannot be longer
+      // than that value.
+      if (prefix_len < 0 ||
+          static_cast<int64_t>(prefix_len) > static_cast<int64_t>(last_value_.len)) {
+        throw ParquetException("Invalid prefix length in delta byte array data.");
+      }
+      buffer[i].len = prefix_len + suffix.len;
+
+      uint8_t* result = reinterpret_cast<uint8_t*>(malloc(buffer[i].len));
+      if (result == nullptr && buffer[i].len > 0) {
+        throw ParquetException("Failed to allocate memory for decoded byte array.");
+      }
+      // Skip zero-length copies so memcpy is never handed a null pointer.
+      if (prefix_len > 0) memcpy(result, last_value_.ptr, prefix_len);
+      if (suffix.len > 0) memcpy(result + prefix_len, suffix.ptr, suffix.len);
+
+      buffer[i].ptr = result;
+      last_value_ = buffer[i];
+    }
+    num_values_ -= max_values;
+    return max_values;
+  }
+
+ private:
+  DeltaBitPackDecoder prefix_len_decoder_;
+  DeltaLengthByteArrayDecoder suffix_decoder_;
+  // Previously decoded value; the source of the shared prefix bytes.
+  ByteArray last_value_;
+};
+
+} // namespace parquet_cpp
+
+#endif
http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/337cf584/src/parquet/encodings/delta-length-byte-array-encoding.h
----------------------------------------------------------------------
diff --git a/src/parquet/encodings/delta-length-byte-array-encoding.h b/src/parquet/encodings/delta-length-byte-array-encoding.h
new file mode 100644
index 0000000..06bf39d
--- /dev/null
+++ b/src/parquet/encodings/delta-length-byte-array-encoding.h
@@ -0,0 +1,63 @@
+// Copyright 2012 Cloudera Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef PARQUET_DELTA_LENGTH_BYTE_ARRAY_ENCODING_H
+#define PARQUET_DELTA_LENGTH_BYTE_ARRAY_ENCODING_H
+
+#include "parquet/encodings/encodings.h"
+
+#include <algorithm>
+#include <cstring>
+
+namespace parquet_cpp {
+
+// Decoder for DELTA_LENGTH_BYTE_ARRAY data: all value lengths come first
+// (decoded with a DeltaBitPackDecoder), followed by the value bytes laid out
+// back to back.
+class DeltaLengthByteArrayDecoder : public Decoder {
+ public:
+  DeltaLengthByteArrayDecoder()
+    : Decoder(parquet::Type::BYTE_ARRAY, parquet::Encoding::DELTA_LENGTH_BYTE_ARRAY),
+      len_decoder_(parquet::Type::INT32),
+      data_(nullptr),
+      len_(0) {
+  }
+
+  // Begins a new page. The buffer starts with a 4-byte size of the lengths
+  // section, followed by that section, followed by the value bytes. Reports
+  // end-of-stream if the buffer is too short for what it declares.
+  virtual void SetData(int num_values, const uint8_t* data, int len) {
+    num_values_ = num_values;
+    // Never keep pointing into the previous page's buffer.
+    data_ = nullptr;
+    len_ = 0;
+    if (len == 0) return;
+    if (len < 4) ParquetException::EofException();
+    // memcpy instead of a pointer cast: 'data' is not guaranteed to be aligned.
+    int32_t total_lengths_len;
+    memcpy(&total_lengths_len, data, sizeof(total_lengths_len));
+    data += 4;
+    len -= 4;
+    if (total_lengths_len < 0 || total_lengths_len > len) {
+      ParquetException::EofException();
+    }
+    len_decoder_.SetData(num_values, data, total_lengths_len);
+    data_ = data + total_lengths_len;
+    len_ = len - total_lengths_len;
+  }
+
+  // Decodes up to 'max_values' byte arrays into 'buffer' and returns the count.
+  // The returned values point directly into the page buffer given to SetData().
+  virtual int GetByteArray(ByteArray* buffer, int max_values) {
+    max_values = std::min(max_values, num_values_);
+    for (int i = 0; i < max_values; ++i) {
+      // Lengths are decoded one at a time, which avoids a caller-sized
+      // variable-length stack array.
+      int32_t length = 0;
+      if (len_decoder_.GetInt32(&length, 1) != 1) ParquetException::EofException();
+      // A length that is negative or runs past the remaining bytes would make
+      // the returned pointer range invalid.
+      if (length < 0 || length > len_) ParquetException::EofException();
+      buffer[i].len = length;
+      buffer[i].ptr = data_;
+      data_ += length;
+      len_ -= length;
+    }
+    num_values_ -= max_values;
+    return max_values;
+  }
+
+ private:
+  DeltaBitPackDecoder len_decoder_;
+  // Next unread value byte, and the number of value bytes remaining.
+  const uint8_t* data_;
+  int len_;
+};
+
+} // namespace parquet_cpp
+
+#endif
http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/337cf584/src/parquet/encodings/dictionary-encoding.h
----------------------------------------------------------------------
diff --git a/src/parquet/encodings/dictionary-encoding.h b/src/parquet/encodings/dictionary-encoding.h
new file mode 100644
index 0000000..2501b2a
--- /dev/null
+++ b/src/parquet/encodings/dictionary-encoding.h
@@ -0,0 +1,148 @@
+// Copyright 2012 Cloudera Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef PARQUET_DICTIONARY_ENCODING_H
+#define PARQUET_DICTIONARY_ENCODING_H
+
+#include "parquet/encodings/encodings.h"
+
+#include <algorithm>
+#include <vector>
+
+namespace parquet_cpp {
+
+// Decoder for dictionary-encoded pages. The dictionary is copied once at
+// construction; each page then supplies RLE-encoded indices into it.
+class DictionaryDecoder : public Decoder {
+ public:
+  // Initializes the dictionary with values from 'dictionary'. The data in dictionary
+  // is not guaranteed to persist in memory after this call so the dictionary decoder
+  // needs to copy the data out if necessary.
+  //
+  // Throws ParquetException for BOOLEAN; other unsupported types are reported
+  // through ParquetException::NYI().
+  DictionaryDecoder(const parquet::Type::type& type, Decoder* dictionary)
+    : Decoder(type, parquet::Encoding::RLE_DICTIONARY) {
+    int num_dictionary_values = dictionary->values_left();
+    // data() is used instead of &v[0] throughout so that an empty dictionary
+    // does not index an empty vector.
+    switch (type) {
+      case parquet::Type::BOOLEAN:
+        throw ParquetException("Boolean cols should not be dictionary encoded.");
+
+      case parquet::Type::INT32:
+        int32_dictionary_.resize(num_dictionary_values);
+        dictionary->GetInt32(int32_dictionary_.data(), num_dictionary_values);
+        break;
+      case parquet::Type::INT64:
+        int64_dictionary_.resize(num_dictionary_values);
+        dictionary->GetInt64(int64_dictionary_.data(), num_dictionary_values);
+        break;
+      case parquet::Type::FLOAT:
+        float_dictionary_.resize(num_dictionary_values);
+        dictionary->GetFloat(float_dictionary_.data(), num_dictionary_values);
+        break;
+      case parquet::Type::DOUBLE:
+        double_dictionary_.resize(num_dictionary_values);
+        dictionary->GetDouble(double_dictionary_.data(), num_dictionary_values);
+        break;
+      case parquet::Type::BYTE_ARRAY: {
+        byte_array_dictionary_.resize(num_dictionary_values);
+        dictionary->GetByteArray(byte_array_dictionary_.data(), num_dictionary_values);
+        size_t total_size = 0;
+        for (int i = 0; i < num_dictionary_values; ++i) {
+          total_size += byte_array_dictionary_[i].len;
+        }
+        // Copy every value into one owned buffer and repoint the entries at it.
+        byte_array_data_.resize(total_size);
+        size_t offset = 0;
+        for (int i = 0; i < num_dictionary_values; ++i) {
+          ByteArray& entry = byte_array_dictionary_[i];
+          uint8_t* copy = byte_array_data_.data() + offset;
+          if (entry.len > 0) memcpy(copy, entry.ptr, entry.len);
+          entry.ptr = copy;
+          offset += entry.len;
+        }
+        break;
+      }
+      default:
+        ParquetException::NYI("Unsupported dictionary type");
+    }
+  }
+
+  // Begins a new page. The first byte of 'data' is the bit width of the
+  // indices; the remaining bytes are handed to the RLE index decoder.
+  virtual void SetData(int num_values, const uint8_t* data, int len) {
+    num_values_ = num_values;
+    if (len == 0) return;
+    uint8_t bit_width = *data;
+    ++data;
+    --len;
+    idx_decoder_ = RleDecoder(data, len, bit_width);
+  }
+
+  // Each getter decodes up to 'max_values' indices, stores the dictionary
+  // entries they select in 'buffer', and returns the number stored.
+  virtual int GetInt32(int32_t* buffer, int max_values) {
+    return GetInternal(int32_dictionary_, buffer, max_values);
+  }
+
+  virtual int GetInt64(int64_t* buffer, int max_values) {
+    return GetInternal(int64_dictionary_, buffer, max_values);
+  }
+
+  virtual int GetFloat(float* buffer, int max_values) {
+    return GetInternal(float_dictionary_, buffer, max_values);
+  }
+
+  virtual int GetDouble(double* buffer, int max_values) {
+    return GetInternal(double_dictionary_, buffer, max_values);
+  }
+
+  virtual int GetByteArray(ByteArray* buffer, int max_values) {
+    return GetInternal(byte_array_dictionary_, buffer, max_values);
+  }
+
+ private:
+  // Shared body of the typed getters: looks up one entry of 'dictionary' per
+  // decoded index.
+  template <typename T>
+  int GetInternal(const std::vector<T>& dictionary, T* buffer, int max_values) {
+    max_values = std::min(max_values, num_values_);
+    for (int i = 0; i < max_values; ++i) {
+      buffer[i] = dictionary[index(dictionary.size())];
+    }
+    return max_values;
+  }
+
+  // Decodes the next index and consumes one value of the page. Throws
+  // ParquetException if the index does not refer to one of the
+  // 'dictionary_size' entries, instead of reading outside the dictionary.
+  int index(size_t dictionary_size) {
+    int idx = 0;
+    if (!idx_decoder_.Get(&idx)) ParquetException::EofException();
+    if (idx < 0 || static_cast<size_t>(idx) >= dictionary_size) {
+      throw ParquetException("Dictionary index out of range.");
+    }
+    --num_values_;
+    return idx;
+  }
+
+  // Only one is set.
+  std::vector<int32_t> int32_dictionary_;
+  std::vector<int64_t> int64_dictionary_;
+  std::vector<float> float_dictionary_;
+  std::vector<double> double_dictionary_;
+  std::vector<ByteArray> byte_array_dictionary_;
+
+  // Data that contains the byte array data (byte_array_dictionary_ just has the
+  // pointers).
+  std::vector<uint8_t> byte_array_data_;
+
+  RleDecoder idx_decoder_;
+};
+
+} // namespace parquet_cpp
+
+#endif
http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/337cf584/src/parquet/encodings/encodings.h
----------------------------------------------------------------------
diff --git a/src/parquet/encodings/encodings.h b/src/parquet/encodings/encodings.h
new file mode 100644
index 0000000..9211bf8
--- /dev/null
+++ b/src/parquet/encodings/encodings.h
@@ -0,0 +1,82 @@
+// Copyright 2012 Cloudera Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef PARQUET_ENCODINGS_ENCODINGS_H
+#define PARQUET_ENCODINGS_ENCODINGS_H
+
+#include <cstdint>
+
+#include "parquet/thrift/parquet_constants.h"
+#include "parquet/thrift/parquet_types.h"
+#include "parquet/util/rle-encoding.h"
+#include "parquet/util/bit-stream-utils.inline.h"
+
+namespace parquet_cpp {
+
+// Base class of all value decoders. A decoder is bound to one physical type and
+// one encoding at construction and is then fed one data page at a time through
+// SetData().
+class Decoder {
+ public:
+  virtual ~Decoder() {}
+
+  // Sets the data for a new page. This will be called multiple times on the same
+  // decoder and should reset all internal state.
+  virtual void SetData(int num_values, const uint8_t* data, int len) = 0;
+
+  // Subclasses should override the ones they support. In each of these functions,
+  // the decoder decodes up to 'max_values' values, storing the result in 'buffer'.
+  // The function returns the number of values decoded, which should be max_values
+  // except at the end of the current data page. The default implementations
+  // throw ParquetException.
+  virtual int GetBool(bool* buffer, int max_values) {
+    throw ParquetException("Decoder does not implement this type.");
+  }
+  virtual int GetInt32(int32_t* buffer, int max_values) {
+    throw ParquetException("Decoder does not implement this type.");
+  }
+  virtual int GetInt64(int64_t* buffer, int max_values) {
+    throw ParquetException("Decoder does not implement this type.");
+  }
+  virtual int GetFloat(float* buffer, int max_values) {
+    throw ParquetException("Decoder does not implement this type.");
+  }
+  virtual int GetDouble(double* buffer, int max_values) {
+    throw ParquetException("Decoder does not implement this type.");
+  }
+  virtual int GetByteArray(ByteArray* buffer, int max_values) {
+    throw ParquetException("Decoder does not implement this type.");
+  }
+
+  // Returns the number of values left (for the last call to SetData()). This is
+  // the number of values left in this page.
+  int values_left() const { return num_values_; }
+
+  // Returns the encoding this decoder was constructed for. Returned by plain
+  // value: a top-level const on a by-value return is ignored by the compiler.
+  parquet::Encoding::type encoding() const { return encoding_; }
+
+ protected:
+  // Only subclasses may construct a decoder; the page is initially empty.
+  Decoder(const parquet::Type::type& type, const parquet::Encoding::type& encoding)
+    : type_(type), encoding_(encoding), num_values_(0) {}
+
+  const parquet::Type::type type_;
+  const parquet::Encoding::type encoding_;
+  // Values not yet decoded from the current page.
+  int num_values_;
+};
+
+} // namespace parquet_cpp
+
+#include "parquet/encodings/bool-encoding.h"
+#include "parquet/encodings/plain-encoding.h"
+#include "parquet/encodings/dictionary-encoding.h"
+#include "parquet/encodings/delta-bit-pack-encoding.h"
+#include "parquet/encodings/delta-length-byte-array-encoding.h"
+#include "parquet/encodings/delta-byte-array-encoding.h"
+
+#endif // PARQUET_ENCODINGS_ENCODINGS_H
http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/337cf584/src/parquet/encodings/plain-encoding.h
----------------------------------------------------------------------
diff --git a/src/parquet/encodings/plain-encoding.h b/src/parquet/encodings/plain-encoding.h
new file mode 100644
index 0000000..b094cdb
--- /dev/null
+++ b/src/parquet/encodings/plain-encoding.h
@@ -0,0 +1,83 @@
+// Copyright 2012 Cloudera Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef PARQUET_PLAIN_ENCODING_H
+#define PARQUET_PLAIN_ENCODING_H
+
+#include "parquet/encodings/encodings.h"
+
+#include <algorithm>
+
+namespace parquet_cpp {
+
+// Decoder for PLAIN-encoded data: fixed-width values are copied straight out of
+// the page, and byte arrays are stored as a 4-byte length followed by the bytes.
+class PlainDecoder : public Decoder {
+ public:
+  explicit PlainDecoder(const parquet::Type::type& type)
+    : Decoder(type, parquet::Encoding::PLAIN), data_(nullptr), len_(0) {
+  }
+
+  // Begins a new page of 'num_values' values stored in the 'len' bytes at 'data'.
+  virtual void SetData(int num_values, const uint8_t* data, int len) {
+    num_values_ = num_values;
+    data_ = data;
+    len_ = len;
+  }
+
+  // Copies up to 'max_values' values of 'byte_size' bytes each into 'buffer' and
+  // returns the number copied. Reports end-of-stream if the page holds fewer
+  // bytes than that requires.
+  int GetValues(void* buffer, int max_values, int byte_size) {
+    max_values = std::min(max_values, num_values_);
+    // Nothing to copy; also keeps memcpy away from a null data_ pointer.
+    if (max_values <= 0) return 0;
+    // Widen before multiplying so a large request cannot overflow int.
+    const int64_t size = static_cast<int64_t>(max_values) * byte_size;
+    if (len_ < size) ParquetException::EofException();
+    memcpy(buffer, data_, static_cast<size_t>(size));
+    data_ += size;
+    len_ -= static_cast<int>(size);
+    num_values_ -= max_values;
+    return max_values;
+  }
+
+  virtual int GetInt32(int32_t* buffer, int max_values) {
+    return GetValues(buffer, max_values, sizeof(int32_t));
+  }
+
+  virtual int GetInt64(int64_t* buffer, int max_values) {
+    return GetValues(buffer, max_values, sizeof(int64_t));
+  }
+
+  virtual int GetFloat(float* buffer, int max_values) {
+    return GetValues(buffer, max_values, sizeof(float));
+  }
+
+  virtual int GetDouble(double* buffer, int max_values) {
+    return GetValues(buffer, max_values, sizeof(double));
+  }
+
+  // Decodes up to 'max_values' byte arrays into 'buffer' and returns the count.
+  // The returned values point directly into the page buffer given to SetData().
+  virtual int GetByteArray(ByteArray* buffer, int max_values) {
+    max_values = std::min(max_values, num_values_);
+    for (int i = 0; i < max_values; ++i) {
+      // Make sure the length prefix itself is present before reading it.
+      if (len_ < static_cast<int>(sizeof(uint32_t))) ParquetException::EofException();
+      // memcpy instead of a pointer cast: data_ is not guaranteed to be aligned.
+      uint32_t value_len;
+      memcpy(&value_len, data_, sizeof(value_len));
+      const int64_t consumed = static_cast<int64_t>(sizeof(uint32_t)) + value_len;
+      if (len_ < consumed) ParquetException::EofException();
+      buffer[i].len = value_len;
+      buffer[i].ptr = data_ + sizeof(uint32_t);
+      data_ += consumed;
+      len_ -= static_cast<int>(consumed);
+    }
+    num_values_ -= max_values;
+    return max_values;
+  }
+
+ private:
+  // Next unread byte of the page, and the number of bytes remaining.
+  const uint8_t* data_;
+  int len_;
+};
+
+} // namespace parquet_cpp
+
+#endif
http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/337cf584/src/parquet/parquet.h
----------------------------------------------------------------------
diff --git a/src/parquet/parquet.h b/src/parquet/parquet.h
index c1a73b7..320f003 100644
--- a/src/parquet/parquet.h
+++ b/src/parquet/parquet.h
@@ -17,14 +17,18 @@
#include <exception>
#include <sstream>
-#include <boost/cstdint.hpp>
-#include <boost/scoped_ptr.hpp>
+#include <cstdint>
+#include <memory>
+#include <string>
+#include <unordered_map>
+#include <vector>
+
+// Needed for thrift
#include <boost/shared_ptr.hpp>
-#include <boost/unordered_map.hpp>
-#include "gen-cpp/parquet_constants.h"
-#include "gen-cpp/parquet_types.h"
-#include "impala/rle-encoding.h"
+#include "parquet/thrift/parquet_constants.h"
+#include "parquet/thrift/parquet_types.h"
+#include "parquet/util/rle-encoding.h"
// TCompactProtocol requires some #defines to work right.
#define SIGNED_RIGHT_SHIFT_IS 1
@@ -36,6 +40,17 @@
#include <thrift/protocol/TBinaryProtocol.h>
#include <thrift/transport/TBufferTransports.h>
+namespace std {
+
+// Hash support for parquet::Encoding::type so the enum can be used as the key
+// of an unordered container: the enumerator is hashed as its integer value.
+template <>
+struct hash<parquet::Encoding::type> {
+  std::size_t operator()(const parquet::Encoding::type& k) const {
+    const int as_int = static_cast<int>(k);
+    const hash<int> int_hasher;
+    return int_hasher(as_int);
+  }
+};
+
+}  // namespace std
+
namespace parquet_cpp {
class Codec;
@@ -146,18 +161,18 @@ class ColumnReader {
InputStream* stream_;
// Compression codec to use.
- boost::scoped_ptr<Codec> decompressor_;
+ std::unique_ptr<Codec> decompressor_;
std::vector<uint8_t> decompression_buffer_;
// Map of compression type to decompressor object.
- boost::unordered_map<parquet::Encoding::type, boost::shared_ptr<Decoder> > decoders_;
+ std::unordered_map<parquet::Encoding::type, std::shared_ptr<Decoder> > decoders_;
parquet::PageHeader current_page_header_;
// Not set if field is required.
- boost::scoped_ptr<impala::RleDecoder> definition_level_decoder_;
+ std::unique_ptr<RleDecoder> definition_level_decoder_;
// Not set for flat schemas.
- boost::scoped_ptr<impala::RleDecoder> repetition_level_decoder_;
+ std::unique_ptr<RleDecoder> repetition_level_decoder_;
Decoder* current_decoder_;
int num_buffered_values_;
@@ -241,7 +256,6 @@ inline void DeserializeThriftMsg(const uint8_t* buf, uint32_t* len, T* deseriali
*len = *len - bytes_left;
}
-}
+} // namespace parquet_cpp
#endif
-
http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/337cf584/src/parquet/thrift/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/src/parquet/thrift/CMakeLists.txt b/src/parquet/thrift/CMakeLists.txt
new file mode 100644
index 0000000..e2a00c9
--- /dev/null
+++ b/src/parquet/thrift/CMakeLists.txt
@@ -0,0 +1,29 @@
+# Copyright 2012 Cloudera Inc.
+
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Static library built from the Thrift-generated parquet sources.
+add_library(parquet_thrift STATIC
+  parquet_constants.cpp
+  parquet_types.cpp
+)
+# A STATIC library is an archive artifact, so its output location is controlled
+# by ARCHIVE_OUTPUT_DIRECTORY. LIBRARY_OUTPUT_DIRECTORY is kept as well so the
+# location stays the same if the library type is ever changed.
+set_target_properties(parquet_thrift
+  PROPERTIES
+  ARCHIVE_OUTPUT_DIRECTORY "${BUILD_OUTPUT_ROOT_DIRECTORY}"
+  LIBRARY_OUTPUT_DIRECTORY "${BUILD_OUTPUT_ROOT_DIRECTORY}")
+
+
+# Headers: thrift
+install(FILES
+  parquet_types.h
+  parquet_constants.h
+  DESTINATION include/parquet/thrift)
http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/337cf584/src/parquet/thrift/parquet_constants.cpp
----------------------------------------------------------------------
diff --git a/src/parquet/thrift/parquet_constants.cpp b/src/parquet/thrift/parquet_constants.cpp
new file mode 100644
index 0000000..caa5af6
--- /dev/null
+++ b/src/parquet/thrift/parquet_constants.cpp
@@ -0,0 +1,17 @@
+/**
+ * Autogenerated by Thrift Compiler (0.9.0)
+ *
+ * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING
+ * @generated
+ */
+#include "parquet_constants.h"
+
+namespace parquet {
+
+const parquetConstants g_parquet_constants;  // Single shared instance.
+
+parquetConstants::parquetConstants() {  // Nothing to initialize.
+}
+
+} // namespace
+
http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/337cf584/src/parquet/thrift/parquet_constants.h
----------------------------------------------------------------------
diff --git a/src/parquet/thrift/parquet_constants.h b/src/parquet/thrift/parquet_constants.h
new file mode 100644
index 0000000..71d6f58
--- /dev/null
+++ b/src/parquet/thrift/parquet_constants.h
@@ -0,0 +1,24 @@
+/**
+ * Autogenerated by Thrift Compiler (0.9.0)
+ *
+ * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING
+ * @generated
+ */
+#ifndef parquet_CONSTANTS_H
+#define parquet_CONSTANTS_H
+
+#include "parquet_types.h"
+
+namespace parquet {
+
+class parquetConstants {  // Thrift-generated; declares only a constructor.
+ public:
+ parquetConstants();
+
+};
+
+extern const parquetConstants g_parquet_constants;  // Global instance.
+
+} // namespace
+
+
+#endif
[7/7] parquet-cpp git commit: PARQUET-416: C++11 compilation,
code reorg, libparquet and installation targets
Posted by no...@apache.org.
PARQUET-416: C++11 compilation, code reorg, libparquet and installation targets
Reorganize code into a top level src/parquet directory, add a libparquet shared library, and add install targets for libparquet and its header files. Add cpplint script and `make lint` target for code linting.
Replaces earlier PR #13
Author: Wes McKinney <we...@cloudera.com>
Closes #14 from wesm/libparquet-library and squashes the following commits:
2e356fd [Wes McKinney] PARQUET-416: Compile with C++11 and replace usages of boost::shared_ptr with std::shared_ptr and other C++11 fixes. Reorganize code into a top level src/parquet directory, add a libparquet shared library, and add install targets for libparquet and its header files. Add cpplint script and `make lint` target for code linting.
Project: http://git-wip-us.apache.org/repos/asf/parquet-cpp/repo
Commit: http://git-wip-us.apache.org/repos/asf/parquet-cpp/commit/337cf584
Tree: http://git-wip-us.apache.org/repos/asf/parquet-cpp/tree/337cf584
Diff: http://git-wip-us.apache.org/repos/asf/parquet-cpp/diff/337cf584
Branch: refs/heads/master
Commit: 337cf584ea25a1c6397c4412a706c58e4bd5e58c
Parents: ea30dec
Author: Wes McKinney <we...@cloudera.com>
Authored: Fri Jan 8 15:49:25 2016 -0800
Committer: Nong Li <no...@gmail.com>
Committed: Fri Jan 8 15:49:25 2016 -0800
----------------------------------------------------------------------
.travis.yml | 43 +-
CMakeLists.txt | 127 +-
build-support/cpplint.py | 6323 ++++++++++++++++++
cmake_modules/clean-all.cmake | 14 +
example/CMakeLists.txt | 9 +-
example/decode_benchmark.cc | 7 +-
example/parquet_reader.cc | 26 +-
generated/gen-cpp/CMakeLists.txt | 20 -
generated/gen-cpp/parquet_constants.cpp | 17 -
generated/gen-cpp/parquet_constants.h | 24 -
generated/gen-cpp/parquet_types.cpp | 2006 ------
generated/gen-cpp/parquet_types.h | 1123 ----
src/CMakeLists.txt | 19 -
src/compression/CMakeLists.txt | 18 -
src/compression/codec.h | 72 -
src/compression/lz4-codec.cc | 38 -
src/compression/snappy-codec.cc | 40 -
src/encodings/bool-encoding.h | 47 -
src/encodings/delta-bit-pack-encoding.h | 114 -
src/encodings/delta-byte-array-encoding.h | 73 -
.../delta-length-byte-array-encoding.h | 62 -
src/encodings/dictionary-encoding.h | 146 -
src/encodings/encodings.h | 83 -
src/encodings/plain-encoding.h | 82 -
src/impala/bit-stream-utils.h | 145 -
src/impala/bit-stream-utils.inline.h | 164 -
src/impala/bit-util.h | 174 -
src/impala/compiler-util.h | 38 -
src/impala/logging.h | 31 -
src/impala/rle-encoding.h | 417 --
src/parquet.cc | 35 +-
src/parquet/CMakeLists.txt | 18 +
src/parquet/compression/CMakeLists.txt | 30 +
src/parquet/compression/codec.h | 71 +
src/parquet/compression/lz4-codec.cc | 40 +
src/parquet/compression/snappy-codec.cc | 42 +
src/parquet/encodings/CMakeLists.txt | 24 +
src/parquet/encodings/bool-encoding.h | 48 +
src/parquet/encodings/delta-bit-pack-encoding.h | 116 +
.../encodings/delta-byte-array-encoding.h | 74 +
.../delta-length-byte-array-encoding.h | 63 +
src/parquet/encodings/dictionary-encoding.h | 148 +
src/parquet/encodings/encodings.h | 82 +
src/parquet/encodings/plain-encoding.h | 83 +
src/parquet/parquet.h | 38 +-
src/parquet/thrift/CMakeLists.txt | 29 +
src/parquet/thrift/parquet_constants.cpp | 17 +
src/parquet/thrift/parquet_constants.h | 24 +
src/parquet/thrift/parquet_types.cpp | 2006 ++++++
src/parquet/thrift/parquet_types.h | 1123 ++++
src/parquet/util/CMakeLists.txt | 24 +
src/parquet/util/bit-stream-utils.h | 147 +
src/parquet/util/bit-stream-utils.inline.h | 164 +
src/parquet/util/bit-util.h | 174 +
src/parquet/util/compiler-util.h | 37 +
src/parquet/util/logging.h | 31 +
src/parquet/util/rle-encoding.h | 419 ++
src/parquet/util/stopwatch.h | 49 +
src/util/stopwatch.h | 49 -
59 files changed, 11630 insertions(+), 5077 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/337cf584/.travis.yml
----------------------------------------------------------------------
diff --git a/.travis.yml b/.travis.yml
index 58b7641..5da9a6f 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -1,3 +1,5 @@
+sudo: required
+dist: trusty
language: cpp
compiler:
@@ -9,38 +11,51 @@ os:
- osx
addons:
- apt:
- packages:
- - libboost-dev
- #- libsnappy-dev currently handled by thirdparty scipts.
- - libboost-program-options-dev #needed for thrift cpp compilation
- - libboost-test-dev #needed for thrift cpp compilation
- - libssl-dev #needed for thrift cpp compilation
- - libtool #needed for thrift cpp compilation
- - bison #needed for thrift cpp compilation
- - flex #needed for thrift cpp compilation
- - pkg-config #needed for thrift cpp compilation
+ apt:
+ sources:
+ - ubuntu-toolchain-r-test
+ - kalakris-cmake
+ packages:
+ - gcc-4.9
+ - g++-4.9
+ - cmake
+ - valgrind
+ - libboost-dev
+ #- libsnappy-dev currently handled by thirdparty scripts.
+ - libboost-program-options-dev #needed for thrift cpp compilation
+ - libboost-test-dev #needed for thrift cpp compilation
+ - libssl-dev #needed for thrift cpp compilation
+ - libtool #needed for thrift cpp compilation
+ - bison #needed for thrift cpp compilation
+ - flex #needed for thrift cpp compilation
+ - pkg-config #needed for thrift cpp compilation
before_install:
- pushd thirdparty
# thrift cpp
- >
+ if [ $TRAVIS_OS_NAME == osx ]; then
+ brew update &&
+ brew install thrift;
+ fi
+ - >
if [ $TRAVIS_OS_NAME == linux ]; then
- wget http://www.us.apache.org/dist/thrift/0.9.1/thrift-0.9.1.tar.gz &&
+ wget http://archive.apache.org/dist/thrift/0.9.1/thrift-0.9.1.tar.gz &&
tar xfz thrift-0.9.1.tar.gz &&
pushd thrift-0.9.1 &&
- ./configure --without-qt4 --without-c_glib --without-csharp --without-java --without-erlang --without-nodejs --without-lua --without-python --without-perl --without-php --without-php_extension --without-ruby --without-haskell --without-go --without-d --with-cpp --prefix=$HOME/local &&
+ ./configure CXXFLAGS='-fPIC' --without-qt4 --without-c_glib --without-csharp --without-java --without-erlang --without-nodejs --without-lua --without-python --without-perl --without-php --without-php_extension --without-ruby --without-haskell --without-go --without-d --with-cpp --prefix=$HOME/local &&
make clean &&
make install &&
popd;
fi
- - if [ $TRAVIS_OS_NAME == osx ]; then brew install thrift; fi
# snappy and lz4
- ./download_thirdparty.sh
- ./build_thirdparty.sh
- popd
before_script:
+ - export CC="gcc-4.9"
+ - export CXX="g++-4.9"
- mkdir build
- cd build
- THRIFT_HOME=$HOME/local cmake ..
http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/337cf584/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/CMakeLists.txt b/CMakeLists.txt
index ad0ed5f..eb67f75 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -12,18 +12,64 @@
# See the License for the specific language governing permissions and
# limitations under the License.
-
cmake_minimum_required(VERSION 2.6)
+project(parquet-cpp)
# generate CTest input files
enable_testing()
# where to find cmake modules
set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${CMAKE_SOURCE_DIR}/cmake_modules")
+set(BUILD_SUPPORT_DIR ${CMAKE_CURRENT_SOURCE_DIR}/build-support)
set(THIRDPARTY_PREFIX ${CMAKE_SOURCE_DIR}/thirdparty/installed)
set(CMAKE_PREFIX_PATH ${THIRDPARTY_PREFIX})
+if(APPLE)
+ set(CMAKE_MACOSX_RPATH 1)
+ set(CMAKE_OSX_DEPLOYMENT_TARGET 10.9)
+endif()
+
+if (NOT PARQUET_LINK)
+ set(PARQUET_LINK "a")
+elseif(NOT ("auto" MATCHES "^${PARQUET_LINK}" OR
+ "dynamic" MATCHES "^${PARQUET_LINK}" OR
+ "static" MATCHES "^${PARQUET_LINK}"))
+ message(FATAL_ERROR "Unknown value for PARQUET_LINK, must be auto|dynamic|static")
+else()
+ # Remove all but the first letter.
+ string(SUBSTRING "${PARQUET_LINK}" 0 1 PARQUET_LINK)
+endif()
+
+# if no build type is specified, default to debug builds
+if (NOT CMAKE_BUILD_TYPE)
+ set(CMAKE_BUILD_TYPE Debug)
+endif(NOT CMAKE_BUILD_TYPE)
+
+# set compile output directory
+string (TOLOWER ${CMAKE_BUILD_TYPE} BUILD_SUBDIR_NAME)
+
+# If building in-source, create the latest symlink. If building out-of-source, which is
+# preferred, simply output the binaries in the build folder.
+if (${CMAKE_SOURCE_DIR} STREQUAL ${CMAKE_CURRENT_BINARY_DIR})
+ set(BUILD_OUTPUT_ROOT_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/build/${BUILD_SUBDIR_NAME}/")
+ # Link build/latest to the current build directory, to avoid developers
+ # accidentally running the latest debug build when in fact they're building
+ # release builds.
+ FILE(MAKE_DIRECTORY ${BUILD_OUTPUT_ROOT_DIRECTORY})
+ if (NOT APPLE)
+ set(MORE_ARGS "-T")
+ endif()
+EXECUTE_PROCESS(COMMAND ln ${MORE_ARGS} -sf ${BUILD_OUTPUT_ROOT_DIRECTORY}
+ ${CMAKE_CURRENT_BINARY_DIR}/build/latest)
+else()
+ set(BUILD_OUTPUT_ROOT_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/${BUILD_SUBDIR_NAME}/")
+endif()
+
+############################################################
+# Dependencies
+############################################################
+
# find boost headers and libs
set(Boost_DEBUG TRUE)
set(Boost_USE_MULTITHREADED ON)
@@ -58,22 +104,83 @@ include_directories(SYSTEM ${LZ4_INCLUDE_DIR})
add_library(lz4static STATIC IMPORTED)
set_target_properties(lz4static PROPERTIES IMPORTED_LOCATION ${LZ4_STATIC_LIB})
-SET(CMAKE_CXX_FLAGS "-msse4.2 -Wall -Wno-unused-value -Wno-unused-variable -Wno-sign-compare -Wno-unknown-pragmas")
-SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -g -ggdb")
-
# Thrift requires these definitions for some types that we use
add_definitions(-DHAVE_INTTYPES_H -DHAVE_NETINET_IN_H -DHAVE_NETDB_H)
add_definitions(-fPIC)
-# where to put generated libraries
-set(LIBRARY_OUTPUT_PATH "${CMAKE_CURRENT_SOURCE_DIR}/build")
+# where to put generated archives (.a files)
+set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY "${BUILD_OUTPUT_ROOT_DIRECTORY}")
+set(ARCHIVE_OUTPUT_DIRECTORY "${BUILD_OUTPUT_ROOT_DIRECTORY}")
+
+# where to put generated libraries (.so files)
+set(CMAKE_LIBRARY_OUTPUT_DIRECTORY "${BUILD_OUTPUT_ROOT_DIRECTORY}")
+set(LIBRARY_OUTPUT_DIRECTORY "${BUILD_OUTPUT_ROOT_DIRECTORY}")
# where to put generated binaries
-set(EXECUTABLE_OUTPUT_PATH "${CMAKE_CURRENT_SOURCE_DIR}/bin")
+set(EXECUTABLE_OUTPUT_PATH "${BUILD_OUTPUT_ROOT_DIRECTORY}")
+
+SET(CMAKE_CXX_FLAGS "-std=c++11 -msse4.2 -Wall -Wno-unused-value -Wno-unused-variable -Wno-sign-compare -Wno-unknown-pragmas")
+SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -g -ggdb")
include_directories(${CMAKE_CURRENT_SOURCE_DIR}/src)
-include_directories(${CMAKE_CURRENT_SOURCE_DIR}/generated)
-add_subdirectory(generated/gen-cpp)
-add_subdirectory(src)
+############################################################
+# "make lint" target
+############################################################
+if (UNIX)
+ # Full lint
+ add_custom_target(lint ${BUILD_SUPPORT_DIR}/cpplint.py
+ --verbose=4
+ --filter=-whitespace/comments,-readability/todo,-build/header_guard,-build/include_order
+ `find ${CMAKE_CURRENT_SOURCE_DIR}/src -name \\*.cc -or -name \\*.h | sed -e '/parquet\\/thrift/g'`)
+endif (UNIX)
+
+############################################################
+# Library config
+
+set(LIBPARQUET_SRCS
+ src/parquet.cc
+)
+
+set(LIBPARQUET_LINK_LIBS
+ parquet_compression
+ parquet_thrift
+ thriftstatic
+)
+
+if ("${PARQUET_LINK}" STREQUAL "d" OR "${PARQUET_LINK}" STREQUAL "a")
+ set(LIBPARQUET_LINKAGE "SHARED")
+else()
+ set(LIBPARQUET_LINKAGE "STATIC")
+endif()
+
+add_library(parquet
+ ${LIBPARQUET_LINKAGE}
+ ${LIBPARQUET_SRCS}
+)
+set_target_properties(parquet
+ PROPERTIES
+ LIBRARY_OUTPUT_DIRECTORY "${BUILD_OUTPUT_ROOT_DIRECTORY}")
+target_link_libraries(parquet ${LIBPARQUET_LINK_LIBS})
+
+if(APPLE)
+ set_target_properties(parquet PROPERTIES LINK_FLAGS "-undefined dynamic_lookup")
+endif()
+
+add_subdirectory(src/parquet)
+add_subdirectory(src/parquet/compression)
+add_subdirectory(src/parquet/encodings)
+add_subdirectory(src/parquet/thrift)
+add_subdirectory(src/parquet/util)
+
add_subdirectory(example)
+
+add_custom_target(clean-all
+ COMMAND ${CMAKE_BUILD_TOOL} clean
+ COMMAND ${CMAKE_COMMAND} -P cmake_modules/clean-all.cmake
+)
+
+# installation
+
+install(TARGETS parquet
+ LIBRARY DESTINATION lib)