You are viewing a plain text version of this content. The canonical link for it is here.

Posted to commits@parquet.apache.org by no...@apache.org on 2016/01/09 00:51:52 UTC

[1/7] parquet-cpp git commit: PARQUET-416: C++11 compilation, code reorg, libparquet and installation targets

Repository: parquet-cpp
Updated Branches:
  refs/heads/master ea30decd9 -> 337cf584e


http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/337cf584/src/parquet/thrift/parquet_types.h
----------------------------------------------------------------------
diff --git a/src/parquet/thrift/parquet_types.h b/src/parquet/thrift/parquet_types.h
new file mode 100644
index 0000000..4360d02
--- /dev/null
+++ b/src/parquet/thrift/parquet_types.h
@@ -0,0 +1,1123 @@
+/**
+ * Autogenerated by Thrift Compiler (0.9.0)
+ *
+ * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING
+ *  @generated
+ */
+#ifndef parquet_TYPES_H
+#define parquet_TYPES_H
+
+#include <thrift/Thrift.h>
+#include <thrift/TApplicationException.h>
+#include <thrift/protocol/TProtocol.h>
+#include <thrift/transport/TTransport.h>
+
+
+
+namespace parquet {
+
+struct Type {  // Scoping wrapper: the enum is spelled Type::type (see SchemaElement::type, ColumnMetaData::type).
+  enum type {
+    BOOLEAN = 0,
+    INT32 = 1,
+    INT64 = 2,
+    INT96 = 3,
+    FLOAT = 4,
+    DOUBLE = 5,
+    BYTE_ARRAY = 6,
+    FIXED_LEN_BYTE_ARRAY = 7
+  };
+};
+
+extern const std::map<int, const char*> _Type_VALUES_TO_NAMES;  // Declared only; presumably maps each Type::type value to its name - confirm in the defining .cpp.
+
+struct ConvertedType {  // Scoping wrapper: the enum is spelled ConvertedType::type (see SchemaElement::converted_type).
+  enum type {
+    UTF8 = 0,
+    MAP = 1,
+    MAP_KEY_VALUE = 2,
+    LIST = 3,
+    ENUM = 4,
+    DECIMAL = 5
+  };
+};
+
+extern const std::map<int, const char*> _ConvertedType_VALUES_TO_NAMES;  // Declared only; presumably maps each ConvertedType::type value to its name - confirm in the defining .cpp.
+
+struct FieldRepetitionType {  // Scoping wrapper: the enum is spelled FieldRepetitionType::type (see SchemaElement::repetition_type).
+  enum type {
+    REQUIRED = 0,
+    OPTIONAL = 1,
+    REPEATED = 2
+  };
+};
+
+extern const std::map<int, const char*> _FieldRepetitionType_VALUES_TO_NAMES;  // Declared only; presumably maps each FieldRepetitionType::type value to its name - confirm in the defining .cpp.
+
+struct Encoding {  // Scoping wrapper: the enum is spelled Encoding::type (used by the page headers and ColumnMetaData::encodings).
+  enum type {
+    PLAIN = 0,  // Note: the next enumerator is 2; value 1 is not assigned in this enum.
+    PLAIN_DICTIONARY = 2,
+    RLE = 3,
+    BIT_PACKED = 4,
+    DELTA_BINARY_PACKED = 5,
+    DELTA_LENGTH_BYTE_ARRAY = 6,
+    DELTA_BYTE_ARRAY = 7,
+    RLE_DICTIONARY = 8
+  };
+};
+
+extern const std::map<int, const char*> _Encoding_VALUES_TO_NAMES;  // Declared only; presumably maps each Encoding::type value to its name - confirm in the defining .cpp.
+
+struct CompressionCodec {  // Scoping wrapper: the enum is spelled CompressionCodec::type (see ColumnMetaData::codec).
+  enum type {
+    UNCOMPRESSED = 0,
+    SNAPPY = 1,
+    GZIP = 2,
+    LZO = 3
+  };
+};
+
+extern const std::map<int, const char*> _CompressionCodec_VALUES_TO_NAMES;  // Declared only; presumably maps each CompressionCodec::type value to its name - confirm in the defining .cpp.
+
+struct PageType {  // Scoping wrapper: the enum is spelled PageType::type (see PageHeader::type).
+  enum type {
+    DATA_PAGE = 0,
+    INDEX_PAGE = 1,
+    DICTIONARY_PAGE = 2,
+    DATA_PAGE_V2 = 3
+  };
+};
+
+extern const std::map<int, const char*> _PageType_VALUES_TO_NAMES;  // Declared only; presumably maps each PageType::type value to its name - confirm in the defining .cpp.
+
+typedef struct _Statistics__isset {  // One presence flag per Statistics field; every flag starts false.
+  _Statistics__isset() : max(false), min(false), null_count(false), distinct_count(false) {}
+  bool max;
+  bool min;
+  bool null_count;
+  bool distinct_count;
+} _Statistics__isset;
+
+class Statistics {  // Thrift-generated record: max/min strings plus null and distinct counts, each with a presence flag.
+ public:
+  // Fingerprints are only declared here; the generator left their values in the trailing comments.
+  static const char* ascii_fingerprint; // = "CE004821871820DD79A8FD98BB101F6D";
+  static const uint8_t binary_fingerprint[16]; // = {0xCE,0x00,0x48,0x21,0x87,0x18,0x20,0xDD,0x79,0xA8,0xFD,0x98,0xBB,0x10,0x1F,0x6D};
+  // Default state: empty strings, zero counts, and no field flagged as set.
+  Statistics() : max(), min(), null_count(0), distinct_count(0) {
+  }
+
+  virtual ~Statistics() throw() {}
+
+  std::string max;
+  std::string min;
+  int64_t null_count;
+  int64_t distinct_count;
+
+  _Statistics__isset __isset;  // Presence flags, flipped to true by the __set_* methods below.
+  // Each setter stores the value and marks the field as present.
+  void __set_max(const std::string& val) {
+    max = val;
+    __isset.max = true;
+  }
+
+  void __set_min(const std::string& val) {
+    min = val;
+    __isset.min = true;
+  }
+
+  void __set_null_count(const int64_t val) {
+    null_count = val;
+    __isset.null_count = true;
+  }
+
+  void __set_distinct_count(const int64_t val) {
+    distinct_count = val;
+    __isset.distinct_count = true;
+  }
+  // Equality: per field, the presence flags must agree, and values are compared only when the field is set.
+  bool operator == (const Statistics & rhs) const
+  {
+    if (__isset.max != rhs.__isset.max)
+      return false;
+    else if (__isset.max && !(max == rhs.max))
+      return false;
+    if (__isset.min != rhs.__isset.min)
+      return false;
+    else if (__isset.min && !(min == rhs.min))
+      return false;
+    if (__isset.null_count != rhs.__isset.null_count)
+      return false;
+    else if (__isset.null_count && !(null_count == rhs.null_count))
+      return false;
+    if (__isset.distinct_count != rhs.__isset.distinct_count)
+      return false;
+    else if (__isset.distinct_count && !(distinct_count == rhs.distinct_count))
+      return false;
+    return true;
+  }
+  bool operator != (const Statistics &rhs) const {
+    return !(*this == rhs);
+  }
+
+  bool operator < (const Statistics & ) const;  // Declared only; no definition in this header.
+  // read/write take a Thrift TProtocol; their bodies are not in this header.
+  uint32_t read(::apache::thrift::protocol::TProtocol* iprot);
+  uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const;
+
+};
+
+void swap(Statistics &a, Statistics &b);  // Declared only; presumably exchanges the contents of a and b - confirm in the defining .cpp.
+
+typedef struct _SchemaElement__isset {  // Presence flags for the flagged SchemaElement fields (name has none); all start false.
+  _SchemaElement__isset() : type(false), type_length(false), repetition_type(false), num_children(false), converted_type(false), scale(false), precision(false) {}
+  bool type;
+  bool type_length;
+  bool repetition_type;
+  bool num_children;
+  bool converted_type;
+  bool scale;
+  bool precision;
+} _SchemaElement__isset;
+
+class SchemaElement {  // Thrift-generated record describing one schema entry: name plus flagged type/repetition/children/decimal fields.
+ public:
+  // Fingerprints are only declared here; the generator left their values in the trailing comments.
+  static const char* ascii_fingerprint; // = "388A784401753800444CFEAC8BC1B1A1";
+  static const uint8_t binary_fingerprint[16]; // = {0x38,0x8A,0x78,0x44,0x01,0x75,0x38,0x00,0x44,0x4C,0xFE,0xAC,0x8B,0xC1,0xB1,0xA1};
+  // Default state: enums cast from 0, integers zero, empty name, no field flagged as set.
+  SchemaElement() : type((Type::type)0), type_length(0), repetition_type((FieldRepetitionType::type)0), name(), num_children(0), converted_type((ConvertedType::type)0), scale(0), precision(0) {
+  }
+
+  virtual ~SchemaElement() throw() {}
+
+  Type::type type;
+  int32_t type_length;
+  FieldRepetitionType::type repetition_type;
+  std::string name;  // The only field without a presence flag.
+  int32_t num_children;
+  ConvertedType::type converted_type;
+  int32_t scale;
+  int32_t precision;
+
+  _SchemaElement__isset __isset;  // Presence flags, flipped to true by the matching __set_* methods.
+  // Setters store the value; all except __set_name also mark the field as present.
+  void __set_type(const Type::type val) {
+    type = val;
+    __isset.type = true;
+  }
+
+  void __set_type_length(const int32_t val) {
+    type_length = val;
+    __isset.type_length = true;
+  }
+
+  void __set_repetition_type(const FieldRepetitionType::type val) {
+    repetition_type = val;
+    __isset.repetition_type = true;
+  }
+
+  void __set_name(const std::string& val) {
+    name = val;
+  }
+
+  void __set_num_children(const int32_t val) {
+    num_children = val;
+    __isset.num_children = true;
+  }
+
+  void __set_converted_type(const ConvertedType::type val) {
+    converted_type = val;
+    __isset.converted_type = true;
+  }
+
+  void __set_scale(const int32_t val) {
+    scale = val;
+    __isset.scale = true;
+  }
+
+  void __set_precision(const int32_t val) {
+    precision = val;
+    __isset.precision = true;
+  }
+  // Equality: name is always compared; a flagged field needs matching flags and is value-compared only when set.
+  bool operator == (const SchemaElement & rhs) const
+  {
+    if (__isset.type != rhs.__isset.type)
+      return false;
+    else if (__isset.type && !(type == rhs.type))
+      return false;
+    if (__isset.type_length != rhs.__isset.type_length)
+      return false;
+    else if (__isset.type_length && !(type_length == rhs.type_length))
+      return false;
+    if (__isset.repetition_type != rhs.__isset.repetition_type)
+      return false;
+    else if (__isset.repetition_type && !(repetition_type == rhs.repetition_type))
+      return false;
+    if (!(name == rhs.name))
+      return false;
+    if (__isset.num_children != rhs.__isset.num_children)
+      return false;
+    else if (__isset.num_children && !(num_children == rhs.num_children))
+      return false;
+    if (__isset.converted_type != rhs.__isset.converted_type)
+      return false;
+    else if (__isset.converted_type && !(converted_type == rhs.converted_type))
+      return false;
+    if (__isset.scale != rhs.__isset.scale)
+      return false;
+    else if (__isset.scale && !(scale == rhs.scale))
+      return false;
+    if (__isset.precision != rhs.__isset.precision)
+      return false;
+    else if (__isset.precision && !(precision == rhs.precision))
+      return false;
+    return true;
+  }
+  bool operator != (const SchemaElement &rhs) const {
+    return !(*this == rhs);
+  }
+
+  bool operator < (const SchemaElement & ) const;  // Declared only; no definition in this header.
+  // read/write take a Thrift TProtocol; their bodies are not in this header.
+  uint32_t read(::apache::thrift::protocol::TProtocol* iprot);
+  uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const;
+
+};
+
+void swap(SchemaElement &a, SchemaElement &b);  // Declared only; presumably exchanges the contents of a and b - confirm in the defining .cpp.
+
+typedef struct _DataPageHeader__isset {  // Presence flag for DataPageHeader::statistics, the only flagged field; starts false.
+  _DataPageHeader__isset() : statistics(false) {}
+  bool statistics;
+} _DataPageHeader__isset;
+
+class DataPageHeader {  // Thrift-generated record: value count, three Encoding fields, and flagged Statistics.
+ public:
+  // Fingerprints are only declared here; the generator left their values in the trailing comments.
+  static const char* ascii_fingerprint; // = "5FC1792B0483E9C984475384165040B1";
+  static const uint8_t binary_fingerprint[16]; // = {0x5F,0xC1,0x79,0x2B,0x04,0x83,0xE9,0xC9,0x84,0x47,0x53,0x84,0x16,0x50,0x40,0xB1};
+  // Default state: zero count and encodings cast from 0; statistics is default-constructed and unflagged.
+  DataPageHeader() : num_values(0), encoding((Encoding::type)0), definition_level_encoding((Encoding::type)0), repetition_level_encoding((Encoding::type)0) {
+  }
+
+  virtual ~DataPageHeader() throw() {}
+
+  int32_t num_values;
+  Encoding::type encoding;
+  Encoding::type definition_level_encoding;
+  Encoding::type repetition_level_encoding;
+  Statistics statistics;
+
+  _DataPageHeader__isset __isset;  // Tracks whether statistics has been set.
+  // Setters store the value; only __set_statistics also records presence.
+  void __set_num_values(const int32_t val) {
+    num_values = val;
+  }
+
+  void __set_encoding(const Encoding::type val) {
+    encoding = val;
+  }
+
+  void __set_definition_level_encoding(const Encoding::type val) {
+    definition_level_encoding = val;
+  }
+
+  void __set_repetition_level_encoding(const Encoding::type val) {
+    repetition_level_encoding = val;
+  }
+
+  void __set_statistics(const Statistics& val) {
+    statistics = val;
+    __isset.statistics = true;
+  }
+  // Equality: unflagged fields are always compared; statistics needs matching flags and is compared only when set.
+  bool operator == (const DataPageHeader & rhs) const
+  {
+    if (!(num_values == rhs.num_values))
+      return false;
+    if (!(encoding == rhs.encoding))
+      return false;
+    if (!(definition_level_encoding == rhs.definition_level_encoding))
+      return false;
+    if (!(repetition_level_encoding == rhs.repetition_level_encoding))
+      return false;
+    if (__isset.statistics != rhs.__isset.statistics)
+      return false;
+    else if (__isset.statistics && !(statistics == rhs.statistics))
+      return false;
+    return true;
+  }
+  bool operator != (const DataPageHeader &rhs) const {
+    return !(*this == rhs);
+  }
+
+  bool operator < (const DataPageHeader & ) const;  // Declared only; no definition in this header.
+  // read/write take a Thrift TProtocol; their bodies are not in this header.
+  uint32_t read(::apache::thrift::protocol::TProtocol* iprot);
+  uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const;
+
+};
+
+void swap(DataPageHeader &a, DataPageHeader &b);  // Declared only; presumably exchanges the contents of a and b - confirm in the defining .cpp.
+
+
+class IndexPageHeader {  // Thrift-generated record with no data members.
+ public:
+  // Fingerprints are only declared here; the generator left their values in the trailing comments.
+  static const char* ascii_fingerprint; // = "99914B932BD37A50B983C5E7C90AE93B";
+  static const uint8_t binary_fingerprint[16]; // = {0x99,0x91,0x4B,0x93,0x2B,0xD3,0x7A,0x50,0xB9,0x83,0xC5,0xE7,0xC9,0x0A,0xE9,0x3B};
+
+  IndexPageHeader() {
+  }
+
+  virtual ~IndexPageHeader() throw() {}
+
+  // With no fields to compare, any two instances are equal.
+  bool operator == (const IndexPageHeader & /* rhs */) const
+  {
+    return true;
+  }
+  bool operator != (const IndexPageHeader &rhs) const {
+    return !(*this == rhs);
+  }
+
+  bool operator < (const IndexPageHeader & ) const;  // Declared only; no definition in this header.
+  // read/write take a Thrift TProtocol; their bodies are not in this header.
+  uint32_t read(::apache::thrift::protocol::TProtocol* iprot);
+  uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const;
+
+};
+
+void swap(IndexPageHeader &a, IndexPageHeader &b);  // Declared only; definition is not in this header.
+
+typedef struct _DictionaryPageHeader__isset {  // Presence flag for DictionaryPageHeader::is_sorted, the only flagged field; starts false.
+  _DictionaryPageHeader__isset() : is_sorted(false) {}
+  bool is_sorted;
+} _DictionaryPageHeader__isset;
+
+class DictionaryPageHeader {  // Thrift-generated record: value count, encoding, and a flagged is_sorted boolean.
+ public:
+  // Fingerprints are only declared here; the generator left their values in the trailing comments.
+  static const char* ascii_fingerprint; // = "B149E4528254D495610C22AE4BD539C5";
+  static const uint8_t binary_fingerprint[16]; // = {0xB1,0x49,0xE4,0x52,0x82,0x54,0xD4,0x95,0x61,0x0C,0x22,0xAE,0x4B,0xD5,0x39,0xC5};
+  // Default state: zero count, encoding cast from 0, is_sorted initialised from 0 (false) and unflagged.
+  DictionaryPageHeader() : num_values(0), encoding((Encoding::type)0), is_sorted(0) {
+  }
+
+  virtual ~DictionaryPageHeader() throw() {}
+
+  int32_t num_values;
+  Encoding::type encoding;
+  bool is_sorted;
+
+  _DictionaryPageHeader__isset __isset;  // Tracks whether is_sorted has been set.
+  // Setters store the value; only __set_is_sorted also records presence.
+  void __set_num_values(const int32_t val) {
+    num_values = val;
+  }
+
+  void __set_encoding(const Encoding::type val) {
+    encoding = val;
+  }
+
+  void __set_is_sorted(const bool val) {
+    is_sorted = val;
+    __isset.is_sorted = true;
+  }
+  // Equality: num_values and encoding are always compared; is_sorted needs matching flags and is compared only when set.
+  bool operator == (const DictionaryPageHeader & rhs) const
+  {
+    if (!(num_values == rhs.num_values))
+      return false;
+    if (!(encoding == rhs.encoding))
+      return false;
+    if (__isset.is_sorted != rhs.__isset.is_sorted)
+      return false;
+    else if (__isset.is_sorted && !(is_sorted == rhs.is_sorted))
+      return false;
+    return true;
+  }
+  bool operator != (const DictionaryPageHeader &rhs) const {
+    return !(*this == rhs);
+  }
+
+  bool operator < (const DictionaryPageHeader & ) const;  // Declared only; no definition in this header.
+  // read/write take a Thrift TProtocol; their bodies are not in this header.
+  uint32_t read(::apache::thrift::protocol::TProtocol* iprot);
+  uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const;
+
+};
+
+void swap(DictionaryPageHeader &a, DictionaryPageHeader &b);  // Declared only; presumably exchanges the contents of a and b - confirm in the defining .cpp.
+
+typedef struct _DataPageHeaderV2__isset {  // Presence flags for DataPageHeaderV2; note is_compressed starts true while statistics starts false.
+  _DataPageHeaderV2__isset() : is_compressed(true), statistics(false) {}
+  bool is_compressed;
+  bool statistics;
+} _DataPageHeaderV2__isset;
+
+class DataPageHeaderV2 {  // Thrift-generated record: value/null/row counts, encoding, level byte lengths, flagged is_compressed and statistics.
+ public:
+  // Fingerprints are only declared here; the generator left their values in the trailing comments.
+  static const char* ascii_fingerprint; // = "69FF2F6BD1A443440D5E46ABA5A3A919";
+  static const uint8_t binary_fingerprint[16]; // = {0x69,0xFF,0x2F,0x6B,0xD1,0xA4,0x43,0x44,0x0D,0x5E,0x46,0xAB,0xA5,0xA3,0xA9,0x19};
+  // Default state: integers zero, encoding cast from 0, and is_compressed true (its presence flag also starts true).
+  DataPageHeaderV2() : num_values(0), num_nulls(0), num_rows(0), encoding((Encoding::type)0), definition_levels_byte_length(0), repetition_levels_byte_length(0), is_compressed(true) {
+  }
+
+  virtual ~DataPageHeaderV2() throw() {}
+
+  int32_t num_values;
+  int32_t num_nulls;
+  int32_t num_rows;
+  Encoding::type encoding;
+  int32_t definition_levels_byte_length;
+  int32_t repetition_levels_byte_length;
+  bool is_compressed;
+  Statistics statistics;
+
+  _DataPageHeaderV2__isset __isset;  // Presence flags for is_compressed and statistics.
+  // Setters store the value; only __set_is_compressed and __set_statistics also record presence.
+  void __set_num_values(const int32_t val) {
+    num_values = val;
+  }
+
+  void __set_num_nulls(const int32_t val) {
+    num_nulls = val;
+  }
+
+  void __set_num_rows(const int32_t val) {
+    num_rows = val;
+  }
+
+  void __set_encoding(const Encoding::type val) {
+    encoding = val;
+  }
+
+  void __set_definition_levels_byte_length(const int32_t val) {
+    definition_levels_byte_length = val;
+  }
+
+  void __set_repetition_levels_byte_length(const int32_t val) {
+    repetition_levels_byte_length = val;
+  }
+
+  void __set_is_compressed(const bool val) {
+    is_compressed = val;
+    __isset.is_compressed = true;
+  }
+
+  void __set_statistics(const Statistics& val) {
+    statistics = val;
+    __isset.statistics = true;
+  }
+  // Equality: unflagged fields are always compared; flagged ones need matching flags and are compared only when set.
+  bool operator == (const DataPageHeaderV2 & rhs) const
+  {
+    if (!(num_values == rhs.num_values))
+      return false;
+    if (!(num_nulls == rhs.num_nulls))
+      return false;
+    if (!(num_rows == rhs.num_rows))
+      return false;
+    if (!(encoding == rhs.encoding))
+      return false;
+    if (!(definition_levels_byte_length == rhs.definition_levels_byte_length))
+      return false;
+    if (!(repetition_levels_byte_length == rhs.repetition_levels_byte_length))
+      return false;
+    if (__isset.is_compressed != rhs.__isset.is_compressed)
+      return false;
+    else if (__isset.is_compressed && !(is_compressed == rhs.is_compressed))
+      return false;
+    if (__isset.statistics != rhs.__isset.statistics)
+      return false;
+    else if (__isset.statistics && !(statistics == rhs.statistics))
+      return false;
+    return true;
+  }
+  bool operator != (const DataPageHeaderV2 &rhs) const {
+    return !(*this == rhs);
+  }
+
+  bool operator < (const DataPageHeaderV2 & ) const;  // Declared only; no definition in this header.
+  // read/write take a Thrift TProtocol; their bodies are not in this header.
+  uint32_t read(::apache::thrift::protocol::TProtocol* iprot);
+  uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const;
+
+};
+
+void swap(DataPageHeaderV2 &a, DataPageHeaderV2 &b);  // Declared only; presumably exchanges the contents of a and b - confirm in the defining .cpp.
+
+typedef struct _PageHeader__isset {  // Presence flags for PageHeader's crc and its four nested header members; all start false.
+  _PageHeader__isset() : crc(false), data_page_header(false), index_page_header(false), dictionary_page_header(false), data_page_header_v2(false) {}
+  bool crc;
+  bool data_page_header;
+  bool index_page_header;
+  bool dictionary_page_header;
+  bool data_page_header_v2;
+} _PageHeader__isset;
+
+class PageHeader {  // Thrift-generated record: page type, two page sizes, flagged crc, and four flagged nested header structs.
+ public:
+  // Fingerprints are only declared here; the generator left their values in the trailing comments.
+  static const char* ascii_fingerprint; // = "B5BD2BDF3756C883A58B30B9C9F204A0";
+  static const uint8_t binary_fingerprint[16]; // = {0xB5,0xBD,0x2B,0xDF,0x37,0x56,0xC8,0x83,0xA5,0x8B,0x30,0xB9,0xC9,0xF2,0x04,0xA0};
+  // Only type, the sizes and crc get explicit initial values; the nested headers are default-constructed.
+  PageHeader() : type((PageType::type)0), uncompressed_page_size(0), compressed_page_size(0), crc(0) {
+  }
+
+  virtual ~PageHeader() throw() {}
+
+  PageType::type type;
+  int32_t uncompressed_page_size;
+  int32_t compressed_page_size;
+  int32_t crc;
+  DataPageHeader data_page_header;
+  IndexPageHeader index_page_header;
+  DictionaryPageHeader dictionary_page_header;
+  DataPageHeaderV2 data_page_header_v2;
+
+  _PageHeader__isset __isset;  // Presence flags for crc and the nested headers.
+  // Setters store the value; those for crc and the nested headers also record presence.
+  void __set_type(const PageType::type val) {
+    type = val;
+  }
+
+  void __set_uncompressed_page_size(const int32_t val) {
+    uncompressed_page_size = val;
+  }
+
+  void __set_compressed_page_size(const int32_t val) {
+    compressed_page_size = val;
+  }
+
+  void __set_crc(const int32_t val) {
+    crc = val;
+    __isset.crc = true;
+  }
+
+  void __set_data_page_header(const DataPageHeader& val) {
+    data_page_header = val;
+    __isset.data_page_header = true;
+  }
+
+  void __set_index_page_header(const IndexPageHeader& val) {
+    index_page_header = val;
+    __isset.index_page_header = true;
+  }
+
+  void __set_dictionary_page_header(const DictionaryPageHeader& val) {
+    dictionary_page_header = val;
+    __isset.dictionary_page_header = true;
+  }
+
+  void __set_data_page_header_v2(const DataPageHeaderV2& val) {
+    data_page_header_v2 = val;
+    __isset.data_page_header_v2 = true;
+  }
+  // Equality: type and sizes are always compared; flagged members need matching flags and are compared only when set.
+  bool operator == (const PageHeader & rhs) const
+  {
+    if (!(type == rhs.type))
+      return false;
+    if (!(uncompressed_page_size == rhs.uncompressed_page_size))
+      return false;
+    if (!(compressed_page_size == rhs.compressed_page_size))
+      return false;
+    if (__isset.crc != rhs.__isset.crc)
+      return false;
+    else if (__isset.crc && !(crc == rhs.crc))
+      return false;
+    if (__isset.data_page_header != rhs.__isset.data_page_header)
+      return false;
+    else if (__isset.data_page_header && !(data_page_header == rhs.data_page_header))
+      return false;
+    if (__isset.index_page_header != rhs.__isset.index_page_header)
+      return false;
+    else if (__isset.index_page_header && !(index_page_header == rhs.index_page_header))
+      return false;
+    if (__isset.dictionary_page_header != rhs.__isset.dictionary_page_header)
+      return false;
+    else if (__isset.dictionary_page_header && !(dictionary_page_header == rhs.dictionary_page_header))
+      return false;
+    if (__isset.data_page_header_v2 != rhs.__isset.data_page_header_v2)
+      return false;
+    else if (__isset.data_page_header_v2 && !(data_page_header_v2 == rhs.data_page_header_v2))
+      return false;
+    return true;
+  }
+  bool operator != (const PageHeader &rhs) const {
+    return !(*this == rhs);
+  }
+
+  bool operator < (const PageHeader & ) const;  // Declared only; no definition in this header.
+  // read/write take a Thrift TProtocol; their bodies are not in this header.
+  uint32_t read(::apache::thrift::protocol::TProtocol* iprot);
+  uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const;
+
+};
+
+void swap(PageHeader &a, PageHeader &b);  // Declared only; presumably exchanges the contents of a and b - confirm in the defining .cpp.
+
+typedef struct _KeyValue__isset {  // Presence flag for KeyValue::value (key has none); starts false.
+  _KeyValue__isset() : value(false) {}
+  bool value;
+} _KeyValue__isset;
+
+class KeyValue {  // Thrift-generated string pair: an always-compared key and a flagged value.
+ public:
+  // Fingerprints are only declared here; the generator left their values in the trailing comments.
+  static const char* ascii_fingerprint; // = "5B708A954C550ECA9C1A49D3C5CAFAB9";
+  static const uint8_t binary_fingerprint[16]; // = {0x5B,0x70,0x8A,0x95,0x4C,0x55,0x0E,0xCA,0x9C,0x1A,0x49,0xD3,0xC5,0xCA,0xFA,0xB9};
+  // Default state: both strings empty, value unflagged.
+  KeyValue() : key(), value() {
+  }
+
+  virtual ~KeyValue() throw() {}
+
+  std::string key;
+  std::string value;
+
+  _KeyValue__isset __isset;  // Tracks whether value has been set.
+  // Setters store the value; only __set_value also records presence.
+  void __set_key(const std::string& val) {
+    key = val;
+  }
+
+  void __set_value(const std::string& val) {
+    value = val;
+    __isset.value = true;
+  }
+  // Equality: key is always compared; value needs matching flags and is compared only when set.
+  bool operator == (const KeyValue & rhs) const
+  {
+    if (!(key == rhs.key))
+      return false;
+    if (__isset.value != rhs.__isset.value)
+      return false;
+    else if (__isset.value && !(value == rhs.value))
+      return false;
+    return true;
+  }
+  bool operator != (const KeyValue &rhs) const {
+    return !(*this == rhs);
+  }
+
+  bool operator < (const KeyValue & ) const;  // Declared only; no definition in this header.
+  // read/write take a Thrift TProtocol; their bodies are not in this header.
+  uint32_t read(::apache::thrift::protocol::TProtocol* iprot);
+  uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const;
+
+};
+
+void swap(KeyValue &a, KeyValue &b);  // Declared only; presumably exchanges the contents of a and b - confirm in the defining .cpp.
+
+
+class SortingColumn {  // Thrift-generated record: a column index plus descending and nulls_first booleans; no presence flags.
+ public:
+  // Fingerprints are only declared here; the generator left their values in the trailing comments.
+  static const char* ascii_fingerprint; // = "F079C2D58A783AD90F9BE05D10DBBC6F";
+  static const uint8_t binary_fingerprint[16]; // = {0xF0,0x79,0xC2,0xD5,0x8A,0x78,0x3A,0xD9,0x0F,0x9B,0xE0,0x5D,0x10,0xDB,0xBC,0x6F};
+  // Default state: index zero and both booleans initialised from 0 (false).
+  SortingColumn() : column_idx(0), descending(0), nulls_first(0) {
+  }
+
+  virtual ~SortingColumn() throw() {}
+
+  int32_t column_idx;
+  bool descending;
+  bool nulls_first;
+  // Plain setters: there is no __isset member in this class, so nothing else is recorded.
+  void __set_column_idx(const int32_t val) {
+    column_idx = val;
+  }
+
+  void __set_descending(const bool val) {
+    descending = val;
+  }
+
+  void __set_nulls_first(const bool val) {
+    nulls_first = val;
+  }
+  // Equality: all three fields are always compared.
+  bool operator == (const SortingColumn & rhs) const
+  {
+    if (!(column_idx == rhs.column_idx))
+      return false;
+    if (!(descending == rhs.descending))
+      return false;
+    if (!(nulls_first == rhs.nulls_first))
+      return false;
+    return true;
+  }
+  bool operator != (const SortingColumn &rhs) const {
+    return !(*this == rhs);
+  }
+
+  bool operator < (const SortingColumn & ) const;  // Declared only; no definition in this header.
+  // read/write take a Thrift TProtocol; their bodies are not in this header.
+  uint32_t read(::apache::thrift::protocol::TProtocol* iprot);
+  uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const;
+
+};
+
+void swap(SortingColumn &a, SortingColumn &b);  // Declared only; presumably exchanges the contents of a and b - confirm in the defining .cpp.
+
+typedef struct _ColumnMetaData__isset {  // Presence flags for the four flagged ColumnMetaData fields; all start false.
+  _ColumnMetaData__isset() : key_value_metadata(false), index_page_offset(false), dictionary_page_offset(false), statistics(false) {}
+  bool key_value_metadata;
+  bool index_page_offset;
+  bool dictionary_page_offset;
+  bool statistics;
+} _ColumnMetaData__isset;
+
+class ColumnMetaData {  // Thrift-generated record: type, encodings, schema path, codec, counts/sizes, page offsets, and flagged extras.
+ public:
+  // Fingerprints are only declared here; the generator left their values in the trailing comments.
+  static const char* ascii_fingerprint; // = "1AF797732BCB4465C6314FB29B86638D";
+  static const uint8_t binary_fingerprint[16]; // = {0x1A,0xF7,0x97,0x73,0x2B,0xCB,0x44,0x65,0xC6,0x31,0x4F,0xB2,0x9B,0x86,0x63,0x8D};
+  // Default state: enums cast from 0 and integers zero; the vectors and statistics are default-constructed.
+  ColumnMetaData() : type((Type::type)0), codec((CompressionCodec::type)0), num_values(0), total_uncompressed_size(0), total_compressed_size(0), data_page_offset(0), index_page_offset(0), dictionary_page_offset(0) {
+  }
+
+  virtual ~ColumnMetaData() throw() {}
+
+  Type::type type;
+  std::vector<Encoding::type>  encodings;
+  std::vector<std::string>  path_in_schema;
+  CompressionCodec::type codec;
+  int64_t num_values;
+  int64_t total_uncompressed_size;
+  int64_t total_compressed_size;
+  std::vector<KeyValue>  key_value_metadata;
+  int64_t data_page_offset;
+  int64_t index_page_offset;
+  int64_t dictionary_page_offset;
+  Statistics statistics;
+
+  _ColumnMetaData__isset __isset;  // Presence flags for key_value_metadata, index/dictionary page offsets and statistics.
+  // Setters store the value; those for the four flagged fields also record presence.
+  void __set_type(const Type::type val) {
+    type = val;
+  }
+
+  void __set_encodings(const std::vector<Encoding::type> & val) {
+    encodings = val;
+  }
+
+  void __set_path_in_schema(const std::vector<std::string> & val) {
+    path_in_schema = val;
+  }
+
+  void __set_codec(const CompressionCodec::type val) {
+    codec = val;
+  }
+
+  void __set_num_values(const int64_t val) {
+    num_values = val;
+  }
+
+  void __set_total_uncompressed_size(const int64_t val) {
+    total_uncompressed_size = val;
+  }
+
+  void __set_total_compressed_size(const int64_t val) {
+    total_compressed_size = val;
+  }
+
+  void __set_key_value_metadata(const std::vector<KeyValue> & val) {
+    key_value_metadata = val;
+    __isset.key_value_metadata = true;
+  }
+
+  void __set_data_page_offset(const int64_t val) {
+    data_page_offset = val;
+  }
+
+  void __set_index_page_offset(const int64_t val) {
+    index_page_offset = val;
+    __isset.index_page_offset = true;
+  }
+
+  void __set_dictionary_page_offset(const int64_t val) {
+    dictionary_page_offset = val;
+    __isset.dictionary_page_offset = true;
+  }
+
+  void __set_statistics(const Statistics& val) {
+    statistics = val;
+    __isset.statistics = true;
+  }
+  // Equality: unflagged fields are always compared; flagged ones need matching flags and are compared only when set.
+  bool operator == (const ColumnMetaData & rhs) const
+  {
+    if (!(type == rhs.type))
+      return false;
+    if (!(encodings == rhs.encodings))
+      return false;
+    if (!(path_in_schema == rhs.path_in_schema))
+      return false;
+    if (!(codec == rhs.codec))
+      return false;
+    if (!(num_values == rhs.num_values))
+      return false;
+    if (!(total_uncompressed_size == rhs.total_uncompressed_size))
+      return false;
+    if (!(total_compressed_size == rhs.total_compressed_size))
+      return false;
+    if (__isset.key_value_metadata != rhs.__isset.key_value_metadata)
+      return false;
+    else if (__isset.key_value_metadata && !(key_value_metadata == rhs.key_value_metadata))
+      return false;
+    if (!(data_page_offset == rhs.data_page_offset))
+      return false;
+    if (__isset.index_page_offset != rhs.__isset.index_page_offset)
+      return false;
+    else if (__isset.index_page_offset && !(index_page_offset == rhs.index_page_offset))
+      return false;
+    if (__isset.dictionary_page_offset != rhs.__isset.dictionary_page_offset)
+      return false;
+    else if (__isset.dictionary_page_offset && !(dictionary_page_offset == rhs.dictionary_page_offset))
+      return false;
+    if (__isset.statistics != rhs.__isset.statistics)
+      return false;
+    else if (__isset.statistics && !(statistics == rhs.statistics))
+      return false;
+    return true;
+  }
+  bool operator != (const ColumnMetaData &rhs) const {
+    return !(*this == rhs);
+  }
+
+  bool operator < (const ColumnMetaData & ) const;  // Declared only; no definition in this header.
+  // read/write take a Thrift TProtocol; their bodies are not in this header.
+  uint32_t read(::apache::thrift::protocol::TProtocol* iprot);
+  uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const;
+
+};
+
+void swap(ColumnMetaData &a, ColumnMetaData &b);  // Declared only; presumably exchanges the contents of a and b - confirm in the defining .cpp.
+
+typedef struct _ColumnChunk__isset {  // Presence flags for ColumnChunk's file_path and meta_data (file_offset has none); both start false.
+  _ColumnChunk__isset() : file_path(false), meta_data(false) {}
+  bool file_path;
+  bool meta_data;
+} _ColumnChunk__isset;
+
+class ColumnChunk {  // Thrift-generated record: flagged file_path, an always-compared file_offset, and flagged ColumnMetaData.
+ public:
+  // Fingerprints are only declared here; the generator left their values in the trailing comments.
+  static const char* ascii_fingerprint; // = "169FC47057EF3D82E2FACDDEC2641AE8";
+  static const uint8_t binary_fingerprint[16]; // = {0x16,0x9F,0xC4,0x70,0x57,0xEF,0x3D,0x82,0xE2,0xFA,0xCD,0xDE,0xC2,0x64,0x1A,0xE8};
+  // Default state: empty path and zero offset; meta_data is default-constructed and unflagged.
+  ColumnChunk() : file_path(), file_offset(0) {
+  }
+
+  virtual ~ColumnChunk() throw() {}
+
+  std::string file_path;
+  int64_t file_offset;
+  ColumnMetaData meta_data;
+
+  _ColumnChunk__isset __isset;  // Presence flags for file_path and meta_data.
+  // Setters store the value; those for file_path and meta_data also record presence.
+  void __set_file_path(const std::string& val) {
+    file_path = val;
+    __isset.file_path = true;
+  }
+
+  void __set_file_offset(const int64_t val) {
+    file_offset = val;
+  }
+
+  void __set_meta_data(const ColumnMetaData& val) {
+    meta_data = val;
+    __isset.meta_data = true;
+  }
+  // Equality: file_offset is always compared; flagged fields need matching flags and are compared only when set.
+  bool operator == (const ColumnChunk & rhs) const
+  {
+    if (__isset.file_path != rhs.__isset.file_path)
+      return false;
+    else if (__isset.file_path && !(file_path == rhs.file_path))
+      return false;
+    if (!(file_offset == rhs.file_offset))
+      return false;
+    if (__isset.meta_data != rhs.__isset.meta_data)
+      return false;
+    else if (__isset.meta_data && !(meta_data == rhs.meta_data))
+      return false;
+    return true;
+  }
+  bool operator != (const ColumnChunk &rhs) const {
+    return !(*this == rhs);
+  }
+
+  bool operator < (const ColumnChunk & ) const;  // Declared only; no definition in this header.
+  // read/write take a Thrift TProtocol; their bodies are not in this header.
+  uint32_t read(::apache::thrift::protocol::TProtocol* iprot);
+  uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const;
+
+};
+
+void swap(ColumnChunk &a, ColumnChunk &b);  // Declared only; presumably exchanges the contents of a and b - confirm in the defining .cpp.
+
+typedef struct _RowGroup__isset {  // Presence flag for RowGroup::sorting_columns, the only flagged field; starts false.
+  _RowGroup__isset() : sorting_columns(false) {}
+  bool sorting_columns;
+} _RowGroup__isset;
+
+class RowGroup {  // Thrift-generated record: a vector of ColumnChunk, byte size, row count, and flagged sorting columns.
+ public:
+  // Fingerprints are only declared here; the generator left their values in the trailing comments.
+  static const char* ascii_fingerprint; // = "DC7968627FA826DDC4C6C9BE773586C9";
+  static const uint8_t binary_fingerprint[16]; // = {0xDC,0x79,0x68,0x62,0x7F,0xA8,0x26,0xDD,0xC4,0xC6,0xC9,0xBE,0x77,0x35,0x86,0xC9};
+  // Default state: zero size and row count; both vectors are default-constructed (empty).
+  RowGroup() : total_byte_size(0), num_rows(0) {
+  }
+
+  virtual ~RowGroup() throw() {}
+
+  std::vector<ColumnChunk>  columns;
+  int64_t total_byte_size;
+  int64_t num_rows;
+  std::vector<SortingColumn>  sorting_columns;
+
+  _RowGroup__isset __isset;  // Tracks whether sorting_columns has been set.
+  // Setters store the value; only __set_sorting_columns also records presence.
+  void __set_columns(const std::vector<ColumnChunk> & val) {
+    columns = val;
+  }
+
+  void __set_total_byte_size(const int64_t val) {
+    total_byte_size = val;
+  }
+
+  void __set_num_rows(const int64_t val) {
+    num_rows = val;
+  }
+
+  void __set_sorting_columns(const std::vector<SortingColumn> & val) {
+    sorting_columns = val;
+    __isset.sorting_columns = true;
+  }
+  // Equality: columns, size and row count are always compared; sorting_columns needs matching flags and is compared only when set.
+  bool operator == (const RowGroup & rhs) const
+  {
+    if (!(columns == rhs.columns))
+      return false;
+    if (!(total_byte_size == rhs.total_byte_size))
+      return false;
+    if (!(num_rows == rhs.num_rows))
+      return false;
+    if (__isset.sorting_columns != rhs.__isset.sorting_columns)
+      return false;
+    else if (__isset.sorting_columns && !(sorting_columns == rhs.sorting_columns))
+      return false;
+    return true;
+  }
+  bool operator != (const RowGroup &rhs) const {
+    return !(*this == rhs);
+  }
+
+  bool operator < (const RowGroup & ) const;  // Declared only; no definition in this header.
+  // read/write take a Thrift TProtocol; their bodies are not in this header.
+  uint32_t read(::apache::thrift::protocol::TProtocol* iprot);
+  uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const;
+
+};
+
+void swap(RowGroup &a, RowGroup &b);  // Declared only; presumably exchanges the contents of a and b - confirm in the defining .cpp.
+
+typedef struct _FileMetaData__isset {  // Presence flags for FileMetaData's key_value_metadata and created_by; both start false.
+  _FileMetaData__isset() : key_value_metadata(false), created_by(false) {}
+  bool key_value_metadata;
+  bool created_by;
+} _FileMetaData__isset;
+
+class FileMetaData {  // Thrift-generated record: version, schema elements, row count, row groups, plus flagged key/value metadata and created_by.
+ public:
+  // Fingerprints are only declared here; the generator left their values in the trailing comments.
+  static const char* ascii_fingerprint; // = "44DC7D83A66D54A7B7892A985C4125C9";
+  static const uint8_t binary_fingerprint[16]; // = {0x44,0xDC,0x7D,0x83,0xA6,0x6D,0x54,0xA7,0xB7,0x89,0x2A,0x98,0x5C,0x41,0x25,0xC9};
+  // Default state: zero version and row count, empty created_by; the vectors are default-constructed (empty).
+  FileMetaData() : version(0), num_rows(0), created_by() {
+  }
+
+  virtual ~FileMetaData() throw() {}
+
+  int32_t version;
+  std::vector<SchemaElement>  schema;
+  int64_t num_rows;
+  std::vector<RowGroup>  row_groups;
+  std::vector<KeyValue>  key_value_metadata;
+  std::string created_by;
+
+  _FileMetaData__isset __isset;  // Presence flags for key_value_metadata and created_by.
+  // Setters store the value; those for key_value_metadata and created_by also record presence.
+  void __set_version(const int32_t val) {
+    version = val;
+  }
+
+  void __set_schema(const std::vector<SchemaElement> & val) {
+    schema = val;
+  }
+
+  void __set_num_rows(const int64_t val) {
+    num_rows = val;
+  }
+
+  void __set_row_groups(const std::vector<RowGroup> & val) {
+    row_groups = val;
+  }
+
+  void __set_key_value_metadata(const std::vector<KeyValue> & val) {
+    key_value_metadata = val;
+    __isset.key_value_metadata = true;
+  }
+
+  void __set_created_by(const std::string& val) {
+    created_by = val;
+    __isset.created_by = true;
+  }
+  // Equality: unflagged fields are always compared; flagged ones need matching flags and are compared only when set.
+  bool operator == (const FileMetaData & rhs) const
+  {
+    if (!(version == rhs.version))
+      return false;
+    if (!(schema == rhs.schema))
+      return false;
+    if (!(num_rows == rhs.num_rows))
+      return false;
+    if (!(row_groups == rhs.row_groups))
+      return false;
+    if (__isset.key_value_metadata != rhs.__isset.key_value_metadata)
+      return false;
+    else if (__isset.key_value_metadata && !(key_value_metadata == rhs.key_value_metadata))
+      return false;
+    if (__isset.created_by != rhs.__isset.created_by)
+      return false;
+    else if (__isset.created_by && !(created_by == rhs.created_by))
+      return false;
+    return true;
+  }
+  bool operator != (const FileMetaData &rhs) const {
+    return !(*this == rhs);
+  }
+
+  bool operator < (const FileMetaData & ) const;  // Declared only; no definition in this header.
+  // read/write take a Thrift TProtocol; their bodies are not in this header.
+  uint32_t read(::apache::thrift::protocol::TProtocol* iprot);
+  uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const;
+
+};
+
+void swap(FileMetaData &a, FileMetaData &b);  // Declared only; presumably exchanges the contents of a and b - confirm in the defining .cpp.
+
+} // namespace
+
+#endif

http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/337cf584/src/parquet/util/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/src/parquet/util/CMakeLists.txt b/src/parquet/util/CMakeLists.txt
new file mode 100644
index 0000000..1a5de97
--- /dev/null
+++ b/src/parquet/util/CMakeLists.txt
@@ -0,0 +1,24 @@
+# Copyright 2015 Cloudera Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Headers: util
+install(FILES
+  bit-stream-utils.h
+  bit-stream-utils.inline.h
+  bit-util.h
+  compiler-util.h
+  logging.h
+  rle-encoding.h
+  stopwatch.h
+  DESTINATION include/parquet/util)

http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/337cf584/src/parquet/util/bit-stream-utils.h
----------------------------------------------------------------------
diff --git a/src/parquet/util/bit-stream-utils.h b/src/parquet/util/bit-stream-utils.h
new file mode 100644
index 0000000..7fba30a
--- /dev/null
+++ b/src/parquet/util/bit-stream-utils.h
@@ -0,0 +1,147 @@
+// Copyright 2012 Cloudera Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+
+#ifndef PARQUET_UTIL_BIT_STREAM_UTILS_H
+#define PARQUET_UTIL_BIT_STREAM_UTILS_H
+
+#include <string.h>
+#include <algorithm>
+#include <cstdint>
+
+#include "parquet/util/compiler-util.h"
+#include "parquet/util/bit-util.h"
+#include "parquet/util/logging.h"
+
+namespace parquet_cpp {
+
+// Utility class to write bit/byte streams.  This class can write data to either be
+// bit packed or byte aligned (and a single stream that has a mix of both).
+// This class does not allocate memory.
+class BitWriter {
+ public:
+  // buffer: destination for the written bits.  It must be preallocated with
+  // 'buffer_len' bytes; the writer only stores the pointer.
+  BitWriter(uint8_t* buffer, int buffer_len) :
+      buffer_(buffer),
+      max_bytes_(buffer_len) {
+    Clear();
+  }
+
+  void Clear() {  // Rewinds to the start of buffer_; the buffer contents are untouched
+    buffered_values_ = 0;
+    byte_offset_ = 0;
+    bit_offset_ = 0;
+  }
+
+  // The number of current bytes written, including the current byte (i.e. may include a
+  // fraction of a byte). Includes buffered values.
+  int bytes_written() const { return byte_offset_ + BitUtil::Ceil(bit_offset_, 8); }
+  uint8_t* buffer() const { return buffer_; }
+  int buffer_len() const { return max_bytes_; }
+
+  // Writes a value to buffered_values_, flushing to buffer_ if necessary.  This is bit
+  // packed.  Returns false if there was not enough space. num_bits must be <= 32.
+  bool PutValue(uint64_t v, int num_bits);  // v must fit in num_bits bits
+
+  // Writes v to the next aligned byte using num_bytes. If T is larger than num_bytes, the
+  // extra high-order bytes will be ignored. Returns false if there was not enough space.
+  template<typename T>
+  bool PutAligned(T v, int num_bytes);  // memcpy's the first num_bytes bytes of v
+
+  // Write a Vlq encoded int to the buffer.  Returns false if there was not enough
+  // room.  The value is written byte aligned.
+  // For more details on vlq:
+  // en.wikipedia.org/wiki/Variable-length_quantity
+  bool PutVlqInt(uint32_t v);
+  bool PutZigZagVlqInt(int32_t v);  // Zig-zag encodes v, then writes it with PutVlqInt
+
+  // Flushes the buffered bits, then returns a pointer to the next aligned byte and
+  // advances the write position by num_bytes.
+  // Returns NULL if there was not enough space.
+  uint8_t* GetNextBytePtr(int num_bytes = 1);
+
+  // Flushes all buffered values to the buffer. Call this when done writing to the buffer.
+  // If 'align' is true, buffered_values_ is reset and any future writes will be written
+  // to the next byte boundary.
+  void Flush(bool align = false);
+
+ private:
+  uint8_t* buffer_;       // Destination buffer passed to the constructor
+  int max_bytes_;         // Size of buffer_ in bytes
+
+  // Bit-packed values are initially written to this variable before being memcpy'd to
+  // buffer_. This is faster than writing values byte by byte directly to buffer_.
+  uint64_t buffered_values_;
+
+  int byte_offset_;       // Offset in buffer_
+  int bit_offset_;        // Offset in buffered_values_
+};
+
+// Utility class to read bit/byte stream.  This class can read bits or bytes
+// that are either byte aligned or not.  It also has utilities to read multiple
+// bytes in one read (e.g. encoded int).
+class BitReader {
+ public:
+  // 'buffer' is the buffer to read from.  The buffer's length is 'buffer_len'.
+  BitReader(const uint8_t* buffer, int buffer_len) :
+      buffer_(buffer), max_bytes_(buffer_len),
+      buffered_values_(0),  // Zeroed: the preload below may copy fewer than 8 bytes
+      byte_offset_(0), bit_offset_(0) {
+    // Preload the first (up to) 8 bytes of the stream.
+    memcpy(&buffered_values_, buffer_, std::min(8, max_bytes_));
+  }
+
+  // Empty reader.  Every member is initialized so none is read while indeterminate.
+  BitReader() : buffer_(NULL), max_bytes_(0),
+      buffered_values_(0), byte_offset_(0), bit_offset_(0) {}
+  // Gets the next value from the buffer.  Returns true if 'v' could be read or false if
+  // there are not enough bytes left. num_bits must be <= 32.
+  template<typename T>
+  bool GetValue(int num_bits, T* v);
+
+  // Reads a 'num_bytes'-sized value from the buffer and stores it in 'v'. T needs to be a
+  // little-endian native type and big enough to store 'num_bytes'. The value is assumed
+  // to be byte-aligned so the stream will be advanced to the start of the next byte
+  // before 'v' is read. Returns false if there are not enough bytes left.
+  template<typename T>
+  bool GetAligned(int num_bytes, T* v);
+
+  // Reads a vlq encoded int from the stream.  The encoded int must start at the
+  // beginning of a byte. Return false if there were not enough bytes in the buffer.
+  bool GetVlqInt(uint64_t* v);
+  bool GetZigZagVlqInt(int64_t* v);
+
+  // Returns the number of bytes left in the stream, not including the current byte (i.e.,
+  // there may be an additional fraction of a byte).
+  int bytes_left() { return max_bytes_ - (byte_offset_ + BitUtil::Ceil(bit_offset_, 8)); }
+
+  // Maximum byte length of a vlq encoded int
+  static const int MAX_VLQ_BYTE_LEN = 5;
+
+ private:
+  const uint8_t* buffer_;  // Input buffer passed to the constructor
+  int max_bytes_;          // Size of buffer_ in bytes
+
+  // Bytes are memcpy'd from buffer_ and values are read from this variable. This is
+  // faster than reading values byte by byte directly from buffer_.
+  uint64_t buffered_values_;
+
+  int byte_offset_;       // Offset in buffer_
+  int bit_offset_;        // Offset in buffered_values_
+};
+
+} // namespace parquet_cpp
+
+#endif

http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/337cf584/src/parquet/util/bit-stream-utils.inline.h
----------------------------------------------------------------------
diff --git a/src/parquet/util/bit-stream-utils.inline.h b/src/parquet/util/bit-stream-utils.inline.h
new file mode 100644
index 0000000..8678e50
--- /dev/null
+++ b/src/parquet/util/bit-stream-utils.inline.h
@@ -0,0 +1,164 @@
+// Copyright 2012 Cloudera Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+
+#ifndef PARQUET_UTIL_BIT_STREAM_UTILS_INLINE_H
+#define PARQUET_UTIL_BIT_STREAM_UTILS_INLINE_H
+
+#include "parquet/util/bit-stream-utils.h"
+
+namespace parquet_cpp {
+
+inline bool BitWriter::PutValue(uint64_t v, int num_bits) {
+  // TODO: revisit this limit if necessary (can be raised to 64 by fixing some edge cases)
+  DCHECK_LE(num_bits, 32);
+  DCHECK_EQ(v >> num_bits, 0) << "v = " << v << ", num_bits = " << num_bits;
+  // Refuse the write, leaving all state untouched, if the value would not fit.
+  if (UNLIKELY(byte_offset_ * 8 + bit_offset_ + num_bits > max_bytes_ * 8)) return false;
+  // Pack v directly above the bits already pending in the 64-bit staging word.
+  buffered_values_ |= v << bit_offset_;
+  bit_offset_ += num_bits;
+
+  if (UNLIKELY(bit_offset_ >= 64)) {
+    // Flush buffered_values_ and write out bits of v that did not fit
+    memcpy(buffer_ + byte_offset_, &buffered_values_, 8);
+    buffered_values_ = 0;
+    byte_offset_ += 8;
+    bit_offset_ -= 64;  // Now the number of bits of v that spilled past the flushed word
+    buffered_values_ = v >> (num_bits - bit_offset_);  // Keep only the spilled high bits
+  }
+  DCHECK_LT(bit_offset_, 64);
+  return true;
+}
+
+inline void BitWriter::Flush(bool align) {
+  // Write out every byte of the staging word that holds at least one pending bit.
+  const int pending_bytes = BitUtil::Ceil(bit_offset_, 8);
+  DCHECK_LE(byte_offset_ + pending_bytes, max_bytes_);
+  memcpy(buffer_ + byte_offset_, &buffered_values_, pending_bytes);
+  if (!align) return;
+  // Aligning: step past the flushed bytes and start again with an empty staging word.
+  byte_offset_ += pending_bytes;
+  bit_offset_ = 0;
+  buffered_values_ = 0;
+}
+
+inline uint8_t* BitWriter::GetNextBytePtr(int num_bytes) {
+  Flush(true);  // Byte-align first: pending bits are written and the cursor advanced
+  DCHECK_LE(byte_offset_, max_bytes_);
+  uint8_t* const reserved = buffer_ + byte_offset_;
+  if (byte_offset_ + num_bytes > max_bytes_) return NULL;  // Not enough room left
+  byte_offset_ += num_bytes;
+  return reserved;
+}
+
+template<typename T>
+inline bool BitWriter::PutAligned(T val, int num_bytes) {
+  // Reserve num_bytes at the next byte boundary and copy val's leading bytes there.
+  uint8_t* const dest = GetNextBytePtr(num_bytes);
+  if (dest != NULL) memcpy(dest, &val, num_bytes);
+  return dest != NULL;
+}
+
+inline bool BitWriter::PutVlqInt(uint32_t v) {
+  bool ok = true;
+  // Emit 7 bits per byte, lowest group first; bit 7 set means more bytes follow.
+  for (; v >= 0x80; v >>= 7) {
+    ok = PutAligned<uint8_t>((v & 0x7F) | 0x80, 1) && ok;
+  }
+  ok = PutAligned<uint8_t>(v & 0x7F, 1) && ok;
+  return ok;
+}
+
+inline bool BitWriter::PutZigZagVlqInt(int32_t v) {
+  // Zig-zag in unsigned arithmetic: shifting a negative int32_t left is undefined.
+  return PutVlqInt((static_cast<uint32_t>(v) << 1) ^ (v < 0 ? 0xFFFFFFFFu : 0u));
+}
+
+template<typename T>
+inline bool BitReader::GetValue(int num_bits, T* v) {
+  // TODO: revisit this limit if necessary
+  DCHECK_LE(num_bits, 32);
+  DCHECK_LE(num_bits, sizeof(T) * 8);
+  // Fail without consuming anything if fewer than num_bits bits remain in the buffer.
+  if (UNLIKELY(byte_offset_ * 8 + bit_offset_ + num_bits > max_bytes_ * 8)) return false;
+  // Extract the part of the value that lies in the current 64-bit word.
+  *v = BitUtil::TrailingBits(buffered_values_, bit_offset_ + num_bits) >> bit_offset_;
+
+  bit_offset_ += num_bits;
+  if (bit_offset_ >= 64) {  // Current word exhausted: load the next one
+    byte_offset_ += 8;
+    bit_offset_ -= 64;  // Now the number of bits of this value held in the next word
+
+    int bytes_remaining = max_bytes_ - byte_offset_;
+    if (LIKELY(bytes_remaining >= 8)) {
+      memcpy(&buffered_values_, buffer_ + byte_offset_, 8);
+    } else {  // Tail of the buffer: copy only the bytes that exist
+      memcpy(&buffered_values_, buffer_ + byte_offset_, bytes_remaining);
+    }
+
+    // Read bits of v that crossed into new buffered_values_
+    *v |= BitUtil::TrailingBits(buffered_values_, bit_offset_)
+          << (num_bits - bit_offset_);
+  }
+  DCHECK_LE(bit_offset_, 64);
+  return true;
+}
+
+template<typename T>
+inline bool BitReader::GetAligned(int num_bytes, T* v) {
+  DCHECK_LE(num_bytes, sizeof(T));
+  // Round the read position up to the next byte boundary.
+  const int start = byte_offset_ + BitUtil::Ceil(bit_offset_, 8);
+  if (UNLIKELY(start + num_bytes > max_bytes_)) return false;
+
+  // Copy the value; only the first 'num_bytes' bytes of *v are written.
+  memcpy(v, buffer_ + start, num_bytes);
+  byte_offset_ = start + num_bytes;
+  bit_offset_ = 0;
+
+  // Reload buffered_values_; near the end of the buffer fewer than 8 bytes remain.
+  const int tail = max_bytes_ - byte_offset_;
+  if (UNLIKELY(tail < 8)) {
+    memcpy(&buffered_values_, buffer_ + byte_offset_, tail);
+  } else {
+    memcpy(&buffered_values_, buffer_ + byte_offset_, 8);
+  }
+  return true;
+}
+
+inline bool BitReader::GetVlqInt(uint64_t* v) {
+  *v = 0;
+  int shift = 0;
+  int num_bytes = 0;
+  uint8_t byte = 0;
+  do {
+    if (UNLIKELY(++num_bytes > MAX_VLQ_BYTE_LEN)) return false;  // Over-long encoding
+    if (!GetAligned<uint8_t>(1, &byte)) return false;
+    *v |= static_cast<uint64_t>(byte & 0x7F) << shift;  // Widen first: int overflows
+    shift += 7;
+  } while ((byte & 0x80) != 0);
+  return true;
+}
+
+inline bool BitReader::GetZigZagVlqInt(int64_t* v) {
+  uint64_t zigzag;  // Encoded form: bit 0 selects the sign, the rest is the payload
+  const bool ok = GetVlqInt(&zigzag);
+  if (ok) *reinterpret_cast<uint64_t*>(v) = (zigzag >> 1) ^ (~(zigzag & 1) + 1);
+  return ok;
+}
+
+} // namespace parquet_cpp
+
+#endif // PARQUET_UTIL_BIT_STREAM_UTILS_INLINE_H

http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/337cf584/src/parquet/util/bit-util.h
----------------------------------------------------------------------
diff --git a/src/parquet/util/bit-util.h b/src/parquet/util/bit-util.h
new file mode 100644
index 0000000..3fbdbbe
--- /dev/null
+++ b/src/parquet/util/bit-util.h
@@ -0,0 +1,174 @@
+// Copyright 2012 Cloudera Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+
+#ifndef PARQUET_UTIL_BIT_UTIL_H
+#define PARQUET_UTIL_BIT_UTIL_H
+
+#if defined(__APPLE__)
+  #include <machine/endian.h>
+#else
+  #include <endian.h>
+#endif
+
+#include "parquet/util/compiler-util.h"
+#include "parquet/util/logging.h"
+
+namespace parquet_cpp {
+
+// Utility class to do standard bit tricks
+// TODO: is this in boost or something else like that?
+class BitUtil {
+ public:
+  // Returns the ceil of value/divisor (correct for value >= 0 and divisor > 0)
+  static inline int Ceil(int value, int divisor) {
+    return value / divisor + (value % divisor != 0);
+  }
+
+  // Returns 'value' rounded up to the nearest multiple of 'factor'
+  static inline int RoundUp(int value, int factor) {
+    return (value + (factor - 1)) / factor * factor;
+  }
+
+  // Returns 'value' rounded down to the nearest multiple of 'factor'
+  static inline int RoundDown(int value, int factor) {
+    return (value / factor) * factor;
+  }
+
+  // Returns the number of set bits in x
+  static inline int Popcount(uint64_t x) {
+    int count = 0;
+    for (; x != 0; ++count) x &= x-1;  // Each iteration clears the lowest set bit
+    return count;
+  }
+
+  // Returns the 'num_bits' least-significant bits of 'v'.
+  static inline uint64_t TrailingBits(uint64_t v, int num_bits) {
+    if (UNLIKELY(num_bits == 0)) return 0;
+    if (UNLIKELY(num_bits >= 64)) return v;
+    int n = 64 - num_bits;
+    return (v << n) >> n;
+  }
+
+  // Returns ceil(log2(x)) for x >= 2.  Returns 0 for x == 0 and, note, 1 for x == 1.
+  // TODO: this could be faster if we use __builtin_clz.  Fix this if this ever shows up
+  // in a hot path.
+  static inline int Log2(uint64_t x) {
+    if (x == 0) return 0;
+    // Compute result = ceil(log2(x))
+    //                = floor(log2(x - 1)) + 1, for x > 1
+    // by finding the position of the most significant bit (1-indexed) of x - 1
+    // (floor(log2(n)) = MSB(n) (0-indexed))
+    --x;
+    int result = 1;
+    while (x >>= 1) ++result;
+    return result;
+  }
+
+  // Returns the minimum number of bits needed to represent the value of 'x'
+  static inline int NumRequiredBits(uint64_t x) {
+    for (int i = 63; i >= 0; --i) {
+      if (x & (1ULL << i)) return i + 1;  // Unsigned 64-bit mask: 1L may be only 32 bits
+    }
+    return 0;
+  }
+
+  // Swaps the byte order (i.e. endianess)
+  static inline int64_t ByteSwap(int64_t value) {
+    return __builtin_bswap64(value);
+  }
+  static inline uint64_t ByteSwap(uint64_t value) {
+    return static_cast<uint64_t>(__builtin_bswap64(value));
+  }
+  static inline int32_t ByteSwap(int32_t value) {
+    return __builtin_bswap32(value);
+  }
+  static inline uint32_t ByteSwap(uint32_t value) {
+    return static_cast<uint32_t>(__builtin_bswap32(value));
+  }
+  static inline int16_t ByteSwap(int16_t value) {
+    return (((value >> 8) & 0xff) | ((value & 0xff) << 8));
+  }
+  static inline uint16_t ByteSwap(uint16_t value) {
+    return static_cast<uint16_t>(ByteSwap(static_cast<int16_t>(value)));
+  }
+
+  // Write the swapped bytes into dst. src and dst cannot overlap.
+  static inline void ByteSwap(void* dst, const void* src, int len) {
+    switch (len) {
+      case 1:
+        *reinterpret_cast<int8_t*>(dst) = *reinterpret_cast<const int8_t*>(src);
+        return;
+      case 2:
+        *reinterpret_cast<int16_t*>(dst) =
+            ByteSwap(*reinterpret_cast<const int16_t*>(src));
+        return;
+      case 4:
+        *reinterpret_cast<int32_t*>(dst) =
+            ByteSwap(*reinterpret_cast<const int32_t*>(src));
+        return;
+      case 8:
+        *reinterpret_cast<int64_t*>(dst) =
+            ByteSwap(*reinterpret_cast<const int64_t*>(src));
+        return;
+      default: break;
+    }
+    // Any other length: reverse the bytes one at a time.
+    uint8_t* d = reinterpret_cast<uint8_t*>(dst);
+    const uint8_t* s = reinterpret_cast<const uint8_t*>(src);
+    for (int i = 0; i < len; ++i) {
+      d[i] = s[len - i - 1];
+    }
+  }
+
+  // Converts to big endian format (if not already in big endian) from the
+  // machine's native endian format.
+#if __BYTE_ORDER == __LITTLE_ENDIAN
+  static inline int64_t  ToBigEndian(int64_t value)  { return ByteSwap(value); }
+  static inline uint64_t ToBigEndian(uint64_t value) { return ByteSwap(value); }
+  static inline int32_t  ToBigEndian(int32_t value)  { return ByteSwap(value); }
+  static inline uint32_t ToBigEndian(uint32_t value) { return ByteSwap(value); }
+  static inline int16_t  ToBigEndian(int16_t value)  { return ByteSwap(value); }
+  static inline uint16_t ToBigEndian(uint16_t value) { return ByteSwap(value); }
+#else
+  static inline int64_t  ToBigEndian(int64_t val)  { return val; }
+  static inline uint64_t ToBigEndian(uint64_t val) { return val; }
+  static inline int32_t  ToBigEndian(int32_t val)  { return val; }
+  static inline uint32_t ToBigEndian(uint32_t val) { return val; }
+  static inline int16_t  ToBigEndian(int16_t val)  { return val; }
+  static inline uint16_t ToBigEndian(uint16_t val) { return val; }
+#endif
+
+  // Converts from big endian format to the machine's native endian format.
+#if __BYTE_ORDER == __LITTLE_ENDIAN
+  static inline int64_t  FromBigEndian(int64_t value)  { return ByteSwap(value); }
+  static inline uint64_t FromBigEndian(uint64_t value) { return ByteSwap(value); }
+  static inline int32_t  FromBigEndian(int32_t value)  { return ByteSwap(value); }
+  static inline uint32_t FromBigEndian(uint32_t value) { return ByteSwap(value); }
+  static inline int16_t  FromBigEndian(int16_t value)  { return ByteSwap(value); }
+  static inline uint16_t FromBigEndian(uint16_t value) { return ByteSwap(value); }
+#else
+  static inline int64_t  FromBigEndian(int64_t val)  { return val; }
+  static inline uint64_t FromBigEndian(uint64_t val) { return val; }
+  static inline int32_t  FromBigEndian(int32_t val)  { return val; }
+  static inline uint32_t FromBigEndian(uint32_t val) { return val; }
+  static inline int16_t  FromBigEndian(int16_t val)  { return val; }
+  static inline uint16_t FromBigEndian(uint16_t val) { return val; }
+#endif
+
+};
+
+} // namespace parquet_cpp
+
+#endif // PARQUET_UTIL_BIT_UTIL_H

http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/337cf584/src/parquet/util/compiler-util.h
----------------------------------------------------------------------
diff --git a/src/parquet/util/compiler-util.h b/src/parquet/util/compiler-util.h
new file mode 100644
index 0000000..6425247
--- /dev/null
+++ b/src/parquet/util/compiler-util.h
@@ -0,0 +1,37 @@
+// Copyright 2012 Cloudera Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+
+#ifndef PARQUET_UTIL_COMPILER_UTIL_H
+#define PARQUET_UTIL_COMPILER_UTIL_H
+
+// Compiler hint that this branch is likely or unlikely to
+// be taken. Taken from the "What Every Programmer Should Know
+// About Memory" paper.
+// example: if (LIKELY(size > 0)) { ... }
+// example: if (UNLIKELY(!status.ok())) { ... }
+#ifdef LIKELY
+#undef LIKELY
+#endif
+
+#ifdef UNLIKELY
+#undef UNLIKELY
+#endif
+
+#define LIKELY(expr) __builtin_expect(!!(expr), 1)
+#define UNLIKELY(expr) __builtin_expect(!!(expr), 0)
+
+#define PREFETCH(addr) __builtin_prefetch(addr)
+
+#endif // PARQUET_UTIL_COMPILER_UTIL_H

http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/337cf584/src/parquet/util/logging.h
----------------------------------------------------------------------
diff --git a/src/parquet/util/logging.h b/src/parquet/util/logging.h
new file mode 100644
index 0000000..c6e6303
--- /dev/null
+++ b/src/parquet/util/logging.h
@@ -0,0 +1,31 @@
+// Copyright 2012 Cloudera Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+
+#ifndef PARQUET_UTIL_LOGGING_H
+#define PARQUET_UTIL_LOGGING_H
+
+#include <iostream>
+
+#define DCHECK(condition) while (false) std::cout
+#define DCHECK_EQ(a, b) while (false) std::cout
+#define DCHECK_NE(a, b) while (false) std::cout
+#define DCHECK_GT(a, b) while (false) std::cout
+#define DCHECK_LT(a, b) while (false) std::cout
+#define DCHECK_GE(a, b) while (false) std::cout
+#define DCHECK_LE(a, b) while (false) std::cout
+// LOG is a no-op as well, defined the same way glog defines DCHECK for release builds.
+#define LOG(level) while (false) std::cout
+
+#endif

http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/337cf584/src/parquet/util/rle-encoding.h
----------------------------------------------------------------------
diff --git a/src/parquet/util/rle-encoding.h b/src/parquet/util/rle-encoding.h
new file mode 100644
index 0000000..b074d6d
--- /dev/null
+++ b/src/parquet/util/rle-encoding.h
@@ -0,0 +1,419 @@
+// Copyright 2012 Cloudera Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef PARQUET_UTIL_RLE_ENCODING_H
+#define PARQUET_UTIL_RLE_ENCODING_H
+
+#include <math.h>
+#include <algorithm>
+
+#include "parquet/util/compiler-util.h"
+#include "parquet/util/bit-stream-utils.inline.h"
+#include "parquet/util/bit-util.h"
+#include "parquet/util/logging.h"
+
+namespace parquet_cpp {
+
+// Utility classes to do run length encoding (RLE) for fixed bit width values.  If runs
+// are sufficiently long, RLE is used, otherwise, the values are just bit-packed
+// (literal encoding).
+// For both types of runs, there is a byte-aligned indicator which encodes the length
+// of the run and the type of the run.
+// This encoding has the benefit that when there aren't any long enough runs, values
+// are always decoded at fixed (can be precomputed) bit offsets OR both the value and
+// the run length are byte aligned. This allows for very efficient decoding
+// implementations.
+// The encoding is:
+//    encoded-block := run*
+//    run := literal-run | repeated-run
+//    literal-run := literal-indicator < literal bytes >
+//    repeated-run := repeated-indicator < repeated value. padded to byte boundary >
+//    literal-indicator := varint_encode( number_of_groups << 1 | 1)
+//    repeated-indicator := varint_encode( number_of_repetitions << 1 )
+//
+// Each run is preceded by a varint. The varint's least significant bit is
+// used to indicate whether the run is a literal run or a repeated run. The rest
+// of the varint is used to determine the length of the run (eg how many times the
+// value repeats).
+//
+// In the case of literal runs, the run length is always a multiple of 8 (i.e. encode
+// in groups of 8), so that no matter the bit-width of the value, the sequence will end
+// on a byte boundary without padding.
+// Given that we know it is a multiple of 8, we store the number of 8-groups rather than
+// the actual number of encoded ints. (This means that the total number of encoded values
+// can not be determined from the encoded data, since the number of values in the last
+// group may not be a multiple of 8). For the last group of literal runs, we pad
+// the group to 8 with zeros. This allows for 8 at a time decoding on the read side
+// without the need for additional checks.
+//
+// There is a break-even point when it is more storage efficient to do run length
+// encoding.  For 1 bit-width values, that point is 8 values.  They require 2 bytes
+// for both the repeated encoding or the literal encoding.  This value can always
+// be computed based on the bit-width.
+// TODO: think about how to use this for strings.  The bit packing isn't quite the same.
+//
+// Examples with bit-width 1 (eg encoding booleans):
+// ----------------------------------------
+// 100 1s followed by 100 0s:
+// <varint(100 << 1)> <1, padded to 1 byte>  <varint(100 << 1)> <0, padded to 1 byte>
+//  - (total 4 bytes)
+//
+// alternating 1s and 0s (200 total):
+// 200 ints = 25 groups of 8
+// <varint((25 << 1) | 1)> <25 bytes of values, bitpacked>
+// (total 26 bytes, 1 byte overhead)
+//
+
+// Decoder class for RLE encoded data.
+class RleDecoder {
+ public:
+  // Create a decoder object. buffer/buffer_len is the encoded data.
+  // bit_width is the width of each value (before encoding).
+  RleDecoder(const uint8_t* buffer, int buffer_len, int bit_width)
+    : bit_reader_(buffer, buffer_len),
+      bit_width_(bit_width),
+      current_value_(0),
+      repeat_count_(0),
+      literal_count_(0) {
+    DCHECK_GE(bit_width_, 0);
+    DCHECK_LE(bit_width_, 64);
+  }
+  // Default-constructed decoder: no run is pending and every counter starts at zero.
+  RleDecoder() : bit_width_(0), current_value_(0), repeat_count_(0), literal_count_(0) {}
+
+  // Gets the next value.  Returns false if there are no more.
+  template<typename T>
+  bool Get(T* val);
+
+ private:
+  BitReader bit_reader_;
+  int bit_width_;            // Width in bits of each encoded value
+  uint64_t current_value_;   // Value of the repeated run currently being returned
+  uint32_t repeat_count_;    // Values left in the current repeated run
+  uint32_t literal_count_;   // Values left in the current literal run
+};
+
+// Class to incrementally build the rle data.   This class does not allocate any memory.
+// The encoding has two modes: encoding repeated runs and literal runs.
+// If the run is sufficiently short, it is more efficient to encode as a literal run.
+// This class does so by buffering 8 values at a time.  If they are not all the same
+// they are added to the literal run.  If they are the same, they are added to the
+// repeated run.  When we switch modes, the previous run is flushed out.
+class RleEncoder {
+ public:
+  // buffer/buffer_len: preallocated output buffer.
+  // bit_width: max number of bits for value.
+  // TODO: consider adding a min_repeated_run_length so the caller can control
+  // when values should be encoded as repeated runs.  Currently this is derived
+  // based on the bit_width, which can determine a storage optimal choice.
+  // TODO: allow 0 bit_width (and have dict encoder use it)
+  RleEncoder(uint8_t* buffer, int buffer_len, int bit_width)
+    : bit_width_(bit_width),
+      bit_writer_(buffer, buffer_len) {
+    DCHECK_GE(bit_width_, 1);
+    DCHECK_LE(bit_width_, 64);
+    max_run_byte_size_ = MinBufferSize(bit_width);
+    DCHECK_GE(buffer_len, max_run_byte_size_) << "Input buffer not big enough.";
+    Clear();
+  }
+
+  // Returns the minimum buffer size needed to use the encoder for 'bit_width'
+  // This is the maximum length of a single run for 'bit_width'.
+  // It is not valid to pass a buffer less than this length.
+  static int MinBufferSize(int bit_width) {
+    // 1 indicator byte and MAX_VALUES_PER_LITERAL_RUN 'bit_width' values.
+    int max_literal_run_size = 1 +
+        BitUtil::Ceil(MAX_VALUES_PER_LITERAL_RUN * bit_width, 8);
+    // Up to MAX_VLQ_BYTE_LEN indicator and a single 'bit_width' value.
+    int max_repeated_run_size = BitReader::MAX_VLQ_BYTE_LEN + BitUtil::Ceil(bit_width, 8);
+    return std::max(max_literal_run_size, max_repeated_run_size);
+  }
+
+  // Returns the maximum byte size it could take to encode 'num_values'.
+  static int MaxBufferSize(int bit_width, int num_values) {
+    int bytes_per_run = BitUtil::Ceil(bit_width * MAX_VALUES_PER_LITERAL_RUN, 8);
+    int num_runs = BitUtil::Ceil(num_values, MAX_VALUES_PER_LITERAL_RUN);
+    int literal_max_size = num_runs + num_runs * bytes_per_run;
+    int min_run_size = MinBufferSize(bit_width);
+    return std::max(min_run_size, literal_max_size) + min_run_size;
+  }
+
+  // Encode value.  Returns true if the value fits in buffer, false otherwise.
+  // This value must be representable with bit_width_ bits.
+  bool Put(uint64_t value);
+
+  // Flushes any pending values to the underlying buffer.
+  // Returns the total number of bytes written
+  int Flush();
+
+  // Resets all the state in the encoder.
+  void Clear();
+
+  // Returns pointer to underlying buffer
+  uint8_t* buffer() { return bit_writer_.buffer(); }
+  int32_t len() { return bit_writer_.bytes_written(); }
+
+ private:
+  // Flushes any buffered values.  If this is part of a repeated run, this is largely
+  // a no-op.
+  // If it is part of a literal run, this will call FlushLiteralRun, which writes
+  // out the buffered literal values.
+  // If 'done' is true, the current run would be written even if it would normally
+  // have been buffered more.  This should only be called at the end, when the
+  // encoder has received all values even if it would normally continue to be
+  // buffered.
+  void FlushBufferedValues(bool done);
+
+  // Flushes literal values to the underlying buffer.  If update_indicator_byte,
+  // then the current literal run is complete and the indicator byte is updated.
+  void FlushLiteralRun(bool update_indicator_byte);
+
+  // Flushes a repeated run to the underlying buffer.
+  void FlushRepeatedRun();
+
+  // Checks and sets buffer_full_. This must be called after flushing a run to
+  // make sure there are enough bytes remaining to encode the next run.
+  void CheckBufferFull();
+
+  // The maximum number of values in a single literal run
+  // (number of groups encodable by a 1-byte indicator * 8)
+  static const int MAX_VALUES_PER_LITERAL_RUN = (1 << 6) * 8;
+
+  // Number of bits needed to encode the value.
+  const int bit_width_;
+
+  // Underlying buffer.
+  BitWriter bit_writer_;
+
+  // If true, the buffer is full and subsequent Put()'s will fail.
+  bool buffer_full_;
+
+  // The maximum byte size a single run can take.
+  int max_run_byte_size_;
+
+  // We need to buffer at most 8 values for literals.  This happens when the
+  // bit_width is 1 (so 8 values fit in one byte).
+  // TODO: generalize this to other bit widths
+  int64_t buffered_values_[8];
+
+  // Number of values in buffered_values_
+  int num_buffered_values_;
+
+  // The current (also last) value that was written and the count of how
+  // many times in a row that value has been seen.  This is maintained even
+  // if we are in a literal run.  If the repeat_count_ gets high enough, we switch
+  // to encoding repeated runs.
+  int64_t current_value_;
+  int repeat_count_;
+
+  // Number of literals in the current run.  This does not include the literals
+  // that might be in buffered_values_.  Only after we've got a group big enough
+  // can we decide if they should be part of the literal_count_ or repeat_count_
+  int literal_count_;
+
+  // Pointer to a byte in the underlying buffer that stores the indicator byte.
+  // This is reserved as soon as we need a literal run but the value is written
+  // when the literal run is complete.
+  uint8_t* literal_indicator_byte_;
+};
+
+template<typename T>
+inline bool RleDecoder::Get(T* val) {
+  if (UNLIKELY(literal_count_ == 0 && repeat_count_ == 0)) {
+    // Start of a new run: read its vlq-encoded indicator.  The lsb selects the
+    // run type (1 = literal, 0 = repeated); the remaining bits hold the length.
+    uint64_t indicator_value = 0;
+    if (!bit_reader_.GetVlqInt(&indicator_value)) return false;
+    // A zero-length run would leave both counts at 0 and drive literal_count_
+    // negative below, so treat it as malformed input.
+    if ((indicator_value >> 1) == 0) return false;
+    if (indicator_value & 1) {
+      literal_count_ = (indicator_value >> 1) * 8;  // length is in groups of 8
+    } else {
+      // Only start the run once its value was read; a short read must be
+      // reported instead of replaying a stale current_value_.
+      if (!bit_reader_.GetAligned<T>(BitUtil::Ceil(bit_width_, 8),
+                                     reinterpret_cast<T*>(&current_value_))) {
+        return false;
+      }
+      repeat_count_ = indicator_value >> 1;
+    }
+  }
+
+  if (LIKELY(repeat_count_ > 0)) {
+    *val = current_value_;
+    --repeat_count_;
+  } else {
+    DCHECK(literal_count_ > 0);
+    if (!bit_reader_.GetValue(bit_width_, val)) return false;  // truncated data
+    --literal_count_;
+  }
+  return true;
+}
+
+// Stages incoming values in groups of 8.  Once a group is complete,
+// FlushBufferedValues() decides whether it is encoded as literals or as
+// part of a repeated run.
+inline bool RleEncoder::Put(uint64_t value) {
+  DCHECK(bit_width_ == 64 || value < (1LL << bit_width_));
+  if (UNLIKELY(buffer_full_)) return false;
+
+  const bool extends_run = (current_value_ == value);
+  if (LIKELY(extends_run)) {
+    // Fast path for long repeated runs: past the first 8 occurrences the
+    // value only needs to be counted, not buffered.
+    ++repeat_count_;
+    if (repeat_count_ > 8) return true;
+  } else {
+    // The value changed.  A run that already reached 8 repetitions is
+    // written out as a repeated run before the new value is tracked.
+    if (repeat_count_ >= 8) {
+      DCHECK_EQ(literal_count_, 0);
+      FlushRepeatedRun();
+    }
+    current_value_ = value;
+    repeat_count_ = 1;
+  }
+
+  // Stage the value; a full group of 8 triggers the literal/repeat decision.
+  buffered_values_[num_buffered_values_++] = value;
+  if (num_buffered_values_ == 8) {
+    DCHECK_EQ(literal_count_ % 8, 0);
+    FlushBufferedValues(false);
+  }
+  return true;
+}
+
+inline void RleEncoder::FlushLiteralRun(bool update_indicator_byte) {
+  // Reserve the indicator byte lazily, the first time this run emits data.
+  if (literal_indicator_byte_ == NULL) {
+    literal_indicator_byte_ = bit_writer_.GetNextBytePtr();
+    DCHECK(literal_indicator_byte_ != NULL);
+  }
+
+  // Emit every buffered value bit-packed, then empty the staging buffer.
+  for (int idx = 0; idx < num_buffered_values_; ++idx) {
+    bool success = bit_writer_.PutValue(buffered_values_[idx], bit_width_);
+    DCHECK(success) << "There is a bug in using CheckBufferFull()";
+  }
+  num_buffered_values_ = 0;
+
+  // The run stays open (indicator byte still pending) unless asked to close it.
+  if (!update_indicator_byte) return;
+
+  // Close the run by storing (group count << 1) | 1 in the reserved byte.
+  // Only one byte is reserved so literals can be streamed; the encoder
+  // flushes literal runs often enough for the indicator to fit in it.
+  DCHECK_EQ(literal_count_ % 8, 0);
+  const int num_groups = literal_count_ / 8;
+  const int32_t indicator_value = (num_groups << 1) | 1;
+  DCHECK_EQ(indicator_value & 0xFFFFFF00, 0);
+  *literal_indicator_byte_ = indicator_value;
+  literal_indicator_byte_ = NULL;
+  literal_count_ = 0;
+  CheckBufferFull();
+}
+
+inline void RleEncoder::FlushRepeatedRun() {
+  DCHECK_GT(repeat_count_, 0);
+  // Indicator: run length shifted left by one; the clear lsb marks a repeated run.
+  int32_t indicator_value = repeat_count_ << 1 | 0;
+  bool result = bit_writer_.PutVlqInt(indicator_value);
+  result &= bit_writer_.PutAligned(current_value_, BitUtil::Ceil(bit_width_, 8));
+  DCHECK(result);
+  // The run is fully written: reset the run state and re-check remaining space.
+  repeat_count_ = 0;
+  num_buffered_values_ = 0;
+  CheckBufferFull();
+}
+
+// Decides what happens to the buffered values: they either join the ongoing
+// repeated run or are appended to the current literal run.
+inline void RleEncoder::FlushBufferedValues(bool done) {
+  if (repeat_count_ >= 8) {
+    // The values belong to a repeated run, so drop the buffered copies
+    // instead of emitting them as literals.
+    num_buffered_values_ = 0;
+    if (literal_count_ != 0) {
+      // A literal run was open.  Its values are already written; only its
+      // indicator byte is still outstanding.
+      DCHECK_EQ(literal_count_ % 8, 0);
+      DCHECK_EQ(repeat_count_, 8);
+      FlushLiteralRun(true);
+    }
+    DCHECK_EQ(literal_count_, 0);
+    return;
+  }
+
+  // Otherwise the buffered values extend the literal run.
+  literal_count_ += num_buffered_values_;
+  DCHECK_EQ(literal_count_ % 8, 0);
+
+  // The run must be closed when the reserved indicator byte cannot describe
+  // another group; it is also closed when the caller reports 'done'.
+  const bool indicator_exhausted = (literal_count_ / 8) + 1 >= (1 << 6);
+  if (indicator_exhausted) {
+    DCHECK(literal_indicator_byte_ != NULL);
+  }
+  FlushLiteralRun(indicator_exhausted || done);
+  repeat_count_ = 0;
+}
+
+inline int RleEncoder::Flush() {
+  if (literal_count_ > 0 || repeat_count_ > 0 || num_buffered_values_ > 0) {
+    // The pending data forms a single repeated run only if no literal run is
+    // open and every buffered value (if any) belongs to the repetition.
+    const bool no_literals = literal_count_ == 0 &&
+        (num_buffered_values_ == 0 || num_buffered_values_ == repeat_count_);
+    if (no_literals && repeat_count_ > 0) {
+      FlushRepeatedRun();
+    } else {
+      DCHECK_EQ(literal_count_ % 8, 0);
+      // Zero-pad a partial last group to 8 values; an empty buffer stays empty.
+      if (num_buffered_values_ != 0) {
+        while (num_buffered_values_ < 8) buffered_values_[num_buffered_values_++] = 0;
+      }
+      literal_count_ += num_buffered_values_;
+      FlushLiteralRun(true);
+      repeat_count_ = 0;
+    }
+  }
+  // Everything pending is written; flush the writer and report its byte count.
+  bit_writer_.Flush();
+  DCHECK_EQ(num_buffered_values_, 0);
+  DCHECK_EQ(literal_count_, 0);
+  DCHECK_EQ(repeat_count_, 0);
+  return bit_writer_.bytes_written();
+}
+
+inline void RleEncoder::CheckBufferFull() {
+  // Full once a maximum-size run no longer fits behind what is already written.
+  // This only ever sets the flag; Clear() is what resets it.
+  const int space_needed = bit_writer_.bytes_written() + max_run_byte_size_;
+  if (space_needed > bit_writer_.buffer_len()) buffer_full_ = true;
+}
+
+inline void RleEncoder::Clear() {
+  current_value_ = 0;  // run-tracking state
+  repeat_count_ = 0;
+  literal_count_ = 0;
+  num_buffered_values_ = 0;
+  literal_indicator_byte_ = NULL;  // no indicator byte reserved
+  buffer_full_ = false;  // Put() accepts values again
+  bit_writer_.Clear();
+}
+
+} // namespace parquet_cpp
+
+#endif // PARQUET_UTIL_RLE_ENCODING_H

http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/337cf584/src/parquet/util/stopwatch.h
----------------------------------------------------------------------
diff --git a/src/parquet/util/stopwatch.h b/src/parquet/util/stopwatch.h
new file mode 100644
index 0000000..10ed9e9
--- /dev/null
+++ b/src/parquet/util/stopwatch.h
@@ -0,0 +1,49 @@
+// Copyright 2012 Cloudera Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef PARQUET_UTIL_STOPWATCH_H
+#define PARQUET_UTIL_STOPWATCH_H
+
+#include <iostream>
+#include <stdio.h>
+#include <ctime>
+#include <sys/time.h>
+
+namespace parquet_cpp {
+
+class StopWatch {
+ public:
+  StopWatch() : start_time() {}  // zeroed so Stop() never reads garbage
+
+  // Records the current gettimeofday() time as the start of the interval.
+  void Start() {
+    gettimeofday(&start_time, 0);
+  }
+
+  // Returns the time elapsed since Start() in nanoseconds.
+  uint64_t Stop() {
+    struct timeval t_time;
+    gettimeofday(&t_time, 0);
+    // 64-bit math throughout: seconds scale by 1e9 and microseconds by 1e3.
+    return 1000000000LL * (t_time.tv_sec - start_time.tv_sec)
+        + 1000LL * (t_time.tv_usec - start_time.tv_usec);
+  }
+
+ private:
+  struct timeval  start_time;
+};
+
+} // namespace parquet_cpp
+
+#endif

http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/337cf584/src/util/stopwatch.h
----------------------------------------------------------------------
diff --git a/src/util/stopwatch.h b/src/util/stopwatch.h
deleted file mode 100644
index 145f130..0000000
--- a/src/util/stopwatch.h
+++ /dev/null
@@ -1,49 +0,0 @@
-// Copyright 2012 Cloudera Inc.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#ifndef PARQUET_UTIL_STOPWATCH_H
-#define PARQUET_UTIL_STOPWATCH_H
-
-#include <iostream>
-#include <stdio.h>
-#include <ctime>
-#include <sys/time.h>
-
-namespace parquet_cpp {
-
-class StopWatch {
- public:
-  StopWatch() {
-  }
-
-  void Start() {
-    gettimeofday(&start_time, 0);
-  }
-
-  // Returns time in nanoseconds.
-  uint64_t Stop() {
-    struct timeval t_time;
-    gettimeofday(&t_time, 0);
-
-    return (1000L * 1000L * 1000L * (t_time.tv_sec - start_time.tv_sec)
-                   + (t_time.tv_usec - start_time.tv_usec));
-  }
-
- private:
-  struct timeval  start_time;
-};
-
-}
-
-#endif

[4/7] parquet-cpp git commit: PARQUET-416: C++11 compilation, code reorg, libparquet and installation targets

Posted by no...@apache.org.

http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/337cf584/generated/gen-cpp/parquet_types.h
----------------------------------------------------------------------
diff --git a/generated/gen-cpp/parquet_types.h b/generated/gen-cpp/parquet_types.h
deleted file mode 100644
index 4360d02..0000000
--- a/generated/gen-cpp/parquet_types.h
+++ /dev/null
@@ -1,1123 +0,0 @@
-/**
- * Autogenerated by Thrift Compiler (0.9.0)
- *
- * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING
- *  @generated
- */
-#ifndef parquet_TYPES_H
-#define parquet_TYPES_H
-
-#include <thrift/Thrift.h>
-#include <thrift/TApplicationException.h>
-#include <thrift/protocol/TProtocol.h>
-#include <thrift/transport/TTransport.h>
-
-
-
-namespace parquet {
-
-struct Type {
-  enum type {
-    BOOLEAN = 0,
-    INT32 = 1,
-    INT64 = 2,
-    INT96 = 3,
-    FLOAT = 4,
-    DOUBLE = 5,
-    BYTE_ARRAY = 6,
-    FIXED_LEN_BYTE_ARRAY = 7
-  };
-};
-
-extern const std::map<int, const char*> _Type_VALUES_TO_NAMES;
-
-struct ConvertedType {
-  enum type {
-    UTF8 = 0,
-    MAP = 1,
-    MAP_KEY_VALUE = 2,
-    LIST = 3,
-    ENUM = 4,
-    DECIMAL = 5
-  };
-};
-
-extern const std::map<int, const char*> _ConvertedType_VALUES_TO_NAMES;
-
-struct FieldRepetitionType {
-  enum type {
-    REQUIRED = 0,
-    OPTIONAL = 1,
-    REPEATED = 2
-  };
-};
-
-extern const std::map<int, const char*> _FieldRepetitionType_VALUES_TO_NAMES;
-
-struct Encoding {
-  enum type {
-    PLAIN = 0,
-    PLAIN_DICTIONARY = 2,
-    RLE = 3,
-    BIT_PACKED = 4,
-    DELTA_BINARY_PACKED = 5,
-    DELTA_LENGTH_BYTE_ARRAY = 6,
-    DELTA_BYTE_ARRAY = 7,
-    RLE_DICTIONARY = 8
-  };
-};
-
-extern const std::map<int, const char*> _Encoding_VALUES_TO_NAMES;
-
-struct CompressionCodec {
-  enum type {
-    UNCOMPRESSED = 0,
-    SNAPPY = 1,
-    GZIP = 2,
-    LZO = 3
-  };
-};
-
-extern const std::map<int, const char*> _CompressionCodec_VALUES_TO_NAMES;
-
-struct PageType {
-  enum type {
-    DATA_PAGE = 0,
-    INDEX_PAGE = 1,
-    DICTIONARY_PAGE = 2,
-    DATA_PAGE_V2 = 3
-  };
-};
-
-extern const std::map<int, const char*> _PageType_VALUES_TO_NAMES;
-
-typedef struct _Statistics__isset {
-  _Statistics__isset() : max(false), min(false), null_count(false), distinct_count(false) {}
-  bool max;
-  bool min;
-  bool null_count;
-  bool distinct_count;
-} _Statistics__isset;
-
-class Statistics {
- public:
-
-  static const char* ascii_fingerprint; // = "CE004821871820DD79A8FD98BB101F6D";
-  static const uint8_t binary_fingerprint[16]; // = {0xCE,0x00,0x48,0x21,0x87,0x18,0x20,0xDD,0x79,0xA8,0xFD,0x98,0xBB,0x10,0x1F,0x6D};
-
-  Statistics() : max(), min(), null_count(0), distinct_count(0) {
-  }
-
-  virtual ~Statistics() throw() {}
-
-  std::string max;
-  std::string min;
-  int64_t null_count;
-  int64_t distinct_count;
-
-  _Statistics__isset __isset;
-
-  void __set_max(const std::string& val) {
-    max = val;
-    __isset.max = true;
-  }
-
-  void __set_min(const std::string& val) {
-    min = val;
-    __isset.min = true;
-  }
-
-  void __set_null_count(const int64_t val) {
-    null_count = val;
-    __isset.null_count = true;
-  }
-
-  void __set_distinct_count(const int64_t val) {
-    distinct_count = val;
-    __isset.distinct_count = true;
-  }
-
-  bool operator == (const Statistics & rhs) const
-  {
-    if (__isset.max != rhs.__isset.max)
-      return false;
-    else if (__isset.max && !(max == rhs.max))
-      return false;
-    if (__isset.min != rhs.__isset.min)
-      return false;
-    else if (__isset.min && !(min == rhs.min))
-      return false;
-    if (__isset.null_count != rhs.__isset.null_count)
-      return false;
-    else if (__isset.null_count && !(null_count == rhs.null_count))
-      return false;
-    if (__isset.distinct_count != rhs.__isset.distinct_count)
-      return false;
-    else if (__isset.distinct_count && !(distinct_count == rhs.distinct_count))
-      return false;
-    return true;
-  }
-  bool operator != (const Statistics &rhs) const {
-    return !(*this == rhs);
-  }
-
-  bool operator < (const Statistics & ) const;
-
-  uint32_t read(::apache::thrift::protocol::TProtocol* iprot);
-  uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const;
-
-};
-
-void swap(Statistics &a, Statistics &b);
-
-typedef struct _SchemaElement__isset {
-  _SchemaElement__isset() : type(false), type_length(false), repetition_type(false), num_children(false), converted_type(false), scale(false), precision(false) {}
-  bool type;
-  bool type_length;
-  bool repetition_type;
-  bool num_children;
-  bool converted_type;
-  bool scale;
-  bool precision;
-} _SchemaElement__isset;
-
-class SchemaElement {
- public:
-
-  static const char* ascii_fingerprint; // = "388A784401753800444CFEAC8BC1B1A1";
-  static const uint8_t binary_fingerprint[16]; // = {0x38,0x8A,0x78,0x44,0x01,0x75,0x38,0x00,0x44,0x4C,0xFE,0xAC,0x8B,0xC1,0xB1,0xA1};
-
-  SchemaElement() : type((Type::type)0), type_length(0), repetition_type((FieldRepetitionType::type)0), name(), num_children(0), converted_type((ConvertedType::type)0), scale(0), precision(0) {
-  }
-
-  virtual ~SchemaElement() throw() {}
-
-  Type::type type;
-  int32_t type_length;
-  FieldRepetitionType::type repetition_type;
-  std::string name;
-  int32_t num_children;
-  ConvertedType::type converted_type;
-  int32_t scale;
-  int32_t precision;
-
-  _SchemaElement__isset __isset;
-
-  void __set_type(const Type::type val) {
-    type = val;
-    __isset.type = true;
-  }
-
-  void __set_type_length(const int32_t val) {
-    type_length = val;
-    __isset.type_length = true;
-  }
-
-  void __set_repetition_type(const FieldRepetitionType::type val) {
-    repetition_type = val;
-    __isset.repetition_type = true;
-  }
-
-  void __set_name(const std::string& val) {
-    name = val;
-  }
-
-  void __set_num_children(const int32_t val) {
-    num_children = val;
-    __isset.num_children = true;
-  }
-
-  void __set_converted_type(const ConvertedType::type val) {
-    converted_type = val;
-    __isset.converted_type = true;
-  }
-
-  void __set_scale(const int32_t val) {
-    scale = val;
-    __isset.scale = true;
-  }
-
-  void __set_precision(const int32_t val) {
-    precision = val;
-    __isset.precision = true;
-  }
-
-  bool operator == (const SchemaElement & rhs) const
-  {
-    if (__isset.type != rhs.__isset.type)
-      return false;
-    else if (__isset.type && !(type == rhs.type))
-      return false;
-    if (__isset.type_length != rhs.__isset.type_length)
-      return false;
-    else if (__isset.type_length && !(type_length == rhs.type_length))
-      return false;
-    if (__isset.repetition_type != rhs.__isset.repetition_type)
-      return false;
-    else if (__isset.repetition_type && !(repetition_type == rhs.repetition_type))
-      return false;
-    if (!(name == rhs.name))
-      return false;
-    if (__isset.num_children != rhs.__isset.num_children)
-      return false;
-    else if (__isset.num_children && !(num_children == rhs.num_children))
-      return false;
-    if (__isset.converted_type != rhs.__isset.converted_type)
-      return false;
-    else if (__isset.converted_type && !(converted_type == rhs.converted_type))
-      return false;
-    if (__isset.scale != rhs.__isset.scale)
-      return false;
-    else if (__isset.scale && !(scale == rhs.scale))
-      return false;
-    if (__isset.precision != rhs.__isset.precision)
-      return false;
-    else if (__isset.precision && !(precision == rhs.precision))
-      return false;
-    return true;
-  }
-  bool operator != (const SchemaElement &rhs) const {
-    return !(*this == rhs);
-  }
-
-  bool operator < (const SchemaElement & ) const;
-
-  uint32_t read(::apache::thrift::protocol::TProtocol* iprot);
-  uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const;
-
-};
-
-void swap(SchemaElement &a, SchemaElement &b);
-
-typedef struct _DataPageHeader__isset {
-  _DataPageHeader__isset() : statistics(false) {}
-  bool statistics;
-} _DataPageHeader__isset;
-
-class DataPageHeader {
- public:
-
-  static const char* ascii_fingerprint; // = "5FC1792B0483E9C984475384165040B1";
-  static const uint8_t binary_fingerprint[16]; // = {0x5F,0xC1,0x79,0x2B,0x04,0x83,0xE9,0xC9,0x84,0x47,0x53,0x84,0x16,0x50,0x40,0xB1};
-
-  DataPageHeader() : num_values(0), encoding((Encoding::type)0), definition_level_encoding((Encoding::type)0), repetition_level_encoding((Encoding::type)0) {
-  }
-
-  virtual ~DataPageHeader() throw() {}
-
-  int32_t num_values;
-  Encoding::type encoding;
-  Encoding::type definition_level_encoding;
-  Encoding::type repetition_level_encoding;
-  Statistics statistics;
-
-  _DataPageHeader__isset __isset;
-
-  void __set_num_values(const int32_t val) {
-    num_values = val;
-  }
-
-  void __set_encoding(const Encoding::type val) {
-    encoding = val;
-  }
-
-  void __set_definition_level_encoding(const Encoding::type val) {
-    definition_level_encoding = val;
-  }
-
-  void __set_repetition_level_encoding(const Encoding::type val) {
-    repetition_level_encoding = val;
-  }
-
-  void __set_statistics(const Statistics& val) {
-    statistics = val;
-    __isset.statistics = true;
-  }
-
-  bool operator == (const DataPageHeader & rhs) const
-  {
-    if (!(num_values == rhs.num_values))
-      return false;
-    if (!(encoding == rhs.encoding))
-      return false;
-    if (!(definition_level_encoding == rhs.definition_level_encoding))
-      return false;
-    if (!(repetition_level_encoding == rhs.repetition_level_encoding))
-      return false;
-    if (__isset.statistics != rhs.__isset.statistics)
-      return false;
-    else if (__isset.statistics && !(statistics == rhs.statistics))
-      return false;
-    return true;
-  }
-  bool operator != (const DataPageHeader &rhs) const {
-    return !(*this == rhs);
-  }
-
-  bool operator < (const DataPageHeader & ) const;
-
-  uint32_t read(::apache::thrift::protocol::TProtocol* iprot);
-  uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const;
-
-};
-
-void swap(DataPageHeader &a, DataPageHeader &b);
-
-
-class IndexPageHeader {
- public:
-
-  static const char* ascii_fingerprint; // = "99914B932BD37A50B983C5E7C90AE93B";
-  static const uint8_t binary_fingerprint[16]; // = {0x99,0x91,0x4B,0x93,0x2B,0xD3,0x7A,0x50,0xB9,0x83,0xC5,0xE7,0xC9,0x0A,0xE9,0x3B};
-
-  IndexPageHeader() {
-  }
-
-  virtual ~IndexPageHeader() throw() {}
-
-
-  bool operator == (const IndexPageHeader & /* rhs */) const
-  {
-    return true;
-  }
-  bool operator != (const IndexPageHeader &rhs) const {
-    return !(*this == rhs);
-  }
-
-  bool operator < (const IndexPageHeader & ) const;
-
-  uint32_t read(::apache::thrift::protocol::TProtocol* iprot);
-  uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const;
-
-};
-
-void swap(IndexPageHeader &a, IndexPageHeader &b);
-
-typedef struct _DictionaryPageHeader__isset {
-  _DictionaryPageHeader__isset() : is_sorted(false) {}
-  bool is_sorted;
-} _DictionaryPageHeader__isset;
-
-class DictionaryPageHeader {
- public:
-
-  static const char* ascii_fingerprint; // = "B149E4528254D495610C22AE4BD539C5";
-  static const uint8_t binary_fingerprint[16]; // = {0xB1,0x49,0xE4,0x52,0x82,0x54,0xD4,0x95,0x61,0x0C,0x22,0xAE,0x4B,0xD5,0x39,0xC5};
-
-  DictionaryPageHeader() : num_values(0), encoding((Encoding::type)0), is_sorted(0) {
-  }
-
-  virtual ~DictionaryPageHeader() throw() {}
-
-  int32_t num_values;
-  Encoding::type encoding;
-  bool is_sorted;
-
-  _DictionaryPageHeader__isset __isset;
-
-  void __set_num_values(const int32_t val) {
-    num_values = val;
-  }
-
-  void __set_encoding(const Encoding::type val) {
-    encoding = val;
-  }
-
-  void __set_is_sorted(const bool val) {
-    is_sorted = val;
-    __isset.is_sorted = true;
-  }
-
-  bool operator == (const DictionaryPageHeader & rhs) const
-  {
-    if (!(num_values == rhs.num_values))
-      return false;
-    if (!(encoding == rhs.encoding))
-      return false;
-    if (__isset.is_sorted != rhs.__isset.is_sorted)
-      return false;
-    else if (__isset.is_sorted && !(is_sorted == rhs.is_sorted))
-      return false;
-    return true;
-  }
-  bool operator != (const DictionaryPageHeader &rhs) const {
-    return !(*this == rhs);
-  }
-
-  bool operator < (const DictionaryPageHeader & ) const;
-
-  uint32_t read(::apache::thrift::protocol::TProtocol* iprot);
-  uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const;
-
-};
-
-void swap(DictionaryPageHeader &a, DictionaryPageHeader &b);
-
-typedef struct _DataPageHeaderV2__isset {
-  _DataPageHeaderV2__isset() : is_compressed(true), statistics(false) {}
-  bool is_compressed;
-  bool statistics;
-} _DataPageHeaderV2__isset;
-
-class DataPageHeaderV2 {
- public:
-
-  static const char* ascii_fingerprint; // = "69FF2F6BD1A443440D5E46ABA5A3A919";
-  static const uint8_t binary_fingerprint[16]; // = {0x69,0xFF,0x2F,0x6B,0xD1,0xA4,0x43,0x44,0x0D,0x5E,0x46,0xAB,0xA5,0xA3,0xA9,0x19};
-
-  DataPageHeaderV2() : num_values(0), num_nulls(0), num_rows(0), encoding((Encoding::type)0), definition_levels_byte_length(0), repetition_levels_byte_length(0), is_compressed(true) {
-  }
-
-  virtual ~DataPageHeaderV2() throw() {}
-
-  int32_t num_values;
-  int32_t num_nulls;
-  int32_t num_rows;
-  Encoding::type encoding;
-  int32_t definition_levels_byte_length;
-  int32_t repetition_levels_byte_length;
-  bool is_compressed;
-  Statistics statistics;
-
-  _DataPageHeaderV2__isset __isset;
-
-  void __set_num_values(const int32_t val) {
-    num_values = val;
-  }
-
-  void __set_num_nulls(const int32_t val) {
-    num_nulls = val;
-  }
-
-  void __set_num_rows(const int32_t val) {
-    num_rows = val;
-  }
-
-  void __set_encoding(const Encoding::type val) {
-    encoding = val;
-  }
-
-  void __set_definition_levels_byte_length(const int32_t val) {
-    definition_levels_byte_length = val;
-  }
-
-  void __set_repetition_levels_byte_length(const int32_t val) {
-    repetition_levels_byte_length = val;
-  }
-
-  void __set_is_compressed(const bool val) {
-    is_compressed = val;
-    __isset.is_compressed = true;
-  }
-
-  void __set_statistics(const Statistics& val) {
-    statistics = val;
-    __isset.statistics = true;
-  }
-
-  bool operator == (const DataPageHeaderV2 & rhs) const
-  {
-    if (!(num_values == rhs.num_values))
-      return false;
-    if (!(num_nulls == rhs.num_nulls))
-      return false;
-    if (!(num_rows == rhs.num_rows))
-      return false;
-    if (!(encoding == rhs.encoding))
-      return false;
-    if (!(definition_levels_byte_length == rhs.definition_levels_byte_length))
-      return false;
-    if (!(repetition_levels_byte_length == rhs.repetition_levels_byte_length))
-      return false;
-    if (__isset.is_compressed != rhs.__isset.is_compressed)
-      return false;
-    else if (__isset.is_compressed && !(is_compressed == rhs.is_compressed))
-      return false;
-    if (__isset.statistics != rhs.__isset.statistics)
-      return false;
-    else if (__isset.statistics && !(statistics == rhs.statistics))
-      return false;
-    return true;
-  }
-  bool operator != (const DataPageHeaderV2 &rhs) const {
-    return !(*this == rhs);
-  }
-
-  bool operator < (const DataPageHeaderV2 & ) const;
-
-  uint32_t read(::apache::thrift::protocol::TProtocol* iprot);
-  uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const;
-
-};
-
-void swap(DataPageHeaderV2 &a, DataPageHeaderV2 &b);
-
-typedef struct _PageHeader__isset {
-  _PageHeader__isset() : crc(false), data_page_header(false), index_page_header(false), dictionary_page_header(false), data_page_header_v2(false) {}
-  bool crc;
-  bool data_page_header;
-  bool index_page_header;
-  bool dictionary_page_header;
-  bool data_page_header_v2;
-} _PageHeader__isset;
-
-class PageHeader {
- public:
-
-  static const char* ascii_fingerprint; // = "B5BD2BDF3756C883A58B30B9C9F204A0";
-  static const uint8_t binary_fingerprint[16]; // = {0xB5,0xBD,0x2B,0xDF,0x37,0x56,0xC8,0x83,0xA5,0x8B,0x30,0xB9,0xC9,0xF2,0x04,0xA0};
-
-  PageHeader() : type((PageType::type)0), uncompressed_page_size(0), compressed_page_size(0), crc(0) {
-  }
-
-  virtual ~PageHeader() throw() {}
-
-  PageType::type type;
-  int32_t uncompressed_page_size;
-  int32_t compressed_page_size;
-  int32_t crc;
-  DataPageHeader data_page_header;
-  IndexPageHeader index_page_header;
-  DictionaryPageHeader dictionary_page_header;
-  DataPageHeaderV2 data_page_header_v2;
-
-  _PageHeader__isset __isset;
-
-  void __set_type(const PageType::type val) {
-    type = val;
-  }
-
-  void __set_uncompressed_page_size(const int32_t val) {
-    uncompressed_page_size = val;
-  }
-
-  void __set_compressed_page_size(const int32_t val) {
-    compressed_page_size = val;
-  }
-
-  void __set_crc(const int32_t val) {
-    crc = val;
-    __isset.crc = true;
-  }
-
-  void __set_data_page_header(const DataPageHeader& val) {
-    data_page_header = val;
-    __isset.data_page_header = true;
-  }
-
-  void __set_index_page_header(const IndexPageHeader& val) {
-    index_page_header = val;
-    __isset.index_page_header = true;
-  }
-
-  void __set_dictionary_page_header(const DictionaryPageHeader& val) {
-    dictionary_page_header = val;
-    __isset.dictionary_page_header = true;
-  }
-
-  void __set_data_page_header_v2(const DataPageHeaderV2& val) {
-    data_page_header_v2 = val;
-    __isset.data_page_header_v2 = true;
-  }
-
-  bool operator == (const PageHeader & rhs) const
-  {
-    if (!(type == rhs.type))
-      return false;
-    if (!(uncompressed_page_size == rhs.uncompressed_page_size))
-      return false;
-    if (!(compressed_page_size == rhs.compressed_page_size))
-      return false;
-    if (__isset.crc != rhs.__isset.crc)
-      return false;
-    else if (__isset.crc && !(crc == rhs.crc))
-      return false;
-    if (__isset.data_page_header != rhs.__isset.data_page_header)
-      return false;
-    else if (__isset.data_page_header && !(data_page_header == rhs.data_page_header))
-      return false;
-    if (__isset.index_page_header != rhs.__isset.index_page_header)
-      return false;
-    else if (__isset.index_page_header && !(index_page_header == rhs.index_page_header))
-      return false;
-    if (__isset.dictionary_page_header != rhs.__isset.dictionary_page_header)
-      return false;
-    else if (__isset.dictionary_page_header && !(dictionary_page_header == rhs.dictionary_page_header))
-      return false;
-    if (__isset.data_page_header_v2 != rhs.__isset.data_page_header_v2)
-      return false;
-    else if (__isset.data_page_header_v2 && !(data_page_header_v2 == rhs.data_page_header_v2))
-      return false;
-    return true;
-  }
-  bool operator != (const PageHeader &rhs) const {
-    return !(*this == rhs);
-  }
-
-  bool operator < (const PageHeader & ) const;
-
-  uint32_t read(::apache::thrift::protocol::TProtocol* iprot);
-  uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const;
-
-};
-
-void swap(PageHeader &a, PageHeader &b);
-
-typedef struct _KeyValue__isset {
-  _KeyValue__isset() : value(false) {}
-  bool value;
-} _KeyValue__isset;
-
-class KeyValue {
- public:
-
-  static const char* ascii_fingerprint; // = "5B708A954C550ECA9C1A49D3C5CAFAB9";
-  static const uint8_t binary_fingerprint[16]; // = {0x5B,0x70,0x8A,0x95,0x4C,0x55,0x0E,0xCA,0x9C,0x1A,0x49,0xD3,0xC5,0xCA,0xFA,0xB9};
-
-  KeyValue() : key(), value() {
-  }
-
-  virtual ~KeyValue() throw() {}
-
-  std::string key;
-  std::string value;
-
-  _KeyValue__isset __isset;
-
-  void __set_key(const std::string& val) {
-    key = val;
-  }
-
-  void __set_value(const std::string& val) {
-    value = val;
-    __isset.value = true;
-  }
-
-  bool operator == (const KeyValue & rhs) const
-  {
-    if (!(key == rhs.key))
-      return false;
-    if (__isset.value != rhs.__isset.value)
-      return false;
-    else if (__isset.value && !(value == rhs.value))
-      return false;
-    return true;
-  }
-  bool operator != (const KeyValue &rhs) const {
-    return !(*this == rhs);
-  }
-
-  bool operator < (const KeyValue & ) const;
-
-  uint32_t read(::apache::thrift::protocol::TProtocol* iprot);
-  uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const;
-
-};
-
-void swap(KeyValue &a, KeyValue &b);
-
-
-class SortingColumn {
- public:
-
-  static const char* ascii_fingerprint; // = "F079C2D58A783AD90F9BE05D10DBBC6F";
-  static const uint8_t binary_fingerprint[16]; // = {0xF0,0x79,0xC2,0xD5,0x8A,0x78,0x3A,0xD9,0x0F,0x9B,0xE0,0x5D,0x10,0xDB,0xBC,0x6F};
-
-  SortingColumn() : column_idx(0), descending(0), nulls_first(0) {
-  }
-
-  virtual ~SortingColumn() throw() {}
-
-  int32_t column_idx;
-  bool descending;
-  bool nulls_first;
-
-  void __set_column_idx(const int32_t val) {
-    column_idx = val;
-  }
-
-  void __set_descending(const bool val) {
-    descending = val;
-  }
-
-  void __set_nulls_first(const bool val) {
-    nulls_first = val;
-  }
-
-  bool operator == (const SortingColumn & rhs) const
-  {
-    if (!(column_idx == rhs.column_idx))
-      return false;
-    if (!(descending == rhs.descending))
-      return false;
-    if (!(nulls_first == rhs.nulls_first))
-      return false;
-    return true;
-  }
-  bool operator != (const SortingColumn &rhs) const {
-    return !(*this == rhs);
-  }
-
-  bool operator < (const SortingColumn & ) const;
-
-  uint32_t read(::apache::thrift::protocol::TProtocol* iprot);
-  uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const;
-
-};
-
-void swap(SortingColumn &a, SortingColumn &b);
-
-typedef struct _ColumnMetaData__isset {
-  _ColumnMetaData__isset() : key_value_metadata(false), index_page_offset(false), dictionary_page_offset(false), statistics(false) {}
-  bool key_value_metadata;
-  bool index_page_offset;
-  bool dictionary_page_offset;
-  bool statistics;
-} _ColumnMetaData__isset;
-
-class ColumnMetaData {
- public:
-
-  static const char* ascii_fingerprint; // = "1AF797732BCB4465C6314FB29B86638D";
-  static const uint8_t binary_fingerprint[16]; // = {0x1A,0xF7,0x97,0x73,0x2B,0xCB,0x44,0x65,0xC6,0x31,0x4F,0xB2,0x9B,0x86,0x63,0x8D};
-
-  ColumnMetaData() : type((Type::type)0), codec((CompressionCodec::type)0), num_values(0), total_uncompressed_size(0), total_compressed_size(0), data_page_offset(0), index_page_offset(0), dictionary_page_offset(0) {
-  }
-
-  virtual ~ColumnMetaData() throw() {}
-
-  Type::type type;
-  std::vector<Encoding::type>  encodings;
-  std::vector<std::string>  path_in_schema;
-  CompressionCodec::type codec;
-  int64_t num_values;
-  int64_t total_uncompressed_size;
-  int64_t total_compressed_size;
-  std::vector<KeyValue>  key_value_metadata;
-  int64_t data_page_offset;
-  int64_t index_page_offset;
-  int64_t dictionary_page_offset;
-  Statistics statistics;
-
-  _ColumnMetaData__isset __isset;
-
-  void __set_type(const Type::type val) {
-    type = val;
-  }
-
-  void __set_encodings(const std::vector<Encoding::type> & val) {
-    encodings = val;
-  }
-
-  void __set_path_in_schema(const std::vector<std::string> & val) {
-    path_in_schema = val;
-  }
-
-  void __set_codec(const CompressionCodec::type val) {
-    codec = val;
-  }
-
-  void __set_num_values(const int64_t val) {
-    num_values = val;
-  }
-
-  void __set_total_uncompressed_size(const int64_t val) {
-    total_uncompressed_size = val;
-  }
-
-  void __set_total_compressed_size(const int64_t val) {
-    total_compressed_size = val;
-  }
-
-  void __set_key_value_metadata(const std::vector<KeyValue> & val) {
-    key_value_metadata = val;
-    __isset.key_value_metadata = true;
-  }
-
-  void __set_data_page_offset(const int64_t val) {
-    data_page_offset = val;
-  }
-
-  void __set_index_page_offset(const int64_t val) {
-    index_page_offset = val;
-    __isset.index_page_offset = true;
-  }
-
-  void __set_dictionary_page_offset(const int64_t val) {
-    dictionary_page_offset = val;
-    __isset.dictionary_page_offset = true;
-  }
-
-  void __set_statistics(const Statistics& val) {
-    statistics = val;
-    __isset.statistics = true;
-  }
-
-  bool operator == (const ColumnMetaData & rhs) const
-  {
-    if (!(type == rhs.type))
-      return false;
-    if (!(encodings == rhs.encodings))
-      return false;
-    if (!(path_in_schema == rhs.path_in_schema))
-      return false;
-    if (!(codec == rhs.codec))
-      return false;
-    if (!(num_values == rhs.num_values))
-      return false;
-    if (!(total_uncompressed_size == rhs.total_uncompressed_size))
-      return false;
-    if (!(total_compressed_size == rhs.total_compressed_size))
-      return false;
-    if (__isset.key_value_metadata != rhs.__isset.key_value_metadata)
-      return false;
-    else if (__isset.key_value_metadata && !(key_value_metadata == rhs.key_value_metadata))
-      return false;
-    if (!(data_page_offset == rhs.data_page_offset))
-      return false;
-    if (__isset.index_page_offset != rhs.__isset.index_page_offset)
-      return false;
-    else if (__isset.index_page_offset && !(index_page_offset == rhs.index_page_offset))
-      return false;
-    if (__isset.dictionary_page_offset != rhs.__isset.dictionary_page_offset)
-      return false;
-    else if (__isset.dictionary_page_offset && !(dictionary_page_offset == rhs.dictionary_page_offset))
-      return false;
-    if (__isset.statistics != rhs.__isset.statistics)
-      return false;
-    else if (__isset.statistics && !(statistics == rhs.statistics))
-      return false;
-    return true;
-  }
-  bool operator != (const ColumnMetaData &rhs) const {
-    return !(*this == rhs);
-  }
-
-  bool operator < (const ColumnMetaData & ) const;
-
-  uint32_t read(::apache::thrift::protocol::TProtocol* iprot);
-  uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const;
-
-};
-
-void swap(ColumnMetaData &a, ColumnMetaData &b);
-
-typedef struct _ColumnChunk__isset {
-  _ColumnChunk__isset() : file_path(false), meta_data(false) {}
-  bool file_path;
-  bool meta_data;
-} _ColumnChunk__isset;
-
-class ColumnChunk {
- public:
-
-  static const char* ascii_fingerprint; // = "169FC47057EF3D82E2FACDDEC2641AE8";
-  static const uint8_t binary_fingerprint[16]; // = {0x16,0x9F,0xC4,0x70,0x57,0xEF,0x3D,0x82,0xE2,0xFA,0xCD,0xDE,0xC2,0x64,0x1A,0xE8};
-
-  ColumnChunk() : file_path(), file_offset(0) {
-  }
-
-  virtual ~ColumnChunk() throw() {}
-
-  std::string file_path;
-  int64_t file_offset;
-  ColumnMetaData meta_data;
-
-  _ColumnChunk__isset __isset;
-
-  void __set_file_path(const std::string& val) {
-    file_path = val;
-    __isset.file_path = true;
-  }
-
-  void __set_file_offset(const int64_t val) {
-    file_offset = val;
-  }
-
-  void __set_meta_data(const ColumnMetaData& val) {
-    meta_data = val;
-    __isset.meta_data = true;
-  }
-
-  bool operator == (const ColumnChunk & rhs) const
-  {
-    if (__isset.file_path != rhs.__isset.file_path)
-      return false;
-    else if (__isset.file_path && !(file_path == rhs.file_path))
-      return false;
-    if (!(file_offset == rhs.file_offset))
-      return false;
-    if (__isset.meta_data != rhs.__isset.meta_data)
-      return false;
-    else if (__isset.meta_data && !(meta_data == rhs.meta_data))
-      return false;
-    return true;
-  }
-  bool operator != (const ColumnChunk &rhs) const {
-    return !(*this == rhs);
-  }
-
-  bool operator < (const ColumnChunk & ) const;
-
-  uint32_t read(::apache::thrift::protocol::TProtocol* iprot);
-  uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const;
-
-};
-
-void swap(ColumnChunk &a, ColumnChunk &b);
-
-typedef struct _RowGroup__isset {
-  _RowGroup__isset() : sorting_columns(false) {}
-  bool sorting_columns;
-} _RowGroup__isset;
-
-class RowGroup {
- public:
-
-  static const char* ascii_fingerprint; // = "DC7968627FA826DDC4C6C9BE773586C9";
-  static const uint8_t binary_fingerprint[16]; // = {0xDC,0x79,0x68,0x62,0x7F,0xA8,0x26,0xDD,0xC4,0xC6,0xC9,0xBE,0x77,0x35,0x86,0xC9};
-
-  RowGroup() : total_byte_size(0), num_rows(0) {
-  }
-
-  virtual ~RowGroup() throw() {}
-
-  std::vector<ColumnChunk>  columns;
-  int64_t total_byte_size;
-  int64_t num_rows;
-  std::vector<SortingColumn>  sorting_columns;
-
-  _RowGroup__isset __isset;
-
-  void __set_columns(const std::vector<ColumnChunk> & val) {
-    columns = val;
-  }
-
-  void __set_total_byte_size(const int64_t val) {
-    total_byte_size = val;
-  }
-
-  void __set_num_rows(const int64_t val) {
-    num_rows = val;
-  }
-
-  void __set_sorting_columns(const std::vector<SortingColumn> & val) {
-    sorting_columns = val;
-    __isset.sorting_columns = true;
-  }
-
-  bool operator == (const RowGroup & rhs) const
-  {
-    if (!(columns == rhs.columns))
-      return false;
-    if (!(total_byte_size == rhs.total_byte_size))
-      return false;
-    if (!(num_rows == rhs.num_rows))
-      return false;
-    if (__isset.sorting_columns != rhs.__isset.sorting_columns)
-      return false;
-    else if (__isset.sorting_columns && !(sorting_columns == rhs.sorting_columns))
-      return false;
-    return true;
-  }
-  bool operator != (const RowGroup &rhs) const {
-    return !(*this == rhs);
-  }
-
-  bool operator < (const RowGroup & ) const;
-
-  uint32_t read(::apache::thrift::protocol::TProtocol* iprot);
-  uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const;
-
-};
-
-void swap(RowGroup &a, RowGroup &b);
-
-typedef struct _FileMetaData__isset {
-  _FileMetaData__isset() : key_value_metadata(false), created_by(false) {}
-  bool key_value_metadata;
-  bool created_by;
-} _FileMetaData__isset;
-
-class FileMetaData {
- public:
-
-  static const char* ascii_fingerprint; // = "44DC7D83A66D54A7B7892A985C4125C9";
-  static const uint8_t binary_fingerprint[16]; // = {0x44,0xDC,0x7D,0x83,0xA6,0x6D,0x54,0xA7,0xB7,0x89,0x2A,0x98,0x5C,0x41,0x25,0xC9};
-
-  FileMetaData() : version(0), num_rows(0), created_by() {
-  }
-
-  virtual ~FileMetaData() throw() {}
-
-  int32_t version;
-  std::vector<SchemaElement>  schema;
-  int64_t num_rows;
-  std::vector<RowGroup>  row_groups;
-  std::vector<KeyValue>  key_value_metadata;
-  std::string created_by;
-
-  _FileMetaData__isset __isset;
-
-  void __set_version(const int32_t val) {
-    version = val;
-  }
-
-  void __set_schema(const std::vector<SchemaElement> & val) {
-    schema = val;
-  }
-
-  void __set_num_rows(const int64_t val) {
-    num_rows = val;
-  }
-
-  void __set_row_groups(const std::vector<RowGroup> & val) {
-    row_groups = val;
-  }
-
-  void __set_key_value_metadata(const std::vector<KeyValue> & val) {
-    key_value_metadata = val;
-    __isset.key_value_metadata = true;
-  }
-
-  void __set_created_by(const std::string& val) {
-    created_by = val;
-    __isset.created_by = true;
-  }
-
-  bool operator == (const FileMetaData & rhs) const
-  {
-    if (!(version == rhs.version))
-      return false;
-    if (!(schema == rhs.schema))
-      return false;
-    if (!(num_rows == rhs.num_rows))
-      return false;
-    if (!(row_groups == rhs.row_groups))
-      return false;
-    if (__isset.key_value_metadata != rhs.__isset.key_value_metadata)
-      return false;
-    else if (__isset.key_value_metadata && !(key_value_metadata == rhs.key_value_metadata))
-      return false;
-    if (__isset.created_by != rhs.__isset.created_by)
-      return false;
-    else if (__isset.created_by && !(created_by == rhs.created_by))
-      return false;
-    return true;
-  }
-  bool operator != (const FileMetaData &rhs) const {
-    return !(*this == rhs);
-  }
-
-  bool operator < (const FileMetaData & ) const;
-
-  uint32_t read(::apache::thrift::protocol::TProtocol* iprot);
-  uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const;
-
-};
-
-void swap(FileMetaData &a, FileMetaData &b);
-
-} // namespace
-
-#endif

http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/337cf584/src/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
deleted file mode 100644
index 82725d7..0000000
--- a/src/CMakeLists.txt
+++ /dev/null
@@ -1,19 +0,0 @@
-# Copyright 2012 Cloudera Inc.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-add_library(Parquet STATIC
-  parquet.cc
-)
-
-add_subdirectory(compression)

http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/337cf584/src/compression/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/src/compression/CMakeLists.txt b/src/compression/CMakeLists.txt
deleted file mode 100644
index c8f0d2b..0000000
--- a/src/compression/CMakeLists.txt
+++ /dev/null
@@ -1,18 +0,0 @@
-# Copyright 2012 Cloudera Inc.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-add_library(ParquetCompression STATIC
-  lz4-codec.cc
-  snappy-codec.cc
-)

http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/337cf584/src/compression/codec.h
----------------------------------------------------------------------
diff --git a/src/compression/codec.h b/src/compression/codec.h
deleted file mode 100644
index 4ce0139..0000000
--- a/src/compression/codec.h
+++ /dev/null
@@ -1,72 +0,0 @@
-// Copyright 2012 Cloudera Inc.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#ifndef PARQUET_COMPRESSION_CODEC_H
-#define PARQUET_COMPRESSION_CODEC_H
-
-#include "parquet/parquet.h"
-
-#include <boost/cstdint.hpp>
-#include "gen-cpp/parquet_constants.h"
-#include "gen-cpp/parquet_types.h"
-
-namespace parquet_cpp {
-
-class Codec {
- public:
-  virtual ~Codec() {}
-  virtual void Decompress(int input_len, const uint8_t* input,
-      int output_len, uint8_t* output_buffer) = 0;
-
-  virtual int Compress(int input_len, const uint8_t* input,
-      int output_buffer_len, uint8_t* output_buffer) = 0;
-
-  virtual int MaxCompressedLen(int input_len, const uint8_t* input) = 0;
-
-  virtual const char* name() const = 0;
-};
-
-
-// Snappy codec.
-class SnappyCodec : public Codec {
- public:
-  virtual void Decompress(int input_len, const uint8_t* input,
-      int output_len, uint8_t* output_buffer);
-
-  virtual int Compress(int input_len, const uint8_t* input,
-      int output_buffer_len, uint8_t* output_buffer);
-
-  virtual int MaxCompressedLen(int input_len, const uint8_t* input);
-
-  virtual const char* name() const { return "snappy"; }
-};
-
-// Lz4 codec.
-class Lz4Codec : public Codec {
- public:
-  virtual void Decompress(int input_len, const uint8_t* input,
-      int output_len, uint8_t* output_buffer);
-
-  virtual int Compress(int input_len, const uint8_t* input,
-      int output_buffer_len, uint8_t* output_buffer);
-
-  virtual int MaxCompressedLen(int input_len, const uint8_t* input);
-
-  virtual const char* name() const { return "lz4"; }
-};
-
-}
-
-#endif
-

http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/337cf584/src/compression/lz4-codec.cc
----------------------------------------------------------------------
diff --git a/src/compression/lz4-codec.cc b/src/compression/lz4-codec.cc
deleted file mode 100644
index 8b8588c..0000000
--- a/src/compression/lz4-codec.cc
+++ /dev/null
@@ -1,38 +0,0 @@
-// Copyright 2012 Cloudera Inc.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#include "codec.h"
-
-#include <lz4.h>
-
-using namespace parquet_cpp;
-
-void Lz4Codec::Decompress(int input_len, const uint8_t* input,
-      int output_len, uint8_t* output_buffer) {
-  int n = LZ4_uncompress(reinterpret_cast<const char*>(input),
-      reinterpret_cast<char*>(output_buffer), output_len);
-  if (n != input_len) {
-    throw ParquetException("Corrupt lz4 compressed data.");
-  }
-}
-
-int Lz4Codec::MaxCompressedLen(int input_len, const uint8_t* input) {
-  return LZ4_compressBound(input_len);
-}
-
-int Lz4Codec::Compress(int input_len, const uint8_t* input,
-    int output_buffer_len, uint8_t* output_buffer) {
-  return LZ4_compress(reinterpret_cast<const char*>(input),
-      reinterpret_cast<char*>(output_buffer), input_len);
-}

http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/337cf584/src/compression/snappy-codec.cc
----------------------------------------------------------------------
diff --git a/src/compression/snappy-codec.cc b/src/compression/snappy-codec.cc
deleted file mode 100644
index 96d6559..0000000
--- a/src/compression/snappy-codec.cc
+++ /dev/null
@@ -1,40 +0,0 @@
-// Copyright 2012 Cloudera Inc.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#include "codec.h"
-
-#include <snappy.h>
-
-using namespace parquet_cpp;
-
-void SnappyCodec::Decompress(int input_len, const uint8_t* input,
-      int output_len, uint8_t* output_buffer) {
-  if (!snappy::RawUncompress(reinterpret_cast<const char*>(input),
-      static_cast<size_t>(input_len), reinterpret_cast<char*>(output_buffer))) {
-    throw ParquetException("Corrupt snappy compressed data.");
-  }
-}
-
-int SnappyCodec::MaxCompressedLen(int input_len, const uint8_t* input) {
-  return snappy::MaxCompressedLength(input_len);
-}
-
-int SnappyCodec::Compress(int input_len, const uint8_t* input,
-    int output_buffer_len, uint8_t* output_buffer) {
-  size_t output_len;
-  snappy::RawCompress(reinterpret_cast<const char*>(input),
-      static_cast<size_t>(input_len), reinterpret_cast<char*>(output_buffer),
-      &output_len);
-  return output_len;
-}

http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/337cf584/src/encodings/bool-encoding.h
----------------------------------------------------------------------
diff --git a/src/encodings/bool-encoding.h b/src/encodings/bool-encoding.h
deleted file mode 100644
index 1cccd1d..0000000
--- a/src/encodings/bool-encoding.h
+++ /dev/null
@@ -1,47 +0,0 @@
-// Copyright 2012 Cloudera Inc.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#ifndef PARQUET_BOOL_ENCODING_H
-#define PARQUET_BOOL_ENCODING_H
-
-#include "encodings.h"
-
-namespace parquet_cpp {
-
-class BoolDecoder : public Decoder {
- public:
-  BoolDecoder() : Decoder(parquet::Type::BOOLEAN, parquet::Encoding::PLAIN) { }
-
-  virtual void SetData(int num_values, const uint8_t* data, int len) {
-    num_values_ = num_values;
-    decoder_ = impala::RleDecoder(data, len, 1);
-  }
-
-  virtual int GetBool(bool* buffer, int max_values) {
-    max_values = std::min(max_values, num_values_);
-    for (int i = 0; i < max_values; ++i) {
-      if (!decoder_.Get(&buffer[i])) ParquetException::EofException();
-    }
-    num_values_ -= max_values;
-    return max_values;
-  }
-
- private:
-  impala::RleDecoder decoder_;
-};
-
-}
-
-#endif
-

http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/337cf584/src/encodings/delta-bit-pack-encoding.h
----------------------------------------------------------------------
diff --git a/src/encodings/delta-bit-pack-encoding.h b/src/encodings/delta-bit-pack-encoding.h
deleted file mode 100644
index 12de07a..0000000
--- a/src/encodings/delta-bit-pack-encoding.h
+++ /dev/null
@@ -1,114 +0,0 @@
-// Copyright 2012 Cloudera Inc.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#ifndef PARQUET_DELTA_BIT_PACK_ENCODING_H
-#define PARQUET_DELTA_BIT_PACK_ENCODING_H
-
-#include "encodings.h"
-
-namespace parquet_cpp {
-
-class DeltaBitPackDecoder : public Decoder {
- public:
-  DeltaBitPackDecoder(const parquet::Type::type& type)
-    : Decoder(type, parquet::Encoding::DELTA_BINARY_PACKED) {
-    if (type != parquet::Type::INT32 && type != parquet::Type::INT64) {
-      throw ParquetException("Delta bit pack encoding should only be for integer data.");
-    }
-  }
-
-  virtual void SetData(int num_values, const uint8_t* data, int len) {
-    num_values_ = num_values;
-    decoder_ = impala::BitReader(data, len);
-    values_current_block_ = 0;
-    values_current_mini_block_ = 0;
-  }
-
-  virtual int GetInt32(int32_t* buffer, int max_values) {
-    return GetInternal(buffer, max_values);
-  }
-
-  virtual int GetInt64(int64_t* buffer, int max_values) {
-    return GetInternal(buffer, max_values);
-  }
-
- private:
-  void InitBlock() {
-    uint64_t block_size;
-    if (!decoder_.GetVlqInt(&block_size)) ParquetException::EofException();
-    if (!decoder_.GetVlqInt(&num_mini_blocks_)) ParquetException::EofException();
-    if (!decoder_.GetVlqInt(&values_current_block_)) {
-      ParquetException::EofException();
-    }
-    if (!decoder_.GetZigZagVlqInt(&last_value_)) ParquetException::EofException();
-    delta_bit_widths_.resize(num_mini_blocks_);
-
-    if (!decoder_.GetZigZagVlqInt(&min_delta_)) ParquetException::EofException();
-    for (int i = 0; i < num_mini_blocks_; ++i) {
-      if (!decoder_.GetAligned<uint8_t>(1, &delta_bit_widths_[i])) {
-        ParquetException::EofException();
-      }
-    }
-    values_per_mini_block_ = block_size / num_mini_blocks_;
-    mini_block_idx_ = 0;
-    delta_bit_width_ = delta_bit_widths_[0];
-    values_current_mini_block_ = values_per_mini_block_;
-  }
-
-  template <typename T>
-  int GetInternal(T* buffer, int max_values) {
-    max_values = std::min(max_values, num_values_);
-    for (int i = 0; i < max_values; ++i) {
-      if (UNLIKELY(values_current_mini_block_ == 0)) {
-        ++mini_block_idx_;
-        if (mini_block_idx_ < delta_bit_widths_.size()) {
-          delta_bit_width_ = delta_bit_widths_[mini_block_idx_];
-          values_current_mini_block_ = values_per_mini_block_;
-        } else {
-          InitBlock();
-          buffer[i] = last_value_;
-          continue;
-        }
-      }
-
-      // TODO: the key to this algorithm is to decode the entire miniblock at once.
-      int64_t delta;
-      if (!decoder_.GetValue(delta_bit_width_, &delta)) ParquetException::EofException();
-      delta += min_delta_;
-      last_value_ += delta;
-      buffer[i] = last_value_;
-      --values_current_mini_block_;
-    }
-    num_values_ -= max_values;
-    return max_values;
-  }
-
-  impala::BitReader decoder_;
-  uint64_t values_current_block_;
-  uint64_t num_mini_blocks_;
-  uint64_t values_per_mini_block_;
-  uint64_t values_current_mini_block_;
-
-  int64_t min_delta_;
-  int mini_block_idx_;
-  std::vector<uint8_t> delta_bit_widths_;
-  int delta_bit_width_;
-
-  int64_t last_value_;
-};
-
-}
-
-#endif
-

http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/337cf584/src/encodings/delta-byte-array-encoding.h
----------------------------------------------------------------------
diff --git a/src/encodings/delta-byte-array-encoding.h b/src/encodings/delta-byte-array-encoding.h
deleted file mode 100644
index cdbbfde..0000000
--- a/src/encodings/delta-byte-array-encoding.h
+++ /dev/null
@@ -1,73 +0,0 @@
-// Copyright 2012 Cloudera Inc.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#ifndef PARQUET_DELTA_BYTE_ARRAY_ENCODING_H
-#define PARQUET_DELTA_BYTE_ARRAY_ENCODING_H
-
-#include "encodings.h"
-
-namespace parquet_cpp {
-
-class DeltaByteArrayDecoder : public Decoder {
- public:
-  DeltaByteArrayDecoder()
-    : Decoder(parquet::Type::BYTE_ARRAY, parquet::Encoding::DELTA_BYTE_ARRAY),
-      prefix_len_decoder_(parquet::Type::INT32),
-      suffix_decoder_() {
-  }
-
-  virtual void SetData(int num_values, const uint8_t* data, int len) {
-    num_values_ = num_values;
-    if (len == 0) return;
-    int prefix_len_length = *reinterpret_cast<const int*>(data);
-    data += 4;
-    len -= 4;
-    prefix_len_decoder_.SetData(num_values, data, prefix_len_length);
-    data += prefix_len_length;
-    len -= prefix_len_length;
-    suffix_decoder_.SetData(num_values, data, len);
-  }
-
-  // TODO: this doesn't work and requires memory management. We need to allocate
-  // new strings to store the results.
-  virtual int GetByteArray(ByteArray* buffer, int max_values) {
-    max_values = std::min(max_values, num_values_);
-    for (int  i = 0; i < max_values; ++i) {
-      int prefix_len = 0;
-      prefix_len_decoder_.GetInt32(&prefix_len, 1);
-      ByteArray suffix;
-      suffix_decoder_.GetByteArray(&suffix, 1);
-      buffer[i].len = prefix_len + suffix.len;
-
-      uint8_t* result = reinterpret_cast<uint8_t*>(malloc(buffer[i].len));
-      memcpy(result, last_value_.ptr, prefix_len);
-      memcpy(result + prefix_len, suffix.ptr, suffix.len);
-
-      buffer[i].ptr = result;
-      last_value_ = buffer[i];
-    }
-    num_values_ -= max_values;
-    return max_values;
-  }
-
- private:
-  DeltaBitPackDecoder prefix_len_decoder_;
-  DeltaLengthByteArrayDecoder suffix_decoder_;
-  ByteArray last_value_;
-};
-
-}
-
-#endif
-

http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/337cf584/src/encodings/delta-length-byte-array-encoding.h
----------------------------------------------------------------------
diff --git a/src/encodings/delta-length-byte-array-encoding.h b/src/encodings/delta-length-byte-array-encoding.h
deleted file mode 100644
index d6f018c..0000000
--- a/src/encodings/delta-length-byte-array-encoding.h
+++ /dev/null
@@ -1,62 +0,0 @@
-// Copyright 2012 Cloudera Inc.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#ifndef PARQUET_DELTA_LENGTH_BYTE_ARRAY_ENCODING_H
-#define PARQUET_DELTA_LENGTH_BYTE_ARRAY_ENCODING_H
-
-#include "encodings.h"
-
-namespace parquet_cpp {
-
-class DeltaLengthByteArrayDecoder : public Decoder {
- public:
-  DeltaLengthByteArrayDecoder()
-    : Decoder(parquet::Type::BYTE_ARRAY, parquet::Encoding::DELTA_LENGTH_BYTE_ARRAY),
-      len_decoder_(parquet::Type::INT32) {
-  }
-
-  virtual void SetData(int num_values, const uint8_t* data, int len) {
-    num_values_ = num_values;
-    if (len == 0) return;
-    int total_lengths_len = *reinterpret_cast<const int*>(data);
-    data += 4;
-    len_decoder_.SetData(num_values, data, total_lengths_len);
-    data_ = data + total_lengths_len;
-    len_ = len - 4 - total_lengths_len;
-  }
-
-  virtual int GetByteArray(ByteArray* buffer, int max_values) {
-    max_values = std::min(max_values, num_values_);
-    int lengths[max_values];
-    len_decoder_.GetInt32(lengths, max_values);
-    for (int  i = 0; i < max_values; ++i) {
-      buffer[i].len = lengths[i];
-      buffer[i].ptr = data_;
-      data_ += lengths[i];
-      len_ -= lengths[i];
-    }
-    num_values_ -= max_values;
-    return max_values;
-  }
-
- private:
-  DeltaBitPackDecoder len_decoder_;
-  const uint8_t* data_;
-  int len_;
-};
-
-}
-
-#endif
-

http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/337cf584/src/encodings/dictionary-encoding.h
----------------------------------------------------------------------
diff --git a/src/encodings/dictionary-encoding.h b/src/encodings/dictionary-encoding.h
deleted file mode 100644
index ba1e302..0000000
--- a/src/encodings/dictionary-encoding.h
+++ /dev/null
@@ -1,146 +0,0 @@
-// Copyright 2012 Cloudera Inc.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#ifndef PARQUET_DICTIONARY_ENCODING_H
-#define PARQUET_DICTIONARY_ENCODING_H
-
-#include "encodings.h"
-
-namespace parquet_cpp {
-
-// Decoder for dictionary-encoded pages. The dictionary values are copied out of
-// the dictionary page at construction time; each data page then supplies a
-// one-byte bit width followed by RLE-encoded indices into that dictionary.
-class DictionaryDecoder : public Decoder {
- public:
-  // Initializes the dictionary with values from 'dictionary'. The data in dictionary
-  // is not guaranteed to persist in memory after this call so the dictionary decoder
-  // needs to copy the data out if necessary.
-  DictionaryDecoder(const parquet::Type::type& type, Decoder* dictionary)
-    : Decoder(type, parquet::Encoding::RLE_DICTIONARY), dictionary_size_(0) {
-    int num_dictionary_values = dictionary->values_left();
-    dictionary_size_ = num_dictionary_values;
-    // An empty dictionary is legal; skip the copy so that &vec[0] is never
-    // formed on an empty vector.
-    const bool has_values = num_dictionary_values > 0;
-    switch (type) {
-      case parquet::Type::BOOLEAN:
-        throw ParquetException("Boolean cols should not be dictionary encoded.");
-
-      case parquet::Type::INT32:
-        int32_dictionary_.resize(num_dictionary_values);
-        if (has_values) {
-          dictionary->GetInt32(&int32_dictionary_[0], num_dictionary_values);
-        }
-        break;
-      case parquet::Type::INT64:
-        int64_dictionary_.resize(num_dictionary_values);
-        if (has_values) {
-          dictionary->GetInt64(&int64_dictionary_[0], num_dictionary_values);
-        }
-        break;
-      case parquet::Type::FLOAT:
-        float_dictionary_.resize(num_dictionary_values);
-        if (has_values) {
-          dictionary->GetFloat(&float_dictionary_[0], num_dictionary_values);
-        }
-        break;
-      case parquet::Type::DOUBLE:
-        double_dictionary_.resize(num_dictionary_values);
-        if (has_values) {
-          dictionary->GetDouble(&double_dictionary_[0], num_dictionary_values);
-        }
-        break;
-      case parquet::Type::BYTE_ARRAY: {
-        byte_array_dictionary_.resize(num_dictionary_values);
-        if (has_values) {
-          dictionary->GetByteArray(&byte_array_dictionary_[0], num_dictionary_values);
-        }
-        // The decoded ByteArrays still point into the dictionary page, so move
-        // the bytes into storage owned by this decoder and re-point the entries.
-        int total_size = 0;
-        for (int i = 0; i < num_dictionary_values; ++i) {
-          total_size += byte_array_dictionary_[i].len;
-        }
-        byte_array_data_.resize(total_size);
-        // Address entries as base + offset: a zero-length entry at the end sits
-        // one past the last byte, which operator[] may not be asked for.
-        uint8_t* base = byte_array_data_.empty() ? NULL : &byte_array_data_[0];
-        int offset = 0;
-        for (int i = 0; i < num_dictionary_values; ++i) {
-          const int value_len = byte_array_dictionary_[i].len;
-          if (value_len > 0) {
-            memcpy(base + offset, byte_array_dictionary_[i].ptr, value_len);
-          }
-          byte_array_dictionary_[i].ptr = base + offset;
-          offset += value_len;
-        }
-        break;
-      }
-      default:
-        ParquetException::NYI("Unsupported dictionary type");
-    }
-  }
-
-  // Starts decoding a new data page. The first byte is the bit width of the
-  // indices; the remainder is the RLE-encoded index stream. An empty page only
-  // records 'num_values'.
-  virtual void SetData(int num_values, const uint8_t* data, int len) {
-    num_values_ = num_values;
-    if (len == 0) return;
-    uint8_t bit_width = *data;
-    ++data;
-    --len;
-    idx_decoder_ = impala::RleDecoder(data, len, bit_width);
-  }
-
-  // Each Get* method decodes up to 'max_values' indices and writes the
-  // corresponding dictionary entries to 'buffer', returning the count written.
-  virtual int GetInt32(int32_t* buffer, int max_values) {
-    max_values = std::min(max_values, num_values_);
-    for (int i = 0; i < max_values; ++i) {
-      buffer[i] = int32_dictionary_[index()];
-    }
-    return max_values;
-  }
-
-  virtual int GetInt64(int64_t* buffer, int max_values) {
-    max_values = std::min(max_values, num_values_);
-    for (int i = 0; i < max_values; ++i) {
-      buffer[i] = int64_dictionary_[index()];
-    }
-    return max_values;
-  }
-
-  virtual int GetFloat(float* buffer, int max_values) {
-    max_values = std::min(max_values, num_values_);
-    for (int i = 0; i < max_values; ++i) {
-      buffer[i] = float_dictionary_[index()];
-    }
-    return max_values;
-  }
-
-  virtual int GetDouble(double* buffer, int max_values) {
-    max_values = std::min(max_values, num_values_);
-    for (int i = 0; i < max_values; ++i) {
-      buffer[i] = double_dictionary_[index()];
-    }
-    return max_values;
-  }
-
-  virtual int GetByteArray(ByteArray* buffer, int max_values) {
-    max_values = std::min(max_values, num_values_);
-    for (int i = 0; i < max_values; ++i) {
-      buffer[i] = byte_array_dictionary_[index()];
-    }
-    return max_values;
-  }
-
- private:
-  // Reads the next dictionary index from the page and consumes one value.
-  // Reports end-of-data when the index stream is exhausted and throws when the
-  // index does not refer to a dictionary entry (corrupt page).
-  int index() {
-    int idx = 0;
-    if (!idx_decoder_.Get(&idx)) ParquetException::EofException();
-    if (idx < 0 || idx >= dictionary_size_) {
-      throw ParquetException("Dictionary index out of range.");
-    }
-    --num_values_;
-    return idx;
-  }
-
-  // Number of entries copied out of the dictionary page.
-  int dictionary_size_;
-
-  // Only one is set.
-  std::vector<int32_t> int32_dictionary_;
-  std::vector<int64_t> int64_dictionary_;
-  std::vector<float> float_dictionary_;
-  std::vector<double> double_dictionary_;
-  std::vector<ByteArray> byte_array_dictionary_;
-
-  // Data that contains the byte array data (byte_array_dictionary_ just has the
-  // pointers).
-  std::vector<uint8_t> byte_array_data_;
-
-  impala::RleDecoder idx_decoder_;
-};
-
-}
-
-#endif
-

http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/337cf584/src/encodings/encodings.h
----------------------------------------------------------------------
diff --git a/src/encodings/encodings.h b/src/encodings/encodings.h
deleted file mode 100644
index e888c1f..0000000
--- a/src/encodings/encodings.h
+++ /dev/null
@@ -1,83 +0,0 @@
-// Copyright 2012 Cloudera Inc.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#ifndef PARQUET_ENCODINGS_H
-#define PARQUET_ENCODINGS_H
-
-#include <boost/cstdint.hpp>
-#include "gen-cpp/parquet_constants.h"
-#include "gen-cpp/parquet_types.h"
-
-#include "impala/rle-encoding.h"
-#include "impala/bit-stream-utils.inline.h"
-
-namespace parquet_cpp {
-
-// Abstract base class for the page value decoders. A decoder is bound to one
-// physical type and one encoding at construction and is then fed one data page
-// at a time through SetData().
-class Decoder {
- public:
-  virtual ~Decoder() {}
-
-  // Sets the data for a new page. This will be called multiple times on the same
-  // decoder and should reset all internal state.
-  virtual void SetData(int num_values, const uint8_t* data, int len) = 0;
-
-  // Subclasses should override the ones they support. In each of these functions,
-  // the decoder decodes up to 'max_values' values, storing the result in 'buffer'.
-  // The function returns the number of values decoded, which should be max_values
-  // except at the end of the current data page.
-  // The default implementations throw ParquetException.
-  virtual int GetBool(bool* buffer, int max_values) {
-    throw ParquetException("Decoder does not implement this type.");
-  }
-  virtual int GetInt32(int32_t* buffer, int max_values) {
-    throw ParquetException("Decoder does not implement this type.");
-  }
-  virtual int GetInt64(int64_t* buffer, int max_values) {
-    throw ParquetException("Decoder does not implement this type.");
-  }
-  virtual int GetFloat(float* buffer, int max_values) {
-    throw ParquetException("Decoder does not implement this type.");
-  }
-  virtual int GetDouble(double* buffer, int max_values) {
-    throw ParquetException("Decoder does not implement this type.");
-  }
-  virtual int GetByteArray(ByteArray* buffer, int max_values) {
-    throw ParquetException("Decoder does not implement this type.");
-  }
-
-  // Returns the number of values left (for the last call to SetData()). This is
-  // the number of values left in this page.
-  int values_left() const { return num_values_; }
-
-  // Returns the encoding this decoder was constructed for.
-  const parquet::Encoding::type encoding() const { return encoding_; }
-
- protected:
-  // Only subclasses construct a Decoder; the value count starts at zero until
-  // SetData() is called.
-  Decoder(const parquet::Type::type& type, const parquet::Encoding::type& encoding)
-    : type_(type), encoding_(encoding), num_values_(0) {}
-
-  const parquet::Type::type type_;
-  const parquet::Encoding::type encoding_;
-  // Values remaining in the current page; subclasses decrement it as they decode.
-  int num_values_;
-};
-
-}
-
-#include "bool-encoding.h"
-#include "plain-encoding.h"
-#include "dictionary-encoding.h"
-#include "delta-bit-pack-encoding.h"
-#include "delta-length-byte-array-encoding.h"
-#include "delta-byte-array-encoding.h"
-
-#endif
-

http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/337cf584/src/encodings/plain-encoding.h
----------------------------------------------------------------------
diff --git a/src/encodings/plain-encoding.h b/src/encodings/plain-encoding.h
deleted file mode 100644
index 511dbdd..0000000
--- a/src/encodings/plain-encoding.h
+++ /dev/null
@@ -1,82 +0,0 @@
-// Copyright 2012 Cloudera Inc.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#ifndef PARQUET_PLAIN_ENCODING_H
-#define PARQUET_PLAIN_ENCODING_H
-
-#include "encodings.h"
-
-namespace parquet_cpp {
-
-// Decoder for PLAIN-encoded pages. Fixed-width values are copied straight out
-// of the page; byte arrays are stored as a 4-byte length followed by that many
-// bytes, and the returned ByteArrays point into the page buffer.
-class PlainDecoder : public Decoder {
- public:
-  PlainDecoder(const parquet::Type::type& type)
-    : Decoder(type, parquet::Encoding::PLAIN), data_(NULL), len_(0) {
-  }
-
-  // Points the decoder at a new page of 'len' bytes holding 'num_values' values.
-  virtual void SetData(int num_values, const uint8_t* data, int len) {
-    num_values_ = num_values;
-    data_ = data;
-    len_ = len;
-  }
-
-  // Copies up to 'max_values' values of 'byte_size' bytes each into 'buffer'
-  // and returns the number copied. Reports end-of-data if the page does not
-  // hold that many bytes.
-  int GetValues(void* buffer, int max_values, int byte_size) {
-    max_values = std::min(max_values, num_values_);
-    if (max_values <= 0) return 0;
-    // Widen before multiplying so a large request cannot overflow int.
-    const int64_t size = static_cast<int64_t>(max_values) * byte_size;
-    if (len_ < size)  ParquetException::EofException();
-    memcpy(buffer, data_, size);
-    data_ += size;
-    len_ -= static_cast<int>(size);
-    num_values_ -= max_values;
-    return max_values;
-  }
-
-  virtual int GetInt32(int32_t* buffer, int max_values) {
-    return GetValues(buffer, max_values, sizeof(int32_t));
-  }
-
-  virtual int GetInt64(int64_t* buffer, int max_values) {
-    return GetValues(buffer, max_values, sizeof(int64_t));
-  }
-
-  virtual int GetFloat(float* buffer, int max_values) {
-    return GetValues(buffer, max_values, sizeof(float));
-  }
-
-  virtual int GetDouble(double* buffer, int max_values) {
-    return GetValues(buffer, max_values, sizeof(double));
-  }
-
-  // Decodes up to 'max_values' length-prefixed byte arrays. Both the length
-  // prefix and the payload are checked against the remaining page bytes before
-  // they are read.
-  virtual int GetByteArray(ByteArray* buffer, int max_values) {
-    max_values = std::min(max_values, num_values_);
-    const int kPrefixSize = sizeof(uint32_t);
-    for (int i = 0; i < max_values; ++i) {
-      if (len_ < kPrefixSize) ParquetException::EofException();
-      // memcpy instead of a pointer cast: data_ is not guaranteed to be aligned.
-      uint32_t value_len = 0;
-      memcpy(&value_len, data_, kPrefixSize);
-      // len_ >= kPrefixSize here, so the subtraction cannot go negative.
-      if (value_len > static_cast<uint32_t>(len_ - kPrefixSize)) {
-        ParquetException::EofException();
-      }
-      buffer[i].len = value_len;
-      buffer[i].ptr = data_ + kPrefixSize;
-      const int consumed = kPrefixSize + static_cast<int>(value_len);
-      data_ += consumed;
-      len_ -= consumed;
-    }
-    num_values_ -= max_values;
-    return max_values;
-  }
-
- private:
-  const uint8_t* data_;  // Next unread byte of the page.
-  int len_;              // Bytes remaining after data_.
-};
-
-}
-
-#endif
-

http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/337cf584/src/impala/bit-stream-utils.h
----------------------------------------------------------------------
diff --git a/src/impala/bit-stream-utils.h b/src/impala/bit-stream-utils.h
deleted file mode 100644
index 5eba254..0000000
--- a/src/impala/bit-stream-utils.h
+++ /dev/null
@@ -1,145 +0,0 @@
-// Copyright 2012 Cloudera Inc.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-
-#ifndef IMPALA_UTIL_BIT_STREAM_UTILS_H
-#define IMPALA_UTIL_BIT_STREAM_UTILS_H
-
-#include <boost/cstdint.hpp>
-#include <string.h>
-#include "impala/compiler-util.h"
-#include "impala/bit-util.h"
-#include "impala/logging.h"
-
-namespace impala {
-
-// Utility class to write bit/byte streams.  This class can write data to either be
-// bit packed or byte aligned (and a single stream that has a mix of both).
-// This class does not allocate memory.
-// Helper for emitting a bit/byte stream into a caller-owned buffer. Values may
-// be written bit packed, byte aligned, or as a mixture of both in one stream.
-// The class never allocates memory.
-class BitWriter {
- public:
-  // 'buffer' must already be allocated and hold at least 'buffer_len' bytes.
-  BitWriter(uint8_t* buffer, int buffer_len)
-      : buffer_(buffer),
-        max_bytes_(buffer_len),
-        buffered_values_(0),
-        byte_offset_(0),
-        bit_offset_(0) {
-  }
-
-  // Rewinds the writer to the start of the buffer and drops any buffered bits.
-  void Clear() {
-    bit_offset_ = 0;
-    byte_offset_ = 0;
-    buffered_values_ = 0;
-  }
-
-  // Bytes written so far. Buffered bits are counted, and a partially filled
-  // byte counts as a whole byte.
-  int bytes_written() const {
-    const int buffered_bytes = BitUtil::Ceil(bit_offset_, 8);
-    return byte_offset_ + buffered_bytes;
-  }
-  uint8_t* buffer() const { return buffer_; }
-  int buffer_len() const { return max_bytes_; }
-
-  // Appends the low 'num_bits' bits of 'v', bit packed, staging them in
-  // buffered_values_ and spilling to buffer_ when needed. Returns false when
-  // the buffer has no room. num_bits must be <= 32.
-  bool PutValue(uint64_t v, int num_bits);
-
-  // Stores the first 'num_bytes' bytes of 'v' at the next byte boundary. When T
-  // is wider than num_bytes the extra high-order bytes are dropped. Returns
-  // false when the buffer has no room.
-  template<typename T>
-  bool PutAligned(T v, int num_bytes);
-
-  // Writes 'v' as a byte-aligned variable-length quantity. Returns false when
-  // the buffer has no room.
-  // Background on the format: en.wikipedia.org/wiki/Variable-length_quantity
-  bool PutVlqInt(uint32_t v);
-  bool PutZigZagVlqInt(int32_t v);
-
-  // Reserves 'num_bytes' bytes starting at the next byte boundary and returns a
-  // pointer to the first of them, or NULL when the buffer has no room.
-  uint8_t* GetNextBytePtr(int num_bytes = 1);
-
-  // Copies every buffered bit out to the buffer; call it once writing is done.
-  // With 'align' set, buffered_values_ is also reset so that the next write
-  // begins on a byte boundary.
-  void Flush(bool align=false);
-
- private:
-  uint8_t* buffer_;
-  int max_bytes_;
-
-  // Staging word for bit-packed values. Filling it and copying it out in one
-  // memcpy is faster than storing into buffer_ a byte at a time.
-  uint64_t buffered_values_;
-
-  int byte_offset_;       // Position within buffer_
-  int bit_offset_;        // Position within buffered_values_
-};
-
-// Utility class to read bit/byte stream.  This class can read bits or bytes
-// that are either byte aligned or not.  It also has utilities to read multiple
-// bytes in one read (e.g. encoded int).
-// Utility class to read bit/byte stream.  This class can read bits or bytes
-// that are either byte aligned or not.  It also has utilities to read multiple
-// bytes in one read (e.g. encoded int).
-class BitReader {
- public:
-  // 'buffer' is the buffer to read from.  The buffer's length is 'buffer_len'.
-  // buffered_values_ is zeroed first so that a buffer shorter than 8 bytes does
-  // not leave its high bytes indeterminate, and nothing is copied when the
-  // buffer is empty.
-  BitReader(const uint8_t* buffer, int buffer_len) :
-      buffer_(buffer),
-      max_bytes_(buffer_len),
-      buffered_values_(0),
-      byte_offset_(0),
-      bit_offset_(0) {
-    int num_bytes = std::min(8, max_bytes_ - byte_offset_);
-    if (num_bytes > 0) memcpy(&buffered_values_, buffer_ + byte_offset_, num_bytes);
-  }
-
-  // Creates a reader over an empty stream. Every member is initialized so that
-  // reads on a default-constructed reader fail cleanly rather than inspecting
-  // indeterminate offsets.
-  BitReader() :
-      buffer_(NULL),
-      max_bytes_(0),
-      buffered_values_(0),
-      byte_offset_(0),
-      bit_offset_(0) {}
-
-  // Gets the next value from the buffer.  Returns true if 'v' could be read or false if
-  // there are not enough bytes left. num_bits must be <= 32.
-  template<typename T>
-  bool GetValue(int num_bits, T* v);
-
-  // Reads a 'num_bytes'-sized value from the buffer and stores it in 'v'. T needs to be a
-  // little-endian native type and big enough to store 'num_bytes'. The value is assumed
-  // to be byte-aligned so the stream will be advanced to the start of the next byte
-  // before 'v' is read. Returns false if there are not enough bytes left.
-  template<typename T>
-  bool GetAligned(int num_bytes, T* v);
-
-  // Reads a vlq encoded int from the stream.  The encoded int must start at the
-  // beginning of a byte. Return false if there were not enough bytes in the buffer.
-  bool GetVlqInt(uint64_t* v);
-  bool GetZigZagVlqInt(int64_t* v);
-
-  // Returns the number of bytes left in the stream, not including the current byte (i.e.,
-  // there may be an additional fraction of a byte).
-  int bytes_left() const {
-    return max_bytes_ - (byte_offset_ + BitUtil::Ceil(bit_offset_, 8));
-  }
-
-  // Maximum byte length of a vlq encoded int
-  static const int MAX_VLQ_BYTE_LEN = 5;
-
- private:
-  const uint8_t* buffer_;
-  int max_bytes_;
-
-  // Bytes are memcpy'd from buffer_ and values are read from this variable. This is
-  // faster than reading values byte by byte directly from buffer_.
-  uint64_t buffered_values_;
-
-  int byte_offset_;       // Offset in buffer_
-  int bit_offset_;        // Offset in buffered_values_
-};
-
-}
-
-#endif

http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/337cf584/src/impala/bit-stream-utils.inline.h
----------------------------------------------------------------------
diff --git a/src/impala/bit-stream-utils.inline.h b/src/impala/bit-stream-utils.inline.h
deleted file mode 100644
index d84ef4d..0000000
--- a/src/impala/bit-stream-utils.inline.h
+++ /dev/null
@@ -1,164 +0,0 @@
-// Copyright 2012 Cloudera Inc.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-
-#ifndef IMPALA_UTIL_BIT_STREAM_UTILS_INLINE_H
-#define IMPALA_UTIL_BIT_STREAM_UTILS_INLINE_H
-
-#include "impala/bit-stream-utils.h"
-
-namespace impala {
-
-// Appends the low 'num_bits' bits of 'v' to the stream. Bits accumulate in
-// buffered_values_; once 64 of them are staged the word is copied to buffer_
-// and whatever part of 'v' did not fit starts the next word. Returns false,
-// writing nothing, when the buffer cannot hold the value.
-inline bool BitWriter::PutValue(uint64_t v, int num_bits) {
-  // TODO: revisit this limit if necessary (can be raised to 64 by fixing some edge cases)
-  DCHECK_LE(num_bits, 32);
-  DCHECK_EQ(v >> num_bits, 0) << "v = " << v << ", num_bits = " << num_bits;
-
-  const int bits_in_use = byte_offset_ * 8 + bit_offset_;
-  if (UNLIKELY(bits_in_use + num_bits > max_bytes_ * 8)) return false;
-
-  buffered_values_ |= v << bit_offset_;
-  bit_offset_ += num_bits;
-
-  const bool word_is_full = bit_offset_ >= 64;
-  if (UNLIKELY(word_is_full)) {
-    // Emit the completed word, then seed the next one with the bits of v that
-    // spilled past bit 63.
-    memcpy(buffer_ + byte_offset_, &buffered_values_, 8);
-    byte_offset_ += 8;
-    bit_offset_ -= 64;
-    const int bits_already_written = num_bits - bit_offset_;
-    buffered_values_ = v >> bits_already_written;
-  }
-  DCHECK_LT(bit_offset_, 64);
-  return true;
-}
-
-// Copies the bits staged in buffered_values_ out to buffer_. Without 'align'
-// the writer's position is left alone, so later writes keep filling the same
-// word. With 'align' the staged word is cleared and the position moves past
-// the bytes just written.
-inline void BitWriter::Flush(bool align) {
-  const int staged_bytes = BitUtil::Ceil(bit_offset_, 8);
-  DCHECK_LE(byte_offset_ + staged_bytes, max_bytes_);
-  memcpy(buffer_ + byte_offset_, &buffered_values_, staged_bytes);
-
-  if (!align) return;
-
-  byte_offset_ += staged_bytes;
-  bit_offset_ = 0;
-  buffered_values_ = 0;
-}
-
-// Flushes to a byte boundary, then reserves 'num_bytes' bytes and returns a
-// pointer to the first one. Returns NULL, reserving nothing, when fewer than
-// 'num_bytes' bytes remain; the flush has still taken place in that case.
-inline uint8_t* BitWriter::GetNextBytePtr(int num_bytes) {
-  Flush(/* align */ true);
-  DCHECK_LE(byte_offset_, max_bytes_);
-
-  const int end_offset = byte_offset_ + num_bytes;
-  if (end_offset > max_bytes_) return NULL;
-
-  uint8_t* const reserved = buffer_ + byte_offset_;
-  byte_offset_ = end_offset;
-  return reserved;
-}
-
-// Writes the first 'num_bytes' bytes of 'val' at the next byte boundary.
-// Returns false when the buffer cannot take 'num_bytes' more bytes.
-template<typename T>
-inline bool BitWriter::PutAligned(T val, int num_bytes) {
-  uint8_t* const dest = GetNextBytePtr(num_bytes);
-  if (dest != NULL) {
-    memcpy(dest, &val, num_bytes);
-    return true;
-  }
-  return false;
-}
-
-// Writes 'v' as a variable-length quantity: seven payload bits per byte, least
-// significant group first, with the top bit set on every byte but the last.
-// Every byte is attempted even after a failed write; the result is false if
-// any single write failed.
-inline bool BitWriter::PutVlqInt(uint32_t v) {
-  bool all_written = true;
-  for (; v >= 0x80; v >>= 7) {
-    const uint8_t continued = static_cast<uint8_t>((v & 0x7F) | 0x80);
-    if (!PutAligned<uint8_t>(continued, 1)) all_written = false;
-  }
-  const uint8_t last = static_cast<uint8_t>(v & 0x7F);
-  if (!PutAligned<uint8_t>(last, 1)) all_written = false;
-  return all_written;
-}
-
-// Zig-zag encodes 'v' (0, -1, 1, -2, ... map to 0, 1, 2, 3, ...) and writes the
-// result as a vlq int. Returns false if there was not enough room.
-// All arithmetic is done on the unsigned bit pattern: left-shifting a negative
-// signed value is undefined behaviour before C++20 and right-shifting one is
-// implementation-defined.
-inline bool BitWriter::PutZigZagVlqInt(int32_t v) {
-  const uint32_t bits = static_cast<uint32_t>(v);
-  // All ones when v is negative, zero otherwise.
-  const uint32_t sign_mask = 0u - (bits >> 31);
-  return PutVlqInt((bits << 1) ^ sign_mask);
-}
-
-// Reads the next 'num_bits' bits into '*v'. Bits come from buffered_values_;
-// when the read reaches the end of that word the next (up to) 8 bytes are
-// loaded and the rest of the value is taken from them. Returns false, leaving
-// the reader untouched, when fewer than 'num_bits' bits remain.
-template<typename T>
-inline bool BitReader::GetValue(int num_bits, T* v) {
-  // TODO: revisit this limit if necessary
-  DCHECK_LE(num_bits, 32);
-  DCHECK_LE(num_bits, sizeof(T) * 8);
-
-  const int bits_consumed = byte_offset_ * 8 + bit_offset_;
-  if (UNLIKELY(bits_consumed + num_bits > max_bytes_ * 8)) return false;
-
-  const int end_bit = bit_offset_ + num_bits;
-  *v = BitUtil::TrailingBits(buffered_values_, end_bit) >> bit_offset_;
-  bit_offset_ = end_bit;
-
-  if (bit_offset_ >= 64) {
-    // The current word is used up: step to the next one and reload it.
-    byte_offset_ += 8;
-    bit_offset_ -= 64;
-    const int refill_bytes = std::min(8, max_bytes_ - byte_offset_);
-    memcpy(&buffered_values_, buffer_ + byte_offset_, refill_bytes);
-
-    // Append the bits of v that live in the freshly loaded word.
-    const int bits_from_old_word = num_bits - bit_offset_;
-    *v |= BitUtil::TrailingBits(buffered_values_, bit_offset_)
-          << bits_from_old_word;
-  }
-  DCHECK_LE(bit_offset_, 64);
-  return true;
-}
-
-// Skips to the next byte boundary, copies 'num_bytes' bytes into '*v', and
-// reloads buffered_values_ from the position after them. Returns false,
-// leaving the reader untouched, when fewer than 'num_bytes' bytes remain.
-template<typename T>
-inline bool BitReader::GetAligned(int num_bytes, T* v) {
-  DCHECK_LE(num_bytes, sizeof(T));
-  const int partial_bytes = BitUtil::Ceil(bit_offset_, 8);
-  if (UNLIKELY(byte_offset_ + partial_bytes + num_bytes > max_bytes_)) return false;
-
-  // Jump past any partially consumed byte and copy the value out.
-  const int value_offset = byte_offset_ + partial_bytes;
-  memcpy(v, buffer_ + value_offset, num_bytes);
-  byte_offset_ = value_offset + num_bytes;
-  bit_offset_ = 0;
-
-  // Reload the staging word from the new position.
-  const int refill_bytes = std::min(8, max_bytes_ - byte_offset_);
-  memcpy(&buffered_values_, buffer_ + byte_offset_, refill_bytes);
-  return true;
-}
-
-// Reads a byte-aligned variable-length quantity into '*v': seven payload bits
-// per byte, least significant group first, continuing while the top bit of a
-// byte is set. Returns false if the buffer runs out or if the encoding is too
-// long to fit in 64 bits.
-inline bool BitReader::GetVlqInt(uint64_t* v) {
-  *v = 0;
-  int shift = 0;
-  int num_bytes = 0;
-  uint8_t byte = 0;
-  do {
-    // A shift of 64 or more is undefined; treat an over-long encoding as corrupt.
-    if (UNLIKELY(shift >= 64)) return false;
-    if (!GetAligned<uint8_t>(1, &byte)) return false;
-    // Widen before shifting: in plain int the fifth group already overflows.
-    *v |= static_cast<uint64_t>(byte & 0x7F) << shift;
-    shift += 7;
-    // Count outside the DCHECK so the count does not depend on whether the
-    // macro evaluates its arguments.
-    ++num_bytes;
-    DCHECK_LE(num_bytes, MAX_VLQ_BYTE_LEN);
-  } while ((byte & 0x80) != 0);
-  return true;
-}
-
-// Reads a vlq int and undoes the zig-zag mapping, storing the signed result in
-// '*v'. Returns false, leaving '*v' unwritten, when the vlq read fails.
-inline bool BitReader::GetZigZagVlqInt(int64_t* v) {
-  uint64_t encoded;
-  if (!GetVlqInt(&encoded)) return false;
-
-  // The low bit carries the sign; the remaining bits carry the magnitude.
-  const uint64_t magnitude = encoded >> 1;
-  const uint64_t sign_mask = -(encoded & 1);
-  *reinterpret_cast<uint64_t*>(v) = magnitude ^ sign_mask;
-  return true;
-}
-
-}
-
-#endif

http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/337cf584/src/impala/bit-util.h
----------------------------------------------------------------------
diff --git a/src/impala/bit-util.h b/src/impala/bit-util.h
deleted file mode 100644
index c2b6055..0000000
--- a/src/impala/bit-util.h
+++ /dev/null
@@ -1,174 +0,0 @@
-// Copyright 2012 Cloudera Inc.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-
-#ifndef IMPALA_BIT_UTIL_H
-#define IMPALA_BIT_UTIL_H
-
-#if defined(__APPLE__)
-  #include <machine/endian.h>
-#else
-  #include <endian.h>
-#endif
-
-#include "impala/compiler-util.h"
-#include "impala/logging.h"
-
-namespace impala {
-
-// Utility class to do standard bit tricks
-// TODO: is this in boost or something else like that?
-// Collection of stateless bit-manipulation helpers: integer rounding, bit
-// counting and byte-order conversion.
-class BitUtil {
- public:
-  // Returns the ceil of value/divisor
-  static inline int Ceil(int value, int divisor) {
-    return value / divisor + (value % divisor != 0);
-  }
-
-  // Returns 'value' rounded up to the nearest multiple of 'factor'
-  static inline int RoundUp(int value, int factor) {
-    return (value + (factor - 1)) / factor * factor;
-  }
-
-  // Returns 'value' rounded down to the nearest multiple of 'factor'
-  static inline int RoundDown(int value, int factor) {
-    return (value / factor) * factor;
-  }
-
-  // Returns the number of set bits in x
-  static inline int Popcount(uint64_t x) {
-    int count = 0;
-    // x &= x-1 clears the lowest set bit, so the loop runs once per set bit.
-    for (; x != 0; ++count) x &= x-1;
-    return count;
-  }
-
-  // Returns the 'num_bits' least-significant bits of 'v'.
-  static inline uint64_t TrailingBits(uint64_t v, int num_bits) {
-    if (UNLIKELY(num_bits == 0)) return 0;
-    if (UNLIKELY(num_bits >= 64)) return v;
-    int n = 64 - num_bits;
-    return (v << n) >> n;
-  }
-
-  // Returns ceil(log2(x)) for x > 1, and 0 for x == 0.
-  // NOTE(review): for x == 1 this returns 1 although ceil(log2(1)) is 0. Left
-  // unchanged here because callers may rely on it -- confirm before changing.
-  // TODO: this could be faster if we use __builtin_clz.  Fix this if this ever shows up
-  // in a hot path.
-  static inline int Log2(uint64_t x) {
-    if (x == 0) return 0;
-    // Compute result = ceil(log2(x))
-    //                = floor(log2(x - 1)) + 1, for x > 1
-    // by finding the position of the most significant bit (1-indexed) of x - 1
-    // (floor(log2(n)) = MSB(n) (0-indexed))
-    --x;
-    int result = 1;
-    while (x >>= 1) ++result;
-    return result;
-  }
-
-  // Returns the minimum number of bits needed to represent the value of 'x'
-  static inline int NumRequiredBits(uint64_t x) {
-    for (int i = 63; i >= 0; --i) {
-      // The mask must be a 64-bit unsigned one: '1L << i' is undefined where
-      // long is 32 bits wide and overflows a signed long at i == 63.
-      if (x & (static_cast<uint64_t>(1) << i)) return i + 1;
-    }
-    return 0;
-  }
-
-  // Swaps the byte order (i.e. endianness)
-  static inline int64_t ByteSwap(int64_t value) {
-    return __builtin_bswap64(value);
-  }
-  static inline uint64_t ByteSwap(uint64_t value) {
-    return static_cast<uint64_t>(__builtin_bswap64(value));
-  }
-  static inline int32_t ByteSwap(int32_t value) {
-    return __builtin_bswap32(value);
-  }
-  static inline uint32_t ByteSwap(uint32_t value) {
-    return static_cast<uint32_t>(__builtin_bswap32(value));
-  }
-  static inline int16_t ByteSwap(int16_t value) {
-    return (((value >> 8) & 0xff) | ((value & 0xff) << 8));
-  }
-  static inline uint16_t ByteSwap(uint16_t value) {
-    return static_cast<uint16_t>(ByteSwap(static_cast<int16_t>(value)));
-  }
-
-  // Write the swapped bytes into dst. Src and dst cannot overlap.
-  // Lengths of 1, 2, 4 and 8 use the typed overloads; any other length is
-  // reversed byte by byte.
-  static inline void ByteSwap(void* dst, const void* src, int len) {
-    switch (len) {
-      case 1:
-        *reinterpret_cast<int8_t*>(dst) = *reinterpret_cast<const int8_t*>(src);
-        return;
-      case 2:
-        *reinterpret_cast<int16_t*>(dst) =
-            ByteSwap(*reinterpret_cast<const int16_t*>(src));
-        return;
-      case 4:
-        *reinterpret_cast<int32_t*>(dst) =
-            ByteSwap(*reinterpret_cast<const int32_t*>(src));
-        return;
-      case 8:
-        *reinterpret_cast<int64_t*>(dst) =
-            ByteSwap(*reinterpret_cast<const int64_t*>(src));
-        return;
-      default: break;
-    }
-
-    uint8_t* d = reinterpret_cast<uint8_t*>(dst);
-    const uint8_t* s = reinterpret_cast<const uint8_t*>(src);
-    for (int i = 0; i < len; ++i) {
-      d[i] = s[len - i - 1];
-    }
-  }
-
-  // Converts to big endian format (if not already in big endian) from the
-  // machine's native endian format.
-#if __BYTE_ORDER == __LITTLE_ENDIAN
-  static inline int64_t  ToBigEndian(int64_t value)  { return ByteSwap(value); }
-  static inline uint64_t ToBigEndian(uint64_t value) { return ByteSwap(value); }
-  static inline int32_t  ToBigEndian(int32_t value)  { return ByteSwap(value); }
-  static inline uint32_t ToBigEndian(uint32_t value) { return ByteSwap(value); }
-  static inline int16_t  ToBigEndian(int16_t value)  { return ByteSwap(value); }
-  static inline uint16_t ToBigEndian(uint16_t value) { return ByteSwap(value); }
-#else
-  static inline int64_t  ToBigEndian(int64_t val)  { return val; }
-  static inline uint64_t ToBigEndian(uint64_t val) { return val; }
-  static inline int32_t  ToBigEndian(int32_t val)  { return val; }
-  static inline uint32_t ToBigEndian(uint32_t val) { return val; }
-  static inline int16_t  ToBigEndian(int16_t val)  { return val; }
-  static inline uint16_t ToBigEndian(uint16_t val) { return val; }
-#endif
-
-  // Converts from big endian format to the machine's native endian format.
-#if __BYTE_ORDER == __LITTLE_ENDIAN
-  static inline int64_t  FromBigEndian(int64_t value)  { return ByteSwap(value); }
-  static inline uint64_t FromBigEndian(uint64_t value) { return ByteSwap(value); }
-  static inline int32_t  FromBigEndian(int32_t value)  { return ByteSwap(value); }
-  static inline uint32_t FromBigEndian(uint32_t value) { return ByteSwap(value); }
-  static inline int16_t  FromBigEndian(int16_t value)  { return ByteSwap(value); }
-  static inline uint16_t FromBigEndian(uint16_t value) { return ByteSwap(value); }
-#else
-  static inline int64_t  FromBigEndian(int64_t val)  { return val; }
-  static inline uint64_t FromBigEndian(uint64_t val) { return val; }
-  static inline int32_t  FromBigEndian(int32_t val)  { return val; }
-  static inline uint32_t FromBigEndian(uint32_t val) { return val; }
-  static inline int16_t  FromBigEndian(int16_t val)  { return val; }
-  static inline uint16_t FromBigEndian(uint16_t val) { return val; }
-#endif
-
-};
-
-}
-
-#endif

http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/337cf584/src/impala/compiler-util.h
----------------------------------------------------------------------
diff --git a/src/impala/compiler-util.h b/src/impala/compiler-util.h
deleted file mode 100644
index 6b25cdf..0000000
--- a/src/impala/compiler-util.h
+++ /dev/null
@@ -1,38 +0,0 @@
-// Copyright 2012 Cloudera Inc.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-
-#ifndef IMPALA_COMMON_COMPILER_UTIL_H
-#define IMPALA_COMMON_COMPILER_UTIL_H
-
-// Compiler hints that a branch is likely or unlikely to be taken. Taken from
-// the "What all programmers should know about memory" paper.
-// example: if (LIKELY(size > 0)) { ... }
-// example: if (UNLIKELY(!status.ok())) { ... }
-// Any definition of LIKELY/UNLIKELY made by an earlier header is dropped first
-// so that the definitions below are the ones in effect.
-#ifdef LIKELY 
-#undef LIKELY
-#endif
-
-#ifdef UNLIKELY 
-#undef UNLIKELY
-#endif
-
-// '!!' converts the expression to 0 or 1 before it is compared with the
-// expected value.
-#define LIKELY(expr) __builtin_expect(!!(expr), 1)
-#define UNLIKELY(expr) __builtin_expect(!!(expr), 0)
-
-// Asks the compiler to emit a prefetch for 'addr'.
-#define PREFETCH(addr) __builtin_prefetch(addr)
-
-#endif
-

http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/337cf584/src/impala/logging.h
----------------------------------------------------------------------
diff --git a/src/impala/logging.h b/src/impala/logging.h
deleted file mode 100644
index fc130f3..0000000
--- a/src/impala/logging.h
+++ /dev/null
@@ -1,31 +0,0 @@
-// Copyright 2012 Cloudera Inc.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-
-#ifndef IMPALA_COMMON_LOGGING_H
-#define IMPALA_COMMON_LOGGING_H
-
-#include <iostream>
-
-// No-op stand-ins for the glog checking and logging macros. Each one expands
-// to 'while(false) std::cout', so a trailing '<< message' still compiles but
-// is never executed. The macro arguments are discarded by the preprocessor and
-// are therefore never evaluated: code must not rely on side effects inside a
-// DCHECK argument.
-#define DCHECK(condition) while(false) std::cout
-#define DCHECK_EQ(a, b) while(false) std::cout
-#define DCHECK_NE(a, b) while(false) std::cout
-#define DCHECK_GT(a, b) while(false) std::cout
-#define DCHECK_LT(a, b) while(false) std::cout
-#define DCHECK_GE(a, b) while(false) std::cout
-#define DCHECK_LE(a, b) while(false) std::cout
-// Similar to how glog defines DCHECK for release.
-#define LOG(level) while(false) std::cout
-
-#endif

[2/7] parquet-cpp git commit: PARQUET-416: C++11 compilation, code reorg, libparquet and installation targets

Posted by no...@apache.org.

http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/337cf584/src/parquet/thrift/parquet_types.cpp
----------------------------------------------------------------------
diff --git a/src/parquet/thrift/parquet_types.cpp b/src/parquet/thrift/parquet_types.cpp
new file mode 100644
index 0000000..06d388c
--- /dev/null
+++ b/src/parquet/thrift/parquet_types.cpp
@@ -0,0 +1,2006 @@
+/**
+ * Autogenerated by Thrift Compiler (0.9.0)
+ *
+ * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING
+ *  @generated
+ */
+#include "parquet_types.h"
+
+#include <algorithm>
+
+namespace parquet {
+
+int _kTypeValues[] = {  // Enumerator values of Type, in the same order as _kTypeNames below.
+  Type::BOOLEAN,
+  Type::INT32,
+  Type::INT64,
+  Type::INT96,
+  Type::FLOAT,
+  Type::DOUBLE,
+  Type::BYTE_ARRAY,
+  Type::FIXED_LEN_BYTE_ARRAY
+};
+const char* _kTypeNames[] = {  // Display names, index for index with _kTypeValues.
+  "BOOLEAN",
+  "INT32",
+  "INT64",
+  "INT96",
+  "FLOAT",
+  "DOUBLE",
+  "BYTE_ARRAY",
+  "FIXED_LEN_BYTE_ARRAY"
+};
+const std::map<int, const char*> _Type_VALUES_TO_NAMES(::apache::thrift::TEnumIterator(8, _kTypeValues, _kTypeNames), ::apache::thrift::TEnumIterator(-1, NULL, NULL));  // Range-constructed from the 8 value/name pairs above; the (-1, NULL, NULL) iterator is passed as the end of the range.
+
+int _kConvertedTypeValues[] = {  // Enumerator values of ConvertedType, in the same order as _kConvertedTypeNames below.
+  ConvertedType::UTF8,
+  ConvertedType::MAP,
+  ConvertedType::MAP_KEY_VALUE,
+  ConvertedType::LIST,
+  ConvertedType::ENUM,
+  ConvertedType::DECIMAL
+};
+const char* _kConvertedTypeNames[] = {  // Display names, index for index with _kConvertedTypeValues.
+  "UTF8",
+  "MAP",
+  "MAP_KEY_VALUE",
+  "LIST",
+  "ENUM",
+  "DECIMAL"
+};
+const std::map<int, const char*> _ConvertedType_VALUES_TO_NAMES(::apache::thrift::TEnumIterator(6, _kConvertedTypeValues, _kConvertedTypeNames), ::apache::thrift::TEnumIterator(-1, NULL, NULL));  // Range-constructed from the 6 value/name pairs above; the (-1, NULL, NULL) iterator is passed as the end of the range.
+
+int _kFieldRepetitionTypeValues[] = {  // Enumerator values of FieldRepetitionType, in the same order as the names array below.
+  FieldRepetitionType::REQUIRED,
+  FieldRepetitionType::OPTIONAL,
+  FieldRepetitionType::REPEATED
+};
+const char* _kFieldRepetitionTypeNames[] = {  // Display names, index for index with _kFieldRepetitionTypeValues.
+  "REQUIRED",
+  "OPTIONAL",
+  "REPEATED"
+};
+const std::map<int, const char*> _FieldRepetitionType_VALUES_TO_NAMES(::apache::thrift::TEnumIterator(3, _kFieldRepetitionTypeValues, _kFieldRepetitionTypeNames), ::apache::thrift::TEnumIterator(-1, NULL, NULL));  // Range-constructed from the 3 value/name pairs above; the (-1, NULL, NULL) iterator is passed as the end of the range.
+
+int _kEncodingValues[] = {  // Enumerator values of Encoding, in the same order as _kEncodingNames below.
+  Encoding::PLAIN,
+  Encoding::PLAIN_DICTIONARY,
+  Encoding::RLE,
+  Encoding::BIT_PACKED,
+  Encoding::DELTA_BINARY_PACKED,
+  Encoding::DELTA_LENGTH_BYTE_ARRAY,
+  Encoding::DELTA_BYTE_ARRAY,
+  Encoding::RLE_DICTIONARY
+};
+const char* _kEncodingNames[] = {  // Display names, index for index with _kEncodingValues.
+  "PLAIN",
+  "PLAIN_DICTIONARY",
+  "RLE",
+  "BIT_PACKED",
+  "DELTA_BINARY_PACKED",
+  "DELTA_LENGTH_BYTE_ARRAY",
+  "DELTA_BYTE_ARRAY",
+  "RLE_DICTIONARY"
+};
+const std::map<int, const char*> _Encoding_VALUES_TO_NAMES(::apache::thrift::TEnumIterator(8, _kEncodingValues, _kEncodingNames), ::apache::thrift::TEnumIterator(-1, NULL, NULL));  // Range-constructed from the 8 value/name pairs above; the (-1, NULL, NULL) iterator is passed as the end of the range.
+
+int _kCompressionCodecValues[] = {  // Enumerator values of CompressionCodec, in the same order as the names array below.
+  CompressionCodec::UNCOMPRESSED,
+  CompressionCodec::SNAPPY,
+  CompressionCodec::GZIP,
+  CompressionCodec::LZO
+};
+const char* _kCompressionCodecNames[] = {  // Display names, index for index with _kCompressionCodecValues.
+  "UNCOMPRESSED",
+  "SNAPPY",
+  "GZIP",
+  "LZO"
+};
+const std::map<int, const char*> _CompressionCodec_VALUES_TO_NAMES(::apache::thrift::TEnumIterator(4, _kCompressionCodecValues, _kCompressionCodecNames), ::apache::thrift::TEnumIterator(-1, NULL, NULL));  // Range-constructed from the 4 value/name pairs above; the (-1, NULL, NULL) iterator is passed as the end of the range.
+
+int _kPageTypeValues[] = {  // Enumerator values of PageType, in the same order as _kPageTypeNames below.
+  PageType::DATA_PAGE,
+  PageType::INDEX_PAGE,
+  PageType::DICTIONARY_PAGE,
+  PageType::DATA_PAGE_V2
+};
+const char* _kPageTypeNames[] = {  // Display names, index for index with _kPageTypeValues.
+  "DATA_PAGE",
+  "INDEX_PAGE",
+  "DICTIONARY_PAGE",
+  "DATA_PAGE_V2"
+};
+const std::map<int, const char*> _PageType_VALUES_TO_NAMES(::apache::thrift::TEnumIterator(4, _kPageTypeValues, _kPageTypeNames), ::apache::thrift::TEnumIterator(-1, NULL, NULL));  // Range-constructed from the 4 value/name pairs above; the (-1, NULL, NULL) iterator is passed as the end of the range.
+
+const char* Statistics::ascii_fingerprint = "CE004821871820DD79A8FD98BB101F6D";  // Generator-emitted fingerprint, written as 32 hex characters.
+const uint8_t Statistics::binary_fingerprint[16] = {0xCE,0x00,0x48,0x21,0x87,0x18,0x20,0xDD,0x79,0xA8,0xFD,0x98,0xBB,0x10,0x1F,0x6D};  // The same 16 bytes that ascii_fingerprint spells in hex.
+
+uint32_t Statistics::read(::apache::thrift::protocol::TProtocol* iprot) {  // Reads this struct's fields from iprot; returns the sum of the values returned by the protocol calls.
+
+  uint32_t xfer = 0;
+  std::string fname;
+  ::apache::thrift::protocol::TType ftype;
+  int16_t fid;
+
+  xfer += iprot->readStructBegin(fname);
+
+  using ::apache::thrift::protocol::TProtocolException;  // Not referenced in this function: no field is checked after the loop.
+
+
+  while (true)
+  {
+    xfer += iprot->readFieldBegin(fname, ftype, fid);
+    if (ftype == ::apache::thrift::protocol::T_STOP) {  // T_STOP is the only exit from the field loop.
+      break;
+    }
+    switch (fid)  // Dispatch on the field id just read.
+    {
+      case 1:
+        if (ftype == ::apache::thrift::protocol::T_STRING) {
+          xfer += iprot->readBinary(this->max);
+          this->__isset.max = true;
+        } else {
+          xfer += iprot->skip(ftype);  // Known id but unexpected wire type: skip the value.
+        }
+        break;
+      case 2:
+        if (ftype == ::apache::thrift::protocol::T_STRING) {
+          xfer += iprot->readBinary(this->min);
+          this->__isset.min = true;
+        } else {
+          xfer += iprot->skip(ftype);
+        }
+        break;
+      case 3:
+        if (ftype == ::apache::thrift::protocol::T_I64) {
+          xfer += iprot->readI64(this->null_count);
+          this->__isset.null_count = true;
+        } else {
+          xfer += iprot->skip(ftype);
+        }
+        break;
+      case 4:
+        if (ftype == ::apache::thrift::protocol::T_I64) {
+          xfer += iprot->readI64(this->distinct_count);
+          this->__isset.distinct_count = true;
+        } else {
+          xfer += iprot->skip(ftype);
+        }
+        break;
+      default:
+        xfer += iprot->skip(ftype);  // Field id not handled by this struct: skip the value.
+        break;
+    }
+    xfer += iprot->readFieldEnd();
+  }
+
+  xfer += iprot->readStructEnd();
+
+  return xfer;
+}
+
+uint32_t Statistics::write(::apache::thrift::protocol::TProtocol* oprot) const {  // Writes this struct to oprot; returns the sum of the values returned by the protocol calls.
+  uint32_t xfer = 0;
+  uint32_t fcnt = 0;  // Incremented once per field written below, but never read in this function.
+  xfer += oprot->writeStructBegin("Statistics");
+
+  if (this->__isset.max) {  // Each of the four fields is emitted only when its __isset flag is set.
+    ++fcnt;
+    xfer += oprot->writeFieldBegin("max", ::apache::thrift::protocol::T_STRING, 1);
+    xfer += oprot->writeBinary(this->max);
+    xfer += oprot->writeFieldEnd();
+  }
+  if (this->__isset.min) {
+    ++fcnt;
+    xfer += oprot->writeFieldBegin("min", ::apache::thrift::protocol::T_STRING, 2);
+    xfer += oprot->writeBinary(this->min);
+    xfer += oprot->writeFieldEnd();
+  }
+  if (this->__isset.null_count) {
+    ++fcnt;
+    xfer += oprot->writeFieldBegin("null_count", ::apache::thrift::protocol::T_I64, 3);
+    xfer += oprot->writeI64(this->null_count);
+    xfer += oprot->writeFieldEnd();
+  }
+  if (this->__isset.distinct_count) {
+    ++fcnt;
+    xfer += oprot->writeFieldBegin("distinct_count", ::apache::thrift::protocol::T_I64, 4);
+    xfer += oprot->writeI64(this->distinct_count);
+    xfer += oprot->writeFieldEnd();
+  }
+  xfer += oprot->writeFieldStop();  // Stop marker is written even when no field was emitted.
+  xfer += oprot->writeStructEnd();
+  return xfer;
+}
+
+void swap(Statistics &a, Statistics &b) {  // Exchanges the four data members and the __isset flags of a and b.
+  using ::std::swap;  // Lets the unqualified calls below use std::swap or an overload found by argument-dependent lookup.
+  swap(a.max, b.max);
+  swap(a.min, b.min);
+  swap(a.null_count, b.null_count);
+  swap(a.distinct_count, b.distinct_count);
+  swap(a.__isset, b.__isset);
+}
+
+const char* SchemaElement::ascii_fingerprint = "388A784401753800444CFEAC8BC1B1A1";  // Generator-emitted fingerprint, written as 32 hex characters.
+const uint8_t SchemaElement::binary_fingerprint[16] = {0x38,0x8A,0x78,0x44,0x01,0x75,0x38,0x00,0x44,0x4C,0xFE,0xAC,0x8B,0xC1,0xB1,0xA1};  // The same 16 bytes that ascii_fingerprint spells in hex.
+
+uint32_t SchemaElement::read(::apache::thrift::protocol::TProtocol* iprot) {  // Reads this struct's fields from iprot; returns the sum of the protocol call results, or throws if name was never read.
+
+  uint32_t xfer = 0;
+  std::string fname;
+  ::apache::thrift::protocol::TType ftype;
+  int16_t fid;
+
+  xfer += iprot->readStructBegin(fname);
+
+  using ::apache::thrift::protocol::TProtocolException;
+
+  bool isset_name = false;  // Set when field 4 is read; checked after the loop.
+
+  while (true)
+  {
+    xfer += iprot->readFieldBegin(fname, ftype, fid);
+    if (ftype == ::apache::thrift::protocol::T_STOP) {  // T_STOP is the only exit from the field loop.
+      break;
+    }
+    switch (fid)  // Dispatch on the field id just read.
+    {
+      case 1:
+        if (ftype == ::apache::thrift::protocol::T_I32) {
+          int32_t ecast0;
+          xfer += iprot->readI32(ecast0);
+          this->type = (Type::type)ecast0;  // Decoded integer is cast to the enum without a range check.
+          this->__isset.type = true;
+        } else {
+          xfer += iprot->skip(ftype);  // Known id but unexpected wire type: skip the value.
+        }
+        break;
+      case 2:
+        if (ftype == ::apache::thrift::protocol::T_I32) {
+          xfer += iprot->readI32(this->type_length);
+          this->__isset.type_length = true;
+        } else {
+          xfer += iprot->skip(ftype);
+        }
+        break;
+      case 3:
+        if (ftype == ::apache::thrift::protocol::T_I32) {
+          int32_t ecast1;
+          xfer += iprot->readI32(ecast1);
+          this->repetition_type = (FieldRepetitionType::type)ecast1;
+          this->__isset.repetition_type = true;
+        } else {
+          xfer += iprot->skip(ftype);
+        }
+        break;
+      case 4:
+        if (ftype == ::apache::thrift::protocol::T_STRING) {
+          xfer += iprot->readString(this->name);
+          isset_name = true;  // Tracked in the local flag rather than in __isset.
+        } else {
+          xfer += iprot->skip(ftype);
+        }
+        break;
+      case 5:
+        if (ftype == ::apache::thrift::protocol::T_I32) {
+          xfer += iprot->readI32(this->num_children);
+          this->__isset.num_children = true;
+        } else {
+          xfer += iprot->skip(ftype);
+        }
+        break;
+      case 6:
+        if (ftype == ::apache::thrift::protocol::T_I32) {
+          int32_t ecast2;
+          xfer += iprot->readI32(ecast2);
+          this->converted_type = (ConvertedType::type)ecast2;
+          this->__isset.converted_type = true;
+        } else {
+          xfer += iprot->skip(ftype);
+        }
+        break;
+      case 7:
+        if (ftype == ::apache::thrift::protocol::T_I32) {
+          xfer += iprot->readI32(this->scale);
+          this->__isset.scale = true;
+        } else {
+          xfer += iprot->skip(ftype);
+        }
+        break;
+      case 8:
+        if (ftype == ::apache::thrift::protocol::T_I32) {
+          xfer += iprot->readI32(this->precision);
+          this->__isset.precision = true;
+        } else {
+          xfer += iprot->skip(ftype);
+        }
+        break;
+      default:
+        xfer += iprot->skip(ftype);  // Field id not handled by this struct: skip the value.
+        break;
+    }
+    xfer += iprot->readFieldEnd();
+  }
+
+  xfer += iprot->readStructEnd();
+
+  if (!isset_name)  // name was absent or had the wrong wire type: reject the input.
+    throw TProtocolException(TProtocolException::INVALID_DATA);
+  return xfer;
+}
+
+uint32_t SchemaElement::write(::apache::thrift::protocol::TProtocol* oprot) const {  // Writes this struct to oprot; returns the sum of the values returned by the protocol calls.
+  uint32_t xfer = 0;
+  uint32_t fcnt = 0;  // Incremented once per field written below, but never read in this function.
+  xfer += oprot->writeStructBegin("SchemaElement");
+
+  if (this->__isset.type) {  // Fields guarded by __isset are emitted only when their flag is set.
+    ++fcnt;
+    xfer += oprot->writeFieldBegin("type", ::apache::thrift::protocol::T_I32, 1);
+    xfer += oprot->writeI32((int32_t)this->type);
+    xfer += oprot->writeFieldEnd();
+  }
+  if (this->__isset.type_length) {
+    ++fcnt;
+    xfer += oprot->writeFieldBegin("type_length", ::apache::thrift::protocol::T_I32, 2);
+    xfer += oprot->writeI32(this->type_length);
+    xfer += oprot->writeFieldEnd();
+  }
+  if (this->__isset.repetition_type) {
+    ++fcnt;
+    xfer += oprot->writeFieldBegin("repetition_type", ::apache::thrift::protocol::T_I32, 3);
+    xfer += oprot->writeI32((int32_t)this->repetition_type);
+    xfer += oprot->writeFieldEnd();
+  }
+  ++fcnt;  // name (field 4) is written unconditionally, with no __isset check.
+  xfer += oprot->writeFieldBegin("name", ::apache::thrift::protocol::T_STRING, 4);
+  xfer += oprot->writeString(this->name);
+  xfer += oprot->writeFieldEnd();
+
+  if (this->__isset.num_children) {
+    ++fcnt;
+    xfer += oprot->writeFieldBegin("num_children", ::apache::thrift::protocol::T_I32, 5);
+    xfer += oprot->writeI32(this->num_children);
+    xfer += oprot->writeFieldEnd();
+  }
+  if (this->__isset.converted_type) {
+    ++fcnt;
+    xfer += oprot->writeFieldBegin("converted_type", ::apache::thrift::protocol::T_I32, 6);
+    xfer += oprot->writeI32((int32_t)this->converted_type);
+    xfer += oprot->writeFieldEnd();
+  }
+  if (this->__isset.scale) {
+    ++fcnt;
+    xfer += oprot->writeFieldBegin("scale", ::apache::thrift::protocol::T_I32, 7);
+    xfer += oprot->writeI32(this->scale);
+    xfer += oprot->writeFieldEnd();
+  }
+  if (this->__isset.precision) {
+    ++fcnt;
+    xfer += oprot->writeFieldBegin("precision", ::apache::thrift::protocol::T_I32, 8);
+    xfer += oprot->writeI32(this->precision);
+    xfer += oprot->writeFieldEnd();
+  }
+  xfer += oprot->writeFieldStop();
+  xfer += oprot->writeStructEnd();
+  return xfer;
+}
+
+void swap(SchemaElement &a, SchemaElement &b) {  // Exchanges the eight data members and the __isset flags of a and b.
+  using ::std::swap;  // Lets the unqualified calls below use std::swap or an overload found by argument-dependent lookup.
+  swap(a.type, b.type);
+  swap(a.type_length, b.type_length);
+  swap(a.repetition_type, b.repetition_type);
+  swap(a.name, b.name);
+  swap(a.num_children, b.num_children);
+  swap(a.converted_type, b.converted_type);
+  swap(a.scale, b.scale);
+  swap(a.precision, b.precision);
+  swap(a.__isset, b.__isset);
+}
+
+const char* DataPageHeader::ascii_fingerprint = "5FC1792B0483E9C984475384165040B1";  // Generator-emitted fingerprint, written as 32 hex characters.
+const uint8_t DataPageHeader::binary_fingerprint[16] = {0x5F,0xC1,0x79,0x2B,0x04,0x83,0xE9,0xC9,0x84,0x47,0x53,0x84,0x16,0x50,0x40,0xB1};  // The same 16 bytes that ascii_fingerprint spells in hex.
+
+uint32_t DataPageHeader::read(::apache::thrift::protocol::TProtocol* iprot) {  // Reads this struct's fields from iprot; returns the sum of the protocol call results, or throws if any of fields 1-4 was never read.
+
+  uint32_t xfer = 0;
+  std::string fname;
+  ::apache::thrift::protocol::TType ftype;
+  int16_t fid;
+
+  xfer += iprot->readStructBegin(fname);
+
+  using ::apache::thrift::protocol::TProtocolException;
+
+  bool isset_num_values = false;  // These four locals record which fields were read; all are checked after the loop.
+  bool isset_encoding = false;
+  bool isset_definition_level_encoding = false;
+  bool isset_repetition_level_encoding = false;
+
+  while (true)
+  {
+    xfer += iprot->readFieldBegin(fname, ftype, fid);
+    if (ftype == ::apache::thrift::protocol::T_STOP) {  // T_STOP is the only exit from the field loop.
+      break;
+    }
+    switch (fid)  // Dispatch on the field id just read.
+    {
+      case 1:
+        if (ftype == ::apache::thrift::protocol::T_I32) {
+          xfer += iprot->readI32(this->num_values);
+          isset_num_values = true;
+        } else {
+          xfer += iprot->skip(ftype);  // Known id but unexpected wire type: skip the value.
+        }
+        break;
+      case 2:
+        if (ftype == ::apache::thrift::protocol::T_I32) {
+          int32_t ecast3;
+          xfer += iprot->readI32(ecast3);
+          this->encoding = (Encoding::type)ecast3;  // Decoded integer is cast to the enum without a range check.
+          isset_encoding = true;
+        } else {
+          xfer += iprot->skip(ftype);
+        }
+        break;
+      case 3:
+        if (ftype == ::apache::thrift::protocol::T_I32) {
+          int32_t ecast4;
+          xfer += iprot->readI32(ecast4);
+          this->definition_level_encoding = (Encoding::type)ecast4;
+          isset_definition_level_encoding = true;
+        } else {
+          xfer += iprot->skip(ftype);
+        }
+        break;
+      case 4:
+        if (ftype == ::apache::thrift::protocol::T_I32) {
+          int32_t ecast5;
+          xfer += iprot->readI32(ecast5);
+          this->repetition_level_encoding = (Encoding::type)ecast5;
+          isset_repetition_level_encoding = true;
+        } else {
+          xfer += iprot->skip(ftype);
+        }
+        break;
+      case 5:
+        if (ftype == ::apache::thrift::protocol::T_STRUCT) {
+          xfer += this->statistics.read(iprot);  // The nested struct reads itself from the same protocol object.
+          this->__isset.statistics = true;
+        } else {
+          xfer += iprot->skip(ftype);
+        }
+        break;
+      default:
+        xfer += iprot->skip(ftype);  // Field id not handled by this struct: skip the value.
+        break;
+    }
+    xfer += iprot->readFieldEnd();
+  }
+
+  xfer += iprot->readStructEnd();
+
+  if (!isset_num_values)  // Any of the four tracked fields missing: reject the input with INVALID_DATA.
+    throw TProtocolException(TProtocolException::INVALID_DATA);
+  if (!isset_encoding)
+    throw TProtocolException(TProtocolException::INVALID_DATA);
+  if (!isset_definition_level_encoding)
+    throw TProtocolException(TProtocolException::INVALID_DATA);
+  if (!isset_repetition_level_encoding)
+    throw TProtocolException(TProtocolException::INVALID_DATA);
+  return xfer;
+}
+
+uint32_t DataPageHeader::write(::apache::thrift::protocol::TProtocol* oprot) const {  // Writes this struct to oprot; returns the sum of the values returned by the protocol calls.
+  uint32_t xfer = 0;
+  uint32_t fcnt = 0;  // Incremented once per field written below, but never read in this function.
+  xfer += oprot->writeStructBegin("DataPageHeader");
+
+  ++fcnt;  // Fields 1-4 are written unconditionally, with no __isset check.
+  xfer += oprot->writeFieldBegin("num_values", ::apache::thrift::protocol::T_I32, 1);
+  xfer += oprot->writeI32(this->num_values);
+  xfer += oprot->writeFieldEnd();
+
+  ++fcnt;
+  xfer += oprot->writeFieldBegin("encoding", ::apache::thrift::protocol::T_I32, 2);
+  xfer += oprot->writeI32((int32_t)this->encoding);
+  xfer += oprot->writeFieldEnd();
+
+  ++fcnt;
+  xfer += oprot->writeFieldBegin("definition_level_encoding", ::apache::thrift::protocol::T_I32, 3);
+  xfer += oprot->writeI32((int32_t)this->definition_level_encoding);
+  xfer += oprot->writeFieldEnd();
+
+  ++fcnt;
+  xfer += oprot->writeFieldBegin("repetition_level_encoding", ::apache::thrift::protocol::T_I32, 4);
+  xfer += oprot->writeI32((int32_t)this->repetition_level_encoding);
+  xfer += oprot->writeFieldEnd();
+
+  if (this->__isset.statistics) {  // statistics is emitted only when its __isset flag is set.
+    ++fcnt;
+    xfer += oprot->writeFieldBegin("statistics", ::apache::thrift::protocol::T_STRUCT, 5);
+    xfer += this->statistics.write(oprot);
+    xfer += oprot->writeFieldEnd();
+  }
+  xfer += oprot->writeFieldStop();
+  xfer += oprot->writeStructEnd();
+  return xfer;
+}
+
+void swap(DataPageHeader &a, DataPageHeader &b) {  // Exchanges the five data members and the __isset flags of a and b.
+  using ::std::swap;  // Lets the unqualified calls below use std::swap or an overload found by argument-dependent lookup.
+  swap(a.num_values, b.num_values);
+  swap(a.encoding, b.encoding);
+  swap(a.definition_level_encoding, b.definition_level_encoding);
+  swap(a.repetition_level_encoding, b.repetition_level_encoding);
+  swap(a.statistics, b.statistics);
+  swap(a.__isset, b.__isset);
+}
+
+const char* IndexPageHeader::ascii_fingerprint = "99914B932BD37A50B983C5E7C90AE93B";  // Generator-emitted fingerprint, written as 32 hex characters.
+const uint8_t IndexPageHeader::binary_fingerprint[16] = {0x99,0x91,0x4B,0x93,0x2B,0xD3,0x7A,0x50,0xB9,0x83,0xC5,0xE7,0xC9,0x0A,0xE9,0x3B};  // The same 16 bytes that ascii_fingerprint spells in hex.
+
+uint32_t IndexPageHeader::read(::apache::thrift::protocol::TProtocol* iprot) {  // Consumes one struct from iprot, storing nothing; returns the sum of the protocol call results.
+
+  uint32_t xfer = 0;
+  std::string fname;
+  ::apache::thrift::protocol::TType ftype;
+  int16_t fid;
+
+  xfer += iprot->readStructBegin(fname);
+
+  using ::apache::thrift::protocol::TProtocolException;  // Not referenced in this function: nothing is checked after the loop.
+
+
+  while (true)
+  {
+    xfer += iprot->readFieldBegin(fname, ftype, fid);
+    if (ftype == ::apache::thrift::protocol::T_STOP) {  // T_STOP is the only exit from the field loop.
+      break;
+    }
+    xfer += iprot->skip(ftype);  // Every field is skipped regardless of its id.
+    xfer += iprot->readFieldEnd();
+  }
+
+  xfer += iprot->readStructEnd();
+
+  return xfer;
+}
+
+uint32_t IndexPageHeader::write(::apache::thrift::protocol::TProtocol* oprot) const {  // Writes a struct with no fields to oprot; returns the sum of the protocol call results.
+  uint32_t xfer = 0;
+  uint32_t fcnt = 0;  // Never used in this function.
+  xfer += oprot->writeStructBegin("IndexPageHeader");
+
+  xfer += oprot->writeFieldStop();  // Only the stop marker is written between struct begin and end.
+  xfer += oprot->writeStructEnd();
+  return xfer;
+}
+
+void swap(IndexPageHeader &a, IndexPageHeader &b) {  // No-op: the body exchanges nothing.
+  using ::std::swap;  // No swap call follows in this function.
+  (void) a;  // The casts to void only mark the parameters as used.
+  (void) b;
+}
+
+const char* DictionaryPageHeader::ascii_fingerprint = "B149E4528254D495610C22AE4BD539C5";  // Generator-emitted fingerprint, written as 32 hex characters.
+const uint8_t DictionaryPageHeader::binary_fingerprint[16] = {0xB1,0x49,0xE4,0x52,0x82,0x54,0xD4,0x95,0x61,0x0C,0x22,0xAE,0x4B,0xD5,0x39,0xC5};  // The same 16 bytes that ascii_fingerprint spells in hex.
+
+uint32_t DictionaryPageHeader::read(::apache::thrift::protocol::TProtocol* iprot) {  // Reads this struct's fields from iprot; returns the sum of the protocol call results, or throws if field 1 or 2 was never read.
+
+  uint32_t xfer = 0;
+  std::string fname;
+  ::apache::thrift::protocol::TType ftype;
+  int16_t fid;
+
+  xfer += iprot->readStructBegin(fname);
+
+  using ::apache::thrift::protocol::TProtocolException;
+
+  bool isset_num_values = false;  // These two locals record which fields were read; both are checked after the loop.
+  bool isset_encoding = false;
+
+  while (true)
+  {
+    xfer += iprot->readFieldBegin(fname, ftype, fid);
+    if (ftype == ::apache::thrift::protocol::T_STOP) {  // T_STOP is the only exit from the field loop.
+      break;
+    }
+    switch (fid)  // Dispatch on the field id just read.
+    {
+      case 1:
+        if (ftype == ::apache::thrift::protocol::T_I32) {
+          xfer += iprot->readI32(this->num_values);
+          isset_num_values = true;
+        } else {
+          xfer += iprot->skip(ftype);  // Known id but unexpected wire type: skip the value.
+        }
+        break;
+      case 2:
+        if (ftype == ::apache::thrift::protocol::T_I32) {
+          int32_t ecast6;
+          xfer += iprot->readI32(ecast6);
+          this->encoding = (Encoding::type)ecast6;  // Decoded integer is cast to the enum without a range check.
+          isset_encoding = true;
+        } else {
+          xfer += iprot->skip(ftype);
+        }
+        break;
+      case 3:
+        if (ftype == ::apache::thrift::protocol::T_BOOL) {
+          xfer += iprot->readBool(this->is_sorted);
+          this->__isset.is_sorted = true;
+        } else {
+          xfer += iprot->skip(ftype);
+        }
+        break;
+      default:
+        xfer += iprot->skip(ftype);  // Field id not handled by this struct: skip the value.
+        break;
+    }
+    xfer += iprot->readFieldEnd();
+  }
+
+  xfer += iprot->readStructEnd();
+
+  if (!isset_num_values)  // Either tracked field missing: reject the input with INVALID_DATA.
+    throw TProtocolException(TProtocolException::INVALID_DATA);
+  if (!isset_encoding)
+    throw TProtocolException(TProtocolException::INVALID_DATA);
+  return xfer;
+}
+
+uint32_t DictionaryPageHeader::write(::apache::thrift::protocol::TProtocol* oprot) const {  // Writes this struct to oprot; returns the sum of the values returned by the protocol calls.
+  uint32_t xfer = 0;
+  uint32_t fcnt = 0;  // Incremented once per field written below, but never read in this function.
+  xfer += oprot->writeStructBegin("DictionaryPageHeader");
+
+  ++fcnt;  // Fields 1 and 2 are written unconditionally, with no __isset check.
+  xfer += oprot->writeFieldBegin("num_values", ::apache::thrift::protocol::T_I32, 1);
+  xfer += oprot->writeI32(this->num_values);
+  xfer += oprot->writeFieldEnd();
+
+  ++fcnt;
+  xfer += oprot->writeFieldBegin("encoding", ::apache::thrift::protocol::T_I32, 2);
+  xfer += oprot->writeI32((int32_t)this->encoding);
+  xfer += oprot->writeFieldEnd();
+
+  if (this->__isset.is_sorted) {  // is_sorted is emitted only when its __isset flag is set.
+    ++fcnt;
+    xfer += oprot->writeFieldBegin("is_sorted", ::apache::thrift::protocol::T_BOOL, 3);
+    xfer += oprot->writeBool(this->is_sorted);
+    xfer += oprot->writeFieldEnd();
+  }
+  xfer += oprot->writeFieldStop();
+  xfer += oprot->writeStructEnd();
+  return xfer;
+}
+
+void swap(DictionaryPageHeader &a, DictionaryPageHeader &b) {  // Exchanges the three data members and the __isset flags of a and b.
+  using ::std::swap;  // Lets the unqualified calls below use std::swap or an overload found by argument-dependent lookup.
+  swap(a.num_values, b.num_values);
+  swap(a.encoding, b.encoding);
+  swap(a.is_sorted, b.is_sorted);
+  swap(a.__isset, b.__isset);
+}
+
+const char* DataPageHeaderV2::ascii_fingerprint = "69FF2F6BD1A443440D5E46ABA5A3A919";  // Generator-emitted fingerprint, written as 32 hex characters.
+const uint8_t DataPageHeaderV2::binary_fingerprint[16] = {0x69,0xFF,0x2F,0x6B,0xD1,0xA4,0x43,0x44,0x0D,0x5E,0x46,0xAB,0xA5,0xA3,0xA9,0x19};  // The same 16 bytes that ascii_fingerprint spells in hex.
+
+uint32_t DataPageHeaderV2::read(::apache::thrift::protocol::TProtocol* iprot) {  // Reads this struct's fields from iprot; returns the sum of the protocol call results, or throws if any of fields 1-6 was never read.
+
+  uint32_t xfer = 0;
+  std::string fname;
+  ::apache::thrift::protocol::TType ftype;
+  int16_t fid;
+
+  xfer += iprot->readStructBegin(fname);
+
+  using ::apache::thrift::protocol::TProtocolException;
+
+  bool isset_num_values = false;  // These six locals record which fields were read; all are checked after the loop.
+  bool isset_num_nulls = false;
+  bool isset_num_rows = false;
+  bool isset_encoding = false;
+  bool isset_definition_levels_byte_length = false;
+  bool isset_repetition_levels_byte_length = false;
+
+  while (true)
+  {
+    xfer += iprot->readFieldBegin(fname, ftype, fid);
+    if (ftype == ::apache::thrift::protocol::T_STOP) {  // T_STOP is the only exit from the field loop.
+      break;
+    }
+    switch (fid)  // Dispatch on the field id just read.
+    {
+      case 1:
+        if (ftype == ::apache::thrift::protocol::T_I32) {
+          xfer += iprot->readI32(this->num_values);
+          isset_num_values = true;
+        } else {
+          xfer += iprot->skip(ftype);  // Known id but unexpected wire type: skip the value.
+        }
+        break;
+      case 2:
+        if (ftype == ::apache::thrift::protocol::T_I32) {
+          xfer += iprot->readI32(this->num_nulls);
+          isset_num_nulls = true;
+        } else {
+          xfer += iprot->skip(ftype);
+        }
+        break;
+      case 3:
+        if (ftype == ::apache::thrift::protocol::T_I32) {
+          xfer += iprot->readI32(this->num_rows);
+          isset_num_rows = true;
+        } else {
+          xfer += iprot->skip(ftype);
+        }
+        break;
+      case 4:
+        if (ftype == ::apache::thrift::protocol::T_I32) {
+          int32_t ecast7;
+          xfer += iprot->readI32(ecast7);
+          this->encoding = (Encoding::type)ecast7;  // Decoded integer is cast to the enum without a range check.
+          isset_encoding = true;
+        } else {
+          xfer += iprot->skip(ftype);
+        }
+        break;
+      case 5:
+        if (ftype == ::apache::thrift::protocol::T_I32) {
+          xfer += iprot->readI32(this->definition_levels_byte_length);
+          isset_definition_levels_byte_length = true;
+        } else {
+          xfer += iprot->skip(ftype);
+        }
+        break;
+      case 6:
+        if (ftype == ::apache::thrift::protocol::T_I32) {
+          xfer += iprot->readI32(this->repetition_levels_byte_length);
+          isset_repetition_levels_byte_length = true;
+        } else {
+          xfer += iprot->skip(ftype);
+        }
+        break;
+      case 7:
+        if (ftype == ::apache::thrift::protocol::T_BOOL) {
+          xfer += iprot->readBool(this->is_compressed);
+          this->__isset.is_compressed = true;
+        } else {
+          xfer += iprot->skip(ftype);
+        }
+        break;
+      case 8:
+        if (ftype == ::apache::thrift::protocol::T_STRUCT) {
+          xfer += this->statistics.read(iprot);  // The nested struct reads itself from the same protocol object.
+          this->__isset.statistics = true;
+        } else {
+          xfer += iprot->skip(ftype);
+        }
+        break;
+      default:
+        xfer += iprot->skip(ftype);  // Field id not handled by this struct: skip the value.
+        break;
+    }
+    xfer += iprot->readFieldEnd();
+  }
+
+  xfer += iprot->readStructEnd();
+
+  if (!isset_num_values)  // Any of the six tracked fields missing: reject the input with INVALID_DATA.
+    throw TProtocolException(TProtocolException::INVALID_DATA);
+  if (!isset_num_nulls)
+    throw TProtocolException(TProtocolException::INVALID_DATA);
+  if (!isset_num_rows)
+    throw TProtocolException(TProtocolException::INVALID_DATA);
+  if (!isset_encoding)
+    throw TProtocolException(TProtocolException::INVALID_DATA);
+  if (!isset_definition_levels_byte_length)
+    throw TProtocolException(TProtocolException::INVALID_DATA);
+  if (!isset_repetition_levels_byte_length)
+    throw TProtocolException(TProtocolException::INVALID_DATA);
+  return xfer;
+}
+
+uint32_t DataPageHeaderV2::write(::apache::thrift::protocol::TProtocol* oprot) const {  // Writes this struct to oprot; returns the sum of the values returned by the protocol calls.
+  uint32_t xfer = 0;
+  uint32_t fcnt = 0;  // Incremented once per field written below, but never read in this function.
+  xfer += oprot->writeStructBegin("DataPageHeaderV2");
+
+  ++fcnt;  // Fields 1-6 are written unconditionally, with no __isset check.
+  xfer += oprot->writeFieldBegin("num_values", ::apache::thrift::protocol::T_I32, 1);
+  xfer += oprot->writeI32(this->num_values);
+  xfer += oprot->writeFieldEnd();
+
+  ++fcnt;
+  xfer += oprot->writeFieldBegin("num_nulls", ::apache::thrift::protocol::T_I32, 2);
+  xfer += oprot->writeI32(this->num_nulls);
+  xfer += oprot->writeFieldEnd();
+
+  ++fcnt;
+  xfer += oprot->writeFieldBegin("num_rows", ::apache::thrift::protocol::T_I32, 3);
+  xfer += oprot->writeI32(this->num_rows);
+  xfer += oprot->writeFieldEnd();
+
+  ++fcnt;
+  xfer += oprot->writeFieldBegin("encoding", ::apache::thrift::protocol::T_I32, 4);
+  xfer += oprot->writeI32((int32_t)this->encoding);
+  xfer += oprot->writeFieldEnd();
+
+  ++fcnt;
+  xfer += oprot->writeFieldBegin("definition_levels_byte_length", ::apache::thrift::protocol::T_I32, 5);
+  xfer += oprot->writeI32(this->definition_levels_byte_length);
+  xfer += oprot->writeFieldEnd();
+
+  ++fcnt;
+  xfer += oprot->writeFieldBegin("repetition_levels_byte_length", ::apache::thrift::protocol::T_I32, 6);
+  xfer += oprot->writeI32(this->repetition_levels_byte_length);
+  xfer += oprot->writeFieldEnd();
+
+  if (this->__isset.is_compressed) {  // Fields 7 and 8 are emitted only when their __isset flag is set.
+    ++fcnt;
+    xfer += oprot->writeFieldBegin("is_compressed", ::apache::thrift::protocol::T_BOOL, 7);
+    xfer += oprot->writeBool(this->is_compressed);
+    xfer += oprot->writeFieldEnd();
+  }
+  if (this->__isset.statistics) {
+    ++fcnt;
+    xfer += oprot->writeFieldBegin("statistics", ::apache::thrift::protocol::T_STRUCT, 8);
+    xfer += this->statistics.write(oprot);
+    xfer += oprot->writeFieldEnd();
+  }
+  xfer += oprot->writeFieldStop();
+  xfer += oprot->writeStructEnd();
+  return xfer;
+}
+
+void swap(DataPageHeaderV2 &a, DataPageHeaderV2 &b) {  // Exchanges the eight data members and the __isset flags of a and b.
+  using ::std::swap;  // Lets the unqualified calls below use std::swap or an overload found by argument-dependent lookup.
+  swap(a.num_values, b.num_values);
+  swap(a.num_nulls, b.num_nulls);
+  swap(a.num_rows, b.num_rows);
+  swap(a.encoding, b.encoding);
+  swap(a.definition_levels_byte_length, b.definition_levels_byte_length);
+  swap(a.repetition_levels_byte_length, b.repetition_levels_byte_length);
+  swap(a.is_compressed, b.is_compressed);
+  swap(a.statistics, b.statistics);
+  swap(a.__isset, b.__isset);
+}
+
+const char* PageHeader::ascii_fingerprint = "B5BD2BDF3756C883A58B30B9C9F204A0";  // Generator-emitted fingerprint, written as 32 hex characters.
+const uint8_t PageHeader::binary_fingerprint[16] = {0xB5,0xBD,0x2B,0xDF,0x37,0x56,0xC8,0x83,0xA5,0x8B,0x30,0xB9,0xC9,0xF2,0x04,0xA0};  // The same 16 bytes that ascii_fingerprint spells in hex.
+
+uint32_t PageHeader::read(::apache::thrift::protocol::TProtocol* iprot) {  // Reads this struct's fields from iprot; returns the sum of the protocol call results, or throws if any of fields 1-3 was never read.
+
+  uint32_t xfer = 0;
+  std::string fname;
+  ::apache::thrift::protocol::TType ftype;
+  int16_t fid;
+
+  xfer += iprot->readStructBegin(fname);
+
+  using ::apache::thrift::protocol::TProtocolException;
+
+  bool isset_type = false;  // These three locals record which fields were read; all are checked after the loop.
+  bool isset_uncompressed_page_size = false;
+  bool isset_compressed_page_size = false;
+
+  while (true)
+  {
+    xfer += iprot->readFieldBegin(fname, ftype, fid);
+    if (ftype == ::apache::thrift::protocol::T_STOP) {  // T_STOP is the only exit from the field loop.
+      break;
+    }
+    switch (fid)  // Dispatch on the field id just read.
+    {
+      case 1:
+        if (ftype == ::apache::thrift::protocol::T_I32) {
+          int32_t ecast8;
+          xfer += iprot->readI32(ecast8);
+          this->type = (PageType::type)ecast8;  // Decoded integer is cast to the enum without a range check.
+          isset_type = true;
+        } else {
+          xfer += iprot->skip(ftype);  // Known id but unexpected wire type: skip the value.
+        }
+        break;
+      case 2:
+        if (ftype == ::apache::thrift::protocol::T_I32) {
+          xfer += iprot->readI32(this->uncompressed_page_size);
+          isset_uncompressed_page_size = true;
+        } else {
+          xfer += iprot->skip(ftype);
+        }
+        break;
+      case 3:
+        if (ftype == ::apache::thrift::protocol::T_I32) {
+          xfer += iprot->readI32(this->compressed_page_size);
+          isset_compressed_page_size = true;
+        } else {
+          xfer += iprot->skip(ftype);
+        }
+        break;
+      case 4:
+        if (ftype == ::apache::thrift::protocol::T_I32) {
+          xfer += iprot->readI32(this->crc);
+          this->__isset.crc = true;
+        } else {
+          xfer += iprot->skip(ftype);
+        }
+        break;
+      case 5:
+        if (ftype == ::apache::thrift::protocol::T_STRUCT) {
+          xfer += this->data_page_header.read(iprot);  // Each nested header (fields 5-8) reads itself from the same protocol object.
+          this->__isset.data_page_header = true;
+        } else {
+          xfer += iprot->skip(ftype);
+        }
+        break;
+      case 6:
+        if (ftype == ::apache::thrift::protocol::T_STRUCT) {
+          xfer += this->index_page_header.read(iprot);
+          this->__isset.index_page_header = true;
+        } else {
+          xfer += iprot->skip(ftype);
+        }
+        break;
+      case 7:
+        if (ftype == ::apache::thrift::protocol::T_STRUCT) {
+          xfer += this->dictionary_page_header.read(iprot);
+          this->__isset.dictionary_page_header = true;
+        } else {
+          xfer += iprot->skip(ftype);
+        }
+        break;
+      case 8:
+        if (ftype == ::apache::thrift::protocol::T_STRUCT) {
+          xfer += this->data_page_header_v2.read(iprot);
+          this->__isset.data_page_header_v2 = true;
+        } else {
+          xfer += iprot->skip(ftype);
+        }
+        break;
+      default:
+        xfer += iprot->skip(ftype);  // Field id not handled by this struct: skip the value.
+        break;
+    }
+    xfer += iprot->readFieldEnd();
+  }
+
+  xfer += iprot->readStructEnd();
+
+  if (!isset_type)  // Any of the three tracked fields missing: reject the input with INVALID_DATA.
+    throw TProtocolException(TProtocolException::INVALID_DATA);
+  if (!isset_uncompressed_page_size)
+    throw TProtocolException(TProtocolException::INVALID_DATA);
+  if (!isset_compressed_page_size)
+    throw TProtocolException(TProtocolException::INVALID_DATA);
+  return xfer;
+}
+
+uint32_t PageHeader::write(::apache::thrift::protocol::TProtocol* oprot) const {  // Writes this struct to oprot; returns the sum of the values returned by the protocol calls.
+  uint32_t xfer = 0;
+  uint32_t fcnt = 0;  // Incremented once per field written below, but never read in this function.
+  xfer += oprot->writeStructBegin("PageHeader");
+
+  ++fcnt;  // Fields 1-3 are written unconditionally, with no __isset check.
+  xfer += oprot->writeFieldBegin("type", ::apache::thrift::protocol::T_I32, 1);
+  xfer += oprot->writeI32((int32_t)this->type);
+  xfer += oprot->writeFieldEnd();
+
+  ++fcnt;
+  xfer += oprot->writeFieldBegin("uncompressed_page_size", ::apache::thrift::protocol::T_I32, 2);
+  xfer += oprot->writeI32(this->uncompressed_page_size);
+  xfer += oprot->writeFieldEnd();
+
+  ++fcnt;
+  xfer += oprot->writeFieldBegin("compressed_page_size", ::apache::thrift::protocol::T_I32, 3);
+  xfer += oprot->writeI32(this->compressed_page_size);
+  xfer += oprot->writeFieldEnd();
+
+  if (this->__isset.crc) {  // Fields 4-8 are emitted only when their __isset flag is set.
+    ++fcnt;
+    xfer += oprot->writeFieldBegin("crc", ::apache::thrift::protocol::T_I32, 4);
+    xfer += oprot->writeI32(this->crc);
+    xfer += oprot->writeFieldEnd();
+  }
+  if (this->__isset.data_page_header) {
+    ++fcnt;
+    xfer += oprot->writeFieldBegin("data_page_header", ::apache::thrift::protocol::T_STRUCT, 5);
+    xfer += this->data_page_header.write(oprot);
+    xfer += oprot->writeFieldEnd();
+  }
+  if (this->__isset.index_page_header) {
+    ++fcnt;
+    xfer += oprot->writeFieldBegin("index_page_header", ::apache::thrift::protocol::T_STRUCT, 6);
+    xfer += this->index_page_header.write(oprot);
+    xfer += oprot->writeFieldEnd();
+  }
+  if (this->__isset.dictionary_page_header) {
+    ++fcnt;
+    xfer += oprot->writeFieldBegin("dictionary_page_header", ::apache::thrift::protocol::T_STRUCT, 7);
+    xfer += this->dictionary_page_header.write(oprot);
+    xfer += oprot->writeFieldEnd();
+  }
+  if (this->__isset.data_page_header_v2) {
+    ++fcnt;
+    xfer += oprot->writeFieldBegin("data_page_header_v2", ::apache::thrift::protocol::T_STRUCT, 8);
+    xfer += this->data_page_header_v2.write(oprot);
+    xfer += oprot->writeFieldEnd();
+  }
+  xfer += oprot->writeFieldStop();
+  xfer += oprot->writeStructEnd();
+  return xfer;
+}
+
+void swap(PageHeader &a, PageHeader &b) {  // Exchanges every member of a and b, including the __isset flags.
+  using ::std::swap;  // makes std::swap a candidate for the unqualified swap calls below
+  swap(a.type, b.type);
+  swap(a.uncompressed_page_size, b.uncompressed_page_size);
+  swap(a.compressed_page_size, b.compressed_page_size);
+  swap(a.crc, b.crc);
+  swap(a.data_page_header, b.data_page_header);
+  swap(a.index_page_header, b.index_page_header);
+  swap(a.dictionary_page_header, b.dictionary_page_header);
+  swap(a.data_page_header_v2, b.data_page_header_v2);
+  swap(a.__isset, b.__isset);  // presence flags travel with the values they describe
+}
+
+const char* KeyValue::ascii_fingerprint = "5B708A954C550ECA9C1A49D3C5CAFAB9";  // 32 hex digits identifying KeyValue (presumably derived from the struct layout by the Thrift compiler -- confirm)
+const uint8_t KeyValue::binary_fingerprint[16] = {0x5B,0x70,0x8A,0x95,0x4C,0x55,0x0E,0xCA,0x9C,0x1A,0x49,0xD3,0xC5,0xCA,0xFA,0xB9};  // the same 16 bytes as the hex string above
+
+uint32_t KeyValue::read(::apache::thrift::protocol::TProtocol* iprot) {  // Reads a KeyValue from iprot; throws TProtocolException(INVALID_DATA) if field 1 (key) was never read; returns the accumulated xfer total.
+
+  uint32_t xfer = 0;  // running sum of the values returned by every iprot call below
+  std::string fname;
+  ::apache::thrift::protocol::TType ftype;
+  int16_t fid;
+
+  xfer += iprot->readStructBegin(fname);
+
+  using ::apache::thrift::protocol::TProtocolException;
+
+  bool isset_key = false;  // tracks the required field locally; the optional field uses this->__isset instead
+
+  while (true)
+  {
+    xfer += iprot->readFieldBegin(fname, ftype, fid);
+    if (ftype == ::apache::thrift::protocol::T_STOP) {  // stop marker ends the field loop
+      break;
+    }
+    switch (fid)  // dispatch on field id; fname is not consulted
+    {
+      case 1:
+        if (ftype == ::apache::thrift::protocol::T_STRING) {
+          xfer += iprot->readString(this->key);
+          isset_key = true;
+        } else {
+          xfer += iprot->skip(ftype);  // known id with an unexpected wire type: skipped, so isset_key stays false
+        }
+        break;
+      case 2:
+        if (ftype == ::apache::thrift::protocol::T_STRING) {
+          xfer += iprot->readString(this->value);
+          this->__isset.value = true;
+        } else {
+          xfer += iprot->skip(ftype);
+        }
+        break;
+      default:
+        xfer += iprot->skip(ftype);  // unknown field ids are skipped rather than rejected
+        break;
+    }
+    xfer += iprot->readFieldEnd();
+  }
+
+  xfer += iprot->readStructEnd();
+
+  if (!isset_key)  // required-field check runs only after the whole struct has been consumed
+    throw TProtocolException(TProtocolException::INVALID_DATA);
+  return xfer;
+}
+
+uint32_t KeyValue::write(::apache::thrift::protocol::TProtocol* oprot) const {  // Writes this KeyValue to oprot and returns the accumulated xfer total.
+  uint32_t xfer = 0;  // running sum of the values returned by every oprot call below
+  uint32_t fcnt = 0;  // incremented once per emitted field; never read in this function
+  xfer += oprot->writeStructBegin("KeyValue");
+
+  ++fcnt;
+  xfer += oprot->writeFieldBegin("key", ::apache::thrift::protocol::T_STRING, 1);  // key is always written
+  xfer += oprot->writeString(this->key);
+  xfer += oprot->writeFieldEnd();
+
+  if (this->__isset.value) {  // value is written only when its __isset flag is true
+    ++fcnt;
+    xfer += oprot->writeFieldBegin("value", ::apache::thrift::protocol::T_STRING, 2);
+    xfer += oprot->writeString(this->value);
+    xfer += oprot->writeFieldEnd();
+  }
+  xfer += oprot->writeFieldStop();
+  xfer += oprot->writeStructEnd();
+  return xfer;
+}
+
+void swap(KeyValue &a, KeyValue &b) {  // Exchanges key, value and the __isset flags of a and b.
+  using ::std::swap;  // makes std::swap a candidate for the unqualified swap calls below
+  swap(a.key, b.key);
+  swap(a.value, b.value);
+  swap(a.__isset, b.__isset);
+}
+
+const char* SortingColumn::ascii_fingerprint = "F079C2D58A783AD90F9BE05D10DBBC6F";  // 32 hex digits identifying SortingColumn (presumably derived from the struct layout by the Thrift compiler -- confirm)
+const uint8_t SortingColumn::binary_fingerprint[16] = {0xF0,0x79,0xC2,0xD5,0x8A,0x78,0x3A,0xD9,0x0F,0x9B,0xE0,0x5D,0x10,0xDB,0xBC,0x6F};  // the same 16 bytes as the hex string above
+
+uint32_t SortingColumn::read(::apache::thrift::protocol::TProtocol* iprot) {  // Reads a SortingColumn from iprot; throws TProtocolException(INVALID_DATA) unless fields 1-3 were all read; returns the accumulated xfer total.
+
+  uint32_t xfer = 0;  // running sum of the values returned by every iprot call below
+  std::string fname;
+  ::apache::thrift::protocol::TType ftype;
+  int16_t fid;
+
+  xfer += iprot->readStructBegin(fname);
+
+  using ::apache::thrift::protocol::TProtocolException;
+
+  bool isset_column_idx = false;  // all three fields are required, so each gets a local presence flag
+  bool isset_descending = false;
+  bool isset_nulls_first = false;
+
+  while (true)
+  {
+    xfer += iprot->readFieldBegin(fname, ftype, fid);
+    if (ftype == ::apache::thrift::protocol::T_STOP) {  // stop marker ends the field loop
+      break;
+    }
+    switch (fid)  // dispatch on field id; fname is not consulted
+    {
+      case 1:
+        if (ftype == ::apache::thrift::protocol::T_I32) {
+          xfer += iprot->readI32(this->column_idx);
+          isset_column_idx = true;
+        } else {
+          xfer += iprot->skip(ftype);  // unexpected wire type: skipped, so the presence flag stays false
+        }
+        break;
+      case 2:
+        if (ftype == ::apache::thrift::protocol::T_BOOL) {
+          xfer += iprot->readBool(this->descending);
+          isset_descending = true;
+        } else {
+          xfer += iprot->skip(ftype);
+        }
+        break;
+      case 3:
+        if (ftype == ::apache::thrift::protocol::T_BOOL) {
+          xfer += iprot->readBool(this->nulls_first);
+          isset_nulls_first = true;
+        } else {
+          xfer += iprot->skip(ftype);
+        }
+        break;
+      default:
+        xfer += iprot->skip(ftype);  // unknown field ids are skipped rather than rejected
+        break;
+    }
+    xfer += iprot->readFieldEnd();
+  }
+
+  xfer += iprot->readStructEnd();
+
+  if (!isset_column_idx)  // required-field checks run only after the whole struct has been consumed
+    throw TProtocolException(TProtocolException::INVALID_DATA);
+  if (!isset_descending)
+    throw TProtocolException(TProtocolException::INVALID_DATA);
+  if (!isset_nulls_first)
+    throw TProtocolException(TProtocolException::INVALID_DATA);
+  return xfer;
+}
+
+uint32_t SortingColumn::write(::apache::thrift::protocol::TProtocol* oprot) const {  // Writes this SortingColumn to oprot and returns the accumulated xfer total.
+  uint32_t xfer = 0;  // running sum of the values returned by every oprot call below
+  uint32_t fcnt = 0;  // incremented once per emitted field; never read in this function
+  xfer += oprot->writeStructBegin("SortingColumn");
+
+  ++fcnt;
+  xfer += oprot->writeFieldBegin("column_idx", ::apache::thrift::protocol::T_I32, 1);  // all three fields are written unconditionally
+  xfer += oprot->writeI32(this->column_idx);
+  xfer += oprot->writeFieldEnd();
+
+  ++fcnt;
+  xfer += oprot->writeFieldBegin("descending", ::apache::thrift::protocol::T_BOOL, 2);
+  xfer += oprot->writeBool(this->descending);
+  xfer += oprot->writeFieldEnd();
+
+  ++fcnt;
+  xfer += oprot->writeFieldBegin("nulls_first", ::apache::thrift::protocol::T_BOOL, 3);
+  xfer += oprot->writeBool(this->nulls_first);
+  xfer += oprot->writeFieldEnd();
+
+  xfer += oprot->writeFieldStop();
+  xfer += oprot->writeStructEnd();
+  return xfer;
+}
+
+void swap(SortingColumn &a, SortingColumn &b) {  // Exchanges the three members of a and b; no __isset member is swapped here.
+  using ::std::swap;  // makes std::swap a candidate for the unqualified swap calls below
+  swap(a.column_idx, b.column_idx);
+  swap(a.descending, b.descending);
+  swap(a.nulls_first, b.nulls_first);
+}
+
+const char* ColumnMetaData::ascii_fingerprint = "1AF797732BCB4465C6314FB29B86638D";  // 32 hex digits identifying ColumnMetaData (presumably derived from the struct layout by the Thrift compiler -- confirm)
+const uint8_t ColumnMetaData::binary_fingerprint[16] = {0x1A,0xF7,0x97,0x73,0x2B,0xCB,0x44,0x65,0xC6,0x31,0x4F,0xB2,0x9B,0x86,0x63,0x8D};  // the same 16 bytes as the hex string above
+
+uint32_t ColumnMetaData::read(::apache::thrift::protocol::TProtocol* iprot) {  // Reads a ColumnMetaData from iprot; throws TProtocolException(INVALID_DATA) unless fields 1-7 and 9 were all read; returns the accumulated xfer total.
+
+  uint32_t xfer = 0;  // running sum of the values returned by every iprot call below
+  std::string fname;
+  ::apache::thrift::protocol::TType ftype;
+  int16_t fid;
+
+  xfer += iprot->readStructBegin(fname);
+
+  using ::apache::thrift::protocol::TProtocolException;
+
+  bool isset_type = false;  // one local presence flag per required field; optional fields use this->__isset instead
+  bool isset_encodings = false;
+  bool isset_path_in_schema = false;
+  bool isset_codec = false;
+  bool isset_num_values = false;
+  bool isset_total_uncompressed_size = false;
+  bool isset_total_compressed_size = false;
+  bool isset_data_page_offset = false;
+
+  while (true)
+  {
+    xfer += iprot->readFieldBegin(fname, ftype, fid);
+    if (ftype == ::apache::thrift::protocol::T_STOP) {  // stop marker ends the field loop
+      break;
+    }
+    switch (fid)  // dispatch on field id; fname is not consulted
+    {
+      case 1:
+        if (ftype == ::apache::thrift::protocol::T_I32) {
+          int32_t ecast9;
+          xfer += iprot->readI32(ecast9);
+          this->type = (Type::type)ecast9;  // cast is unchecked: the value is not validated against the enum's members here
+          isset_type = true;
+        } else {
+          xfer += iprot->skip(ftype);  // unexpected wire type: skipped, so the presence flag stays false
+        }
+        break;
+      case 2:
+        if (ftype == ::apache::thrift::protocol::T_LIST) {
+          {
+            this->encodings.clear();
+            uint32_t _size10;
+            ::apache::thrift::protocol::TType _etype13;  // element type is read but never compared to the expected type
+            xfer += iprot->readListBegin(_etype13, _size10);
+            this->encodings.resize(_size10);  // NOTE(review): size is taken from the input with no upper-bound check in this function
+            uint32_t _i14;
+            for (_i14 = 0; _i14 < _size10; ++_i14)
+            {
+              int32_t ecast15;
+              xfer += iprot->readI32(ecast15);
+              this->encodings[_i14] = (Encoding::type)ecast15;
+            }
+            xfer += iprot->readListEnd();
+          }
+          isset_encodings = true;
+        } else {
+          xfer += iprot->skip(ftype);
+        }
+        break;
+      case 3:
+        if (ftype == ::apache::thrift::protocol::T_LIST) {
+          {
+            this->path_in_schema.clear();
+            uint32_t _size16;
+            ::apache::thrift::protocol::TType _etype19;
+            xfer += iprot->readListBegin(_etype19, _size16);
+            this->path_in_schema.resize(_size16);  // elements are created first, then filled in place by readString
+            uint32_t _i20;
+            for (_i20 = 0; _i20 < _size16; ++_i20)
+            {
+              xfer += iprot->readString(this->path_in_schema[_i20]);
+            }
+            xfer += iprot->readListEnd();
+          }
+          isset_path_in_schema = true;
+        } else {
+          xfer += iprot->skip(ftype);
+        }
+        break;
+      case 4:
+        if (ftype == ::apache::thrift::protocol::T_I32) {
+          int32_t ecast21;
+          xfer += iprot->readI32(ecast21);
+          this->codec = (CompressionCodec::type)ecast21;
+          isset_codec = true;
+        } else {
+          xfer += iprot->skip(ftype);
+        }
+        break;
+      case 5:
+        if (ftype == ::apache::thrift::protocol::T_I64) {
+          xfer += iprot->readI64(this->num_values);
+          isset_num_values = true;
+        } else {
+          xfer += iprot->skip(ftype);
+        }
+        break;
+      case 6:
+        if (ftype == ::apache::thrift::protocol::T_I64) {
+          xfer += iprot->readI64(this->total_uncompressed_size);
+          isset_total_uncompressed_size = true;
+        } else {
+          xfer += iprot->skip(ftype);
+        }
+        break;
+      case 7:
+        if (ftype == ::apache::thrift::protocol::T_I64) {
+          xfer += iprot->readI64(this->total_compressed_size);
+          isset_total_compressed_size = true;
+        } else {
+          xfer += iprot->skip(ftype);
+        }
+        break;
+      case 8:
+        if (ftype == ::apache::thrift::protocol::T_LIST) {
+          {
+            this->key_value_metadata.clear();
+            uint32_t _size22;
+            ::apache::thrift::protocol::TType _etype25;
+            xfer += iprot->readListBegin(_etype25, _size22);
+            this->key_value_metadata.resize(_size22);
+            uint32_t _i26;
+            for (_i26 = 0; _i26 < _size22; ++_i26)
+            {
+              xfer += this->key_value_metadata[_i26].read(iprot);  // each element deserializes itself via KeyValue::read
+            }
+            xfer += iprot->readListEnd();
+          }
+          this->__isset.key_value_metadata = true;  // optional field: presence is recorded on the object, not in a local flag
+        } else {
+          xfer += iprot->skip(ftype);
+        }
+        break;
+      case 9:
+        if (ftype == ::apache::thrift::protocol::T_I64) {
+          xfer += iprot->readI64(this->data_page_offset);
+          isset_data_page_offset = true;
+        } else {
+          xfer += iprot->skip(ftype);
+        }
+        break;
+      case 10:
+        if (ftype == ::apache::thrift::protocol::T_I64) {
+          xfer += iprot->readI64(this->index_page_offset);
+          this->__isset.index_page_offset = true;
+        } else {
+          xfer += iprot->skip(ftype);
+        }
+        break;
+      case 11:
+        if (ftype == ::apache::thrift::protocol::T_I64) {
+          xfer += iprot->readI64(this->dictionary_page_offset);
+          this->__isset.dictionary_page_offset = true;
+        } else {
+          xfer += iprot->skip(ftype);
+        }
+        break;
+      case 12:
+        if (ftype == ::apache::thrift::protocol::T_STRUCT) {
+          xfer += this->statistics.read(iprot);
+          this->__isset.statistics = true;
+        } else {
+          xfer += iprot->skip(ftype);
+        }
+        break;
+      default:
+        xfer += iprot->skip(ftype);  // unknown field ids are skipped rather than rejected
+        break;
+    }
+    xfer += iprot->readFieldEnd();
+  }
+
+  xfer += iprot->readStructEnd();
+
+  if (!isset_type)  // required-field checks run only after the whole struct has been consumed
+    throw TProtocolException(TProtocolException::INVALID_DATA);
+  if (!isset_encodings)
+    throw TProtocolException(TProtocolException::INVALID_DATA);
+  if (!isset_path_in_schema)
+    throw TProtocolException(TProtocolException::INVALID_DATA);
+  if (!isset_codec)
+    throw TProtocolException(TProtocolException::INVALID_DATA);
+  if (!isset_num_values)
+    throw TProtocolException(TProtocolException::INVALID_DATA);
+  if (!isset_total_uncompressed_size)
+    throw TProtocolException(TProtocolException::INVALID_DATA);
+  if (!isset_total_compressed_size)
+    throw TProtocolException(TProtocolException::INVALID_DATA);
+  if (!isset_data_page_offset)
+    throw TProtocolException(TProtocolException::INVALID_DATA);
+  return xfer;
+}
+
+uint32_t ColumnMetaData::write(::apache::thrift::protocol::TProtocol* oprot) const {  // Writes this ColumnMetaData to oprot in field-id order and returns the accumulated xfer total.
+  uint32_t xfer = 0;  // running sum of the values returned by every oprot call below
+  uint32_t fcnt = 0;  // incremented once per emitted field; never read in this function
+  xfer += oprot->writeStructBegin("ColumnMetaData");
+
+  ++fcnt;
+  xfer += oprot->writeFieldBegin("type", ::apache::thrift::protocol::T_I32, 1);  // fields 1-7 and 9 are written unconditionally
+  xfer += oprot->writeI32((int32_t)this->type);
+  xfer += oprot->writeFieldEnd();
+
+  ++fcnt;
+  xfer += oprot->writeFieldBegin("encodings", ::apache::thrift::protocol::T_LIST, 2);
+  {
+    xfer += oprot->writeListBegin(::apache::thrift::protocol::T_I32, static_cast<uint32_t>(this->encodings.size()));  // list header carries element type and count; size() is narrowed to 32 bits
+    std::vector<Encoding::type> ::const_iterator _iter27;
+    for (_iter27 = this->encodings.begin(); _iter27 != this->encodings.end(); ++_iter27)
+    {
+      xfer += oprot->writeI32((int32_t)(*_iter27));  // each enum member is sent as a plain 32-bit integer
+    }
+    xfer += oprot->writeListEnd();
+  }
+  xfer += oprot->writeFieldEnd();
+
+  ++fcnt;
+  xfer += oprot->writeFieldBegin("path_in_schema", ::apache::thrift::protocol::T_LIST, 3);
+  {
+    xfer += oprot->writeListBegin(::apache::thrift::protocol::T_STRING, static_cast<uint32_t>(this->path_in_schema.size()));
+    std::vector<std::string> ::const_iterator _iter28;
+    for (_iter28 = this->path_in_schema.begin(); _iter28 != this->path_in_schema.end(); ++_iter28)
+    {
+      xfer += oprot->writeString((*_iter28));
+    }
+    xfer += oprot->writeListEnd();
+  }
+  xfer += oprot->writeFieldEnd();
+
+  ++fcnt;
+  xfer += oprot->writeFieldBegin("codec", ::apache::thrift::protocol::T_I32, 4);
+  xfer += oprot->writeI32((int32_t)this->codec);
+  xfer += oprot->writeFieldEnd();
+
+  ++fcnt;
+  xfer += oprot->writeFieldBegin("num_values", ::apache::thrift::protocol::T_I64, 5);
+  xfer += oprot->writeI64(this->num_values);
+  xfer += oprot->writeFieldEnd();
+
+  ++fcnt;
+  xfer += oprot->writeFieldBegin("total_uncompressed_size", ::apache::thrift::protocol::T_I64, 6);
+  xfer += oprot->writeI64(this->total_uncompressed_size);
+  xfer += oprot->writeFieldEnd();
+
+  ++fcnt;
+  xfer += oprot->writeFieldBegin("total_compressed_size", ::apache::thrift::protocol::T_I64, 7);
+  xfer += oprot->writeI64(this->total_compressed_size);
+  xfer += oprot->writeFieldEnd();
+
+  if (this->__isset.key_value_metadata) {  // fields 8, 10, 11 and 12 are written only when their __isset flag is true
+    ++fcnt;
+    xfer += oprot->writeFieldBegin("key_value_metadata", ::apache::thrift::protocol::T_LIST, 8);
+    {
+      xfer += oprot->writeListBegin(::apache::thrift::protocol::T_STRUCT, static_cast<uint32_t>(this->key_value_metadata.size()));
+      std::vector<KeyValue> ::const_iterator _iter29;
+      for (_iter29 = this->key_value_metadata.begin(); _iter29 != this->key_value_metadata.end(); ++_iter29)
+      {
+        xfer += (*_iter29).write(oprot);  // each element serializes itself via KeyValue::write
+      }
+      xfer += oprot->writeListEnd();
+    }
+    xfer += oprot->writeFieldEnd();
+  }
+  ++fcnt;
+  xfer += oprot->writeFieldBegin("data_page_offset", ::apache::thrift::protocol::T_I64, 9);  // written unconditionally, after optional field 8
+  xfer += oprot->writeI64(this->data_page_offset);
+  xfer += oprot->writeFieldEnd();
+
+  if (this->__isset.index_page_offset) {
+    ++fcnt;
+    xfer += oprot->writeFieldBegin("index_page_offset", ::apache::thrift::protocol::T_I64, 10);
+    xfer += oprot->writeI64(this->index_page_offset);
+    xfer += oprot->writeFieldEnd();
+  }
+  if (this->__isset.dictionary_page_offset) {
+    ++fcnt;
+    xfer += oprot->writeFieldBegin("dictionary_page_offset", ::apache::thrift::protocol::T_I64, 11);
+    xfer += oprot->writeI64(this->dictionary_page_offset);
+    xfer += oprot->writeFieldEnd();
+  }
+  if (this->__isset.statistics) {
+    ++fcnt;
+    xfer += oprot->writeFieldBegin("statistics", ::apache::thrift::protocol::T_STRUCT, 12);
+    xfer += this->statistics.write(oprot);
+    xfer += oprot->writeFieldEnd();
+  }
+  xfer += oprot->writeFieldStop();
+  xfer += oprot->writeStructEnd();
+  return xfer;
+}
+
+void swap(ColumnMetaData &a, ColumnMetaData &b) {  // Exchanges every member of a and b, including the __isset flags.
+  using ::std::swap;  // makes std::swap a candidate for the unqualified swap calls below
+  swap(a.type, b.type);
+  swap(a.encodings, b.encodings);
+  swap(a.path_in_schema, b.path_in_schema);
+  swap(a.codec, b.codec);
+  swap(a.num_values, b.num_values);
+  swap(a.total_uncompressed_size, b.total_uncompressed_size);
+  swap(a.total_compressed_size, b.total_compressed_size);
+  swap(a.key_value_metadata, b.key_value_metadata);
+  swap(a.data_page_offset, b.data_page_offset);
+  swap(a.index_page_offset, b.index_page_offset);
+  swap(a.dictionary_page_offset, b.dictionary_page_offset);
+  swap(a.statistics, b.statistics);
+  swap(a.__isset, b.__isset);  // presence flags travel with the values they describe
+}
+
+const char* ColumnChunk::ascii_fingerprint = "169FC47057EF3D82E2FACDDEC2641AE8";  // 32 hex digits identifying ColumnChunk (presumably derived from the struct layout by the Thrift compiler -- confirm)
+const uint8_t ColumnChunk::binary_fingerprint[16] = {0x16,0x9F,0xC4,0x70,0x57,0xEF,0x3D,0x82,0xE2,0xFA,0xCD,0xDE,0xC2,0x64,0x1A,0xE8};  // the same 16 bytes as the hex string above
+
+uint32_t ColumnChunk::read(::apache::thrift::protocol::TProtocol* iprot) {  // Reads a ColumnChunk from iprot; throws TProtocolException(INVALID_DATA) if field 2 (file_offset) was never read; returns the accumulated xfer total.
+
+  uint32_t xfer = 0;  // running sum of the values returned by every iprot call below
+  std::string fname;
+  ::apache::thrift::protocol::TType ftype;
+  int16_t fid;
+
+  xfer += iprot->readStructBegin(fname);
+
+  using ::apache::thrift::protocol::TProtocolException;
+
+  bool isset_file_offset = false;  // the only required field; file_path and meta_data use this->__isset instead
+
+  while (true)
+  {
+    xfer += iprot->readFieldBegin(fname, ftype, fid);
+    if (ftype == ::apache::thrift::protocol::T_STOP) {  // stop marker ends the field loop
+      break;
+    }
+    switch (fid)  // dispatch on field id; fname is not consulted
+    {
+      case 1:
+        if (ftype == ::apache::thrift::protocol::T_STRING) {
+          xfer += iprot->readString(this->file_path);
+          this->__isset.file_path = true;
+        } else {
+          xfer += iprot->skip(ftype);  // unexpected wire type: the field is skipped and left unset
+        }
+        break;
+      case 2:
+        if (ftype == ::apache::thrift::protocol::T_I64) {
+          xfer += iprot->readI64(this->file_offset);
+          isset_file_offset = true;
+        } else {
+          xfer += iprot->skip(ftype);
+        }
+        break;
+      case 3:
+        if (ftype == ::apache::thrift::protocol::T_STRUCT) {
+          xfer += this->meta_data.read(iprot);  // nested struct deserializes itself; its own required-field check may throw
+          this->__isset.meta_data = true;
+        } else {
+          xfer += iprot->skip(ftype);
+        }
+        break;
+      default:
+        xfer += iprot->skip(ftype);  // unknown field ids are skipped rather than rejected
+        break;
+    }
+    xfer += iprot->readFieldEnd();
+  }
+
+  xfer += iprot->readStructEnd();
+
+  if (!isset_file_offset)  // required-field check runs only after the whole struct has been consumed
+    throw TProtocolException(TProtocolException::INVALID_DATA);
+  return xfer;
+}
+
+uint32_t ColumnChunk::write(::apache::thrift::protocol::TProtocol* oprot) const {  // Writes this ColumnChunk to oprot and returns the accumulated xfer total.
+  uint32_t xfer = 0;  // running sum of the values returned by every oprot call below
+  uint32_t fcnt = 0;  // incremented once per emitted field; never read in this function
+  xfer += oprot->writeStructBegin("ColumnChunk");
+
+  if (this->__isset.file_path) {  // file_path and meta_data are written only when their __isset flag is true
+    ++fcnt;
+    xfer += oprot->writeFieldBegin("file_path", ::apache::thrift::protocol::T_STRING, 1);
+    xfer += oprot->writeString(this->file_path);
+    xfer += oprot->writeFieldEnd();
+  }
+  ++fcnt;
+  xfer += oprot->writeFieldBegin("file_offset", ::apache::thrift::protocol::T_I64, 2);  // file_offset is always written
+  xfer += oprot->writeI64(this->file_offset);
+  xfer += oprot->writeFieldEnd();
+
+  if (this->__isset.meta_data) {
+    ++fcnt;
+    xfer += oprot->writeFieldBegin("meta_data", ::apache::thrift::protocol::T_STRUCT, 3);
+    xfer += this->meta_data.write(oprot);
+    xfer += oprot->writeFieldEnd();
+  }
+  xfer += oprot->writeFieldStop();
+  xfer += oprot->writeStructEnd();
+  return xfer;
+}
+
+void swap(ColumnChunk &a, ColumnChunk &b) {  // Exchanges every member of a and b, including the __isset flags.
+  using ::std::swap;  // makes std::swap a candidate for the unqualified swap calls below
+  swap(a.file_path, b.file_path);
+  swap(a.file_offset, b.file_offset);
+  swap(a.meta_data, b.meta_data);
+  swap(a.__isset, b.__isset);
+}
+
+const char* RowGroup::ascii_fingerprint = "DC7968627FA826DDC4C6C9BE773586C9";  // 32 hex digits identifying RowGroup (presumably derived from the struct layout by the Thrift compiler -- confirm)
+const uint8_t RowGroup::binary_fingerprint[16] = {0xDC,0x79,0x68,0x62,0x7F,0xA8,0x26,0xDD,0xC4,0xC6,0xC9,0xBE,0x77,0x35,0x86,0xC9};  // the same 16 bytes as the hex string above
+
+uint32_t RowGroup::read(::apache::thrift::protocol::TProtocol* iprot) {  // Reads a RowGroup from iprot; throws TProtocolException(INVALID_DATA) unless fields 1-3 were all read; returns the accumulated xfer total.
+
+  uint32_t xfer = 0;  // running sum of the values returned by every iprot call below
+  std::string fname;
+  ::apache::thrift::protocol::TType ftype;
+  int16_t fid;
+
+  xfer += iprot->readStructBegin(fname);
+
+  using ::apache::thrift::protocol::TProtocolException;
+
+  bool isset_columns = false;  // local presence flags for the required fields; sorting_columns uses this->__isset instead
+  bool isset_total_byte_size = false;
+  bool isset_num_rows = false;
+
+  while (true)
+  {
+    xfer += iprot->readFieldBegin(fname, ftype, fid);
+    if (ftype == ::apache::thrift::protocol::T_STOP) {  // stop marker ends the field loop
+      break;
+    }
+    switch (fid)  // dispatch on field id; fname is not consulted
+    {
+      case 1:
+        if (ftype == ::apache::thrift::protocol::T_LIST) {
+          {
+            this->columns.clear();
+            uint32_t _size30;
+            ::apache::thrift::protocol::TType _etype33;  // element type is read but never compared to the expected type
+            xfer += iprot->readListBegin(_etype33, _size30);
+            this->columns.resize(_size30);  // NOTE(review): size is taken from the input with no upper-bound check in this function
+            uint32_t _i34;
+            for (_i34 = 0; _i34 < _size30; ++_i34)
+            {
+              xfer += this->columns[_i34].read(iprot);  // each element deserializes itself via ColumnChunk::read
+            }
+            xfer += iprot->readListEnd();
+          }
+          isset_columns = true;
+        } else {
+          xfer += iprot->skip(ftype);  // unexpected wire type: skipped, so the presence flag stays false
+        }
+        break;
+      case 2:
+        if (ftype == ::apache::thrift::protocol::T_I64) {
+          xfer += iprot->readI64(this->total_byte_size);
+          isset_total_byte_size = true;
+        } else {
+          xfer += iprot->skip(ftype);
+        }
+        break;
+      case 3:
+        if (ftype == ::apache::thrift::protocol::T_I64) {
+          xfer += iprot->readI64(this->num_rows);
+          isset_num_rows = true;
+        } else {
+          xfer += iprot->skip(ftype);
+        }
+        break;
+      case 4:
+        if (ftype == ::apache::thrift::protocol::T_LIST) {
+          {
+            this->sorting_columns.clear();
+            uint32_t _size35;
+            ::apache::thrift::protocol::TType _etype38;
+            xfer += iprot->readListBegin(_etype38, _size35);
+            this->sorting_columns.resize(_size35);
+            uint32_t _i39;
+            for (_i39 = 0; _i39 < _size35; ++_i39)
+            {
+              xfer += this->sorting_columns[_i39].read(iprot);
+            }
+            xfer += iprot->readListEnd();
+          }
+          this->__isset.sorting_columns = true;  // optional field: presence is recorded on the object
+        } else {
+          xfer += iprot->skip(ftype);
+        }
+        break;
+      default:
+        xfer += iprot->skip(ftype);  // unknown field ids are skipped rather than rejected
+        break;
+    }
+    xfer += iprot->readFieldEnd();
+  }
+
+  xfer += iprot->readStructEnd();
+
+  if (!isset_columns)  // required-field checks run only after the whole struct has been consumed
+    throw TProtocolException(TProtocolException::INVALID_DATA);
+  if (!isset_total_byte_size)
+    throw TProtocolException(TProtocolException::INVALID_DATA);
+  if (!isset_num_rows)
+    throw TProtocolException(TProtocolException::INVALID_DATA);
+  return xfer;
+}
+
+uint32_t RowGroup::write(::apache::thrift::protocol::TProtocol* oprot) const {  // Writes this RowGroup to oprot and returns the accumulated xfer total.
+  uint32_t xfer = 0;  // running sum of the values returned by every oprot call below
+  uint32_t fcnt = 0;  // incremented once per emitted field; never read in this function
+  xfer += oprot->writeStructBegin("RowGroup");
+
+  ++fcnt;
+  xfer += oprot->writeFieldBegin("columns", ::apache::thrift::protocol::T_LIST, 1);  // fields 1-3 are written unconditionally
+  {
+    xfer += oprot->writeListBegin(::apache::thrift::protocol::T_STRUCT, static_cast<uint32_t>(this->columns.size()));  // list header carries element type and count; size() is narrowed to 32 bits
+    std::vector<ColumnChunk> ::const_iterator _iter40;
+    for (_iter40 = this->columns.begin(); _iter40 != this->columns.end(); ++_iter40)
+    {
+      xfer += (*_iter40).write(oprot);  // each element serializes itself via ColumnChunk::write
+    }
+    xfer += oprot->writeListEnd();
+  }
+  xfer += oprot->writeFieldEnd();
+
+  ++fcnt;
+  xfer += oprot->writeFieldBegin("total_byte_size", ::apache::thrift::protocol::T_I64, 2);
+  xfer += oprot->writeI64(this->total_byte_size);
+  xfer += oprot->writeFieldEnd();
+
+  ++fcnt;
+  xfer += oprot->writeFieldBegin("num_rows", ::apache::thrift::protocol::T_I64, 3);
+  xfer += oprot->writeI64(this->num_rows);
+  xfer += oprot->writeFieldEnd();
+
+  if (this->__isset.sorting_columns) {  // sorting_columns is written only when its __isset flag is true
+    ++fcnt;
+    xfer += oprot->writeFieldBegin("sorting_columns", ::apache::thrift::protocol::T_LIST, 4);
+    {
+      xfer += oprot->writeListBegin(::apache::thrift::protocol::T_STRUCT, static_cast<uint32_t>(this->sorting_columns.size()));
+      std::vector<SortingColumn> ::const_iterator _iter41;
+      for (_iter41 = this->sorting_columns.begin(); _iter41 != this->sorting_columns.end(); ++_iter41)
+      {
+        xfer += (*_iter41).write(oprot);
+      }
+      xfer += oprot->writeListEnd();
+    }
+    xfer += oprot->writeFieldEnd();
+  }
+  xfer += oprot->writeFieldStop();
+  xfer += oprot->writeStructEnd();
+  return xfer;
+}
+
+void swap(RowGroup &a, RowGroup &b) {  // Exchanges every member of a and b, including the __isset flags.
+  using ::std::swap;  // makes std::swap a candidate for the unqualified swap calls below
+  swap(a.columns, b.columns);
+  swap(a.total_byte_size, b.total_byte_size);
+  swap(a.num_rows, b.num_rows);
+  swap(a.sorting_columns, b.sorting_columns);
+  swap(a.__isset, b.__isset);
+}
+
+const char* FileMetaData::ascii_fingerprint = "44DC7D83A66D54A7B7892A985C4125C9";  // 32 hex digits identifying FileMetaData (presumably derived from the struct layout by the Thrift compiler -- confirm)
+const uint8_t FileMetaData::binary_fingerprint[16] = {0x44,0xDC,0x7D,0x83,0xA6,0x6D,0x54,0xA7,0xB7,0x89,0x2A,0x98,0x5C,0x41,0x25,0xC9};  // the same 16 bytes as the hex string above
+
+uint32_t FileMetaData::read(::apache::thrift::protocol::TProtocol* iprot) {  // Reads a FileMetaData from iprot; throws TProtocolException(INVALID_DATA) unless fields 1-4 were all read; returns the accumulated xfer total.
+
+  uint32_t xfer = 0;  // running sum of the values returned by every iprot call below
+  std::string fname;
+  ::apache::thrift::protocol::TType ftype;
+  int16_t fid;
+
+  xfer += iprot->readStructBegin(fname);
+
+  using ::apache::thrift::protocol::TProtocolException;
+
+  bool isset_version = false;  // local presence flags for the required fields; fields 5 and 6 use this->__isset instead
+  bool isset_schema = false;
+  bool isset_num_rows = false;
+  bool isset_row_groups = false;
+
+  while (true)
+  {
+    xfer += iprot->readFieldBegin(fname, ftype, fid);
+    if (ftype == ::apache::thrift::protocol::T_STOP) {  // stop marker ends the field loop
+      break;
+    }
+    switch (fid)  // dispatch on field id; fname is not consulted
+    {
+      case 1:
+        if (ftype == ::apache::thrift::protocol::T_I32) {
+          xfer += iprot->readI32(this->version);
+          isset_version = true;
+        } else {
+          xfer += iprot->skip(ftype);  // unexpected wire type: skipped, so the presence flag stays false
+        }
+        break;
+      case 2:
+        if (ftype == ::apache::thrift::protocol::T_LIST) {
+          {
+            this->schema.clear();
+            uint32_t _size42;
+            ::apache::thrift::protocol::TType _etype45;  // element type is read but never compared to the expected type
+            xfer += iprot->readListBegin(_etype45, _size42);
+            this->schema.resize(_size42);  // NOTE(review): size is taken from the input with no upper-bound check in this function
+            uint32_t _i46;
+            for (_i46 = 0; _i46 < _size42; ++_i46)
+            {
+              xfer += this->schema[_i46].read(iprot);  // each element deserializes itself through its own read()
+            }
+            xfer += iprot->readListEnd();
+          }
+          isset_schema = true;
+        } else {
+          xfer += iprot->skip(ftype);
+        }
+        break;
+      case 3:
+        if (ftype == ::apache::thrift::protocol::T_I64) {
+          xfer += iprot->readI64(this->num_rows);
+          isset_num_rows = true;
+        } else {
+          xfer += iprot->skip(ftype);
+        }
+        break;
+      case 4:
+        if (ftype == ::apache::thrift::protocol::T_LIST) {
+          {
+            this->row_groups.clear();
+            uint32_t _size47;
+            ::apache::thrift::protocol::TType _etype50;
+            xfer += iprot->readListBegin(_etype50, _size47);
+            this->row_groups.resize(_size47);
+            uint32_t _i51;
+            for (_i51 = 0; _i51 < _size47; ++_i51)
+            {
+              xfer += this->row_groups[_i51].read(iprot);
+            }
+            xfer += iprot->readListEnd();
+          }
+          isset_row_groups = true;
+        } else {
+          xfer += iprot->skip(ftype);
+        }
+        break;
+      case 5:
+        if (ftype == ::apache::thrift::protocol::T_LIST) {
+          {
+            this->key_value_metadata.clear();
+            uint32_t _size52;
+            ::apache::thrift::protocol::TType _etype55;
+            xfer += iprot->readListBegin(_etype55, _size52);
+            this->key_value_metadata.resize(_size52);
+            uint32_t _i56;
+            for (_i56 = 0; _i56 < _size52; ++_i56)
+            {
+              xfer += this->key_value_metadata[_i56].read(iprot);
+            }
+            xfer += iprot->readListEnd();
+          }
+          this->__isset.key_value_metadata = true;  // optional field: presence is recorded on the object
+        } else {
+          xfer += iprot->skip(ftype);
+        }
+        break;
+      case 6:
+        if (ftype == ::apache::thrift::protocol::T_STRING) {
+          xfer += iprot->readString(this->created_by);
+          this->__isset.created_by = true;
+        } else {
+          xfer += iprot->skip(ftype);
+        }
+        break;
+      default:
+        xfer += iprot->skip(ftype);  // unknown field ids are skipped rather than rejected
+        break;
+    }
+    xfer += iprot->readFieldEnd();
+  }
+
+  xfer += iprot->readStructEnd();
+
+  if (!isset_version)  // required-field checks run only after the whole struct has been consumed
+    throw TProtocolException(TProtocolException::INVALID_DATA);
+  if (!isset_schema)
+    throw TProtocolException(TProtocolException::INVALID_DATA);
+  if (!isset_num_rows)
+    throw TProtocolException(TProtocolException::INVALID_DATA);
+  if (!isset_row_groups)
+    throw TProtocolException(TProtocolException::INVALID_DATA);
+  return xfer;
+}
+
+uint32_t FileMetaData::write(::apache::thrift::protocol::TProtocol* oprot) const {  // Writes this FileMetaData to oprot and returns the accumulated xfer total.
+  uint32_t xfer = 0;  // running sum of the values returned by every oprot call below
+  uint32_t fcnt = 0;  // incremented once per emitted field; never read in this function
+  xfer += oprot->writeStructBegin("FileMetaData");
+
+  ++fcnt;
+  xfer += oprot->writeFieldBegin("version", ::apache::thrift::protocol::T_I32, 1);  // fields 1-4 are written unconditionally
+  xfer += oprot->writeI32(this->version);
+  xfer += oprot->writeFieldEnd();
+
+  ++fcnt;
+  xfer += oprot->writeFieldBegin("schema", ::apache::thrift::protocol::T_LIST, 2);
+  {
+    xfer += oprot->writeListBegin(::apache::thrift::protocol::T_STRUCT, static_cast<uint32_t>(this->schema.size()));  // list header carries element type and count; size() is narrowed to 32 bits
+    std::vector<SchemaElement> ::const_iterator _iter57;
+    for (_iter57 = this->schema.begin(); _iter57 != this->schema.end(); ++_iter57)
+    {
+      xfer += (*_iter57).write(oprot);  // each element serializes itself through its own write()
+    }
+    xfer += oprot->writeListEnd();
+  }
+  xfer += oprot->writeFieldEnd();
+
+  ++fcnt;
+  xfer += oprot->writeFieldBegin("num_rows", ::apache::thrift::protocol::T_I64, 3);
+  xfer += oprot->writeI64(this->num_rows);
+  xfer += oprot->writeFieldEnd();
+
+  ++fcnt;
+  xfer += oprot->writeFieldBegin("row_groups", ::apache::thrift::protocol::T_LIST, 4);
+  {
+    xfer += oprot->writeListBegin(::apache::thrift::protocol::T_STRUCT, static_cast<uint32_t>(this->row_groups.size()));
+    std::vector<RowGroup> ::const_iterator _iter58;
+    for (_iter58 = this->row_groups.begin(); _iter58 != this->row_groups.end(); ++_iter58)
+    {
+      xfer += (*_iter58).write(oprot);
+    }
+    xfer += oprot->writeListEnd();
+  }
+  xfer += oprot->writeFieldEnd();
+
+  if (this->__isset.key_value_metadata) {  // fields 5 and 6 are written only when their __isset flag is true
+    ++fcnt;
+    xfer += oprot->writeFieldBegin("key_value_metadata", ::apache::thrift::protocol::T_LIST, 5);
+    {
+      xfer += oprot->writeListBegin(::apache::thrift::protocol::T_STRUCT, static_cast<uint32_t>(this->key_value_metadata.size()));
+      std::vector<KeyValue> ::const_iterator _iter59;
+      for (_iter59 = this->key_value_metadata.begin(); _iter59 != this->key_value_metadata.end(); ++_iter59)
+      {
+        xfer += (*_iter59).write(oprot);
+      }
+      xfer += oprot->writeListEnd();
+    }
+    xfer += oprot->writeFieldEnd();
+  }
+  if (this->__isset.created_by) {
+    ++fcnt;
+    xfer += oprot->writeFieldBegin("created_by", ::apache::thrift::protocol::T_STRING, 6);
+    xfer += oprot->writeString(this->created_by);
+    xfer += oprot->writeFieldEnd();
+  }
+  xfer += oprot->writeFieldStop();
+  xfer += oprot->writeStructEnd();
+  return xfer;
+}
+
+void swap(FileMetaData &a, FileMetaData &b) {  // Exchanges every member of a and b, including the __isset flags.
+  using ::std::swap;  // makes std::swap a candidate for the unqualified swap calls below
+  swap(a.version, b.version);
+  swap(a.schema, b.schema);
+  swap(a.num_rows, b.num_rows);
+  swap(a.row_groups, b.row_groups);
+  swap(a.key_value_metadata, b.key_value_metadata);
+  swap(a.created_by, b.created_by);
+  swap(a.__isset, b.__isset);
+}
+
+} // namespace

[5/7] parquet-cpp git commit: PARQUET-416: C++11 compilation, code reorg, libparquet and installation targets

Posted by no...@apache.org.

http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/337cf584/cmake_modules/clean-all.cmake
----------------------------------------------------------------------
diff --git a/cmake_modules/clean-all.cmake b/cmake_modules/clean-all.cmake
new file mode 100644
index 0000000..35041ea
--- /dev/null
+++ b/cmake_modules/clean-all.cmake
@@ -0,0 +1,14 @@
+set(cmake_generated ${CMAKE_BINARY_DIR}/CMakeCache.txt
+                    ${CMAKE_BINARY_DIR}/CTestTestfile.cmake
+                    ${CMAKE_BINARY_DIR}/cmake_install.cmake
+                    ${CMAKE_BINARY_DIR}/Makefile
+                    ${CMAKE_BINARY_DIR}/CMakeFiles
+)
+
+foreach(file ${cmake_generated})
+
+  if (EXISTS ${file})
+     file(REMOVE_RECURSE ${file})
+  endif()
+
+endforeach(file)
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/337cf584/example/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/example/CMakeLists.txt b/example/CMakeLists.txt
index 8844cdd..a9f4fa3 100644
--- a/example/CMakeLists.txt
+++ b/example/CMakeLists.txt
@@ -18,13 +18,10 @@ add_library(Example STATIC
 )
 
 SET(LINK_LIBS
-  Parquet
-  ParquetCompression
-  Example
-  ThriftParquet
+  parquet
+  snappystatic
   thriftstatic
-  lz4static
-  snappystatic)
+  Example)
 
 add_executable(compute_stats compute_stats.cc)
 target_link_libraries(compute_stats ${LINK_LIBS})

http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/337cf584/example/decode_benchmark.cc
----------------------------------------------------------------------
diff --git a/example/decode_benchmark.cc b/example/decode_benchmark.cc
index 1add399..ed4077a 100644
--- a/example/decode_benchmark.cc
+++ b/example/decode_benchmark.cc
@@ -17,11 +17,10 @@
 #include <stdio.h>
 
 #include "example_util.h"
-#include "compression/codec.h"
-#include "encodings/encodings.h"
-#include "util/stopwatch.h"
+#include "parquet/compression/codec.h"
+#include "parquet/encodings/encodings.h"
+#include "parquet/util/stopwatch.h"
 
-using namespace impala;
 using namespace parquet;
 using namespace parquet_cpp;
 using namespace std;

http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/337cf584/example/parquet_reader.cc
----------------------------------------------------------------------
diff --git a/example/parquet_reader.cc b/example/parquet_reader.cc
index c02ffb0..55895ce 100644
--- a/example/parquet_reader.cc
+++ b/example/parquet_reader.cc
@@ -215,7 +215,7 @@ void* read_parquet(char* filename) {
       char *str = (char*)malloc(50);
       assert(str);
       strcpy(str, metadata.schema[j+1].name.c_str());
-      printf("%-"COL_WIDTH"s", str);
+      printf("%-" COL_WIDTH"s", str);
       free(str);
     }
 
@@ -225,22 +225,22 @@ void* read_parquet(char* filename) {
     for (j = 0;j < row_group.columns.size(); ++j)
       switch(type_array[j]) {
         case Type::BOOLEAN:
-          printf("%-"COL_WIDTH"s","BOOLEAN");
+          printf("%-" COL_WIDTH"s","BOOLEAN");
           break;
         case Type::INT32:
-          printf("%-"COL_WIDTH"s","INT32");
+          printf("%-" COL_WIDTH"s","INT32");
           break;
         case Type::INT64:
-          printf("%-"COL_WIDTH"s","INT64");
+          printf("%-" COL_WIDTH"s","INT64");
           break;
         case Type::FLOAT:
-          printf("%-"COL_WIDTH"s","FLOAT");
+          printf("%-" COL_WIDTH"s","FLOAT");
           break;
         case Type::DOUBLE:
-          printf("%-"COL_WIDTH"s","DOUBLE");
+          printf("%-" COL_WIDTH"s","DOUBLE");
           break;
         case Type::BYTE_ARRAY:
-          printf("%-"COL_WIDTH"s","BYTE_ARRAY");
+          printf("%-" COL_WIDTH"s","BYTE_ARRAY");
           break;
         default:
           continue;
@@ -255,26 +255,26 @@ void* read_parquet(char* filename) {
       for (j = 0; j < row_group.columns.size(); ++j) {
         switch(type_array[j]) {
           case Type::BOOLEAN:
-            printf("%-"COL_WIDTH"d",((bool*)(((bool**)column_ptr)[j]))[k]);
+            printf("%-" COL_WIDTH"d",((bool*)(((bool**)column_ptr)[j]))[k]);
             break;
           case Type::INT32:
-            printf("%-"COL_WIDTH"d",((int32_t *)(((int32_t **)column_ptr)[j]))[k]);
+            printf("%-" COL_WIDTH"d",((int32_t *)(((int32_t **)column_ptr)[j]))[k]);
             break;
           case Type::INT64:
-            printf("%-"COL_WIDTH"ld",((int64_t *)(((int64_t **)column_ptr)[j]))[k]);
+            printf("%-" COL_WIDTH"ld",((int64_t *)(((int64_t **)column_ptr)[j]))[k]);
             break;
           case Type::FLOAT:
-            printf("%-"COL_WIDTH"f",((float*)(((float**)column_ptr)[j]))[k]);
+            printf("%-" COL_WIDTH"f",((float*)(((float**)column_ptr)[j]))[k]);
             break;
           case Type::DOUBLE:
-            printf("%-"COL_WIDTH"lf",((double*)(((double**)column_ptr)[j]))[k]);
+            printf("%-" COL_WIDTH"lf",((double*)(((double**)column_ptr)[j]))[k]);
             break;
           case Type::BYTE_ARRAY:
             result = ByteArrayToString( ((ByteArray*)(((ByteArray**)column_ptr)[j]))[k] );
             str1 = (char*)malloc(result.size());
             assert(str1);
             strcpy(str1, result.c_str());
-            printf("%-"COL_WIDTH"s", str1);
+            printf("%-" COL_WIDTH"s", str1);
             free(str1);
             break;
           default:

http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/337cf584/generated/gen-cpp/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/generated/gen-cpp/CMakeLists.txt b/generated/gen-cpp/CMakeLists.txt
deleted file mode 100644
index 262c23f..0000000
--- a/generated/gen-cpp/CMakeLists.txt
+++ /dev/null
@@ -1,20 +0,0 @@
-# Copyright 2012 Cloudera Inc.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-
-add_library(ThriftParquet STATIC
-  parquet_constants.cpp
-  parquet_types.cpp
-)
-

http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/337cf584/generated/gen-cpp/parquet_constants.cpp
----------------------------------------------------------------------
diff --git a/generated/gen-cpp/parquet_constants.cpp b/generated/gen-cpp/parquet_constants.cpp
deleted file mode 100644
index caa5af6..0000000
--- a/generated/gen-cpp/parquet_constants.cpp
+++ /dev/null
@@ -1,17 +0,0 @@
-/**
- * Autogenerated by Thrift Compiler (0.9.0)
- *
- * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING
- *  @generated
- */
-#include "parquet_constants.h"
-
-namespace parquet {
-
-const parquetConstants g_parquet_constants;
-
-parquetConstants::parquetConstants() {
-}
-
-} // namespace
-

http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/337cf584/generated/gen-cpp/parquet_constants.h
----------------------------------------------------------------------
diff --git a/generated/gen-cpp/parquet_constants.h b/generated/gen-cpp/parquet_constants.h
deleted file mode 100644
index 71d6f58..0000000
--- a/generated/gen-cpp/parquet_constants.h
+++ /dev/null
@@ -1,24 +0,0 @@
-/**
- * Autogenerated by Thrift Compiler (0.9.0)
- *
- * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING
- *  @generated
- */
-#ifndef parquet_CONSTANTS_H
-#define parquet_CONSTANTS_H
-
-#include "parquet_types.h"
-
-namespace parquet {
-
-class parquetConstants {
- public:
-  parquetConstants();
-
-};
-
-extern const parquetConstants g_parquet_constants;
-
-} // namespace
-
-#endif

http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/337cf584/generated/gen-cpp/parquet_types.cpp
----------------------------------------------------------------------
diff --git a/generated/gen-cpp/parquet_types.cpp b/generated/gen-cpp/parquet_types.cpp
deleted file mode 100644
index 06d388c..0000000
--- a/generated/gen-cpp/parquet_types.cpp
+++ /dev/null
@@ -1,2006 +0,0 @@
-/**
- * Autogenerated by Thrift Compiler (0.9.0)
- *
- * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING
- *  @generated
- */
-#include "parquet_types.h"
-
-#include <algorithm>
-
-namespace parquet {
-
-int _kTypeValues[] = {
-  Type::BOOLEAN,
-  Type::INT32,
-  Type::INT64,
-  Type::INT96,
-  Type::FLOAT,
-  Type::DOUBLE,
-  Type::BYTE_ARRAY,
-  Type::FIXED_LEN_BYTE_ARRAY
-};
-const char* _kTypeNames[] = {
-  "BOOLEAN",
-  "INT32",
-  "INT64",
-  "INT96",
-  "FLOAT",
-  "DOUBLE",
-  "BYTE_ARRAY",
-  "FIXED_LEN_BYTE_ARRAY"
-};
-const std::map<int, const char*> _Type_VALUES_TO_NAMES(::apache::thrift::TEnumIterator(8, _kTypeValues, _kTypeNames), ::apache::thrift::TEnumIterator(-1, NULL, NULL));
-
-int _kConvertedTypeValues[] = {
-  ConvertedType::UTF8,
-  ConvertedType::MAP,
-  ConvertedType::MAP_KEY_VALUE,
-  ConvertedType::LIST,
-  ConvertedType::ENUM,
-  ConvertedType::DECIMAL
-};
-const char* _kConvertedTypeNames[] = {
-  "UTF8",
-  "MAP",
-  "MAP_KEY_VALUE",
-  "LIST",
-  "ENUM",
-  "DECIMAL"
-};
-const std::map<int, const char*> _ConvertedType_VALUES_TO_NAMES(::apache::thrift::TEnumIterator(6, _kConvertedTypeValues, _kConvertedTypeNames), ::apache::thrift::TEnumIterator(-1, NULL, NULL));
-
-int _kFieldRepetitionTypeValues[] = {
-  FieldRepetitionType::REQUIRED,
-  FieldRepetitionType::OPTIONAL,
-  FieldRepetitionType::REPEATED
-};
-const char* _kFieldRepetitionTypeNames[] = {
-  "REQUIRED",
-  "OPTIONAL",
-  "REPEATED"
-};
-const std::map<int, const char*> _FieldRepetitionType_VALUES_TO_NAMES(::apache::thrift::TEnumIterator(3, _kFieldRepetitionTypeValues, _kFieldRepetitionTypeNames), ::apache::thrift::TEnumIterator(-1, NULL, NULL));
-
-int _kEncodingValues[] = {
-  Encoding::PLAIN,
-  Encoding::PLAIN_DICTIONARY,
-  Encoding::RLE,
-  Encoding::BIT_PACKED,
-  Encoding::DELTA_BINARY_PACKED,
-  Encoding::DELTA_LENGTH_BYTE_ARRAY,
-  Encoding::DELTA_BYTE_ARRAY,
-  Encoding::RLE_DICTIONARY
-};
-const char* _kEncodingNames[] = {
-  "PLAIN",
-  "PLAIN_DICTIONARY",
-  "RLE",
-  "BIT_PACKED",
-  "DELTA_BINARY_PACKED",
-  "DELTA_LENGTH_BYTE_ARRAY",
-  "DELTA_BYTE_ARRAY",
-  "RLE_DICTIONARY"
-};
-const std::map<int, const char*> _Encoding_VALUES_TO_NAMES(::apache::thrift::TEnumIterator(8, _kEncodingValues, _kEncodingNames), ::apache::thrift::TEnumIterator(-1, NULL, NULL));
-
-int _kCompressionCodecValues[] = {
-  CompressionCodec::UNCOMPRESSED,
-  CompressionCodec::SNAPPY,
-  CompressionCodec::GZIP,
-  CompressionCodec::LZO
-};
-const char* _kCompressionCodecNames[] = {
-  "UNCOMPRESSED",
-  "SNAPPY",
-  "GZIP",
-  "LZO"
-};
-const std::map<int, const char*> _CompressionCodec_VALUES_TO_NAMES(::apache::thrift::TEnumIterator(4, _kCompressionCodecValues, _kCompressionCodecNames), ::apache::thrift::TEnumIterator(-1, NULL, NULL));
-
-int _kPageTypeValues[] = {
-  PageType::DATA_PAGE,
-  PageType::INDEX_PAGE,
-  PageType::DICTIONARY_PAGE,
-  PageType::DATA_PAGE_V2
-};
-const char* _kPageTypeNames[] = {
-  "DATA_PAGE",
-  "INDEX_PAGE",
-  "DICTIONARY_PAGE",
-  "DATA_PAGE_V2"
-};
-const std::map<int, const char*> _PageType_VALUES_TO_NAMES(::apache::thrift::TEnumIterator(4, _kPageTypeValues, _kPageTypeNames), ::apache::thrift::TEnumIterator(-1, NULL, NULL));
-
-const char* Statistics::ascii_fingerprint = "CE004821871820DD79A8FD98BB101F6D";
-const uint8_t Statistics::binary_fingerprint[16] = {0xCE,0x00,0x48,0x21,0x87,0x18,0x20,0xDD,0x79,0xA8,0xFD,0x98,0xBB,0x10,0x1F,0x6D};
-
-uint32_t Statistics::read(::apache::thrift::protocol::TProtocol* iprot) {
-
-  uint32_t xfer = 0;
-  std::string fname;
-  ::apache::thrift::protocol::TType ftype;
-  int16_t fid;
-
-  xfer += iprot->readStructBegin(fname);
-
-  using ::apache::thrift::protocol::TProtocolException;
-
-
-  while (true)
-  {
-    xfer += iprot->readFieldBegin(fname, ftype, fid);
-    if (ftype == ::apache::thrift::protocol::T_STOP) {
-      break;
-    }
-    switch (fid)
-    {
-      case 1:
-        if (ftype == ::apache::thrift::protocol::T_STRING) {
-          xfer += iprot->readBinary(this->max);
-          this->__isset.max = true;
-        } else {
-          xfer += iprot->skip(ftype);
-        }
-        break;
-      case 2:
-        if (ftype == ::apache::thrift::protocol::T_STRING) {
-          xfer += iprot->readBinary(this->min);
-          this->__isset.min = true;
-        } else {
-          xfer += iprot->skip(ftype);
-        }
-        break;
-      case 3:
-        if (ftype == ::apache::thrift::protocol::T_I64) {
-          xfer += iprot->readI64(this->null_count);
-          this->__isset.null_count = true;
-        } else {
-          xfer += iprot->skip(ftype);
-        }
-        break;
-      case 4:
-        if (ftype == ::apache::thrift::protocol::T_I64) {
-          xfer += iprot->readI64(this->distinct_count);
-          this->__isset.distinct_count = true;
-        } else {
-          xfer += iprot->skip(ftype);
-        }
-        break;
-      default:
-        xfer += iprot->skip(ftype);
-        break;
-    }
-    xfer += iprot->readFieldEnd();
-  }
-
-  xfer += iprot->readStructEnd();
-
-  return xfer;
-}
-
-uint32_t Statistics::write(::apache::thrift::protocol::TProtocol* oprot) const {
-  uint32_t xfer = 0;
-  uint32_t fcnt = 0;
-  xfer += oprot->writeStructBegin("Statistics");
-
-  if (this->__isset.max) {
-    ++fcnt;
-    xfer += oprot->writeFieldBegin("max", ::apache::thrift::protocol::T_STRING, 1);
-    xfer += oprot->writeBinary(this->max);
-    xfer += oprot->writeFieldEnd();
-  }
-  if (this->__isset.min) {
-    ++fcnt;
-    xfer += oprot->writeFieldBegin("min", ::apache::thrift::protocol::T_STRING, 2);
-    xfer += oprot->writeBinary(this->min);
-    xfer += oprot->writeFieldEnd();
-  }
-  if (this->__isset.null_count) {
-    ++fcnt;
-    xfer += oprot->writeFieldBegin("null_count", ::apache::thrift::protocol::T_I64, 3);
-    xfer += oprot->writeI64(this->null_count);
-    xfer += oprot->writeFieldEnd();
-  }
-  if (this->__isset.distinct_count) {
-    ++fcnt;
-    xfer += oprot->writeFieldBegin("distinct_count", ::apache::thrift::protocol::T_I64, 4);
-    xfer += oprot->writeI64(this->distinct_count);
-    xfer += oprot->writeFieldEnd();
-  }
-  xfer += oprot->writeFieldStop();
-  xfer += oprot->writeStructEnd();
-  return xfer;
-}
-
-void swap(Statistics &a, Statistics &b) {
-  using ::std::swap;
-  swap(a.max, b.max);
-  swap(a.min, b.min);
-  swap(a.null_count, b.null_count);
-  swap(a.distinct_count, b.distinct_count);
-  swap(a.__isset, b.__isset);
-}
-
-const char* SchemaElement::ascii_fingerprint = "388A784401753800444CFEAC8BC1B1A1";
-const uint8_t SchemaElement::binary_fingerprint[16] = {0x38,0x8A,0x78,0x44,0x01,0x75,0x38,0x00,0x44,0x4C,0xFE,0xAC,0x8B,0xC1,0xB1,0xA1};
-
-uint32_t SchemaElement::read(::apache::thrift::protocol::TProtocol* iprot) {
-
-  uint32_t xfer = 0;
-  std::string fname;
-  ::apache::thrift::protocol::TType ftype;
-  int16_t fid;
-
-  xfer += iprot->readStructBegin(fname);
-
-  using ::apache::thrift::protocol::TProtocolException;
-
-  bool isset_name = false;
-
-  while (true)
-  {
-    xfer += iprot->readFieldBegin(fname, ftype, fid);
-    if (ftype == ::apache::thrift::protocol::T_STOP) {
-      break;
-    }
-    switch (fid)
-    {
-      case 1:
-        if (ftype == ::apache::thrift::protocol::T_I32) {
-          int32_t ecast0;
-          xfer += iprot->readI32(ecast0);
-          this->type = (Type::type)ecast0;
-          this->__isset.type = true;
-        } else {
-          xfer += iprot->skip(ftype);
-        }
-        break;
-      case 2:
-        if (ftype == ::apache::thrift::protocol::T_I32) {
-          xfer += iprot->readI32(this->type_length);
-          this->__isset.type_length = true;
-        } else {
-          xfer += iprot->skip(ftype);
-        }
-        break;
-      case 3:
-        if (ftype == ::apache::thrift::protocol::T_I32) {
-          int32_t ecast1;
-          xfer += iprot->readI32(ecast1);
-          this->repetition_type = (FieldRepetitionType::type)ecast1;
-          this->__isset.repetition_type = true;
-        } else {
-          xfer += iprot->skip(ftype);
-        }
-        break;
-      case 4:
-        if (ftype == ::apache::thrift::protocol::T_STRING) {
-          xfer += iprot->readString(this->name);
-          isset_name = true;
-        } else {
-          xfer += iprot->skip(ftype);
-        }
-        break;
-      case 5:
-        if (ftype == ::apache::thrift::protocol::T_I32) {
-          xfer += iprot->readI32(this->num_children);
-          this->__isset.num_children = true;
-        } else {
-          xfer += iprot->skip(ftype);
-        }
-        break;
-      case 6:
-        if (ftype == ::apache::thrift::protocol::T_I32) {
-          int32_t ecast2;
-          xfer += iprot->readI32(ecast2);
-          this->converted_type = (ConvertedType::type)ecast2;
-          this->__isset.converted_type = true;
-        } else {
-          xfer += iprot->skip(ftype);
-        }
-        break;
-      case 7:
-        if (ftype == ::apache::thrift::protocol::T_I32) {
-          xfer += iprot->readI32(this->scale);
-          this->__isset.scale = true;
-        } else {
-          xfer += iprot->skip(ftype);
-        }
-        break;
-      case 8:
-        if (ftype == ::apache::thrift::protocol::T_I32) {
-          xfer += iprot->readI32(this->precision);
-          this->__isset.precision = true;
-        } else {
-          xfer += iprot->skip(ftype);
-        }
-        break;
-      default:
-        xfer += iprot->skip(ftype);
-        break;
-    }
-    xfer += iprot->readFieldEnd();
-  }
-
-  xfer += iprot->readStructEnd();
-
-  if (!isset_name)
-    throw TProtocolException(TProtocolException::INVALID_DATA);
-  return xfer;
-}
-
-uint32_t SchemaElement::write(::apache::thrift::protocol::TProtocol* oprot) const {
-  uint32_t xfer = 0;
-  uint32_t fcnt = 0;
-  xfer += oprot->writeStructBegin("SchemaElement");
-
-  if (this->__isset.type) {
-    ++fcnt;
-    xfer += oprot->writeFieldBegin("type", ::apache::thrift::protocol::T_I32, 1);
-    xfer += oprot->writeI32((int32_t)this->type);
-    xfer += oprot->writeFieldEnd();
-  }
-  if (this->__isset.type_length) {
-    ++fcnt;
-    xfer += oprot->writeFieldBegin("type_length", ::apache::thrift::protocol::T_I32, 2);
-    xfer += oprot->writeI32(this->type_length);
-    xfer += oprot->writeFieldEnd();
-  }
-  if (this->__isset.repetition_type) {
-    ++fcnt;
-    xfer += oprot->writeFieldBegin("repetition_type", ::apache::thrift::protocol::T_I32, 3);
-    xfer += oprot->writeI32((int32_t)this->repetition_type);
-    xfer += oprot->writeFieldEnd();
-  }
-  ++fcnt;
-  xfer += oprot->writeFieldBegin("name", ::apache::thrift::protocol::T_STRING, 4);
-  xfer += oprot->writeString(this->name);
-  xfer += oprot->writeFieldEnd();
-
-  if (this->__isset.num_children) {
-    ++fcnt;
-    xfer += oprot->writeFieldBegin("num_children", ::apache::thrift::protocol::T_I32, 5);
-    xfer += oprot->writeI32(this->num_children);
-    xfer += oprot->writeFieldEnd();
-  }
-  if (this->__isset.converted_type) {
-    ++fcnt;
-    xfer += oprot->writeFieldBegin("converted_type", ::apache::thrift::protocol::T_I32, 6);
-    xfer += oprot->writeI32((int32_t)this->converted_type);
-    xfer += oprot->writeFieldEnd();
-  }
-  if (this->__isset.scale) {
-    ++fcnt;
-    xfer += oprot->writeFieldBegin("scale", ::apache::thrift::protocol::T_I32, 7);
-    xfer += oprot->writeI32(this->scale);
-    xfer += oprot->writeFieldEnd();
-  }
-  if (this->__isset.precision) {
-    ++fcnt;
-    xfer += oprot->writeFieldBegin("precision", ::apache::thrift::protocol::T_I32, 8);
-    xfer += oprot->writeI32(this->precision);
-    xfer += oprot->writeFieldEnd();
-  }
-  xfer += oprot->writeFieldStop();
-  xfer += oprot->writeStructEnd();
-  return xfer;
-}
-
-void swap(SchemaElement &a, SchemaElement &b) {
-  using ::std::swap;
-  swap(a.type, b.type);
-  swap(a.type_length, b.type_length);
-  swap(a.repetition_type, b.repetition_type);
-  swap(a.name, b.name);
-  swap(a.num_children, b.num_children);
-  swap(a.converted_type, b.converted_type);
-  swap(a.scale, b.scale);
-  swap(a.precision, b.precision);
-  swap(a.__isset, b.__isset);
-}
-
-const char* DataPageHeader::ascii_fingerprint = "5FC1792B0483E9C984475384165040B1";
-const uint8_t DataPageHeader::binary_fingerprint[16] = {0x5F,0xC1,0x79,0x2B,0x04,0x83,0xE9,0xC9,0x84,0x47,0x53,0x84,0x16,0x50,0x40,0xB1};
-
-uint32_t DataPageHeader::read(::apache::thrift::protocol::TProtocol* iprot) {
-
-  uint32_t xfer = 0;
-  std::string fname;
-  ::apache::thrift::protocol::TType ftype;
-  int16_t fid;
-
-  xfer += iprot->readStructBegin(fname);
-
-  using ::apache::thrift::protocol::TProtocolException;
-
-  bool isset_num_values = false;
-  bool isset_encoding = false;
-  bool isset_definition_level_encoding = false;
-  bool isset_repetition_level_encoding = false;
-
-  while (true)
-  {
-    xfer += iprot->readFieldBegin(fname, ftype, fid);
-    if (ftype == ::apache::thrift::protocol::T_STOP) {
-      break;
-    }
-    switch (fid)
-    {
-      case 1:
-        if (ftype == ::apache::thrift::protocol::T_I32) {
-          xfer += iprot->readI32(this->num_values);
-          isset_num_values = true;
-        } else {
-          xfer += iprot->skip(ftype);
-        }
-        break;
-      case 2:
-        if (ftype == ::apache::thrift::protocol::T_I32) {
-          int32_t ecast3;
-          xfer += iprot->readI32(ecast3);
-          this->encoding = (Encoding::type)ecast3;
-          isset_encoding = true;
-        } else {
-          xfer += iprot->skip(ftype);
-        }
-        break;
-      case 3:
-        if (ftype == ::apache::thrift::protocol::T_I32) {
-          int32_t ecast4;
-          xfer += iprot->readI32(ecast4);
-          this->definition_level_encoding = (Encoding::type)ecast4;
-          isset_definition_level_encoding = true;
-        } else {
-          xfer += iprot->skip(ftype);
-        }
-        break;
-      case 4:
-        if (ftype == ::apache::thrift::protocol::T_I32) {
-          int32_t ecast5;
-          xfer += iprot->readI32(ecast5);
-          this->repetition_level_encoding = (Encoding::type)ecast5;
-          isset_repetition_level_encoding = true;
-        } else {
-          xfer += iprot->skip(ftype);
-        }
-        break;
-      case 5:
-        if (ftype == ::apache::thrift::protocol::T_STRUCT) {
-          xfer += this->statistics.read(iprot);
-          this->__isset.statistics = true;
-        } else {
-          xfer += iprot->skip(ftype);
-        }
-        break;
-      default:
-        xfer += iprot->skip(ftype);
-        break;
-    }
-    xfer += iprot->readFieldEnd();
-  }
-
-  xfer += iprot->readStructEnd();
-
-  if (!isset_num_values)
-    throw TProtocolException(TProtocolException::INVALID_DATA);
-  if (!isset_encoding)
-    throw TProtocolException(TProtocolException::INVALID_DATA);
-  if (!isset_definition_level_encoding)
-    throw TProtocolException(TProtocolException::INVALID_DATA);
-  if (!isset_repetition_level_encoding)
-    throw TProtocolException(TProtocolException::INVALID_DATA);
-  return xfer;
-}
-
-uint32_t DataPageHeader::write(::apache::thrift::protocol::TProtocol* oprot) const {
-  uint32_t xfer = 0;
-  uint32_t fcnt = 0;
-  xfer += oprot->writeStructBegin("DataPageHeader");
-
-  ++fcnt;
-  xfer += oprot->writeFieldBegin("num_values", ::apache::thrift::protocol::T_I32, 1);
-  xfer += oprot->writeI32(this->num_values);
-  xfer += oprot->writeFieldEnd();
-
-  ++fcnt;
-  xfer += oprot->writeFieldBegin("encoding", ::apache::thrift::protocol::T_I32, 2);
-  xfer += oprot->writeI32((int32_t)this->encoding);
-  xfer += oprot->writeFieldEnd();
-
-  ++fcnt;
-  xfer += oprot->writeFieldBegin("definition_level_encoding", ::apache::thrift::protocol::T_I32, 3);
-  xfer += oprot->writeI32((int32_t)this->definition_level_encoding);
-  xfer += oprot->writeFieldEnd();
-
-  ++fcnt;
-  xfer += oprot->writeFieldBegin("repetition_level_encoding", ::apache::thrift::protocol::T_I32, 4);
-  xfer += oprot->writeI32((int32_t)this->repetition_level_encoding);
-  xfer += oprot->writeFieldEnd();
-
-  if (this->__isset.statistics) {
-    ++fcnt;
-    xfer += oprot->writeFieldBegin("statistics", ::apache::thrift::protocol::T_STRUCT, 5);
-    xfer += this->statistics.write(oprot);
-    xfer += oprot->writeFieldEnd();
-  }
-  xfer += oprot->writeFieldStop();
-  xfer += oprot->writeStructEnd();
-  return xfer;
-}
-
-void swap(DataPageHeader &a, DataPageHeader &b) {
-  using ::std::swap;
-  swap(a.num_values, b.num_values);
-  swap(a.encoding, b.encoding);
-  swap(a.definition_level_encoding, b.definition_level_encoding);
-  swap(a.repetition_level_encoding, b.repetition_level_encoding);
-  swap(a.statistics, b.statistics);
-  swap(a.__isset, b.__isset);
-}
-
-const char* IndexPageHeader::ascii_fingerprint = "99914B932BD37A50B983C5E7C90AE93B";
-const uint8_t IndexPageHeader::binary_fingerprint[16] = {0x99,0x91,0x4B,0x93,0x2B,0xD3,0x7A,0x50,0xB9,0x83,0xC5,0xE7,0xC9,0x0A,0xE9,0x3B};
-
-uint32_t IndexPageHeader::read(::apache::thrift::protocol::TProtocol* iprot) {
-
-  uint32_t xfer = 0;
-  std::string fname;
-  ::apache::thrift::protocol::TType ftype;
-  int16_t fid;
-
-  xfer += iprot->readStructBegin(fname);
-
-  using ::apache::thrift::protocol::TProtocolException;
-
-
-  while (true)
-  {
-    xfer += iprot->readFieldBegin(fname, ftype, fid);
-    if (ftype == ::apache::thrift::protocol::T_STOP) {
-      break;
-    }
-    xfer += iprot->skip(ftype);
-    xfer += iprot->readFieldEnd();
-  }
-
-  xfer += iprot->readStructEnd();
-
-  return xfer;
-}
-
-uint32_t IndexPageHeader::write(::apache::thrift::protocol::TProtocol* oprot) const {
-  uint32_t xfer = 0;
-  uint32_t fcnt = 0;
-  xfer += oprot->writeStructBegin("IndexPageHeader");
-
-  xfer += oprot->writeFieldStop();
-  xfer += oprot->writeStructEnd();
-  return xfer;
-}
-
-void swap(IndexPageHeader &a, IndexPageHeader &b) {
-  using ::std::swap;
-  (void) a;
-  (void) b;
-}
-
-const char* DictionaryPageHeader::ascii_fingerprint = "B149E4528254D495610C22AE4BD539C5";
-const uint8_t DictionaryPageHeader::binary_fingerprint[16] = {0xB1,0x49,0xE4,0x52,0x82,0x54,0xD4,0x95,0x61,0x0C,0x22,0xAE,0x4B,0xD5,0x39,0xC5};
-
-uint32_t DictionaryPageHeader::read(::apache::thrift::protocol::TProtocol* iprot) {
-
-  uint32_t xfer = 0;
-  std::string fname;
-  ::apache::thrift::protocol::TType ftype;
-  int16_t fid;
-
-  xfer += iprot->readStructBegin(fname);
-
-  using ::apache::thrift::protocol::TProtocolException;
-
-  bool isset_num_values = false;
-  bool isset_encoding = false;
-
-  while (true)
-  {
-    xfer += iprot->readFieldBegin(fname, ftype, fid);
-    if (ftype == ::apache::thrift::protocol::T_STOP) {
-      break;
-    }
-    switch (fid)
-    {
-      case 1:
-        if (ftype == ::apache::thrift::protocol::T_I32) {
-          xfer += iprot->readI32(this->num_values);
-          isset_num_values = true;
-        } else {
-          xfer += iprot->skip(ftype);
-        }
-        break;
-      case 2:
-        if (ftype == ::apache::thrift::protocol::T_I32) {
-          int32_t ecast6;
-          xfer += iprot->readI32(ecast6);
-          this->encoding = (Encoding::type)ecast6;
-          isset_encoding = true;
-        } else {
-          xfer += iprot->skip(ftype);
-        }
-        break;
-      case 3:
-        if (ftype == ::apache::thrift::protocol::T_BOOL) {
-          xfer += iprot->readBool(this->is_sorted);
-          this->__isset.is_sorted = true;
-        } else {
-          xfer += iprot->skip(ftype);
-        }
-        break;
-      default:
-        xfer += iprot->skip(ftype);
-        break;
-    }
-    xfer += iprot->readFieldEnd();
-  }
-
-  xfer += iprot->readStructEnd();
-
-  if (!isset_num_values)
-    throw TProtocolException(TProtocolException::INVALID_DATA);
-  if (!isset_encoding)
-    throw TProtocolException(TProtocolException::INVALID_DATA);
-  return xfer;
-}
-
-uint32_t DictionaryPageHeader::write(::apache::thrift::protocol::TProtocol* oprot) const {
-  uint32_t xfer = 0;
-  uint32_t fcnt = 0;
-  xfer += oprot->writeStructBegin("DictionaryPageHeader");
-
-  ++fcnt;
-  xfer += oprot->writeFieldBegin("num_values", ::apache::thrift::protocol::T_I32, 1);
-  xfer += oprot->writeI32(this->num_values);
-  xfer += oprot->writeFieldEnd();
-
-  ++fcnt;
-  xfer += oprot->writeFieldBegin("encoding", ::apache::thrift::protocol::T_I32, 2);
-  xfer += oprot->writeI32((int32_t)this->encoding);
-  xfer += oprot->writeFieldEnd();
-
-  if (this->__isset.is_sorted) {
-    ++fcnt;
-    xfer += oprot->writeFieldBegin("is_sorted", ::apache::thrift::protocol::T_BOOL, 3);
-    xfer += oprot->writeBool(this->is_sorted);
-    xfer += oprot->writeFieldEnd();
-  }
-  xfer += oprot->writeFieldStop();
-  xfer += oprot->writeStructEnd();
-  return xfer;
-}
-
-void swap(DictionaryPageHeader &a, DictionaryPageHeader &b) {
-  using ::std::swap;
-  swap(a.num_values, b.num_values);
-  swap(a.encoding, b.encoding);
-  swap(a.is_sorted, b.is_sorted);
-  swap(a.__isset, b.__isset);
-}
-
-const char* DataPageHeaderV2::ascii_fingerprint = "69FF2F6BD1A443440D5E46ABA5A3A919";
-const uint8_t DataPageHeaderV2::binary_fingerprint[16] = {0x69,0xFF,0x2F,0x6B,0xD1,0xA4,0x43,0x44,0x0D,0x5E,0x46,0xAB,0xA5,0xA3,0xA9,0x19};
-
-uint32_t DataPageHeaderV2::read(::apache::thrift::protocol::TProtocol* iprot) {
-
-  uint32_t xfer = 0;
-  std::string fname;
-  ::apache::thrift::protocol::TType ftype;
-  int16_t fid;
-
-  xfer += iprot->readStructBegin(fname);
-
-  using ::apache::thrift::protocol::TProtocolException;
-
-  bool isset_num_values = false;
-  bool isset_num_nulls = false;
-  bool isset_num_rows = false;
-  bool isset_encoding = false;
-  bool isset_definition_levels_byte_length = false;
-  bool isset_repetition_levels_byte_length = false;
-
-  while (true)
-  {
-    xfer += iprot->readFieldBegin(fname, ftype, fid);
-    if (ftype == ::apache::thrift::protocol::T_STOP) {
-      break;
-    }
-    switch (fid)
-    {
-      case 1:
-        if (ftype == ::apache::thrift::protocol::T_I32) {
-          xfer += iprot->readI32(this->num_values);
-          isset_num_values = true;
-        } else {
-          xfer += iprot->skip(ftype);
-        }
-        break;
-      case 2:
-        if (ftype == ::apache::thrift::protocol::T_I32) {
-          xfer += iprot->readI32(this->num_nulls);
-          isset_num_nulls = true;
-        } else {
-          xfer += iprot->skip(ftype);
-        }
-        break;
-      case 3:
-        if (ftype == ::apache::thrift::protocol::T_I32) {
-          xfer += iprot->readI32(this->num_rows);
-          isset_num_rows = true;
-        } else {
-          xfer += iprot->skip(ftype);
-        }
-        break;
-      case 4:
-        if (ftype == ::apache::thrift::protocol::T_I32) {
-          int32_t ecast7;
-          xfer += iprot->readI32(ecast7);
-          this->encoding = (Encoding::type)ecast7;
-          isset_encoding = true;
-        } else {
-          xfer += iprot->skip(ftype);
-        }
-        break;
-      case 5:
-        if (ftype == ::apache::thrift::protocol::T_I32) {
-          xfer += iprot->readI32(this->definition_levels_byte_length);
-          isset_definition_levels_byte_length = true;
-        } else {
-          xfer += iprot->skip(ftype);
-        }
-        break;
-      case 6:
-        if (ftype == ::apache::thrift::protocol::T_I32) {
-          xfer += iprot->readI32(this->repetition_levels_byte_length);
-          isset_repetition_levels_byte_length = true;
-        } else {
-          xfer += iprot->skip(ftype);
-        }
-        break;
-      case 7:
-        if (ftype == ::apache::thrift::protocol::T_BOOL) {
-          xfer += iprot->readBool(this->is_compressed);
-          this->__isset.is_compressed = true;
-        } else {
-          xfer += iprot->skip(ftype);
-        }
-        break;
-      case 8:
-        if (ftype == ::apache::thrift::protocol::T_STRUCT) {
-          xfer += this->statistics.read(iprot);
-          this->__isset.statistics = true;
-        } else {
-          xfer += iprot->skip(ftype);
-        }
-        break;
-      default:
-        xfer += iprot->skip(ftype);
-        break;
-    }
-    xfer += iprot->readFieldEnd();
-  }
-
-  xfer += iprot->readStructEnd();
-
-  if (!isset_num_values)
-    throw TProtocolException(TProtocolException::INVALID_DATA);
-  if (!isset_num_nulls)
-    throw TProtocolException(TProtocolException::INVALID_DATA);
-  if (!isset_num_rows)
-    throw TProtocolException(TProtocolException::INVALID_DATA);
-  if (!isset_encoding)
-    throw TProtocolException(TProtocolException::INVALID_DATA);
-  if (!isset_definition_levels_byte_length)
-    throw TProtocolException(TProtocolException::INVALID_DATA);
-  if (!isset_repetition_levels_byte_length)
-    throw TProtocolException(TProtocolException::INVALID_DATA);
-  return xfer;
-}
-
-uint32_t DataPageHeaderV2::write(::apache::thrift::protocol::TProtocol* oprot) const {
-  uint32_t xfer = 0;
-  uint32_t fcnt = 0;
-  xfer += oprot->writeStructBegin("DataPageHeaderV2");
-
-  ++fcnt;
-  xfer += oprot->writeFieldBegin("num_values", ::apache::thrift::protocol::T_I32, 1);
-  xfer += oprot->writeI32(this->num_values);
-  xfer += oprot->writeFieldEnd();
-
-  ++fcnt;
-  xfer += oprot->writeFieldBegin("num_nulls", ::apache::thrift::protocol::T_I32, 2);
-  xfer += oprot->writeI32(this->num_nulls);
-  xfer += oprot->writeFieldEnd();
-
-  ++fcnt;
-  xfer += oprot->writeFieldBegin("num_rows", ::apache::thrift::protocol::T_I32, 3);
-  xfer += oprot->writeI32(this->num_rows);
-  xfer += oprot->writeFieldEnd();
-
-  ++fcnt;
-  xfer += oprot->writeFieldBegin("encoding", ::apache::thrift::protocol::T_I32, 4);
-  xfer += oprot->writeI32((int32_t)this->encoding);
-  xfer += oprot->writeFieldEnd();
-
-  ++fcnt;
-  xfer += oprot->writeFieldBegin("definition_levels_byte_length", ::apache::thrift::protocol::T_I32, 5);
-  xfer += oprot->writeI32(this->definition_levels_byte_length);
-  xfer += oprot->writeFieldEnd();
-
-  ++fcnt;
-  xfer += oprot->writeFieldBegin("repetition_levels_byte_length", ::apache::thrift::protocol::T_I32, 6);
-  xfer += oprot->writeI32(this->repetition_levels_byte_length);
-  xfer += oprot->writeFieldEnd();
-
-  if (this->__isset.is_compressed) {
-    ++fcnt;
-    xfer += oprot->writeFieldBegin("is_compressed", ::apache::thrift::protocol::T_BOOL, 7);
-    xfer += oprot->writeBool(this->is_compressed);
-    xfer += oprot->writeFieldEnd();
-  }
-  if (this->__isset.statistics) {
-    ++fcnt;
-    xfer += oprot->writeFieldBegin("statistics", ::apache::thrift::protocol::T_STRUCT, 8);
-    xfer += this->statistics.write(oprot);
-    xfer += oprot->writeFieldEnd();
-  }
-  xfer += oprot->writeFieldStop();
-  xfer += oprot->writeStructEnd();
-  return xfer;
-}
-
-void swap(DataPageHeaderV2 &a, DataPageHeaderV2 &b) {
-  using ::std::swap;
-  swap(a.num_values, b.num_values);
-  swap(a.num_nulls, b.num_nulls);
-  swap(a.num_rows, b.num_rows);
-  swap(a.encoding, b.encoding);
-  swap(a.definition_levels_byte_length, b.definition_levels_byte_length);
-  swap(a.repetition_levels_byte_length, b.repetition_levels_byte_length);
-  swap(a.is_compressed, b.is_compressed);
-  swap(a.statistics, b.statistics);
-  swap(a.__isset, b.__isset);
-}
-
-const char* PageHeader::ascii_fingerprint = "B5BD2BDF3756C883A58B30B9C9F204A0";
-const uint8_t PageHeader::binary_fingerprint[16] = {0xB5,0xBD,0x2B,0xDF,0x37,0x56,0xC8,0x83,0xA5,0x8B,0x30,0xB9,0xC9,0xF2,0x04,0xA0};
-
-uint32_t PageHeader::read(::apache::thrift::protocol::TProtocol* iprot) {
-
-  uint32_t xfer = 0;
-  std::string fname;
-  ::apache::thrift::protocol::TType ftype;
-  int16_t fid;
-
-  xfer += iprot->readStructBegin(fname);
-
-  using ::apache::thrift::protocol::TProtocolException;
-
-  bool isset_type = false;
-  bool isset_uncompressed_page_size = false;
-  bool isset_compressed_page_size = false;
-
-  while (true)
-  {
-    xfer += iprot->readFieldBegin(fname, ftype, fid);
-    if (ftype == ::apache::thrift::protocol::T_STOP) {
-      break;
-    }
-    switch (fid)
-    {
-      case 1:
-        if (ftype == ::apache::thrift::protocol::T_I32) {
-          int32_t ecast8;
-          xfer += iprot->readI32(ecast8);
-          this->type = (PageType::type)ecast8;
-          isset_type = true;
-        } else {
-          xfer += iprot->skip(ftype);
-        }
-        break;
-      case 2:
-        if (ftype == ::apache::thrift::protocol::T_I32) {
-          xfer += iprot->readI32(this->uncompressed_page_size);
-          isset_uncompressed_page_size = true;
-        } else {
-          xfer += iprot->skip(ftype);
-        }
-        break;
-      case 3:
-        if (ftype == ::apache::thrift::protocol::T_I32) {
-          xfer += iprot->readI32(this->compressed_page_size);
-          isset_compressed_page_size = true;
-        } else {
-          xfer += iprot->skip(ftype);
-        }
-        break;
-      case 4:
-        if (ftype == ::apache::thrift::protocol::T_I32) {
-          xfer += iprot->readI32(this->crc);
-          this->__isset.crc = true;
-        } else {
-          xfer += iprot->skip(ftype);
-        }
-        break;
-      case 5:
-        if (ftype == ::apache::thrift::protocol::T_STRUCT) {
-          xfer += this->data_page_header.read(iprot);
-          this->__isset.data_page_header = true;
-        } else {
-          xfer += iprot->skip(ftype);
-        }
-        break;
-      case 6:
-        if (ftype == ::apache::thrift::protocol::T_STRUCT) {
-          xfer += this->index_page_header.read(iprot);
-          this->__isset.index_page_header = true;
-        } else {
-          xfer += iprot->skip(ftype);
-        }
-        break;
-      case 7:
-        if (ftype == ::apache::thrift::protocol::T_STRUCT) {
-          xfer += this->dictionary_page_header.read(iprot);
-          this->__isset.dictionary_page_header = true;
-        } else {
-          xfer += iprot->skip(ftype);
-        }
-        break;
-      case 8:
-        if (ftype == ::apache::thrift::protocol::T_STRUCT) {
-          xfer += this->data_page_header_v2.read(iprot);
-          this->__isset.data_page_header_v2 = true;
-        } else {
-          xfer += iprot->skip(ftype);
-        }
-        break;
-      default:
-        xfer += iprot->skip(ftype);
-        break;
-    }
-    xfer += iprot->readFieldEnd();
-  }
-
-  xfer += iprot->readStructEnd();
-
-  if (!isset_type)
-    throw TProtocolException(TProtocolException::INVALID_DATA);
-  if (!isset_uncompressed_page_size)
-    throw TProtocolException(TProtocolException::INVALID_DATA);
-  if (!isset_compressed_page_size)
-    throw TProtocolException(TProtocolException::INVALID_DATA);
-  return xfer;
-}
-
-uint32_t PageHeader::write(::apache::thrift::protocol::TProtocol* oprot) const {
-  uint32_t xfer = 0;
-  uint32_t fcnt = 0;
-  xfer += oprot->writeStructBegin("PageHeader");
-
-  ++fcnt;
-  xfer += oprot->writeFieldBegin("type", ::apache::thrift::protocol::T_I32, 1);
-  xfer += oprot->writeI32((int32_t)this->type);
-  xfer += oprot->writeFieldEnd();
-
-  ++fcnt;
-  xfer += oprot->writeFieldBegin("uncompressed_page_size", ::apache::thrift::protocol::T_I32, 2);
-  xfer += oprot->writeI32(this->uncompressed_page_size);
-  xfer += oprot->writeFieldEnd();
-
-  ++fcnt;
-  xfer += oprot->writeFieldBegin("compressed_page_size", ::apache::thrift::protocol::T_I32, 3);
-  xfer += oprot->writeI32(this->compressed_page_size);
-  xfer += oprot->writeFieldEnd();
-
-  if (this->__isset.crc) {
-    ++fcnt;
-    xfer += oprot->writeFieldBegin("crc", ::apache::thrift::protocol::T_I32, 4);
-    xfer += oprot->writeI32(this->crc);
-    xfer += oprot->writeFieldEnd();
-  }
-  if (this->__isset.data_page_header) {
-    ++fcnt;
-    xfer += oprot->writeFieldBegin("data_page_header", ::apache::thrift::protocol::T_STRUCT, 5);
-    xfer += this->data_page_header.write(oprot);
-    xfer += oprot->writeFieldEnd();
-  }
-  if (this->__isset.index_page_header) {
-    ++fcnt;
-    xfer += oprot->writeFieldBegin("index_page_header", ::apache::thrift::protocol::T_STRUCT, 6);
-    xfer += this->index_page_header.write(oprot);
-    xfer += oprot->writeFieldEnd();
-  }
-  if (this->__isset.dictionary_page_header) {
-    ++fcnt;
-    xfer += oprot->writeFieldBegin("dictionary_page_header", ::apache::thrift::protocol::T_STRUCT, 7);
-    xfer += this->dictionary_page_header.write(oprot);
-    xfer += oprot->writeFieldEnd();
-  }
-  if (this->__isset.data_page_header_v2) {
-    ++fcnt;
-    xfer += oprot->writeFieldBegin("data_page_header_v2", ::apache::thrift::protocol::T_STRUCT, 8);
-    xfer += this->data_page_header_v2.write(oprot);
-    xfer += oprot->writeFieldEnd();
-  }
-  xfer += oprot->writeFieldStop();
-  xfer += oprot->writeStructEnd();
-  return xfer;
-}
-
-void swap(PageHeader &a, PageHeader &b) {
-  using ::std::swap;
-  swap(a.type, b.type);
-  swap(a.uncompressed_page_size, b.uncompressed_page_size);
-  swap(a.compressed_page_size, b.compressed_page_size);
-  swap(a.crc, b.crc);
-  swap(a.data_page_header, b.data_page_header);
-  swap(a.index_page_header, b.index_page_header);
-  swap(a.dictionary_page_header, b.dictionary_page_header);
-  swap(a.data_page_header_v2, b.data_page_header_v2);
-  swap(a.__isset, b.__isset);
-}
-
-const char* KeyValue::ascii_fingerprint = "5B708A954C550ECA9C1A49D3C5CAFAB9";
-const uint8_t KeyValue::binary_fingerprint[16] = {0x5B,0x70,0x8A,0x95,0x4C,0x55,0x0E,0xCA,0x9C,0x1A,0x49,0xD3,0xC5,0xCA,0xFA,0xB9};
-
-uint32_t KeyValue::read(::apache::thrift::protocol::TProtocol* iprot) {
-
-  uint32_t xfer = 0;
-  std::string fname;
-  ::apache::thrift::protocol::TType ftype;
-  int16_t fid;
-
-  xfer += iprot->readStructBegin(fname);
-
-  using ::apache::thrift::protocol::TProtocolException;
-
-  bool isset_key = false;
-
-  while (true)
-  {
-    xfer += iprot->readFieldBegin(fname, ftype, fid);
-    if (ftype == ::apache::thrift::protocol::T_STOP) {
-      break;
-    }
-    switch (fid)
-    {
-      case 1:
-        if (ftype == ::apache::thrift::protocol::T_STRING) {
-          xfer += iprot->readString(this->key);
-          isset_key = true;
-        } else {
-          xfer += iprot->skip(ftype);
-        }
-        break;
-      case 2:
-        if (ftype == ::apache::thrift::protocol::T_STRING) {
-          xfer += iprot->readString(this->value);
-          this->__isset.value = true;
-        } else {
-          xfer += iprot->skip(ftype);
-        }
-        break;
-      default:
-        xfer += iprot->skip(ftype);
-        break;
-    }
-    xfer += iprot->readFieldEnd();
-  }
-
-  xfer += iprot->readStructEnd();
-
-  if (!isset_key)
-    throw TProtocolException(TProtocolException::INVALID_DATA);
-  return xfer;
-}
-
-uint32_t KeyValue::write(::apache::thrift::protocol::TProtocol* oprot) const {
-  uint32_t xfer = 0;
-  uint32_t fcnt = 0;
-  xfer += oprot->writeStructBegin("KeyValue");
-
-  ++fcnt;
-  xfer += oprot->writeFieldBegin("key", ::apache::thrift::protocol::T_STRING, 1);
-  xfer += oprot->writeString(this->key);
-  xfer += oprot->writeFieldEnd();
-
-  if (this->__isset.value) {
-    ++fcnt;
-    xfer += oprot->writeFieldBegin("value", ::apache::thrift::protocol::T_STRING, 2);
-    xfer += oprot->writeString(this->value);
-    xfer += oprot->writeFieldEnd();
-  }
-  xfer += oprot->writeFieldStop();
-  xfer += oprot->writeStructEnd();
-  return xfer;
-}
-
-void swap(KeyValue &a, KeyValue &b) {
-  using ::std::swap;
-  swap(a.key, b.key);
-  swap(a.value, b.value);
-  swap(a.__isset, b.__isset);
-}
-
-const char* SortingColumn::ascii_fingerprint = "F079C2D58A783AD90F9BE05D10DBBC6F";
-const uint8_t SortingColumn::binary_fingerprint[16] = {0xF0,0x79,0xC2,0xD5,0x8A,0x78,0x3A,0xD9,0x0F,0x9B,0xE0,0x5D,0x10,0xDB,0xBC,0x6F};
-
-uint32_t SortingColumn::read(::apache::thrift::protocol::TProtocol* iprot) {
-
-  uint32_t xfer = 0;
-  std::string fname;
-  ::apache::thrift::protocol::TType ftype;
-  int16_t fid;
-
-  xfer += iprot->readStructBegin(fname);
-
-  using ::apache::thrift::protocol::TProtocolException;
-
-  bool isset_column_idx = false;
-  bool isset_descending = false;
-  bool isset_nulls_first = false;
-
-  while (true)
-  {
-    xfer += iprot->readFieldBegin(fname, ftype, fid);
-    if (ftype == ::apache::thrift::protocol::T_STOP) {
-      break;
-    }
-    switch (fid)
-    {
-      case 1:
-        if (ftype == ::apache::thrift::protocol::T_I32) {
-          xfer += iprot->readI32(this->column_idx);
-          isset_column_idx = true;
-        } else {
-          xfer += iprot->skip(ftype);
-        }
-        break;
-      case 2:
-        if (ftype == ::apache::thrift::protocol::T_BOOL) {
-          xfer += iprot->readBool(this->descending);
-          isset_descending = true;
-        } else {
-          xfer += iprot->skip(ftype);
-        }
-        break;
-      case 3:
-        if (ftype == ::apache::thrift::protocol::T_BOOL) {
-          xfer += iprot->readBool(this->nulls_first);
-          isset_nulls_first = true;
-        } else {
-          xfer += iprot->skip(ftype);
-        }
-        break;
-      default:
-        xfer += iprot->skip(ftype);
-        break;
-    }
-    xfer += iprot->readFieldEnd();
-  }
-
-  xfer += iprot->readStructEnd();
-
-  if (!isset_column_idx)
-    throw TProtocolException(TProtocolException::INVALID_DATA);
-  if (!isset_descending)
-    throw TProtocolException(TProtocolException::INVALID_DATA);
-  if (!isset_nulls_first)
-    throw TProtocolException(TProtocolException::INVALID_DATA);
-  return xfer;
-}
-
-uint32_t SortingColumn::write(::apache::thrift::protocol::TProtocol* oprot) const {
-  uint32_t xfer = 0;
-  uint32_t fcnt = 0;
-  xfer += oprot->writeStructBegin("SortingColumn");
-
-  ++fcnt;
-  xfer += oprot->writeFieldBegin("column_idx", ::apache::thrift::protocol::T_I32, 1);
-  xfer += oprot->writeI32(this->column_idx);
-  xfer += oprot->writeFieldEnd();
-
-  ++fcnt;
-  xfer += oprot->writeFieldBegin("descending", ::apache::thrift::protocol::T_BOOL, 2);
-  xfer += oprot->writeBool(this->descending);
-  xfer += oprot->writeFieldEnd();
-
-  ++fcnt;
-  xfer += oprot->writeFieldBegin("nulls_first", ::apache::thrift::protocol::T_BOOL, 3);
-  xfer += oprot->writeBool(this->nulls_first);
-  xfer += oprot->writeFieldEnd();
-
-  xfer += oprot->writeFieldStop();
-  xfer += oprot->writeStructEnd();
-  return xfer;
-}
-
-void swap(SortingColumn &a, SortingColumn &b) {
-  using ::std::swap;
-  swap(a.column_idx, b.column_idx);
-  swap(a.descending, b.descending);
-  swap(a.nulls_first, b.nulls_first);
-}
-
-const char* ColumnMetaData::ascii_fingerprint = "1AF797732BCB4465C6314FB29B86638D";
-const uint8_t ColumnMetaData::binary_fingerprint[16] = {0x1A,0xF7,0x97,0x73,0x2B,0xCB,0x44,0x65,0xC6,0x31,0x4F,0xB2,0x9B,0x86,0x63,0x8D};
-
-uint32_t ColumnMetaData::read(::apache::thrift::protocol::TProtocol* iprot) {
-
-  uint32_t xfer = 0;
-  std::string fname;
-  ::apache::thrift::protocol::TType ftype;
-  int16_t fid;
-
-  xfer += iprot->readStructBegin(fname);
-
-  using ::apache::thrift::protocol::TProtocolException;
-
-  bool isset_type = false;
-  bool isset_encodings = false;
-  bool isset_path_in_schema = false;
-  bool isset_codec = false;
-  bool isset_num_values = false;
-  bool isset_total_uncompressed_size = false;
-  bool isset_total_compressed_size = false;
-  bool isset_data_page_offset = false;
-
-  while (true)
-  {
-    xfer += iprot->readFieldBegin(fname, ftype, fid);
-    if (ftype == ::apache::thrift::protocol::T_STOP) {
-      break;
-    }
-    switch (fid)
-    {
-      case 1:
-        if (ftype == ::apache::thrift::protocol::T_I32) {
-          int32_t ecast9;
-          xfer += iprot->readI32(ecast9);
-          this->type = (Type::type)ecast9;
-          isset_type = true;
-        } else {
-          xfer += iprot->skip(ftype);
-        }
-        break;
-      case 2:
-        if (ftype == ::apache::thrift::protocol::T_LIST) {
-          {
-            this->encodings.clear();
-            uint32_t _size10;
-            ::apache::thrift::protocol::TType _etype13;
-            xfer += iprot->readListBegin(_etype13, _size10);
-            this->encodings.resize(_size10);
-            uint32_t _i14;
-            for (_i14 = 0; _i14 < _size10; ++_i14)
-            {
-              int32_t ecast15;
-              xfer += iprot->readI32(ecast15);
-              this->encodings[_i14] = (Encoding::type)ecast15;
-            }
-            xfer += iprot->readListEnd();
-          }
-          isset_encodings = true;
-        } else {
-          xfer += iprot->skip(ftype);
-        }
-        break;
-      case 3:
-        if (ftype == ::apache::thrift::protocol::T_LIST) {
-          {
-            this->path_in_schema.clear();
-            uint32_t _size16;
-            ::apache::thrift::protocol::TType _etype19;
-            xfer += iprot->readListBegin(_etype19, _size16);
-            this->path_in_schema.resize(_size16);
-            uint32_t _i20;
-            for (_i20 = 0; _i20 < _size16; ++_i20)
-            {
-              xfer += iprot->readString(this->path_in_schema[_i20]);
-            }
-            xfer += iprot->readListEnd();
-          }
-          isset_path_in_schema = true;
-        } else {
-          xfer += iprot->skip(ftype);
-        }
-        break;
-      case 4:
-        if (ftype == ::apache::thrift::protocol::T_I32) {
-          int32_t ecast21;
-          xfer += iprot->readI32(ecast21);
-          this->codec = (CompressionCodec::type)ecast21;
-          isset_codec = true;
-        } else {
-          xfer += iprot->skip(ftype);
-        }
-        break;
-      case 5:
-        if (ftype == ::apache::thrift::protocol::T_I64) {
-          xfer += iprot->readI64(this->num_values);
-          isset_num_values = true;
-        } else {
-          xfer += iprot->skip(ftype);
-        }
-        break;
-      case 6:
-        if (ftype == ::apache::thrift::protocol::T_I64) {
-          xfer += iprot->readI64(this->total_uncompressed_size);
-          isset_total_uncompressed_size = true;
-        } else {
-          xfer += iprot->skip(ftype);
-        }
-        break;
-      case 7:
-        if (ftype == ::apache::thrift::protocol::T_I64) {
-          xfer += iprot->readI64(this->total_compressed_size);
-          isset_total_compressed_size = true;
-        } else {
-          xfer += iprot->skip(ftype);
-        }
-        break;
-      case 8:
-        if (ftype == ::apache::thrift::protocol::T_LIST) {
-          {
-            this->key_value_metadata.clear();
-            uint32_t _size22;
-            ::apache::thrift::protocol::TType _etype25;
-            xfer += iprot->readListBegin(_etype25, _size22);
-            this->key_value_metadata.resize(_size22);
-            uint32_t _i26;
-            for (_i26 = 0; _i26 < _size22; ++_i26)
-            {
-              xfer += this->key_value_metadata[_i26].read(iprot);
-            }
-            xfer += iprot->readListEnd();
-          }
-          this->__isset.key_value_metadata = true;
-        } else {
-          xfer += iprot->skip(ftype);
-        }
-        break;
-      case 9:
-        if (ftype == ::apache::thrift::protocol::T_I64) {
-          xfer += iprot->readI64(this->data_page_offset);
-          isset_data_page_offset = true;
-        } else {
-          xfer += iprot->skip(ftype);
-        }
-        break;
-      case 10:
-        if (ftype == ::apache::thrift::protocol::T_I64) {
-          xfer += iprot->readI64(this->index_page_offset);
-          this->__isset.index_page_offset = true;
-        } else {
-          xfer += iprot->skip(ftype);
-        }
-        break;
-      case 11:
-        if (ftype == ::apache::thrift::protocol::T_I64) {
-          xfer += iprot->readI64(this->dictionary_page_offset);
-          this->__isset.dictionary_page_offset = true;
-        } else {
-          xfer += iprot->skip(ftype);
-        }
-        break;
-      case 12:
-        if (ftype == ::apache::thrift::protocol::T_STRUCT) {
-          xfer += this->statistics.read(iprot);
-          this->__isset.statistics = true;
-        } else {
-          xfer += iprot->skip(ftype);
-        }
-        break;
-      default:
-        xfer += iprot->skip(ftype);
-        break;
-    }
-    xfer += iprot->readFieldEnd();
-  }
-
-  xfer += iprot->readStructEnd();
-
-  if (!isset_type)
-    throw TProtocolException(TProtocolException::INVALID_DATA);
-  if (!isset_encodings)
-    throw TProtocolException(TProtocolException::INVALID_DATA);
-  if (!isset_path_in_schema)
-    throw TProtocolException(TProtocolException::INVALID_DATA);
-  if (!isset_codec)
-    throw TProtocolException(TProtocolException::INVALID_DATA);
-  if (!isset_num_values)
-    throw TProtocolException(TProtocolException::INVALID_DATA);
-  if (!isset_total_uncompressed_size)
-    throw TProtocolException(TProtocolException::INVALID_DATA);
-  if (!isset_total_compressed_size)
-    throw TProtocolException(TProtocolException::INVALID_DATA);
-  if (!isset_data_page_offset)
-    throw TProtocolException(TProtocolException::INVALID_DATA);
-  return xfer;
-}
-
-uint32_t ColumnMetaData::write(::apache::thrift::protocol::TProtocol* oprot) const {
-  uint32_t xfer = 0;
-  uint32_t fcnt = 0;
-  xfer += oprot->writeStructBegin("ColumnMetaData");
-
-  ++fcnt;
-  xfer += oprot->writeFieldBegin("type", ::apache::thrift::protocol::T_I32, 1);
-  xfer += oprot->writeI32((int32_t)this->type);
-  xfer += oprot->writeFieldEnd();
-
-  ++fcnt;
-  xfer += oprot->writeFieldBegin("encodings", ::apache::thrift::protocol::T_LIST, 2);
-  {
-    xfer += oprot->writeListBegin(::apache::thrift::protocol::T_I32, static_cast<uint32_t>(this->encodings.size()));
-    std::vector<Encoding::type> ::const_iterator _iter27;
-    for (_iter27 = this->encodings.begin(); _iter27 != this->encodings.end(); ++_iter27)
-    {
-      xfer += oprot->writeI32((int32_t)(*_iter27));
-    }
-    xfer += oprot->writeListEnd();
-  }
-  xfer += oprot->writeFieldEnd();
-
-  ++fcnt;
-  xfer += oprot->writeFieldBegin("path_in_schema", ::apache::thrift::protocol::T_LIST, 3);
-  {
-    xfer += oprot->writeListBegin(::apache::thrift::protocol::T_STRING, static_cast<uint32_t>(this->path_in_schema.size()));
-    std::vector<std::string> ::const_iterator _iter28;
-    for (_iter28 = this->path_in_schema.begin(); _iter28 != this->path_in_schema.end(); ++_iter28)
-    {
-      xfer += oprot->writeString((*_iter28));
-    }
-    xfer += oprot->writeListEnd();
-  }
-  xfer += oprot->writeFieldEnd();
-
-  ++fcnt;
-  xfer += oprot->writeFieldBegin("codec", ::apache::thrift::protocol::T_I32, 4);
-  xfer += oprot->writeI32((int32_t)this->codec);
-  xfer += oprot->writeFieldEnd();
-
-  ++fcnt;
-  xfer += oprot->writeFieldBegin("num_values", ::apache::thrift::protocol::T_I64, 5);
-  xfer += oprot->writeI64(this->num_values);
-  xfer += oprot->writeFieldEnd();
-
-  ++fcnt;
-  xfer += oprot->writeFieldBegin("total_uncompressed_size", ::apache::thrift::protocol::T_I64, 6);
-  xfer += oprot->writeI64(this->total_uncompressed_size);
-  xfer += oprot->writeFieldEnd();
-
-  ++fcnt;
-  xfer += oprot->writeFieldBegin("total_compressed_size", ::apache::thrift::protocol::T_I64, 7);
-  xfer += oprot->writeI64(this->total_compressed_size);
-  xfer += oprot->writeFieldEnd();
-
-  if (this->__isset.key_value_metadata) {
-    ++fcnt;
-    xfer += oprot->writeFieldBegin("key_value_metadata", ::apache::thrift::protocol::T_LIST, 8);
-    {
-      xfer += oprot->writeListBegin(::apache::thrift::protocol::T_STRUCT, static_cast<uint32_t>(this->key_value_metadata.size()));
-      std::vector<KeyValue> ::const_iterator _iter29;
-      for (_iter29 = this->key_value_metadata.begin(); _iter29 != this->key_value_metadata.end(); ++_iter29)
-      {
-        xfer += (*_iter29).write(oprot);
-      }
-      xfer += oprot->writeListEnd();
-    }
-    xfer += oprot->writeFieldEnd();
-  }
-  ++fcnt;
-  xfer += oprot->writeFieldBegin("data_page_offset", ::apache::thrift::protocol::T_I64, 9);
-  xfer += oprot->writeI64(this->data_page_offset);
-  xfer += oprot->writeFieldEnd();
-
-  if (this->__isset.index_page_offset) {
-    ++fcnt;
-    xfer += oprot->writeFieldBegin("index_page_offset", ::apache::thrift::protocol::T_I64, 10);
-    xfer += oprot->writeI64(this->index_page_offset);
-    xfer += oprot->writeFieldEnd();
-  }
-  if (this->__isset.dictionary_page_offset) {
-    ++fcnt;
-    xfer += oprot->writeFieldBegin("dictionary_page_offset", ::apache::thrift::protocol::T_I64, 11);
-    xfer += oprot->writeI64(this->dictionary_page_offset);
-    xfer += oprot->writeFieldEnd();
-  }
-  if (this->__isset.statistics) {
-    ++fcnt;
-    xfer += oprot->writeFieldBegin("statistics", ::apache::thrift::protocol::T_STRUCT, 12);
-    xfer += this->statistics.write(oprot);
-    xfer += oprot->writeFieldEnd();
-  }
-  xfer += oprot->writeFieldStop();
-  xfer += oprot->writeStructEnd();
-  return xfer;
-}
-
-void swap(ColumnMetaData &a, ColumnMetaData &b) {
-  using ::std::swap;
-  swap(a.type, b.type);
-  swap(a.encodings, b.encodings);
-  swap(a.path_in_schema, b.path_in_schema);
-  swap(a.codec, b.codec);
-  swap(a.num_values, b.num_values);
-  swap(a.total_uncompressed_size, b.total_uncompressed_size);
-  swap(a.total_compressed_size, b.total_compressed_size);
-  swap(a.key_value_metadata, b.key_value_metadata);
-  swap(a.data_page_offset, b.data_page_offset);
-  swap(a.index_page_offset, b.index_page_offset);
-  swap(a.dictionary_page_offset, b.dictionary_page_offset);
-  swap(a.statistics, b.statistics);
-  swap(a.__isset, b.__isset);
-}
-
-const char* ColumnChunk::ascii_fingerprint = "169FC47057EF3D82E2FACDDEC2641AE8";
-const uint8_t ColumnChunk::binary_fingerprint[16] = {0x16,0x9F,0xC4,0x70,0x57,0xEF,0x3D,0x82,0xE2,0xFA,0xCD,0xDE,0xC2,0x64,0x1A,0xE8};
-
-uint32_t ColumnChunk::read(::apache::thrift::protocol::TProtocol* iprot) {
-
-  uint32_t xfer = 0;
-  std::string fname;
-  ::apache::thrift::protocol::TType ftype;
-  int16_t fid;
-
-  xfer += iprot->readStructBegin(fname);
-
-  using ::apache::thrift::protocol::TProtocolException;
-
-  bool isset_file_offset = false;
-
-  while (true)
-  {
-    xfer += iprot->readFieldBegin(fname, ftype, fid);
-    if (ftype == ::apache::thrift::protocol::T_STOP) {
-      break;
-    }
-    switch (fid)
-    {
-      case 1:
-        if (ftype == ::apache::thrift::protocol::T_STRING) {
-          xfer += iprot->readString(this->file_path);
-          this->__isset.file_path = true;
-        } else {
-          xfer += iprot->skip(ftype);
-        }
-        break;
-      case 2:
-        if (ftype == ::apache::thrift::protocol::T_I64) {
-          xfer += iprot->readI64(this->file_offset);
-          isset_file_offset = true;
-        } else {
-          xfer += iprot->skip(ftype);
-        }
-        break;
-      case 3:
-        if (ftype == ::apache::thrift::protocol::T_STRUCT) {
-          xfer += this->meta_data.read(iprot);
-          this->__isset.meta_data = true;
-        } else {
-          xfer += iprot->skip(ftype);
-        }
-        break;
-      default:
-        xfer += iprot->skip(ftype);
-        break;
-    }
-    xfer += iprot->readFieldEnd();
-  }
-
-  xfer += iprot->readStructEnd();
-
-  if (!isset_file_offset)
-    throw TProtocolException(TProtocolException::INVALID_DATA);
-  return xfer;
-}
-
-uint32_t ColumnChunk::write(::apache::thrift::protocol::TProtocol* oprot) const {
-  uint32_t xfer = 0;
-  uint32_t fcnt = 0;
-  xfer += oprot->writeStructBegin("ColumnChunk");
-
-  if (this->__isset.file_path) {
-    ++fcnt;
-    xfer += oprot->writeFieldBegin("file_path", ::apache::thrift::protocol::T_STRING, 1);
-    xfer += oprot->writeString(this->file_path);
-    xfer += oprot->writeFieldEnd();
-  }
-  ++fcnt;
-  xfer += oprot->writeFieldBegin("file_offset", ::apache::thrift::protocol::T_I64, 2);
-  xfer += oprot->writeI64(this->file_offset);
-  xfer += oprot->writeFieldEnd();
-
-  if (this->__isset.meta_data) {
-    ++fcnt;
-    xfer += oprot->writeFieldBegin("meta_data", ::apache::thrift::protocol::T_STRUCT, 3);
-    xfer += this->meta_data.write(oprot);
-    xfer += oprot->writeFieldEnd();
-  }
-  xfer += oprot->writeFieldStop();
-  xfer += oprot->writeStructEnd();
-  return xfer;
-}
-
-void swap(ColumnChunk &a, ColumnChunk &b) {
-  using ::std::swap;
-  swap(a.file_path, b.file_path);
-  swap(a.file_offset, b.file_offset);
-  swap(a.meta_data, b.meta_data);
-  swap(a.__isset, b.__isset);
-}
-
-const char* RowGroup::ascii_fingerprint = "DC7968627FA826DDC4C6C9BE773586C9";
-const uint8_t RowGroup::binary_fingerprint[16] = {0xDC,0x79,0x68,0x62,0x7F,0xA8,0x26,0xDD,0xC4,0xC6,0xC9,0xBE,0x77,0x35,0x86,0xC9};
-
-uint32_t RowGroup::read(::apache::thrift::protocol::TProtocol* iprot) {
-
-  uint32_t xfer = 0;
-  std::string fname;
-  ::apache::thrift::protocol::TType ftype;
-  int16_t fid;
-
-  xfer += iprot->readStructBegin(fname);
-
-  using ::apache::thrift::protocol::TProtocolException;
-
-  bool isset_columns = false;
-  bool isset_total_byte_size = false;
-  bool isset_num_rows = false;
-
-  while (true)
-  {
-    xfer += iprot->readFieldBegin(fname, ftype, fid);
-    if (ftype == ::apache::thrift::protocol::T_STOP) {
-      break;
-    }
-    switch (fid)
-    {
-      case 1:
-        if (ftype == ::apache::thrift::protocol::T_LIST) {
-          {
-            this->columns.clear();
-            uint32_t _size30;
-            ::apache::thrift::protocol::TType _etype33;
-            xfer += iprot->readListBegin(_etype33, _size30);
-            this->columns.resize(_size30);
-            uint32_t _i34;
-            for (_i34 = 0; _i34 < _size30; ++_i34)
-            {
-              xfer += this->columns[_i34].read(iprot);
-            }
-            xfer += iprot->readListEnd();
-          }
-          isset_columns = true;
-        } else {
-          xfer += iprot->skip(ftype);
-        }
-        break;
-      case 2:
-        if (ftype == ::apache::thrift::protocol::T_I64) {
-          xfer += iprot->readI64(this->total_byte_size);
-          isset_total_byte_size = true;
-        } else {
-          xfer += iprot->skip(ftype);
-        }
-        break;
-      case 3:
-        if (ftype == ::apache::thrift::protocol::T_I64) {
-          xfer += iprot->readI64(this->num_rows);
-          isset_num_rows = true;
-        } else {
-          xfer += iprot->skip(ftype);
-        }
-        break;
-      case 4:
-        if (ftype == ::apache::thrift::protocol::T_LIST) {
-          {
-            this->sorting_columns.clear();
-            uint32_t _size35;
-            ::apache::thrift::protocol::TType _etype38;
-            xfer += iprot->readListBegin(_etype38, _size35);
-            this->sorting_columns.resize(_size35);
-            uint32_t _i39;
-            for (_i39 = 0; _i39 < _size35; ++_i39)
-            {
-              xfer += this->sorting_columns[_i39].read(iprot);
-            }
-            xfer += iprot->readListEnd();
-          }
-          this->__isset.sorting_columns = true;
-        } else {
-          xfer += iprot->skip(ftype);
-        }
-        break;
-      default:
-        xfer += iprot->skip(ftype);
-        break;
-    }
-    xfer += iprot->readFieldEnd();
-  }
-
-  xfer += iprot->readStructEnd();
-
-  if (!isset_columns)
-    throw TProtocolException(TProtocolException::INVALID_DATA);
-  if (!isset_total_byte_size)
-    throw TProtocolException(TProtocolException::INVALID_DATA);
-  if (!isset_num_rows)
-    throw TProtocolException(TProtocolException::INVALID_DATA);
-  return xfer;
-}
-
-uint32_t RowGroup::write(::apache::thrift::protocol::TProtocol* oprot) const {
-  uint32_t xfer = 0;
-  uint32_t fcnt = 0;
-  xfer += oprot->writeStructBegin("RowGroup");
-
-  ++fcnt;
-  xfer += oprot->writeFieldBegin("columns", ::apache::thrift::protocol::T_LIST, 1);
-  {
-    xfer += oprot->writeListBegin(::apache::thrift::protocol::T_STRUCT, static_cast<uint32_t>(this->columns.size()));
-    std::vector<ColumnChunk> ::const_iterator _iter40;
-    for (_iter40 = this->columns.begin(); _iter40 != this->columns.end(); ++_iter40)
-    {
-      xfer += (*_iter40).write(oprot);
-    }
-    xfer += oprot->writeListEnd();
-  }
-  xfer += oprot->writeFieldEnd();
-
-  ++fcnt;
-  xfer += oprot->writeFieldBegin("total_byte_size", ::apache::thrift::protocol::T_I64, 2);
-  xfer += oprot->writeI64(this->total_byte_size);
-  xfer += oprot->writeFieldEnd();
-
-  ++fcnt;
-  xfer += oprot->writeFieldBegin("num_rows", ::apache::thrift::protocol::T_I64, 3);
-  xfer += oprot->writeI64(this->num_rows);
-  xfer += oprot->writeFieldEnd();
-
-  if (this->__isset.sorting_columns) {
-    ++fcnt;
-    xfer += oprot->writeFieldBegin("sorting_columns", ::apache::thrift::protocol::T_LIST, 4);
-    {
-      xfer += oprot->writeListBegin(::apache::thrift::protocol::T_STRUCT, static_cast<uint32_t>(this->sorting_columns.size()));
-      std::vector<SortingColumn> ::const_iterator _iter41;
-      for (_iter41 = this->sorting_columns.begin(); _iter41 != this->sorting_columns.end(); ++_iter41)
-      {
-        xfer += (*_iter41).write(oprot);
-      }
-      xfer += oprot->writeListEnd();
-    }
-    xfer += oprot->writeFieldEnd();
-  }
-  xfer += oprot->writeFieldStop();
-  xfer += oprot->writeStructEnd();
-  return xfer;
-}
-
-void swap(RowGroup &a, RowGroup &b) {
-  using ::std::swap;
-  swap(a.columns, b.columns);
-  swap(a.total_byte_size, b.total_byte_size);
-  swap(a.num_rows, b.num_rows);
-  swap(a.sorting_columns, b.sorting_columns);
-  swap(a.__isset, b.__isset);
-}
-
-const char* FileMetaData::ascii_fingerprint = "44DC7D83A66D54A7B7892A985C4125C9";
-const uint8_t FileMetaData::binary_fingerprint[16] = {0x44,0xDC,0x7D,0x83,0xA6,0x6D,0x54,0xA7,0xB7,0x89,0x2A,0x98,0x5C,0x41,0x25,0xC9};
-
-uint32_t FileMetaData::read(::apache::thrift::protocol::TProtocol* iprot) {
-
-  uint32_t xfer = 0;
-  std::string fname;
-  ::apache::thrift::protocol::TType ftype;
-  int16_t fid;
-
-  xfer += iprot->readStructBegin(fname);
-
-  using ::apache::thrift::protocol::TProtocolException;
-
-  bool isset_version = false;
-  bool isset_schema = false;
-  bool isset_num_rows = false;
-  bool isset_row_groups = false;
-
-  while (true)
-  {
-    xfer += iprot->readFieldBegin(fname, ftype, fid);
-    if (ftype == ::apache::thrift::protocol::T_STOP) {
-      break;
-    }
-    switch (fid)
-    {
-      case 1:
-        if (ftype == ::apache::thrift::protocol::T_I32) {
-          xfer += iprot->readI32(this->version);
-          isset_version = true;
-        } else {
-          xfer += iprot->skip(ftype);
-        }
-        break;
-      case 2:
-        if (ftype == ::apache::thrift::protocol::T_LIST) {
-          {
-            this->schema.clear();
-            uint32_t _size42;
-            ::apache::thrift::protocol::TType _etype45;
-            xfer += iprot->readListBegin(_etype45, _size42);
-            this->schema.resize(_size42);
-            uint32_t _i46;
-            for (_i46 = 0; _i46 < _size42; ++_i46)
-            {
-              xfer += this->schema[_i46].read(iprot);
-            }
-            xfer += iprot->readListEnd();
-          }
-          isset_schema = true;
-        } else {
-          xfer += iprot->skip(ftype);
-        }
-        break;
-      case 3:
-        if (ftype == ::apache::thrift::protocol::T_I64) {
-          xfer += iprot->readI64(this->num_rows);
-          isset_num_rows = true;
-        } else {
-          xfer += iprot->skip(ftype);
-        }
-        break;
-      case 4:
-        if (ftype == ::apache::thrift::protocol::T_LIST) {
-          {
-            this->row_groups.clear();
-            uint32_t _size47;
-            ::apache::thrift::protocol::TType _etype50;
-            xfer += iprot->readListBegin(_etype50, _size47);
-            this->row_groups.resize(_size47);
-            uint32_t _i51;
-            for (_i51 = 0; _i51 < _size47; ++_i51)
-            {
-              xfer += this->row_groups[_i51].read(iprot);
-            }
-            xfer += iprot->readListEnd();
-          }
-          isset_row_groups = true;
-        } else {
-          xfer += iprot->skip(ftype);
-        }
-        break;
-      case 5:
-        if (ftype == ::apache::thrift::protocol::T_LIST) {
-          {
-            this->key_value_metadata.clear();
-            uint32_t _size52;
-            ::apache::thrift::protocol::TType _etype55;
-            xfer += iprot->readListBegin(_etype55, _size52);
-            this->key_value_metadata.resize(_size52);
-            uint32_t _i56;
-            for (_i56 = 0; _i56 < _size52; ++_i56)
-            {
-              xfer += this->key_value_metadata[_i56].read(iprot);
-            }
-            xfer += iprot->readListEnd();
-          }
-          this->__isset.key_value_metadata = true;
-        } else {
-          xfer += iprot->skip(ftype);
-        }
-        break;
-      case 6:
-        if (ftype == ::apache::thrift::protocol::T_STRING) {
-          xfer += iprot->readString(this->created_by);
-          this->__isset.created_by = true;
-        } else {
-          xfer += iprot->skip(ftype);
-        }
-        break;
-      default:
-        xfer += iprot->skip(ftype);
-        break;
-    }
-    xfer += iprot->readFieldEnd();
-  }
-
-  xfer += iprot->readStructEnd();
-
-  if (!isset_version)
-    throw TProtocolException(TProtocolException::INVALID_DATA);
-  if (!isset_schema)
-    throw TProtocolException(TProtocolException::INVALID_DATA);
-  if (!isset_num_rows)
-    throw TProtocolException(TProtocolException::INVALID_DATA);
-  if (!isset_row_groups)
-    throw TProtocolException(TProtocolException::INVALID_DATA);
-  return xfer;
-}
-
-uint32_t FileMetaData::write(::apache::thrift::protocol::TProtocol* oprot) const {
-  uint32_t xfer = 0;
-  uint32_t fcnt = 0;
-  xfer += oprot->writeStructBegin("FileMetaData");
-
-  ++fcnt;
-  xfer += oprot->writeFieldBegin("version", ::apache::thrift::protocol::T_I32, 1);
-  xfer += oprot->writeI32(this->version);
-  xfer += oprot->writeFieldEnd();
-
-  ++fcnt;
-  xfer += oprot->writeFieldBegin("schema", ::apache::thrift::protocol::T_LIST, 2);
-  {
-    xfer += oprot->writeListBegin(::apache::thrift::protocol::T_STRUCT, static_cast<uint32_t>(this->schema.size()));
-    std::vector<SchemaElement> ::const_iterator _iter57;
-    for (_iter57 = this->schema.begin(); _iter57 != this->schema.end(); ++_iter57)
-    {
-      xfer += (*_iter57).write(oprot);
-    }
-    xfer += oprot->writeListEnd();
-  }
-  xfer += oprot->writeFieldEnd();
-
-  ++fcnt;
-  xfer += oprot->writeFieldBegin("num_rows", ::apache::thrift::protocol::T_I64, 3);
-  xfer += oprot->writeI64(this->num_rows);
-  xfer += oprot->writeFieldEnd();
-
-  ++fcnt;
-  xfer += oprot->writeFieldBegin("row_groups", ::apache::thrift::protocol::T_LIST, 4);
-  {
-    xfer += oprot->writeListBegin(::apache::thrift::protocol::T_STRUCT, static_cast<uint32_t>(this->row_groups.size()));
-    std::vector<RowGroup> ::const_iterator _iter58;
-    for (_iter58 = this->row_groups.begin(); _iter58 != this->row_groups.end(); ++_iter58)
-    {
-      xfer += (*_iter58).write(oprot);
-    }
-    xfer += oprot->writeListEnd();
-  }
-  xfer += oprot->writeFieldEnd();
-
-  if (this->__isset.key_value_metadata) {
-    ++fcnt;
-    xfer += oprot->writeFieldBegin("key_value_metadata", ::apache::thrift::protocol::T_LIST, 5);
-    {
-      xfer += oprot->writeListBegin(::apache::thrift::protocol::T_STRUCT, static_cast<uint32_t>(this->key_value_metadata.size()));
-      std::vector<KeyValue> ::const_iterator _iter59;
-      for (_iter59 = this->key_value_metadata.begin(); _iter59 != this->key_value_metadata.end(); ++_iter59)
-      {
-        xfer += (*_iter59).write(oprot);
-      }
-      xfer += oprot->writeListEnd();
-    }
-    xfer += oprot->writeFieldEnd();
-  }
-  if (this->__isset.created_by) {
-    ++fcnt;
-    xfer += oprot->writeFieldBegin("created_by", ::apache::thrift::protocol::T_STRING, 6);
-    xfer += oprot->writeString(this->created_by);
-    xfer += oprot->writeFieldEnd();
-  }
-  xfer += oprot->writeFieldStop();
-  xfer += oprot->writeStructEnd();
-  return xfer;
-}
-
-void swap(FileMetaData &a, FileMetaData &b) {
-  using ::std::swap;
-  swap(a.version, b.version);
-  swap(a.schema, b.schema);
-  swap(a.num_rows, b.num_rows);
-  swap(a.row_groups, b.row_groups);
-  swap(a.key_value_metadata, b.key_value_metadata);
-  swap(a.created_by, b.created_by);
-  swap(a.__isset, b.__isset);
-}
-
-} // namespace

[6/7] parquet-cpp git commit: PARQUET-416: C++11 compilation, code reorg, libparquet and installation targets

Posted by no...@apache.org.

http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/337cf584/build-support/cpplint.py
----------------------------------------------------------------------
diff --git a/build-support/cpplint.py b/build-support/cpplint.py
new file mode 100755
index 0000000..ece520b
--- /dev/null
+++ b/build-support/cpplint.py
@@ -0,0 +1,6323 @@
+#!/usr/bin/env python
+#
+# Copyright (c) 2009 Google Inc. All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met:
+#
+#    * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+#    * Redistributions in binary form must reproduce the above
+# copyright notice, this list of conditions and the following disclaimer
+# in the documentation and/or other materials provided with the
+# distribution.
+#    * Neither the name of Google Inc. nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+"""Does google-lint on c++ files.
+
+The goal of this script is to identify places in the code that *may*
+be in non-compliance with google style.  It does not attempt to fix
+up these problems -- the point is to educate.  It also does not
+attempt to find all problems, or to ensure that everything it does
+find is legitimately a problem.
+
+In particular, we can get very confused by /* and // inside strings!
+We do a small hack, which is to ignore //'s with "'s after them on the
+same line, but it is far from perfect (in either direction).
+"""
+
+import codecs
+import copy
+import getopt
+import math  # for log
+import os
+import re
+import sre_compile
+import string
+import sys
+import unicodedata
+
+
+_USAGE = """
+Syntax: cpplint.py [--verbose=#] [--output=vs7] [--filter=-x,+y,...]
+                   [--counting=total|toplevel|detailed] [--root=subdir]
+                   [--linelength=digits]
+        <file> [file] ...
+
+  The style guidelines this tries to follow are those in
+    http://google-styleguide.googlecode.com/svn/trunk/cppguide.xml
+
+  Every problem is given a confidence score from 1-5, with 5 meaning we are
+  certain of the problem, and 1 meaning it could be a legitimate construct.
+  This will miss some errors, and is not a substitute for a code review.
+
+  To suppress false-positive errors of a certain category, add a
+  'NOLINT(category)' comment to the line.  NOLINT or NOLINT(*)
+  suppresses errors of all categories on that line.
+
+  The files passed in will be linted; at least one file must be provided.
+  Default linted extensions are .cc, .cpp, .cu, .cuh and .h.  Change the
+  extensions with the --extensions flag.
+
+  Flags:
+
+    output=vs7
+      By default, the output is formatted to ease emacs parsing.  Visual Studio
+      compatible output (vs7) may also be used.  Other formats are unsupported.
+
+    verbose=#
+      Specify a number 0-5 to restrict errors to certain verbosity levels.
+
+    filter=-x,+y,...
+      Specify a comma-separated list of category-filters to apply: only
+      error messages whose category names pass the filters will be printed.
+      (Category names are printed with the message and look like
+      "[whitespace/indent]".)  Filters are evaluated left to right.
+      "-FOO" and "FOO" means "do not print categories that start with FOO".
+      "+FOO" means "do print categories that start with FOO".
+
+      Examples: --filter=-whitespace,+whitespace/braces
+                --filter=whitespace,runtime/printf,+runtime/printf_format
+                --filter=-,+build/include_what_you_use
+
+      To see a list of all the categories used in cpplint, pass no arg:
+         --filter=
+
+    counting=total|toplevel|detailed
+      The total number of errors found is always printed. If
+      'toplevel' is provided, then the count of errors in each of
+      the top-level categories like 'build' and 'whitespace' will
+      also be printed. If 'detailed' is provided, then a count
+      is provided for each category like 'build/class'.
+
+    root=subdir
+      The root directory used for deriving header guard CPP variable.
+      By default, the header guard CPP variable is calculated as the relative
+      path to the directory that contains .git, .hg, or .svn.  When this flag
+      is specified, the relative path is calculated from the specified
+      directory. If the specified directory does not exist, this flag is
+      ignored.
+
+      Examples:
+        Assuming that src/.git exists, the header guard CPP variables for
+        src/chrome/browser/ui/browser.h are:
+
+        No flag => CHROME_BROWSER_UI_BROWSER_H_
+        --root=chrome => BROWSER_UI_BROWSER_H_
+        --root=chrome/browser => UI_BROWSER_H_
+
+    linelength=digits
+      This is the allowed line length for the project. The default value is
+      80 characters.
+
+      Examples:
+        --linelength=120
+
+    extensions=extension,extension,...
+      The allowed file extensions that cpplint will check
+
+      Examples:
+        --extensions=hpp,cpp
+
+    cpplint.py supports per-directory configurations specified in CPPLINT.cfg
+    files. CPPLINT.cfg file can contain a number of key=value pairs.
+    Currently the following options are supported:
+
+      set noparent
+      filter=+filter1,-filter2,...
+      exclude_files=regex
+      linelength=80
+
+    "set noparent" option prevents cpplint from traversing directory tree
+    upwards looking for more .cfg files in parent directories. This option
+    is usually placed in the top-level project directory.
+
+    The "filter" option is similar in function to --filter flag. It specifies
+    message filters in addition to the |_DEFAULT_FILTERS| and those specified
+    through --filter command-line flag.
+
+    "exclude_files" allows to specify a regular expression to be matched against
+    a file name. If the expression matches, the file is skipped and not run
+    through liner.
+
+    "linelength" allows to specify the allowed line length for the project.
+
+    CPPLINT.cfg has an effect on files in the same directory and all
+    sub-directories, unless overridden by a nested configuration file.
+
+      Example file:
+        filter=-build/include_order,+build/include_alpha
+        exclude_files=.*\.cc
+
+    The above example disables build/include_order warning and enables
+    build/include_alpha as well as excludes all .cc from being
+    processed by linter, in the current directory (where the .cfg
+    file is located) and all sub-directories.
+"""
+
+# We categorize each error message we print.  Here are the categories.
+# We want an explicit list so we can list them all in cpplint --filter=.
+# If you add a new error message with a new category, add it to the list
+# here!  cpplint_unittest.py should tell you if you forget to do this.
+_ERROR_CATEGORIES = [
+    'build/class',
+    'build/c++11',
+    'build/deprecated',
+    'build/endif_comment',
+    'build/explicit_make_pair',
+    'build/forward_decl',
+    'build/header_guard',
+    'build/include',
+    'build/include_alpha',
+    'build/include_order',
+    'build/include_what_you_use',
+    'build/namespaces',
+    'build/printf_format',
+    'build/storage_class',
+    'legal/copyright',
+    'readability/alt_tokens',
+    'readability/braces',
+    'readability/casting',
+    'readability/check',
+    'readability/constructors',
+    'readability/fn_size',
+    'readability/function',
+    'readability/inheritance',
+    'readability/multiline_comment',
+    'readability/multiline_string',
+    'readability/namespace',
+    'readability/nolint',
+    'readability/nul',
+    'readability/strings',
+    'readability/todo',
+    'readability/utf8',
+    'runtime/arrays',
+    'runtime/casting',
+    'runtime/explicit',
+    'runtime/int',
+    'runtime/init',
+    'runtime/invalid_increment',
+    'runtime/member_string_references',
+    'runtime/memset',
+    'runtime/indentation_namespace',
+    'runtime/operator',
+    'runtime/printf',
+    'runtime/printf_format',
+    'runtime/references',
+    'runtime/string',
+    'runtime/threadsafe_fn',
+    'runtime/vlog',
+    'whitespace/blank_line',
+    'whitespace/braces',
+    'whitespace/comma',
+    'whitespace/comments',
+    'whitespace/empty_conditional_body',
+    'whitespace/empty_loop_body',
+    'whitespace/end_of_line',
+    'whitespace/ending_newline',
+    'whitespace/forcolon',
+    'whitespace/indent',
+    'whitespace/line_length',
+    'whitespace/newline',
+    'whitespace/operators',
+    'whitespace/parens',
+    'whitespace/semicolon',
+    'whitespace/tab',
+    'whitespace/todo',
+    ]
+
+# These error categories are no longer enforced by cpplint, but for backwards-
+# compatibility they may still appear in NOLINT comments.
+_LEGACY_ERROR_CATEGORIES = [
+    'readability/streams',
+    ]
+
+# The default state of the category filter. This is overridden by the --filter=
+# flag. By default all errors are on, so only add here categories that should be
+# off by default (i.e., categories that must be enabled by the --filter= flags).
+# All entries here should start with a '-' or '+', as in the --filter= flag.
+_DEFAULT_FILTERS = ['-build/include_alpha']
+
+# We used to check for high-bit characters, but after much discussion we
+# decided those were OK, as long as they were in UTF-8 and didn't represent
+# hard-coded international strings, which belong in a separate i18n file.
+
+# C++ headers
+_CPP_HEADERS = frozenset([
+    # Legacy
+    'algobase.h',
+    'algo.h',
+    'alloc.h',
+    'builtinbuf.h',
+    'bvector.h',
+    'complex.h',
+    'defalloc.h',
+    'deque.h',
+    'editbuf.h',
+    'fstream.h',
+    'function.h',
+    'hash_map',
+    'hash_map.h',
+    'hash_set',
+    'hash_set.h',
+    'hashtable.h',
+    'heap.h',
+    'indstream.h',
+    'iomanip.h',
+    'iostream.h',
+    'istream.h',
+    'iterator.h',
+    'list.h',
+    'map.h',
+    'multimap.h',
+    'multiset.h',
+    'ostream.h',
+    'pair.h',
+    'parsestream.h',
+    'pfstream.h',
+    'procbuf.h',
+    'pthread_alloc',
+    'pthread_alloc.h',
+    'rope',
+    'rope.h',
+    'ropeimpl.h',
+    'set.h',
+    'slist',
+    'slist.h',
+    'stack.h',
+    'stdiostream.h',
+    'stl_alloc.h',
+    'stl_relops.h',
+    'streambuf.h',
+    'stream.h',
+    'strfile.h',
+    'strstream.h',
+    'tempbuf.h',
+    'tree.h',
+    'type_traits.h',
+    'vector.h',
+    # 17.6.1.2 C++ library headers
+    'algorithm',
+    'array',
+    'atomic',
+    'bitset',
+    'chrono',
+    'codecvt',
+    'complex',
+    'condition_variable',
+    'deque',
+    'exception',
+    'forward_list',
+    'fstream',
+    'functional',
+    'future',
+    'initializer_list',
+    'iomanip',
+    'ios',
+    'iosfwd',
+    'iostream',
+    'istream',
+    'iterator',
+    'limits',
+    'list',
+    'locale',
+    'map',
+    'memory',
+    'mutex',
+    'new',
+    'numeric',
+    'ostream',
+    'queue',
+    'random',
+    'ratio',
+    'regex',
+    'set',
+    'sstream',
+    'stack',
+    'stdexcept',
+    'streambuf',
+    'string',
+    'strstream',
+    'system_error',
+    'thread',
+    'tuple',
+    'typeindex',
+    'typeinfo',
+    'type_traits',
+    'unordered_map',
+    'unordered_set',
+    'utility',
+    'valarray',
+    'vector',
+    # 17.6.1.2 C++ headers for C library facilities
+    'cassert',
+    'ccomplex',
+    'cctype',
+    'cerrno',
+    'cfenv',
+    'cfloat',
+    'cinttypes',
+    'ciso646',
+    'climits',
+    'clocale',
+    'cmath',
+    'csetjmp',
+    'csignal',
+    'cstdalign',
+    'cstdarg',
+    'cstdbool',
+    'cstddef',
+    'cstdint',
+    'cstdio',
+    'cstdlib',
+    'cstring',
+    'ctgmath',
+    'ctime',
+    'cuchar',
+    'cwchar',
+    'cwctype',
+    ])
+
+
+# These headers are excluded from [build/include] and [build/include_order]
+# checks:
+# - Anything not following google file name conventions (containing an
+#   uppercase character, such as Python.h or nsStringAPI.h, for example).
+# - Lua headers.
+_THIRD_PARTY_HEADERS_PATTERN = re.compile(
+    r'^(?:[^/]*[A-Z][^/]*\.h|lua\.h|lauxlib\.h|lualib\.h)$')
+
+
+# Assertion macros.  These are defined in base/logging.h and
+# testing/base/gunit.h.  Note that the _M versions need to come first
+# for substring matching to work.
+_CHECK_MACROS = [
+    'DCHECK', 'CHECK',
+    'EXPECT_TRUE_M', 'EXPECT_TRUE',
+    'ASSERT_TRUE_M', 'ASSERT_TRUE',
+    'EXPECT_FALSE_M', 'EXPECT_FALSE',
+    'ASSERT_FALSE_M', 'ASSERT_FALSE',
+    ]
+
+# Replacement macros for CHECK/DCHECK/EXPECT_TRUE/EXPECT_FALSE
+_CHECK_REPLACEMENT = dict([(m, {}) for m in _CHECK_MACROS])
+
+for op, replacement in [('==', 'EQ'), ('!=', 'NE'),
+                        ('>=', 'GE'), ('>', 'GT'),
+                        ('<=', 'LE'), ('<', 'LT')]:
+  _CHECK_REPLACEMENT['DCHECK'][op] = 'DCHECK_%s' % replacement
+  _CHECK_REPLACEMENT['CHECK'][op] = 'CHECK_%s' % replacement
+  _CHECK_REPLACEMENT['EXPECT_TRUE'][op] = 'EXPECT_%s' % replacement
+  _CHECK_REPLACEMENT['ASSERT_TRUE'][op] = 'ASSERT_%s' % replacement
+  _CHECK_REPLACEMENT['EXPECT_TRUE_M'][op] = 'EXPECT_%s_M' % replacement
+  _CHECK_REPLACEMENT['ASSERT_TRUE_M'][op] = 'ASSERT_%s_M' % replacement
+
+for op, inv_replacement in [('==', 'NE'), ('!=', 'EQ'),
+                            ('>=', 'LT'), ('>', 'LE'),
+                            ('<=', 'GT'), ('<', 'GE')]:
+  _CHECK_REPLACEMENT['EXPECT_FALSE'][op] = 'EXPECT_%s' % inv_replacement
+  _CHECK_REPLACEMENT['ASSERT_FALSE'][op] = 'ASSERT_%s' % inv_replacement
+  _CHECK_REPLACEMENT['EXPECT_FALSE_M'][op] = 'EXPECT_%s_M' % inv_replacement
+  _CHECK_REPLACEMENT['ASSERT_FALSE_M'][op] = 'ASSERT_%s_M' % inv_replacement
+
+# Alternative tokens and their replacements.  For full list, see section 2.5
+# Alternative tokens [lex.digraph] in the C++ standard.
+#
+# Digraphs (such as '%:') are not included here since it's a mess to
+# match those on a word boundary.
+_ALT_TOKEN_REPLACEMENT = {
+    'and': '&&',
+    'bitor': '|',
+    'or': '||',
+    'xor': '^',
+    'compl': '~',
+    'bitand': '&',
+    'and_eq': '&=',
+    'or_eq': '|=',
+    'xor_eq': '^=',
+    'not': '!',
+    'not_eq': '!='
+    }
+
+# Compile regular expression that matches all the above keywords.  The "[ =()]"
+# bit is meant to avoid matching these keywords outside of boolean expressions.
+#
+# False positives include C-style multi-line comments and multi-line strings
+# but those have always been troublesome for cpplint.
+_ALT_TOKEN_REPLACEMENT_PATTERN = re.compile(
+    r'[ =()](' + ('|'.join(_ALT_TOKEN_REPLACEMENT.keys())) + r')(?=[ (]|$)')
+
+
+# These constants define types of headers for use with
+# _IncludeState.CheckNextIncludeOrder().
+_C_SYS_HEADER = 1
+_CPP_SYS_HEADER = 2
+_LIKELY_MY_HEADER = 3
+_POSSIBLE_MY_HEADER = 4
+_OTHER_HEADER = 5
+
+# These constants define the current inline assembly state
+_NO_ASM = 0       # Outside of inline assembly block
+_INSIDE_ASM = 1   # Inside inline assembly block
+_END_ASM = 2      # Last line of inline assembly block
+_BLOCK_ASM = 3    # The whole block is an inline assembly block
+
+# Match start of assembly blocks
+_MATCH_ASM = re.compile(r'^\s*(?:asm|_asm|__asm|__asm__)'
+                        r'(?:\s+(volatile|__volatile__))?'
+                        r'\s*[{(]')
+
+
+_regexp_compile_cache = {}
+
+# {str, set(int)}: a map from error categories to sets of line numbers
+# on which those errors are expected and should be suppressed.
+_error_suppressions = {}
+
+# The root directory used for deriving header guard CPP variable.
+# This is set by --root flag.
+_root = None
+
+# The allowed line length of files.
+# This is set by --linelength flag.
+_line_length = 80
+
+# The allowed extensions for file names
+# This is set by --extensions flag.
+_valid_extensions = set(['cc', 'h', 'cpp', 'cu', 'cuh'])
+
+def ParseNolintSuppressions(filename, raw_line, linenum, error):
+  """Updates the global list of error-suppressions.
+
+  Parses any NOLINT comments on the current line, updating the global
+  error_suppressions store.  Reports an error if the NOLINT comment
+  was malformed.
+
+  Args:
+    filename: str, the name of the input file.
+    raw_line: str, the line of input text, with comments.
+    linenum: int, the number of the current line.
+    error: function, an error handler.
+  """
+  matched = Search(r'\bNOLINT(NEXTLINE)?\b(\([^)]+\))?', raw_line)
+  if matched:
+    if matched.group(1):
+      suppressed_line = linenum + 1
+    else:
+      suppressed_line = linenum
+    category = matched.group(2)
+    if category in (None, '(*)'):  # => "suppress all"
+      _error_suppressions.setdefault(None, set()).add(suppressed_line)
+    else:
+      if category.startswith('(') and category.endswith(')'):
+        category = category[1:-1]
+        if category in _ERROR_CATEGORIES:
+          _error_suppressions.setdefault(category, set()).add(suppressed_line)
+        elif category not in _LEGACY_ERROR_CATEGORIES:
+          error(filename, linenum, 'readability/nolint', 5,
+                'Unknown NOLINT error category: %s' % category)
+
+
+def ResetNolintSuppressions():
+  """Resets the set of NOLINT suppressions to empty."""
+  _error_suppressions.clear()
+
+
+def IsErrorSuppressedByNolint(category, linenum):
+  """Returns true if the specified error category is suppressed on this line.
+
+  Consults the global error_suppressions map populated by
+  ParseNolintSuppressions/ResetNolintSuppressions.
+
+  Args:
+    category: str, the category of the error.
+    linenum: int, the current line number.
+  Returns:
+    bool, True iff the error should be suppressed due to a NOLINT comment.
+  """
+  return (linenum in _error_suppressions.get(category, set()) or
+          linenum in _error_suppressions.get(None, set()))
+
+
+def Match(pattern, s):
+  """Matches the string with the pattern, caching the compiled regexp."""
+  # The regexp compilation caching is inlined in both Match and Search for
+  # performance reasons; factoring it out into a separate function turns out
+  # to be noticeably expensive.
+  if pattern not in _regexp_compile_cache:
+    _regexp_compile_cache[pattern] = sre_compile.compile(pattern)
+  return _regexp_compile_cache[pattern].match(s)
+
+
+def ReplaceAll(pattern, rep, s):
+  """Replaces instances of pattern in a string with a replacement.
+
+  The compiled regex is kept in a cache shared by Match and Search.
+
+  Args:
+    pattern: regex pattern
+    rep: replacement text
+    s: search string
+
+  Returns:
+    string with replacements made (or original string if no replacements)
+  """
+  if pattern not in _regexp_compile_cache:
+    _regexp_compile_cache[pattern] = sre_compile.compile(pattern)
+  return _regexp_compile_cache[pattern].sub(rep, s)
+
+
+def Search(pattern, s):
+  """Searches the string for the pattern, caching the compiled regexp."""
+  if pattern not in _regexp_compile_cache:
+    _regexp_compile_cache[pattern] = sre_compile.compile(pattern)
+  return _regexp_compile_cache[pattern].search(s)
+
+
+class _IncludeState(object):
+  """Tracks line numbers for includes, and the order in which includes appear.
+
+  include_list contains list of lists of (header, line number) pairs.
+  It's a list of lists rather than just one flat list to make it
+  easier to update across preprocessor boundaries.
+
+  Call CheckNextIncludeOrder() once for each header in the file, passing
+  in the type constants defined above. Calls in an illegal order will
+  raise an _IncludeError with an appropriate error message.
+
+  """
+  # self._section will move monotonically through this set. If it ever
+  # needs to move backwards, CheckNextIncludeOrder will raise an error.
+  _INITIAL_SECTION = 0
+  _MY_H_SECTION = 1
+  _C_SECTION = 2
+  _CPP_SECTION = 3
+  _OTHER_H_SECTION = 4
+
+  _TYPE_NAMES = {
+      _C_SYS_HEADER: 'C system header',
+      _CPP_SYS_HEADER: 'C++ system header',
+      _LIKELY_MY_HEADER: 'header this file implements',
+      _POSSIBLE_MY_HEADER: 'header this file may implement',
+      _OTHER_HEADER: 'other header',
+      }
+  _SECTION_NAMES = {
+      _INITIAL_SECTION: "... nothing. (This can't be an error.)",
+      _MY_H_SECTION: 'a header this file implements',
+      _C_SECTION: 'C system header',
+      _CPP_SECTION: 'C++ system header',
+      _OTHER_H_SECTION: 'other header',
+      }
+
+  def __init__(self):
+    self.include_list = [[]]
+    self.ResetSection('')
+
+  def FindHeader(self, header):
+    """Check if a header has already been included.
+
+    Args:
+      header: header to check.
+    Returns:
+      Line number of previous occurrence, or -1 if the header has not
+      been seen before.
+    """
+    for section_list in self.include_list:
+      for f in section_list:
+        if f[0] == header:
+          return f[1]
+    return -1
+
+  def ResetSection(self, directive):
+    """Reset section checking for preprocessor directive.
+
+    Args:
+      directive: preprocessor directive (e.g. "if", "else").
+    """
+    # The name of the current section.
+    self._section = self._INITIAL_SECTION
+    # The path of last found header.
+    self._last_header = ''
+
+    # Update list of includes.  Note that we never pop from the
+    # include list.
+    if directive in ('if', 'ifdef', 'ifndef'):
+      self.include_list.append([])
+    elif directive in ('else', 'elif'):
+      self.include_list[-1] = []
+
+  def SetLastHeader(self, header_path):
+    self._last_header = header_path
+
+  def CanonicalizeAlphabeticalOrder(self, header_path):
+    """Returns a path canonicalized for alphabetical comparison.
+
+    - replaces "-" with "_" so they both cmp the same.
+    - removes '-inl' since we don't require them to be after the main header.
+    - lowercase everything, just in case.
+
+    Args:
+      header_path: Path to be canonicalized.
+
+    Returns:
+      Canonicalized path.
+    """
+    return header_path.replace('-inl.h', '.h').replace('-', '_').lower()
+
+  def IsInAlphabeticalOrder(self, clean_lines, linenum, header_path):
+    """Tells whether header_path sorts correctly after the previous header.
+
+    Args:
+      clean_lines: A CleansedLines instance containing the file.
+      linenum: The number of the line to check.
+      header_path: Canonicalized header to be checked.
+
+    Returns:
+      False only when the previous header compares greater than header_path
+      and the line directly above is itself an #include; True otherwise.
+    """
+    # _last_header is reset to '' whenever the section changes, and '' never
+    # compares greater than a header, so the first header always passes.
+    if not self._last_header > header_path:
+      return True
+
+    # If the previous line is not an #include (e.g. a blank line), assume
+    # the headers are intentionally grouped the way they are.
+    line_above = clean_lines.elided[linenum - 1]
+    return not Match(r'^\s*#\s*include\b', line_above)
+
+  def CheckNextIncludeOrder(self, header_type):
+    """Validates the position of the next include and advances the state.
+
+    C system and C++ system headers are rejected once a later section has
+    been reached.  "My header" candidates that arrive too late are not
+    rejected; they are simply counted as other headers.
+
+    Args:
+      header_type: One of the _XXX_HEADER constants defined above.
+
+    Returns:
+      The empty string if the header is in the right order, or an
+      error message describing what's wrong.
+
+    """
+    section_before = self._section
+    complaint = ('Found %s after %s' %
+                 (self._TYPE_NAMES[header_type],
+                  self._SECTION_NAMES[section_before]))
+
+    if header_type == _C_SYS_HEADER:
+      if self._section > self._C_SECTION:
+        self._last_header = ''
+        return complaint
+      self._section = self._C_SECTION
+    elif header_type == _CPP_SYS_HEADER:
+      if self._section > self._CPP_SECTION:
+        self._last_header = ''
+        return complaint
+      self._section = self._CPP_SECTION
+    elif header_type in (_LIKELY_MY_HEADER, _POSSIBLE_MY_HEADER):
+      # Both kinds are handled alike: past the "my header" section we fall
+      # back to "other", because we are not sure enough that the header is
+      # associated with this file.
+      if self._section <= self._MY_H_SECTION:
+        self._section = self._MY_H_SECTION
+      else:
+        self._section = self._OTHER_H_SECTION
+    else:
+      assert header_type == _OTHER_HEADER
+      self._section = self._OTHER_H_SECTION
+
+    # A new section starts a fresh alphabetical run.
+    if self._section != section_before:
+      self._last_header = ''
+
+    return ''
+
+
+class _CppLintState(object):
+  """Maintains module-wide state."""
+
+  def __init__(self):
+    self.verbose_level = 1  # global setting.
+    self.error_count = 0    # global count of reported errors
+    # filters to apply when emitting error messages
+    self.filters = _DEFAULT_FILTERS[:]
+    # backup of filter list. Used to restore the state after each file.
+    self._filters_backup = self.filters[:]
+    self.counting = 'total'  # In what way are we counting errors?
+    self.errors_by_category = {}  # string to int dict storing error counts
+
+    # output format:
+    # "emacs" - format that emacs can parse (default)
+    # "vs7" - format that Microsoft Visual Studio 7 can parse
+    # "eclipse" - like "emacs", with "warning:" inserted after the location
+    self.output_format = 'emacs'
+
+  def SetOutputFormat(self, output_format):
+    """Sets the output format for errors."""
+    self.output_format = output_format
+
+  def SetVerboseLevel(self, level):
+    """Sets the module's verbosity, and returns the previous setting."""
+    last_verbose_level = self.verbose_level
+    self.verbose_level = level
+    return last_verbose_level
+
+  def SetCountingStyle(self, counting_style):
+    """Sets the module's counting options.
+
+    Args:
+      counting_style: 'toplevel' and 'detailed' enable per-category counts
+                      (see IncrementErrorCount); any other value keeps
+                      only the total.
+    """
+    self.counting = counting_style
+
+  def SetFilters(self, filters):
+    """Sets the error-message filters.
+
+    These filters are applied when deciding whether to emit a given
+    error message.
+
+    Args:
+      filters: A string of comma-separated filters (eg "+whitespace/indent").
+               Each filter should start with + or -; else we die.
+
+    Raises:
+      ValueError: The comma-separated filters did not all start with '+' or '-'.
+                  E.g. "-,+whitespace,-whitespace/indent,whitespace/badfilter"
+    """
+    # Default filters always have less priority than the flag ones.
+    self.filters = _DEFAULT_FILTERS[:]
+    self.AddFilters(filters)
+
+  def AddFilters(self, filters):
+    """Adds more filters to the existing list of error-message filters.
+
+    Args:
+      filters: A string of comma-separated filters.  Surrounding whitespace
+               is stripped and empty entries are ignored.
+
+    Raises:
+      ValueError: A filter (new or already present) does not start with
+                  '+' or '-'.  self.filters is left untouched in that case.
+    """
+    stripped = [filt.strip() for filt in filters.split(',')]
+    new_filters = [filt for filt in stripped if filt]
+    # Validate before mutating, so that a rejected argument cannot leave
+    # malformed entries behind in self.filters.
+    for filt in self.filters + new_filters:
+      if not (filt.startswith('+') or filt.startswith('-')):
+        raise ValueError('Every filter in --filters must start with + or -'
+                         ' (%s does not)' % filt)
+    self.filters.extend(new_filters)
+
+  def BackupFilters(self):
+    """Saves a copy of the current filter list to backup storage."""
+    self._filters_backup = self.filters[:]
+
+  def RestoreFilters(self):
+    """Restores a copy of the filters previously backed up."""
+    self.filters = self._filters_backup[:]
+
+  def ResetErrorCounts(self):
+    """Sets the module's error statistic back to zero."""
+    self.error_count = 0
+    self.errors_by_category = {}
+
+  def IncrementErrorCount(self, category):
+    """Bumps the module's error statistic.
+
+    Args:
+      category: Error category, e.g. "whitespace/indent".  With the
+                'toplevel' counting style only the part before the first
+                '/' is used as the key.
+    """
+    self.error_count += 1
+    if self.counting in ('toplevel', 'detailed'):
+      if self.counting != 'detailed':
+        category = category.split('/')[0]
+      if category not in self.errors_by_category:
+        self.errors_by_category[category] = 0
+      self.errors_by_category[category] += 1
+
+  def PrintErrorCounts(self):
+    """Print a summary of errors by category, and the total."""
+    # Sorted so the summary is reproducible from run to run; items() works
+    # on both Python 2 and Python 3 dictionaries.
+    for category, count in sorted(self.errors_by_category.items()):
+      sys.stderr.write('Category \'%s\' errors found: %d\n' %
+                       (category, count))
+    sys.stderr.write('Total errors found: %d\n' % self.error_count)
+
+# The single module-wide state instance; the helper functions below read and
+# modify it.
+_cpplint_state = _CppLintState()
+
+
+def _OutputFormat():
+  """Returns the output format stored in the module-wide state."""
+  return _cpplint_state.output_format
+
+
+def _SetOutputFormat(output_format):
+  """Sets the module's output format.
+
+  Args:
+    output_format: Format name handed to _cpplint_state.SetOutputFormat.
+  """
+  _cpplint_state.SetOutputFormat(output_format)
+
+
+def _VerboseLevel():
+  """Returns the verbosity setting stored in the module-wide state."""
+  return _cpplint_state.verbose_level
+
+
+def _SetVerboseLevel(level):
+  """Sets the module's verbosity, and returns the previous setting.
+
+  Args:
+    level: The new verbosity level.
+
+  Returns:
+    The verbosity level that was in effect before this call.
+  """
+  return _cpplint_state.SetVerboseLevel(level)
+
+
+def _SetCountingStyle(level):
+  """Sets the module's counting options.
+
+  Args:
+    level: Despite its name, this is the counting style that is passed
+      straight to _cpplint_state.SetCountingStyle.
+  """
+  _cpplint_state.SetCountingStyle(level)
+
+
+def _Filters():
+  """Returns the module's list of output filters, as a list.
+
+  The list object held by the module-wide state is returned, not a copy.
+  """
+  return _cpplint_state.filters
+
+
+def _SetFilters(filters):
+  """Sets the module's error-message filters.
+
+  These filters are applied when deciding whether to emit a given
+  error message.
+
+  Args:
+    filters: A string of comma-separated filters (eg "+whitespace/indent").
+             Each filter should start with + or -; else we die.
+  """
+  _cpplint_state.SetFilters(filters)
+
+def _AddFilters(filters):
+  """Adds more filter overrides.
+
+  Unlike _SetFilters, this function does not reset the current list of filters
+  available.
+
+  Args:
+    filters: A string of comma-separated filters (eg "+whitespace/indent").
+             Each filter should start with + or -; else we die.
+  """
+  _cpplint_state.AddFilters(filters)
+
+def _BackupFilters():
+  """Saves the current filter list to backup storage."""
+  _cpplint_state.BackupFilters()
+
+def _RestoreFilters():
+  """Restores the filters previously saved by _BackupFilters."""
+  _cpplint_state.RestoreFilters()
+
+class _FunctionState(object):
+  """Remembers the function being scanned and how many lines it has."""
+
+  _NORMAL_TRIGGER = 250  # for --v=0, 500 for --v=1, etc.
+  _TEST_TRIGGER = 400    # larger allowance used for test functions.
+
+  def __init__(self):
+    self.in_a_function = False
+    self.lines_in_function = 0
+    self.current_function = ''
+
+  def Begin(self, function_name):
+    """Starts counting the body of a new function from zero.
+
+    Args:
+      function_name: The name of the function being tracked.
+    """
+    self.current_function = function_name
+    self.lines_in_function = 0
+    self.in_a_function = True
+
+  def Count(self):
+    """Adds one line to the current function; no-op outside a function."""
+    if not self.in_a_function:
+      return
+    self.lines_in_function += 1
+
+  def Check(self, error, filename, linenum):
+    """Reports the current function if its body has too many lines.
+
+    Args:
+      error: The function to call with any errors found.
+      filename: The name of the current file.
+      linenum: The number of the line to check.
+    """
+    is_test = Match(r'T(EST|est)', self.current_function)
+    base_trigger = self._TEST_TRIGGER if is_test else self._NORMAL_TRIGGER
+    # The limit doubles with every verbosity level.
+    trigger = base_trigger * 2**_VerboseLevel()
+
+    if self.lines_in_function <= trigger:
+      return
+
+    # Confidence grows with log2 of (lines / base_trigger), capped at 5.
+    error_level = min(
+        5, int(math.log(self.lines_in_function / base_trigger, 2)))
+    details = (self.current_function, self.lines_in_function, trigger)
+    error(filename, linenum, 'readability/fn_size', error_level,
+          'Small and focused functions are preferred:'
+          ' %s has %d non-comment lines'
+          ' (error triggered by exceeding %d lines).'  % details)
+
+  def End(self):
+    """Marks the end of the function body; Count() stops counting."""
+    self.in_a_function = False
+
+
+class _IncludeError(Exception):
+  """Indicates a problem with the include order in a file.
+
+  Adds nothing to Exception; it only provides a distinct type.
+  """
+  pass
+
+
+class FileInfo(object):
+  """Provides utility functions for filenames.
+
+  FileInfo provides easy access to the components of a file's path
+  relative to the project root.
+  """
+
+  def __init__(self, filename):
+    self._filename = filename
+
+  def FullName(self):
+    """Returns the absolute path, with Windows backslashes turned into '/'."""
+    return os.path.abspath(self._filename).replace('\\', '/')
+
+  @staticmethod
+  def _StripCheckoutRoot(fullname, root_dir, project_dir):
+    """Returns fullname without the checkout root and the following slash."""
+    prefix = os.path.commonprefix([root_dir, project_dir])
+    # A root such as '/' already ends with the separator, so strip leading
+    # slashes instead of blindly skipping one more character.
+    return fullname[len(prefix):].lstrip('/')
+
+  def RepositoryName(self):
+    r"""FullName after removing the local path to the repository.
+
+    If we have a real absolute path name here we can try to do something smart:
+    detecting the root of the checkout and truncating /path/to/checkout from
+    the name so that we get header guards that don't include things like
+    "C:\Documents and Settings\..." or "/home/username/..." in them and thus
+    people on different computers who have checked the source out to different
+    locations won't see bogus errors.
+
+    Returns:
+      The '/'-separated path relative to the detected checkout root, or
+      FullName() unchanged when the file does not exist or no .svn, .git or
+      .hg directory is found above it.
+    """
+    fullname = self.FullName()
+
+    if os.path.exists(fullname):
+      project_dir = os.path.dirname(fullname)
+
+      if os.path.exists(os.path.join(project_dir, ".svn")):
+        # If there's a .svn file in the current directory, we recursively look
+        # up the directory tree for the top of the SVN checkout
+        root_dir = project_dir
+        one_up_dir = os.path.dirname(root_dir)
+        # dirname() returns its argument at the filesystem root; stop there,
+        # otherwise a '.svn' entry at the root would make this loop endless.
+        while (one_up_dir != root_dir and
+               os.path.exists(os.path.join(one_up_dir, ".svn"))):
+          root_dir = os.path.dirname(root_dir)
+          one_up_dir = os.path.dirname(one_up_dir)
+
+        return self._StripCheckoutRoot(fullname, root_dir, project_dir)
+
+      # Not SVN <= 1.6? Try to find a git, hg, or svn top level directory by
+      # searching up from the current path.
+      root_dir = os.path.dirname(fullname)
+      while (root_dir != os.path.dirname(root_dir) and
+             not os.path.exists(os.path.join(root_dir, ".git")) and
+             not os.path.exists(os.path.join(root_dir, ".hg")) and
+             not os.path.exists(os.path.join(root_dir, ".svn"))):
+        root_dir = os.path.dirname(root_dir)
+
+      if (os.path.exists(os.path.join(root_dir, ".git")) or
+          os.path.exists(os.path.join(root_dir, ".hg")) or
+          os.path.exists(os.path.join(root_dir, ".svn"))):
+        return self._StripCheckoutRoot(fullname, root_dir, project_dir)
+
+    # Don't know what to do; header guard warnings may be wrong...
+    return fullname
+
+  def Split(self):
+    """Splits the file into the directory, basename, and extension.
+
+    For 'chrome/browser/browser.cc', Split() would
+    return ('chrome/browser', 'browser', '.cc')
+
+    Returns:
+      A tuple of (directory, basename, extension).
+    """
+
+    googlename = self.RepositoryName()
+    project, rest = os.path.split(googlename)
+    return (project,) + os.path.splitext(rest)
+
+  def BaseName(self):
+    """File base name - text after the final slash, before the final period."""
+    return self.Split()[1]
+
+  def Extension(self):
+    """File extension - the final period and the text following it."""
+    return self.Split()[2]
+
+  def NoExtension(self):
+    """Repository-relative path of the file with its extension removed."""
+    return '/'.join(self.Split()[0:2])
+
+  def IsSource(self):
+    """File has a source file extension."""
+    return self.Extension()[1:] in ('c', 'cc', 'cpp', 'cxx')
+
+
+def _ShouldPrintError(category, confidence, linenum):
+  """Decides whether an error of this category should be reported.
+
+  Args:
+    category: Error category, e.g. "whitespace/indent".
+    confidence: Confidence score of the error.
+    linenum: Line number the error was found on.
+
+  Returns:
+    False if the error is suppressed by NOLINT, if confidence is below the
+    verbosity level, or if the filters exclude the category; True otherwise.
+  """
+  # Suppressed by a "NOLINT(category)" comment in the source?
+  if IsErrorSuppressedByNolint(category, linenum):
+    return False
+
+  # Not confident enough for the current verbosity level?
+  if confidence < _cpplint_state.verbose_level:
+    return False
+
+  # Filters are applied in order, so the last matching filter wins.
+  filtered_out = False
+  for one_filter in _Filters():
+    sign, prefix = one_filter[:1], one_filter[1:]
+    if sign == '-':
+      if category.startswith(prefix):
+        filtered_out = True
+    elif sign == '+':
+      if category.startswith(prefix):
+        filtered_out = False
+    else:
+      assert False  # should have been checked for in SetFilter.
+
+  return not filtered_out
+
+
+def Error(filename, linenum, category, confidence, message):
+  """Reports a lint error on stderr and counts it.
+
+  Nothing is written or counted when _ShouldPrintError rejects the error,
+  which happens for instance when a "NOLINT(category)" comment on the
+  offending line suppresses it.  The confidence says how certain we are
+  that this is a legitimate style regression, and not a misidentification
+  or a use that's sometimes justified.
+
+  Args:
+    filename: The name of the file containing the error.
+    linenum: The number of the line containing the error.
+    category: A string used to describe the "category" this bug
+      falls under: "whitespace", say, or "runtime".  Categories
+      may have a hierarchy separated by slashes: "whitespace/indent".
+    confidence: A number from 1-5 representing a confidence score for
+      the error, with 5 meaning that we are certain of the problem,
+      and 1 meaning that it could be a legitimate construct.
+    message: The error message.
+  """
+  if not _ShouldPrintError(category, confidence, linenum):
+    return
+
+  _cpplint_state.IncrementErrorCount(category)
+
+  # Pick the line layout for the configured output format; anything other
+  # than 'vs7' and 'eclipse' gets the emacs-style layout.
+  output_format = _cpplint_state.output_format
+  if output_format == 'vs7':
+    template = '%s(%s):  %s  [%s] [%d]\n'
+  elif output_format == 'eclipse':
+    template = '%s:%s: warning: %s  [%s] [%d]\n'
+  else:
+    template = '%s:%s:  %s  [%s] [%d]\n'
+  sys.stderr.write(
+      template % (filename, linenum, message, category, confidence))
+
+
+# Matches standard C++ escape sequences per 2.13.2.3 of the C++ standard:
+# a backslash followed by a simple escape character, a run of digits, or
+# 'x' plus hex digits.
+_RE_PATTERN_CLEANSE_LINE_ESCAPES = re.compile(
+    r'\\([abfnrtv?"\\\']|\d+|x[0-9a-fA-F]+)')
+# Match a single C style comment on the same line.
+_RE_PATTERN_C_COMMENTS = r'/\*(?:[^*]|\*(?!/))*\*/'
+# Matches a C style comment that starts and ends on one line, together with
+# the whitespace that should disappear along with it.
+# This RE is a little bit more complicated than one might expect, because we
+# have to be careful about which spaces get removed so that comments inside
+# statements are handled better.
+# The current rule is: We only clear spaces from both sides when we're at the
+# end of the line. Otherwise, we try to remove spaces from the right side,
+# if this doesn't work we try on left side but only if there's a non-word
+# character on the right.
+_RE_PATTERN_CLEANSE_LINE_C_COMMENTS = re.compile(
+    r'(\s*' + _RE_PATTERN_C_COMMENTS + r'\s*$|' +
+    _RE_PATTERN_C_COMMENTS + r'\s+|' +
+    r'\s+' + _RE_PATTERN_C_COMMENTS + r'(?=\W)|' +
+    _RE_PATTERN_C_COMMENTS + r')')
+
+
+def IsCppString(line):
+  """Tells whether text appended to line would be inside a string constant.
+
+  This function does not consider single-line nor multi-line comments.
+
+  Args:
+    line: is a partial line of code starting from the 0..n.
+
+  Returns:
+    True, if next character appended to 'line' is inside a
+    string constant.
+  """
+
+  # Neutralize escaped backslashes first, so that \\" is not taken for \".
+  unescaped = line.replace(r'\\', 'XX')
+  # Double quotes that really delimit strings: all of them, minus the escaped
+  # ones, minus those written as the character literal '"'.
+  delimiters = (unescaped.count('"')
+                - unescaped.count(r'\"')
+                - unescaped.count("'\"'"))
+  # An odd number of delimiters means a string is still open.
+  return delimiters % 2 == 1
+
+
+def CleanseRawStrings(raw_lines):
+  """Removes C++11 raw strings from lines.
+
+    Before:
+      static const char kData[] = R"(
+          multi-line string
+          )";
+
+    After:
+      static const char kData[] = ""
+      ""    (every interior line becomes just an empty string literal)
+          "";
+
+  Args:
+    raw_lines: list of raw lines.
+
+  Returns:
+    list of lines with C++11 raw strings replaced by empty strings.  One
+    output line is appended per input line.
+  """
+
+  # Closing marker of the raw string we are currently inside, i.e.
+  # ')' + delimiter + '"', or None when outside of any raw string.
+  delimiter = None
+  lines_without_raw_strings = []
+  for line in raw_lines:
+    if delimiter:
+      # Inside a raw string, look for the end
+      end = line.find(delimiter)
+      if end >= 0:
+        # Found the end of the string, match leading space for this
+        # line and resume copying the original lines, and also insert
+        # a "" on the last line.
+        leading_space = Match(r'^(\s*)\S', line)
+        line = leading_space.group(1) + '""' + line[end + len(delimiter):]
+        delimiter = None
+      else:
+        # Haven't found the end yet, replace the whole line with "".
+        line = '""'
+
+    # Look for beginning of a raw string, and replace them with
+    # empty strings.  This is done in a loop to handle multiple raw
+    # strings on the same line.
+    while delimiter is None:
+      # Look for beginning of a raw string.
+      # See 2.14.15 [lex.string] for syntax.
+      # group(1): text before the prefix, group(2): the user-chosen
+      # delimiter, group(3): everything after the opening parenthesis.
+      matched = Match(r'^(.*)\b(?:R|u8R|uR|UR|LR)"([^\s\\()]*)\((.*)$', line)
+      if matched:
+        delimiter = ')' + matched.group(2) + '"'
+
+        end = matched.group(3).find(delimiter)
+        if end >= 0:
+          # Raw string ended on same line
+          line = (matched.group(1) + '""' +
+                  matched.group(3)[end + len(delimiter):])
+          delimiter = None
+        else:
+          # Start of a multi-line raw string; delimiter stays set, which
+          # also ends this loop.
+          line = matched.group(1) + '""'
+      else:
+        break
+
+    lines_without_raw_strings.append(line)
+
+  # TODO(unknown): if delimiter is not None here, we might want to
+  # emit a warning for unterminated string.
+  return lines_without_raw_strings
+
+
+def FindNextMultiLineCommentStart(lines, lineix):
+  """Finds the next line that opens a comment spanning several lines.
+
+  Args:
+    lines: List of source lines.
+    lineix: Index of the first line to look at.
+
+  Returns:
+    Index of the first line at or after lineix that, once stripped, starts
+    with '/*' and has no '*/' after it; len(lines) if there is none.
+  """
+  for index in range(lineix, len(lines)):
+    stripped = lines[index].strip()
+    # A comment closed on its own line is not a multi-line comment.
+    if stripped.startswith('/*') and '*/' not in stripped[2:]:
+      return index
+  return len(lines)
+
+
+def FindNextMultiLineCommentEnd(lines, lineix):
+  """Finds the line that closes the comment we are currently inside.
+
+  Args:
+    lines: List of source lines.
+    lineix: Index of the first line to look at.
+
+  Returns:
+    Index of the first line at or after lineix that, once stripped, ends
+    with '*/'; len(lines) if there is none.
+  """
+  for index in range(lineix, len(lines)):
+    if lines[index].strip().endswith('*/'):
+      return index
+  return len(lines)
+
+
+def RemoveMultiLineCommentsFromRange(lines, begin, end):
+  """Overwrites lines[begin:end] in place with empty '/**/' comments.
+
+  Args:
+    lines: List of source lines; modified in place.
+    begin: Index of the first line to overwrite.
+    end: Index one past the last line to overwrite.
+  """
+  # A dummy /**/ comment keeps each line non-empty, so we will not get
+  # unnecessary blank line warnings later in the code.
+  index = begin
+  while index < end:
+    lines[index] = '/**/'
+    index += 1
+
+
+def RemoveMultiLineComments(filename, lines, error):
+  """Removes multiline (c-style) comments from lines.
+
+  Args:
+    filename: The name of the current file, used for error reporting.
+    lines: List of source lines; comment lines are overwritten in place.
+    error: The function to call when a comment is never closed.
+  """
+  search_from = 0
+  while search_from < len(lines):
+    first = FindNextMultiLineCommentStart(lines, search_from)
+    if first >= len(lines):
+      # No further multi-line comment.
+      break
+    last = FindNextMultiLineCommentEnd(lines, first)
+    if last >= len(lines):
+      error(filename, first + 1, 'readability/multiline_comment', 5,
+            'Could not find end of multi-line comment')
+      break
+    search_from = last + 1
+    RemoveMultiLineCommentsFromRange(lines, first, search_from)
+
+
+def CleanseComments(line):
+  """Strips a trailing //-comment and any one-line /* */ comments.
+
+  Args:
+    line: A line of C++ source.
+
+  Returns:
+    The line with single-line comments removed.
+  """
+  slashes_at = line.find('//')
+  # Only the first '//' is examined, and it only counts as a comment when
+  # it is not inside a string constant.
+  starts_comment = slashes_at != -1 and not IsCppString(line[:slashes_at])
+  if starts_comment:
+    line = line[:slashes_at].rstrip()
+  # Then drop the /* ... */ comments.
+  return _RE_PATTERN_CLEANSE_LINE_C_COMMENTS.sub('', line)
+
+
+class CleansedLines(object):
+  """Holds 4 copies of all lines with different preprocessing applied to them.
+
+  1) elided member contains lines without strings and comments.
+  2) lines member contains lines without comments.
+  3) raw_lines member contains all the lines without processing.
+  4) lines_without_raw_strings member is same as raw_lines, but with C++11 raw
+     strings removed.
+  All these members are of <type 'list'>, and of the same length.
+
+  Both elided and lines are derived from lines_without_raw_strings, so raw
+  strings are already removed from them as well.
+  """
+
+  def __init__(self, lines):
+    """Builds all the views from the raw lines.
+
+    Args:
+      lines: list of raw lines; kept by reference as raw_lines.
+    """
+    self.elided = []
+    self.lines = []
+    self.raw_lines = lines
+    self.num_lines = len(lines)
+    self.lines_without_raw_strings = CleanseRawStrings(lines)
+    for linenum in range(len(self.lines_without_raw_strings)):
+      self.lines.append(CleanseComments(
+          self.lines_without_raw_strings[linenum]))
+      # Strings are collapsed before comments are stripped, so that comment
+      # markers inside string literals are not mistaken for comments.
+      elided = self._CollapseStrings(self.lines_without_raw_strings[linenum])
+      self.elided.append(CleanseComments(elided))
+
+  def NumLines(self):
+    """Returns the number of lines represented."""
+    return self.num_lines
+
+  @staticmethod
+  def _CollapseStrings(elided):
+    """Collapses strings and chars on a line to simple "" or '' blocks.
+
+    We nix strings first so we're not fooled by text like '"http://"'
+
+    Lines matching _RE_PATTERN_INCLUDE are returned unchanged.  Single
+    quotes used as digit separators are removed from the number instead of
+    being treated as character literals.
+
+    Args:
+      elided: The line being processed.
+
+    Returns:
+      The line with collapsed strings.
+    """
+    if _RE_PATTERN_INCLUDE.match(elided):
+      return elided
+
+    # Remove escaped characters first to make quote/single quote collapsing
+    # basic.  Things that look like escaped characters shouldn't occur
+    # outside of strings and chars.
+    elided = _RE_PATTERN_CLEANSE_LINE_ESCAPES.sub('', elided)
+
+    # Replace quoted strings and digit separators.  Both single quotes
+    # and double quotes are processed in the same loop, otherwise
+    # nested quotes wouldn't work.
+    # 'collapsed' accumulates the processed prefix; 'elided' shrinks to the
+    # part of the line that still has to be scanned.
+    collapsed = ''
+    while True:
+      # Find the first quote character
+      match = Match(r'^([^\'"]*)([\'"])(.*)$', elided)
+      if not match:
+        collapsed += elided
+        break
+      head, quote, tail = match.groups()
+
+      if quote == '"':
+        # Collapse double quoted strings
+        second_quote = tail.find('"')
+        if second_quote >= 0:
+          collapsed += head + '""'
+          elided = tail[second_quote + 1:]
+        else:
+          # Unmatched double quote, don't bother processing the rest
+          # of the line since this is probably a multiline string.
+          collapsed += elided
+          break
+      else:
+        # Found single quote, check nearby text to eliminate digit separators.
+        #
+        # There is no special handling for floating point here, because
+        # the integer/fractional/exponent parts would all be parsed
+        # correctly as long as there are digits on both sides of the
+        # separator.  So we are fine as long as we don't see something
+        # like "0.'3" (gcc 4.9.0 will not allow this literal).
+        if Search(r'\b(?:0[bBxX]?|[1-9])[0-9a-fA-F]*$', head):
+          # The text before the quote ends like a number: take the rest of
+          # the literal (alphanumerics with optional separators) and drop
+          # the separators from it.
+          match_literal = Match(r'^((?:\'?[0-9a-zA-Z_])*)(.*)$', "'" + tail)
+          collapsed += head + match_literal.group(1).replace("'", '')
+          elided = match_literal.group(2)
+        else:
+          second_quote = tail.find('\'')
+          if second_quote >= 0:
+            collapsed += head + "''"
+            elided = tail[second_quote + 1:]
+          else:
+            # Unmatched single quote
+            collapsed += elided
+            break
+
+    return collapsed
+
+
+def FindEndOfExpressionInLine(line, startpos, stack):
+  """Find the position just after the end of current parenthesized expression.
+
+  Scans line from startpos to its end, tracking the open brackets '(', '['
+  and '{' as well as tentative template '<' on stack.
+
+  Args:
+    line: a CleansedLines line.
+    startpos: start searching at this position.
+    stack: nesting stack at startpos.  The list is modified in place.
+
+  Returns:
+    On finding matching end: (index just after matching end, None)
+    On finding an unclosed expression: (-1, None)
+    Otherwise: (-1, new stack at end of this line)
+  """
+  for i in xrange(startpos, len(line)):
+    char = line[i]
+    if char in '([{':
+      # Found start of parenthesized expression, push to expression stack
+      stack.append(char)
+    elif char == '<':
+      # Found potential start of template argument list
+      if i > 0 and line[i - 1] == '<':
+        # Left shift operator: the previous '<' was pushed as a tentative
+        # template start, so take it back off the stack.
+        if stack and stack[-1] == '<':
+          stack.pop()
+          if not stack:
+            return (-1, None)
+      elif i > 0 and Search(r'\boperator\s*$', line[0:i]):
+        # operator<, don't add to stack
+        continue
+      else:
+        # Tentative start of template argument list
+        stack.append('<')
+    elif char in ')]}':
+      # Found end of parenthesized expression.
+      #
+      # If we are currently expecting a matching '>', the pending '<'
+      # must have been an operator.  Remove them from expression stack.
+      while stack and stack[-1] == '<':
+        stack.pop()
+      if not stack:
+        return (-1, None)
+      if ((stack[-1] == '(' and char == ')') or
+          (stack[-1] == '[' and char == ']') or
+          (stack[-1] == '{' and char == '}')):
+        stack.pop()
+        if not stack:
+          # The bracket that started the expression has just been closed.
+          return (i + 1, None)
+      else:
+        # Mismatched parentheses
+        return (-1, None)
+    elif char == '>':
+      # Found potential end of template argument list.
+
+      # Ignore "->" and operator functions
+      if (i > 0 and
+          (line[i - 1] == '-' or Search(r'\boperator\s*$', line[0:i - 1]))):
+        continue
+
+      # Pop the stack if there is a matching '<'.  Otherwise, ignore
+      # this '>' since it must be an operator.
+      if stack:
+        if stack[-1] == '<':
+          stack.pop()
+          if not stack:
+            return (i + 1, None)
+    elif char == ';':
+      # Found something that look like end of statements.  If we are currently
+      # expecting a '>', the matching '<' must have been an operator, since
+      # template argument list should not contain statements.
+      while stack and stack[-1] == '<':
+        stack.pop()
+      if not stack:
+        return (-1, None)
+
+  # Did not find end of expression or unbalanced parentheses on this line
+  return (-1, stack)
+
+
+def CloseExpression(clean_lines, linenum, pos):
+  """Finds where the bracket opened at (linenum, pos) is closed.
+
+  If lines[linenum][pos] points to a '(' or '{' or '[' or '<', finds the
+  linenum/pos that correspond to the closing of the expression.  A '<'
+  that begins '<<' or '<=' is not treated as an opening bracket.
+
+  TODO(unknown): cpplint spends a fair bit of time matching parentheses.
+  Ideally we would want to index all opening and closing parentheses once
+  and have CloseExpression be just a simple lookup, but due to preprocessor
+  tricks, this is not so easy.
+
+  Args:
+    clean_lines: A CleansedLines instance containing the file.
+    linenum: The number of the line to check.
+    pos: A position on the line.
+
+  Returns:
+    A tuple (line, linenum, pos) pointer *past* the closing brace, or
+    (line, len(lines), -1) if we never find a close.  Note we ignore
+    strings and comments when matching; and the line we return is the
+    'cleansed' line at linenum.
+  """
+
+  current = clean_lines.elided[linenum]
+  if current[pos] not in '({[<' or Match(r'<[<=]', current[pos:]):
+    return (current, clean_lines.NumLines(), -1)
+
+  # Scan the starting line first, then keep going forward for as long as
+  # the expression is still open and there are lines left.
+  (close_pos, pending) = FindEndOfExpressionInLine(current, pos, [])
+  while (close_pos <= -1 and pending and
+         linenum < clean_lines.NumLines() - 1):
+    linenum += 1
+    current = clean_lines.elided[linenum]
+    (close_pos, pending) = FindEndOfExpressionInLine(current, 0, pending)
+
+  if close_pos > -1:
+    return (current, linenum, close_pos)
+
+  # Did not find end of expression before end of file, give up
+  return (current, clean_lines.NumLines(), -1)
+
+
+def FindStartOfExpressionInLine(line, endpos, stack):
+  """Find position at the matching start of current expression.
+
+  This is almost the reverse of FindEndOfExpressionInLine, but note
+  that the input position and returned position differs by 1.
+
+  Scans line backwards from endpos down to index 0, tracking the closing
+  brackets ')', ']' and '}' as well as tentative template '>' on stack.
+
+  Args:
+    line: a CleansedLines line.
+    endpos: start searching at this position.
+    stack: nesting stack at endpos.  The list is modified in place.
+
+  Returns:
+    On finding matching start: (index at matching start, None)
+    On finding an unclosed expression: (-1, None)
+    Otherwise: (-1, new stack at beginning of this line)
+  """
+  i = endpos
+  while i >= 0:
+    char = line[i]
+    if char in ')]}':
+      # Found end of expression, push to expression stack
+      stack.append(char)
+    elif char == '>':
+      # Found potential end of template argument list.
+      #
+      # Ignore it if it's a "->" or ">=" or "operator>"
+      if (i > 0 and
+          (line[i - 1] == '-' or
+           Match(r'\s>=\s', line[i - 1:]) or
+           Search(r'\boperator\s*$', line[0:i]))):
+        # Extra step back, so that together with the decrement at the
+        # bottom of the loop the preceding character is skipped as well.
+        i -= 1
+      else:
+        stack.append('>')
+    elif char == '<':
+      # Found potential start of template argument list
+      if i > 0 and line[i - 1] == '<':
+        # Left shift operator; also skip the other '<' of the pair.
+        i -= 1
+      else:
+        # If there is a matching '>', we can pop the expression stack.
+        # Otherwise, ignore this '<' since it must be an operator.
+        if stack and stack[-1] == '>':
+          stack.pop()
+          if not stack:
+            return (i, None)
+    elif char in '([{':
+      # Found start of expression.
+      #
+      # If there are any unmatched '>' on the stack, they must be
+      # operators.  Remove those.
+      while stack and stack[-1] == '>':
+        stack.pop()
+      if not stack:
+        return (-1, None)
+      if ((char == '(' and stack[-1] == ')') or
+          (char == '[' and stack[-1] == ']') or
+          (char == '{' and stack[-1] == '}')):
+        stack.pop()
+        if not stack:
+          # The bracket that ended the expression has just been matched.
+          return (i, None)
+      else:
+        # Mismatched parentheses
+        return (-1, None)
+    elif char == ';':
+      # Found something that look like end of statements.  If we are currently
+      # expecting a '<', the matching '>' must have been an operator, since
+      # template argument list should not contain statements.
+      while stack and stack[-1] == '>':
+        stack.pop()
+      if not stack:
+        return (-1, None)
+
+    i -= 1
+
+  # Reached the beginning of the line with the expression still open.
+  return (-1, stack)
+
+
+def ReverseCloseExpression(clean_lines, linenum, pos):
+  """Finds where the bracket closed at (linenum, pos) was opened.
+
+  If lines[linenum][pos] points to a ')' or '}' or ']' or '>', finds the
+  linenum/pos that correspond to the opening of the expression.
+
+  Args:
+    clean_lines: A CleansedLines instance containing the file.
+    linenum: The number of the line to check.
+    pos: A position on the line.
+
+  Returns:
+    A tuple (line, linenum, pos) pointer *at* the opening brace, or
+    (line, 0, -1) if we never find the matching opening brace.  Note
+    we ignore strings and comments when matching; and the line we
+    return is the 'cleansed' line at linenum.
+  """
+  current = clean_lines.elided[linenum]
+  if current[pos] not in ')}]>':
+    return (current, 0, -1)
+
+  # Scan the given line first, then keep going backward for as long as the
+  # expression is still open and there are lines left.
+  (open_pos, pending) = FindStartOfExpressionInLine(current, pos, [])
+  while open_pos <= -1 and pending and linenum > 0:
+    linenum -= 1
+    current = clean_lines.elided[linenum]
+    (open_pos, pending) = FindStartOfExpressionInLine(
+        current, len(current) - 1, pending)
+
+  if open_pos > -1:
+    return (current, linenum, open_pos)
+
+  # Did not find start of expression before beginning of file, give up
+  return (current, 0, -1)
+
+
+def CheckForCopyright(filename, lines, error):
+  """Logs an error if no Copyright message appears at the top of the file.
+
+  Args:
+    filename: The name of the current file.
+    lines: An array of strings, each representing a line of the file.
+    error: The function to call with any errors found.
+  """
+
+  # We'll say it should occur by line 10. Don't forget there's a
+  # dummy line at the front, so the candidates are lines[1] .. lines[10].
+  # A slice is used rather than xrange() so the scan also works on
+  # interpreters that no longer provide xrange.
+  if any(re.search(r'Copyright', text, re.I) for text in lines[1:11]):
+    return
+
+  # No copyright line was found in the window.
+  error(filename, 0, 'legal/copyright', 5,
+        'No copyright message found.  '
+        'You should have a line: "Copyright [year] <Copyright Owner>"')
+
+
+def GetIndentLevel(line):
+  """Return the number of leading spaces in line.
+
+  Args:
+    line: A string to check.
+
+  Returns:
+    An integer count of leading spaces, possibly zero.
+  """
+  indent = Match(r'^( *)\S', line)
+  if indent:
+    return len(indent.group(1))
+  else:
+    return 0
+
+
+def GetHeaderGuardCPPVariable(filename):
+  """Returns the CPP variable that should be used as a header guard.
+
+  Args:
+    filename: The name of a C++ header file.
+
+  Returns:
+    The CPP variable that should be used as a header guard in the
+    named file.
+
+  """
+
+  # Restores original filename in case that cpplint is invoked from Emacs's
+  # flymake.
+  filename = re.sub(r'_flymake\.h$', '.h', filename)
+  filename = re.sub(r'/\.flymake/([^/]*)$', r'/\1', filename)
+  # Replace 'c++' with 'cpp'.
+  filename = filename.replace('C++', 'cpp').replace('c++', 'cpp')
+
+  fileinfo = FileInfo(filename)
+  file_path_from_root = fileinfo.RepositoryName()
+  if _root:
+    file_path_from_root = re.sub('^' + _root + os.sep, '', file_path_from_root)
+  return re.sub(r'[^a-zA-Z0-9]', '_', file_path_from_root).upper() + '_'
+
+
+def CheckForHeaderGuard(filename, clean_lines, error):
+  """Checks that the file contains a header guard.
+
+  Logs an error if no #ifndef header guard is present.  For other
+  headers, checks that the full pathname is used.
+
+  The check proceeds in stages, each of which may report and stop:
+    1. If any line carries a "// NOLINT(build/header_guard)" comment,
+       nothing is checked.
+    2. The first #ifndef and first #define that have an argument, and the
+       last line starting with #endif, are collected.  A missing or
+       mismatched #ifndef/#define pair is reported and ends the check.
+    3. The guard name is compared with the suggested CPP variable.
+    4. The #endif line is checked for a trailing comment naming the guard.
+
+  Args:
+    filename: The name of the C++ header file.
+    clean_lines: A CleansedLines instance containing the file.
+    error: The function to call with any errors found.
+  """
+
+  # Don't check for header guards if there are error suppression
+  # comments somewhere in this file.
+  #
+  # Because this is silencing a warning for a nonexistent line, we
+  # only support the very specific NOLINT(build/header_guard) syntax,
+  # and not the general NOLINT or NOLINT(*) syntax.
+  raw_lines = clean_lines.lines_without_raw_strings
+  for i in raw_lines:
+    if Search(r'//\s*NOLINT\(build/header_guard\)', i):
+      return
+
+  # The guard name this file is expected to use.
+  cppvar = GetHeaderGuardCPPVariable(filename)
+
+  ifndef = ''
+  ifndef_linenum = 0
+  define = ''
+  endif = ''
+  endif_linenum = 0
+  for linenum, line in enumerate(raw_lines):
+    linesplit = line.split()
+    if len(linesplit) >= 2:
+      # find the first occurrence of #ifndef and #define, save arg
+      if not ifndef and linesplit[0] == '#ifndef':
+        # set ifndef to the header guard presented on the #ifndef line.
+        ifndef = linesplit[1]
+        ifndef_linenum = linenum
+      if not define and linesplit[0] == '#define':
+        define = linesplit[1]
+    # find the last occurrence of #endif, save entire line
+    if line.startswith('#endif'):
+      endif = line
+      endif_linenum = linenum
+
+  # Without a matching #ifndef/#define pair there is no guard to inspect
+  # any further.
+  if not ifndef or not define or ifndef != define:
+    error(filename, 0, 'build/header_guard', 5,
+          'No #ifndef header guard found, suggested CPP variable is: %s' %
+          cppvar)
+    return
+
+  # The guard should be PATH_FILE_H_, but we also allow PATH_FILE_H__
+  # for backward compatibility.
+  if ifndef != cppvar:
+    # Confidence 0 for the tolerated extra-underscore form, 5 otherwise.
+    error_level = 0
+    if ifndef != cppvar + '_':
+      error_level = 5
+
+    # Pick up any NOLINT comment on the #ifndef line before reporting.
+    ParseNolintSuppressions(filename, raw_lines[ifndef_linenum], ifndef_linenum,
+                            error)
+    error(filename, ifndef_linenum, 'build/header_guard', error_level,
+          '#ifndef header guard has wrong style, please use: %s' % cppvar)
+
+  # Check for "//" comments on endif line.
+  ParseNolintSuppressions(filename, raw_lines[endif_linenum], endif_linenum,
+                          error)
+  match = Match(r'#endif\s*//\s*' + cppvar + r'(_)?\b', endif)
+  if match:
+    if match.group(1) == '_':
+      # Issue low severity warning for deprecated double trailing underscore
+      error(filename, endif_linenum, 'build/header_guard', 0,
+            '#endif line should be "#endif  // %s"' % cppvar)
+    return
+
+  # Didn't find the corresponding "//" comment.  If this file does not
+  # contain any "//" comments at all, it could be that the compiler
+  # only wants "/**/" comments, look for those instead.
+  no_single_line_comments = True
+  # Note: the first and last entries of raw_lines are not scanned.
+  for i in xrange(1, len(raw_lines) - 1):
+    line = raw_lines[i]
+    # Match a "//" that is preceded only by complete character or string
+    # literals and other non-quote characters, i.e. not one that sits
+    # inside a quoted literal.
+    if Match(r'^(?:(?:\'(?:\.|[^\'])*\')|(?:"(?:\.|[^"])*")|[^\'"])*//', line):
+      no_single_line_comments = False
+      break
+
+  if no_single_line_comments:
+    match = Match(r'#endif\s*/\*\s*' + cppvar + r'(_)?\s*\*/', endif)
+    if match:
+      if match.group(1) == '_':
+        # Low severity warning for double trailing underscore
+        error(filename, endif_linenum, 'build/header_guard', 0,
+              '#endif line should be "#endif  /* %s */"' % cppvar)
+      return
+
+  # Didn't find anything
+  error(filename, endif_linenum, 'build/header_guard', 5,
+        '#endif line should be "#endif  // %s"' % cppvar)
+
+
+def CheckHeaderFileIncluded(filename, include_state, error):
+  """Reports a .cc file that never includes its own header.
+
+  Nothing is reported for *_test.cc / *_unittest.cc files, or when the
+  sibling header does not exist on disk.
+
+  Args:
+    filename: The name of the current file.
+    include_state: Object whose include_list attribute holds lists of
+      include records; each record is indexed as [0] and [1] below
+      (presumably the included path and its line number -- confirm
+      against the code that fills include_list).
+    error: The function to call with any errors found.
+  """
+
+  # Do not check test files
+  if filename.endswith(('_test.cc', '_unittest.cc')):
+    return
+
+  fileinfo = FileInfo(filename)
+  # Drop the last two characters of the name and append 'h'.
+  headerfile = filename[:-2] + 'h'
+  if not os.path.exists(headerfile):
+    return
+  headername = FileInfo(headerfile).RepositoryName()
+
+  first_include = 0
+  for section_list in include_state.include_list:
+    for include in section_list:
+      included_path = include[0]
+      # Substring containment in either direction counts as a match.
+      if headername in included_path or included_path in headername:
+        return
+      if not first_include:
+        first_include = include[1]
+
+  message = '%s should include its header file %s' % (
+      fileinfo.RepositoryName(), headername)
+  error(filename, first_include, 'build/include', 5, message)
+
+
+def CheckForBadCharacters(filename, lines, error):
+  """Reports every line that contains a character we consider bad.
+
+  Two kinds of characters are flagged:
+
+  1. Unicode replacement characters.  Their presence means the file either
+  held invalid UTF-8 (likely) or literally contained replacement
+  characters (which it shouldn't).  Be aware that line numbering can be
+  thrown off when the invalid UTF-8 sat next to a newline.
+
+  2. NUL bytes, which some tools cannot cope with.
+
+  Args:
+    filename: The name of the current file.
+    lines: An array of strings, each representing a line of the file.
+    error: The function to call with any errors found.
+  """
+  # (character to look for, error category, message); checked in this order
+  # for every line.
+  bad_characters = (
+      (u'\ufffd', 'readability/utf8',
+       'Line contains invalid UTF-8 (or Unicode replacement character).'),
+      ('\0', 'readability/nul', 'Line contains NUL byte.'),
+  )
+  for linenum, text in enumerate(lines):
+    for needle, category, message in bad_characters:
+      if needle in text:
+        error(filename, linenum, category, 5, message)
+
+
+def CheckForNewlineAtEOF(filename, lines, error):
+  """Reports a file whose last character is not a newline.
+
+  Args:
+    filename: The name of the current file.
+    lines: An array of strings, each representing a line of the file.
+    error: The function to call with any errors found.
+  """
+
+  # lines was built by appending two newlines to the original file and
+  # then splitting on \n.  A file that ends in \n therefore shows up as
+  # an existing, empty last-but-one element.
+  if len(lines) >= 3 and not lines[-2]:
+    return
+
+  error(filename, len(lines) - 2, 'whitespace/ending_newline', 5,
+        'Could not find a newline character at the end of the file.')
+
+
+def CheckForMultilineCommentsAndStrings(filename, clean_lines, linenum, error):
+  """Flags /* ... */ comments and "..." strings that spill past one line.
+
+  A one-line /* ... */ comment is legitimate inside macros; otherwise //
+  comments are preferred, so warning about the multi-line form is fine.
+  Strings may legally continue over several lines when each line ends in
+  a backslash, and the C++ style guide does not forbid it, but it is ugly
+  and unnecessary.  This lint program copes badly with both constructs,
+  hence the warnings.
+
+  Args:
+    filename: The name of the current file.
+    clean_lines: A CleansedLines instance containing the file.
+    linenum: The number of the line to check.
+    error: The function to call with any errors found.
+  """
+  # Escaped backslashes (\\) are harmless, but their second slash could be
+  # mistaken for the start of an escaped quote below, so drop them first.
+  line = clean_lines.elided[linenum].replace('\\\\', '')
+
+  # More comment openers than closers: a comment continues past this line.
+  opens_more_than_closes = line.count('/*') > line.count('*/')
+  if opens_more_than_closes:
+    error(filename, linenum, 'readability/multiline_comment', 5,
+          'Complex multi-line /*...*/-style comment found. '
+          'Lint may give bogus warnings.  '
+          'Consider replacing these with //-style comments, '
+          'with #if 0...#endif, '
+          'or with more clearly structured multi-line comments.')
+
+  # An odd number of unescaped double quotes means a string is left open.
+  unescaped_quotes = line.count('"') - line.count('\\"')
+  if unescaped_quotes % 2:
+    error(filename, linenum, 'readability/multiline_string', 5,
+          'Multi-line string ("...") found.  This lint script doesn\'t '
+          'do well with such strings, and may give bogus warnings.  '
+          'Use C++11 raw strings or concatenation instead.')
+
+
+# Table of thread-unsafe POSIX functions.  Each entry is a 3-tuple:
+# (non-threadsafe name, thread-safe alternative, validation pattern)
+#
+# The two names are only used to build the warning text; the validation
+# pattern is what is actually searched for on each line.
+#
+# The validation pattern is used to eliminate false positives such as:
+#  _rand();               // false positive due to substring match.
+#  ->rand();              // some member function rand().
+#  ACMRandom rand(seed);  // some variable named rand.
+#  ISAACRandom rand();    // another variable named rand.
+#
+# Basically we require the return value of these functions to be used
+# in some expression context on the same line by matching on some
+# operator before the function name.  This eliminates constructors and
+# member function calls.
+#
+# _UNSAFE_FUNC_PREFIX is that leading operator: one of - + * / = % ^ & | ( <
+# followed by optional whitespace, or '>' followed by at least one
+# whitespace character.
+_UNSAFE_FUNC_PREFIX = r'(?:[-+*/=%^&|(<]\s*|>\s+)'
+_THREADING_LIST = (
+    ('asctime(', 'asctime_r(', _UNSAFE_FUNC_PREFIX + r'asctime\([^)]+\)'),
+    ('ctime(', 'ctime_r(', _UNSAFE_FUNC_PREFIX + r'ctime\([^)]+\)'),
+    ('getgrgid(', 'getgrgid_r(', _UNSAFE_FUNC_PREFIX + r'getgrgid\([^)]+\)'),
+    ('getgrnam(', 'getgrnam_r(', _UNSAFE_FUNC_PREFIX + r'getgrnam\([^)]+\)'),
+    ('getlogin(', 'getlogin_r(', _UNSAFE_FUNC_PREFIX + r'getlogin\(\)'),
+    ('getpwnam(', 'getpwnam_r(', _UNSAFE_FUNC_PREFIX + r'getpwnam\([^)]+\)'),
+    ('getpwuid(', 'getpwuid_r(', _UNSAFE_FUNC_PREFIX + r'getpwuid\([^)]+\)'),
+    ('gmtime(', 'gmtime_r(', _UNSAFE_FUNC_PREFIX + r'gmtime\([^)]+\)'),
+    ('localtime(', 'localtime_r(', _UNSAFE_FUNC_PREFIX + r'localtime\([^)]+\)'),
+    ('rand(', 'rand_r(', _UNSAFE_FUNC_PREFIX + r'rand\(\)'),
+    ('strtok(', 'strtok_r(',
+     _UNSAFE_FUNC_PREFIX + r'strtok\([^)]+\)'),
+    ('ttyname(', 'ttyname_r(', _UNSAFE_FUNC_PREFIX + r'ttyname\([^)]+\)'),
+    )
+
+
+def CheckPosixThreading(filename, clean_lines, linenum, error):
+  """Warns about calls to POSIX functions that are not thread-safe.
+
+  A lot of code was written with no thought for multi-threading, and many
+  engineers learned posix before its threading extensions existed.  This
+  check nudges them toward the thread-safe variants (when posix is used
+  directly).
+
+  Args:
+    filename: The name of the current file.
+    clean_lines: A CleansedLines instance containing the file.
+    linenum: The number of the line to check.
+    error: The function to call with any errors found.
+  """
+  line = clean_lines.elided[linenum]
+  for unsafe_name, safe_name, call_pattern in _THREADING_LIST:
+    # Only the validation pattern decides whether this really is a call
+    # to the function in question.
+    if not Search(call_pattern, line):
+      continue
+    message = ('Consider using ' + safe_name +
+               '...) instead of ' + unsafe_name +
+               '...) for improved thread safety.')
+    error(filename, linenum, 'runtime/threadsafe_fn', 2, message)
+
+
+def CheckVlogArguments(filename, clean_lines, linenum, error):
+  """Verifies that VLOG() is given a verbosity level, not a severity name.
+
+  VLOG(2) is fine, whereas VLOG(INFO), VLOG(WARNING), VLOG(ERROR),
+  VLOG(DFATAL) and VLOG(FATAL) are reported.
+
+  Args:
+    filename: The name of the current file.
+    clean_lines: A CleansedLines instance containing the file.
+    linenum: The number of the line to check.
+    error: The function to call with any errors found.
+  """
+  symbolic_level = Search(r'\bVLOG\((INFO|ERROR|WARNING|DFATAL|FATAL)\)',
+                          clean_lines.elided[linenum])
+  if not symbolic_level:
+    return
+
+  error(filename, linenum, 'runtime/vlog', 5,
+        'VLOG() should be used with numeric verbosity level.  '
+        'Use LOG() if you want symbolic severity levels.')
+
+# Matches invalid increment: *count++, which moves pointer instead of
+# incrementing a value.
+_RE_PATTERN_INVALID_INCREMENT = re.compile(
+    r'^\s*\*\w+(\+\+|--);')
+
+
+def CheckInvalidIncrement(filename, clean_lines, linenum, error):
+  """Checks for invalid increment *count++.
+
+  For example following function:
+  void increment_counter(int* count) {
+    *count++;
+  }
+  is invalid, because it effectively does count++, moving pointer, and should
+  be replaced with ++*count, (*count)++ or *count += 1.
+
+  Args:
+    filename: The name of the current file.
+    clean_lines: A CleansedLines instance containing the file.
+    linenum: The number of the line to check.
+    error: The function to call with any errors found.
+  """
+  line = clean_lines.elided[linenum]
+  if _RE_PATTERN_INVALID_INCREMENT.match(line):
+    error(filename, linenum, 'runtime/invalid_increment', 5,
+          'Changing pointer instead of value (or unused value of operator*).')
+
+
+def IsMacroDefinition(clean_lines, linenum):
+  if Search(r'^#define', clean_lines[linenum]):
+    return True
+
+  if linenum > 0 and Search(r'\\$', clean_lines[linenum - 1]):
+    return True
+
+  return False
+
+
+def IsForwardClassDeclaration(clean_lines, linenum):
+  return Match(r'^\s*(\btemplate\b)*.*class\s+\w+;\s*$', clean_lines[linenum])
+
+
+class _BlockInfo(object):
+  """Stores information about a generic block of code."""
+
+  def __init__(self, seen_open_brace):
+    self.seen_open_brace = seen_open_brace
+    self.open_parentheses = 0
+    self.inline_asm = _NO_ASM
+    self.check_namespace_indentation = False
+
+  def CheckBegin(self, filename, clean_lines, linenum, error):
+    """Run checks that applies to text up to the opening brace.
+
+    This is mostly for checking the text after the class identifier
+    and the "{", usually where the base class is specified.  For other
+    blocks, there isn't much to check, so we always pass.
+
+    Args:
+      filename: The name of the current file.
+      clean_lines: A CleansedLines instance containing the file.
+      linenum: The number of the line to check.
+      error: The function to call with any errors found.
+    """
+    pass
+
+  def CheckEnd(self, filename, clean_lines, linenum, error):
+    """Run checks that applies to text after the closing brace.
+
+    This is mostly used for checking end of namespace comments.
+
+    Args:
+      filename: The name of the current file.
+      clean_lines: A CleansedLines instance containing the file.
+      linenum: The number of the line to check.
+      error: The function to call with any errors found.
+    """
+    pass
+
+  def IsBlockInfo(self):
+    """Returns true if this block is a _BlockInfo.
+
+    This is convenient for verifying that an object is an instance of
+    a _BlockInfo, but not an instance of any of the derived classes.
+
+    Returns:
+      True for this class, False for derived classes.
+    """
+    return self.__class__ == _BlockInfo
+
+
+class _ExternCInfo(_BlockInfo):
+  """Stores information about an 'extern "C"' block."""
+
+  def __init__(self):
+    _BlockInfo.__init__(self, True)
+
+
+class _ClassInfo(_BlockInfo):
+  """Stores information about a class."""
+
+  def __init__(self, name, class_or_struct, clean_lines, linenum):
+    """Records where a class/struct starts and estimates where it ends.
+
+    Args:
+      name: The name of the class or struct.
+      class_or_struct: The keyword used; 'struct' selects struct defaults,
+        anything else is treated as a class.
+      clean_lines: A CleansedLines instance containing the file.
+      linenum: The number of the line on which the declaration starts.
+    """
+    _BlockInfo.__init__(self, False)
+    self.name = name
+    self.starting_linenum = linenum
+    self.is_derived = False
+    self.check_namespace_indentation = True
+    # Default member access: public for a struct, private for a class.
+    if class_or_struct == 'struct':
+      self.access = 'public'
+      self.is_struct = True
+    else:
+      self.access = 'private'
+      self.is_struct = False
+
+    # Remember initial indentation level for this class.  Using raw_lines here
+    # instead of elided to account for leading comments.
+    self.class_indent = GetIndentLevel(clean_lines.raw_lines[linenum])
+
+    # Try to find the end of the class.  This will be confused by things like:
+    #   class A {
+    #   } *x = { ...
+    #
+    # But it's still good enough for CheckSectionSpacing.
+    #
+    # last_line becomes the first line, starting at linenum, after which the
+    # running count of '{' minus '}' is zero; it stays 0 if there is none.
+    self.last_line = 0
+    depth = 0
+    for i in range(linenum, clean_lines.NumLines()):
+      line = clean_lines.elided[i]
+      depth += line.count('{') - line.count('}')
+      if not depth:
+        self.last_line = i
+        break
+
+  def CheckBegin(self, filename, clean_lines, linenum, error):
+    """Marks the class as derived if the line contains a single ':'.
+
+    Args:
+      filename: The name of the current file (unused here).
+      clean_lines: A CleansedLines instance containing the file.
+      linenum: The number of the line to check.
+      error: The function to call with any errors found (unused here).
+    """
+    # Look for a bare ':'
+    if Search('(^|[^:]):($|[^:])', clean_lines.elided[linenum]):
+      self.is_derived = True
+
+  def CheckEnd(self, filename, clean_lines, linenum, error):
+    """Checks DISALLOW macro placement and closing brace alignment.
+
+    Args:
+      filename: The name of the current file.
+      clean_lines: A CleansedLines instance containing the file.
+      linenum: The number of the line holding the class's closing brace.
+      error: The function to call with any errors found.
+    """
+    # If there is a DISALLOW macro, it should appear near the end of
+    # the class.
+    #
+    # Walk upward from the line above the closing brace, stopping before
+    # the line the class started on.  Any non-blank line passed before the
+    # macro is reached means the macro is not the last thing in the class.
+    seen_last_thing_in_class = False
+    for i in xrange(linenum - 1, self.starting_linenum, -1):
+      match = Search(
+          r'\b(DISALLOW_COPY_AND_ASSIGN|DISALLOW_IMPLICIT_CONSTRUCTORS)\(' +
+          self.name + r'\)',
+          clean_lines.elided[i])
+      if match:
+        if seen_last_thing_in_class:
+          error(filename, i, 'readability/constructors', 3,
+                match.group(1) + ' should be the last thing in the class')
+        break
+
+      if not Match(r'^\s*$', clean_lines.elided[i]):
+        seen_last_thing_in_class = True
+
+    # Check that closing brace is aligned with beginning of the class.
+    # Only do this if the closing brace is indented by only whitespaces.
+    # This means we will not check single-line class definitions.
+    indent = Match(r'^( *)\}', clean_lines.elided[linenum])
+    if indent and len(indent.group(1)) != self.class_indent:
+      if self.is_struct:
+        parent = 'struct ' + self.name
+      else:
+        parent = 'class ' + self.name
+      error(filename, linenum, 'whitespace/indent', 3,
+            'Closing brace should be aligned with beginning of %s' % parent)
+
+
+class _NamespaceInfo(_BlockInfo):
+  """Stores information about a namespace."""
+
+  def __init__(self, name, linenum):
+    """Records the namespace name and the line it starts on.
+
+    Args:
+      name: The namespace name; a false value (e.g. None) is stored as ''
+        and treated as an anonymous namespace by CheckEnd.
+      linenum: The number of the line on which the namespace starts.
+    """
+    _BlockInfo.__init__(self, False)
+    self.name = name or ''
+    self.starting_linenum = linenum
+    self.check_namespace_indentation = True
+
+  def CheckEnd(self, filename, clean_lines, linenum, error):
+    """Check end of namespace comments.
+
+    Args:
+      filename: The name of the current file.
+      clean_lines: A CleansedLines instance containing the file.
+      linenum: The number of the line holding the closing brace.
+      error: The function to call with any errors found.
+    """
+    # The raw line is used because the terminating comment is the thing
+    # being inspected.
+    line = clean_lines.raw_lines[linenum]
+
+    # Check how many lines is enclosed in this namespace.  Don't issue
+    # warning for missing namespace comments if there aren't enough
+    # lines.  However, do apply checks if there is already an end of
+    # namespace comment and it's incorrect.
+    #
+    # TODO(unknown): We always want to check end of namespace comments
+    # if a namespace is large, but sometimes we also want to apply the
+    # check if a short namespace contained nontrivial things (something
+    # other than forward declarations).  There is currently no logic on
+    # deciding what these nontrivial things are, so this check is
+    # triggered by namespace size only, which works most of the time.
+    if (linenum - self.starting_linenum < 10
+        and not Match(r'};*\s*(//|/\*).*\bnamespace\b', line)):
+      return
+
+    # Look for matching comment at end of namespace.
+    #
+    # Note that we accept C style "/* */" comments for terminating
+    # namespaces, so that code that terminate namespaces inside
+    # preprocessor macros can be cpplint clean.
+    #
+    # We also accept stuff like "// end of namespace <name>." with the
+    # period at the end.
+    #
+    # Besides these, we don't accept anything else, otherwise we might
+    # get false negatives when existing comment is a substring of the
+    # expected namespace.
+    if self.name:
+      # Named namespace
+      if not Match((r'};*\s*(//|/\*).*\bnamespace\s+' + re.escape(self.name) +
+                    r'[\*/\.\\\s]*$'),
+                   line):
+        error(filename, linenum, 'readability/namespace', 5,
+              'Namespace should be terminated with "// namespace %s"' %
+              self.name)
+    else:
+      # Anonymous namespace
+      if not Match(r'};*\s*(//|/\*).*\bnamespace[\*/\.\\\s]*$', line):
+        # If "// namespace anonymous" or "// anonymous namespace (more text)",
+        # mention "// anonymous namespace" as an acceptable form
+        if Match(r'}.*\b(namespace anonymous|anonymous namespace)\b', line):
+          error(filename, linenum, 'readability/namespace', 5,
+                'Anonymous namespace should be terminated with "// namespace"'
+                ' or "// anonymous namespace"')
+        else:
+          error(filename, linenum, 'readability/namespace', 5,
+                'Anonymous namespace should be terminated with "// namespace"')
+
+
+class _PreprocessorInfo(object):
+  """Stores checkpoints of nesting stacks when #if/#else is seen."""
+
+  def __init__(self, stack_before_if):
+    # The entire nesting stack before #if
+    self.stack_before_if = stack_before_if
+
+    # The entire nesting stack up to #else
+    self.stack_before_else = []
+
+    # Whether we have already seen #else or #elif
+    self.seen_else = False
+
+
+class NestingState(object):
+  """Holds states related to parsing braces."""
+
+  def __init__(self):
+    # Stack for tracking all braces.  An object is pushed whenever we
+    # see a "{", and popped when we see a "}".  Only 3 types of
+    # objects are possible:
+    # - _ClassInfo: a class or struct.
+    # - _NamespaceInfo: a namespace.
+    # - _BlockInfo: some other type of block.
+    self.stack = []
+
+    # Top of the previous stack before each Update().
+    #
+    # Because the nesting_stack is updated at the end of each line, we
+    # had to do some convoluted checks to find out what is the current
+    # scope at the beginning of the line.  This check is simplified by
+    # saving the previous top of nesting stack.
+    #
+    # We could save the full stack, but we only need the top.  Copying
+    # the full nesting stack would slow down cpplint by ~10%.
+    self.previous_stack_top = []
+
+    # Stack of _PreprocessorInfo objects.
+    self.pp_stack = []
+
+  def SeenOpenBrace(self):
+    """Check if we have seen the opening brace for the innermost block.
+
+    Returns:
+      True if we have seen the opening brace, False if the innermost
+      block is still expecting an opening brace.
+    """
+    return (not self.stack) or self.stack[-1].seen_open_brace
+
+  def InNamespaceBody(self):
+    """Check if we are currently one level inside a namespace body.
+
+    Returns:
+      True if top of the stack is a namespace block, False otherwise.
+    """
+    return self.stack and isinstance(self.stack[-1], _NamespaceInfo)
+
+  def InExternC(self):
+    """Check if we are currently one level inside an 'extern "C"' block.
+
+    Returns:
+      True if top of the stack is an extern block, False otherwise.
+    """
+    return self.stack and isinstance(self.stack[-1], _ExternCInfo)
+
+  def InClassDeclaration(self):
+    """Check if we are currently one level inside a class or struct declaration.
+
+    Returns:
+      True if top of the stack is a class/struct, False otherwise.
+    """
+    return self.stack and isinstance(self.stack[-1], _ClassInfo)
+
+  def InAsmBlock(self):
+    """Check if we are currently one level inside an inline ASM block.
+
+    Returns:
+      True if the top of the stack is a block containing inline ASM.
+    """
+    return self.stack and self.stack[-1].inline_asm != _NO_ASM
+
+  def InTemplateArgumentList(self, clean_lines, linenum, pos):
+    """Check if current position is inside template argument list.
+
+    Args:
+      clean_lines: A CleansedLines instance containing the file.
+      linenum: The number of the line to check.
+      pos: position just after the suspected template argument.
+    Returns:
+      True if (linenum, pos) is inside template arguments.
+    """
+    while linenum < clean_lines.NumLines():
+      # Find the earliest character that might indicate a template argument
+      line = clean_lines.elided[linenum]
+      match = Match(r'^[^{};=\[\]\.<>]*(.)', line[pos:])
+      if not match:
+        linenum += 1
+        pos = 0
+        continue
+      token = match.group(1)
+      pos += len(match.group(0))
+
+      # These things do not look like template argument list:
+      #   class Suspect {
+      #   class Suspect x; }
+      if token in ('{', '}', ';'): return False
+
+      # These things look like template argument list:
+      #   template <class Suspect>
+      #   template <class Suspect = default_value>
+      #   template <class Suspect[]>
+      #   template <class Suspect...>
+      if token in ('>', '=', '[', ']', '.'): return True
+
+      # Check if token is an unmatched '<'.
+      # If not, move on to the next character.
+      if token != '<':
+        pos += 1
+        if pos >= len(line):
+          linenum += 1
+          pos = 0
+        continue
+
+      # We can't be sure if we just find a single '<', and need to
+      # find the matching '>'.
+      (_, end_line, end_pos) = CloseExpression(clean_lines, linenum, pos - 1)
+      if end_pos < 0:
+        # Not sure if template argument list or syntax error in file
+        return False
+      linenum = end_line
+      pos = end_pos
+    return False
+
+  def UpdatePreprocessor(self, line):
+    """Update preprocessor stack.
+
+    We need to handle preprocessors due to classes like this:
+      #ifdef SWIG
+      struct ResultDetailsPageElementExtensionPoint {
+      #else
+      struct ResultDetailsPageElementExtensionPoint : public Extension {
+      #endif
+
+    We make the following assumptions (good enough for most files):
+    - Preprocessor condition evaluates to true from #if up to first
+      #else/#elif/#endif.
+
+    - Preprocessor condition evaluates to false from #else/#elif up
+      to #endif.  We still perform lint checks on these lines, but
+      these do not affect nesting stack.
+
+    Args:
+      line: current line to check.
+    """
+    if Match(r'^\s*#\s*(if|ifdef|ifndef)\b', line):
+      # Beginning of #if block, save the nesting stack here.  The saved
+      # stack will allow us to restore the parsing state in the #else case.
+      self.pp_stack.append(_PreprocessorInfo(copy.deepcopy(self.stack)))
+    elif Match(r'^\s*#\s*(else|elif)\b', line):
+      # Beginning of #else block
+      if self.pp_stack:
+        if not self.pp_stack[-1].seen_else:
+          # This is the first #else or #elif block.  Remember the
+          # whole nesting stack up to this point.  This is what we
+          # keep after the #endif.
+          self.pp_stack[-1].seen_else = True
+          self.pp_stack[-1].stack_before_else = copy.deepcopy(self.stack)
+
+        # Restore the stack to how it was before the #if
+        self.stack = copy.deepcopy(self.pp_stack[-1].stack_before_if)
+      else:
+        # TODO(unknown): unexpected #else, issue warning?
+        pass
+    elif Match(r'^\s*#\s*endif\b', line):
+      # End of #if or #else blocks.
+      if self.pp_stack:
+        # If we saw an #else, we will need to restore the nesting
+        # stack to its former state before the #else, otherwise we
+        # will just continue from where we left off.
+        if self.pp_stack[-1].seen_else:
+          # Here we can just use a shallow copy since we are the last
+          # reference to it.
+          self.stack = self.pp_stack[-1].stack_before_else
+        # Drop the corresponding #if
+        self.pp_stack.pop()
+      else:
+        # TODO(unknown): unexpected #endif, issue warning?
+        pass
+
+  # TODO(unknown): Update() is too long, but we will refactor later.
+  def Update(self, filename, clean_lines, linenum, error):
+    """Update nesting state with current line.
+
+    Args:
+      filename: The name of the current file.
+      clean_lines: A CleansedLines instance containing the file.
+      linenum: The number of the line to check.
+      error: The function to call with any errors found.
+    """
+    line = clean_lines.elided[linenum]
+
+    # Remember top of the previous nesting stack.
+    #
+    # The stack is always pushed/popped and not modified in place, so
+    # we can just do a shallow copy instead of copy.deepcopy.  Using
+    # deepcopy would slow down cpplint by ~28%.
+    if self.stack:
+      self.previous_stack_top = self.stack[-1]
+    else:
+      self.previous_stack_top = None
+
+    # Update pp_stack
+    self.UpdatePreprocessor(line)
+
+    # Count parentheses.  This is to avoid adding struct arguments to
+    # the nesting stack.
+    if self.stack:
+      inner_block = self.stack[-1]
+      depth_change = line.count('(') - line.count(')')
+      inner_block.open_parentheses += depth_change
+
+      # Also check if we are starting or ending an inline assembly block.
+      if inner_block.inline_asm in (_NO_ASM, _END_ASM):
+        if (depth_change != 0 and
+            inner_block.open_parentheses == 1 and
+            _MATCH_ASM.match(line)):
+          # Enter assembly block
+          inner_block.inline_asm = _INSIDE_ASM
+        else:
+          # Not entering assembly block.  If previous line was _END_ASM,
+          # we will now shift to _NO_ASM state.
+          inner_block.inline_asm = _NO_ASM
+      elif (inner_block.inline_asm == _INSIDE_ASM and
+            inner_block.open_parentheses == 0):
+        # Exit assembly block
+        inner_block.inline_asm = _END_ASM
+
+    # Consume namespace declaration at the beginning of the line.  Do
+    # this in a loop so that we catch same line declarations like this:
+    #   namespace proto2 { namespace bridge { class MessageSet; } }
+    while True:
+      # Match start of namespace.  The "\b\s*" below catches namespace
+      # declarations even if it weren't followed by a whitespace, this
+      # is so that we don't confuse our namespace checker.  The
+      # missing spaces will be flagged by CheckSpacing.
+      namespace_decl_match = Match(r'^\s*namespace\b\s*([:\w]+)?(.*)$', line)
+      if not namespace_decl_match:
+        break
+
+      new_namespace = _NamespaceInfo(namespace_decl_match.group(1), linenum)
+      self.stack.append(new_namespace)
+
+      line = namespace_decl_match.group(2)
+      if line.find('{') != -1:
+        new_namespace.seen_open_brace = True
+        line = line[line.find('{') + 1:]
+
+    # Look for a class declaration in whatever is left of the line
+    # after parsing namespaces.  The regexp accounts for decorated classes
+    # such as in:
+    #   class LOCKABLE API Object {
+    #   };
+    class_decl_match = Match(
+        r'^(\s*(?:template\s*<[\w\s<>,:]*>\s*)?'
+        r'(class|struct)\s+(?:[A-Z_]+\s+)*(\w+(?:::\w+)*))'
+        r'(.*)$', line)
+    if (class_decl_match and
+        (not self.stack or self.stack[-1].open_parentheses == 0)):
+      # We do not want to accept classes that are actually template arguments:
+      #   template <class Ignore1,
+      #             class Ignore2 = Default<Args>,
+      #             template <Args> class Ignore3>
+      #   void Function() {};
+      #
+      # To avoid template argument cases, we scan forward and look for
+      # an unmatched '>'.  If we see one, assume we are inside a
+      # template argument list.
+      end_declaration = len(class_decl_match.group(1))
+      if not self.InTemplateArgumentList(clean_lines, linenum, end_declaration):
+        self.stack.append(_ClassInfo(
+            class_decl_match.group(3), class_decl_match.group(2),
+            clean_lines, linenum))
+        line = class_decl_match.group(4)
+
+    # If we have not yet seen the opening brace for the innermost block,
+    # run checks here.
+    if not self.SeenOpenBrace():
+      self.stack[-1].CheckBegin(filename, clean_lines, linenum, error)
+
+    # Update access control if we are inside a class/struct
+    if self.stack and isinstance(self.stack[-1], _ClassInfo):
+      classinfo = self.stack[-1]
+      access_match = Match(
+          r'^(.*)\b(public|private|protected|signals)(\s+(?:slots\s*)?)?'
+          r':(?:[^:]|$)',
+          line)
+      if access_match:
+        classinfo.access = access_match.group(2)
+
+        # Check that access keywords are indented +1 space.  Skip this
+        # check if the keywords are not preceded by whitespaces.
+        indent = access_match.group(1)
+        if (len(indent) != classinfo.class_indent + 1 and
+            Match(r'^\s*$', in

<TRUNCATED>

[3/7] parquet-cpp git commit: PARQUET-416: C++11 compilation, code reorg, libparquet and installation targets

Posted by no...@apache.org.

http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/337cf584/src/impala/rle-encoding.h
----------------------------------------------------------------------
diff --git a/src/impala/rle-encoding.h b/src/impala/rle-encoding.h
deleted file mode 100644
index 759f917..0000000
--- a/src/impala/rle-encoding.h
+++ /dev/null
@@ -1,417 +0,0 @@
-// Copyright 2012 Cloudera Inc.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#ifndef IMPALA_RLE_ENCODING_H
-#define IMPALA_RLE_ENCODING_H
-
-#include <math.h>
-
-#include "impala/compiler-util.h"
-#include "impala/bit-stream-utils.inline.h"
-#include "impala/bit-util.h"
-#include "impala/logging.h"
-
-namespace impala {
-
-// Utility classes to do run length encoding (RLE) for fixed bit width values.  If runs
-// are sufficiently long, RLE is used, otherwise, the values are just bit-packed
-// (literal encoding).
-// For both types of runs, there is a byte-aligned indicator which encodes the length
-// of the run and the type of the run.
-// This encoding has the benefit that when there aren't any long enough runs, values
-// are always decoded at fixed (can be precomputed) bit offsets OR both the value and
-// the run length are byte aligned. This allows for very efficient decoding
-// implementations.
-// The encoding is:
-//    encoded-block := run*
-//    run := literal-run | repeated-run
-//    literal-run := literal-indicator < literal bytes >
-//    repeated-run := repeated-indicator < repeated value. padded to byte boundary >
-//    literal-indicator := varint_encode( number_of_groups << 1 | 1)
-//    repeated-indicator := varint_encode( number_of_repetitions << 1 )
-//
-// Each run is preceded by a varint. The varint's least significant bit is
-// used to indicate whether the run is a literal run or a repeated run. The rest
-// of the varint is used to determine the length of the run (eg how many times the
-// value repeats).
-//
-// In the case of literal runs, the run length is always a multiple of 8 (i.e. encode
-// in groups of 8), so that no matter the bit-width of the value, the sequence will end
-// on a byte boundary without padding.
-// Given that we know it is a multiple of 8, we store the number of 8-groups rather than
-// the actual number of encoded ints. (This means that the total number of encoded values
-// can not be determined from the encoded data, since the number of values in the last
-// group may not be a multiple of 8). For the last group of literal runs, we pad
-// the group to 8 with zeros. This allows for 8 at a time decoding on the read side
-// without the need for additional checks.
-//
-// There is a break-even point when it is more storage efficient to do run length
-// encoding.  For 1 bit-width values, that point is 8 values.  They require 2 bytes
-// for both the repeated encoding or the literal encoding.  This value can always
-// be computed based on the bit-width.
-// TODO: think about how to use this for strings.  The bit packing isn't quite the same.
-//
-// Examples with bit-width 1 (eg encoding booleans):
-// ----------------------------------------
-// 100 1s followed by 100 0s:
-// <varint(100 << 1)> <1, padded to 1 byte>  <varint(100 << 1)> <0, padded to 1 byte>  
-//  - (total 4 bytes)
-//
-// alternating 1s and 0s (200 total):
-// 200 ints = 25 groups of 8
-// <varint((25 << 1) | 1)> <25 bytes of values, bitpacked>  
-// (total 26 bytes, 1 byte overhead)
-//
-
-// Decoder class for RLE encoded data.
-class RleDecoder {
- public:
-  // Create a decoder object. buffer/buffer_len is the decoded data.
-  // bit_width is the width of each value (before encoding).
-  RleDecoder(const uint8_t* buffer, int buffer_len, int bit_width)
-    : bit_reader_(buffer, buffer_len),
-      bit_width_(bit_width),
-      current_value_(0),
-      repeat_count_(0),
-      literal_count_(0) {
-    DCHECK_GE(bit_width_, 0);
-    DCHECK_LE(bit_width_, 64);
-  }
-
-  RleDecoder() {}
-
-  // Gets the next value.  Returns false if there are no more.
-  template<typename T>
-  bool Get(T* val);
-
- private:
-  BitReader bit_reader_;
-  int bit_width_;
-  uint64_t current_value_;
-  uint32_t repeat_count_;
-  uint32_t literal_count_;
-};
-
-// Class to incrementally build the rle data.   This class does not allocate any memory.
-// The encoding has two modes: encoding repeated runs and literal runs.
-// If the run is sufficiently short, it is more efficient to encode as a literal run.
-// This class does so by buffering 8 values at a time.  If they are not all the same
-// they are added to the literal run.  If they are the same, they are added to the
-// repeated run.  When we switch modes, the previous run is flushed out.
-class RleEncoder {
- public:
-  // buffer/buffer_len: preallocated output buffer.
-  // bit_width: max number of bits for value.
-  // TODO: consider adding a min_repeated_run_length so the caller can control
-  // when values should be encoded as repeated runs.  Currently this is derived
-  // based on the bit_width, which can determine a storage optimal choice.
-  // TODO: allow 0 bit_width (and have dict encoder use it)
-  RleEncoder(uint8_t* buffer, int buffer_len, int bit_width)
-    : bit_width_(bit_width),
-      bit_writer_(buffer, buffer_len) {
-    DCHECK_GE(bit_width_, 1);
-    DCHECK_LE(bit_width_, 64);
-    max_run_byte_size_ = MinBufferSize(bit_width);
-    DCHECK_GE(buffer_len, max_run_byte_size_) << "Input buffer not big enough.";
-    Clear();
-  }
-
-  // Returns the minimum buffer size needed to use the encoder for 'bit_width'
-  // This is the maximum length of a single run for 'bit_width'.
-  // It is not valid to pass a buffer less than this length.
-  static int MinBufferSize(int bit_width) {
-    // 1 indicator byte and MAX_VALUES_PER_LITERAL_RUN 'bit_width' values.
-    int max_literal_run_size = 1 +
-        BitUtil::Ceil(MAX_VALUES_PER_LITERAL_RUN * bit_width, 8);
-    // Up to MAX_VLQ_BYTE_LEN indicator and a single 'bit_width' value.
-    int max_repeated_run_size = BitReader::MAX_VLQ_BYTE_LEN + BitUtil::Ceil(bit_width, 8);
-    return std::max(max_literal_run_size, max_repeated_run_size);
-  }
-
-  // Returns the maximum byte size it could take to encode 'num_values'.
-  static int MaxBufferSize(int bit_width, int num_values) {
-    int bytes_per_run = BitUtil::Ceil(bit_width * MAX_VALUES_PER_LITERAL_RUN, 8.0);
-    int num_runs = BitUtil::Ceil(num_values, MAX_VALUES_PER_LITERAL_RUN);
-    int literal_max_size = num_runs + num_runs * bytes_per_run;
-    int min_run_size = MinBufferSize(bit_width);
-    return std::max(min_run_size, literal_max_size) + min_run_size;
-  }
-
-  // Encode value.  Returns true if the value fits in buffer, false otherwise.
-  // This value must be representable with bit_width_ bits.
-  bool Put(uint64_t value);
-
-  // Flushes any pending values to the underlying buffer.
-  // Returns the total number of bytes written
-  int Flush();
-
-  // Resets all the state in the encoder.
-  void Clear();
-
-  // Returns pointer to underlying buffer
-  uint8_t* buffer() { return bit_writer_.buffer(); }
-  int32_t len() { return bit_writer_.bytes_written(); }
-
- private:
-  // Flushes any buffered values.  If this is part of a repeated run, this is largely
-  // a no-op.
-  // If it is part of a literal run, this will call FlushLiteralRun, which writes
-  // out the buffered literal values.
-  // If 'done' is true, the current run would be written even if it would normally
-  // have been buffered more.  This should only be called at the end, when the
-  // encoder has received all values even if it would normally continue to be
-  // buffered.
-  void FlushBufferedValues(bool done);
-
-  // Flushes literal values to the underlying buffer.  If update_indicator_byte,
-  // then the current literal run is complete and the indicator byte is updated.
-  void FlushLiteralRun(bool update_indicator_byte);
-
-  // Flushes a repeated run to the underlying buffer.
-  void FlushRepeatedRun();
-
-  // Checks and sets buffer_full_. This must be called after flushing a run to
-  // make sure there are enough bytes remaining to encode the next run.
-  void CheckBufferFull();
-
-  // The maximum number of values in a single literal run
-  // (number of groups encodable by a 1-byte indicator * 8)
-  static const int MAX_VALUES_PER_LITERAL_RUN = (1 << 6) * 8;
-
-  // Number of bits needed to encode the value.
-  const int bit_width_;
-
-  // Underlying buffer.
-  BitWriter bit_writer_;
-
-  // If true, the buffer is full and subsequent Put()'s will fail.
-  bool buffer_full_;
-
-  // The maximum byte size a single run can take.
-  int max_run_byte_size_;
-
-  // We need to buffer at most 8 values for literals.  This happens when the
-  // bit_width is 1 (so 8 values fit in one byte).
-  // TODO: generalize this to other bit widths
-  int64_t buffered_values_[8];
-
-  // Number of values in buffered_values_
-  int num_buffered_values_;
-
-  // The current (also last) value that was written and the count of how
-  // many times in a row that value has been seen.  This is maintained even
-  // if we are in a literal run.  If the repeat_count_ get high enough, we switch
-  // to encoding repeated runs.
-  int64_t current_value_;
-  int repeat_count_;
-
-  // Number of literals in the current run.  This does not include the literals
-  // that might be in buffered_values_.  Only after we've got a group big enough
-  // can we decide if they should part of the literal_count_ or repeat_count_
-  int literal_count_;
-
-  // Pointer to a byte in the underlying buffer that stores the indicator byte.
-  // This is reserved as soon as we need a literal run but the value is written
-  // when the literal run is complete.
-  uint8_t* literal_indicator_byte_;
-};
-
-template<typename T>
-inline bool RleDecoder::Get(T* val) {
-  if (UNLIKELY(literal_count_ == 0 && repeat_count_ == 0)) {
-    // Read the next run's indicator int, it could be a literal or repeated run
-    // The int is encoded as a vlq-encoded value.
-    uint64_t indicator_value = 0;
-    bool result = bit_reader_.GetVlqInt(&indicator_value);
-    if (!result) return false;
-
-    // lsb indicates if it is a literal run or repeated run
-    bool is_literal = indicator_value & 1;
-    if (is_literal) {
-      literal_count_ = (indicator_value >> 1) * 8;
-    } else {
-      repeat_count_ = indicator_value >> 1;
-      bool result = bit_reader_.GetAligned<T>(
-          BitUtil::Ceil(bit_width_, 8), reinterpret_cast<T*>(&current_value_));
-      DCHECK(result);
-    }
-  }
-
-  if (LIKELY(repeat_count_ > 0)) {
-    *val = current_value_;
-    --repeat_count_;
-  } else {
-    DCHECK(literal_count_ > 0);
-    bool result = bit_reader_.GetValue(bit_width_, val);
-    DCHECK(result);
-    --literal_count_;
-  }
-
-  return true;
-}
-
-// This function buffers input values 8 at a time.  After seeing all 8 values,
-// it decides whether they should be encoded as a literal or repeated run.
-inline bool RleEncoder::Put(uint64_t value) {
-  DCHECK(bit_width_ == 64 || value < (1LL << bit_width_));
-  if (UNLIKELY(buffer_full_)) return false;
-
-  if (LIKELY(current_value_ == value)) {
-    ++repeat_count_;
-    if (repeat_count_ > 8) {
-      // This is just a continuation of the current run, no need to buffer the
-      // values.
-      // Note that this is the fast path for long repeated runs.
-      return true;
-    }
-  } else {
-    if (repeat_count_ >= 8) {
-      // We had a run that was long enough but it has ended.  Flush the
-      // current repeated run.
-      DCHECK_EQ(literal_count_, 0);
-      FlushRepeatedRun();
-    }
-    repeat_count_ = 1;
-    current_value_ = value;
-  }
-
-  buffered_values_[num_buffered_values_] = value;
-  if (++num_buffered_values_ == 8) {
-    DCHECK_EQ(literal_count_ % 8, 0);
-    FlushBufferedValues(false);
-  }
-  return true;
-}
-
-inline void RleEncoder::FlushLiteralRun(bool update_indicator_byte) {
-  if (literal_indicator_byte_ == NULL) {
-    // The literal indicator byte has not been reserved yet, get one now.
-    literal_indicator_byte_ = bit_writer_.GetNextBytePtr();
-    DCHECK(literal_indicator_byte_ != NULL);
-  }
-
-  // Write all the buffered values as bit packed literals
-  for (int i = 0; i < num_buffered_values_; ++i) {
-    bool success = bit_writer_.PutValue(buffered_values_[i], bit_width_);
-    DCHECK(success) << "There is a bug in using CheckBufferFull()";
-  }
-  num_buffered_values_ = 0;
-
-  if (update_indicator_byte) {
-    // At this point we need to write the indicator byte for the literal run.
-    // We only reserve one byte, to allow for streaming writes of literal values.
-    // The logic makes sure we flush literal runs often enough to not overrun
-    // the 1 byte.
-    DCHECK_EQ(literal_count_ % 8, 0);
-    int num_groups = literal_count_ / 8;
-    int32_t indicator_value = (num_groups << 1) | 1;
-    DCHECK_EQ(indicator_value & 0xFFFFFF00, 0);
-    *literal_indicator_byte_ = indicator_value;
-    literal_indicator_byte_ = NULL;
-    literal_count_ = 0;
-    CheckBufferFull();
-  }
-}
-
-inline void RleEncoder::FlushRepeatedRun() {
-  DCHECK_GT(repeat_count_, 0);
-  bool result = true;
-  // The lsb of 0 indicates this is a repeated run
-  int32_t indicator_value = repeat_count_ << 1 | 0;
-  result &= bit_writer_.PutVlqInt(indicator_value);
-  result &= bit_writer_.PutAligned(current_value_, BitUtil::Ceil(bit_width_, 8));
-  DCHECK(result);
-  num_buffered_values_ = 0;
-  repeat_count_ = 0;
-  CheckBufferFull();
-}
-
-// Flush the values that have been buffered.  At this point we decide whether
-// we need to switch between the run types or continue the current one.
-inline void RleEncoder::FlushBufferedValues(bool done) {
-  if (repeat_count_ >= 8) {
-    // Clear the buffered values.  They are part of the repeated run now and we
-    // don't want to flush them out as literals.
-    num_buffered_values_ = 0;
-    if (literal_count_ != 0) {
-      // There was a current literal run.  All the values in it have been flushed
-      // but we still need to update the indicator byte.
-      DCHECK_EQ(literal_count_ % 8, 0);
-      DCHECK_EQ(repeat_count_, 8);
-      FlushLiteralRun(true);
-    }
-    DCHECK_EQ(literal_count_, 0);
-    return;
-  }
-
-  literal_count_ += num_buffered_values_;
-  DCHECK_EQ(literal_count_ % 8, 0);
-  int num_groups = literal_count_ / 8;
-  if (num_groups + 1 >= (1 << 6)) {
-    // We need to start a new literal run because the indicator byte we've reserved
-    // cannot store more values.
-    DCHECK(literal_indicator_byte_ != NULL);
-    FlushLiteralRun(true);
-  } else {
-    FlushLiteralRun(done);
-  }
-  repeat_count_ = 0;
-}
-
-inline int RleEncoder::Flush() {
-  if (literal_count_ > 0 || repeat_count_ > 0 || num_buffered_values_ > 0) {
-    bool all_repeat = literal_count_ == 0 &&
-        (repeat_count_ == num_buffered_values_ || num_buffered_values_ == 0);
-    // There is something pending, figure out if it's a repeated or literal run
-    if (repeat_count_ > 0 && all_repeat) {
-      FlushRepeatedRun();
-    } else  {
-      DCHECK_EQ(literal_count_ % 8, 0);
-      // Buffer the last group of literals to 8 by padding with 0s.
-      for (; num_buffered_values_ != 0 && num_buffered_values_ < 8;
-          ++num_buffered_values_) {
-        buffered_values_[num_buffered_values_] = 0;
-      }
-      literal_count_ += num_buffered_values_;
-      FlushLiteralRun(true);
-      repeat_count_ = 0;
-    }
-  }
-  bit_writer_.Flush();
-  DCHECK_EQ(num_buffered_values_, 0);
-  DCHECK_EQ(literal_count_, 0);
-  DCHECK_EQ(repeat_count_, 0);
-
-  return bit_writer_.bytes_written();
-}
-
-inline void RleEncoder::CheckBufferFull() {
-  int bytes_written = bit_writer_.bytes_written();
-  if (bytes_written + max_run_byte_size_ > bit_writer_.buffer_len()) {
-    buffer_full_ = true;
-  }
-}
-
-inline void RleEncoder::Clear() {
-  buffer_full_ = false;
-  current_value_ = 0;
-  repeat_count_ = 0;
-  num_buffered_values_ = 0;
-  literal_count_ = 0;
-  literal_indicator_byte_ = NULL;
-  bit_writer_.Clear();
-}
-
-}
-#endif

http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/337cf584/src/parquet.cc
----------------------------------------------------------------------
diff --git a/src/parquet.cc b/src/parquet.cc
index 6c939ae..f71d32b 100644
--- a/src/parquet.cc
+++ b/src/parquet.cc
@@ -13,9 +13,10 @@
 // limitations under the License.
 
 #include "parquet/parquet.h"
-#include "encodings/encodings.h"
-#include "compression/codec.h"
+#include "parquet/encodings/encodings.h"
+#include "parquet/compression/codec.h"
 
+#include <algorithm>
 #include <string>
 #include <string.h>
 
@@ -23,18 +24,21 @@
 
 const int DATA_PAGE_SIZE = 64 * 1024;
 
-using namespace boost;
-using namespace parquet;
-using namespace std;
-
 namespace parquet_cpp {
 
+using parquet::CompressionCodec;
+using parquet::Encoding;
+using parquet::FieldRepetitionType;
+using parquet::PageType;
+using parquet::SchemaElement;
+using parquet::Type;
+
 InMemoryInputStream::InMemoryInputStream(const uint8_t* buffer, int64_t len) :
   buffer_(buffer), len_(len), offset_(0) {
 }
 
 const uint8_t* InMemoryInputStream::Peek(int num_to_peek, int* num_bytes) {
-  *num_bytes = ::min(static_cast<int64_t>(num_to_peek), len_ - offset_);
+  *num_bytes = std::min(static_cast<int64_t>(num_to_peek), len_ - offset_);
   return buffer_ + offset_;
 }
 
@@ -47,7 +51,7 @@ const uint8_t* InMemoryInputStream::Read(int num_to_read, int* num_bytes) {
 ColumnReader::~ColumnReader() {
 }
 
-ColumnReader::ColumnReader(const ColumnMetaData* metadata,
+ColumnReader::ColumnReader(const parquet::ColumnMetaData* metadata,
     const SchemaElement* schema, InputStream* stream)
   : metadata_(metadata),
     schema_(schema),
@@ -96,7 +100,7 @@ ColumnReader::ColumnReader(const ColumnMetaData* metadata,
 
 void ColumnReader::BatchDecode() {
   buffered_values_offset_ = 0;
-  uint8_t* buf= &values_buffer_[0];
+  uint8_t* buf = &values_buffer_[0];
   int batch_size = config_.batch_size;
   switch (metadata_->type) {
     case parquet::Type::BOOLEAN:
@@ -164,7 +168,7 @@ bool ColumnReader::ReadNewPage() {
     }
 
     if (current_page_header_.type == PageType::DICTIONARY_PAGE) {
-      boost::unordered_map<Encoding::type, boost::shared_ptr<Decoder> >::iterator it =
+      std::unordered_map<Encoding::type, std::shared_ptr<Decoder> >::iterator it =
           decoders_.find(Encoding::RLE_DICTIONARY);
       if (it != decoders_.end()) {
         throw ParquetException("Column cannot have more than one dictionary.");
@@ -173,7 +177,7 @@ bool ColumnReader::ReadNewPage() {
       PlainDecoder dictionary(schema_->type);
       dictionary.SetData(current_page_header_.dictionary_page_header.num_values,
           buffer, uncompressed_len);
-      boost::shared_ptr<Decoder> decoder(
+      std::shared_ptr<Decoder> decoder(
           new DictionaryDecoder(schema_->type, &dictionary));
       decoders_[Encoding::RLE_DICTIONARY] = decoder;
       current_decoder_ = decoders_[Encoding::RLE_DICTIONARY].get();
@@ -187,7 +191,7 @@ bool ColumnReader::ReadNewPage() {
         int num_definition_bytes = *reinterpret_cast<const uint32_t*>(buffer);
         buffer += sizeof(uint32_t);
         definition_level_decoder_.reset(
-            new impala::RleDecoder(buffer, num_definition_bytes, 1));
+            new RleDecoder(buffer, num_definition_bytes, 1));
         buffer += num_definition_bytes;
         uncompressed_len -= sizeof(uint32_t);
         uncompressed_len -= num_definition_bytes;
@@ -200,14 +204,14 @@ bool ColumnReader::ReadNewPage() {
       Encoding::type encoding = current_page_header_.data_page_header.encoding;
       if (IsDictionaryIndexEncoding(encoding)) encoding = Encoding::RLE_DICTIONARY;
 
-      boost::unordered_map<Encoding::type, boost::shared_ptr<Decoder> >::iterator it =
+      std::unordered_map<Encoding::type, std::shared_ptr<Decoder> >::iterator it =
           decoders_.find(encoding);
       if (it != decoders_.end()) {
         current_decoder_ = it->second.get();
       } else {
         switch (encoding) {
           case Encoding::PLAIN: {
-            boost::shared_ptr<Decoder> decoder;
+            std::shared_ptr<Decoder> decoder;
             if (schema_->type == Type::BOOLEAN) {
               decoder.reset(new BoolDecoder());
             } else {
@@ -239,5 +243,4 @@ bool ColumnReader::ReadNewPage() {
   return true;
 }
 
-}
-
+} // namespace parquet_cpp

http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/337cf584/src/parquet/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/src/parquet/CMakeLists.txt b/src/parquet/CMakeLists.txt
new file mode 100644
index 0000000..11eaeb6
--- /dev/null
+++ b/src/parquet/CMakeLists.txt
@@ -0,0 +1,18 @@
+# Copyright 2015 Cloudera Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Headers: top level
+install(FILES
+  parquet.h
+  DESTINATION include/parquet)

http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/337cf584/src/parquet/compression/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/src/parquet/compression/CMakeLists.txt b/src/parquet/compression/CMakeLists.txt
new file mode 100644
index 0000000..291ef03
--- /dev/null
+++ b/src/parquet/compression/CMakeLists.txt
@@ -0,0 +1,30 @@
+# Copyright 2012 Cloudera Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+add_library(parquet_compression STATIC
+  lz4-codec.cc
+  snappy-codec.cc
+)
+target_link_libraries(parquet_compression
+  lz4static
+  snappystatic)
+
+# parquet_compression is declared STATIC above. CMake places static library
+# files according to ARCHIVE_OUTPUT_DIRECTORY, so that property is set as well;
+# LIBRARY_OUTPUT_DIRECTORY is kept for when the target is built as a shared
+# library.
+set_target_properties(parquet_compression
+  PROPERTIES
+  ARCHIVE_OUTPUT_DIRECTORY "${BUILD_OUTPUT_ROOT_DIRECTORY}"
+  LIBRARY_OUTPUT_DIRECTORY "${BUILD_OUTPUT_ROOT_DIRECTORY}")
+
+# Headers: compression
+install(FILES
+  codec.h
+  DESTINATION include/parquet/compression)

http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/337cf584/src/parquet/compression/codec.h
----------------------------------------------------------------------
diff --git a/src/parquet/compression/codec.h b/src/parquet/compression/codec.h
new file mode 100644
index 0000000..8166847
--- /dev/null
+++ b/src/parquet/compression/codec.h
@@ -0,0 +1,71 @@
+// Copyright 2012 Cloudera Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef PARQUET_COMPRESSION_CODEC_H
+#define PARQUET_COMPRESSION_CODEC_H
+
+#include "parquet/parquet.h"
+
+#include <cstdint>
+#include "parquet/thrift/parquet_constants.h"
+#include "parquet/thrift/parquet_types.h"
+
+namespace parquet_cpp {
+
+// Abstract interface for a compression codec. Every operation is pure virtual;
+// SnappyCodec and Lz4Codec below provide the concrete implementations.
+class Codec {
+ public:
+  virtual ~Codec() {}
+  // Decompresses the 'input_len' bytes at 'input' into 'output_buffer'.
+  // NOTE(review): 'output_len' is presumably the exact uncompressed size the
+  // caller expects, not just a capacity -- confirm against callers.
+  virtual void Decompress(int input_len, const uint8_t* input,
+      int output_len, uint8_t* output_buffer) = 0;
+
+  // Compresses the 'input_len' bytes at 'input' into 'output_buffer', whose
+  // size is passed as 'output_buffer_len'. Returns an int; presumably the
+  // number of compressed bytes written -- confirm against implementations.
+  virtual int Compress(int input_len, const uint8_t* input,
+      int output_buffer_len, uint8_t* output_buffer) = 0;
+
+  // Returns an upper bound on the compressed size of 'input_len' bytes of
+  // input (presumably used to size the buffer handed to Compress()).
+  virtual int MaxCompressedLen(int input_len, const uint8_t* input) = 0;
+
+  // Returns the codec's name as a C string.
+  virtual const char* name() const = 0;
+};
+
+
+// Codec implemented on top of the Snappy library; the member definitions live
+// in snappy-codec.cc.
+//
+// Each member is marked 'override' rather than repeating 'virtual', so a
+// signature that drifts away from Codec is a compile error instead of silently
+// introducing a new, unrelated virtual function.
+class SnappyCodec : public Codec {
+ public:
+  // Decompresses 'input_len' bytes at 'input' into 'output_buffer'.
+  void Decompress(int input_len, const uint8_t* input,
+      int output_len, uint8_t* output_buffer) override;
+
+  // Compresses 'input_len' bytes at 'input' into 'output_buffer' and returns
+  // the compressed length.
+  int Compress(int input_len, const uint8_t* input,
+      int output_buffer_len, uint8_t* output_buffer) override;
+
+  // Upper bound on the compressed size of 'input_len' bytes.
+  int MaxCompressedLen(int input_len, const uint8_t* input) override;
+
+  const char* name() const override { return "snappy"; }
+};
+
+// Codec implemented on top of the LZ4 library; the member definitions live in
+// lz4-codec.cc.
+//
+// Each member is marked 'override' rather than repeating 'virtual', so a
+// signature that drifts away from Codec is a compile error instead of silently
+// introducing a new, unrelated virtual function.
+class Lz4Codec : public Codec {
+ public:
+  // Decompresses 'input_len' bytes at 'input' into 'output_buffer';
+  // 'output_len' is the size the decompressed data is expected to have.
+  void Decompress(int input_len, const uint8_t* input,
+      int output_len, uint8_t* output_buffer) override;
+
+  // Compresses 'input_len' bytes at 'input' into 'output_buffer' and returns
+  // the value reported by LZ4.
+  int Compress(int input_len, const uint8_t* input,
+      int output_buffer_len, uint8_t* output_buffer) override;
+
+  // Upper bound on the compressed size of 'input_len' bytes.
+  int MaxCompressedLen(int input_len, const uint8_t* input) override;
+
+  const char* name() const override { return "lz4"; }
+};
+
+} // namespace parquet_cpp
+
+#endif

http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/337cf584/src/parquet/compression/lz4-codec.cc
----------------------------------------------------------------------
diff --git a/src/parquet/compression/lz4-codec.cc b/src/parquet/compression/lz4-codec.cc
new file mode 100644
index 0000000..6655387
--- /dev/null
+++ b/src/parquet/compression/lz4-codec.cc
@@ -0,0 +1,40 @@
+// Copyright 2012 Cloudera Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "parquet/compression/codec.h"
+
+#include <lz4.h>
+
+namespace parquet_cpp {
+
+// Decompresses the LZ4 block of 'input_len' bytes at 'input' into
+// 'output_buffer', which must have room for 'output_len' bytes. 'output_len'
+// must be the exact size of the original, uncompressed data.
+//
+// LZ4_decompress_safe is given both the compressed size and the output
+// capacity, so a corrupt or truncated block cannot make it read past 'input' or
+// write past 'output_buffer'. The older LZ4_uncompress was never told
+// 'input_len' and offered no such protection.
+//
+// Throws ParquetException when the block is malformed (negative result) or does
+// not decompress to exactly 'output_len' bytes.
+void Lz4Codec::Decompress(int input_len, const uint8_t* input,
+      int output_len, uint8_t* output_buffer) {
+  const int num_decompressed = LZ4_decompress_safe(
+      reinterpret_cast<const char*>(input),
+      reinterpret_cast<char*>(output_buffer), input_len, output_len);
+  if (num_decompressed != output_len) {
+    throw parquet_cpp::ParquetException("Corrupt lz4 compressed data.");
+  }
+}
+
+// Returns LZ4_compressBound(input_len): the bound LZ4 reports for compressing
+// 'input_len' bytes. The 'input' pointer itself is not examined.
+int Lz4Codec::MaxCompressedLen(int input_len, const uint8_t* input) {
+  const int worst_case_len = LZ4_compressBound(input_len);
+  return worst_case_len;
+}
+
+// Compresses 'input_len' bytes at 'input' into 'output_buffer' and returns the
+// number of compressed bytes written.
+//
+// The bounded LZ4 entry point is used so that 'output_buffer_len' is honoured:
+// LZ4 stops and reports failure instead of writing past the end of
+// 'output_buffer'. A buffer of MaxCompressedLen(input_len) bytes is always
+// large enough.
+//
+// Throws ParquetException when the compressed data does not fit.
+int Lz4Codec::Compress(int input_len, const uint8_t* input,
+    int output_buffer_len, uint8_t* output_buffer) {
+  const int num_compressed = LZ4_compress_limitedOutput(
+      reinterpret_cast<const char*>(input),
+      reinterpret_cast<char*>(output_buffer), input_len, output_buffer_len);
+  // LZ4 reports 0 when the output did not fit in 'output_buffer_len' bytes.
+  if (num_compressed <= 0) {
+    throw parquet_cpp::ParquetException("Lz4 output buffer too small.");
+  }
+  return num_compressed;
+}
+
+} // namespace parquet_cpp

http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/337cf584/src/parquet/compression/snappy-codec.cc
----------------------------------------------------------------------
diff --git a/src/parquet/compression/snappy-codec.cc b/src/parquet/compression/snappy-codec.cc
new file mode 100644
index 0000000..0633d47
--- /dev/null
+++ b/src/parquet/compression/snappy-codec.cc
@@ -0,0 +1,42 @@
+// Copyright 2012 Cloudera Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "parquet/compression/codec.h"
+
+#include <snappy.h>
+
+namespace parquet_cpp {
+
+// Decompresses the Snappy block of 'input_len' bytes at 'input' into
+// 'output_buffer', which must have room for 'output_len' bytes.
+//
+// snappy::RawUncompress writes as many bytes as the block's header announces
+// and has no notion of the destination's capacity. The announced size is
+// therefore read first and checked against 'output_len', so a corrupt or
+// hostile block cannot overflow 'output_buffer'.
+//
+// Throws ParquetException when the header cannot be parsed, when the announced
+// size exceeds 'output_len', or when decompression itself fails.
+void SnappyCodec::Decompress(int input_len, const uint8_t* input,
+      int output_len, uint8_t* output_buffer) {
+  const char* compressed = reinterpret_cast<const char*>(input);
+  const size_t compressed_len = static_cast<size_t>(input_len);
+
+  size_t uncompressed_len = 0;
+  const bool fits = output_len >= 0 &&
+      snappy::GetUncompressedLength(compressed, compressed_len,
+          &uncompressed_len) &&
+      uncompressed_len <= static_cast<size_t>(output_len);
+
+  if (!fits || !snappy::RawUncompress(compressed, compressed_len,
+      reinterpret_cast<char*>(output_buffer))) {
+    throw parquet_cpp::ParquetException("Corrupt snappy compressed data.");
+  }
+}
+
+// Returns snappy::MaxCompressedLength(input_len), converted to int. The 'input'
+// pointer itself is not examined.
+int SnappyCodec::MaxCompressedLen(int input_len, const uint8_t* input) {
+  const size_t worst_case_len = snappy::MaxCompressedLength(input_len);
+  return worst_case_len;
+}
+
+// Compresses 'input_len' bytes at 'input' into 'output_buffer' using
+// snappy::RawCompress and returns the compressed length it reports.
+// NOTE(review): 'output_buffer_len' is never consulted here; presumably callers
+// size the buffer with MaxCompressedLen() -- confirm.
+int SnappyCodec::Compress(int input_len, const uint8_t* input,
+    int output_buffer_len, uint8_t* output_buffer) {
+  const char* source = reinterpret_cast<const char*>(input);
+  char* destination = reinterpret_cast<char*>(output_buffer);
+  size_t compressed_len;
+  snappy::RawCompress(source, static_cast<size_t>(input_len), destination,
+      &compressed_len);
+  return compressed_len;
+}
+
+} // namespace parquet_cpp

http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/337cf584/src/parquet/encodings/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/src/parquet/encodings/CMakeLists.txt b/src/parquet/encodings/CMakeLists.txt
new file mode 100644
index 0000000..72baf48
--- /dev/null
+++ b/src/parquet/encodings/CMakeLists.txt
@@ -0,0 +1,24 @@
+# Copyright 2015 Cloudera Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Headers: encodings
+install(FILES
+  encodings.h
+  bool-encoding.h
+  delta-bit-pack-encoding.h
+  delta-byte-array-encoding.h
+  delta-length-byte-array-encoding.h
+  dictionary-encoding.h
+  plain-encoding.h
+  DESTINATION include/parquet/encodings)

http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/337cf584/src/parquet/encodings/bool-encoding.h
----------------------------------------------------------------------
diff --git a/src/parquet/encodings/bool-encoding.h b/src/parquet/encodings/bool-encoding.h
new file mode 100644
index 0000000..8eb55bc
--- /dev/null
+++ b/src/parquet/encodings/bool-encoding.h
@@ -0,0 +1,48 @@
+// Copyright 2012 Cloudera Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef PARQUET_BOOL_ENCODING_H
+#define PARQUET_BOOL_ENCODING_H
+
+#include "parquet/encodings/encodings.h"
+
+#include <algorithm>
+
+namespace parquet_cpp {
+
+// Decoder registered for BOOLEAN columns with the PLAIN encoding. Values are
+// pulled one at a time from an RleDecoder configured with a bit width of 1.
+// NOTE(review): confirm that RleDecoder's input format matches how PLAIN
+// booleans are laid out in the page data.
+class BoolDecoder : public Decoder {
+ public:
+  BoolDecoder() : Decoder(parquet::Type::BOOLEAN, parquet::Encoding::PLAIN) { }
+
+  // Points the decoder at a new buffer of 'len' bytes starting at 'data' and
+  // records that 'num_values' values remain to be read from it.
+  virtual void SetData(int num_values, const uint8_t* data, int len) {
+    decoder_ = RleDecoder(data, len, 1);
+    num_values_ = num_values;
+  }
+
+  // Decodes up to 'max_values' booleans into 'buffer', capped at the number of
+  // values still remaining, and returns how many were decoded. Calls
+  // ParquetException::EofException() if the underlying decoder runs dry early.
+  virtual int GetBool(bool* buffer, int max_values) {
+    const int num_to_decode = std::min(max_values, num_values_);
+    for (int i = 0; i < num_to_decode; ++i) {
+      bool* slot = &buffer[i];
+      if (!decoder_.Get(slot)) ParquetException::EofException();
+    }
+    num_values_ -= num_to_decode;
+    return num_to_decode;
+  }
+
+ private:
+  // Bit reader over the buffer most recently passed to SetData().
+  RleDecoder decoder_;
+};
+
+} // namespace parquet_cpp
+
+#endif

http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/337cf584/src/parquet/encodings/delta-bit-pack-encoding.h
----------------------------------------------------------------------
diff --git a/src/parquet/encodings/delta-bit-pack-encoding.h b/src/parquet/encodings/delta-bit-pack-encoding.h
new file mode 100644
index 0000000..77a3b26
--- /dev/null
+++ b/src/parquet/encodings/delta-bit-pack-encoding.h
@@ -0,0 +1,116 @@
+// Copyright 2012 Cloudera Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef PARQUET_DELTA_BIT_PACK_ENCODING_H
+#define PARQUET_DELTA_BIT_PACK_ENCODING_H
+
+#include "parquet/encodings/encodings.h"
+
+#include <algorithm>
+#include <vector>
+
+namespace parquet_cpp {
+
+// Decoder for DELTA_BINARY_PACKED pages of INT32 or INT64 values.
+//
+// Each value is reconstructed as the previous value plus a bit-packed delta.
+// Deltas are grouped into mini blocks, each with its own bit width; the header
+// fields consumed for a block are listed on InitBlock().
+class DeltaBitPackDecoder : public Decoder {
+ public:
+  // Throws ParquetException if 'type' is neither INT32 nor INT64.
+  explicit DeltaBitPackDecoder(const parquet::Type::type& type)
+    : Decoder(type, parquet::Encoding::DELTA_BINARY_PACKED),
+      values_current_block_(0),
+      num_mini_blocks_(0),
+      values_per_mini_block_(0),
+      values_current_mini_block_(0),
+      min_delta_(0),
+      mini_block_idx_(0),
+      delta_bit_width_(0),
+      last_value_(0) {
+    if (type != parquet::Type::INT32 && type != parquet::Type::INT64) {
+      throw ParquetException("Delta bit pack encoding should only be for integer data.");
+    }
+  }
+
+  // Starts decoding a new page of 'num_values' values stored in the 'len' bytes
+  // at 'data'. All block state left over from the previous page is discarded so
+  // that the first Get*() call reads a fresh block header.
+  virtual void SetData(int num_values, const uint8_t* data, int len) {
+    num_values_ = num_values;
+    decoder_ = BitReader(data, len);
+    values_current_block_ = 0;
+    values_current_mini_block_ = 0;
+    // Without these two resets a stale mini block index that is still within the
+    // previous page's bit-width table would make GetInternal() skip InitBlock().
+    mini_block_idx_ = 0;
+    delta_bit_widths_.clear();
+  }
+
+  virtual int GetInt32(int32_t* buffer, int max_values) {
+    return GetInternal(buffer, max_values);
+  }
+
+  virtual int GetInt64(int64_t* buffer, int max_values) {
+    return GetInternal(buffer, max_values);
+  }
+
+ private:
+  // Reads a block header from the stream, in this order: block size (VLQ), number
+  // of mini blocks (VLQ), value count (VLQ), first value (zigzag VLQ), minimum
+  // delta (zigzag VLQ), then one bit-width byte per mini block.
+  // Calls ParquetException::EofException() if the stream ends early and throws
+  // ParquetException if the header declares zero mini blocks.
+  void InitBlock() {
+    uint64_t block_size;
+    if (!decoder_.GetVlqInt(&block_size)) ParquetException::EofException();
+    if (!decoder_.GetVlqInt(&num_mini_blocks_)) ParquetException::EofException();
+    if (!decoder_.GetVlqInt(&values_current_block_)) {
+      ParquetException::EofException();
+    }
+    if (!decoder_.GetZigZagVlqInt(&last_value_)) ParquetException::EofException();
+    // Zero mini blocks would divide by zero below and index an empty vector.
+    if (num_mini_blocks_ == 0) {
+      throw ParquetException("Delta bit pack block declares zero mini blocks.");
+    }
+    delta_bit_widths_.resize(num_mini_blocks_);
+
+    if (!decoder_.GetZigZagVlqInt(&min_delta_)) ParquetException::EofException();
+    for (uint64_t i = 0; i < num_mini_blocks_; ++i) {
+      if (!decoder_.GetAligned<uint8_t>(1, &delta_bit_widths_[i])) {
+        ParquetException::EofException();
+      }
+    }
+    values_per_mini_block_ = block_size / num_mini_blocks_;
+    mini_block_idx_ = 0;
+    delta_bit_width_ = delta_bit_widths_[0];
+    values_current_mini_block_ = values_per_mini_block_;
+  }
+
+  // Decodes min(max_values, values_left()) values into 'buffer' and returns that
+  // count. T is the output integer type (int32_t or int64_t).
+  template <typename T>
+  int GetInternal(T* buffer, int max_values) {
+    max_values = std::min(max_values, num_values_);
+    for (int i = 0; i < max_values; ++i) {
+      if (UNLIKELY(values_current_mini_block_ == 0)) {
+        // Current mini block is exhausted: move to the next one, or read a new
+        // block header when there are no mini blocks left.
+        ++mini_block_idx_;
+        if (mini_block_idx_ < static_cast<int>(delta_bit_widths_.size())) {
+          delta_bit_width_ = delta_bit_widths_[mini_block_idx_];
+          values_current_mini_block_ = values_per_mini_block_;
+        } else {
+          InitBlock();
+          // The header carries the first value itself rather than a delta.
+          buffer[i] = last_value_;
+          continue;
+        }
+      }
+
+      // TODO: the key to this algorithm is to decode the entire miniblock at once.
+      int64_t delta;
+      if (!decoder_.GetValue(delta_bit_width_, &delta)) ParquetException::EofException();
+      delta += min_delta_;
+      last_value_ += delta;
+      buffer[i] = last_value_;
+      --values_current_mini_block_;
+    }
+    num_values_ -= max_values;
+    return max_values;
+  }
+
+  BitReader decoder_;
+  uint64_t values_current_block_;
+  uint64_t num_mini_blocks_;
+  uint64_t values_per_mini_block_;
+  uint64_t values_current_mini_block_;
+
+  int64_t min_delta_;
+  int mini_block_idx_;
+  std::vector<uint8_t> delta_bit_widths_;
+  int delta_bit_width_;
+
+  int64_t last_value_;
+};
+
+} // namespace parquet_cpp
+
+#endif

http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/337cf584/src/parquet/encodings/delta-byte-array-encoding.h
----------------------------------------------------------------------
diff --git a/src/parquet/encodings/delta-byte-array-encoding.h b/src/parquet/encodings/delta-byte-array-encoding.h
new file mode 100644
index 0000000..3396586
--- /dev/null
+++ b/src/parquet/encodings/delta-byte-array-encoding.h
@@ -0,0 +1,74 @@
+// Copyright 2012 Cloudera Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef PARQUET_DELTA_BYTE_ARRAY_ENCODING_H
+#define PARQUET_DELTA_BYTE_ARRAY_ENCODING_H
+
+#include "parquet/encodings/encodings.h"
+
+#include <algorithm>
+
+namespace parquet_cpp {
+
+// Decoder for DELTA_BYTE_ARRAY pages. Each value is rebuilt from a prefix taken
+// from the previously decoded value plus a suffix. Prefix lengths come from a
+// DeltaBitPackDecoder and suffixes from a DeltaLengthByteArrayDecoder.
+class DeltaByteArrayDecoder : public Decoder {
+ public:
+  DeltaByteArrayDecoder()
+    : Decoder(parquet::Type::BYTE_ARRAY, parquet::Encoding::DELTA_BYTE_ARRAY),
+      prefix_len_decoder_(parquet::Type::INT32),
+      suffix_decoder_() {
+    last_value_.len = 0;
+    last_value_.ptr = NULL;
+  }
+
+  // Starts decoding a new page of 'num_values' values stored in the 'len' bytes
+  // at 'data'. The page layout read here is: a 4-byte length of the encoded
+  // prefix lengths, the encoded prefix lengths, then the suffix data.
+  // Calls ParquetException::EofException() if the page is too short for the
+  // sizes it declares.
+  virtual void SetData(int num_values, const uint8_t* data, int len) {
+    num_values_ = num_values;
+    // A new page has no previous value to take a prefix from.
+    last_value_.len = 0;
+    last_value_.ptr = NULL;
+    if (len == 0) return;
+    if (len < 4) ParquetException::EofException();
+
+    // memcpy instead of a pointer cast: 'data' is not guaranteed to be aligned.
+    int32_t prefix_len_length = 0;
+    memcpy(&prefix_len_length, data, sizeof(int32_t));
+    data += 4;
+    len -= 4;
+    if (prefix_len_length < 0 || prefix_len_length > len) {
+      ParquetException::EofException();
+    }
+    prefix_len_decoder_.SetData(num_values, data, prefix_len_length);
+    data += prefix_len_length;
+    len -= prefix_len_length;
+    suffix_decoder_.SetData(num_values, data, len);
+  }
+
+  // Decodes min(max_values, values_left()) values into 'buffer' and returns that
+  // count. Throws ParquetException when a prefix length is negative or longer
+  // than the previous value, or when allocation fails.
+  //
+  // TODO: this doesn't work and requires memory management. We need to allocate
+  // new strings to store the results. Each returned ByteArray currently points at
+  // a malloc'ed buffer that this class never frees.
+  virtual int GetByteArray(ByteArray* buffer, int max_values) {
+    max_values = std::min(max_values, num_values_);
+    for (int i = 0; i < max_values; ++i) {
+      int prefix_len = 0;
+      if (prefix_len_decoder_.GetInt32(&prefix_len, 1) != 1) {
+        ParquetException::EofException();
+      }
+      // The prefix is copied out of last_value_, so it cannot be longer than it.
+      if (prefix_len < 0 ||
+          static_cast<int64_t>(prefix_len) > static_cast<int64_t>(last_value_.len)) {
+        throw ParquetException("Invalid prefix length in delta byte array data.");
+      }
+      ByteArray suffix;
+      if (suffix_decoder_.GetByteArray(&suffix, 1) != 1) {
+        ParquetException::EofException();
+      }
+      buffer[i].len = prefix_len + suffix.len;
+
+      uint8_t* result = reinterpret_cast<uint8_t*>(malloc(buffer[i].len));
+      // malloc(0) may legitimately return NULL, so only a non-empty value fails.
+      if (result == NULL && buffer[i].len > 0) {
+        throw ParquetException("Failed to allocate memory for byte array value.");
+      }
+      if (prefix_len > 0) memcpy(result, last_value_.ptr, prefix_len);
+      if (suffix.len > 0) memcpy(result + prefix_len, suffix.ptr, suffix.len);
+
+      buffer[i].ptr = result;
+      last_value_ = buffer[i];
+    }
+    num_values_ -= max_values;
+    return max_values;
+  }
+
+ private:
+  DeltaBitPackDecoder prefix_len_decoder_;
+  DeltaLengthByteArrayDecoder suffix_decoder_;
+  // Most recently decoded value; the source of the next value's prefix.
+  ByteArray last_value_;
+};
+
+} // namespace parquet_cpp
+
+#endif

http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/337cf584/src/parquet/encodings/delta-length-byte-array-encoding.h
----------------------------------------------------------------------
diff --git a/src/parquet/encodings/delta-length-byte-array-encoding.h b/src/parquet/encodings/delta-length-byte-array-encoding.h
new file mode 100644
index 0000000..06bf39d
--- /dev/null
+++ b/src/parquet/encodings/delta-length-byte-array-encoding.h
@@ -0,0 +1,63 @@
+// Copyright 2012 Cloudera Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef PARQUET_DELTA_LENGTH_BYTE_ARRAY_ENCODING_H
+#define PARQUET_DELTA_LENGTH_BYTE_ARRAY_ENCODING_H
+
+#include "parquet/encodings/encodings.h"
+
+#include <algorithm>
+#include <cstring>
+
+namespace parquet_cpp {
+
+// Decoder for DELTA_LENGTH_BYTE_ARRAY pages. The page stores all value lengths
+// first (decoded with a DeltaBitPackDecoder) followed by the concatenated value
+// bytes. Returned ByteArrays point directly into the page buffer.
+class DeltaLengthByteArrayDecoder : public Decoder {
+ public:
+  DeltaLengthByteArrayDecoder()
+    : Decoder(parquet::Type::BYTE_ARRAY, parquet::Encoding::DELTA_LENGTH_BYTE_ARRAY),
+      len_decoder_(parquet::Type::INT32),
+      data_(NULL),
+      len_(0) {
+  }
+
+  // Starts decoding a new page of 'num_values' values stored in the 'len' bytes
+  // at 'data'. The page begins with a 4-byte size of the encoded lengths section.
+  // Calls ParquetException::EofException() if the page is too short for the
+  // sizes it declares.
+  virtual void SetData(int num_values, const uint8_t* data, int len) {
+    num_values_ = num_values;
+    data_ = NULL;
+    len_ = 0;
+    if (len == 0) return;
+    if (len < 4) ParquetException::EofException();
+
+    // memcpy instead of a pointer cast: 'data' is not guaranteed to be aligned.
+    int32_t total_lengths_len = 0;
+    memcpy(&total_lengths_len, data, sizeof(int32_t));
+    data += 4;
+    len -= 4;
+    if (total_lengths_len < 0 || total_lengths_len > len) {
+      ParquetException::EofException();
+    }
+    len_decoder_.SetData(num_values, data, total_lengths_len);
+    data_ = data + total_lengths_len;
+    len_ = len - total_lengths_len;
+  }
+
+  // Decodes min(max_values, values_left()) values into 'buffer' and returns that
+  // count. Calls ParquetException::EofException() if a length cannot be decoded
+  // or does not fit in the remaining value bytes.
+  virtual int GetByteArray(ByteArray* buffer, int max_values) {
+    max_values = std::min(max_values, num_values_);
+    // Lengths are decoded one at a time rather than into a stack array sized by
+    // 'max_values', which is not standard C++ and is unbounded in size.
+    for (int i = 0; i < max_values; ++i) {
+      int length = 0;
+      if (len_decoder_.GetInt32(&length, 1) != 1) ParquetException::EofException();
+      if (length < 0 || length > len_) ParquetException::EofException();
+      buffer[i].len = length;
+      buffer[i].ptr = data_;
+      data_ += length;
+      len_ -= length;
+    }
+    num_values_ -= max_values;
+    return max_values;
+  }
+
+ private:
+  DeltaBitPackDecoder len_decoder_;
+  // Next unread value byte and the number of value bytes remaining.
+  const uint8_t* data_;
+  int len_;
+};
+
+} // namespace parquet_cpp
+
+#endif

http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/337cf584/src/parquet/encodings/dictionary-encoding.h
----------------------------------------------------------------------
diff --git a/src/parquet/encodings/dictionary-encoding.h b/src/parquet/encodings/dictionary-encoding.h
new file mode 100644
index 0000000..2501b2a
--- /dev/null
+++ b/src/parquet/encodings/dictionary-encoding.h
@@ -0,0 +1,148 @@
+// Copyright 2012 Cloudera Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef PARQUET_DICTIONARY_ENCODING_H
+#define PARQUET_DICTIONARY_ENCODING_H
+
+#include "parquet/encodings/encodings.h"
+
+#include <algorithm>
+#include <vector>
+
+namespace parquet_cpp {
+
+// Decoder for dictionary-encoded pages. The dictionary values are copied out of
+// another decoder at construction time; each data page then holds a one-byte bit
+// width followed by RLE-encoded indices into that dictionary.
+class DictionaryDecoder : public Decoder {
+ public:
+  // Initializes the dictionary with values from 'dictionary'. The data in dictionary
+  // is not guaranteed to persist in memory after this call so the dictionary decoder
+  // needs to copy the data out if necessary.
+  // Throws ParquetException for BOOLEAN and calls ParquetException::NYI() for any
+  // other type not handled below.
+  DictionaryDecoder(const parquet::Type::type& type, Decoder* dictionary)
+    : Decoder(type, parquet::Encoding::RLE_DICTIONARY) {
+    int num_dictionary_values = dictionary->values_left();
+    // data() is used instead of &v[0] so that an empty dictionary is well defined.
+    switch (type) {
+      case parquet::Type::BOOLEAN:
+        throw ParquetException("Boolean cols should not be dictionary encoded.");
+
+      case parquet::Type::INT32:
+        int32_dictionary_.resize(num_dictionary_values);
+        dictionary->GetInt32(int32_dictionary_.data(), num_dictionary_values);
+        break;
+      case parquet::Type::INT64:
+        int64_dictionary_.resize(num_dictionary_values);
+        dictionary->GetInt64(int64_dictionary_.data(), num_dictionary_values);
+        break;
+      case parquet::Type::FLOAT:
+        float_dictionary_.resize(num_dictionary_values);
+        dictionary->GetFloat(float_dictionary_.data(), num_dictionary_values);
+        break;
+      case parquet::Type::DOUBLE:
+        double_dictionary_.resize(num_dictionary_values);
+        dictionary->GetDouble(double_dictionary_.data(), num_dictionary_values);
+        break;
+      case parquet::Type::BYTE_ARRAY: {
+        byte_array_dictionary_.resize(num_dictionary_values);
+        dictionary->GetByteArray(byte_array_dictionary_.data(), num_dictionary_values);
+        int total_size = 0;
+        for (int i = 0; i < num_dictionary_values; ++i) {
+          total_size += byte_array_dictionary_[i].len;
+        }
+        // Copy every value into one owned buffer and repoint the entries at it.
+        byte_array_data_.resize(total_size);
+        int offset = 0;
+        for (int i = 0; i < num_dictionary_values; ++i) {
+          ByteArray& entry = byte_array_dictionary_[i];
+          uint8_t* copy = byte_array_data_.data() + offset;
+          if (entry.len > 0) memcpy(copy, entry.ptr, entry.len);
+          entry.ptr = copy;
+          offset += entry.len;
+        }
+        break;
+      }
+      default:
+        ParquetException::NYI("Unsupported dictionary type");
+    }
+  }
+
+  // Starts decoding a new page of 'num_values' indices. The first byte of 'data'
+  // is the bit width of the RLE-encoded indices that follow.
+  virtual void SetData(int num_values, const uint8_t* data, int len) {
+    num_values_ = num_values;
+    if (len == 0) return;
+    uint8_t bit_width = *data;
+    ++data;
+    --len;
+    idx_decoder_ = RleDecoder(data, len, bit_width);
+  }
+
+  virtual int GetInt32(int32_t* buffer, int max_values) {
+    return GetFromDictionary(int32_dictionary_, buffer, max_values);
+  }
+
+  virtual int GetInt64(int64_t* buffer, int max_values) {
+    return GetFromDictionary(int64_dictionary_, buffer, max_values);
+  }
+
+  virtual int GetFloat(float* buffer, int max_values) {
+    return GetFromDictionary(float_dictionary_, buffer, max_values);
+  }
+
+  virtual int GetDouble(double* buffer, int max_values) {
+    return GetFromDictionary(double_dictionary_, buffer, max_values);
+  }
+
+  virtual int GetByteArray(ByteArray* buffer, int max_values) {
+    return GetFromDictionary(byte_array_dictionary_, buffer, max_values);
+  }
+
+ private:
+  // Decodes min(max_values, values_left()) indices, stores the matching entries
+  // of 'dictionary' in 'buffer' and returns that count.
+  // Throws ParquetException if a decoded index is outside 'dictionary'.
+  template <typename T>
+  int GetFromDictionary(const std::vector<T>& dictionary, T* buffer, int max_values) {
+    max_values = std::min(max_values, num_values_);
+    const int dictionary_size = static_cast<int>(dictionary.size());
+    for (int i = 0; i < max_values; ++i) {
+      const int idx = index();
+      if (idx < 0 || idx >= dictionary_size) {
+        throw ParquetException("Dictionary index out of range.");
+      }
+      buffer[i] = dictionary[idx];
+    }
+    return max_values;
+  }
+
+  // Reads the next dictionary index and counts one value as consumed.
+  // Calls ParquetException::EofException() if no index can be read.
+  int index() {
+    int idx = 0;
+    if (!idx_decoder_.Get(&idx)) ParquetException::EofException();
+    --num_values_;
+    return idx;
+  }
+
+  // Only one is set.
+  std::vector<int32_t> int32_dictionary_;
+  std::vector<int64_t> int64_dictionary_;
+  std::vector<float> float_dictionary_;
+  std::vector<double> double_dictionary_;
+  std::vector<ByteArray> byte_array_dictionary_;
+
+  // Data that contains the byte array data (byte_array_dictionary_ just has the
+  // pointers).
+  std::vector<uint8_t> byte_array_data_;
+
+  RleDecoder idx_decoder_;
+};
+
+} // namespace parquet_cpp
+
+#endif

http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/337cf584/src/parquet/encodings/encodings.h
----------------------------------------------------------------------
diff --git a/src/parquet/encodings/encodings.h b/src/parquet/encodings/encodings.h
new file mode 100644
index 0000000..9211bf8
--- /dev/null
+++ b/src/parquet/encodings/encodings.h
@@ -0,0 +1,82 @@
+// Copyright 2012 Cloudera Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef PARQUET_ENCODINGS_ENCODINGS_H
+#define PARQUET_ENCODINGS_ENCODINGS_H
+
+#include <cstdint>
+
+#include "parquet/thrift/parquet_constants.h"
+#include "parquet/thrift/parquet_types.h"
+#include "parquet/util/rle-encoding.h"
+#include "parquet/util/bit-stream-utils.inline.h"
+
+namespace parquet_cpp {
+
+// Abstract base class for page value decoders. A decoder is constructed for one
+// physical type and one encoding, is handed each page through SetData(), and
+// returns values through the typed Get*() function matching its type.
+class Decoder {
+ public:
+  virtual ~Decoder() {}
+
+  // Sets the data for a new page. This will be called multiple times on the same
+  // decoder and should reset all internal state.
+  virtual void SetData(int num_values, const uint8_t* data, int len) = 0;
+
+  // Subclasses should override the ones they support. In each of these functions,
+  // the decoder would decode up to 'max_values', storing the result in 'buffer'.
+  // The function returns the number of values decoded, which should be max_values
+  // except for end of the current data page.
+  // The default implementations throw ParquetException.
+  virtual int GetBool(bool* buffer, int max_values) {
+    throw ParquetException("Decoder does not implement this type.");
+  }
+  virtual int GetInt32(int32_t* buffer, int max_values) {
+    throw ParquetException("Decoder does not implement this type.");
+  }
+  virtual int GetInt64(int64_t* buffer, int max_values) {
+    throw ParquetException("Decoder does not implement this type.");
+  }
+  virtual int GetFloat(float* buffer, int max_values) {
+    throw ParquetException("Decoder does not implement this type.");
+  }
+  virtual int GetDouble(double* buffer, int max_values) {
+    throw ParquetException("Decoder does not implement this type.");
+  }
+  virtual int GetByteArray(ByteArray* buffer, int max_values) {
+    throw ParquetException("Decoder does not implement this type.");
+  }
+
+  // Returns the number of values left (for the last call to SetData()). This is
+  // the number of values left in this page.
+  int values_left() const { return num_values_; }
+
+  // Returns the encoding this decoder was constructed with. Returned by value, so
+  // the return type carries no const qualifier (it would be ignored).
+  parquet::Encoding::type encoding() const { return encoding_; }
+
+ protected:
+  // Only subclasses construct a Decoder; the value count starts at zero until
+  // SetData() is called.
+  Decoder(const parquet::Type::type& type, const parquet::Encoding::type& encoding)
+    : type_(type), encoding_(encoding), num_values_(0) {}
+
+  const parquet::Type::type type_;
+  const parquet::Encoding::type encoding_;
+  int num_values_;
+};
+
+} // namespace parquet_cpp
+
+#include "parquet/encodings/bool-encoding.h"
+#include "parquet/encodings/plain-encoding.h"
+#include "parquet/encodings/dictionary-encoding.h"
+#include "parquet/encodings/delta-bit-pack-encoding.h"
+#include "parquet/encodings/delta-length-byte-array-encoding.h"
+#include "parquet/encodings/delta-byte-array-encoding.h"
+
+#endif // PARQUET_ENCODINGS_ENCODINGS_H

http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/337cf584/src/parquet/encodings/plain-encoding.h
----------------------------------------------------------------------
diff --git a/src/parquet/encodings/plain-encoding.h b/src/parquet/encodings/plain-encoding.h
new file mode 100644
index 0000000..b094cdb
--- /dev/null
+++ b/src/parquet/encodings/plain-encoding.h
@@ -0,0 +1,83 @@
+// Copyright 2012 Cloudera Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef PARQUET_PLAIN_ENCODING_H
+#define PARQUET_PLAIN_ENCODING_H
+
+#include "parquet/encodings/encodings.h"
+
+#include <algorithm>
+
+namespace parquet_cpp {
+
+// Decoder for PLAIN-encoded pages. Fixed-width values are copied straight out of
+// the page buffer; byte arrays are stored as a 4-byte length followed by the
+// bytes, and the returned ByteArrays point into the page buffer.
+class PlainDecoder : public Decoder {
+ public:
+  explicit PlainDecoder(const parquet::Type::type& type)
+    : Decoder(type, parquet::Encoding::PLAIN), data_(NULL), len_(0) {
+  }
+
+  // Starts decoding a new page of 'num_values' values stored in the 'len' bytes
+  // at 'data'.
+  virtual void SetData(int num_values, const uint8_t* data, int len) {
+    num_values_ = num_values;
+    data_ = data;
+    len_ = len;
+  }
+
+  // Copies min(max_values, values_left()) values of 'byte_size' bytes each into
+  // 'buffer' and returns that count. Calls ParquetException::EofException() if
+  // the page does not hold that many bytes.
+  int GetValues(void* buffer, int max_values, int byte_size) {
+    max_values = std::min(max_values, num_values_);
+    // Computed in 64 bits so that a large value count cannot overflow int.
+    const int64_t size = static_cast<int64_t>(max_values) * byte_size;
+    if (len_ < size)  ParquetException::EofException();
+    // Nothing to copy for an empty request; data_ may still be NULL then.
+    if (size > 0) memcpy(buffer, data_, size);
+    data_ += size;
+    len_ -= static_cast<int>(size);
+    num_values_ -= max_values;
+    return max_values;
+  }
+
+  virtual int GetInt32(int32_t* buffer, int max_values) {
+    return GetValues(buffer, max_values, sizeof(int32_t));
+  }
+
+  virtual int GetInt64(int64_t* buffer, int max_values) {
+    return GetValues(buffer, max_values, sizeof(int64_t));
+  }
+
+  virtual int GetFloat(float* buffer, int max_values) {
+    return GetValues(buffer, max_values, sizeof(float));
+  }
+
+  virtual int GetDouble(double* buffer, int max_values) {
+    return GetValues(buffer, max_values, sizeof(double));
+  }
+
+  // Decodes min(max_values, values_left()) byte arrays into 'buffer' and returns
+  // that count. Calls ParquetException::EofException() if a length prefix or the
+  // bytes it announces run past the end of the page.
+  virtual int GetByteArray(ByteArray* buffer, int max_values) {
+    max_values = std::min(max_values, num_values_);
+    for (int i = 0; i < max_values; ++i) {
+      // The length prefix itself must be available before it is read.
+      if (len_ < static_cast<int>(sizeof(uint32_t))) ParquetException::EofException();
+      // memcpy instead of a pointer cast: data_ is not guaranteed to be aligned.
+      uint32_t value_len = 0;
+      memcpy(&value_len, data_, sizeof(uint32_t));
+      const int64_t increment = static_cast<int64_t>(sizeof(uint32_t)) + value_len;
+      if (len_ < increment) ParquetException::EofException();
+      buffer[i].len = value_len;
+      buffer[i].ptr = data_ + sizeof(uint32_t);
+      data_ += increment;
+      len_ -= static_cast<int>(increment);
+    }
+    num_values_ -= max_values;
+    return max_values;
+  }
+
+ private:
+  // Next unread byte of the page and the number of bytes remaining.
+  const uint8_t* data_;
+  int len_;
+};
+
+} // namespace parquet_cpp
+
+#endif

http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/337cf584/src/parquet/parquet.h
----------------------------------------------------------------------
diff --git a/src/parquet/parquet.h b/src/parquet/parquet.h
index c1a73b7..320f003 100644
--- a/src/parquet/parquet.h
+++ b/src/parquet/parquet.h
@@ -17,14 +17,18 @@
 
 #include <exception>
 #include <sstream>
-#include <boost/cstdint.hpp>
-#include <boost/scoped_ptr.hpp>
+#include <cstdint>
+#include <memory>
+#include <string>
+#include <unordered_map>
+#include <vector>
+
+// Needed for thrift
 #include <boost/shared_ptr.hpp>
-#include <boost/unordered_map.hpp>
-#include "gen-cpp/parquet_constants.h"
-#include "gen-cpp/parquet_types.h"
 
-#include "impala/rle-encoding.h"
+#include "parquet/thrift/parquet_constants.h"
+#include "parquet/thrift/parquet_types.h"
+#include "parquet/util/rle-encoding.h"
 
 // TCompactProtocol requires some #defines to work right.
 #define SIGNED_RIGHT_SHIFT_IS 1
@@ -36,6 +40,17 @@
 #include <thrift/protocol/TBinaryProtocol.h>
 #include <thrift/transport/TBufferTransports.h>
 
+namespace std {
+
+// Hash support for parquet::Encoding::type so that it can be used as a key in
+// unordered containers: the enumerator is hashed as its int value.
+template <>
+struct hash<parquet::Encoding::type> {
+  std::size_t operator()(const parquet::Encoding::type& encoding) const {
+    const int as_int = static_cast<int>(encoding);
+    std::hash<int> int_hasher;
+    return int_hasher(as_int);
+  }
+};
+
+} // namespace std
+
 namespace parquet_cpp {
 
 class Codec;
@@ -146,18 +161,18 @@ class ColumnReader {
   InputStream* stream_;
 
   // Compression codec to use.
-  boost::scoped_ptr<Codec> decompressor_;
+  std::unique_ptr<Codec> decompressor_;
   std::vector<uint8_t> decompression_buffer_;
 
   // Map of encoding type to decoder object.
-  boost::unordered_map<parquet::Encoding::type, boost::shared_ptr<Decoder> > decoders_;
+  std::unordered_map<parquet::Encoding::type, std::shared_ptr<Decoder> > decoders_;
 
   parquet::PageHeader current_page_header_;
 
   // Not set if field is required.
-  boost::scoped_ptr<impala::RleDecoder> definition_level_decoder_;
+  std::unique_ptr<RleDecoder> definition_level_decoder_;
   // Not set for flat schemas.
-  boost::scoped_ptr<impala::RleDecoder> repetition_level_decoder_;
+  std::unique_ptr<RleDecoder> repetition_level_decoder_;
   Decoder* current_decoder_;
   int num_buffered_values_;
 
@@ -241,7 +256,6 @@ inline void DeserializeThriftMsg(const uint8_t* buf, uint32_t* len, T* deseriali
   *len = *len - bytes_left;
 }
 
-}
+} // namespace parquet_cpp
 
 #endif
-

http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/337cf584/src/parquet/thrift/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/src/parquet/thrift/CMakeLists.txt b/src/parquet/thrift/CMakeLists.txt
new file mode 100644
index 0000000..e2a00c9
--- /dev/null
+++ b/src/parquet/thrift/CMakeLists.txt
@@ -0,0 +1,29 @@
+# Copyright 2012 Cloudera Inc.
+
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Static library built from the Thrift-generated sources in this directory.
+add_library(parquet_thrift STATIC
+  parquet_constants.cpp
+  parquet_types.cpp
+)
+# NOTE(review): parquet_thrift is STATIC, and CMake places static libraries
+# according to ARCHIVE_OUTPUT_DIRECTORY, so this LIBRARY_OUTPUT_DIRECTORY setting
+# probably has no effect on where the archive is written - confirm.
+set_target_properties(parquet_thrift
+  PROPERTIES
+  LIBRARY_OUTPUT_DIRECTORY "${BUILD_OUTPUT_ROOT_DIRECTORY}")
+
+
+# Headers: thrift
+# Installs the generated headers under include/parquet/thrift.
+install(FILES
+  parquet_types.h
+  parquet_constants.h
+  DESTINATION include/parquet/thrift)

http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/337cf584/src/parquet/thrift/parquet_constants.cpp
----------------------------------------------------------------------
diff --git a/src/parquet/thrift/parquet_constants.cpp b/src/parquet/thrift/parquet_constants.cpp
new file mode 100644
index 0000000..caa5af6
--- /dev/null
+++ b/src/parquet/thrift/parquet_constants.cpp
@@ -0,0 +1,17 @@
+/**
+ * Autogenerated by Thrift Compiler (0.9.0)
+ *
+ * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING
+ *  @generated
+ */
+#include "parquet_constants.h"
+
+namespace parquet {
+
+// Definition of the shared constants instance.
+const parquetConstants g_parquet_constants;
+
+// Empty constructor body: no members are initialized here.
+parquetConstants::parquetConstants() {
+}
+
+} // namespace
+

http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/337cf584/src/parquet/thrift/parquet_constants.h
----------------------------------------------------------------------
diff --git a/src/parquet/thrift/parquet_constants.h b/src/parquet/thrift/parquet_constants.h
new file mode 100644
index 0000000..71d6f58
--- /dev/null
+++ b/src/parquet/thrift/parquet_constants.h
@@ -0,0 +1,24 @@
+/**
+ * Autogenerated by Thrift Compiler (0.9.0)
+ *
+ * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING
+ *  @generated
+ */
+#ifndef parquet_CONSTANTS_H
+#define parquet_CONSTANTS_H
+
+#include "parquet_types.h"
+
+namespace parquet {
+
+// Holder class emitted by the Thrift compiler. In this generated output it
+// declares only a default constructor and no data members.
+class parquetConstants {
+ public:
+  parquetConstants();
+
+};
+
+// Declaration of the shared constants instance.
+extern const parquetConstants g_parquet_constants;
+
+} // namespace
+
+#endif

[7/7] parquet-cpp git commit: PARQUET-416: C++11 compilation, code reorg, libparquet and installation targets

Posted by no...@apache.org.

PARQUET-416: C++11 compilation, code reorg, libparquet and installation targets

Reorganize code into a top-level src/parquet directory, add a libparquet shared library, and add install targets for libparquet and its header files. Add cpplint script and `make lint` target for code linting.

Replaces earlier PR #13

Author: Wes McKinney <we...@cloudera.com>

Closes #14 from wesm/libparquet-library and squashes the following commits:

2e356fd [Wes McKinney] PARQUET-416: Compile with C++11 and replace usages of boost::shared_ptr with std::shared_ptr and other C++11 fixes. Reorganize code into a top-level src/parquet directory, add a libparquet shared library, and add install targets for libparquet and its header files. Add cpplint script and `make lint` target for code linting.


Project: http://git-wip-us.apache.org/repos/asf/parquet-cpp/repo
Commit: http://git-wip-us.apache.org/repos/asf/parquet-cpp/commit/337cf584
Tree: http://git-wip-us.apache.org/repos/asf/parquet-cpp/tree/337cf584
Diff: http://git-wip-us.apache.org/repos/asf/parquet-cpp/diff/337cf584

Branch: refs/heads/master
Commit: 337cf584ea25a1c6397c4412a706c58e4bd5e58c
Parents: ea30dec
Author: Wes McKinney <we...@cloudera.com>
Authored: Fri Jan 8 15:49:25 2016 -0800
Committer: Nong Li <no...@gmail.com>
Committed: Fri Jan 8 15:49:25 2016 -0800

----------------------------------------------------------------------
 .travis.yml                                     |   43 +-
 CMakeLists.txt                                  |  127 +-
 build-support/cpplint.py                        | 6323 ++++++++++++++++++
 cmake_modules/clean-all.cmake                   |   14 +
 example/CMakeLists.txt                          |    9 +-
 example/decode_benchmark.cc                     |    7 +-
 example/parquet_reader.cc                       |   26 +-
 generated/gen-cpp/CMakeLists.txt                |   20 -
 generated/gen-cpp/parquet_constants.cpp         |   17 -
 generated/gen-cpp/parquet_constants.h           |   24 -
 generated/gen-cpp/parquet_types.cpp             | 2006 ------
 generated/gen-cpp/parquet_types.h               | 1123 ----
 src/CMakeLists.txt                              |   19 -
 src/compression/CMakeLists.txt                  |   18 -
 src/compression/codec.h                         |   72 -
 src/compression/lz4-codec.cc                    |   38 -
 src/compression/snappy-codec.cc                 |   40 -
 src/encodings/bool-encoding.h                   |   47 -
 src/encodings/delta-bit-pack-encoding.h         |  114 -
 src/encodings/delta-byte-array-encoding.h       |   73 -
 .../delta-length-byte-array-encoding.h          |   62 -
 src/encodings/dictionary-encoding.h             |  146 -
 src/encodings/encodings.h                       |   83 -
 src/encodings/plain-encoding.h                  |   82 -
 src/impala/bit-stream-utils.h                   |  145 -
 src/impala/bit-stream-utils.inline.h            |  164 -
 src/impala/bit-util.h                           |  174 -
 src/impala/compiler-util.h                      |   38 -
 src/impala/logging.h                            |   31 -
 src/impala/rle-encoding.h                       |  417 --
 src/parquet.cc                                  |   35 +-
 src/parquet/CMakeLists.txt                      |   18 +
 src/parquet/compression/CMakeLists.txt          |   30 +
 src/parquet/compression/codec.h                 |   71 +
 src/parquet/compression/lz4-codec.cc            |   40 +
 src/parquet/compression/snappy-codec.cc         |   42 +
 src/parquet/encodings/CMakeLists.txt            |   24 +
 src/parquet/encodings/bool-encoding.h           |   48 +
 src/parquet/encodings/delta-bit-pack-encoding.h |  116 +
 .../encodings/delta-byte-array-encoding.h       |   74 +
 .../delta-length-byte-array-encoding.h          |   63 +
 src/parquet/encodings/dictionary-encoding.h     |  148 +
 src/parquet/encodings/encodings.h               |   82 +
 src/parquet/encodings/plain-encoding.h          |   83 +
 src/parquet/parquet.h                           |   38 +-
 src/parquet/thrift/CMakeLists.txt               |   29 +
 src/parquet/thrift/parquet_constants.cpp        |   17 +
 src/parquet/thrift/parquet_constants.h          |   24 +
 src/parquet/thrift/parquet_types.cpp            | 2006 ++++++
 src/parquet/thrift/parquet_types.h              | 1123 ++++
 src/parquet/util/CMakeLists.txt                 |   24 +
 src/parquet/util/bit-stream-utils.h             |  147 +
 src/parquet/util/bit-stream-utils.inline.h      |  164 +
 src/parquet/util/bit-util.h                     |  174 +
 src/parquet/util/compiler-util.h                |   37 +
 src/parquet/util/logging.h                      |   31 +
 src/parquet/util/rle-encoding.h                 |  419 ++
 src/parquet/util/stopwatch.h                    |   49 +
 src/util/stopwatch.h                            |   49 -
 59 files changed, 11630 insertions(+), 5077 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/337cf584/.travis.yml
----------------------------------------------------------------------
diff --git a/.travis.yml b/.travis.yml
index 58b7641..5da9a6f 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -1,3 +1,5 @@
+sudo: required
+dist: trusty
 language: cpp
 
 compiler:
@@ -9,38 +11,51 @@ os:
     - osx
 
 addons:
-    apt:
-        packages:
-            - libboost-dev
-              #- libsnappy-dev currently handled by thirdparty scipts.
-            - libboost-program-options-dev #needed for thrift cpp compilation
-            - libboost-test-dev            #needed for thrift cpp compilation
-            - libssl-dev                   #needed for thrift cpp compilation
-            - libtool                      #needed for thrift cpp compilation
-            - bison                        #needed for thrift cpp compilation
-            - flex                         #needed for thrift cpp compilation
-            - pkg-config                   #needed for thrift cpp compilation
+  apt:
+    sources:
+    - ubuntu-toolchain-r-test
+    - kalakris-cmake
+    packages:
+    - gcc-4.9
+    - g++-4.9
+    - cmake
+    - valgrind
+    - libboost-dev
+    #- libsnappy-dev currently handled by thirdparty scripts.
+    - libboost-program-options-dev #needed for thrift cpp compilation
+    - libboost-test-dev            #needed for thrift cpp compilation
+    - libssl-dev                   #needed for thrift cpp compilation
+    - libtool                      #needed for thrift cpp compilation
+    - bison                        #needed for thrift cpp compilation
+    - flex                         #needed for thrift cpp compilation
+    - pkg-config                   #needed for thrift cpp compilation
 
 before_install:
     - pushd thirdparty
     # thrift cpp
     - >
+      if [ $TRAVIS_OS_NAME == osx ]; then
+        brew update &&
+        brew install thrift;
+      fi
+    - >
       if [ $TRAVIS_OS_NAME == linux ]; then
-        wget http://www.us.apache.org/dist/thrift/0.9.1/thrift-0.9.1.tar.gz &&
+        wget http://archive.apache.org/dist/thrift/0.9.1/thrift-0.9.1.tar.gz &&
         tar xfz thrift-0.9.1.tar.gz &&
         pushd thrift-0.9.1 &&
-        ./configure --without-qt4 --without-c_glib --without-csharp --without-java --without-erlang --without-nodejs --without-lua --without-python --without-perl --without-php --without-php_extension --without-ruby --without-haskell --without-go --without-d --with-cpp --prefix=$HOME/local &&
+        ./configure CXXFLAGS='-fPIC' --without-qt4 --without-c_glib --without-csharp --without-java --without-erlang --without-nodejs --without-lua --without-python --without-perl --without-php --without-php_extension --without-ruby --without-haskell --without-go --without-d --with-cpp --prefix=$HOME/local &&
         make clean &&
         make install &&
         popd;
       fi
-    - if [ $TRAVIS_OS_NAME == osx ]; then brew install thrift; fi
     # snappy and lz4
     - ./download_thirdparty.sh
     - ./build_thirdparty.sh
     - popd
 
 before_script:
+    - export CC="gcc-4.9"
+    - export CXX="g++-4.9"
     - mkdir build
     - cd build
     - THRIFT_HOME=$HOME/local cmake ..

http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/337cf584/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/CMakeLists.txt b/CMakeLists.txt
index ad0ed5f..eb67f75 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -12,18 +12,64 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-
 cmake_minimum_required(VERSION 2.6)
+project(parquet-cpp)
 
 # generate CTest input files
 enable_testing()
 
 # where to find cmake modules
 set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${CMAKE_SOURCE_DIR}/cmake_modules")
+set(BUILD_SUPPORT_DIR ${CMAKE_CURRENT_SOURCE_DIR}/build-support)
 
 set(THIRDPARTY_PREFIX ${CMAKE_SOURCE_DIR}/thirdparty/installed)
 set(CMAKE_PREFIX_PATH ${THIRDPARTY_PREFIX})
 
+if(APPLE)
+  set(CMAKE_MACOSX_RPATH 1)
+  set(CMAKE_OSX_DEPLOYMENT_TARGET 10.9)
+endif()
+
+if (NOT PARQUET_LINK)
+  set(PARQUET_LINK "a")
+elseif(NOT ("auto" MATCHES "^${PARQUET_LINK}" OR
+            "dynamic" MATCHES "^${PARQUET_LINK}" OR
+            "static" MATCHES "^${PARQUET_LINK}"))
+  message(FATAL_ERROR "Unknown value for PARQUET_LINK, must be auto|dynamic|static")
+else()
+  # Remove all but the first letter.
+  string(SUBSTRING "${PARQUET_LINK}" 0 1 PARQUET_LINK)
+endif()
+
+# if no build type is specified, default to debug builds
+if (NOT CMAKE_BUILD_TYPE)
+  set(CMAKE_BUILD_TYPE Debug)
+endif(NOT CMAKE_BUILD_TYPE)
+
+# set compile output directory
+string (TOLOWER ${CMAKE_BUILD_TYPE} BUILD_SUBDIR_NAME)
+
+# If build in-source, create the latest symlink. If build out-of-source, which is
+# preferred, simply output the binaries in the build folder
+if (${CMAKE_SOURCE_DIR} STREQUAL ${CMAKE_CURRENT_BINARY_DIR})
+  set(BUILD_OUTPUT_ROOT_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/build/${BUILD_SUBDIR_NAME}/")
+  # Link build/latest to the current build directory, to avoid developers
+  # accidentally running the latest debug build when in fact they're building
+  # release builds.
+  FILE(MAKE_DIRECTORY ${BUILD_OUTPUT_ROOT_DIRECTORY})
+  if (NOT APPLE)
+    set(MORE_ARGS "-T")
+  endif()
+EXECUTE_PROCESS(COMMAND ln ${MORE_ARGS} -sf ${BUILD_OUTPUT_ROOT_DIRECTORY}
+  ${CMAKE_CURRENT_BINARY_DIR}/build/latest)
+else()
+  set(BUILD_OUTPUT_ROOT_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/${BUILD_SUBDIR_NAME}/")
+endif()
+
+############################################################
+# Dependencies
+############################################################
+
 # find boost headers and libs
 set(Boost_DEBUG TRUE)
 set(Boost_USE_MULTITHREADED ON)
@@ -58,22 +104,83 @@ include_directories(SYSTEM ${LZ4_INCLUDE_DIR})
 add_library(lz4static STATIC IMPORTED)
 set_target_properties(lz4static PROPERTIES IMPORTED_LOCATION ${LZ4_STATIC_LIB})
 
-SET(CMAKE_CXX_FLAGS "-msse4.2 -Wall -Wno-unused-value -Wno-unused-variable -Wno-sign-compare -Wno-unknown-pragmas")
-SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -g -ggdb")
-
 # Thrift requires these definitions for some types that we use
 add_definitions(-DHAVE_INTTYPES_H -DHAVE_NETINET_IN_H -DHAVE_NETDB_H)
 add_definitions(-fPIC)
 
-# where to put generated libraries
-set(LIBRARY_OUTPUT_PATH "${CMAKE_CURRENT_SOURCE_DIR}/build")
+# where to put generated archives (.a files)
+set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY "${BUILD_OUTPUT_ROOT_DIRECTORY}")
+set(ARCHIVE_OUTPUT_DIRECTORY "${BUILD_OUTPUT_ROOT_DIRECTORY}")
+
+# where to put generated libraries (.so files)
+set(CMAKE_LIBRARY_OUTPUT_DIRECTORY "${BUILD_OUTPUT_ROOT_DIRECTORY}")
+set(LIBRARY_OUTPUT_DIRECTORY "${BUILD_OUTPUT_ROOT_DIRECTORY}")
 
 # where to put generated binaries
-set(EXECUTABLE_OUTPUT_PATH "${CMAKE_CURRENT_SOURCE_DIR}/bin")
+set(EXECUTABLE_OUTPUT_PATH "${BUILD_OUTPUT_ROOT_DIRECTORY}")
+
+SET(CMAKE_CXX_FLAGS "-std=c++11 -msse4.2 -Wall -Wno-unused-value -Wno-unused-variable -Wno-sign-compare -Wno-unknown-pragmas")
+SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -g -ggdb")
 
 include_directories(${CMAKE_CURRENT_SOURCE_DIR}/src)
-include_directories(${CMAKE_CURRENT_SOURCE_DIR}/generated)
 
-add_subdirectory(generated/gen-cpp)
-add_subdirectory(src)
+############################################################
+# "make lint" target
+############################################################
+if (UNIX)
+  # Full lint
+  add_custom_target(lint ${BUILD_SUPPORT_DIR}/cpplint.py
+  --verbose=4
+  --filter=-whitespace/comments,-readability/todo,-build/header_guard,-build/include_order
+    `find ${CMAKE_CURRENT_SOURCE_DIR}/src -name \\*.cc -or -name \\*.h | sed -e '/parquet\\/thrift/g'`)
+endif (UNIX)
+
+############################################################
+# Library config
+
+set(LIBPARQUET_SRCS
+  src/parquet.cc
+)
+
+set(LIBPARQUET_LINK_LIBS
+  parquet_compression
+  parquet_thrift
+  thriftstatic
+)
+
+if ("${PARQUET_LINK}" STREQUAL "d" OR "${PARQUET_LINK}" STREQUAL "a")
+  set(LIBPARQUET_LINKAGE "SHARED")
+else()
+  set(LIBPARQUET_LINKAGE "STATIC")
+endif()
+
+add_library(parquet
+  ${LIBPARQUET_LINKAGE}
+  ${LIBPARQUET_SRCS}
+)
+set_target_properties(parquet
+  PROPERTIES
+  LIBRARY_OUTPUT_DIRECTORY "${BUILD_OUTPUT_ROOT_DIRECTORY}")
+target_link_libraries(parquet ${LIBPARQUET_LINK_LIBS})
+
+if(APPLE)
+    set_target_properties(parquet PROPERTIES LINK_FLAGS "-undefined dynamic_lookup")
+endif()
+
+add_subdirectory(src/parquet)
+add_subdirectory(src/parquet/compression)
+add_subdirectory(src/parquet/encodings)
+add_subdirectory(src/parquet/thrift)
+add_subdirectory(src/parquet/util)
+
 add_subdirectory(example)
+
+add_custom_target(clean-all
+   COMMAND ${CMAKE_BUILD_TOOL} clean
+   COMMAND ${CMAKE_COMMAND} -P cmake_modules/clean-all.cmake
+)
+
+# installation
+
+install(TARGETS parquet
+  LIBRARY DESTINATION lib)