You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@parquet.apache.org by we...@apache.org on 2016/09/01 18:57:52 UTC

parquet-cpp git commit: PARQUET-699: Update parquet.thrift from https://github.com/apache/parquet-format

Repository: parquet-cpp
Updated Branches:
  refs/heads/master c0fd08a97 -> bf6716c76


PARQUET-699: Update parquet.thrift from https://github.com/apache/parquet-format

Support logical types TIME_MICROS and TIMESTAMP_MICROS

Also, I think the existing code was incorrect: Parquet reserved LogicalType values 8 and 10, but they were omitted entirely from types.h, so every type with a greater index was mapped incorrectly.

Author: Florian Scheibner <fl...@hotmail.de>

Closes #147 from flode/master and squashes the following commits:

6f81adc [Florian Scheibner] PARQUET-699: Update parquet.thrift from https://github.com/apache/parquet-format


Project: http://git-wip-us.apache.org/repos/asf/parquet-cpp/repo
Commit: http://git-wip-us.apache.org/repos/asf/parquet-cpp/commit/bf6716c7
Tree: http://git-wip-us.apache.org/repos/asf/parquet-cpp/tree/bf6716c7
Diff: http://git-wip-us.apache.org/repos/asf/parquet-cpp/diff/bf6716c7

Branch: refs/heads/master
Commit: bf6716c76c747f637c48162d0a03256acd10e439
Parents: c0fd08a
Author: Florian Scheibner <fl...@hotmail.de>
Authored: Thu Sep 1 14:57:31 2016 -0400
Committer: Wes McKinney <we...@apache.org>
Committed: Thu Sep 1 14:57:31 2016 -0400

----------------------------------------------------------------------
 src/parquet/compression/codec.cc  |  3 +++
 src/parquet/schema/types.cc       |  2 ++
 src/parquet/thrift/parquet.thrift | 23 +++++++++++++++++++++--
 src/parquet/types-test.cc         |  3 +++
 src/parquet/types.cc              | 25 ++++---------------------
 src/parquet/types.h               |  4 +++-
 6 files changed, 36 insertions(+), 24 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/bf6716c7/src/parquet/compression/codec.cc
----------------------------------------------------------------------
diff --git a/src/parquet/compression/codec.cc b/src/parquet/compression/codec.cc
index fed5644..f5aaefd 100644
--- a/src/parquet/compression/codec.cc
+++ b/src/parquet/compression/codec.cc
@@ -37,6 +37,9 @@ std::unique_ptr<Codec> Codec::Create(Compression::type codec_type) {
     case Compression::LZO:
       ParquetException::NYI("LZO codec not implemented");
       break;
+    case Compression::BROTLI:
+      ParquetException::NYI("BROTLI codec not implemented");
+      break;
     default:
       ParquetException::NYI("Unrecognized codec");
       break;

http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/bf6716c7/src/parquet/schema/types.cc
----------------------------------------------------------------------
diff --git a/src/parquet/schema/types.cc b/src/parquet/schema/types.cc
index f078f24..2e5d151 100644
--- a/src/parquet/schema/types.cc
+++ b/src/parquet/schema/types.cc
@@ -143,7 +143,9 @@ PrimitiveNode::PrimitiveNode(const std::string& name, Repetition::type repetitio
         throw ParquetException(ss.str());
       }
       break;
+    case LogicalType::TIME_MICROS:
     case LogicalType::TIMESTAMP_MILLIS:
+    case LogicalType::TIMESTAMP_MICROS:
     case LogicalType::UINT_64:
     case LogicalType::INT_64:
       if (type != Type::INT64) {

http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/bf6716c7/src/parquet/thrift/parquet.thrift
----------------------------------------------------------------------
diff --git a/src/parquet/thrift/parquet.thrift b/src/parquet/thrift/parquet.thrift
index a85dd22..b61c084 100644
--- a/src/parquet/thrift/parquet.thrift
+++ b/src/parquet/thrift/parquet.thrift
@@ -93,7 +93,14 @@ enum ConvertedType {
    * as an INT32 physical type.
    */
   TIME_MILLIS = 7;
-  // RESERVED = 8;
+
+  /**
+   * A time.
+   *
+   * The total number of microseconds since midnight.  The value is stored as
+   * an INT64 physical type.
+   */
+  TIME_MICROS = 8;
 
   /**
    * A date/time combination
@@ -102,7 +109,14 @@ enum ConvertedType {
    * a physical type of INT64.
    */
   TIMESTAMP_MILLIS = 9;
-  // RESERVED = 10;
+
+  /**
+   * A date/time combination
+   *
+   * Date and time recorded as microseconds since the Unix epoch.  The value is
+   * stored as an INT64 physical type.
+   */
+  TIMESTAMP_MICROS = 10;
 
 
   /**
@@ -308,6 +322,7 @@ enum CompressionCodec {
   SNAPPY = 1;
   GZIP = 2;
   LZO = 3;
+  BROTLI = 4;
 }
 
 enum PageType {
@@ -509,6 +524,9 @@ struct ColumnChunk {
 }
 
 struct RowGroup {
+  /** Metadata for each column chunk in this row group.
+   * This list must have the same order as the SchemaElement list in FileMetaData.
+   **/
   1: required list<ColumnChunk> columns
 
   /** Total byte size of all the uncompressed column data in this row group **/
@@ -553,3 +571,4 @@ struct FileMetaData {
    **/
   6: optional string created_by
 }
+

http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/bf6716c7/src/parquet/types-test.cc
----------------------------------------------------------------------
diff --git a/src/parquet/types-test.cc b/src/parquet/types-test.cc
index 0804f9b..59ed456 100644
--- a/src/parquet/types-test.cc
+++ b/src/parquet/types-test.cc
@@ -45,8 +45,11 @@ TEST(TestLogicalTypeToString, LogicalTypes) {
   ASSERT_STREQ("DECIMAL", logical_type_to_string(LogicalType::DECIMAL).c_str());
   ASSERT_STREQ("DATE", logical_type_to_string(LogicalType::DATE).c_str());
   ASSERT_STREQ("TIME_MILLIS", logical_type_to_string(LogicalType::TIME_MILLIS).c_str());
+  ASSERT_STREQ("TIME_MICROS", logical_type_to_string(LogicalType::TIME_MICROS).c_str());
   ASSERT_STREQ(
       "TIMESTAMP_MILLIS", logical_type_to_string(LogicalType::TIMESTAMP_MILLIS).c_str());
+  ASSERT_STREQ(
+      "TIMESTAMP_MICROS", logical_type_to_string(LogicalType::TIMESTAMP_MICROS).c_str());
   ASSERT_STREQ("UINT_8", logical_type_to_string(LogicalType::UINT_8).c_str());
   ASSERT_STREQ("UINT_16", logical_type_to_string(LogicalType::UINT_16).c_str());
   ASSERT_STREQ("UINT_32", logical_type_to_string(LogicalType::UINT_32).c_str());

http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/bf6716c7/src/parquet/types.cc
----------------------------------------------------------------------
diff --git a/src/parquet/types.cc b/src/parquet/types.cc
index 89a1f87..7fc5017 100644
--- a/src/parquet/types.cc
+++ b/src/parquet/types.cc
@@ -150,67 +150,50 @@ std::string logical_type_to_string(LogicalType::type t) {
   switch (t) {
     case LogicalType::NONE:
       return "NONE";
-      break;
     case LogicalType::UTF8:
       return "UTF8";
-      break;
     case LogicalType::MAP_KEY_VALUE:
       return "MAP_KEY_VALUE";
-      break;
     case LogicalType::LIST:
       return "LIST";
-      break;
     case LogicalType::ENUM:
       return "ENUM";
-      break;
     case LogicalType::DECIMAL:
       return "DECIMAL";
-      break;
     case LogicalType::DATE:
       return "DATE";
-      break;
     case LogicalType::TIME_MILLIS:
       return "TIME_MILLIS";
-      break;
+    case LogicalType::TIME_MICROS:
+      return "TIME_MICROS";
     case LogicalType::TIMESTAMP_MILLIS:
       return "TIMESTAMP_MILLIS";
-      break;
+    case LogicalType::TIMESTAMP_MICROS:
+      return "TIMESTAMP_MICROS";
     case LogicalType::UINT_8:
       return "UINT_8";
-      break;
     case LogicalType::UINT_16:
       return "UINT_16";
-      break;
     case LogicalType::UINT_32:
       return "UINT_32";
-      break;
     case LogicalType::UINT_64:
       return "UINT_64";
-      break;
     case LogicalType::INT_8:
       return "INT_8";
-      break;
     case LogicalType::INT_16:
       return "INT_16";
-      break;
     case LogicalType::INT_32:
       return "INT_32";
-      break;
     case LogicalType::INT_64:
       return "INT_64";
-      break;
     case LogicalType::JSON:
       return "JSON";
-      break;
     case LogicalType::BSON:
       return "BSON";
-      break;
     case LogicalType::INTERVAL:
       return "INTERVAL";
-      break;
     default:
       return "UNKNOWN";
-      break;
   }
 }
 }  // namespace parquet

http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/bf6716c7/src/parquet/types.h
----------------------------------------------------------------------
diff --git a/src/parquet/types.h b/src/parquet/types.h
index c952b06..cb67820 100644
--- a/src/parquet/types.h
+++ b/src/parquet/types.h
@@ -68,7 +68,9 @@ struct LogicalType {
     DECIMAL,
     DATE,
     TIME_MILLIS,
+    TIME_MICROS,
     TIMESTAMP_MILLIS,
+    TIMESTAMP_MICROS,
     UINT_8,
     UINT_16,
     UINT_32,
@@ -104,7 +106,7 @@ struct Encoding {
 
 // Compression, mirrors parquet::CompressionCodec
 struct Compression {
-  enum type { UNCOMPRESSED, SNAPPY, GZIP, LZO };
+  enum type { UNCOMPRESSED, SNAPPY, GZIP, LZO, BROTLI };
 };
 
 // parquet::PageType