You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@parquet.apache.org by we...@apache.org on 2016/09/01 18:57:52 UTC
parquet-cpp git commit: PARQUET-699: Update parquet.thrift from
https://github.com/apache/parquet-format
Repository: parquet-cpp
Updated Branches:
refs/heads/master c0fd08a97 -> bf6716c76
PARQUET-699: Update parquet.thrift from https://github.com/apache/parquet-format
Support logical types TIME_MICROS and TIMESTAMP_MICROS
Also, I think the current code was incorrect. Parquet reserved the LogicalType values 8 and 10, but those were completely omitted from types.h, so types with greater indices were mapped incorrectly.
Author: Florian Scheibner <fl...@hotmail.de>
Closes #147 from flode/master and squashes the following commits:
6f81adc [Florian Scheibner] PARQUET-699: Update parquet.thrift from https://github.com/apache/parquet-format
Project: http://git-wip-us.apache.org/repos/asf/parquet-cpp/repo
Commit: http://git-wip-us.apache.org/repos/asf/parquet-cpp/commit/bf6716c7
Tree: http://git-wip-us.apache.org/repos/asf/parquet-cpp/tree/bf6716c7
Diff: http://git-wip-us.apache.org/repos/asf/parquet-cpp/diff/bf6716c7
Branch: refs/heads/master
Commit: bf6716c76c747f637c48162d0a03256acd10e439
Parents: c0fd08a
Author: Florian Scheibner <fl...@hotmail.de>
Authored: Thu Sep 1 14:57:31 2016 -0400
Committer: Wes McKinney <we...@apache.org>
Committed: Thu Sep 1 14:57:31 2016 -0400
----------------------------------------------------------------------
src/parquet/compression/codec.cc | 3 +++
src/parquet/schema/types.cc | 2 ++
src/parquet/thrift/parquet.thrift | 23 +++++++++++++++++++++--
src/parquet/types-test.cc | 3 +++
src/parquet/types.cc | 25 ++++---------------------
src/parquet/types.h | 4 +++-
6 files changed, 36 insertions(+), 24 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/bf6716c7/src/parquet/compression/codec.cc
----------------------------------------------------------------------
diff --git a/src/parquet/compression/codec.cc b/src/parquet/compression/codec.cc
index fed5644..f5aaefd 100644
--- a/src/parquet/compression/codec.cc
+++ b/src/parquet/compression/codec.cc
@@ -37,6 +37,9 @@ std::unique_ptr<Codec> Codec::Create(Compression::type codec_type) {
case Compression::LZO:
ParquetException::NYI("LZO codec not implemented");
break;
+ case Compression::BROTLI:
+ ParquetException::NYI("BROTLI codec not implemented");
+ break;
default:
ParquetException::NYI("Unrecognized codec");
break;
http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/bf6716c7/src/parquet/schema/types.cc
----------------------------------------------------------------------
diff --git a/src/parquet/schema/types.cc b/src/parquet/schema/types.cc
index f078f24..2e5d151 100644
--- a/src/parquet/schema/types.cc
+++ b/src/parquet/schema/types.cc
@@ -143,7 +143,9 @@ PrimitiveNode::PrimitiveNode(const std::string& name, Repetition::type repetitio
throw ParquetException(ss.str());
}
break;
+ case LogicalType::TIME_MICROS:
case LogicalType::TIMESTAMP_MILLIS:
+ case LogicalType::TIMESTAMP_MICROS:
case LogicalType::UINT_64:
case LogicalType::INT_64:
if (type != Type::INT64) {
http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/bf6716c7/src/parquet/thrift/parquet.thrift
----------------------------------------------------------------------
diff --git a/src/parquet/thrift/parquet.thrift b/src/parquet/thrift/parquet.thrift
index a85dd22..b61c084 100644
--- a/src/parquet/thrift/parquet.thrift
+++ b/src/parquet/thrift/parquet.thrift
@@ -93,7 +93,14 @@ enum ConvertedType {
* as an INT32 physical type.
*/
TIME_MILLIS = 7;
- // RESERVED = 8;
+
+ /**
+ * A time.
+ *
+ * The total number of microseconds since midnight. The value is stored as
+ * an INT64 physical type.
+ */
+ TIME_MICROS = 8;
/**
* A date/time combination
@@ -102,7 +109,14 @@ enum ConvertedType {
* a physical type of INT64.
*/
TIMESTAMP_MILLIS = 9;
- // RESERVED = 10;
+
+ /**
+ * A date/time combination
+ *
+ * Date and time recorded as microseconds since the Unix epoch. The value is
+ * stored as an INT64 physical type.
+ */
+ TIMESTAMP_MICROS = 10;
/**
@@ -308,6 +322,7 @@ enum CompressionCodec {
SNAPPY = 1;
GZIP = 2;
LZO = 3;
+ BROTLI = 4;
}
enum PageType {
@@ -509,6 +524,9 @@ struct ColumnChunk {
}
struct RowGroup {
+ /** Metadata for each column chunk in this row group.
+ * This list must have the same order as the SchemaElement list in FileMetaData.
+ **/
1: required list<ColumnChunk> columns
/** Total byte size of all the uncompressed column data in this row group **/
@@ -553,3 +571,4 @@ struct FileMetaData {
**/
6: optional string created_by
}
+
http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/bf6716c7/src/parquet/types-test.cc
----------------------------------------------------------------------
diff --git a/src/parquet/types-test.cc b/src/parquet/types-test.cc
index 0804f9b..59ed456 100644
--- a/src/parquet/types-test.cc
+++ b/src/parquet/types-test.cc
@@ -45,8 +45,11 @@ TEST(TestLogicalTypeToString, LogicalTypes) {
ASSERT_STREQ("DECIMAL", logical_type_to_string(LogicalType::DECIMAL).c_str());
ASSERT_STREQ("DATE", logical_type_to_string(LogicalType::DATE).c_str());
ASSERT_STREQ("TIME_MILLIS", logical_type_to_string(LogicalType::TIME_MILLIS).c_str());
+ ASSERT_STREQ("TIME_MICROS", logical_type_to_string(LogicalType::TIME_MICROS).c_str());
ASSERT_STREQ(
"TIMESTAMP_MILLIS", logical_type_to_string(LogicalType::TIMESTAMP_MILLIS).c_str());
+ ASSERT_STREQ(
+ "TIMESTAMP_MICROS", logical_type_to_string(LogicalType::TIMESTAMP_MICROS).c_str());
ASSERT_STREQ("UINT_8", logical_type_to_string(LogicalType::UINT_8).c_str());
ASSERT_STREQ("UINT_16", logical_type_to_string(LogicalType::UINT_16).c_str());
ASSERT_STREQ("UINT_32", logical_type_to_string(LogicalType::UINT_32).c_str());
http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/bf6716c7/src/parquet/types.cc
----------------------------------------------------------------------
diff --git a/src/parquet/types.cc b/src/parquet/types.cc
index 89a1f87..7fc5017 100644
--- a/src/parquet/types.cc
+++ b/src/parquet/types.cc
@@ -150,67 +150,50 @@ std::string logical_type_to_string(LogicalType::type t) {
switch (t) {
case LogicalType::NONE:
return "NONE";
- break;
case LogicalType::UTF8:
return "UTF8";
- break;
case LogicalType::MAP_KEY_VALUE:
return "MAP_KEY_VALUE";
- break;
case LogicalType::LIST:
return "LIST";
- break;
case LogicalType::ENUM:
return "ENUM";
- break;
case LogicalType::DECIMAL:
return "DECIMAL";
- break;
case LogicalType::DATE:
return "DATE";
- break;
case LogicalType::TIME_MILLIS:
return "TIME_MILLIS";
- break;
+ case LogicalType::TIME_MICROS:
+ return "TIME_MICROS";
case LogicalType::TIMESTAMP_MILLIS:
return "TIMESTAMP_MILLIS";
- break;
+ case LogicalType::TIMESTAMP_MICROS:
+ return "TIMESTAMP_MICROS";
case LogicalType::UINT_8:
return "UINT_8";
- break;
case LogicalType::UINT_16:
return "UINT_16";
- break;
case LogicalType::UINT_32:
return "UINT_32";
- break;
case LogicalType::UINT_64:
return "UINT_64";
- break;
case LogicalType::INT_8:
return "INT_8";
- break;
case LogicalType::INT_16:
return "INT_16";
- break;
case LogicalType::INT_32:
return "INT_32";
- break;
case LogicalType::INT_64:
return "INT_64";
- break;
case LogicalType::JSON:
return "JSON";
- break;
case LogicalType::BSON:
return "BSON";
- break;
case LogicalType::INTERVAL:
return "INTERVAL";
- break;
default:
return "UNKNOWN";
- break;
}
}
} // namespace parquet
http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/bf6716c7/src/parquet/types.h
----------------------------------------------------------------------
diff --git a/src/parquet/types.h b/src/parquet/types.h
index c952b06..cb67820 100644
--- a/src/parquet/types.h
+++ b/src/parquet/types.h
@@ -68,7 +68,9 @@ struct LogicalType {
DECIMAL,
DATE,
TIME_MILLIS,
+ TIME_MICROS,
TIMESTAMP_MILLIS,
+ TIMESTAMP_MICROS,
UINT_8,
UINT_16,
UINT_32,
@@ -104,7 +106,7 @@ struct Encoding {
// Compression, mirrors parquet::CompressionCodec
struct Compression {
- enum type { UNCOMPRESSED, SNAPPY, GZIP, LZO };
+ enum type { UNCOMPRESSED, SNAPPY, GZIP, LZO, BROTLI };
};
// parquet::PageType