You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by we...@apache.org on 2019/06/22 20:00:19 UTC
[arrow] branch master updated: PARQUET-1603: [C++] rename
parquet::LogicalType to parquet::ConvertedType
This is an automated email from the ASF dual-hosted git repository.
wesm pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/master by this push:
new 95d3e06 PARQUET-1603: [C++] rename parquet::LogicalType to parquet::ConvertedType
95d3e06 is described below
commit 95d3e06a55cdaee239719bc42a24349ebeac6c01
Author: Deepak Majeti <de...@microfocus.com>
AuthorDate: Sat Jun 22 15:00:11 2019 -0500
PARQUET-1603: [C++] rename parquet::LogicalType to parquet::ConvertedType
Changes:
1. Rename parquet::LogicalType to parquet::ConvertedType
2. Rename parquet::LogicalAnnotation to parquet::LogicalType
3. Change related variable names and comments
Author: Deepak Majeti <de...@microfocus.com>
Author: Wes McKinney <we...@apache.org>
Closes #4653 from majetideepak/PARQUET-1603 and squashes the following commits:
859ac6173 <Wes McKinney> Follow LogicalType -> ConvertedType in Python bindings
f5574c60d <Deepak Majeti> clang format
34ea93b28 <Deepak Majeti> fix changes
b256bbdbb <Deepak Majeti> rename annotation to logical_type
d2c9daffa <Deepak Majeti> rename Annotation to LogicalType
8c5877bf9 <Deepak Majeti> rename logical_annotation to logical_type
6cce31276 <Deepak Majeti> rename LogicalAnnotation to LogicalType
0f69c91e2 <Deepak Majeti> rename LogicalType to ConvertedType
---
cpp/examples/parquet/low-level-api/reader_writer.h | 18 +-
cpp/src/parquet/arrow/arrow-reader-writer-test.cc | 47 +-
cpp/src/parquet/arrow/arrow-schema-test.cc | 275 ++--
cpp/src/parquet/arrow/record_reader.cc | 4 +-
cpp/src/parquet/arrow/schema.cc | 207 +--
cpp/src/parquet/column_scanner-test.cc | 12 +-
cpp/src/parquet/column_writer-test.cc | 2 +-
cpp/src/parquet/encoding-test.cc | 2 +-
cpp/src/parquet/metadata.cc | 2 +-
cpp/src/parquet/printer.cc | 11 +-
cpp/src/parquet/reader-test.cc | 22 +-
cpp/src/parquet/schema-internal.h | 2 +-
cpp/src/parquet/schema-test.cc | 1388 ++++++++++----------
cpp/src/parquet/schema.cc | 203 ++-
cpp/src/parquet/schema.h | 60 +-
cpp/src/parquet/statistics-test.cc | 50 +-
cpp/src/parquet/test-util.h | 2 +-
cpp/src/parquet/thrift.h | 8 +-
cpp/src/parquet/types-test.cc | 49 +-
cpp/src/parquet/types.cc | 1074 +++++++--------
cpp/src/parquet/types.h | 212 +--
python/pyarrow/_parquet.pxd | 55 +-
python/pyarrow/_parquet.pyx | 80 +-
python/pyarrow/tests/test_parquet.py | 2 +-
24 files changed, 1835 insertions(+), 1952 deletions(-)
diff --git a/cpp/examples/parquet/low-level-api/reader_writer.h b/cpp/examples/parquet/low-level-api/reader_writer.h
index 3fda0cf..1ffc46e 100644
--- a/cpp/examples/parquet/low-level-api/reader_writer.h
+++ b/cpp/examples/parquet/low-level-api/reader_writer.h
@@ -21,7 +21,7 @@
#include <parquet/api/reader.h>
#include <parquet/api/writer.h>
-using parquet::LogicalType;
+using parquet::ConvertedType;
using parquet::Repetition;
using parquet::Type;
using parquet::schema::GroupNode;
@@ -34,34 +34,34 @@ static std::shared_ptr<GroupNode> SetupSchema() {
// Create a primitive node named 'boolean_field' with type:BOOLEAN,
// repetition:REQUIRED
fields.push_back(PrimitiveNode::Make("boolean_field", Repetition::REQUIRED,
- Type::BOOLEAN, LogicalType::NONE));
+ Type::BOOLEAN, ConvertedType::NONE));
// Create a primitive node named 'int32_field' with type:INT32, repetition:REQUIRED,
// logical type:TIME_MILLIS
fields.push_back(PrimitiveNode::Make("int32_field", Repetition::REQUIRED, Type::INT32,
- LogicalType::TIME_MILLIS));
+ ConvertedType::TIME_MILLIS));
// Create a primitive node named 'int64_field' with type:INT64, repetition:REPEATED
fields.push_back(PrimitiveNode::Make("int64_field", Repetition::REPEATED, Type::INT64,
- LogicalType::NONE));
+ ConvertedType::NONE));
fields.push_back(PrimitiveNode::Make("int96_field", Repetition::REQUIRED, Type::INT96,
- LogicalType::NONE));
+ ConvertedType::NONE));
fields.push_back(PrimitiveNode::Make("float_field", Repetition::REQUIRED, Type::FLOAT,
- LogicalType::NONE));
+ ConvertedType::NONE));
fields.push_back(PrimitiveNode::Make("double_field", Repetition::REQUIRED, Type::DOUBLE,
- LogicalType::NONE));
+ ConvertedType::NONE));
// Create a primitive node named 'ba_field' with type:BYTE_ARRAY, repetition:OPTIONAL
fields.push_back(PrimitiveNode::Make("ba_field", Repetition::OPTIONAL, Type::BYTE_ARRAY,
- LogicalType::NONE));
+ ConvertedType::NONE));
// Create a primitive node named 'flba_field' with type:FIXED_LEN_BYTE_ARRAY,
// repetition:REQUIRED, field_length = FIXED_LENGTH
fields.push_back(PrimitiveNode::Make("flba_field", Repetition::REQUIRED,
- Type::FIXED_LEN_BYTE_ARRAY, LogicalType::NONE,
+ Type::FIXED_LEN_BYTE_ARRAY, ConvertedType::NONE,
FIXED_LENGTH));
// Create a GroupNode named 'schema' using the primitive nodes defined above
diff --git a/cpp/src/parquet/arrow/arrow-reader-writer-test.cc b/cpp/src/parquet/arrow/arrow-reader-writer-test.cc
index 83be7ed..f59db1f 100644
--- a/cpp/src/parquet/arrow/arrow-reader-writer-test.cc
+++ b/cpp/src/parquet/arrow/arrow-reader-writer-test.cc
@@ -87,43 +87,40 @@ static constexpr int LARGE_SIZE = 10000;
static constexpr uint32_t kDefaultSeed = 0;
-std::shared_ptr<const LogicalAnnotation> get_logical_annotation(const ::DataType& type) {
+std::shared_ptr<const LogicalType> get_logical_type(const ::DataType& type) {
switch (type.id()) {
case ArrowId::UINT8:
- return LogicalAnnotation::Int(8, false);
+ return LogicalType::Int(8, false);
case ArrowId::INT8:
- return LogicalAnnotation::Int(8, true);
+ return LogicalType::Int(8, true);
case ArrowId::UINT16:
- return LogicalAnnotation::Int(16, false);
+ return LogicalType::Int(16, false);
case ArrowId::INT16:
- return LogicalAnnotation::Int(16, true);
+ return LogicalType::Int(16, true);
case ArrowId::UINT32:
- return LogicalAnnotation::Int(32, false);
+ return LogicalType::Int(32, false);
case ArrowId::INT32:
- return LogicalAnnotation::Int(32, true);
+ return LogicalType::Int(32, true);
case ArrowId::UINT64:
- return LogicalAnnotation::Int(64, false);
+ return LogicalType::Int(64, false);
case ArrowId::INT64:
- return LogicalAnnotation::Int(64, true);
+ return LogicalType::Int(64, true);
case ArrowId::STRING:
- return LogicalAnnotation::String();
+ return LogicalType::String();
case ArrowId::DATE32:
- return LogicalAnnotation::Date();
+ return LogicalType::Date();
case ArrowId::DATE64:
- return LogicalAnnotation::Date();
+ return LogicalType::Date();
case ArrowId::TIMESTAMP: {
const auto& ts_type = static_cast<const ::arrow::TimestampType&>(type);
const bool adjusted_to_utc = !(ts_type.timezone().empty());
switch (ts_type.unit()) {
case TimeUnit::MILLI:
- return LogicalAnnotation::Timestamp(adjusted_to_utc,
- LogicalAnnotation::TimeUnit::MILLIS);
+ return LogicalType::Timestamp(adjusted_to_utc, LogicalType::TimeUnit::MILLIS);
case TimeUnit::MICRO:
- return LogicalAnnotation::Timestamp(adjusted_to_utc,
- LogicalAnnotation::TimeUnit::MICROS);
+ return LogicalType::Timestamp(adjusted_to_utc, LogicalType::TimeUnit::MICROS);
case TimeUnit::NANO:
- return LogicalAnnotation::Timestamp(adjusted_to_utc,
- LogicalAnnotation::TimeUnit::NANOS);
+ return LogicalType::Timestamp(adjusted_to_utc, LogicalType::TimeUnit::NANOS);
default:
DCHECK(false)
<< "Only MILLI, MICRO, and NANO units supported for Arrow TIMESTAMP.";
@@ -131,14 +128,14 @@ std::shared_ptr<const LogicalAnnotation> get_logical_annotation(const ::DataType
break;
}
case ArrowId::TIME32:
- return LogicalAnnotation::Time(false, LogicalAnnotation::TimeUnit::MILLIS);
+ return LogicalType::Time(false, LogicalType::TimeUnit::MILLIS);
case ArrowId::TIME64: {
const auto& tm_type = static_cast<const ::arrow::TimeType&>(type);
switch (tm_type.unit()) {
case TimeUnit::MICRO:
- return LogicalAnnotation::Time(false, LogicalAnnotation::TimeUnit::MICROS);
+ return LogicalType::Time(false, LogicalType::TimeUnit::MICROS);
case TimeUnit::NANO:
- return LogicalAnnotation::Time(false, LogicalAnnotation::TimeUnit::NANOS);
+ return LogicalType::Time(false, LogicalType::TimeUnit::NANOS);
default:
DCHECK(false) << "Only MICRO and NANO units supported for Arrow TIME64.";
}
@@ -147,16 +144,16 @@ std::shared_ptr<const LogicalAnnotation> get_logical_annotation(const ::DataType
case ArrowId::DICTIONARY: {
const ::arrow::DictionaryType& dict_type =
static_cast<const ::arrow::DictionaryType&>(type);
- return get_logical_annotation(*dict_type.value_type());
+ return get_logical_type(*dict_type.value_type());
}
case ArrowId::DECIMAL: {
const auto& dec_type = static_cast<const ::arrow::Decimal128Type&>(type);
- return LogicalAnnotation::Decimal(dec_type.precision(), dec_type.scale());
+ return LogicalType::Decimal(dec_type.precision(), dec_type.scale());
}
default:
break;
}
- return LogicalAnnotation::None();
+ return LogicalType::None();
}
ParquetType::type get_physical_type(const ::DataType& type) {
@@ -481,7 +478,7 @@ static std::shared_ptr<GroupNode> MakeSimpleSchema(const ::DataType& type,
default:
break;
}
- auto pnode = PrimitiveNode::Make("column1", repetition, get_logical_annotation(type),
+ auto pnode = PrimitiveNode::Make("column1", repetition, get_logical_type(type),
get_physical_type(type), byte_width);
NodePtr node_ =
GroupNode::Make("schema", Repetition::REQUIRED, std::vector<NodePtr>({pnode}));
diff --git a/cpp/src/parquet/arrow/arrow-schema-test.cc b/cpp/src/parquet/arrow/arrow-schema-test.cc
index cedabdb..1089fbb 100644
--- a/cpp/src/parquet/arrow/arrow-schema-test.cc
+++ b/cpp/src/parquet/arrow/arrow-schema-test.cc
@@ -33,7 +33,7 @@ using arrow::Field;
using arrow::TimeUnit;
using ParquetType = parquet::Type;
-using parquet::LogicalAnnotation;
+using parquet::ConvertedType;
using parquet::LogicalType;
using parquet::Repetition;
using parquet::schema::GroupNode;
@@ -115,27 +115,27 @@ TEST_F(TestConvertParquetSchema, ParquetFlatPrimitives) {
parquet_fields.push_back(PrimitiveNode::Make("timestamp", Repetition::REQUIRED,
ParquetType::INT64,
- LogicalType::TIMESTAMP_MILLIS));
+ ConvertedType::TIMESTAMP_MILLIS));
arrow_fields.push_back(std::make_shared<Field>(
"timestamp", ::arrow::timestamp(TimeUnit::MILLI, "UTC"), false));
parquet_fields.push_back(PrimitiveNode::Make("timestamp[us]", Repetition::REQUIRED,
ParquetType::INT64,
- LogicalType::TIMESTAMP_MICROS));
+ ConvertedType::TIMESTAMP_MICROS));
arrow_fields.push_back(std::make_shared<Field>(
"timestamp[us]", ::arrow::timestamp(TimeUnit::MICRO, "UTC"), false));
parquet_fields.push_back(PrimitiveNode::Make("date", Repetition::REQUIRED,
- ParquetType::INT32, LogicalType::DATE));
+ ParquetType::INT32, ConvertedType::DATE));
arrow_fields.push_back(std::make_shared<Field>("date", ::arrow::date32(), false));
parquet_fields.push_back(PrimitiveNode::Make(
- "time32", Repetition::REQUIRED, ParquetType::INT32, LogicalType::TIME_MILLIS));
+ "time32", Repetition::REQUIRED, ParquetType::INT32, ConvertedType::TIME_MILLIS));
arrow_fields.push_back(
std::make_shared<Field>("time32", ::arrow::time32(TimeUnit::MILLI), false));
parquet_fields.push_back(PrimitiveNode::Make(
- "time64", Repetition::REQUIRED, ParquetType::INT64, LogicalType::TIME_MICROS));
+ "time64", Repetition::REQUIRED, ParquetType::INT64, ConvertedType::TIME_MICROS));
arrow_fields.push_back(
std::make_shared<Field>("time64", ::arrow::time64(TimeUnit::MICRO), false));
@@ -156,12 +156,12 @@ TEST_F(TestConvertParquetSchema, ParquetFlatPrimitives) {
arrow_fields.push_back(std::make_shared<Field>("binary", BINARY));
parquet_fields.push_back(PrimitiveNode::Make(
- "string", Repetition::OPTIONAL, ParquetType::BYTE_ARRAY, LogicalType::UTF8));
+ "string", Repetition::OPTIONAL, ParquetType::BYTE_ARRAY, ConvertedType::UTF8));
arrow_fields.push_back(std::make_shared<Field>("string", UTF8));
parquet_fields.push_back(PrimitiveNode::Make("flba-binary", Repetition::OPTIONAL,
ParquetType::FIXED_LEN_BYTE_ARRAY,
- LogicalType::NONE, 12));
+ ConvertedType::NONE, 12));
arrow_fields.push_back(
std::make_shared<Field>("flba-binary", ::arrow::fixed_size_binary(12)));
@@ -174,92 +174,89 @@ TEST_F(TestConvertParquetSchema, ParquetFlatPrimitives) {
TEST_F(TestConvertParquetSchema, ParquetAnnotatedFields) {
struct FieldConstructionArguments {
std::string name;
- std::shared_ptr<const LogicalAnnotation> annotation;
+ std::shared_ptr<const LogicalType> logical_type;
parquet::Type::type physical_type;
int physical_length;
std::shared_ptr<::arrow::DataType> datatype;
};
std::vector<FieldConstructionArguments> cases = {
- {"string", LogicalAnnotation::String(), ParquetType::BYTE_ARRAY, -1,
- ::arrow::utf8()},
- {"enum", LogicalAnnotation::Enum(), ParquetType::BYTE_ARRAY, -1, ::arrow::binary()},
- {"decimal(8, 2)", LogicalAnnotation::Decimal(8, 2), ParquetType::INT32, -1,
+ {"string", LogicalType::String(), ParquetType::BYTE_ARRAY, -1, ::arrow::utf8()},
+ {"enum", LogicalType::Enum(), ParquetType::BYTE_ARRAY, -1, ::arrow::binary()},
+ {"decimal(8, 2)", LogicalType::Decimal(8, 2), ParquetType::INT32, -1,
::arrow::decimal(8, 2)},
- {"decimal(16, 4)", LogicalAnnotation::Decimal(16, 4), ParquetType::INT64, -1,
+ {"decimal(16, 4)", LogicalType::Decimal(16, 4), ParquetType::INT64, -1,
::arrow::decimal(16, 4)},
- {"decimal(32, 8)", LogicalAnnotation::Decimal(32, 8),
- ParquetType::FIXED_LEN_BYTE_ARRAY, 16, ::arrow::decimal(32, 8)},
- {"date", LogicalAnnotation::Date(), ParquetType::INT32, -1, ::arrow::date32()},
- {"time(ms)", LogicalAnnotation::Time(true, LogicalAnnotation::TimeUnit::MILLIS),
+ {"decimal(32, 8)", LogicalType::Decimal(32, 8), ParquetType::FIXED_LEN_BYTE_ARRAY,
+ 16, ::arrow::decimal(32, 8)},
+ {"date", LogicalType::Date(), ParquetType::INT32, -1, ::arrow::date32()},
+ {"time(ms)", LogicalType::Time(true, LogicalType::TimeUnit::MILLIS),
ParquetType::INT32, -1, ::arrow::time32(::arrow::TimeUnit::MILLI)},
- {"time(us)", LogicalAnnotation::Time(true, LogicalAnnotation::TimeUnit::MICROS),
+ {"time(us)", LogicalType::Time(true, LogicalType::TimeUnit::MICROS),
ParquetType::INT64, -1, ::arrow::time64(::arrow::TimeUnit::MICRO)},
- {"time(ns)", LogicalAnnotation::Time(true, LogicalAnnotation::TimeUnit::NANOS),
+ {"time(ns)", LogicalType::Time(true, LogicalType::TimeUnit::NANOS),
ParquetType::INT64, -1, ::arrow::time64(::arrow::TimeUnit::NANO)},
- {"time(ms)", LogicalAnnotation::Time(false, LogicalAnnotation::TimeUnit::MILLIS),
+ {"time(ms)", LogicalType::Time(false, LogicalType::TimeUnit::MILLIS),
ParquetType::INT32, -1, ::arrow::time32(::arrow::TimeUnit::MILLI)},
- {"time(us)", LogicalAnnotation::Time(false, LogicalAnnotation::TimeUnit::MICROS),
+ {"time(us)", LogicalType::Time(false, LogicalType::TimeUnit::MICROS),
ParquetType::INT64, -1, ::arrow::time64(::arrow::TimeUnit::MICRO)},
- {"time(ns)", LogicalAnnotation::Time(false, LogicalAnnotation::TimeUnit::NANOS),
+ {"time(ns)", LogicalType::Time(false, LogicalType::TimeUnit::NANOS),
ParquetType::INT64, -1, ::arrow::time64(::arrow::TimeUnit::NANO)},
- {"timestamp(true, ms)",
- LogicalAnnotation::Timestamp(true, LogicalAnnotation::TimeUnit::MILLIS),
+ {"timestamp(true, ms)", LogicalType::Timestamp(true, LogicalType::TimeUnit::MILLIS),
ParquetType::INT64, -1, ::arrow::timestamp(::arrow::TimeUnit::MILLI, "UTC")},
- {"timestamp(true, us)",
- LogicalAnnotation::Timestamp(true, LogicalAnnotation::TimeUnit::MICROS),
+ {"timestamp(true, us)", LogicalType::Timestamp(true, LogicalType::TimeUnit::MICROS),
ParquetType::INT64, -1, ::arrow::timestamp(::arrow::TimeUnit::MICRO, "UTC")},
- {"timestamp(true, ns)",
- LogicalAnnotation::Timestamp(true, LogicalAnnotation::TimeUnit::NANOS),
+ {"timestamp(true, ns)", LogicalType::Timestamp(true, LogicalType::TimeUnit::NANOS),
ParquetType::INT64, -1, ::arrow::timestamp(::arrow::TimeUnit::NANO, "UTC")},
{"timestamp(false, ms)",
- LogicalAnnotation::Timestamp(false, LogicalAnnotation::TimeUnit::MILLIS),
- ParquetType::INT64, -1, ::arrow::timestamp(::arrow::TimeUnit::MILLI)},
+ LogicalType::Timestamp(false, LogicalType::TimeUnit::MILLIS), ParquetType::INT64,
+ -1, ::arrow::timestamp(::arrow::TimeUnit::MILLI)},
{"timestamp(false, us)",
- LogicalAnnotation::Timestamp(false, LogicalAnnotation::TimeUnit::MICROS),
- ParquetType::INT64, -1, ::arrow::timestamp(::arrow::TimeUnit::MICRO)},
+ LogicalType::Timestamp(false, LogicalType::TimeUnit::MICROS), ParquetType::INT64,
+ -1, ::arrow::timestamp(::arrow::TimeUnit::MICRO)},
{"timestamp(false, ns)",
- LogicalAnnotation::Timestamp(false, LogicalAnnotation::TimeUnit::NANOS),
- ParquetType::INT64, -1, ::arrow::timestamp(::arrow::TimeUnit::NANO)},
- {"int(8, false)", LogicalAnnotation::Int(8, false), ParquetType::INT32, -1,
+ LogicalType::Timestamp(false, LogicalType::TimeUnit::NANOS), ParquetType::INT64,
+ -1, ::arrow::timestamp(::arrow::TimeUnit::NANO)},
+ {"int(8, false)", LogicalType::Int(8, false), ParquetType::INT32, -1,
::arrow::uint8()},
- {"int(8, true)", LogicalAnnotation::Int(8, true), ParquetType::INT32, -1,
+ {"int(8, true)", LogicalType::Int(8, true), ParquetType::INT32, -1,
::arrow::int8()},
- {"int(16, false)", LogicalAnnotation::Int(16, false), ParquetType::INT32, -1,
+ {"int(16, false)", LogicalType::Int(16, false), ParquetType::INT32, -1,
::arrow::uint16()},
- {"int(16, true)", LogicalAnnotation::Int(16, true), ParquetType::INT32, -1,
+ {"int(16, true)", LogicalType::Int(16, true), ParquetType::INT32, -1,
::arrow::int16()},
- {"int(32, false)", LogicalAnnotation::Int(32, false), ParquetType::INT32, -1,
+ {"int(32, false)", LogicalType::Int(32, false), ParquetType::INT32, -1,
::arrow::uint32()},
- {"int(32, true)", LogicalAnnotation::Int(32, true), ParquetType::INT32, -1,
+ {"int(32, true)", LogicalType::Int(32, true), ParquetType::INT32, -1,
::arrow::int32()},
- {"int(64, false)", LogicalAnnotation::Int(64, false), ParquetType::INT64, -1,
+ {"int(64, false)", LogicalType::Int(64, false), ParquetType::INT64, -1,
::arrow::uint64()},
- {"int(64, true)", LogicalAnnotation::Int(64, true), ParquetType::INT64, -1,
+ {"int(64, true)", LogicalType::Int(64, true), ParquetType::INT64, -1,
::arrow::int64()},
- {"json", LogicalAnnotation::JSON(), ParquetType::BYTE_ARRAY, -1, ::arrow::binary()},
- {"bson", LogicalAnnotation::BSON(), ParquetType::BYTE_ARRAY, -1, ::arrow::binary()},
- {"interval", LogicalAnnotation::Interval(), ParquetType::FIXED_LEN_BYTE_ARRAY, 12,
+ {"json", LogicalType::JSON(), ParquetType::BYTE_ARRAY, -1, ::arrow::binary()},
+ {"bson", LogicalType::BSON(), ParquetType::BYTE_ARRAY, -1, ::arrow::binary()},
+ {"interval", LogicalType::Interval(), ParquetType::FIXED_LEN_BYTE_ARRAY, 12,
::arrow::fixed_size_binary(12)},
- {"uuid", LogicalAnnotation::UUID(), ParquetType::FIXED_LEN_BYTE_ARRAY, 16,
+ {"uuid", LogicalType::UUID(), ParquetType::FIXED_LEN_BYTE_ARRAY, 16,
::arrow::fixed_size_binary(16)},
- {"none", LogicalAnnotation::None(), ParquetType::BOOLEAN, -1, ::arrow::boolean()},
- {"none", LogicalAnnotation::None(), ParquetType::INT32, -1, ::arrow::int32()},
- {"none", LogicalAnnotation::None(), ParquetType::INT64, -1, ::arrow::int64()},
- {"none", LogicalAnnotation::None(), ParquetType::FLOAT, -1, ::arrow::float32()},
- {"none", LogicalAnnotation::None(), ParquetType::DOUBLE, -1, ::arrow::float64()},
- {"none", LogicalAnnotation::None(), ParquetType::BYTE_ARRAY, -1, ::arrow::binary()},
- {"none", LogicalAnnotation::None(), ParquetType::FIXED_LEN_BYTE_ARRAY, 64,
+ {"none", LogicalType::None(), ParquetType::BOOLEAN, -1, ::arrow::boolean()},
+ {"none", LogicalType::None(), ParquetType::INT32, -1, ::arrow::int32()},
+ {"none", LogicalType::None(), ParquetType::INT64, -1, ::arrow::int64()},
+ {"none", LogicalType::None(), ParquetType::FLOAT, -1, ::arrow::float32()},
+ {"none", LogicalType::None(), ParquetType::DOUBLE, -1, ::arrow::float64()},
+ {"none", LogicalType::None(), ParquetType::BYTE_ARRAY, -1, ::arrow::binary()},
+ {"none", LogicalType::None(), ParquetType::FIXED_LEN_BYTE_ARRAY, 64,
::arrow::fixed_size_binary(64)},
- {"null", LogicalAnnotation::Null(), ParquetType::BYTE_ARRAY, -1, ::arrow::null()},
+ {"null", LogicalType::Null(), ParquetType::BYTE_ARRAY, -1, ::arrow::null()},
};
std::vector<NodePtr> parquet_fields;
std::vector<std::shared_ptr<Field>> arrow_fields;
for (const FieldConstructionArguments& c : cases) {
- parquet_fields.push_back(PrimitiveNode::Make(
- c.name, Repetition::OPTIONAL, c.annotation, c.physical_type, c.physical_length));
+ parquet_fields.push_back(PrimitiveNode::Make(c.name, Repetition::OPTIONAL,
+ c.logical_type, c.physical_type,
+ c.physical_length));
arrow_fields.push_back(std::make_shared<Field>(c.name, c.datatype));
}
@@ -341,21 +338,21 @@ TEST_F(TestConvertParquetSchema, ParquetFlatDecimals) {
parquet_fields.push_back(PrimitiveNode::Make("flba-decimal", Repetition::OPTIONAL,
ParquetType::FIXED_LEN_BYTE_ARRAY,
- LogicalType::DECIMAL, 4, 8, 4));
+ ConvertedType::DECIMAL, 4, 8, 4));
arrow_fields.push_back(std::make_shared<Field>("flba-decimal", DECIMAL_8_4));
parquet_fields.push_back(PrimitiveNode::Make("binary-decimal", Repetition::OPTIONAL,
ParquetType::BYTE_ARRAY,
- LogicalType::DECIMAL, -1, 8, 4));
+ ConvertedType::DECIMAL, -1, 8, 4));
arrow_fields.push_back(std::make_shared<Field>("binary-decimal", DECIMAL_8_4));
parquet_fields.push_back(PrimitiveNode::Make("int32-decimal", Repetition::OPTIONAL,
- ParquetType::INT32, LogicalType::DECIMAL,
+ ParquetType::INT32, ConvertedType::DECIMAL,
-1, 8, 4));
arrow_fields.push_back(std::make_shared<Field>("int32-decimal", DECIMAL_8_4));
parquet_fields.push_back(PrimitiveNode::Make("int64-decimal", Repetition::OPTIONAL,
- ParquetType::INT64, LogicalType::DECIMAL,
+ ParquetType::INT64, ConvertedType::DECIMAL,
-1, 8, 4));
arrow_fields.push_back(std::make_shared<Field>("int64-decimal", DECIMAL_8_4));
@@ -379,10 +376,10 @@ TEST_F(TestConvertParquetSchema, ParquetLists) {
// }
{
auto element = PrimitiveNode::Make("string", Repetition::OPTIONAL,
- ParquetType::BYTE_ARRAY, LogicalType::UTF8);
+ ParquetType::BYTE_ARRAY, ConvertedType::UTF8);
auto list = GroupNode::Make("list", Repetition::REPEATED, {element});
parquet_fields.push_back(
- GroupNode::Make("my_list", Repetition::REQUIRED, {list}, LogicalType::LIST));
+ GroupNode::Make("my_list", Repetition::REQUIRED, {list}, ConvertedType::LIST));
auto arrow_element = std::make_shared<Field>("string", UTF8, true);
auto arrow_list = std::make_shared<::arrow::ListType>(arrow_element);
arrow_fields.push_back(std::make_shared<Field>("my_list", arrow_list, false));
@@ -396,10 +393,10 @@ TEST_F(TestConvertParquetSchema, ParquetLists) {
// }
{
auto element = PrimitiveNode::Make("string", Repetition::REQUIRED,
- ParquetType::BYTE_ARRAY, LogicalType::UTF8);
+ ParquetType::BYTE_ARRAY, ConvertedType::UTF8);
auto list = GroupNode::Make("list", Repetition::REPEATED, {element});
parquet_fields.push_back(
- GroupNode::Make("my_list", Repetition::OPTIONAL, {list}, LogicalType::LIST));
+ GroupNode::Make("my_list", Repetition::OPTIONAL, {list}, ConvertedType::LIST));
auto arrow_element = std::make_shared<Field>("string", UTF8, false);
auto arrow_list = std::make_shared<::arrow::ListType>(arrow_element);
arrow_fields.push_back(std::make_shared<Field>("my_list", arrow_list, true));
@@ -421,11 +418,11 @@ TEST_F(TestConvertParquetSchema, ParquetLists) {
auto inner_element =
PrimitiveNode::Make("int32", Repetition::REQUIRED, ParquetType::INT32);
auto inner_list = GroupNode::Make("list", Repetition::REPEATED, {inner_element});
- auto element =
- GroupNode::Make("element", Repetition::REQUIRED, {inner_list}, LogicalType::LIST);
+ auto element = GroupNode::Make("element", Repetition::REQUIRED, {inner_list},
+ ConvertedType::LIST);
auto list = GroupNode::Make("list", Repetition::REPEATED, {element});
parquet_fields.push_back(GroupNode::Make("array_of_arrays", Repetition::OPTIONAL,
- {list}, LogicalType::LIST));
+ {list}, ConvertedType::LIST));
auto arrow_inner_element = std::make_shared<Field>("int32", INT32, false);
auto arrow_inner_list = std::make_shared<::arrow::ListType>(arrow_inner_element);
auto arrow_element = std::make_shared<Field>("element", arrow_inner_list, false);
@@ -441,10 +438,10 @@ TEST_F(TestConvertParquetSchema, ParquetLists) {
// }
{
auto element = PrimitiveNode::Make("str", Repetition::REQUIRED,
- ParquetType::BYTE_ARRAY, LogicalType::UTF8);
+ ParquetType::BYTE_ARRAY, ConvertedType::UTF8);
auto list = GroupNode::Make("element", Repetition::REPEATED, {element});
parquet_fields.push_back(
- GroupNode::Make("my_list", Repetition::OPTIONAL, {list}, LogicalType::LIST));
+ GroupNode::Make("my_list", Repetition::OPTIONAL, {list}, ConvertedType::LIST));
auto arrow_element = std::make_shared<Field>("str", UTF8, false);
auto arrow_list = std::make_shared<::arrow::ListType>(arrow_element);
arrow_fields.push_back(std::make_shared<Field>("my_list", arrow_list, true));
@@ -458,7 +455,7 @@ TEST_F(TestConvertParquetSchema, ParquetLists) {
auto element =
PrimitiveNode::Make("element", Repetition::REPEATED, ParquetType::INT32);
parquet_fields.push_back(
- GroupNode::Make("my_list", Repetition::OPTIONAL, {element}, LogicalType::LIST));
+ GroupNode::Make("my_list", Repetition::OPTIONAL, {element}, ConvertedType::LIST));
auto arrow_element = std::make_shared<Field>("element", INT32, false);
auto arrow_list = std::make_shared<::arrow::ListType>(arrow_element);
arrow_fields.push_back(std::make_shared<Field>("my_list", arrow_list, true));
@@ -473,13 +470,13 @@ TEST_F(TestConvertParquetSchema, ParquetLists) {
// }
{
auto str_element = PrimitiveNode::Make("str", Repetition::REQUIRED,
- ParquetType::BYTE_ARRAY, LogicalType::UTF8);
+ ParquetType::BYTE_ARRAY, ConvertedType::UTF8);
auto num_element =
PrimitiveNode::Make("num", Repetition::REQUIRED, ParquetType::INT32);
auto element =
GroupNode::Make("element", Repetition::REPEATED, {str_element, num_element});
parquet_fields.push_back(
- GroupNode::Make("my_list", Repetition::OPTIONAL, {element}, LogicalType::LIST));
+ GroupNode::Make("my_list", Repetition::OPTIONAL, {element}, ConvertedType::LIST));
auto arrow_str = std::make_shared<Field>("str", UTF8, false);
auto arrow_num = std::make_shared<Field>("num", INT32, false);
std::vector<std::shared_ptr<Field>> fields({arrow_str, arrow_num});
@@ -498,10 +495,10 @@ TEST_F(TestConvertParquetSchema, ParquetLists) {
// Special case: group is named array
{
auto element = PrimitiveNode::Make("str", Repetition::REQUIRED,
- ParquetType::BYTE_ARRAY, LogicalType::UTF8);
+ ParquetType::BYTE_ARRAY, ConvertedType::UTF8);
auto array = GroupNode::Make("array", Repetition::REPEATED, {element});
parquet_fields.push_back(
- GroupNode::Make("my_list", Repetition::OPTIONAL, {array}, LogicalType::LIST));
+ GroupNode::Make("my_list", Repetition::OPTIONAL, {array}, ConvertedType::LIST));
auto arrow_str = std::make_shared<Field>("str", UTF8, false);
std::vector<std::shared_ptr<Field>> fields({arrow_str});
auto arrow_struct = std::make_shared<::arrow::StructType>(fields);
@@ -519,10 +516,10 @@ TEST_F(TestConvertParquetSchema, ParquetLists) {
// Special case: group named ends in _tuple
{
auto element = PrimitiveNode::Make("str", Repetition::REQUIRED,
- ParquetType::BYTE_ARRAY, LogicalType::UTF8);
+ ParquetType::BYTE_ARRAY, ConvertedType::UTF8);
auto array = GroupNode::Make("my_list_tuple", Repetition::REPEATED, {element});
parquet_fields.push_back(
- GroupNode::Make("my_list", Repetition::OPTIONAL, {array}, LogicalType::LIST));
+ GroupNode::Make("my_list", Repetition::OPTIONAL, {array}, ConvertedType::LIST));
auto arrow_str = std::make_shared<Field>("str", UTF8, false);
std::vector<std::shared_ptr<Field>> fields({arrow_str});
auto arrow_struct = std::make_shared<::arrow::StructType>(fields);
@@ -777,22 +774,22 @@ TEST_F(TestConvertArrowSchema, ParquetFlatPrimitives) {
arrow_fields.push_back(std::make_shared<Field>("int64", INT64, false));
parquet_fields.push_back(PrimitiveNode::Make("date", Repetition::REQUIRED,
- ParquetType::INT32, LogicalType::DATE));
+ ParquetType::INT32, ConvertedType::DATE));
arrow_fields.push_back(std::make_shared<Field>("date", ::arrow::date32(), false));
parquet_fields.push_back(PrimitiveNode::Make("date64", Repetition::REQUIRED,
- ParquetType::INT32, LogicalType::DATE));
+ ParquetType::INT32, ConvertedType::DATE));
arrow_fields.push_back(std::make_shared<Field>("date64", ::arrow::date64(), false));
parquet_fields.push_back(PrimitiveNode::Make("timestamp", Repetition::REQUIRED,
ParquetType::INT64,
- LogicalType::TIMESTAMP_MILLIS));
+ ConvertedType::TIMESTAMP_MILLIS));
arrow_fields.push_back(std::make_shared<Field>(
"timestamp", ::arrow::timestamp(TimeUnit::MILLI, "UTC"), false));
parquet_fields.push_back(PrimitiveNode::Make("timestamp[us]", Repetition::REQUIRED,
ParquetType::INT64,
- LogicalType::TIMESTAMP_MICROS));
+ ConvertedType::TIMESTAMP_MICROS));
arrow_fields.push_back(std::make_shared<Field>(
"timestamp[us]", ::arrow::timestamp(TimeUnit::MICRO, "UTC"), false));
@@ -805,11 +802,11 @@ TEST_F(TestConvertArrowSchema, ParquetFlatPrimitives) {
arrow_fields.push_back(std::make_shared<Field>("double", DOUBLE));
parquet_fields.push_back(PrimitiveNode::Make(
- "string", Repetition::OPTIONAL, ParquetType::BYTE_ARRAY, LogicalType::UTF8));
+ "string", Repetition::OPTIONAL, ParquetType::BYTE_ARRAY, ConvertedType::UTF8));
arrow_fields.push_back(std::make_shared<Field>("string", UTF8));
parquet_fields.push_back(PrimitiveNode::Make(
- "binary", Repetition::OPTIONAL, ParquetType::BYTE_ARRAY, LogicalType::NONE));
+ "binary", Repetition::OPTIONAL, ParquetType::BYTE_ARRAY, ConvertedType::NONE));
arrow_fields.push_back(std::make_shared<Field>("binary", BINARY));
ASSERT_OK(ConvertSchema(arrow_fields));
@@ -821,87 +818,79 @@ TEST_F(TestConvertArrowSchema, ArrowFields) {
struct FieldConstructionArguments {
std::string name;
std::shared_ptr<::arrow::DataType> datatype;
- std::shared_ptr<const LogicalAnnotation> annotation;
+ std::shared_ptr<const LogicalType> logical_type;
parquet::Type::type physical_type;
int physical_length;
};
std::vector<FieldConstructionArguments> cases = {
- {"boolean", ::arrow::boolean(), LogicalAnnotation::None(), ParquetType::BOOLEAN,
- -1},
- {"binary", ::arrow::binary(), LogicalAnnotation::None(), ParquetType::BYTE_ARRAY,
- -1},
- {"fixed_size_binary", ::arrow::fixed_size_binary(64), LogicalAnnotation::None(),
+ {"boolean", ::arrow::boolean(), LogicalType::None(), ParquetType::BOOLEAN, -1},
+ {"binary", ::arrow::binary(), LogicalType::None(), ParquetType::BYTE_ARRAY, -1},
+ {"fixed_size_binary", ::arrow::fixed_size_binary(64), LogicalType::None(),
ParquetType::FIXED_LEN_BYTE_ARRAY, 64},
- {"uint8", ::arrow::uint8(), LogicalAnnotation::Int(8, false), ParquetType::INT32,
- -1},
- {"int8", ::arrow::int8(), LogicalAnnotation::Int(8, true), ParquetType::INT32, -1},
- {"uint16", ::arrow::uint16(), LogicalAnnotation::Int(16, false), ParquetType::INT32,
- -1},
- {"int16", ::arrow::int16(), LogicalAnnotation::Int(16, true), ParquetType::INT32,
- -1},
- {"uint32", ::arrow::uint32(), LogicalAnnotation::None(), ParquetType::INT64,
+ {"uint8", ::arrow::uint8(), LogicalType::Int(8, false), ParquetType::INT32, -1},
+ {"int8", ::arrow::int8(), LogicalType::Int(8, true), ParquetType::INT32, -1},
+ {"uint16", ::arrow::uint16(), LogicalType::Int(16, false), ParquetType::INT32, -1},
+ {"int16", ::arrow::int16(), LogicalType::Int(16, true), ParquetType::INT32, -1},
+ {"uint32", ::arrow::uint32(), LogicalType::None(), ParquetType::INT64,
-1}, // Parquet 1.0
- {"int32", ::arrow::int32(), LogicalAnnotation::None(), ParquetType::INT32, -1},
- {"uint64", ::arrow::uint64(), LogicalAnnotation::Int(64, false), ParquetType::INT64,
- -1},
- {"int64", ::arrow::int64(), LogicalAnnotation::None(), ParquetType::INT64, -1},
- {"float32", ::arrow::float32(), LogicalAnnotation::None(), ParquetType::FLOAT, -1},
- {"float64", ::arrow::float64(), LogicalAnnotation::None(), ParquetType::DOUBLE, -1},
- {"utf8", ::arrow::utf8(), LogicalAnnotation::String(), ParquetType::BYTE_ARRAY, -1},
- {"decimal(1, 0)", ::arrow::decimal(1, 0), LogicalAnnotation::Decimal(1, 0),
+ {"int32", ::arrow::int32(), LogicalType::None(), ParquetType::INT32, -1},
+ {"uint64", ::arrow::uint64(), LogicalType::Int(64, false), ParquetType::INT64, -1},
+ {"int64", ::arrow::int64(), LogicalType::None(), ParquetType::INT64, -1},
+ {"float32", ::arrow::float32(), LogicalType::None(), ParquetType::FLOAT, -1},
+ {"float64", ::arrow::float64(), LogicalType::None(), ParquetType::DOUBLE, -1},
+ {"utf8", ::arrow::utf8(), LogicalType::String(), ParquetType::BYTE_ARRAY, -1},
+ {"decimal(1, 0)", ::arrow::decimal(1, 0), LogicalType::Decimal(1, 0),
ParquetType::FIXED_LEN_BYTE_ARRAY, 1},
- {"decimal(8, 2)", ::arrow::decimal(8, 2), LogicalAnnotation::Decimal(8, 2),
+ {"decimal(8, 2)", ::arrow::decimal(8, 2), LogicalType::Decimal(8, 2),
ParquetType::FIXED_LEN_BYTE_ARRAY, 4},
- {"decimal(16, 4)", ::arrow::decimal(16, 4), LogicalAnnotation::Decimal(16, 4),
+ {"decimal(16, 4)", ::arrow::decimal(16, 4), LogicalType::Decimal(16, 4),
ParquetType::FIXED_LEN_BYTE_ARRAY, 7},
- {"decimal(32, 8)", ::arrow::decimal(32, 8), LogicalAnnotation::Decimal(32, 8),
+ {"decimal(32, 8)", ::arrow::decimal(32, 8), LogicalType::Decimal(32, 8),
ParquetType::FIXED_LEN_BYTE_ARRAY, 14},
{"time32", ::arrow::time32(::arrow::TimeUnit::MILLI),
- LogicalAnnotation::Time(false, LogicalAnnotation::TimeUnit::MILLIS),
- ParquetType::INT32, -1},
+ LogicalType::Time(false, LogicalType::TimeUnit::MILLIS), ParquetType::INT32, -1},
{"time64(microsecond)", ::arrow::time64(::arrow::TimeUnit::MICRO),
- LogicalAnnotation::Time(false, LogicalAnnotation::TimeUnit::MICROS),
- ParquetType::INT64, -1},
+ LogicalType::Time(false, LogicalType::TimeUnit::MICROS), ParquetType::INT64, -1},
{"time64(nanosecond)", ::arrow::time64(::arrow::TimeUnit::NANO),
- LogicalAnnotation::Time(false, LogicalAnnotation::TimeUnit::NANOS),
- ParquetType::INT64, -1},
+ LogicalType::Time(false, LogicalType::TimeUnit::NANOS), ParquetType::INT64, -1},
{"timestamp(millisecond)", ::arrow::timestamp(::arrow::TimeUnit::MILLI),
- LogicalAnnotation::Timestamp(false, LogicalAnnotation::TimeUnit::MILLIS),
- ParquetType::INT64, -1},
+ LogicalType::Timestamp(false, LogicalType::TimeUnit::MILLIS), ParquetType::INT64,
+ -1},
{"timestamp(microsecond)", ::arrow::timestamp(::arrow::TimeUnit::MICRO),
- LogicalAnnotation::Timestamp(false, LogicalAnnotation::TimeUnit::MICROS),
- ParquetType::INT64, -1},
+ LogicalType::Timestamp(false, LogicalType::TimeUnit::MICROS), ParquetType::INT64,
+ -1},
{"timestamp(nanosecond)", ::arrow::timestamp(::arrow::TimeUnit::NANO),
- LogicalAnnotation::Timestamp(false, LogicalAnnotation::TimeUnit::MICROS),
- ParquetType::INT64, -1},
+ LogicalType::Timestamp(false, LogicalType::TimeUnit::MICROS), ParquetType::INT64,
+ -1},
{"timestamp(millisecond, UTC)", ::arrow::timestamp(::arrow::TimeUnit::MILLI, "UTC"),
- LogicalAnnotation::Timestamp(true, LogicalAnnotation::TimeUnit::MILLIS),
- ParquetType::INT64, -1},
+ LogicalType::Timestamp(true, LogicalType::TimeUnit::MILLIS), ParquetType::INT64,
+ -1},
{"timestamp(microsecond, UTC)", ::arrow::timestamp(::arrow::TimeUnit::MICRO, "UTC"),
- LogicalAnnotation::Timestamp(true, LogicalAnnotation::TimeUnit::MICROS),
- ParquetType::INT64, -1},
+ LogicalType::Timestamp(true, LogicalType::TimeUnit::MICROS), ParquetType::INT64,
+ -1},
{"timestamp(nanosecond, UTC)", ::arrow::timestamp(::arrow::TimeUnit::NANO, "UTC"),
- LogicalAnnotation::Timestamp(true, LogicalAnnotation::TimeUnit::MICROS),
- ParquetType::INT64, -1},
+ LogicalType::Timestamp(true, LogicalType::TimeUnit::MICROS), ParquetType::INT64,
+ -1},
{"timestamp(millisecond, CET)", ::arrow::timestamp(::arrow::TimeUnit::MILLI, "CET"),
- LogicalAnnotation::Timestamp(true, LogicalAnnotation::TimeUnit::MILLIS),
- ParquetType::INT64, -1},
+ LogicalType::Timestamp(true, LogicalType::TimeUnit::MILLIS), ParquetType::INT64,
+ -1},
{"timestamp(microsecond, CET)", ::arrow::timestamp(::arrow::TimeUnit::MICRO, "CET"),
- LogicalAnnotation::Timestamp(true, LogicalAnnotation::TimeUnit::MICROS),
- ParquetType::INT64, -1},
+ LogicalType::Timestamp(true, LogicalType::TimeUnit::MICROS), ParquetType::INT64,
+ -1},
{"timestamp(nanosecond, CET)", ::arrow::timestamp(::arrow::TimeUnit::NANO, "CET"),
- LogicalAnnotation::Timestamp(true, LogicalAnnotation::TimeUnit::MICROS),
- ParquetType::INT64, -1},
- {"null", ::arrow::null(), LogicalAnnotation::Null(), ParquetType::INT32, -1}};
+ LogicalType::Timestamp(true, LogicalType::TimeUnit::MICROS), ParquetType::INT64,
+ -1},
+ {"null", ::arrow::null(), LogicalType::Null(), ParquetType::INT32, -1}};
std::vector<std::shared_ptr<Field>> arrow_fields;
std::vector<NodePtr> parquet_fields;
for (const FieldConstructionArguments& c : cases) {
arrow_fields.push_back(std::make_shared<Field>(c.name, c.datatype, false));
- parquet_fields.push_back(PrimitiveNode::Make(
- c.name, Repetition::REQUIRED, c.annotation, c.physical_type, c.physical_length));
+ parquet_fields.push_back(PrimitiveNode::Make(c.name, Repetition::REQUIRED,
+ c.logical_type, c.physical_type,
+ c.physical_length));
}
ASSERT_OK(ConvertSchema(arrow_fields));
@@ -940,12 +929,12 @@ TEST_F(TestConvertArrowSchema, ParquetFlatPrimitivesAsDictionaries) {
"int64", ::arrow::dictionary(::arrow::int8(), ::arrow::int64()), false));
parquet_fields.push_back(PrimitiveNode::Make("date", Repetition::REQUIRED,
- ParquetType::INT32, LogicalType::DATE));
+ ParquetType::INT32, ConvertedType::DATE));
arrow_fields.push_back(std::make_shared<Field>(
"date", ::arrow::dictionary(::arrow::int8(), ::arrow::date32()), false));
parquet_fields.push_back(PrimitiveNode::Make("date64", Repetition::REQUIRED,
- ParquetType::INT32, LogicalType::DATE));
+ ParquetType::INT32, ConvertedType::DATE));
arrow_fields.push_back(std::make_shared<Field>(
"date64", ::arrow::dictionary(::arrow::int8(), ::arrow::date64()), false));
@@ -960,12 +949,12 @@ TEST_F(TestConvertArrowSchema, ParquetFlatPrimitivesAsDictionaries) {
"double", ::arrow::dictionary(::arrow::int8(), ::arrow::float64())));
parquet_fields.push_back(PrimitiveNode::Make(
- "string", Repetition::OPTIONAL, ParquetType::BYTE_ARRAY, LogicalType::UTF8));
+ "string", Repetition::OPTIONAL, ParquetType::BYTE_ARRAY, ConvertedType::UTF8));
arrow_fields.push_back(std::make_shared<Field>(
"string", ::arrow::dictionary(::arrow::int8(), ::arrow::utf8())));
parquet_fields.push_back(PrimitiveNode::Make(
- "binary", Repetition::OPTIONAL, ParquetType::BYTE_ARRAY, LogicalType::NONE));
+ "binary", Repetition::OPTIONAL, ParquetType::BYTE_ARRAY, ConvertedType::NONE));
arrow_fields.push_back(std::make_shared<Field>(
"binary", ::arrow::dictionary(::arrow::int8(), ::arrow::binary())));
@@ -988,10 +977,10 @@ TEST_F(TestConvertArrowSchema, ParquetLists) {
// }
{
auto element = PrimitiveNode::Make("string", Repetition::OPTIONAL,
- ParquetType::BYTE_ARRAY, LogicalType::UTF8);
+ ParquetType::BYTE_ARRAY, ConvertedType::UTF8);
auto list = GroupNode::Make("list", Repetition::REPEATED, {element});
parquet_fields.push_back(
- GroupNode::Make("my_list", Repetition::REQUIRED, {list}, LogicalType::LIST));
+ GroupNode::Make("my_list", Repetition::REQUIRED, {list}, ConvertedType::LIST));
auto arrow_element = std::make_shared<Field>("string", UTF8, true);
auto arrow_list = std::make_shared<::arrow::ListType>(arrow_element);
arrow_fields.push_back(std::make_shared<Field>("my_list", arrow_list, false));
@@ -1005,10 +994,10 @@ TEST_F(TestConvertArrowSchema, ParquetLists) {
// }
{
auto element = PrimitiveNode::Make("string", Repetition::REQUIRED,
- ParquetType::BYTE_ARRAY, LogicalType::UTF8);
+ ParquetType::BYTE_ARRAY, ConvertedType::UTF8);
auto list = GroupNode::Make("list", Repetition::REPEATED, {element});
parquet_fields.push_back(
- GroupNode::Make("my_list", Repetition::OPTIONAL, {list}, LogicalType::LIST));
+ GroupNode::Make("my_list", Repetition::OPTIONAL, {list}, ConvertedType::LIST));
auto arrow_element = std::make_shared<Field>("string", UTF8, false);
auto arrow_list = std::make_shared<::arrow::ListType>(arrow_element);
arrow_fields.push_back(std::make_shared<Field>("my_list", arrow_list, true));
diff --git a/cpp/src/parquet/arrow/record_reader.cc b/cpp/src/parquet/arrow/record_reader.cc
index b7f9d90..71bedec 100644
--- a/cpp/src/parquet/arrow/record_reader.cc
+++ b/cpp/src/parquet/arrow/record_reader.cc
@@ -631,7 +631,7 @@ class ByteArrayChunkedRecordReader : public TypedRecordReader<ByteArrayType> {
// ARROW-4688(wesm): Using 2^31 - 1 chunks for now
constexpr int32_t kBinaryChunksize = 2147483647;
DCHECK_EQ(descr_->physical_type(), Type::BYTE_ARRAY);
- if (descr_->logical_type() == LogicalType::UTF8) {
+ if (descr_->converted_type() == ConvertedType::UTF8) {
builder_.reset(
new ::arrow::internal::ChunkedStringBuilder(kBinaryChunksize, pool_));
} else {
@@ -867,7 +867,7 @@ bool TypedRecordReader<DType>::ReadNewPage() {
std::shared_ptr<RecordReader> RecordReader::MakeByteArrayRecordReader(
const ColumnDescriptor* descr, arrow::MemoryPool* pool, bool read_dictionary) {
if (read_dictionary) {
- if (descr->logical_type() == LogicalType::UTF8) {
+ if (descr->converted_type() == ConvertedType::UTF8) {
using Builder = ::arrow::StringDictionaryBuilder;
return std::shared_ptr<RecordReader>(
new RecordReader(new ByteArrayDictionaryRecordReader<Builder>(descr, pool)));
diff --git a/cpp/src/parquet/arrow/schema.cc b/cpp/src/parquet/arrow/schema.cc
index 22b8297..04587a9 100644
--- a/cpp/src/parquet/arrow/schema.cc
+++ b/cpp/src/parquet/arrow/schema.cc
@@ -49,7 +49,7 @@ using parquet::schema::NodePtr;
using parquet::schema::PrimitiveNode;
using ParquetType = parquet::Type;
-using parquet::LogicalAnnotation;
+using parquet::ConvertedType;
using parquet::LogicalType;
namespace parquet {
@@ -60,16 +60,16 @@ const auto TIMESTAMP_MS = ::arrow::timestamp(::arrow::TimeUnit::MILLI);
const auto TIMESTAMP_US = ::arrow::timestamp(::arrow::TimeUnit::MICRO);
const auto TIMESTAMP_NS = ::arrow::timestamp(::arrow::TimeUnit::NANO);
-static Status MakeArrowDecimal(const LogicalAnnotation& annotation,
+static Status MakeArrowDecimal(const LogicalType& logical_type,
std::shared_ptr<ArrowType>* out) {
- const auto& decimal = checked_cast<const DecimalAnnotation&>(annotation);
+ const auto& decimal = checked_cast<const DecimalLogicalType&>(logical_type);
*out = ::arrow::decimal(decimal.precision(), decimal.scale());
return Status::OK();
}
-static Status MakeArrowInt(const LogicalAnnotation& annotation,
+static Status MakeArrowInt(const LogicalType& logical_type,
std::shared_ptr<ArrowType>* out) {
- const auto& integer = checked_cast<const IntAnnotation&>(annotation);
+ const auto& integer = checked_cast<const IntLogicalType&>(logical_type);
switch (integer.bit_width()) {
case 8:
*out = integer.is_signed() ? ::arrow::int8() : ::arrow::uint8();
@@ -81,180 +81,179 @@ static Status MakeArrowInt(const LogicalAnnotation& annotation,
*out = integer.is_signed() ? ::arrow::int32() : ::arrow::uint32();
break;
default:
- return Status::TypeError(annotation.ToString(),
+ return Status::TypeError(logical_type.ToString(),
" can not annotate physical type Int32");
}
return Status::OK();
}
-static Status MakeArrowInt64(const LogicalAnnotation& annotation,
+static Status MakeArrowInt64(const LogicalType& logical_type,
std::shared_ptr<ArrowType>* out) {
- const auto& integer = checked_cast<const IntAnnotation&>(annotation);
+ const auto& integer = checked_cast<const IntLogicalType&>(logical_type);
switch (integer.bit_width()) {
case 64:
*out = integer.is_signed() ? ::arrow::int64() : ::arrow::uint64();
break;
default:
- return Status::TypeError(annotation.ToString(),
+ return Status::TypeError(logical_type.ToString(),
" can not annotate physical type Int64");
}
return Status::OK();
}
-static Status MakeArrowTime32(const LogicalAnnotation& annotation,
+static Status MakeArrowTime32(const LogicalType& logical_type,
std::shared_ptr<ArrowType>* out) {
- const auto& time = checked_cast<const TimeAnnotation&>(annotation);
+ const auto& time = checked_cast<const TimeLogicalType&>(logical_type);
switch (time.time_unit()) {
- case LogicalAnnotation::TimeUnit::MILLIS:
+ case LogicalType::TimeUnit::MILLIS:
*out = ::arrow::time32(::arrow::TimeUnit::MILLI);
break;
default:
- return Status::TypeError(annotation.ToString(),
+ return Status::TypeError(logical_type.ToString(),
" can not annotate physical type Time32");
}
return Status::OK();
}
-static Status MakeArrowTime64(const LogicalAnnotation& annotation,
+static Status MakeArrowTime64(const LogicalType& logical_type,
std::shared_ptr<ArrowType>* out) {
- const auto& time = checked_cast<const TimeAnnotation&>(annotation);
+ const auto& time = checked_cast<const TimeLogicalType&>(logical_type);
switch (time.time_unit()) {
- case LogicalAnnotation::TimeUnit::MICROS:
+ case LogicalType::TimeUnit::MICROS:
*out = ::arrow::time64(::arrow::TimeUnit::MICRO);
break;
- case LogicalAnnotation::TimeUnit::NANOS:
+ case LogicalType::TimeUnit::NANOS:
*out = ::arrow::time64(::arrow::TimeUnit::NANO);
break;
default:
- return Status::TypeError(annotation.ToString(),
+ return Status::TypeError(logical_type.ToString(),
" can not annotate physical type Time64");
}
return Status::OK();
}
-static Status MakeArrowTimestamp(const LogicalAnnotation& annotation,
+static Status MakeArrowTimestamp(const LogicalType& logical_type,
std::shared_ptr<ArrowType>* out) {
static const char* utc = "UTC";
- const auto& timestamp = checked_cast<const TimestampAnnotation&>(annotation);
+ const auto& timestamp = checked_cast<const TimestampLogicalType&>(logical_type);
switch (timestamp.time_unit()) {
- case LogicalAnnotation::TimeUnit::MILLIS:
+ case LogicalType::TimeUnit::MILLIS:
*out = (timestamp.is_adjusted_to_utc()
? ::arrow::timestamp(::arrow::TimeUnit::MILLI, utc)
: ::arrow::timestamp(::arrow::TimeUnit::MILLI));
break;
- case LogicalAnnotation::TimeUnit::MICROS:
+ case LogicalType::TimeUnit::MICROS:
*out = (timestamp.is_adjusted_to_utc()
? ::arrow::timestamp(::arrow::TimeUnit::MICRO, utc)
: ::arrow::timestamp(::arrow::TimeUnit::MICRO));
break;
- case LogicalAnnotation::TimeUnit::NANOS:
+ case LogicalType::TimeUnit::NANOS:
*out = (timestamp.is_adjusted_to_utc()
? ::arrow::timestamp(::arrow::TimeUnit::NANO, utc)
: ::arrow::timestamp(::arrow::TimeUnit::NANO));
break;
default:
- return Status::TypeError("Unrecognized time unit in timestamp annotation: ",
- annotation.ToString());
+ return Status::TypeError("Unrecognized time unit in timestamp logical_type: ",
+ logical_type.ToString());
}
return Status::OK();
}
-static Status FromByteArray(const LogicalAnnotation& annotation,
+static Status FromByteArray(const LogicalType& logical_type,
std::shared_ptr<ArrowType>* out) {
- switch (annotation.type()) {
- case LogicalAnnotation::Type::STRING:
+ switch (logical_type.type()) {
+ case LogicalType::Type::STRING:
*out = ::arrow::utf8();
break;
- case LogicalAnnotation::Type::DECIMAL:
- RETURN_NOT_OK(MakeArrowDecimal(annotation, out));
+ case LogicalType::Type::DECIMAL:
+ RETURN_NOT_OK(MakeArrowDecimal(logical_type, out));
break;
- case LogicalAnnotation::Type::NONE:
- case LogicalAnnotation::Type::ENUM:
- case LogicalAnnotation::Type::JSON:
- case LogicalAnnotation::Type::BSON:
+ case LogicalType::Type::NONE:
+ case LogicalType::Type::ENUM:
+ case LogicalType::Type::JSON:
+ case LogicalType::Type::BSON:
*out = ::arrow::binary();
break;
default:
- return Status::NotImplemented("Unhandled logical annotation ",
- annotation.ToString(), " for binary array");
+ return Status::NotImplemented("Unhandled logical logical_type ",
+ logical_type.ToString(), " for binary array");
}
return Status::OK();
}
-static Status FromFLBA(const LogicalAnnotation& annotation, int32_t physical_length,
+static Status FromFLBA(const LogicalType& logical_type, int32_t physical_length,
std::shared_ptr<ArrowType>* out) {
- switch (annotation.type()) {
- case LogicalAnnotation::Type::DECIMAL:
- RETURN_NOT_OK(MakeArrowDecimal(annotation, out));
+ switch (logical_type.type()) {
+ case LogicalType::Type::DECIMAL:
+ RETURN_NOT_OK(MakeArrowDecimal(logical_type, out));
break;
- case LogicalAnnotation::Type::NONE:
- case LogicalAnnotation::Type::INTERVAL:
- case LogicalAnnotation::Type::UUID:
+ case LogicalType::Type::NONE:
+ case LogicalType::Type::INTERVAL:
+ case LogicalType::Type::UUID:
*out = ::arrow::fixed_size_binary(physical_length);
break;
default:
- return Status::NotImplemented("Unhandled logical annotation ",
- annotation.ToString(),
+ return Status::NotImplemented("Unhandled logical logical_type ",
+ logical_type.ToString(),
" for fixed-length binary array");
}
return Status::OK();
}
-static Status FromInt32(const LogicalAnnotation& annotation,
+static Status FromInt32(const LogicalType& logical_type,
std::shared_ptr<ArrowType>* out) {
- switch (annotation.type()) {
- case LogicalAnnotation::Type::INT:
- RETURN_NOT_OK(MakeArrowInt(annotation, out));
+ switch (logical_type.type()) {
+ case LogicalType::Type::INT:
+ RETURN_NOT_OK(MakeArrowInt(logical_type, out));
break;
- case LogicalAnnotation::Type::DATE:
+ case LogicalType::Type::DATE:
*out = ::arrow::date32();
break;
- case LogicalAnnotation::Type::TIME:
- RETURN_NOT_OK(MakeArrowTime32(annotation, out));
+ case LogicalType::Type::TIME:
+ RETURN_NOT_OK(MakeArrowTime32(logical_type, out));
break;
- case LogicalAnnotation::Type::DECIMAL:
- RETURN_NOT_OK(MakeArrowDecimal(annotation, out));
+ case LogicalType::Type::DECIMAL:
+ RETURN_NOT_OK(MakeArrowDecimal(logical_type, out));
break;
- case LogicalAnnotation::Type::NONE:
+ case LogicalType::Type::NONE:
*out = ::arrow::int32();
break;
default:
- return Status::NotImplemented("Unhandled logical type ", annotation.ToString(),
+ return Status::NotImplemented("Unhandled logical type ", logical_type.ToString(),
" for INT32");
}
return Status::OK();
}
-static Status FromInt64(const LogicalAnnotation& annotation,
+static Status FromInt64(const LogicalType& logical_type,
std::shared_ptr<ArrowType>* out) {
- switch (annotation.type()) {
- case LogicalAnnotation::Type::INT:
- RETURN_NOT_OK(MakeArrowInt64(annotation, out));
+ switch (logical_type.type()) {
+ case LogicalType::Type::INT:
+ RETURN_NOT_OK(MakeArrowInt64(logical_type, out));
break;
- case LogicalAnnotation::Type::DECIMAL:
- RETURN_NOT_OK(MakeArrowDecimal(annotation, out));
+ case LogicalType::Type::DECIMAL:
+ RETURN_NOT_OK(MakeArrowDecimal(logical_type, out));
break;
- case LogicalAnnotation::Type::TIMESTAMP:
- RETURN_NOT_OK(MakeArrowTimestamp(annotation, out));
+ case LogicalType::Type::TIMESTAMP:
+ RETURN_NOT_OK(MakeArrowTimestamp(logical_type, out));
break;
- case LogicalAnnotation::Type::TIME:
- RETURN_NOT_OK(MakeArrowTime64(annotation, out));
+ case LogicalType::Type::TIME:
+ RETURN_NOT_OK(MakeArrowTime64(logical_type, out));
break;
- case LogicalAnnotation::Type::NONE:
+ case LogicalType::Type::NONE:
*out = ::arrow::int64();
break;
default:
- return Status::NotImplemented("Unhandled logical type ", annotation.ToString(),
+ return Status::NotImplemented("Unhandled logical type ", logical_type.ToString(),
" for INT64");
}
return Status::OK();
}
Status FromPrimitive(const PrimitiveNode& primitive, std::shared_ptr<ArrowType>* out) {
- const std::shared_ptr<const LogicalAnnotation>& annotation =
- primitive.logical_annotation();
- if (annotation->is_invalid() || annotation->is_null()) {
+ const std::shared_ptr<const LogicalType>& logical_type = primitive.logical_type();
+ if (logical_type->is_invalid() || logical_type->is_null()) {
*out = ::arrow::null();
return Status::OK();
}
@@ -264,10 +263,10 @@ Status FromPrimitive(const PrimitiveNode& primitive, std::shared_ptr<ArrowType>*
*out = ::arrow::boolean();
break;
case ParquetType::INT32:
- RETURN_NOT_OK(FromInt32(*annotation, out));
+ RETURN_NOT_OK(FromInt32(*logical_type, out));
break;
case ParquetType::INT64:
- RETURN_NOT_OK(FromInt64(*annotation, out));
+ RETURN_NOT_OK(FromInt64(*logical_type, out));
break;
case ParquetType::INT96:
*out = TIMESTAMP_NS;
@@ -279,10 +278,10 @@ Status FromPrimitive(const PrimitiveNode& primitive, std::shared_ptr<ArrowType>*
*out = ::arrow::float64();
break;
case ParquetType::BYTE_ARRAY:
- RETURN_NOT_OK(FromByteArray(*annotation, out));
+ RETURN_NOT_OK(FromByteArray(*logical_type, out));
break;
case ParquetType::FIXED_LEN_BYTE_ARRAY:
- RETURN_NOT_OK(FromFLBA(*annotation, primitive.type_length(), out));
+ RETURN_NOT_OK(FromFLBA(*logical_type, primitive.type_length(), out));
break;
default: {
// PARQUET-1565: This can occur if the file is corrupt
@@ -409,7 +408,7 @@ Status NodeToFieldInternal(const Node& node,
}
} else if (node.is_group()) {
const auto& group = static_cast<const GroupNode&>(node);
- if (node.logical_annotation()->is_list()) {
+ if (node.logical_type()->is_list()) {
RETURN_NOT_OK(NodeToList(group, included_leaf_nodes, &type));
} else {
RETURN_NOT_OK(StructFromGroup(group, included_leaf_nodes, &type));
@@ -499,7 +498,7 @@ Status ListToNode(const std::shared_ptr<::arrow::ListType>& type, const std::str
RETURN_NOT_OK(FieldToNode(type->value_field(), properties, arrow_properties, &element));
NodePtr list = GroupNode::Make("list", Repetition::REPEATED, {element});
- *out = GroupNode::Make(name, repetition, {list}, LogicalAnnotation::List());
+ *out = GroupNode::Make(name, repetition, {list}, LogicalType::List());
return Status::OK();
}
@@ -519,28 +518,28 @@ Status StructToNode(const std::shared_ptr<::arrow::StructType>& type,
return Status::OK();
}
-static std::shared_ptr<const LogicalAnnotation> TimestampAnnotationFromArrowTimestamp(
+static std::shared_ptr<const LogicalType> TimestampLogicalTypeFromArrowTimestamp(
const ::arrow::TimestampType& timestamp_type, ::arrow::TimeUnit::type time_unit) {
const bool utc = !(timestamp_type.timezone().empty());
switch (time_unit) {
case ::arrow::TimeUnit::MILLI:
- return LogicalAnnotation::Timestamp(utc, LogicalAnnotation::TimeUnit::MILLIS);
+ return LogicalType::Timestamp(utc, LogicalType::TimeUnit::MILLIS);
case ::arrow::TimeUnit::MICRO:
- return LogicalAnnotation::Timestamp(utc, LogicalAnnotation::TimeUnit::MICROS);
+ return LogicalType::Timestamp(utc, LogicalType::TimeUnit::MICROS);
case ::arrow::TimeUnit::NANO:
- return LogicalAnnotation::Timestamp(utc, LogicalAnnotation::TimeUnit::NANOS);
+ return LogicalType::Timestamp(utc, LogicalType::TimeUnit::NANOS);
case ::arrow::TimeUnit::SECOND:
// No equivalent parquet logical type.
break;
}
- return LogicalAnnotation::None();
+ return LogicalType::None();
}
static Status GetTimestampMetadata(const ::arrow::TimestampType& type,
const WriterProperties& properties,
const ArrowWriterProperties& arrow_properties,
ParquetType::type* physical_type,
- std::shared_ptr<const LogicalAnnotation>* annotation) {
+ std::shared_ptr<const LogicalType>* logical_type) {
const bool coerce = arrow_properties.coerce_timestamps_enabled();
const auto target_unit =
coerce ? arrow_properties.coerce_timestamps_unit() : type.unit();
@@ -553,7 +552,7 @@ static Status GetTimestampMetadata(const ::arrow::TimestampType& type,
}
*physical_type = ParquetType::INT64;
- *annotation = TimestampAnnotationFromArrowTimestamp(type, target_unit);
+ *logical_type = TimestampLogicalTypeFromArrowTimestamp(type, target_unit);
// The user is explicitly asking for timestamp data to be converted to the
// specified units (target_unit).
@@ -590,7 +589,8 @@ static Status GetTimestampMetadata(const ::arrow::TimestampType& type,
// must be coerced to microseconds.
if (properties.version() == ::parquet::ParquetVersion::PARQUET_1_0 &&
type.unit() == ::arrow::TimeUnit::NANO) {
- *annotation = TimestampAnnotationFromArrowTimestamp(type, ::arrow::TimeUnit::MICRO);
+ *logical_type =
+ TimestampLogicalTypeFromArrowTimestamp(type, ::arrow::TimeUnit::MICRO);
return Status::OK();
}
@@ -598,7 +598,8 @@ static Status GetTimestampMetadata(const ::arrow::TimestampType& type,
// however the Arrow seconds time unit can not be represented (annotated) in
// any version of Parquet and so must be coerced to milliseconds.
if (type.unit() == ::arrow::TimeUnit::SECOND) {
- *annotation = TimestampAnnotationFromArrowTimestamp(type, ::arrow::TimeUnit::MILLI);
+ *logical_type =
+ TimestampLogicalTypeFromArrowTimestamp(type, ::arrow::TimeUnit::MILLI);
return Status::OK();
}
@@ -608,7 +609,7 @@ static Status GetTimestampMetadata(const ::arrow::TimestampType& type,
Status FieldToNode(const std::shared_ptr<Field>& field,
const WriterProperties& properties,
const ArrowWriterProperties& arrow_properties, NodePtr* out) {
- std::shared_ptr<const LogicalAnnotation> annotation = LogicalAnnotation::None();
+ std::shared_ptr<const LogicalType> logical_type = LogicalType::None();
ParquetType::type type;
Repetition::type repetition =
field->nullable() ? Repetition::OPTIONAL : Repetition::REQUIRED;
@@ -620,33 +621,33 @@ Status FieldToNode(const std::shared_ptr<Field>& field,
switch (field->type()->id()) {
case ArrowTypeId::NA:
type = ParquetType::INT32;
- annotation = LogicalAnnotation::Null();
+ logical_type = LogicalType::Null();
break;
case ArrowTypeId::BOOL:
type = ParquetType::BOOLEAN;
break;
case ArrowTypeId::UINT8:
type = ParquetType::INT32;
- annotation = LogicalAnnotation::Int(8, false);
+ logical_type = LogicalType::Int(8, false);
break;
case ArrowTypeId::INT8:
type = ParquetType::INT32;
- annotation = LogicalAnnotation::Int(8, true);
+ logical_type = LogicalType::Int(8, true);
break;
case ArrowTypeId::UINT16:
type = ParquetType::INT32;
- annotation = LogicalAnnotation::Int(16, false);
+ logical_type = LogicalType::Int(16, false);
break;
case ArrowTypeId::INT16:
type = ParquetType::INT32;
- annotation = LogicalAnnotation::Int(16, true);
+ logical_type = LogicalType::Int(16, true);
break;
case ArrowTypeId::UINT32:
if (properties.version() == ::parquet::ParquetVersion::PARQUET_1_0) {
type = ParquetType::INT64;
} else {
type = ParquetType::INT32;
- annotation = LogicalAnnotation::Int(32, false);
+ logical_type = LogicalType::Int(32, false);
}
break;
case ArrowTypeId::INT32:
@@ -654,7 +655,7 @@ Status FieldToNode(const std::shared_ptr<Field>& field,
break;
case ArrowTypeId::UINT64:
type = ParquetType::INT64;
- annotation = LogicalAnnotation::Int(64, false);
+ logical_type = LogicalType::Int(64, false);
break;
case ArrowTypeId::INT64:
type = ParquetType::INT64;
@@ -667,7 +668,7 @@ Status FieldToNode(const std::shared_ptr<Field>& field,
break;
case ArrowTypeId::STRING:
type = ParquetType::BYTE_ARRAY;
- annotation = LogicalAnnotation::String();
+ logical_type = LogicalType::String();
break;
case ArrowTypeId::BINARY:
type = ParquetType::BYTE_ARRAY;
@@ -685,32 +686,32 @@ Status FieldToNode(const std::shared_ptr<Field>& field,
precision = decimal_type.precision();
scale = decimal_type.scale();
length = DecimalSize(precision);
- PARQUET_CATCH_NOT_OK(annotation = LogicalAnnotation::Decimal(precision, scale));
+ PARQUET_CATCH_NOT_OK(logical_type = LogicalType::Decimal(precision, scale));
} break;
case ArrowTypeId::DATE32:
type = ParquetType::INT32;
- annotation = LogicalAnnotation::Date();
+ logical_type = LogicalType::Date();
break;
case ArrowTypeId::DATE64:
type = ParquetType::INT32;
- annotation = LogicalAnnotation::Date();
+ logical_type = LogicalType::Date();
break;
case ArrowTypeId::TIMESTAMP:
RETURN_NOT_OK(
GetTimestampMetadata(static_cast<::arrow::TimestampType&>(*field->type()),
- properties, arrow_properties, &type, &annotation));
+ properties, arrow_properties, &type, &logical_type));
break;
case ArrowTypeId::TIME32:
type = ParquetType::INT32;
- annotation = LogicalAnnotation::Time(false, LogicalAnnotation::TimeUnit::MILLIS);
+ logical_type = LogicalType::Time(false, LogicalType::TimeUnit::MILLIS);
break;
case ArrowTypeId::TIME64: {
type = ParquetType::INT64;
auto time_type = static_cast<::arrow::Time64Type*>(field->type().get());
if (time_type->unit() == ::arrow::TimeUnit::NANO) {
- annotation = LogicalAnnotation::Time(false, LogicalAnnotation::TimeUnit::NANOS);
+ logical_type = LogicalType::Time(false, LogicalType::TimeUnit::NANOS);
} else {
- annotation = LogicalAnnotation::Time(false, LogicalAnnotation::TimeUnit::MICROS);
+ logical_type = LogicalType::Time(false, LogicalType::TimeUnit::MICROS);
}
} break;
case ArrowTypeId::STRUCT: {
@@ -740,7 +741,7 @@ Status FieldToNode(const std::shared_ptr<Field>& field,
}
}
- PARQUET_CATCH_NOT_OK(*out = PrimitiveNode::Make(field->name(), repetition, annotation,
+ PARQUET_CATCH_NOT_OK(*out = PrimitiveNode::Make(field->name(), repetition, logical_type,
type, length));
return Status::OK();
diff --git a/cpp/src/parquet/column_scanner-test.cc b/cpp/src/parquet/column_scanner-test.cc
index 9534fe6..bbde0a5 100644
--- a/cpp/src/parquet/column_scanner-test.cc
+++ b/cpp/src/parquet/column_scanner-test.cc
@@ -99,13 +99,13 @@ class TestFlatScanner : public ::testing::Test {
std::shared_ptr<ColumnDescriptor>& d3, int length) {
NodePtr type;
type = schema::PrimitiveNode::Make("c1", Repetition::REQUIRED, Type::type_num,
- LogicalType::NONE, length);
+ ConvertedType::NONE, length);
d1.reset(new ColumnDescriptor(type, 0, 0));
type = schema::PrimitiveNode::Make("c2", Repetition::OPTIONAL, Type::type_num,
- LogicalType::NONE, length);
+ ConvertedType::NONE, length);
d2.reset(new ColumnDescriptor(type, 4, 0));
type = schema::PrimitiveNode::Make("c3", Repetition::REPEATED, Type::type_num,
- LogicalType::NONE, length);
+ ConvertedType::NONE, length);
d3.reset(new ColumnDescriptor(type, 4, 2));
}
@@ -181,7 +181,7 @@ TEST_F(TestFLBAFlatScanner, TestPlainDictScanner) {
TEST_F(TestFLBAFlatScanner, TestSmallBatch) {
NodePtr type =
schema::PrimitiveNode::Make("c1", Repetition::REQUIRED, Type::FIXED_LEN_BYTE_ARRAY,
- LogicalType::DECIMAL, FLBA_LENGTH, 10, 2);
+ ConvertedType::DECIMAL, FLBA_LENGTH, 10, 2);
const ColumnDescriptor d(type, 0, 0);
num_values_ = MakePages<FLBAType>(&d, 1, 100, def_levels_, rep_levels_, values_,
data_buffer_, pages_);
@@ -193,7 +193,7 @@ TEST_F(TestFLBAFlatScanner, TestSmallBatch) {
TEST_F(TestFLBAFlatScanner, TestDescriptorAPI) {
NodePtr type =
schema::PrimitiveNode::Make("c1", Repetition::OPTIONAL, Type::FIXED_LEN_BYTE_ARRAY,
- LogicalType::DECIMAL, FLBA_LENGTH, 10, 2);
+ ConvertedType::DECIMAL, FLBA_LENGTH, 10, 2);
const ColumnDescriptor d(type, 4, 0);
num_values_ = MakePages<FLBAType>(&d, 1, 100, def_levels_, rep_levels_, values_,
data_buffer_, pages_);
@@ -209,7 +209,7 @@ TEST_F(TestFLBAFlatScanner, TestDescriptorAPI) {
TEST_F(TestFLBAFlatScanner, TestFLBAPrinterNext) {
NodePtr type =
schema::PrimitiveNode::Make("c1", Repetition::OPTIONAL, Type::FIXED_LEN_BYTE_ARRAY,
- LogicalType::DECIMAL, FLBA_LENGTH, 10, 2);
+ ConvertedType::DECIMAL, FLBA_LENGTH, 10, 2);
const ColumnDescriptor d(type, 4, 0);
num_values_ = MakePages<FLBAType>(&d, 1, 100, def_levels_, rep_levels_, values_,
data_buffer_, pages_);
diff --git a/cpp/src/parquet/column_writer-test.cc b/cpp/src/parquet/column_writer-test.cc
index b5e2622..dd0d65a 100644
--- a/cpp/src/parquet/column_writer-test.cc
+++ b/cpp/src/parquet/column_writer-test.cc
@@ -663,7 +663,7 @@ TEST(TestColumnWriter, RepeatedListsUpdateSpacedBug) {
// Create schema
NodePtr item = schema::Int32("item"); // optional item
- NodePtr list(GroupNode::Make("b", Repetition::REPEATED, {item}, LogicalType::LIST));
+ NodePtr list(GroupNode::Make("b", Repetition::REPEATED, {item}, ConvertedType::LIST));
NodePtr bag(GroupNode::Make("bag", Repetition::OPTIONAL, {list})); // optional list
std::vector<NodePtr> fields = {bag};
NodePtr root = GroupNode::Make("schema", Repetition::REPEATED, fields);
diff --git a/cpp/src/parquet/encoding-test.cc b/cpp/src/parquet/encoding-test.cc
index aafc5df..cca9edd 100644
--- a/cpp/src/parquet/encoding-test.cc
+++ b/cpp/src/parquet/encoding-test.cc
@@ -146,7 +146,7 @@ template <>
std::shared_ptr<ColumnDescriptor> ExampleDescr<FLBAType>() {
auto node = schema::PrimitiveNode::Make("name", Repetition::OPTIONAL,
Type::FIXED_LEN_BYTE_ARRAY,
- LogicalType::DECIMAL, flba_length, 10, 2);
+ ConvertedType::DECIMAL, flba_length, 10, 2);
return std::make_shared<ColumnDescriptor>(node, 0, 0);
}
diff --git a/cpp/src/parquet/metadata.cc b/cpp/src/parquet/metadata.cc
index 4651105..e676486 100644
--- a/cpp/src/parquet/metadata.cc
+++ b/cpp/src/parquet/metadata.cc
@@ -918,7 +918,7 @@ class FileMetaDataBuilder::FileMetaDataBuilderImpl {
// in the spec yet.
// We always default to `TYPE_DEFINED_ORDER`. We can expose it in
// the API once we have user defined sort orders in the Parquet format.
- // TypeDefinedOrder implies choose SortOrder based on LogicalType/PhysicalType
+ // TypeDefinedOrder implies choose SortOrder based on ConvertedType/PhysicalType
format::TypeDefinedOrder type_defined_order;
format::ColumnOrder column_order;
column_order.__set_TYPE_ORDER(type_defined_order);
diff --git a/cpp/src/parquet/printer.cc b/cpp/src/parquet/printer.cc
index 6e49753..367c0e3 100644
--- a/cpp/src/parquet/printer.cc
+++ b/cpp/src/parquet/printer.cc
@@ -86,10 +86,10 @@ void ParquetFilePrinter::DebugPrint(std::ostream& stream, std::list<int> selecte
const ColumnDescriptor* descr = file_metadata->schema()->Column(i);
stream << "Column " << i << ": " << descr->path()->ToDotString() << " ("
<< TypeToString(descr->physical_type());
- if (descr->logical_type() != LogicalType::NONE) {
- stream << "/" << LogicalTypeToString(descr->logical_type());
+ if (descr->converted_type() != ConvertedType::NONE) {
+ stream << "/" << ConvertedTypeToString(descr->converted_type());
}
- if (descr->logical_type() == LogicalType::DECIMAL) {
+ if (descr->converted_type() == ConvertedType::DECIMAL) {
stream << "(" << descr->type_precision() << "," << descr->type_scale() << ")";
}
stream << ")" << std::endl;
@@ -213,10 +213,9 @@ void ParquetFilePrinter::JSONPrint(std::ostream& stream, std::list<int> selected
const ColumnDescriptor* descr = file_metadata->schema()->Column(i);
stream << " { \"Id\": \"" << i << "\", \"Name\": \"" << descr->name() << "\","
<< " \"PhysicalType\": \"" << TypeToString(descr->physical_type()) << "\","
- << " \"LogicalType\": \"" << LogicalTypeToString(descr->logical_type())
+ << " \"ConvertedType\": \"" << ConvertedTypeToString(descr->converted_type())
<< "\","
- << " \"LogicalAnnotation\": " << (descr->logical_annotation())->ToJSON()
- << " }";
+ << " \"LogicalType\": " << (descr->logical_type())->ToJSON() << " }";
c++;
if (c != static_cast<int>(selected_columns.size())) {
stream << ",\n";
diff --git a/cpp/src/parquet/reader-test.cc b/cpp/src/parquet/reader-test.cc
index e68052e..ce4197f 100644
--- a/cpp/src/parquet/reader-test.cc
+++ b/cpp/src/parquet/reader-test.cc
@@ -327,17 +327,17 @@ TEST(TestJSONWithLocalFile, JSONOutput) {
"NumberOfRealColumns": "11",
"NumberOfColumns": "11",
"Columns": [
- { "Id": "0", "Name": "id", "PhysicalType": "INT32", "LogicalType": "NONE", "LogicalAnnotation": {"Type": "None"} },
- { "Id": "1", "Name": "bool_col", "PhysicalType": "BOOLEAN", "LogicalType": "NONE", "LogicalAnnotation": {"Type": "None"} },
- { "Id": "2", "Name": "tinyint_col", "PhysicalType": "INT32", "LogicalType": "NONE", "LogicalAnnotation": {"Type": "None"} },
- { "Id": "3", "Name": "smallint_col", "PhysicalType": "INT32", "LogicalType": "NONE", "LogicalAnnotation": {"Type": "None"} },
- { "Id": "4", "Name": "int_col", "PhysicalType": "INT32", "LogicalType": "NONE", "LogicalAnnotation": {"Type": "None"} },
- { "Id": "5", "Name": "bigint_col", "PhysicalType": "INT64", "LogicalType": "NONE", "LogicalAnnotation": {"Type": "None"} },
- { "Id": "6", "Name": "float_col", "PhysicalType": "FLOAT", "LogicalType": "NONE", "LogicalAnnotation": {"Type": "None"} },
- { "Id": "7", "Name": "double_col", "PhysicalType": "DOUBLE", "LogicalType": "NONE", "LogicalAnnotation": {"Type": "None"} },
- { "Id": "8", "Name": "date_string_col", "PhysicalType": "BYTE_ARRAY", "LogicalType": "NONE", "LogicalAnnotation": {"Type": "None"} },
- { "Id": "9", "Name": "string_col", "PhysicalType": "BYTE_ARRAY", "LogicalType": "NONE", "LogicalAnnotation": {"Type": "None"} },
- { "Id": "10", "Name": "timestamp_col", "PhysicalType": "INT96", "LogicalType": "NONE", "LogicalAnnotation": {"Type": "None"} }
+ { "Id": "0", "Name": "id", "PhysicalType": "INT32", "ConvertedType": "NONE", "LogicalType": {"Type": "None"} },
+ { "Id": "1", "Name": "bool_col", "PhysicalType": "BOOLEAN", "ConvertedType": "NONE", "LogicalType": {"Type": "None"} },
+ { "Id": "2", "Name": "tinyint_col", "PhysicalType": "INT32", "ConvertedType": "NONE", "LogicalType": {"Type": "None"} },
+ { "Id": "3", "Name": "smallint_col", "PhysicalType": "INT32", "ConvertedType": "NONE", "LogicalType": {"Type": "None"} },
+ { "Id": "4", "Name": "int_col", "PhysicalType": "INT32", "ConvertedType": "NONE", "LogicalType": {"Type": "None"} },
+ { "Id": "5", "Name": "bigint_col", "PhysicalType": "INT64", "ConvertedType": "NONE", "LogicalType": {"Type": "None"} },
+ { "Id": "6", "Name": "float_col", "PhysicalType": "FLOAT", "ConvertedType": "NONE", "LogicalType": {"Type": "None"} },
+ { "Id": "7", "Name": "double_col", "PhysicalType": "DOUBLE", "ConvertedType": "NONE", "LogicalType": {"Type": "None"} },
+ { "Id": "8", "Name": "date_string_col", "PhysicalType": "BYTE_ARRAY", "ConvertedType": "NONE", "LogicalType": {"Type": "None"} },
+ { "Id": "9", "Name": "string_col", "PhysicalType": "BYTE_ARRAY", "ConvertedType": "NONE", "LogicalType": {"Type": "None"} },
+ { "Id": "10", "Name": "timestamp_col", "PhysicalType": "INT96", "ConvertedType": "NONE", "LogicalType": {"Type": "None"} }
],
"RowGroups": [
{
diff --git a/cpp/src/parquet/schema-internal.h b/cpp/src/parquet/schema-internal.h
index 42eac09..ad5ae44 100644
--- a/cpp/src/parquet/schema-internal.h
+++ b/cpp/src/parquet/schema-internal.h
@@ -57,7 +57,7 @@ inline bool HasStructListName(const GroupNode& node) {
inline bool IsSimpleStruct(const Node* node) {
if (!node->is_group()) return false;
if (node->is_repeated()) return false;
- if (node->logical_type() == LogicalType::LIST) return false;
+ if (node->converted_type() == ConvertedType::LIST) return false;
// Special case mentioned in the format spec:
// If the name is array or ends in _tuple, this should be a list of struct
// even for single child elements.
diff --git a/cpp/src/parquet/schema-test.cc b/cpp/src/parquet/schema-test.cc
index 6a580d7..4e11d1f 100644
--- a/cpp/src/parquet/schema-test.cc
+++ b/cpp/src/parquet/schema-test.cc
@@ -36,7 +36,6 @@ using ::arrow::internal::checked_cast;
namespace parquet {
-using format::ConvertedType;
using format::FieldRepetitionType;
using format::SchemaElement;
@@ -106,7 +105,7 @@ class TestPrimitiveNode : public ::testing::Test {
TEST_F(TestPrimitiveNode, Attrs) {
PrimitiveNode node1("foo", Repetition::REPEATED, Type::INT32);
- PrimitiveNode node2("bar", Repetition::OPTIONAL, Type::BYTE_ARRAY, LogicalType::UTF8);
+ PrimitiveNode node2("bar", Repetition::OPTIONAL, Type::BYTE_ARRAY, ConvertedType::UTF8);
ASSERT_EQ("foo", node1.name());
@@ -122,8 +121,8 @@ TEST_F(TestPrimitiveNode, Attrs) {
ASSERT_EQ(Type::BYTE_ARRAY, node2.physical_type());
// logical types
- ASSERT_EQ(LogicalType::NONE, node1.logical_type());
- ASSERT_EQ(LogicalType::UTF8, node2.logical_type());
+ ASSERT_EQ(ConvertedType::NONE, node1.converted_type());
+ ASSERT_EQ(ConvertedType::UTF8, node2.converted_type());
// repetition
PrimitiveNode node3("foo", Repetition::REPEATED, Type::INT32);
@@ -146,16 +145,16 @@ TEST_F(TestPrimitiveNode, FromParquet) {
ASSERT_EQ(id_, prim_node_->id());
ASSERT_EQ(Repetition::OPTIONAL, prim_node_->repetition());
ASSERT_EQ(Type::INT32, prim_node_->physical_type());
- ASSERT_EQ(LogicalType::NONE, prim_node_->logical_type());
+ ASSERT_EQ(ConvertedType::NONE, prim_node_->converted_type());
// Test a logical type
elt = NewPrimitive(name_, FieldRepetitionType::REQUIRED, Type::BYTE_ARRAY, 0);
- elt.__set_converted_type(ConvertedType::UTF8);
+ elt.__set_converted_type(format::ConvertedType::UTF8);
ASSERT_NO_FATAL_FAILURE(Convert(&elt));
ASSERT_EQ(Repetition::REQUIRED, prim_node_->repetition());
ASSERT_EQ(Type::BYTE_ARRAY, prim_node_->physical_type());
- ASSERT_EQ(LogicalType::UTF8, prim_node_->logical_type());
+ ASSERT_EQ(ConvertedType::UTF8, prim_node_->converted_type());
// FIXED_LEN_BYTE_ARRAY
elt = NewPrimitive(name_, FieldRepetitionType::OPTIONAL, Type::FIXED_LEN_BYTE_ARRAY, 0);
@@ -168,16 +167,16 @@ TEST_F(TestPrimitiveNode, FromParquet) {
ASSERT_EQ(Type::FIXED_LEN_BYTE_ARRAY, prim_node_->physical_type());
ASSERT_EQ(16, prim_node_->type_length());
- // ConvertedType::Decimal
+ // format::ConvertedType::Decimal
elt = NewPrimitive(name_, FieldRepetitionType::OPTIONAL, Type::FIXED_LEN_BYTE_ARRAY, 0);
- elt.__set_converted_type(ConvertedType::DECIMAL);
+ elt.__set_converted_type(format::ConvertedType::DECIMAL);
elt.__set_type_length(6);
elt.__set_scale(2);
elt.__set_precision(12);
ASSERT_NO_FATAL_FAILURE(Convert(&elt));
ASSERT_EQ(Type::FIXED_LEN_BYTE_ARRAY, prim_node_->physical_type());
- ASSERT_EQ(LogicalType::DECIMAL, prim_node_->logical_type());
+ ASSERT_EQ(ConvertedType::DECIMAL, prim_node_->converted_type());
ASSERT_EQ(6, prim_node_->type_length());
ASSERT_EQ(2, prim_node_->decimal_metadata().scale);
ASSERT_EQ(12, prim_node_->decimal_metadata().precision);
@@ -197,21 +196,21 @@ TEST_F(TestPrimitiveNode, Equals) {
ASSERT_TRUE(node1.Equals(&node5));
PrimitiveNode flba1("foo", Repetition::REQUIRED, Type::FIXED_LEN_BYTE_ARRAY,
- LogicalType::DECIMAL, 12, 4, 2);
+ ConvertedType::DECIMAL, 12, 4, 2);
PrimitiveNode flba2("foo", Repetition::REQUIRED, Type::FIXED_LEN_BYTE_ARRAY,
- LogicalType::DECIMAL, 1, 4, 2);
+ ConvertedType::DECIMAL, 1, 4, 2);
flba2.SetTypeLength(12);
PrimitiveNode flba3("foo", Repetition::REQUIRED, Type::FIXED_LEN_BYTE_ARRAY,
- LogicalType::DECIMAL, 1, 4, 2);
+ ConvertedType::DECIMAL, 1, 4, 2);
flba3.SetTypeLength(16);
PrimitiveNode flba4("foo", Repetition::REQUIRED, Type::FIXED_LEN_BYTE_ARRAY,
- LogicalType::DECIMAL, 12, 4, 0);
+ ConvertedType::DECIMAL, 12, 4, 0);
PrimitiveNode flba5("foo", Repetition::REQUIRED, Type::FIXED_LEN_BYTE_ARRAY,
- LogicalType::NONE, 12, 4, 0);
+ ConvertedType::NONE, 12, 4, 0);
ASSERT_TRUE(flba1.Equals(&flba2));
ASSERT_FALSE(flba1.Equals(&flba3));
@@ -220,61 +219,62 @@ TEST_F(TestPrimitiveNode, Equals) {
}
TEST_F(TestPrimitiveNode, PhysicalLogicalMapping) {
- ASSERT_NO_THROW(
- PrimitiveNode::Make("foo", Repetition::REQUIRED, Type::INT32, LogicalType::INT_32));
+ ASSERT_NO_THROW(PrimitiveNode::Make("foo", Repetition::REQUIRED, Type::INT32,
+ ConvertedType::INT_32));
ASSERT_NO_THROW(PrimitiveNode::Make("foo", Repetition::REQUIRED, Type::BYTE_ARRAY,
- LogicalType::JSON));
+ ConvertedType::JSON));
ASSERT_THROW(
- PrimitiveNode::Make("foo", Repetition::REQUIRED, Type::INT32, LogicalType::JSON),
+ PrimitiveNode::Make("foo", Repetition::REQUIRED, Type::INT32, ConvertedType::JSON),
ParquetException);
ASSERT_NO_THROW(PrimitiveNode::Make("foo", Repetition::REQUIRED, Type::INT64,
- LogicalType::TIMESTAMP_MILLIS));
- ASSERT_THROW(
- PrimitiveNode::Make("foo", Repetition::REQUIRED, Type::INT32, LogicalType::INT_64),
- ParquetException);
+ ConvertedType::TIMESTAMP_MILLIS));
+ ASSERT_THROW(PrimitiveNode::Make("foo", Repetition::REQUIRED, Type::INT32,
+ ConvertedType::INT_64),
+ ParquetException);
ASSERT_THROW(PrimitiveNode::Make("foo", Repetition::REQUIRED, Type::BYTE_ARRAY,
- LogicalType::INT_8),
+ ConvertedType::INT_8),
ParquetException);
ASSERT_THROW(PrimitiveNode::Make("foo", Repetition::REQUIRED, Type::BYTE_ARRAY,
- LogicalType::INTERVAL),
+ ConvertedType::INTERVAL),
ParquetException);
ASSERT_THROW(PrimitiveNode::Make("foo", Repetition::REQUIRED,
- Type::FIXED_LEN_BYTE_ARRAY, LogicalType::ENUM),
+ Type::FIXED_LEN_BYTE_ARRAY, ConvertedType::ENUM),
ParquetException);
ASSERT_NO_THROW(PrimitiveNode::Make("foo", Repetition::REQUIRED, Type::BYTE_ARRAY,
- LogicalType::ENUM));
+ ConvertedType::ENUM));
ASSERT_THROW(
PrimitiveNode::Make("foo", Repetition::REQUIRED, Type::FIXED_LEN_BYTE_ARRAY,
- LogicalType::DECIMAL, 0, 2, 4),
+ ConvertedType::DECIMAL, 0, 2, 4),
ParquetException);
ASSERT_THROW(PrimitiveNode::Make("foo", Repetition::REQUIRED, Type::FLOAT,
- LogicalType::DECIMAL, 0, 2, 4),
+ ConvertedType::DECIMAL, 0, 2, 4),
ParquetException);
ASSERT_THROW(
PrimitiveNode::Make("foo", Repetition::REQUIRED, Type::FIXED_LEN_BYTE_ARRAY,
- LogicalType::DECIMAL, 0, 4, 0),
+ ConvertedType::DECIMAL, 0, 4, 0),
ParquetException);
ASSERT_THROW(
PrimitiveNode::Make("foo", Repetition::REQUIRED, Type::FIXED_LEN_BYTE_ARRAY,
- LogicalType::DECIMAL, 10, 0, 4),
+ ConvertedType::DECIMAL, 10, 0, 4),
ParquetException);
ASSERT_THROW(
PrimitiveNode::Make("foo", Repetition::REQUIRED, Type::FIXED_LEN_BYTE_ARRAY,
- LogicalType::DECIMAL, 10, 4, -1),
+ ConvertedType::DECIMAL, 10, 4, -1),
ParquetException);
ASSERT_THROW(
PrimitiveNode::Make("foo", Repetition::REQUIRED, Type::FIXED_LEN_BYTE_ARRAY,
- LogicalType::DECIMAL, 10, 2, 4),
+ ConvertedType::DECIMAL, 10, 2, 4),
ParquetException);
ASSERT_NO_THROW(PrimitiveNode::Make("foo", Repetition::REQUIRED,
- Type::FIXED_LEN_BYTE_ARRAY, LogicalType::DECIMAL,
+ Type::FIXED_LEN_BYTE_ARRAY, ConvertedType::DECIMAL,
10, 6, 4));
ASSERT_NO_THROW(PrimitiveNode::Make("foo", Repetition::REQUIRED,
- Type::FIXED_LEN_BYTE_ARRAY, LogicalType::INTERVAL,
+ Type::FIXED_LEN_BYTE_ARRAY, ConvertedType::INTERVAL,
12));
- ASSERT_THROW(PrimitiveNode::Make("foo", Repetition::REQUIRED,
- Type::FIXED_LEN_BYTE_ARRAY, LogicalType::INTERVAL, 10),
- ParquetException);
+ ASSERT_THROW(
+ PrimitiveNode::Make("foo", Repetition::REQUIRED, Type::FIXED_LEN_BYTE_ARRAY,
+ ConvertedType::INTERVAL, 10),
+ ParquetException);
}
// ----------------------------------------------------------------------
@@ -308,7 +308,7 @@ TEST_F(TestGroupNode, Attrs) {
NodeVector fields = Fields1();
GroupNode node1("foo", Repetition::REPEATED, fields);
- GroupNode node2("bar", Repetition::OPTIONAL, fields, LogicalType::LIST);
+ GroupNode node2("bar", Repetition::OPTIONAL, fields, ConvertedType::LIST);
ASSERT_EQ("foo", node1.name());
@@ -326,8 +326,8 @@ TEST_F(TestGroupNode, Attrs) {
ASSERT_EQ(Node::GROUP, node1.node_type());
// logical types
- ASSERT_EQ(LogicalType::NONE, node1.logical_type());
- ASSERT_EQ(LogicalType::LIST, node2.logical_type());
+ ASSERT_EQ(ConvertedType::NONE, node1.converted_type());
+ ASSERT_EQ(ConvertedType::LIST, node2.converted_type());
}
TEST_F(TestGroupNode, Equals) {
@@ -425,7 +425,7 @@ TEST_F(TestSchemaConverter, NestedExample) {
// 3-level list encoding, by hand
elt = NewGroup("b", FieldRepetitionType::REPEATED, 1, 3);
- elt.__set_converted_type(ConvertedType::LIST);
+ elt.__set_converted_type(format::ConvertedType::LIST);
elements.push_back(elt);
elements.push_back(NewPrimitive("item", FieldRepetitionType::OPTIONAL, Type::INT64, 4));
@@ -437,7 +437,7 @@ TEST_F(TestSchemaConverter, NestedExample) {
// 3-level list encoding
NodePtr item = Int64("item");
- NodePtr list(GroupNode::Make("b", Repetition::REPEATED, {item}, LogicalType::LIST));
+ NodePtr list(GroupNode::Make("b", Repetition::REPEATED, {item}, ConvertedType::LIST));
NodePtr bag(GroupNode::Make("bag", Repetition::OPTIONAL, {list}));
fields.push_back(bag);
@@ -505,19 +505,19 @@ class TestSchemaFlatten : public ::testing::Test {
TEST_F(TestSchemaFlatten, DecimalMetadata) {
// Checks that DecimalMetadata is only set for DecimalTypes
NodePtr node = PrimitiveNode::Make("decimal", Repetition::REQUIRED, Type::INT64,
- LogicalType::DECIMAL, -1, 8, 4);
+ ConvertedType::DECIMAL, -1, 8, 4);
NodePtr group =
- GroupNode::Make("group", Repetition::REPEATED, {node}, LogicalType::LIST);
+ GroupNode::Make("group", Repetition::REPEATED, {node}, ConvertedType::LIST);
Flatten(reinterpret_cast<GroupNode*>(group.get()));
ASSERT_EQ("decimal", elements_[1].name);
ASSERT_TRUE(elements_[1].__isset.precision);
ASSERT_TRUE(elements_[1].__isset.scale);
elements_.clear();
- // ... including those created with new logical annotations
+ // ... including those created with new logical types
node = PrimitiveNode::Make("decimal", Repetition::REQUIRED,
- DecimalAnnotation::Make(10, 5), Type::INT64, -1);
- group = GroupNode::Make("group", Repetition::REPEATED, {node}, ListAnnotation::Make());
+ DecimalLogicalType::Make(10, 5), Type::INT64, -1);
+ group = GroupNode::Make("group", Repetition::REPEATED, {node}, ListLogicalType::Make());
Flatten(reinterpret_cast<GroupNode*>(group.get()));
ASSERT_EQ("decimal", elements_[1].name);
ASSERT_TRUE(elements_[1].__isset.precision);
@@ -525,8 +525,8 @@ TEST_F(TestSchemaFlatten, DecimalMetadata) {
elements_.clear();
// Not for integers with no logical type
- group =
- GroupNode::Make("group", Repetition::REPEATED, {Int64("int64")}, LogicalType::LIST);
+ group = GroupNode::Make("group", Repetition::REPEATED, {Int64("int64")},
+ ConvertedType::LIST);
Flatten(reinterpret_cast<GroupNode*>(group.get()));
ASSERT_EQ("int64", elements_[1].name);
ASSERT_FALSE(elements_[0].__isset.precision);
@@ -546,7 +546,7 @@ TEST_F(TestSchemaFlatten, NestedExample) {
// 3-level list encoding, by hand
elt = NewGroup("b", FieldRepetitionType::REPEATED, 1, 3);
- elt.__set_converted_type(ConvertedType::LIST);
+ elt.__set_converted_type(format::ConvertedType::LIST);
format::ListType ls;
format::LogicalType lt;
lt.__set_LIST(ls);
@@ -560,7 +560,7 @@ TEST_F(TestSchemaFlatten, NestedExample) {
// 3-level list encoding
NodePtr item = Int64("item");
- NodePtr list(GroupNode::Make("b", Repetition::REPEATED, {item}, LogicalType::LIST));
+ NodePtr list(GroupNode::Make("b", Repetition::REPEATED, {item}, ConvertedType::LIST));
NodePtr bag(GroupNode::Make("bag", Repetition::OPTIONAL, {list}));
fields.push_back(bag);
@@ -575,7 +575,7 @@ TEST_F(TestSchemaFlatten, NestedExample) {
TEST(TestColumnDescriptor, TestAttrs) {
NodePtr node = PrimitiveNode::Make("name", Repetition::OPTIONAL, Type::BYTE_ARRAY,
- LogicalType::UTF8);
+ ConvertedType::UTF8);
ColumnDescriptor descr(node, 4, 1);
ASSERT_EQ("name", descr.name());
@@ -589,8 +589,8 @@ TEST(TestColumnDescriptor, TestAttrs) {
name: name,
path: ,
physical_type: BYTE_ARRAY,
- logical_type: UTF8,
- logical_annotation: String,
+ converted_type: UTF8,
+ logical_type: String,
max_definition_level: 4,
max_repetition_level: 1,
})";
@@ -598,7 +598,7 @@ TEST(TestColumnDescriptor, TestAttrs) {
// Test FIXED_LEN_BYTE_ARRAY
node = PrimitiveNode::Make("name", Repetition::OPTIONAL, Type::FIXED_LEN_BYTE_ARRAY,
- LogicalType::DECIMAL, 12, 10, 4);
+ ConvertedType::DECIMAL, 12, 10, 4);
descr = ColumnDescriptor(node, 4, 1);
ASSERT_EQ(Type::FIXED_LEN_BYTE_ARRAY, descr.physical_type());
@@ -608,8 +608,8 @@ TEST(TestColumnDescriptor, TestAttrs) {
name: name,
path: ,
physical_type: FIXED_LEN_BYTE_ARRAY,
- logical_type: DECIMAL,
- logical_annotation: Decimal(precision=10, scale=4),
+ converted_type: DECIMAL,
+ logical_type: Decimal(precision=10, scale=4),
max_definition_level: 4,
max_repetition_level: 1,
length: 12,
@@ -645,7 +645,7 @@ TEST_F(TestSchemaDescriptor, Equals) {
NodePtr item2 = Boolean("item2", Repetition::OPTIONAL);
NodePtr item3 = Int32("item3", Repetition::REPEATED);
NodePtr list(GroupNode::Make("records", Repetition::REPEATED, {item1, item2, item3},
- LogicalType::LIST));
+ ConvertedType::LIST));
NodePtr bag(GroupNode::Make("bag", Repetition::OPTIONAL, {list}));
NodePtr bag2(GroupNode::Make("bag", Repetition::REQUIRED, {list}));
@@ -697,7 +697,7 @@ TEST_F(TestSchemaDescriptor, BuildTree) {
NodePtr item2 = Boolean("item2", Repetition::OPTIONAL);
NodePtr item3 = Int32("item3", Repetition::REPEATED);
NodePtr list(GroupNode::Make("records", Repetition::REPEATED, {item1, item2, item3},
- LogicalType::LIST));
+ ConvertedType::LIST));
NodePtr bag(GroupNode::Make("bag", Repetition::OPTIONAL, {list}));
fields.push_back(bag);
@@ -773,15 +773,15 @@ TEST(TestSchemaPrinter, Examples) {
NodePtr item1 = Int64("item1");
NodePtr item2 = Boolean("item2", Repetition::REQUIRED);
NodePtr list(
- GroupNode::Make("b", Repetition::REPEATED, {item1, item2}, LogicalType::LIST));
+ GroupNode::Make("b", Repetition::REPEATED, {item1, item2}, ConvertedType::LIST));
NodePtr bag(GroupNode::Make("bag", Repetition::OPTIONAL, {list}));
fields.push_back(bag);
fields.push_back(PrimitiveNode::Make("c", Repetition::REQUIRED, Type::INT32,
- LogicalType::DECIMAL, -1, 3, 2));
+ ConvertedType::DECIMAL, -1, 3, 2));
fields.push_back(PrimitiveNode::Make("d", Repetition::REQUIRED,
- DecimalAnnotation::Make(10, 5), Type::INT64, -1));
+ DecimalLogicalType::Make(10, 5), Type::INT64, -1));
NodePtr schema = GroupNode::Make("schema", Repetition::REPEATED, fields);
@@ -802,323 +802,313 @@ TEST(TestSchemaPrinter, Examples) {
}
static void ConfirmFactoryEquivalence(
- LogicalType::type converted_type,
- const std::shared_ptr<const LogicalAnnotation>& from_make,
- std::function<bool(const std::shared_ptr<const LogicalAnnotation>&)> check_is_type) {
- std::shared_ptr<const LogicalAnnotation> from_converted_type =
- LogicalAnnotation::FromConvertedType(converted_type);
+ ConvertedType::type converted_type,
+ const std::shared_ptr<const LogicalType>& from_make,
+ std::function<bool(const std::shared_ptr<const LogicalType>&)> check_is_type) {
+ std::shared_ptr<const LogicalType> from_converted_type =
+ LogicalType::FromConvertedType(converted_type);
ASSERT_EQ(from_converted_type->type(), from_make->type())
- << from_make->ToString() << " annotations unexpectedly do not match on type";
+ << from_make->ToString() << " logical types unexpectedly do not match on type";
ASSERT_TRUE(from_converted_type->Equals(*from_make))
- << from_make->ToString() << " annotations unexpectedly not equivalent";
+ << from_make->ToString() << " logical types unexpectedly not equivalent";
ASSERT_TRUE(check_is_type(from_converted_type))
<< from_converted_type->ToString()
- << " annotation (from converted type) does not have expected type property";
+ << " logical type (from converted type) does not have expected type property";
ASSERT_TRUE(check_is_type(from_make))
<< from_make->ToString()
- << " annotation (from Make()) does not have expected type property";
+ << " logical type (from Make()) does not have expected type property";
return;
}
-TEST(TestLogicalAnnotationConstruction, FactoryEquivalence) {
- // For each legacy converted type, ensure that the equivalent annotation object
+TEST(TestLogicalTypeConstruction, FactoryEquivalence) {
+ // For each legacy converted type, ensure that the equivalent logical type object
// can be obtained from either the base class's FromConvertedType() factory method or
- // the annotation type class's Make() method (accessed via convenience methods on the
- // base class) and that these annotation objects are equivalent
+ // the logical type class's Make() method (accessed via convenience methods on the
+ // base class) and that these logical type objects are equivalent
struct ConfirmFactoryEquivalenceArguments {
- LogicalType::type converted_type;
- std::shared_ptr<const LogicalAnnotation> annotation;
- std::function<bool(const std::shared_ptr<const LogicalAnnotation>&)> check_is_type;
+ ConvertedType::type converted_type;
+ std::shared_ptr<const LogicalType> logical_type;
+ std::function<bool(const std::shared_ptr<const LogicalType>&)> check_is_type;
};
- auto check_is_string = [](const std::shared_ptr<const LogicalAnnotation>& annotation) {
- return annotation->is_string();
+ auto check_is_string = [](const std::shared_ptr<const LogicalType>& logical_type) {
+ return logical_type->is_string();
+ };
+ auto check_is_map = [](const std::shared_ptr<const LogicalType>& logical_type) {
+ return logical_type->is_map();
+ };
+ auto check_is_list = [](const std::shared_ptr<const LogicalType>& logical_type) {
+ return logical_type->is_list();
};
- auto check_is_map = [](const std::shared_ptr<const LogicalAnnotation>& annotation) {
- return annotation->is_map();
+ auto check_is_enum = [](const std::shared_ptr<const LogicalType>& logical_type) {
+ return logical_type->is_enum();
};
- auto check_is_list = [](const std::shared_ptr<const LogicalAnnotation>& annotation) {
- return annotation->is_list();
+ auto check_is_date = [](const std::shared_ptr<const LogicalType>& logical_type) {
+ return logical_type->is_date();
};
- auto check_is_enum = [](const std::shared_ptr<const LogicalAnnotation>& annotation) {
- return annotation->is_enum();
+ auto check_is_time = [](const std::shared_ptr<const LogicalType>& logical_type) {
+ return logical_type->is_time();
};
- auto check_is_date = [](const std::shared_ptr<const LogicalAnnotation>& annotation) {
- return annotation->is_date();
+ auto check_is_timestamp = [](const std::shared_ptr<const LogicalType>& logical_type) {
+ return logical_type->is_timestamp();
};
- auto check_is_time = [](const std::shared_ptr<const LogicalAnnotation>& annotation) {
- return annotation->is_time();
+ auto check_is_int = [](const std::shared_ptr<const LogicalType>& logical_type) {
+ return logical_type->is_int();
};
- auto check_is_timestamp =
- [](const std::shared_ptr<const LogicalAnnotation>& annotation) {
- return annotation->is_timestamp();
- };
- auto check_is_int = [](const std::shared_ptr<const LogicalAnnotation>& annotation) {
- return annotation->is_int();
+ auto check_is_JSON = [](const std::shared_ptr<const LogicalType>& logical_type) {
+ return logical_type->is_JSON();
};
- auto check_is_JSON = [](const std::shared_ptr<const LogicalAnnotation>& annotation) {
- return annotation->is_JSON();
+ auto check_is_BSON = [](const std::shared_ptr<const LogicalType>& logical_type) {
+ return logical_type->is_BSON();
};
- auto check_is_BSON = [](const std::shared_ptr<const LogicalAnnotation>& annotation) {
- return annotation->is_BSON();
+ auto check_is_interval = [](const std::shared_ptr<const LogicalType>& logical_type) {
+ return logical_type->is_interval();
};
- auto check_is_interval =
- [](const std::shared_ptr<const LogicalAnnotation>& annotation) {
- return annotation->is_interval();
- };
- auto check_is_none = [](const std::shared_ptr<const LogicalAnnotation>& annotation) {
- return annotation->is_none();
+ auto check_is_none = [](const std::shared_ptr<const LogicalType>& logical_type) {
+ return logical_type->is_none();
};
std::vector<ConfirmFactoryEquivalenceArguments> cases = {
- {LogicalType::UTF8, LogicalAnnotation::String(), check_is_string},
- {LogicalType::MAP, LogicalAnnotation::Map(), check_is_map},
- {LogicalType::MAP_KEY_VALUE, LogicalAnnotation::Map(), check_is_map},
- {LogicalType::LIST, LogicalAnnotation::List(), check_is_list},
- {LogicalType::ENUM, LogicalAnnotation::Enum(), check_is_enum},
- {LogicalType::DATE, LogicalAnnotation::Date(), check_is_date},
- {LogicalType::TIME_MILLIS,
- LogicalAnnotation::Time(true, LogicalAnnotation::TimeUnit::MILLIS), check_is_time},
- {LogicalType::TIME_MICROS,
- LogicalAnnotation::Time(true, LogicalAnnotation::TimeUnit::MICROS), check_is_time},
- {LogicalType::TIMESTAMP_MILLIS,
- LogicalAnnotation::Timestamp(true, LogicalAnnotation::TimeUnit::MILLIS),
- check_is_timestamp},
- {LogicalType::TIMESTAMP_MICROS,
- LogicalAnnotation::Timestamp(true, LogicalAnnotation::TimeUnit::MICROS),
- check_is_timestamp},
- {LogicalType::UINT_8, LogicalAnnotation::Int(8, false), check_is_int},
- {LogicalType::UINT_16, LogicalAnnotation::Int(16, false), check_is_int},
- {LogicalType::UINT_32, LogicalAnnotation::Int(32, false), check_is_int},
- {LogicalType::UINT_64, LogicalAnnotation::Int(64, false), check_is_int},
- {LogicalType::INT_8, LogicalAnnotation::Int(8, true), check_is_int},
- {LogicalType::INT_16, LogicalAnnotation::Int(16, true), check_is_int},
- {LogicalType::INT_32, LogicalAnnotation::Int(32, true), check_is_int},
- {LogicalType::INT_64, LogicalAnnotation::Int(64, true), check_is_int},
- {LogicalType::JSON, LogicalAnnotation::JSON(), check_is_JSON},
- {LogicalType::BSON, LogicalAnnotation::BSON(), check_is_BSON},
- {LogicalType::INTERVAL, LogicalAnnotation::Interval(), check_is_interval},
- {LogicalType::NONE, LogicalAnnotation::None(), check_is_none}};
+ {ConvertedType::UTF8, LogicalType::String(), check_is_string},
+ {ConvertedType::MAP, LogicalType::Map(), check_is_map},
+ {ConvertedType::MAP_KEY_VALUE, LogicalType::Map(), check_is_map},
+ {ConvertedType::LIST, LogicalType::List(), check_is_list},
+ {ConvertedType::ENUM, LogicalType::Enum(), check_is_enum},
+ {ConvertedType::DATE, LogicalType::Date(), check_is_date},
+ {ConvertedType::TIME_MILLIS, LogicalType::Time(true, LogicalType::TimeUnit::MILLIS),
+ check_is_time},
+ {ConvertedType::TIME_MICROS, LogicalType::Time(true, LogicalType::TimeUnit::MICROS),
+ check_is_time},
+ {ConvertedType::TIMESTAMP_MILLIS,
+ LogicalType::Timestamp(true, LogicalType::TimeUnit::MILLIS), check_is_timestamp},
+ {ConvertedType::TIMESTAMP_MICROS,
+ LogicalType::Timestamp(true, LogicalType::TimeUnit::MICROS), check_is_timestamp},
+ {ConvertedType::UINT_8, LogicalType::Int(8, false), check_is_int},
+ {ConvertedType::UINT_16, LogicalType::Int(16, false), check_is_int},
+ {ConvertedType::UINT_32, LogicalType::Int(32, false), check_is_int},
+ {ConvertedType::UINT_64, LogicalType::Int(64, false), check_is_int},
+ {ConvertedType::INT_8, LogicalType::Int(8, true), check_is_int},
+ {ConvertedType::INT_16, LogicalType::Int(16, true), check_is_int},
+ {ConvertedType::INT_32, LogicalType::Int(32, true), check_is_int},
+ {ConvertedType::INT_64, LogicalType::Int(64, true), check_is_int},
+ {ConvertedType::JSON, LogicalType::JSON(), check_is_JSON},
+ {ConvertedType::BSON, LogicalType::BSON(), check_is_BSON},
+ {ConvertedType::INTERVAL, LogicalType::Interval(), check_is_interval},
+ {ConvertedType::NONE, LogicalType::None(), check_is_none}};
for (const ConfirmFactoryEquivalenceArguments& c : cases) {
- ConfirmFactoryEquivalence(c.converted_type, c.annotation, c.check_is_type);
+ ConfirmFactoryEquivalence(c.converted_type, c.logical_type, c.check_is_type);
}
- // LogicalType::DECIMAL, LogicalAnnotation::Decimal, is_decimal
+ // ConvertedType::DECIMAL, LogicalType::Decimal, is_decimal
schema::DecimalMetadata converted_decimal_metadata;
converted_decimal_metadata.isset = true;
converted_decimal_metadata.precision = 10;
converted_decimal_metadata.scale = 4;
- std::shared_ptr<const LogicalAnnotation> from_converted_type =
- LogicalAnnotation::FromConvertedType(LogicalType::DECIMAL,
- converted_decimal_metadata);
- std::shared_ptr<const LogicalAnnotation> from_make = LogicalAnnotation::Decimal(10, 4);
+ std::shared_ptr<const LogicalType> from_converted_type =
+ LogicalType::FromConvertedType(ConvertedType::DECIMAL, converted_decimal_metadata);
+ std::shared_ptr<const LogicalType> from_make = LogicalType::Decimal(10, 4);
ASSERT_EQ(from_converted_type->type(), from_make->type());
ASSERT_TRUE(from_converted_type->Equals(*from_make));
ASSERT_TRUE(from_converted_type->is_decimal());
ASSERT_TRUE(from_make->is_decimal());
- ASSERT_TRUE(LogicalAnnotation::Decimal(16)->Equals(*LogicalAnnotation::Decimal(16, 0)));
+ ASSERT_TRUE(LogicalType::Decimal(16)->Equals(*LogicalType::Decimal(16, 0)));
}
static void ConfirmConvertedTypeCompatibility(
- const std::shared_ptr<const LogicalAnnotation>& original,
- LogicalType::type expected_converted_type) {
+ const std::shared_ptr<const LogicalType>& original,
+ ConvertedType::type expected_converted_type) {
ASSERT_TRUE(original->is_valid())
- << original->ToString() << " annotation unexpectedly is not valid";
+ << original->ToString() << " logical type unexpectedly is not valid";
schema::DecimalMetadata converted_decimal_metadata;
- LogicalType::type converted_type =
+ ConvertedType::type converted_type =
original->ToConvertedType(&converted_decimal_metadata);
ASSERT_EQ(converted_type, expected_converted_type)
<< original->ToString()
- << " annotation unexpectedly returns incorrect converted type";
+ << " logical type unexpectedly returns incorrect converted type";
ASSERT_FALSE(converted_decimal_metadata.isset)
<< original->ToString()
- << " annotation unexpectedly returns converted decimal metatdata that is set";
+ << " logical type unexpectedly returns converted decimal metatdata that is set";
ASSERT_TRUE(original->is_compatible(converted_type, converted_decimal_metadata))
<< original->ToString()
- << " annotation unexpectedly is incompatible with converted type and decimal "
+ << " logical type unexpectedly is incompatible with converted type and decimal "
"metadata it returned";
ASSERT_FALSE(original->is_compatible(converted_type, {true, 1, 1}))
<< original->ToString()
- << " annotation unexpectedly is compatible with converted decimal metadata that is "
+ << " logical type unexpectedly is compatible with converted decimal metadata that "
+ "is "
"set";
ASSERT_TRUE(original->is_compatible(converted_type))
<< original->ToString()
- << " annotation unexpectedly is incompatible with converted type it returned";
- std::shared_ptr<const LogicalAnnotation> reconstructed =
- LogicalAnnotation::FromConvertedType(converted_type, converted_decimal_metadata);
+ << " logical type unexpectedly is incompatible with converted type it returned";
+ std::shared_ptr<const LogicalType> reconstructed =
+ LogicalType::FromConvertedType(converted_type, converted_decimal_metadata);
ASSERT_TRUE(reconstructed->is_valid()) << "Reconstructed " << reconstructed->ToString()
- << " annotation unexpectedly is not valid";
+ << " logical type unexpectedly is not valid";
ASSERT_TRUE(reconstructed->Equals(*original))
- << "Reconstructed annotation (" << reconstructed->ToString()
- << ") unexpectedly not equivalent to original annotation (" << original->ToString()
- << ")";
+ << "Reconstructed logical type (" << reconstructed->ToString()
+ << ") unexpectedly not equivalent to original logical type ("
+ << original->ToString() << ")";
return;
}
-TEST(TestLogicalAnnotationConstruction, ConvertedTypeCompatibility) {
- // For each legacy converted type, ensure that the equivalent logical annotation
+TEST(TestLogicalTypeConstruction, ConvertedTypeCompatibility) {
+ // For each legacy converted type, ensure that the equivalent logical type
// emits correct, compatible converted type information and that the emitted
- // information can be used to reconstruct another equivalent logical annotation.
+ // information can be used to reconstruct another equivalent logical type.
struct ExpectedConvertedType {
- std::shared_ptr<const LogicalAnnotation> annotation;
- LogicalType::type converted_type;
+ std::shared_ptr<const LogicalType> logical_type;
+ ConvertedType::type converted_type;
};
std::vector<ExpectedConvertedType> cases = {
- {LogicalAnnotation::String(), LogicalType::UTF8},
- {LogicalAnnotation::Map(), LogicalType::MAP},
- {LogicalAnnotation::List(), LogicalType::LIST},
- {LogicalAnnotation::Enum(), LogicalType::ENUM},
- {LogicalAnnotation::Date(), LogicalType::DATE},
- {LogicalAnnotation::Time(true, LogicalAnnotation::TimeUnit::MILLIS),
- LogicalType::TIME_MILLIS},
- {LogicalAnnotation::Time(true, LogicalAnnotation::TimeUnit::MICROS),
- LogicalType::TIME_MICROS},
- {LogicalAnnotation::Timestamp(true, LogicalAnnotation::TimeUnit::MILLIS),
- LogicalType::TIMESTAMP_MILLIS},
- {LogicalAnnotation::Timestamp(true, LogicalAnnotation::TimeUnit::MICROS),
- LogicalType::TIMESTAMP_MICROS},
- {LogicalAnnotation::Int(8, false), LogicalType::UINT_8},
- {LogicalAnnotation::Int(16, false), LogicalType::UINT_16},
- {LogicalAnnotation::Int(32, false), LogicalType::UINT_32},
- {LogicalAnnotation::Int(64, false), LogicalType::UINT_64},
- {LogicalAnnotation::Int(8, true), LogicalType::INT_8},
- {LogicalAnnotation::Int(16, true), LogicalType::INT_16},
- {LogicalAnnotation::Int(32, true), LogicalType::INT_32},
- {LogicalAnnotation::Int(64, true), LogicalType::INT_64},
- {LogicalAnnotation::JSON(), LogicalType::JSON},
- {LogicalAnnotation::BSON(), LogicalType::BSON},
- {LogicalAnnotation::Interval(), LogicalType::INTERVAL},
- {LogicalAnnotation::None(), LogicalType::NONE}};
+ {LogicalType::String(), ConvertedType::UTF8},
+ {LogicalType::Map(), ConvertedType::MAP},
+ {LogicalType::List(), ConvertedType::LIST},
+ {LogicalType::Enum(), ConvertedType::ENUM},
+ {LogicalType::Date(), ConvertedType::DATE},
+ {LogicalType::Time(true, LogicalType::TimeUnit::MILLIS),
+ ConvertedType::TIME_MILLIS},
+ {LogicalType::Time(true, LogicalType::TimeUnit::MICROS),
+ ConvertedType::TIME_MICROS},
+ {LogicalType::Timestamp(true, LogicalType::TimeUnit::MILLIS),
+ ConvertedType::TIMESTAMP_MILLIS},
+ {LogicalType::Timestamp(true, LogicalType::TimeUnit::MICROS),
+ ConvertedType::TIMESTAMP_MICROS},
+ {LogicalType::Int(8, false), ConvertedType::UINT_8},
+ {LogicalType::Int(16, false), ConvertedType::UINT_16},
+ {LogicalType::Int(32, false), ConvertedType::UINT_32},
+ {LogicalType::Int(64, false), ConvertedType::UINT_64},
+ {LogicalType::Int(8, true), ConvertedType::INT_8},
+ {LogicalType::Int(16, true), ConvertedType::INT_16},
+ {LogicalType::Int(32, true), ConvertedType::INT_32},
+ {LogicalType::Int(64, true), ConvertedType::INT_64},
+ {LogicalType::JSON(), ConvertedType::JSON},
+ {LogicalType::BSON(), ConvertedType::BSON},
+ {LogicalType::Interval(), ConvertedType::INTERVAL},
+ {LogicalType::None(), ConvertedType::NONE}};
for (const ExpectedConvertedType& c : cases) {
- ConfirmConvertedTypeCompatibility(c.annotation, c.converted_type);
+ ConfirmConvertedTypeCompatibility(c.logical_type, c.converted_type);
}
// Special cases ...
- std::shared_ptr<const LogicalAnnotation> original;
- LogicalType::type converted_type;
+ std::shared_ptr<const LogicalType> original;
+ ConvertedType::type converted_type;
schema::DecimalMetadata converted_decimal_metadata;
- std::shared_ptr<const LogicalAnnotation> reconstructed;
+ std::shared_ptr<const LogicalType> reconstructed;
// DECIMAL
std::memset(&converted_decimal_metadata, 0x00, sizeof(converted_decimal_metadata));
- original = LogicalAnnotation::Decimal(6, 2);
+ original = LogicalType::Decimal(6, 2);
ASSERT_TRUE(original->is_valid());
converted_type = original->ToConvertedType(&converted_decimal_metadata);
- ASSERT_EQ(converted_type, LogicalType::DECIMAL);
+ ASSERT_EQ(converted_type, ConvertedType::DECIMAL);
ASSERT_TRUE(converted_decimal_metadata.isset);
ASSERT_EQ(converted_decimal_metadata.precision, 6);
ASSERT_EQ(converted_decimal_metadata.scale, 2);
ASSERT_TRUE(original->is_compatible(converted_type, converted_decimal_metadata));
reconstructed =
- LogicalAnnotation::FromConvertedType(converted_type, converted_decimal_metadata);
+ LogicalType::FromConvertedType(converted_type, converted_decimal_metadata);
ASSERT_TRUE(reconstructed->is_valid());
ASSERT_TRUE(reconstructed->Equals(*original));
// Unknown
- original = LogicalAnnotation::Unknown();
+ original = LogicalType::Unknown();
ASSERT_TRUE(original->is_invalid());
ASSERT_FALSE(original->is_valid());
converted_type = original->ToConvertedType(&converted_decimal_metadata);
- ASSERT_EQ(converted_type, LogicalType::NA);
+ ASSERT_EQ(converted_type, ConvertedType::NA);
ASSERT_FALSE(converted_decimal_metadata.isset);
ASSERT_TRUE(original->is_compatible(converted_type, converted_decimal_metadata));
ASSERT_TRUE(original->is_compatible(converted_type));
reconstructed =
- LogicalAnnotation::FromConvertedType(converted_type, converted_decimal_metadata);
+ LogicalType::FromConvertedType(converted_type, converted_decimal_metadata);
ASSERT_TRUE(reconstructed->is_invalid());
ASSERT_TRUE(reconstructed->Equals(*original));
}
static void ConfirmNewTypeIncompatibility(
- const std::shared_ptr<const LogicalAnnotation>& annotation,
- std::function<bool(const std::shared_ptr<const LogicalAnnotation>&)> check_is_type) {
- ASSERT_TRUE(annotation->is_valid())
- << annotation->ToString() << " annotation unexpectedly is not valid";
- ASSERT_TRUE(check_is_type(annotation))
- << annotation->ToString() << " annotation is not expected annotation type";
+ const std::shared_ptr<const LogicalType>& logical_type,
+ std::function<bool(const std::shared_ptr<const LogicalType>&)> check_is_type) {
+ ASSERT_TRUE(logical_type->is_valid())
+ << logical_type->ToString() << " logical type unexpectedly is not valid";
+ ASSERT_TRUE(check_is_type(logical_type))
+ << logical_type->ToString() << " logical type is not expected logical type";
schema::DecimalMetadata converted_decimal_metadata;
- LogicalType::type converted_type =
- annotation->ToConvertedType(&converted_decimal_metadata);
- ASSERT_EQ(converted_type, LogicalType::NONE)
- << annotation->ToString() << " annotation converted type unexpectedly is not NONE";
+ ConvertedType::type converted_type =
+ logical_type->ToConvertedType(&converted_decimal_metadata);
+ ASSERT_EQ(converted_type, ConvertedType::NONE)
+ << logical_type->ToString()
+ << " logical type converted type unexpectedly is not NONE";
ASSERT_FALSE(converted_decimal_metadata.isset)
- << annotation->ToString()
- << " annotation converted decimal metadata unexpectedly is set";
+ << logical_type->ToString()
+ << " logical type converted decimal metadata unexpectedly is set";
return;
}
-TEST(TestLogicalAnnotationConstruction, NewTypeIncompatibility) {
- // For each new logical annotation type, ensure that the logical annotation
+TEST(TestLogicalTypeConstruction, NewTypeIncompatibility) {
+ // For each new logical type, ensure that the type
// correctly reports that it has no legacy equivalent
struct ConfirmNewTypeIncompatibilityArguments {
- std::shared_ptr<const LogicalAnnotation> annotation;
- std::function<bool(const std::shared_ptr<const LogicalAnnotation>&)> check_is_type;
+ std::shared_ptr<const LogicalType> logical_type;
+ std::function<bool(const std::shared_ptr<const LogicalType>&)> check_is_type;
};
- auto check_is_UUID = [](const std::shared_ptr<const LogicalAnnotation>& annotation) {
- return annotation->is_UUID();
+ auto check_is_UUID = [](const std::shared_ptr<const LogicalType>& logical_type) {
+ return logical_type->is_UUID();
+ };
+ auto check_is_null = [](const std::shared_ptr<const LogicalType>& logical_type) {
+ return logical_type->is_null();
};
- auto check_is_null = [](const std::shared_ptr<const LogicalAnnotation>& annotation) {
- return annotation->is_null();
+ auto check_is_time = [](const std::shared_ptr<const LogicalType>& logical_type) {
+ return logical_type->is_time();
};
- auto check_is_time = [](const std::shared_ptr<const LogicalAnnotation>& annotation) {
- return annotation->is_time();
+ auto check_is_timestamp = [](const std::shared_ptr<const LogicalType>& logical_type) {
+ return logical_type->is_timestamp();
};
- auto check_is_timestamp =
- [](const std::shared_ptr<const LogicalAnnotation>& annotation) {
- return annotation->is_timestamp();
- };
std::vector<ConfirmNewTypeIncompatibilityArguments> cases = {
- {LogicalAnnotation::UUID(), check_is_UUID},
- {LogicalAnnotation::Null(), check_is_null},
- {LogicalAnnotation::Time(false, LogicalAnnotation::TimeUnit::MILLIS),
- check_is_time},
- {LogicalAnnotation::Time(false, LogicalAnnotation::TimeUnit::MICROS),
- check_is_time},
- {LogicalAnnotation::Time(false, LogicalAnnotation::TimeUnit::NANOS), check_is_time},
- {LogicalAnnotation::Time(true, LogicalAnnotation::TimeUnit::NANOS), check_is_time},
- {LogicalAnnotation::Timestamp(false, LogicalAnnotation::TimeUnit::MILLIS),
- check_is_timestamp},
- {LogicalAnnotation::Timestamp(false, LogicalAnnotation::TimeUnit::MICROS),
- check_is_timestamp},
- {LogicalAnnotation::Timestamp(false, LogicalAnnotation::TimeUnit::NANOS),
- check_is_timestamp},
- {LogicalAnnotation::Timestamp(true, LogicalAnnotation::TimeUnit::NANOS),
- check_is_timestamp},
+ {LogicalType::UUID(), check_is_UUID},
+ {LogicalType::Null(), check_is_null},
+ {LogicalType::Time(false, LogicalType::TimeUnit::MILLIS), check_is_time},
+ {LogicalType::Time(false, LogicalType::TimeUnit::MICROS), check_is_time},
+ {LogicalType::Time(false, LogicalType::TimeUnit::NANOS), check_is_time},
+ {LogicalType::Time(true, LogicalType::TimeUnit::NANOS), check_is_time},
+ {LogicalType::Timestamp(false, LogicalType::TimeUnit::MILLIS), check_is_timestamp},
+ {LogicalType::Timestamp(false, LogicalType::TimeUnit::MICROS), check_is_timestamp},
+ {LogicalType::Timestamp(false, LogicalType::TimeUnit::NANOS), check_is_timestamp},
+ {LogicalType::Timestamp(true, LogicalType::TimeUnit::NANOS), check_is_timestamp},
};
for (const ConfirmNewTypeIncompatibilityArguments& c : cases) {
- ConfirmNewTypeIncompatibility(c.annotation, c.check_is_type);
+ ConfirmNewTypeIncompatibility(c.logical_type, c.check_is_type);
}
}
-TEST(TestLogicalAnnotationConstruction, FactoryExceptions) {
- // Ensure that annotation construction catches invalid arguments
+TEST(TestLogicalTypeConstruction, FactoryExceptions) {
+ // Ensure that logical type construction catches invalid arguments
std::vector<std::function<void()>> cases = {
[]() {
- TimeAnnotation::Make(true, LogicalAnnotation::TimeUnit::UNKNOWN);
+ TimeLogicalType::Make(true, LogicalType::TimeUnit::UNKNOWN);
}, // Invalid TimeUnit
[]() {
- TimestampAnnotation::Make(true, LogicalAnnotation::TimeUnit::UNKNOWN);
- }, // Invalid TimeUnit
- []() { IntAnnotation::Make(-1, false); }, // Invalid bit width
- []() { IntAnnotation::Make(0, false); }, // Invalid bit width
- []() { IntAnnotation::Make(1, false); }, // Invalid bit width
- []() { IntAnnotation::Make(65, false); }, // Invalid bit width
- []() { DecimalAnnotation::Make(-1); }, // Invalid precision
- []() { DecimalAnnotation::Make(0); }, // Invalid precision
- []() { DecimalAnnotation::Make(0, 0); }, // Invalid precision
- []() { DecimalAnnotation::Make(10, -1); }, // Invalid scale
- []() { DecimalAnnotation::Make(10, 11); } // Invalid scale
+ TimestampLogicalType::Make(true, LogicalType::TimeUnit::UNKNOWN);
+ }, // Invalid TimeUnit
+ []() { IntLogicalType::Make(-1, false); }, // Invalid bit width
+ []() { IntLogicalType::Make(0, false); }, // Invalid bit width
+ []() { IntLogicalType::Make(1, false); }, // Invalid bit width
+ []() { IntLogicalType::Make(65, false); }, // Invalid bit width
+ []() { DecimalLogicalType::Make(-1); }, // Invalid precision
+ []() { DecimalLogicalType::Make(0); }, // Invalid precision
+ []() { DecimalLogicalType::Make(0, 0); }, // Invalid precision
+ []() { DecimalLogicalType::Make(10, -1); }, // Invalid scale
+ []() { DecimalLogicalType::Make(10, 11); } // Invalid scale
};
for (auto f : cases) {
@@ -1126,56 +1116,55 @@ TEST(TestLogicalAnnotationConstruction, FactoryExceptions) {
}
}
-static void ConfirmAnnotationProperties(
- const std::shared_ptr<const LogicalAnnotation>& annotation, bool nested,
- bool serialized, bool valid) {
- ASSERT_TRUE(annotation->is_nested() == nested)
- << annotation->ToString() << " annotation has incorrect nested() property";
- ASSERT_TRUE(annotation->is_serialized() == serialized)
- << annotation->ToString() << " annotation has incorrect serialized() property";
- ASSERT_TRUE(annotation->is_valid() == valid)
- << annotation->ToString() << " annotation has incorrect valid() property";
- ASSERT_TRUE(annotation->is_nonnested() != nested)
- << annotation->ToString() << " annotation has incorrect nonnested() property";
- ASSERT_TRUE(annotation->is_invalid() != valid)
- << annotation->ToString() << " annotation has incorrect invalid() property";
+static void ConfirmLogicalTypeProperties(
+ const std::shared_ptr<const LogicalType>& logical_type, bool nested, bool serialized,
+ bool valid) {
+ ASSERT_TRUE(logical_type->is_nested() == nested)
+ << logical_type->ToString() << " logical type has incorrect nested() property";
+ ASSERT_TRUE(logical_type->is_serialized() == serialized)
+ << logical_type->ToString() << " logical type has incorrect serialized() property";
+ ASSERT_TRUE(logical_type->is_valid() == valid)
+ << logical_type->ToString() << " logical type has incorrect valid() property";
+ ASSERT_TRUE(logical_type->is_nonnested() != nested)
+ << logical_type->ToString() << " logical type has incorrect nonnested() property";
+ ASSERT_TRUE(logical_type->is_invalid() != valid)
+ << logical_type->ToString() << " logical type has incorrect invalid() property";
return;
}
-TEST(TestLogicalAnnotationOperation, AnnotationProperties) {
- // For each annotation type, ensure that the correct general properties are reported
+TEST(TestLogicalTypeOperation, LogicalTypeProperties) {
+ // For each logical type, ensure that the correct general properties are reported
struct ExpectedProperties {
- std::shared_ptr<const LogicalAnnotation> annotation;
+ std::shared_ptr<const LogicalType> logical_type;
bool nested;
bool serialized;
bool valid;
};
std::vector<ExpectedProperties> cases = {
- {StringAnnotation::Make(), false, true, true},
- {MapAnnotation::Make(), true, true, true},
- {ListAnnotation::Make(), true, true, true},
- {EnumAnnotation::Make(), false, true, true},
- {DecimalAnnotation::Make(16, 6), false, true, true},
- {DateAnnotation::Make(), false, true, true},
- {TimeAnnotation::Make(true, LogicalAnnotation::TimeUnit::MICROS), false, true,
- true},
- {TimestampAnnotation::Make(true, LogicalAnnotation::TimeUnit::MICROS), false, true,
+ {StringLogicalType::Make(), false, true, true},
+ {MapLogicalType::Make(), true, true, true},
+ {ListLogicalType::Make(), true, true, true},
+ {EnumLogicalType::Make(), false, true, true},
+ {DecimalLogicalType::Make(16, 6), false, true, true},
+ {DateLogicalType::Make(), false, true, true},
+ {TimeLogicalType::Make(true, LogicalType::TimeUnit::MICROS), false, true, true},
+ {TimestampLogicalType::Make(true, LogicalType::TimeUnit::MICROS), false, true,
true},
- {IntervalAnnotation::Make(), false, true, true},
- {IntAnnotation::Make(8, false), false, true, true},
- {IntAnnotation::Make(64, true), false, true, true},
- {NullAnnotation::Make(), false, true, true},
- {JSONAnnotation::Make(), false, true, true},
- {BSONAnnotation::Make(), false, true, true},
- {UUIDAnnotation::Make(), false, true, true},
- {NoAnnotation::Make(), false, false, true},
- {UnknownAnnotation::Make(), false, false, false},
+ {IntervalLogicalType::Make(), false, true, true},
+ {IntLogicalType::Make(8, false), false, true, true},
+ {IntLogicalType::Make(64, true), false, true, true},
+ {NullLogicalType::Make(), false, true, true},
+ {JSONLogicalType::Make(), false, true, true},
+ {BSONLogicalType::Make(), false, true, true},
+ {UUIDLogicalType::Make(), false, true, true},
+ {NoLogicalType::Make(), false, false, true},
+ {UnknownLogicalType::Make(), false, false, false},
};
for (const ExpectedProperties& c : cases) {
- ConfirmAnnotationProperties(c.annotation, c.nested, c.serialized, c.valid);
+ ConfirmLogicalTypeProperties(c.logical_type, c.nested, c.serialized, c.valid);
}
}
@@ -1186,18 +1175,17 @@ static Type::type physical_type[PHYSICAL_TYPE_COUNT] = {
Type::FLOAT, Type::DOUBLE, Type::BYTE_ARRAY, Type::FIXED_LEN_BYTE_ARRAY};
static void ConfirmSinglePrimitiveTypeApplicability(
- const std::shared_ptr<const LogicalAnnotation>& annotation,
- Type::type applicable_type) {
+ const std::shared_ptr<const LogicalType>& logical_type, Type::type applicable_type) {
for (int i = 0; i < PHYSICAL_TYPE_COUNT; ++i) {
if (physical_type[i] == applicable_type) {
- ASSERT_TRUE(annotation->is_applicable(physical_type[i]))
- << annotation->ToString()
- << " annotation unexpectedly inapplicable to physical type "
+ ASSERT_TRUE(logical_type->is_applicable(physical_type[i]))
+ << logical_type->ToString()
+ << " logical type unexpectedly inapplicable to physical type "
<< TypeToString(physical_type[i]);
} else {
- ASSERT_FALSE(annotation->is_applicable(physical_type[i]))
- << annotation->ToString()
- << " annotation unexpectedly applicable to physical type "
+ ASSERT_FALSE(logical_type->is_applicable(physical_type[i]))
+ << logical_type->ToString()
+ << " logical type unexpectedly applicable to physical type "
<< TypeToString(physical_type[i]);
}
}
@@ -1205,73 +1193,70 @@ static void ConfirmSinglePrimitiveTypeApplicability(
}
static void ConfirmAnyPrimitiveTypeApplicability(
- const std::shared_ptr<const LogicalAnnotation>& annotation) {
+ const std::shared_ptr<const LogicalType>& logical_type) {
for (int i = 0; i < PHYSICAL_TYPE_COUNT; ++i) {
- ASSERT_TRUE(annotation->is_applicable(physical_type[i]))
- << annotation->ToString()
- << " annotation unexpectedly inapplicable to physical type "
+ ASSERT_TRUE(logical_type->is_applicable(physical_type[i]))
+ << logical_type->ToString()
+ << " logical type unexpectedly inapplicable to physical type "
<< TypeToString(physical_type[i]);
}
return;
}
static void ConfirmNoPrimitiveTypeApplicability(
- const std::shared_ptr<const LogicalAnnotation>& annotation) {
+ const std::shared_ptr<const LogicalType>& logical_type) {
for (int i = 0; i < PHYSICAL_TYPE_COUNT; ++i) {
- ASSERT_FALSE(annotation->is_applicable(physical_type[i]))
- << annotation->ToString()
- << " annotation unexpectedly applicable to physical type "
+ ASSERT_FALSE(logical_type->is_applicable(physical_type[i]))
+ << logical_type->ToString()
+ << " logical type unexpectedly applicable to physical type "
<< TypeToString(physical_type[i]);
}
return;
}
-TEST(TestLogicalAnnotationOperation, AnnotationApplicability) {
- // Check that each logical annotation type correctly reports which
+TEST(TestLogicalTypeOperation, LogicalTypeApplicability) {
+ // Check that each logical type correctly reports which
// underlying primitive type(s) it can be applied to
struct ExpectedApplicability {
- std::shared_ptr<const LogicalAnnotation> annotation;
+ std::shared_ptr<const LogicalType> logical_type;
Type::type applicable_type;
};
std::vector<ExpectedApplicability> single_type_cases = {
- {LogicalAnnotation::String(), Type::BYTE_ARRAY},
- {LogicalAnnotation::Enum(), Type::BYTE_ARRAY},
- {LogicalAnnotation::Date(), Type::INT32},
- {LogicalAnnotation::Time(true, LogicalAnnotation::TimeUnit::MILLIS), Type::INT32},
- {LogicalAnnotation::Time(true, LogicalAnnotation::TimeUnit::MICROS), Type::INT64},
- {LogicalAnnotation::Time(true, LogicalAnnotation::TimeUnit::NANOS), Type::INT64},
- {LogicalAnnotation::Timestamp(true, LogicalAnnotation::TimeUnit::MILLIS),
- Type::INT64},
- {LogicalAnnotation::Timestamp(true, LogicalAnnotation::TimeUnit::MICROS),
- Type::INT64},
- {LogicalAnnotation::Timestamp(true, LogicalAnnotation::TimeUnit::NANOS),
- Type::INT64},
- {LogicalAnnotation::Int(8, false), Type::INT32},
- {LogicalAnnotation::Int(16, false), Type::INT32},
- {LogicalAnnotation::Int(32, false), Type::INT32},
- {LogicalAnnotation::Int(64, false), Type::INT64},
- {LogicalAnnotation::Int(8, true), Type::INT32},
- {LogicalAnnotation::Int(16, true), Type::INT32},
- {LogicalAnnotation::Int(32, true), Type::INT32},
- {LogicalAnnotation::Int(64, true), Type::INT64},
- {LogicalAnnotation::JSON(), Type::BYTE_ARRAY},
- {LogicalAnnotation::BSON(), Type::BYTE_ARRAY}};
+ {LogicalType::String(), Type::BYTE_ARRAY},
+ {LogicalType::Enum(), Type::BYTE_ARRAY},
+ {LogicalType::Date(), Type::INT32},
+ {LogicalType::Time(true, LogicalType::TimeUnit::MILLIS), Type::INT32},
+ {LogicalType::Time(true, LogicalType::TimeUnit::MICROS), Type::INT64},
+ {LogicalType::Time(true, LogicalType::TimeUnit::NANOS), Type::INT64},
+ {LogicalType::Timestamp(true, LogicalType::TimeUnit::MILLIS), Type::INT64},
+ {LogicalType::Timestamp(true, LogicalType::TimeUnit::MICROS), Type::INT64},
+ {LogicalType::Timestamp(true, LogicalType::TimeUnit::NANOS), Type::INT64},
+ {LogicalType::Int(8, false), Type::INT32},
+ {LogicalType::Int(16, false), Type::INT32},
+ {LogicalType::Int(32, false), Type::INT32},
+ {LogicalType::Int(64, false), Type::INT64},
+ {LogicalType::Int(8, true), Type::INT32},
+ {LogicalType::Int(16, true), Type::INT32},
+ {LogicalType::Int(32, true), Type::INT32},
+ {LogicalType::Int(64, true), Type::INT64},
+ {LogicalType::JSON(), Type::BYTE_ARRAY},
+ {LogicalType::BSON(), Type::BYTE_ARRAY}};
for (const ExpectedApplicability& c : single_type_cases) {
- ConfirmSinglePrimitiveTypeApplicability(c.annotation, c.applicable_type);
+ ConfirmSinglePrimitiveTypeApplicability(c.logical_type, c.applicable_type);
}
- std::vector<std::shared_ptr<const LogicalAnnotation>> no_type_cases = {
- LogicalAnnotation::Map(), LogicalAnnotation::List()};
+ std::vector<std::shared_ptr<const LogicalType>> no_type_cases = {LogicalType::Map(),
+ LogicalType::List()};
for (auto c : no_type_cases) {
ConfirmNoPrimitiveTypeApplicability(c);
}
- std::vector<std::shared_ptr<const LogicalAnnotation>> any_type_cases = {
- LogicalAnnotation::Null(), LogicalAnnotation::None(), LogicalAnnotation::Unknown()};
+ std::vector<std::shared_ptr<const LogicalType>> any_type_cases = {
+ LogicalType::Null(), LogicalType::None(), LogicalType::Unknown()};
for (auto c : any_type_cases) {
ConfirmAnyPrimitiveTypeApplicability(c);
@@ -1294,49 +1279,51 @@ TEST(TestLogicalAnnotationOperation, AnnotationApplicability) {
{Type::DOUBLE, -1},
{Type::BYTE_ARRAY, -1}};
- std::shared_ptr<const LogicalAnnotation> annotation;
+ std::shared_ptr<const LogicalType> logical_type;
- annotation = LogicalAnnotation::Interval();
- ASSERT_TRUE(annotation->is_applicable(Type::FIXED_LEN_BYTE_ARRAY, 12));
+ logical_type = LogicalType::Interval();
+ ASSERT_TRUE(logical_type->is_applicable(Type::FIXED_LEN_BYTE_ARRAY, 12));
for (const InapplicableType& t : inapplicable_types) {
- ASSERT_FALSE(annotation->is_applicable(t.physical_type, t.physical_length));
+ ASSERT_FALSE(logical_type->is_applicable(t.physical_type, t.physical_length));
}
- annotation = LogicalAnnotation::UUID();
- ASSERT_TRUE(annotation->is_applicable(Type::FIXED_LEN_BYTE_ARRAY, 16));
+ logical_type = LogicalType::UUID();
+ ASSERT_TRUE(logical_type->is_applicable(Type::FIXED_LEN_BYTE_ARRAY, 16));
for (const InapplicableType& t : inapplicable_types) {
- ASSERT_FALSE(annotation->is_applicable(t.physical_type, t.physical_length));
+ ASSERT_FALSE(logical_type->is_applicable(t.physical_type, t.physical_length));
}
}
-TEST(TestLogicalAnnotationOperation, DecimalAnnotationApplicability) {
- // Check that the decimal logical annotation type correctly reports which
+TEST(TestLogicalTypeOperation, DecimalLogicalTypeApplicability) {
+ // Check that the decimal logical type correctly reports which
// underlying primitive type(s) it can be applied to
- std::shared_ptr<const LogicalAnnotation> annotation;
+ std::shared_ptr<const LogicalType> logical_type;
for (int32_t precision = 1; precision <= 9; ++precision) {
- annotation = DecimalAnnotation::Make(precision, 0);
- ASSERT_TRUE(annotation->is_applicable(Type::INT32))
- << annotation->ToString() << " unexpectedly inapplicable to physical type INT32";
+ logical_type = DecimalLogicalType::Make(precision, 0);
+ ASSERT_TRUE(logical_type->is_applicable(Type::INT32))
+ << logical_type->ToString()
+ << " unexpectedly inapplicable to physical type INT32";
}
- annotation = DecimalAnnotation::Make(10, 0);
- ASSERT_FALSE(annotation->is_applicable(Type::INT32))
- << annotation->ToString() << " unexpectedly applicable to physical type INT32";
+ logical_type = DecimalLogicalType::Make(10, 0);
+ ASSERT_FALSE(logical_type->is_applicable(Type::INT32))
+ << logical_type->ToString() << " unexpectedly applicable to physical type INT32";
for (int32_t precision = 1; precision <= 18; ++precision) {
- annotation = DecimalAnnotation::Make(precision, 0);
- ASSERT_TRUE(annotation->is_applicable(Type::INT64))
- << annotation->ToString() << " unexpectedly inapplicable to physical type INT64";
+ logical_type = DecimalLogicalType::Make(precision, 0);
+ ASSERT_TRUE(logical_type->is_applicable(Type::INT64))
+ << logical_type->ToString()
+ << " unexpectedly inapplicable to physical type INT64";
}
- annotation = DecimalAnnotation::Make(19, 0);
- ASSERT_FALSE(annotation->is_applicable(Type::INT64))
- << annotation->ToString() << " unexpectedly applicable to physical type INT64";
+ logical_type = DecimalLogicalType::Make(19, 0);
+ ASSERT_FALSE(logical_type->is_applicable(Type::INT64))
+ << logical_type->ToString() << " unexpectedly applicable to physical type INT64";
for (int32_t precision = 1; precision <= 36; ++precision) {
- annotation = DecimalAnnotation::Make(precision, 0);
- ASSERT_TRUE(annotation->is_applicable(Type::BYTE_ARRAY))
- << annotation->ToString()
+ logical_type = DecimalLogicalType::Make(precision, 0);
+ ASSERT_TRUE(logical_type->is_applicable(Type::BYTE_ARRAY))
+ << logical_type->ToString()
<< " unexpectedly inapplicable to physical type BYTE_ARRAY";
}
@@ -1351,217 +1338,205 @@ TEST(TestLogicalAnnotationOperation, DecimalAnnotationApplicability) {
for (const PrecisionLimits& c : cases) {
int32_t precision;
for (precision = 1; precision <= c.precision_limit; ++precision) {
- annotation = DecimalAnnotation::Make(precision, 0);
+ logical_type = DecimalLogicalType::Make(precision, 0);
ASSERT_TRUE(
- annotation->is_applicable(Type::FIXED_LEN_BYTE_ARRAY, c.physical_length))
- << annotation->ToString()
+ logical_type->is_applicable(Type::FIXED_LEN_BYTE_ARRAY, c.physical_length))
+ << logical_type->ToString()
<< " unexpectedly inapplicable to physical type FIXED_LEN_BYTE_ARRAY with "
"length "
<< c.physical_length;
}
- annotation = DecimalAnnotation::Make(precision, 0);
- ASSERT_FALSE(annotation->is_applicable(Type::FIXED_LEN_BYTE_ARRAY, c.physical_length))
- << annotation->ToString()
+ logical_type = DecimalLogicalType::Make(precision, 0);
+ ASSERT_FALSE(
+ logical_type->is_applicable(Type::FIXED_LEN_BYTE_ARRAY, c.physical_length))
+ << logical_type->ToString()
<< " unexpectedly applicable to physical type FIXED_LEN_BYTE_ARRAY with length "
<< c.physical_length;
}
- ASSERT_FALSE((DecimalAnnotation::Make(16, 6))->is_applicable(Type::BOOLEAN));
- ASSERT_FALSE((DecimalAnnotation::Make(16, 6))->is_applicable(Type::FLOAT));
- ASSERT_FALSE((DecimalAnnotation::Make(16, 6))->is_applicable(Type::DOUBLE));
+ ASSERT_FALSE((DecimalLogicalType::Make(16, 6))->is_applicable(Type::BOOLEAN));
+ ASSERT_FALSE((DecimalLogicalType::Make(16, 6))->is_applicable(Type::FLOAT));
+ ASSERT_FALSE((DecimalLogicalType::Make(16, 6))->is_applicable(Type::DOUBLE));
}
-TEST(TestLogicalAnnotationOperation, AnnotationRepresentation) {
- // Ensure that each logical annotation type prints a correct string and
+TEST(TestLogicalTypeOperation, LogicalTypeRepresentation) {
+ // Ensure that each logical type prints a correct string and
// JSON representation
struct ExpectedRepresentation {
- std::shared_ptr<const LogicalAnnotation> annotation;
+ std::shared_ptr<const LogicalType> logical_type;
const char* string_representation;
const char* JSON_representation;
};
std::vector<ExpectedRepresentation> cases = {
- {LogicalAnnotation::Unknown(), "Unknown", R"({"Type": "Unknown"})"},
- {LogicalAnnotation::String(), "String", R"({"Type": "String"})"},
- {LogicalAnnotation::Map(), "Map", R"({"Type": "Map"})"},
- {LogicalAnnotation::List(), "List", R"({"Type": "List"})"},
- {LogicalAnnotation::Enum(), "Enum", R"({"Type": "Enum"})"},
- {LogicalAnnotation::Decimal(10, 4), "Decimal(precision=10, scale=4)",
+ {LogicalType::Unknown(), "Unknown", R"({"Type": "Unknown"})"},
+ {LogicalType::String(), "String", R"({"Type": "String"})"},
+ {LogicalType::Map(), "Map", R"({"Type": "Map"})"},
+ {LogicalType::List(), "List", R"({"Type": "List"})"},
+ {LogicalType::Enum(), "Enum", R"({"Type": "Enum"})"},
+ {LogicalType::Decimal(10, 4), "Decimal(precision=10, scale=4)",
R"({"Type": "Decimal", "precision": 10, "scale": 4})"},
- {LogicalAnnotation::Decimal(10), "Decimal(precision=10, scale=0)",
+ {LogicalType::Decimal(10), "Decimal(precision=10, scale=0)",
R"({"Type": "Decimal", "precision": 10, "scale": 0})"},
- {LogicalAnnotation::Date(), "Date", R"({"Type": "Date"})"},
- {LogicalAnnotation::Time(true, LogicalAnnotation::TimeUnit::MILLIS),
+ {LogicalType::Date(), "Date", R"({"Type": "Date"})"},
+ {LogicalType::Time(true, LogicalType::TimeUnit::MILLIS),
"Time(isAdjustedToUTC=true, timeUnit=milliseconds)",
R"({"Type": "Time", "isAdjustedToUTC": true, "timeUnit": "milliseconds"})"},
- {LogicalAnnotation::Time(true, LogicalAnnotation::TimeUnit::MICROS),
+ {LogicalType::Time(true, LogicalType::TimeUnit::MICROS),
"Time(isAdjustedToUTC=true, timeUnit=microseconds)",
R"({"Type": "Time", "isAdjustedToUTC": true, "timeUnit": "microseconds"})"},
- {LogicalAnnotation::Time(true, LogicalAnnotation::TimeUnit::NANOS),
+ {LogicalType::Time(true, LogicalType::TimeUnit::NANOS),
"Time(isAdjustedToUTC=true, timeUnit=nanoseconds)",
R"({"Type": "Time", "isAdjustedToUTC": true, "timeUnit": "nanoseconds"})"},
- {LogicalAnnotation::Time(false, LogicalAnnotation::TimeUnit::MILLIS),
+ {LogicalType::Time(false, LogicalType::TimeUnit::MILLIS),
"Time(isAdjustedToUTC=false, timeUnit=milliseconds)",
R"({"Type": "Time", "isAdjustedToUTC": false, "timeUnit": "milliseconds"})"},
- {LogicalAnnotation::Time(false, LogicalAnnotation::TimeUnit::MICROS),
+ {LogicalType::Time(false, LogicalType::TimeUnit::MICROS),
"Time(isAdjustedToUTC=false, timeUnit=microseconds)",
R"({"Type": "Time", "isAdjustedToUTC": false, "timeUnit": "microseconds"})"},
- {LogicalAnnotation::Time(false, LogicalAnnotation::TimeUnit::NANOS),
+ {LogicalType::Time(false, LogicalType::TimeUnit::NANOS),
"Time(isAdjustedToUTC=false, timeUnit=nanoseconds)",
R"({"Type": "Time", "isAdjustedToUTC": false, "timeUnit": "nanoseconds"})"},
- {LogicalAnnotation::Timestamp(true, LogicalAnnotation::TimeUnit::MILLIS),
+ {LogicalType::Timestamp(true, LogicalType::TimeUnit::MILLIS),
"Timestamp(isAdjustedToUTC=true, timeUnit=milliseconds)",
R"({"Type": "Timestamp", "isAdjustedToUTC": true, "timeUnit": "milliseconds"})"},
- {LogicalAnnotation::Timestamp(true, LogicalAnnotation::TimeUnit::MICROS),
+ {LogicalType::Timestamp(true, LogicalType::TimeUnit::MICROS),
"Timestamp(isAdjustedToUTC=true, timeUnit=microseconds)",
R"({"Type": "Timestamp", "isAdjustedToUTC": true, "timeUnit": "microseconds"})"},
- {LogicalAnnotation::Timestamp(true, LogicalAnnotation::TimeUnit::NANOS),
+ {LogicalType::Timestamp(true, LogicalType::TimeUnit::NANOS),
"Timestamp(isAdjustedToUTC=true, timeUnit=nanoseconds)",
R"({"Type": "Timestamp", "isAdjustedToUTC": true, "timeUnit": "nanoseconds"})"},
- {LogicalAnnotation::Timestamp(false, LogicalAnnotation::TimeUnit::MILLIS),
+ {LogicalType::Timestamp(false, LogicalType::TimeUnit::MILLIS),
"Timestamp(isAdjustedToUTC=false, timeUnit=milliseconds)",
R"({"Type": "Timestamp", "isAdjustedToUTC": false, "timeUnit": "milliseconds"})"},
- {LogicalAnnotation::Timestamp(false, LogicalAnnotation::TimeUnit::MICROS),
+ {LogicalType::Timestamp(false, LogicalType::TimeUnit::MICROS),
"Timestamp(isAdjustedToUTC=false, timeUnit=microseconds)",
R"({"Type": "Timestamp", "isAdjustedToUTC": false, "timeUnit": "microseconds"})"},
- {LogicalAnnotation::Timestamp(false, LogicalAnnotation::TimeUnit::NANOS),
+ {LogicalType::Timestamp(false, LogicalType::TimeUnit::NANOS),
"Timestamp(isAdjustedToUTC=false, timeUnit=nanoseconds)",
R"({"Type": "Timestamp", "isAdjustedToUTC": false, "timeUnit": "nanoseconds"})"},
- {LogicalAnnotation::Interval(), "Interval", R"({"Type": "Interval"})"},
- {LogicalAnnotation::Int(8, false), "Int(bitWidth=8, isSigned=false)",
+ {LogicalType::Interval(), "Interval", R"({"Type": "Interval"})"},
+ {LogicalType::Int(8, false), "Int(bitWidth=8, isSigned=false)",
R"({"Type": "Int", "bitWidth": 8, "isSigned": false})"},
- {LogicalAnnotation::Int(16, false), "Int(bitWidth=16, isSigned=false)",
+ {LogicalType::Int(16, false), "Int(bitWidth=16, isSigned=false)",
R"({"Type": "Int", "bitWidth": 16, "isSigned": false})"},
- {LogicalAnnotation::Int(32, false), "Int(bitWidth=32, isSigned=false)",
+ {LogicalType::Int(32, false), "Int(bitWidth=32, isSigned=false)",
R"({"Type": "Int", "bitWidth": 32, "isSigned": false})"},
- {LogicalAnnotation::Int(64, false), "Int(bitWidth=64, isSigned=false)",
+ {LogicalType::Int(64, false), "Int(bitWidth=64, isSigned=false)",
R"({"Type": "Int", "bitWidth": 64, "isSigned": false})"},
- {LogicalAnnotation::Int(8, true), "Int(bitWidth=8, isSigned=true)",
+ {LogicalType::Int(8, true), "Int(bitWidth=8, isSigned=true)",
R"({"Type": "Int", "bitWidth": 8, "isSigned": true})"},
- {LogicalAnnotation::Int(16, true), "Int(bitWidth=16, isSigned=true)",
+ {LogicalType::Int(16, true), "Int(bitWidth=16, isSigned=true)",
R"({"Type": "Int", "bitWidth": 16, "isSigned": true})"},
- {LogicalAnnotation::Int(32, true), "Int(bitWidth=32, isSigned=true)",
+ {LogicalType::Int(32, true), "Int(bitWidth=32, isSigned=true)",
R"({"Type": "Int", "bitWidth": 32, "isSigned": true})"},
- {LogicalAnnotation::Int(64, true), "Int(bitWidth=64, isSigned=true)",
+ {LogicalType::Int(64, true), "Int(bitWidth=64, isSigned=true)",
R"({"Type": "Int", "bitWidth": 64, "isSigned": true})"},
- {LogicalAnnotation::Null(), "Null", R"({"Type": "Null"})"},
- {LogicalAnnotation::JSON(), "JSON", R"({"Type": "JSON"})"},
- {LogicalAnnotation::BSON(), "BSON", R"({"Type": "BSON"})"},
- {LogicalAnnotation::UUID(), "UUID", R"({"Type": "UUID"})"},
- {LogicalAnnotation::None(), "None", R"({"Type": "None"})"},
+ {LogicalType::Null(), "Null", R"({"Type": "Null"})"},
+ {LogicalType::JSON(), "JSON", R"({"Type": "JSON"})"},
+ {LogicalType::BSON(), "BSON", R"({"Type": "BSON"})"},
+ {LogicalType::UUID(), "UUID", R"({"Type": "UUID"})"},
+ {LogicalType::None(), "None", R"({"Type": "None"})"},
};
for (const ExpectedRepresentation& c : cases) {
- ASSERT_STREQ(c.annotation->ToString().c_str(), c.string_representation);
- ASSERT_STREQ(c.annotation->ToJSON().c_str(), c.JSON_representation);
+ ASSERT_STREQ(c.logical_type->ToString().c_str(), c.string_representation);
+ ASSERT_STREQ(c.logical_type->ToJSON().c_str(), c.JSON_representation);
}
}
-TEST(TestLogicalAnnotationOperation, AnnotationSortOrder) {
- // Ensure that each logical annotation type reports the correct sort order
+TEST(TestLogicalTypeOperation, LogicalTypeSortOrder) {
+ // Ensure that each logical type reports the correct sort order
struct ExpectedSortOrder {
- std::shared_ptr<const LogicalAnnotation> annotation;
+ std::shared_ptr<const LogicalType> logical_type;
SortOrder::type sort_order;
};
std::vector<ExpectedSortOrder> cases = {
- {LogicalAnnotation::Unknown(), SortOrder::UNKNOWN},
- {LogicalAnnotation::String(), SortOrder::UNSIGNED},
- {LogicalAnnotation::Map(), SortOrder::UNKNOWN},
- {LogicalAnnotation::List(), SortOrder::UNKNOWN},
- {LogicalAnnotation::Enum(), SortOrder::UNSIGNED},
- {LogicalAnnotation::Decimal(8, 2), SortOrder::SIGNED},
- {LogicalAnnotation::Date(), SortOrder::SIGNED},
- {LogicalAnnotation::Time(true, LogicalAnnotation::TimeUnit::MILLIS),
- SortOrder::SIGNED},
- {LogicalAnnotation::Time(true, LogicalAnnotation::TimeUnit::MICROS),
- SortOrder::SIGNED},
- {LogicalAnnotation::Time(true, LogicalAnnotation::TimeUnit::NANOS),
- SortOrder::SIGNED},
- {LogicalAnnotation::Time(false, LogicalAnnotation::TimeUnit::MILLIS),
- SortOrder::SIGNED},
- {LogicalAnnotation::Time(false, LogicalAnnotation::TimeUnit::MICROS),
- SortOrder::SIGNED},
- {LogicalAnnotation::Time(false, LogicalAnnotation::TimeUnit::NANOS),
- SortOrder::SIGNED},
- {LogicalAnnotation::Timestamp(true, LogicalAnnotation::TimeUnit::MILLIS),
- SortOrder::SIGNED},
- {LogicalAnnotation::Timestamp(true, LogicalAnnotation::TimeUnit::MICROS),
- SortOrder::SIGNED},
- {LogicalAnnotation::Timestamp(true, LogicalAnnotation::TimeUnit::NANOS),
- SortOrder::SIGNED},
- {LogicalAnnotation::Timestamp(false, LogicalAnnotation::TimeUnit::MILLIS),
- SortOrder::SIGNED},
- {LogicalAnnotation::Timestamp(false, LogicalAnnotation::TimeUnit::MICROS),
- SortOrder::SIGNED},
- {LogicalAnnotation::Timestamp(false, LogicalAnnotation::TimeUnit::NANOS),
- SortOrder::SIGNED},
- {LogicalAnnotation::Interval(), SortOrder::UNKNOWN},
- {LogicalAnnotation::Int(8, false), SortOrder::UNSIGNED},
- {LogicalAnnotation::Int(16, false), SortOrder::UNSIGNED},
- {LogicalAnnotation::Int(32, false), SortOrder::UNSIGNED},
- {LogicalAnnotation::Int(64, false), SortOrder::UNSIGNED},
- {LogicalAnnotation::Int(8, true), SortOrder::SIGNED},
- {LogicalAnnotation::Int(16, true), SortOrder::SIGNED},
- {LogicalAnnotation::Int(32, true), SortOrder::SIGNED},
- {LogicalAnnotation::Int(64, true), SortOrder::SIGNED},
- {LogicalAnnotation::Null(), SortOrder::UNKNOWN},
- {LogicalAnnotation::JSON(), SortOrder::UNSIGNED},
- {LogicalAnnotation::BSON(), SortOrder::UNSIGNED},
- {LogicalAnnotation::UUID(), SortOrder::UNSIGNED},
- {LogicalAnnotation::None(), SortOrder::UNKNOWN}};
+ {LogicalType::Unknown(), SortOrder::UNKNOWN},
+ {LogicalType::String(), SortOrder::UNSIGNED},
+ {LogicalType::Map(), SortOrder::UNKNOWN},
+ {LogicalType::List(), SortOrder::UNKNOWN},
+ {LogicalType::Enum(), SortOrder::UNSIGNED},
+ {LogicalType::Decimal(8, 2), SortOrder::SIGNED},
+ {LogicalType::Date(), SortOrder::SIGNED},
+ {LogicalType::Time(true, LogicalType::TimeUnit::MILLIS), SortOrder::SIGNED},
+ {LogicalType::Time(true, LogicalType::TimeUnit::MICROS), SortOrder::SIGNED},
+ {LogicalType::Time(true, LogicalType::TimeUnit::NANOS), SortOrder::SIGNED},
+ {LogicalType::Time(false, LogicalType::TimeUnit::MILLIS), SortOrder::SIGNED},
+ {LogicalType::Time(false, LogicalType::TimeUnit::MICROS), SortOrder::SIGNED},
+ {LogicalType::Time(false, LogicalType::TimeUnit::NANOS), SortOrder::SIGNED},
+ {LogicalType::Timestamp(true, LogicalType::TimeUnit::MILLIS), SortOrder::SIGNED},
+ {LogicalType::Timestamp(true, LogicalType::TimeUnit::MICROS), SortOrder::SIGNED},
+ {LogicalType::Timestamp(true, LogicalType::TimeUnit::NANOS), SortOrder::SIGNED},
+ {LogicalType::Timestamp(false, LogicalType::TimeUnit::MILLIS), SortOrder::SIGNED},
+ {LogicalType::Timestamp(false, LogicalType::TimeUnit::MICROS), SortOrder::SIGNED},
+ {LogicalType::Timestamp(false, LogicalType::TimeUnit::NANOS), SortOrder::SIGNED},
+ {LogicalType::Interval(), SortOrder::UNKNOWN},
+ {LogicalType::Int(8, false), SortOrder::UNSIGNED},
+ {LogicalType::Int(16, false), SortOrder::UNSIGNED},
+ {LogicalType::Int(32, false), SortOrder::UNSIGNED},
+ {LogicalType::Int(64, false), SortOrder::UNSIGNED},
+ {LogicalType::Int(8, true), SortOrder::SIGNED},
+ {LogicalType::Int(16, true), SortOrder::SIGNED},
+ {LogicalType::Int(32, true), SortOrder::SIGNED},
+ {LogicalType::Int(64, true), SortOrder::SIGNED},
+ {LogicalType::Null(), SortOrder::UNKNOWN},
+ {LogicalType::JSON(), SortOrder::UNSIGNED},
+ {LogicalType::BSON(), SortOrder::UNSIGNED},
+ {LogicalType::UUID(), SortOrder::UNSIGNED},
+ {LogicalType::None(), SortOrder::UNKNOWN}};
for (const ExpectedSortOrder& c : cases) {
- ASSERT_EQ(c.annotation->sort_order(), c.sort_order)
- << c.annotation->ToString() << " annotation has incorrect sort order";
+ ASSERT_EQ(c.logical_type->sort_order(), c.sort_order)
+ << c.logical_type->ToString() << " logical type has incorrect sort order";
}
}
static void ConfirmPrimitiveNodeFactoryEquivalence(
- const std::shared_ptr<const LogicalAnnotation>& logical_annotation,
- LogicalType::type converted_type, Type::type physical_type, int physical_length,
+ const std::shared_ptr<const LogicalType>& logical_type,
+ ConvertedType::type converted_type, Type::type physical_type, int physical_length,
int precision, int scale) {
std::string name = "something";
Repetition::type repetition = Repetition::REQUIRED;
NodePtr from_converted_type = PrimitiveNode::Make(
name, repetition, physical_type, converted_type, physical_length, precision, scale);
- NodePtr from_logical_annotation = PrimitiveNode::Make(
- name, repetition, logical_annotation, physical_type, physical_length);
- ASSERT_TRUE(from_converted_type->Equals(from_logical_annotation.get()))
+ NodePtr from_logical_type =
+ PrimitiveNode::Make(name, repetition, logical_type, physical_type, physical_length);
+ ASSERT_TRUE(from_converted_type->Equals(from_logical_type.get()))
<< "Primitive node constructed with converted type "
- << LogicalTypeToString(converted_type)
+ << ConvertedTypeToString(converted_type)
<< " unexpectedly not equivalent to primitive node constructed with logical "
- "annotation "
- << logical_annotation->ToString();
+ "type "
+ << logical_type->ToString();
return;
}
static void ConfirmGroupNodeFactoryEquivalence(
- std::string name, const std::shared_ptr<const LogicalAnnotation>& logical_annotation,
- LogicalType::type converted_type) {
+ std::string name, const std::shared_ptr<const LogicalType>& logical_type,
+ ConvertedType::type converted_type) {
Repetition::type repetition = Repetition::OPTIONAL;
NodePtr from_converted_type = GroupNode::Make(name, repetition, {}, converted_type);
- NodePtr from_logical_annotation =
- GroupNode::Make(name, repetition, {}, logical_annotation);
- ASSERT_TRUE(from_converted_type->Equals(from_logical_annotation.get()))
+ NodePtr from_logical_type = GroupNode::Make(name, repetition, {}, logical_type);
+ ASSERT_TRUE(from_converted_type->Equals(from_logical_type.get()))
<< "Group node constructed with converted type "
- << LogicalTypeToString(converted_type)
- << " unexpectedly not equivalent to group node constructed with logical annotation "
- << logical_annotation->ToString();
+ << ConvertedTypeToString(converted_type)
+ << " unexpectedly not equivalent to group node constructed with logical type "
+ << logical_type->ToString();
return;
}
TEST(TestSchemaNodeCreation, FactoryEquivalence) {
// Ensure that the Node factory methods produce equivalent results regardless
- // of whether they are given a converted type or a logical annotation.
+ // of whether they are given a converted type or a logical type.
// Primitive nodes ...
struct PrimitiveNodeFactoryArguments {
- std::shared_ptr<const LogicalAnnotation> annotation;
- LogicalType::type converted_type;
+ std::shared_ptr<const LogicalType> logical_type;
+ ConvertedType::type converted_type;
Type::type physical_type;
int physical_length;
int precision;
@@ -1569,114 +1544,113 @@ TEST(TestSchemaNodeCreation, FactoryEquivalence) {
};
std::vector<PrimitiveNodeFactoryArguments> cases = {
- {LogicalAnnotation::String(), LogicalType::UTF8, Type::BYTE_ARRAY, -1, -1, -1},
- {LogicalAnnotation::Enum(), LogicalType::ENUM, Type::BYTE_ARRAY, -1, -1, -1},
- {LogicalAnnotation::Decimal(16, 6), LogicalType::DECIMAL, Type::INT64, -1, 16, 6},
- {LogicalAnnotation::Date(), LogicalType::DATE, Type::INT32, -1, -1, -1},
- {LogicalAnnotation::Time(true, LogicalAnnotation::TimeUnit::MILLIS),
- LogicalType::TIME_MILLIS, Type::INT32, -1, -1, -1},
- {LogicalAnnotation::Time(true, LogicalAnnotation::TimeUnit::MICROS),
- LogicalType::TIME_MICROS, Type::INT64, -1, -1, -1},
- {LogicalAnnotation::Timestamp(true, LogicalAnnotation::TimeUnit::MILLIS),
- LogicalType::TIMESTAMP_MILLIS, Type::INT64, -1, -1, -1},
- {LogicalAnnotation::Timestamp(true, LogicalAnnotation::TimeUnit::MICROS),
- LogicalType::TIMESTAMP_MICROS, Type::INT64, -1, -1, -1},
- {LogicalAnnotation::Interval(), LogicalType::INTERVAL, Type::FIXED_LEN_BYTE_ARRAY,
- 12, -1, -1},
- {LogicalAnnotation::Int(8, false), LogicalType::UINT_8, Type::INT32, -1, -1, -1},
- {LogicalAnnotation::Int(8, true), LogicalType::INT_8, Type::INT32, -1, -1, -1},
- {LogicalAnnotation::Int(16, false), LogicalType::UINT_16, Type::INT32, -1, -1, -1},
- {LogicalAnnotation::Int(16, true), LogicalType::INT_16, Type::INT32, -1, -1, -1},
- {LogicalAnnotation::Int(32, false), LogicalType::UINT_32, Type::INT32, -1, -1, -1},
- {LogicalAnnotation::Int(32, true), LogicalType::INT_32, Type::INT32, -1, -1, -1},
- {LogicalAnnotation::Int(64, false), LogicalType::UINT_64, Type::INT64, -1, -1, -1},
- {LogicalAnnotation::Int(64, true), LogicalType::INT_64, Type::INT64, -1, -1, -1},
- {LogicalAnnotation::JSON(), LogicalType::JSON, Type::BYTE_ARRAY, -1, -1, -1},
- {LogicalAnnotation::BSON(), LogicalType::BSON, Type::BYTE_ARRAY, -1, -1, -1},
- {LogicalAnnotation::None(), LogicalType::NONE, Type::INT64, -1, -1, -1}};
+ {LogicalType::String(), ConvertedType::UTF8, Type::BYTE_ARRAY, -1, -1, -1},
+ {LogicalType::Enum(), ConvertedType::ENUM, Type::BYTE_ARRAY, -1, -1, -1},
+ {LogicalType::Decimal(16, 6), ConvertedType::DECIMAL, Type::INT64, -1, 16, 6},
+ {LogicalType::Date(), ConvertedType::DATE, Type::INT32, -1, -1, -1},
+ {LogicalType::Time(true, LogicalType::TimeUnit::MILLIS), ConvertedType::TIME_MILLIS,
+ Type::INT32, -1, -1, -1},
+ {LogicalType::Time(true, LogicalType::TimeUnit::MICROS), ConvertedType::TIME_MICROS,
+ Type::INT64, -1, -1, -1},
+ {LogicalType::Timestamp(true, LogicalType::TimeUnit::MILLIS),
+ ConvertedType::TIMESTAMP_MILLIS, Type::INT64, -1, -1, -1},
+ {LogicalType::Timestamp(true, LogicalType::TimeUnit::MICROS),
+ ConvertedType::TIMESTAMP_MICROS, Type::INT64, -1, -1, -1},
+ {LogicalType::Interval(), ConvertedType::INTERVAL, Type::FIXED_LEN_BYTE_ARRAY, 12,
+ -1, -1},
+ {LogicalType::Int(8, false), ConvertedType::UINT_8, Type::INT32, -1, -1, -1},
+ {LogicalType::Int(8, true), ConvertedType::INT_8, Type::INT32, -1, -1, -1},
+ {LogicalType::Int(16, false), ConvertedType::UINT_16, Type::INT32, -1, -1, -1},
+ {LogicalType::Int(16, true), ConvertedType::INT_16, Type::INT32, -1, -1, -1},
+ {LogicalType::Int(32, false), ConvertedType::UINT_32, Type::INT32, -1, -1, -1},
+ {LogicalType::Int(32, true), ConvertedType::INT_32, Type::INT32, -1, -1, -1},
+ {LogicalType::Int(64, false), ConvertedType::UINT_64, Type::INT64, -1, -1, -1},
+ {LogicalType::Int(64, true), ConvertedType::INT_64, Type::INT64, -1, -1, -1},
+ {LogicalType::JSON(), ConvertedType::JSON, Type::BYTE_ARRAY, -1, -1, -1},
+ {LogicalType::BSON(), ConvertedType::BSON, Type::BYTE_ARRAY, -1, -1, -1},
+ {LogicalType::None(), ConvertedType::NONE, Type::INT64, -1, -1, -1}};
for (const PrimitiveNodeFactoryArguments& c : cases) {
- ConfirmPrimitiveNodeFactoryEquivalence(c.annotation, c.converted_type,
+ ConfirmPrimitiveNodeFactoryEquivalence(c.logical_type, c.converted_type,
c.physical_type, c.physical_length,
c.precision, c.scale);
}
// Group nodes ...
- ConfirmGroupNodeFactoryEquivalence("map", LogicalAnnotation::Map(), LogicalType::MAP);
- ConfirmGroupNodeFactoryEquivalence("list", LogicalAnnotation::List(),
- LogicalType::LIST);
+ ConfirmGroupNodeFactoryEquivalence("map", LogicalType::Map(), ConvertedType::MAP);
+ ConfirmGroupNodeFactoryEquivalence("list", LogicalType::List(), ConvertedType::LIST);
}
TEST(TestSchemaNodeCreation, FactoryExceptions) {
- // Ensure that the Node factory method that accepts an annotation refuses to create
+ // Ensure that the Node factory method that accepts a logical type refuses to create
// an object if compatibility conditions are not met
- // Nested annotation on non-group node ...
- ASSERT_ANY_THROW(PrimitiveNode::Make("map", Repetition::REQUIRED, MapAnnotation::Make(),
- Type::INT64));
+ // Nested logical type on non-group node ...
+ ASSERT_ANY_THROW(PrimitiveNode::Make("map", Repetition::REQUIRED,
+ MapLogicalType::Make(), Type::INT64));
// Incompatible primitive type ...
ASSERT_ANY_THROW(PrimitiveNode::Make("string", Repetition::REQUIRED,
- StringAnnotation::Make(), Type::BOOLEAN));
+ StringLogicalType::Make(), Type::BOOLEAN));
// Incompatible primitive length ...
ASSERT_ANY_THROW(PrimitiveNode::Make("interval", Repetition::REQUIRED,
- IntervalAnnotation::Make(),
+ IntervalLogicalType::Make(),
Type::FIXED_LEN_BYTE_ARRAY, 11));
// Primitive too small for given precision ...
ASSERT_ANY_THROW(PrimitiveNode::Make("decimal", Repetition::REQUIRED,
- DecimalAnnotation::Make(16, 6), Type::INT32));
+ DecimalLogicalType::Make(16, 6), Type::INT32));
// Incompatible primitive length ...
ASSERT_ANY_THROW(PrimitiveNode::Make("uuid", Repetition::REQUIRED,
- UUIDAnnotation::Make(), Type::FIXED_LEN_BYTE_ARRAY,
- 64));
+ UUIDLogicalType::Make(),
+ Type::FIXED_LEN_BYTE_ARRAY, 64));
// Non-positive length argument for fixed length binary ...
ASSERT_ANY_THROW(PrimitiveNode::Make("negative_length", Repetition::REQUIRED,
- NoAnnotation::Make(), Type::FIXED_LEN_BYTE_ARRAY,
+ NoLogicalType::Make(), Type::FIXED_LEN_BYTE_ARRAY,
-16));
// Non-positive length argument for fixed length binary ...
ASSERT_ANY_THROW(PrimitiveNode::Make("zero_length", Repetition::REQUIRED,
- NoAnnotation::Make(), Type::FIXED_LEN_BYTE_ARRAY,
+ NoLogicalType::Make(), Type::FIXED_LEN_BYTE_ARRAY,
0));
- // Non-nested annotation on group node ...
+ // Non-nested logical type on group node ...
ASSERT_ANY_THROW(
- GroupNode::Make("list", Repetition::REPEATED, {}, JSONAnnotation::Make()));
+ GroupNode::Make("list", Repetition::REPEATED, {}, JSONLogicalType::Make()));
- // nullptr annotation arguments convert to NoAnnotation/LogicalType::NONE
- std::shared_ptr<const LogicalAnnotation> empty;
+ // nullptr logical type arguments convert to NoLogicalType/ConvertedType::NONE
+ std::shared_ptr<const LogicalType> empty;
NodePtr node;
ASSERT_NO_THROW(
node = PrimitiveNode::Make("value", Repetition::REQUIRED, empty, Type::DOUBLE));
- ASSERT_TRUE(node->logical_annotation()->is_none());
- ASSERT_EQ(node->logical_type(), LogicalType::NONE);
+ ASSERT_TRUE(node->logical_type()->is_none());
+ ASSERT_EQ(node->converted_type(), ConvertedType::NONE);
ASSERT_NO_THROW(node = GroupNode::Make("items", Repetition::REPEATED, {}, empty));
- ASSERT_TRUE(node->logical_annotation()->is_none());
- ASSERT_EQ(node->logical_type(), LogicalType::NONE);
+ ASSERT_TRUE(node->logical_type()->is_none());
+ ASSERT_EQ(node->converted_type(), ConvertedType::NONE);
- // Invalid LogicalType in deserialized element ...
- node = PrimitiveNode::Make("string", Repetition::REQUIRED, StringAnnotation::Make(),
+ // Invalid ConvertedType in deserialized element ...
+ node = PrimitiveNode::Make("string", Repetition::REQUIRED, StringLogicalType::Make(),
Type::BYTE_ARRAY);
- ASSERT_EQ(node->logical_annotation()->type(), LogicalAnnotation::Type::STRING);
- ASSERT_TRUE(node->logical_annotation()->is_valid());
- ASSERT_TRUE(node->logical_annotation()->is_serialized());
+ ASSERT_EQ(node->logical_type()->type(), LogicalType::Type::STRING);
+ ASSERT_TRUE(node->logical_type()->is_valid());
+ ASSERT_TRUE(node->logical_type()->is_serialized());
format::SchemaElement string_intermediary;
node->ToParquet(&string_intermediary);
// ... corrupt the Thrift intermediary ....
string_intermediary.logicalType.__isset.STRING = false;
ASSERT_ANY_THROW(node = PrimitiveNode::FromParquet(&string_intermediary, 1));
- // Invalid TimeUnit in deserialized TimeAnnotation ...
- node = PrimitiveNode::Make(
- "time", Repetition::REQUIRED,
- TimeAnnotation::Make(true, LogicalAnnotation::TimeUnit::NANOS), Type::INT64);
+ // Invalid TimeUnit in deserialized TimeLogicalType ...
+ node = PrimitiveNode::Make("time", Repetition::REQUIRED,
+ TimeLogicalType::Make(true, LogicalType::TimeUnit::NANOS),
+ Type::INT64);
format::SchemaElement time_intermediary;
node->ToParquet(&time_intermediary);
// ... corrupt the Thrift intermediary ....
time_intermediary.logicalType.TIME.unit.__isset.NANOS = false;
ASSERT_ANY_THROW(PrimitiveNode::FromParquet(&time_intermediary, 1));
- // Invalid TimeUnit in deserialized TimestampAnnotation ...
+ // Invalid TimeUnit in deserialized TimestampLogicalType ...
node = PrimitiveNode::Make(
"timestamp", Repetition::REQUIRED,
- TimestampAnnotation::Make(true, LogicalAnnotation::TimeUnit::NANOS), Type::INT64);
+ TimestampLogicalType::Make(true, LogicalType::TimeUnit::NANOS), Type::INT64);
format::SchemaElement timestamp_intermediary;
node->ToParquet(&timestamp_intermediary);
// ... corrupt the Thrift intermediary ....
@@ -1686,11 +1660,11 @@ TEST(TestSchemaNodeCreation, FactoryExceptions) {
struct SchemaElementConstructionArguments {
std::string name;
- std::shared_ptr<const LogicalAnnotation> annotation;
+ std::shared_ptr<const LogicalType> logical_type;
Type::type physical_type;
int physical_length;
bool expect_converted_type;
- LogicalType::type converted_type;
+ ConvertedType::type converted_type;
bool expect_logicalType;
std::function<bool()> check_logicalType;
};
@@ -1700,7 +1674,7 @@ class TestSchemaElementConstruction : public ::testing::Test {
TestSchemaElementConstruction* Reconstruct(
const SchemaElementConstructionArguments& c) {
// Make node, create serializable Thrift object from it ...
- node_ = PrimitiveNode::Make(c.name, Repetition::REQUIRED, c.annotation,
+ node_ = PrimitiveNode::Make(c.name, Repetition::REQUIRED, c.logical_type,
c.physical_type, c.physical_length);
element_.reset(new format::SchemaElement);
node_->ToParquet(element_.get());
@@ -1718,31 +1692,34 @@ class TestSchemaElementConstruction : public ::testing::Test {
ASSERT_EQ(element_->name, name_);
if (expect_converted_type_) {
ASSERT_TRUE(element_->__isset.converted_type)
- << node_->logical_annotation()->ToString()
- << " annotation unexpectedly failed to generate a converted type in the Thrift "
+ << node_->logical_type()->ToString()
+ << " logical type unexpectedly failed to generate a converted type in the "
+ "Thrift "
"intermediate object";
ASSERT_EQ(element_->converted_type, ToThrift(converted_type_))
- << node_->logical_annotation()->ToString()
- << " annotation unexpectedly failed to generate correct converted type in the "
+ << node_->logical_type()->ToString()
+ << " logical type unexpectedly failed to generate correct converted type in "
+ "the "
"Thrift intermediate object";
} else {
ASSERT_FALSE(element_->__isset.converted_type)
- << node_->logical_annotation()->ToString()
- << " annotation unexpectedly generated a converted type in the Thrift "
+ << node_->logical_type()->ToString()
+ << " logical type unexpectedly generated a converted type in the Thrift "
"intermediate object";
}
if (expect_logicalType_) {
ASSERT_TRUE(element_->__isset.logicalType)
- << node_->logical_annotation()->ToString()
- << " annotation unexpectedly failed to genverate a logicalType in the Thrift "
+ << node_->logical_type()->ToString()
+ << " logical type unexpectedly failed to genverate a logicalType in the Thrift "
"intermediate object";
- ASSERT_TRUE(check_logicalType_()) << node_->logical_annotation()->ToString()
- << " annotation generated incorrect logicalType "
- "settings in the Thrift intermediate object";
+ ASSERT_TRUE(check_logicalType_())
+ << node_->logical_type()->ToString()
+ << " logical type generated incorrect logicalType "
+ "settings in the Thrift intermediate object";
} else {
ASSERT_FALSE(element_->__isset.logicalType)
- << node_->logical_annotation()->ToString()
- << " annotation unexpectedly generated a logicalType in the Thrift "
+ << node_->logical_type()->ToString()
+ << " logical type unexpectedly generated a logicalType in the Thrift "
"intermediate object";
}
return;
@@ -1753,9 +1730,9 @@ class TestSchemaElementConstruction : public ::testing::Test {
std::unique_ptr<format::SchemaElement> element_;
std::string name_;
bool expect_converted_type_;
- LogicalType::type converted_type_; // expected converted type in Thrift object
+ ConvertedType::type converted_type_; // expected converted type in Thrift object
bool expect_logicalType_;
- std::function<bool()> check_logicalType_; // specialized (by annotation type)
+ std::function<bool()> check_logicalType_; // specialized (by logical type)
// logicalType check for Thrift object
};
@@ -1769,30 +1746,29 @@ class TestSchemaElementConstruction : public ::testing::Test {
TEST_F(TestSchemaElementConstruction, SimpleCases) {
auto check_nothing = []() {
return true;
- }; // used for annotations that don't expect a logicalType to be set
+ }; // used for logical types that don't expect a logicalType to be set
std::vector<SchemaElementConstructionArguments> cases = {
- {"string", LogicalAnnotation::String(), Type::BYTE_ARRAY, -1, true,
- LogicalType::UTF8, true,
- [this]() { return element_->logicalType.__isset.STRING; }},
- {"enum", LogicalAnnotation::Enum(), Type::BYTE_ARRAY, -1, true, LogicalType::ENUM,
- true, [this]() { return element_->logicalType.__isset.ENUM; }},
- {"date", LogicalAnnotation::Date(), Type::INT32, -1, true, LogicalType::DATE, true,
+ {"string", LogicalType::String(), Type::BYTE_ARRAY, -1, true, ConvertedType::UTF8,
+ true, [this]() { return element_->logicalType.__isset.STRING; }},
+ {"enum", LogicalType::Enum(), Type::BYTE_ARRAY, -1, true, ConvertedType::ENUM, true,
+ [this]() { return element_->logicalType.__isset.ENUM; }},
+ {"date", LogicalType::Date(), Type::INT32, -1, true, ConvertedType::DATE, true,
[this]() { return element_->logicalType.__isset.DATE; }},
- {"interval", LogicalAnnotation::Interval(), Type::FIXED_LEN_BYTE_ARRAY, 12, true,
- LogicalType::INTERVAL, false, check_nothing},
- {"null", LogicalAnnotation::Null(), Type::DOUBLE, -1, false, LogicalType::NA, true,
+ {"interval", LogicalType::Interval(), Type::FIXED_LEN_BYTE_ARRAY, 12, true,
+ ConvertedType::INTERVAL, false, check_nothing},
+ {"null", LogicalType::Null(), Type::DOUBLE, -1, false, ConvertedType::NA, true,
[this]() { return element_->logicalType.__isset.UNKNOWN; }},
- {"json", LogicalAnnotation::JSON(), Type::BYTE_ARRAY, -1, true, LogicalType::JSON,
- true, [this]() { return element_->logicalType.__isset.JSON; }},
- {"bson", LogicalAnnotation::BSON(), Type::BYTE_ARRAY, -1, true, LogicalType::BSON,
- true, [this]() { return element_->logicalType.__isset.BSON; }},
- {"uuid", LogicalAnnotation::UUID(), Type::FIXED_LEN_BYTE_ARRAY, 16, false,
- LogicalType::NA, true, [this]() { return element_->logicalType.__isset.UUID; }},
- {"none", LogicalAnnotation::None(), Type::INT64, -1, false, LogicalType::NA, false,
+ {"json", LogicalType::JSON(), Type::BYTE_ARRAY, -1, true, ConvertedType::JSON, true,
+ [this]() { return element_->logicalType.__isset.JSON; }},
+ {"bson", LogicalType::BSON(), Type::BYTE_ARRAY, -1, true, ConvertedType::BSON, true,
+ [this]() { return element_->logicalType.__isset.BSON; }},
+ {"uuid", LogicalType::UUID(), Type::FIXED_LEN_BYTE_ARRAY, 16, false,
+ ConvertedType::NA, true, [this]() { return element_->logicalType.__isset.UUID; }},
+ {"none", LogicalType::None(), Type::INT64, -1, false, ConvertedType::NA, false,
check_nothing},
- {"unknown", LogicalAnnotation::Unknown(), Type::INT64, -1, true, LogicalType::NA,
- false, check_nothing}};
+ {"unknown", LogicalType::Unknown(), Type::INT64, -1, true, ConvertedType::NA, false,
+ check_nothing}};
for (const SchemaElementConstructionArguments& c : cases) {
this->Reconstruct(c)->Inspect();
@@ -1804,10 +1780,10 @@ class TestDecimalSchemaElementConstruction : public TestSchemaElementConstructio
TestDecimalSchemaElementConstruction* Reconstruct(
const SchemaElementConstructionArguments& c) {
TestSchemaElementConstruction::Reconstruct(c);
- const auto& decimal_annotation =
- checked_cast<const DecimalAnnotation&>(*c.annotation);
- precision_ = decimal_annotation.precision();
- scale_ = decimal_annotation.scale();
+ const auto& decimal_logical_type =
+ checked_cast<const DecimalLogicalType&>(*c.logical_type);
+ precision_ = decimal_logical_type.precision();
+ scale_ = decimal_logical_type.scale();
return this;
}
@@ -1829,14 +1805,14 @@ TEST_F(TestDecimalSchemaElementConstruction, DecimalCases) {
auto check_DECIMAL = [this]() { return element_->logicalType.__isset.DECIMAL; };
std::vector<SchemaElementConstructionArguments> cases = {
- {"decimal", LogicalAnnotation::Decimal(16, 6), Type::INT64, -1, true,
- LogicalType::DECIMAL, true, check_DECIMAL},
- {"decimal", LogicalAnnotation::Decimal(1, 0), Type::INT32, -1, true,
- LogicalType::DECIMAL, true, check_DECIMAL},
- {"decimal", LogicalAnnotation::Decimal(10), Type::INT64, -1, true,
- LogicalType::DECIMAL, true, check_DECIMAL},
- {"decimal", LogicalAnnotation::Decimal(11, 11), Type::INT64, -1, true,
- LogicalType::DECIMAL, true, check_DECIMAL},
+ {"decimal", LogicalType::Decimal(16, 6), Type::INT64, -1, true,
+ ConvertedType::DECIMAL, true, check_DECIMAL},
+ {"decimal", LogicalType::Decimal(1, 0), Type::INT32, -1, true,
+ ConvertedType::DECIMAL, true, check_DECIMAL},
+ {"decimal", LogicalType::Decimal(10), Type::INT64, -1, true, ConvertedType::DECIMAL,
+ true, check_DECIMAL},
+ {"decimal", LogicalType::Decimal(11, 11), Type::INT64, -1, true,
+ ConvertedType::DECIMAL, true, check_DECIMAL},
};
for (const SchemaElementConstructionArguments& c : cases) {
@@ -1850,7 +1826,7 @@ class TestTemporalSchemaElementConstruction : public TestSchemaElementConstructi
TestTemporalSchemaElementConstruction* Reconstruct(
const SchemaElementConstructionArguments& c) {
TestSchemaElementConstruction::Reconstruct(c);
- const auto& t = checked_cast<const T&>(*c.annotation);
+ const auto& t = checked_cast<const T&>(*c.logical_type);
adjusted_ = t.is_adjusted_to_utc();
unit_ = t.time_unit();
return this;
@@ -1864,7 +1840,7 @@ class TestTemporalSchemaElementConstruction : public TestSchemaElementConstructi
protected:
bool adjusted_;
- LogicalAnnotation::TimeUnit::unit unit_;
+ LogicalType::TimeUnit::unit unit_;
};
template <>
@@ -1872,16 +1848,16 @@ void TestTemporalSchemaElementConstruction::Inspect<format::TimeType>() {
TestSchemaElementConstruction::Inspect();
ASSERT_EQ(element_->logicalType.TIME.isAdjustedToUTC, adjusted_);
switch (unit_) {
- case LogicalAnnotation::TimeUnit::MILLIS:
+ case LogicalType::TimeUnit::MILLIS:
ASSERT_TRUE(element_->logicalType.TIME.unit.__isset.MILLIS);
break;
- case LogicalAnnotation::TimeUnit::MICROS:
+ case LogicalType::TimeUnit::MICROS:
ASSERT_TRUE(element_->logicalType.TIME.unit.__isset.MICROS);
break;
- case LogicalAnnotation::TimeUnit::NANOS:
+ case LogicalType::TimeUnit::NANOS:
ASSERT_TRUE(element_->logicalType.TIME.unit.__isset.NANOS);
break;
- case LogicalAnnotation::TimeUnit::UNKNOWN:
+ case LogicalType::TimeUnit::UNKNOWN:
default:
FAIL() << "Invalid time unit in test case";
}
@@ -1893,16 +1869,16 @@ void TestTemporalSchemaElementConstruction::Inspect<format::TimestampType>() {
TestSchemaElementConstruction::Inspect();
ASSERT_EQ(element_->logicalType.TIMESTAMP.isAdjustedToUTC, adjusted_);
switch (unit_) {
- case LogicalAnnotation::TimeUnit::MILLIS:
+ case LogicalType::TimeUnit::MILLIS:
ASSERT_TRUE(element_->logicalType.TIMESTAMP.unit.__isset.MILLIS);
break;
- case LogicalAnnotation::TimeUnit::MICROS:
+ case LogicalType::TimeUnit::MICROS:
ASSERT_TRUE(element_->logicalType.TIMESTAMP.unit.__isset.MICROS);
break;
- case LogicalAnnotation::TimeUnit::NANOS:
+ case LogicalType::TimeUnit::NANOS:
ASSERT_TRUE(element_->logicalType.TIMESTAMP.unit.__isset.NANOS);
break;
- case LogicalAnnotation::TimeUnit::UNKNOWN:
+ case LogicalType::TimeUnit::UNKNOWN:
default:
FAIL() << "Invalid time unit in test case";
}
@@ -1913,49 +1889,43 @@ TEST_F(TestTemporalSchemaElementConstruction, TemporalCases) {
auto check_TIME = [this]() { return element_->logicalType.__isset.TIME; };
std::vector<SchemaElementConstructionArguments> time_cases = {
- {"time_T_ms", LogicalAnnotation::Time(true, LogicalAnnotation::TimeUnit::MILLIS),
- Type::INT32, -1, true, LogicalType::TIME_MILLIS, true, check_TIME},
- {"time_F_ms", LogicalAnnotation::Time(false, LogicalAnnotation::TimeUnit::MILLIS),
- Type::INT32, -1, false, LogicalType::NA, true, check_TIME},
- {"time_T_us", LogicalAnnotation::Time(true, LogicalAnnotation::TimeUnit::MICROS),
- Type::INT64, -1, true, LogicalType::TIME_MICROS, true, check_TIME},
- {"time_F_us", LogicalAnnotation::Time(false, LogicalAnnotation::TimeUnit::MICROS),
- Type::INT64, -1, false, LogicalType::NA, true, check_TIME},
- {"time_T_ns", LogicalAnnotation::Time(true, LogicalAnnotation::TimeUnit::NANOS),
- Type::INT64, -1, false, LogicalType::NA, true, check_TIME},
- {"time_F_ns", LogicalAnnotation::Time(false, LogicalAnnotation::TimeUnit::NANOS),
- Type::INT64, -1, false, LogicalType::NA, true, check_TIME},
+ {"time_T_ms", LogicalType::Time(true, LogicalType::TimeUnit::MILLIS), Type::INT32,
+ -1, true, ConvertedType::TIME_MILLIS, true, check_TIME},
+ {"time_F_ms", LogicalType::Time(false, LogicalType::TimeUnit::MILLIS), Type::INT32,
+ -1, false, ConvertedType::NA, true, check_TIME},
+ {"time_T_us", LogicalType::Time(true, LogicalType::TimeUnit::MICROS), Type::INT64,
+ -1, true, ConvertedType::TIME_MICROS, true, check_TIME},
+ {"time_F_us", LogicalType::Time(false, LogicalType::TimeUnit::MICROS), Type::INT64,
+ -1, false, ConvertedType::NA, true, check_TIME},
+ {"time_T_ns", LogicalType::Time(true, LogicalType::TimeUnit::NANOS), Type::INT64,
+ -1, false, ConvertedType::NA, true, check_TIME},
+ {"time_F_ns", LogicalType::Time(false, LogicalType::TimeUnit::NANOS), Type::INT64,
+ -1, false, ConvertedType::NA, true, check_TIME},
};
for (const SchemaElementConstructionArguments& c : time_cases) {
- this->Reconstruct<TimeAnnotation>(c)->Inspect<format::TimeType>();
+ this->Reconstruct<TimeLogicalType>(c)->Inspect<format::TimeType>();
}
auto check_TIMESTAMP = [this]() { return element_->logicalType.__isset.TIMESTAMP; };
std::vector<SchemaElementConstructionArguments> timestamp_cases = {
- {"timestamp_T_ms",
- LogicalAnnotation::Timestamp(true, LogicalAnnotation::TimeUnit::MILLIS),
- Type::INT64, -1, true, LogicalType::TIMESTAMP_MILLIS, true, check_TIMESTAMP},
- {"timestamp_F_ms",
- LogicalAnnotation::Timestamp(false, LogicalAnnotation::TimeUnit::MILLIS),
- Type::INT64, -1, false, LogicalType::NA, true, check_TIMESTAMP},
- {"timestamp_T_us",
- LogicalAnnotation::Timestamp(true, LogicalAnnotation::TimeUnit::MICROS),
- Type::INT64, -1, true, LogicalType::TIMESTAMP_MICROS, true, check_TIMESTAMP},
- {"timestamp_F_us",
- LogicalAnnotation::Timestamp(false, LogicalAnnotation::TimeUnit::MICROS),
- Type::INT64, -1, false, LogicalType::NA, true, check_TIMESTAMP},
- {"timestamp_T_ns",
- LogicalAnnotation::Timestamp(true, LogicalAnnotation::TimeUnit::NANOS),
- Type::INT64, -1, false, LogicalType::NA, true, check_TIMESTAMP},
- {"timestamp_F_ns",
- LogicalAnnotation::Timestamp(false, LogicalAnnotation::TimeUnit::NANOS),
- Type::INT64, -1, false, LogicalType::NA, true, check_TIMESTAMP},
+ {"timestamp_T_ms", LogicalType::Timestamp(true, LogicalType::TimeUnit::MILLIS),
+ Type::INT64, -1, true, ConvertedType::TIMESTAMP_MILLIS, true, check_TIMESTAMP},
+ {"timestamp_F_ms", LogicalType::Timestamp(false, LogicalType::TimeUnit::MILLIS),
+ Type::INT64, -1, false, ConvertedType::NA, true, check_TIMESTAMP},
+ {"timestamp_T_us", LogicalType::Timestamp(true, LogicalType::TimeUnit::MICROS),
+ Type::INT64, -1, true, ConvertedType::TIMESTAMP_MICROS, true, check_TIMESTAMP},
+ {"timestamp_F_us", LogicalType::Timestamp(false, LogicalType::TimeUnit::MICROS),
+ Type::INT64, -1, false, ConvertedType::NA, true, check_TIMESTAMP},
+ {"timestamp_T_ns", LogicalType::Timestamp(true, LogicalType::TimeUnit::NANOS),
+ Type::INT64, -1, false, ConvertedType::NA, true, check_TIMESTAMP},
+ {"timestamp_F_ns", LogicalType::Timestamp(false, LogicalType::TimeUnit::NANOS),
+ Type::INT64, -1, false, ConvertedType::NA, true, check_TIMESTAMP},
};
for (const SchemaElementConstructionArguments& c : timestamp_cases) {
- this->Reconstruct<TimestampAnnotation>(c)->Inspect<format::TimestampType>();
+ this->Reconstruct<TimestampLogicalType>(c)->Inspect<format::TimestampType>();
}
}
@@ -1964,9 +1934,9 @@ class TestIntegerSchemaElementConstruction : public TestSchemaElementConstructio
TestIntegerSchemaElementConstruction* Reconstruct(
const SchemaElementConstructionArguments& c) {
TestSchemaElementConstruction::Reconstruct(c);
- const auto& int_annotation = checked_cast<const IntAnnotation&>(*c.annotation);
- width_ = int_annotation.bit_width();
- signed_ = int_annotation.is_signed();
+ const auto& int_logical_type = checked_cast<const IntLogicalType&>(*c.logical_type);
+ width_ = int_logical_type.bit_width();
+ signed_ = int_logical_type.is_signed();
return this;
}
@@ -1986,22 +1956,22 @@ TEST_F(TestIntegerSchemaElementConstruction, IntegerCases) {
auto check_INTEGER = [this]() { return element_->logicalType.__isset.INTEGER; };
std::vector<SchemaElementConstructionArguments> cases = {
- {"uint8", LogicalAnnotation::Int(8, false), Type::INT32, -1, true,
- LogicalType::UINT_8, true, check_INTEGER},
- {"uint16", LogicalAnnotation::Int(16, false), Type::INT32, -1, true,
- LogicalType::UINT_16, true, check_INTEGER},
- {"uint32", LogicalAnnotation::Int(32, false), Type::INT32, -1, true,
- LogicalType::UINT_32, true, check_INTEGER},
- {"uint64", LogicalAnnotation::Int(64, false), Type::INT64, -1, true,
- LogicalType::UINT_64, true, check_INTEGER},
- {"int8", LogicalAnnotation::Int(8, true), Type::INT32, -1, true, LogicalType::INT_8,
+ {"uint8", LogicalType::Int(8, false), Type::INT32, -1, true, ConvertedType::UINT_8,
+ true, check_INTEGER},
+ {"uint16", LogicalType::Int(16, false), Type::INT32, -1, true,
+ ConvertedType::UINT_16, true, check_INTEGER},
+ {"uint32", LogicalType::Int(32, false), Type::INT32, -1, true,
+ ConvertedType::UINT_32, true, check_INTEGER},
+ {"uint64", LogicalType::Int(64, false), Type::INT64, -1, true,
+ ConvertedType::UINT_64, true, check_INTEGER},
+ {"int8", LogicalType::Int(8, true), Type::INT32, -1, true, ConvertedType::INT_8,
+ true, check_INTEGER},
+ {"int16", LogicalType::Int(16, true), Type::INT32, -1, true, ConvertedType::INT_16,
+ true, check_INTEGER},
+ {"int32", LogicalType::Int(32, true), Type::INT32, -1, true, ConvertedType::INT_32,
+ true, check_INTEGER},
+ {"int64", LogicalType::Int(64, true), Type::INT64, -1, true, ConvertedType::INT_64,
true, check_INTEGER},
- {"int16", LogicalAnnotation::Int(16, true), Type::INT32, -1, true,
- LogicalType::INT_16, true, check_INTEGER},
- {"int32", LogicalAnnotation::Int(32, true), Type::INT32, -1, true,
- LogicalType::INT_32, true, check_INTEGER},
- {"int64", LogicalAnnotation::Int(64, true), Type::INT64, -1, true,
- LogicalType::INT_64, true, check_INTEGER},
};
for (const SchemaElementConstructionArguments& c : cases) {
@@ -2009,37 +1979,37 @@ TEST_F(TestIntegerSchemaElementConstruction, IntegerCases) {
}
}
-TEST(TestLogicalAnnotationSerialization, SchemaElementNestedCases) {
+TEST(TestLogicalTypeSerialization, SchemaElementNestedCases) {
// Confirm that the intermediate Thrift objects created during node serialization
- // contain correct ConvertedType and LogicalType information
+ // contain correct ConvertedType and LogicalType information
NodePtr string_node = PrimitiveNode::Make("string", Repetition::REQUIRED,
- StringAnnotation::Make(), Type::BYTE_ARRAY);
+ StringLogicalType::Make(), Type::BYTE_ARRAY);
NodePtr date_node = PrimitiveNode::Make("date", Repetition::REQUIRED,
- DateAnnotation::Make(), Type::INT32);
+ DateLogicalType::Make(), Type::INT32);
NodePtr json_node = PrimitiveNode::Make("json", Repetition::REQUIRED,
- JSONAnnotation::Make(), Type::BYTE_ARRAY);
+ JSONLogicalType::Make(), Type::BYTE_ARRAY);
NodePtr uuid_node =
- PrimitiveNode::Make("uuid", Repetition::REQUIRED, UUIDAnnotation::Make(),
+ PrimitiveNode::Make("uuid", Repetition::REQUIRED, UUIDLogicalType::Make(),
Type::FIXED_LEN_BYTE_ARRAY, 16);
NodePtr timestamp_node = PrimitiveNode::Make(
"timestamp", Repetition::REQUIRED,
- TimestampAnnotation::Make(false, LogicalAnnotation::TimeUnit::NANOS), Type::INT64);
+ TimestampLogicalType::Make(false, LogicalType::TimeUnit::NANOS), Type::INT64);
NodePtr int_node = PrimitiveNode::Make("int", Repetition::REQUIRED,
- IntAnnotation::Make(64, false), Type::INT64);
- NodePtr decimal_node = PrimitiveNode::Make("decimal", Repetition::REQUIRED,
- DecimalAnnotation::Make(16, 6), Type::INT64);
+ IntLogicalType::Make(64, false), Type::INT64);
+ NodePtr decimal_node = PrimitiveNode::Make(
+ "decimal", Repetition::REQUIRED, DecimalLogicalType::Make(16, 6), Type::INT64);
NodePtr list_node = GroupNode::Make("list", Repetition::REPEATED,
{string_node, date_node, json_node, uuid_node,
timestamp_node, int_node, decimal_node},
- ListAnnotation::Make());
+ ListLogicalType::Make());
std::vector<format::SchemaElement> list_elements;
ToParquet(reinterpret_cast<GroupNode*>(list_node.get()), &list_elements);
ASSERT_EQ(list_elements[0].name, "list");
ASSERT_TRUE(list_elements[0].__isset.converted_type);
ASSERT_TRUE(list_elements[0].__isset.logicalType);
- ASSERT_EQ(list_elements[0].converted_type, ToThrift(LogicalType::LIST));
+ ASSERT_EQ(list_elements[0].converted_type, ToThrift(ConvertedType::LIST));
ASSERT_TRUE(list_elements[0].logicalType.__isset.LIST);
ASSERT_TRUE(list_elements[1].logicalType.__isset.STRING);
ASSERT_TRUE(list_elements[2].logicalType.__isset.DATE);
@@ -2050,109 +2020,97 @@ TEST(TestLogicalAnnotationSerialization, SchemaElementNestedCases) {
ASSERT_TRUE(list_elements[7].logicalType.__isset.DECIMAL);
NodePtr map_node =
- GroupNode::Make("map", Repetition::REQUIRED, {}, MapAnnotation::Make());
+ GroupNode::Make("map", Repetition::REQUIRED, {}, MapLogicalType::Make());
std::vector<format::SchemaElement> map_elements;
ToParquet(reinterpret_cast<GroupNode*>(map_node.get()), &map_elements);
ASSERT_EQ(map_elements[0].name, "map");
ASSERT_TRUE(map_elements[0].__isset.converted_type);
ASSERT_TRUE(map_elements[0].__isset.logicalType);
- ASSERT_EQ(map_elements[0].converted_type, ToThrift(LogicalType::MAP));
+ ASSERT_EQ(map_elements[0].converted_type, ToThrift(ConvertedType::MAP));
ASSERT_TRUE(map_elements[0].logicalType.__isset.MAP);
}
static void ConfirmPrimitiveNodeRoundtrip(
- const std::shared_ptr<const LogicalAnnotation>& annotation, Type::type physical_type,
+ const std::shared_ptr<const LogicalType>& logical_type, Type::type physical_type,
int physical_length) {
std::shared_ptr<Node> original = PrimitiveNode::Make(
- "something", Repetition::REQUIRED, annotation, physical_type, physical_length);
+ "something", Repetition::REQUIRED, logical_type, physical_type, physical_length);
format::SchemaElement intermediary;
original->ToParquet(&intermediary);
std::unique_ptr<Node> recovered = PrimitiveNode::FromParquet(&intermediary, 1);
ASSERT_TRUE(original->Equals(recovered.get()))
<< "Recovered primitive node unexpectedly not equivalent to original primitive "
- "node constructed with logical annotation "
- << annotation->ToString();
+ "node constructed with logical type "
+ << logical_type->ToString();
return;
}
static void ConfirmGroupNodeRoundtrip(
- std::string name, const std::shared_ptr<const LogicalAnnotation>& annotation) {
+ std::string name, const std::shared_ptr<const LogicalType>& logical_type) {
NodeVector node_vector;
std::shared_ptr<Node> original =
- GroupNode::Make(name, Repetition::REQUIRED, node_vector, annotation);
+ GroupNode::Make(name, Repetition::REQUIRED, node_vector, logical_type);
std::vector<format::SchemaElement> elements;
ToParquet(reinterpret_cast<GroupNode*>(original.get()), &elements);
std::unique_ptr<Node> recovered =
GroupNode::FromParquet(&(elements[0]), 1, node_vector);
ASSERT_TRUE(original->Equals(recovered.get()))
<< "Recovered group node unexpectedly not equivalent to original group node "
- "constructed with logical annotation "
- << annotation->ToString();
+ "constructed with logical type "
+ << logical_type->ToString();
return;
}
-TEST(TestLogicalAnnotationSerialization, Roundtrips) {
+TEST(TestLogicalTypeSerialization, Roundtrips) {
// Confirm that Thrift serialization-deserialization of nodes with logical
- // annotations produces equivalent reconstituted nodes
+ // types produces equivalent reconstituted nodes
// Primitive nodes ...
struct AnnotatedPrimitiveNodeFactoryArguments {
- std::shared_ptr<const LogicalAnnotation> annotation;
+ std::shared_ptr<const LogicalType> logical_type;
Type::type physical_type;
int physical_length;
};
std::vector<AnnotatedPrimitiveNodeFactoryArguments> cases = {
- {LogicalAnnotation::String(), Type::BYTE_ARRAY, -1},
- {LogicalAnnotation::Enum(), Type::BYTE_ARRAY, -1},
- {LogicalAnnotation::Decimal(16, 6), Type::INT64, -1},
- {LogicalAnnotation::Date(), Type::INT32, -1},
- {LogicalAnnotation::Time(true, LogicalAnnotation::TimeUnit::MILLIS), Type::INT32,
- -1},
- {LogicalAnnotation::Time(true, LogicalAnnotation::TimeUnit::MICROS), Type::INT64,
- -1},
- {LogicalAnnotation::Time(true, LogicalAnnotation::TimeUnit::NANOS), Type::INT64,
- -1},
- {LogicalAnnotation::Time(false, LogicalAnnotation::TimeUnit::MILLIS), Type::INT32,
- -1},
- {LogicalAnnotation::Time(false, LogicalAnnotation::TimeUnit::MICROS), Type::INT64,
- -1},
- {LogicalAnnotation::Time(false, LogicalAnnotation::TimeUnit::NANOS), Type::INT64,
- -1},
- {LogicalAnnotation::Timestamp(true, LogicalAnnotation::TimeUnit::MILLIS),
- Type::INT64, -1},
- {LogicalAnnotation::Timestamp(true, LogicalAnnotation::TimeUnit::MICROS),
- Type::INT64, -1},
- {LogicalAnnotation::Timestamp(true, LogicalAnnotation::TimeUnit::NANOS),
- Type::INT64, -1},
- {LogicalAnnotation::Timestamp(false, LogicalAnnotation::TimeUnit::MILLIS),
- Type::INT64, -1},
- {LogicalAnnotation::Timestamp(false, LogicalAnnotation::TimeUnit::MICROS),
- Type::INT64, -1},
- {LogicalAnnotation::Timestamp(false, LogicalAnnotation::TimeUnit::NANOS),
- Type::INT64, -1},
- {LogicalAnnotation::Interval(), Type::FIXED_LEN_BYTE_ARRAY, 12},
- {LogicalAnnotation::Int(8, false), Type::INT32, -1},
- {LogicalAnnotation::Int(16, false), Type::INT32, -1},
- {LogicalAnnotation::Int(32, false), Type::INT32, -1},
- {LogicalAnnotation::Int(64, false), Type::INT64, -1},
- {LogicalAnnotation::Int(8, true), Type::INT32, -1},
- {LogicalAnnotation::Int(16, true), Type::INT32, -1},
- {LogicalAnnotation::Int(32, true), Type::INT32, -1},
- {LogicalAnnotation::Int(64, true), Type::INT64, -1},
- {LogicalAnnotation::Null(), Type::BOOLEAN, -1},
- {LogicalAnnotation::JSON(), Type::BYTE_ARRAY, -1},
- {LogicalAnnotation::BSON(), Type::BYTE_ARRAY, -1},
- {LogicalAnnotation::UUID(), Type::FIXED_LEN_BYTE_ARRAY, 16},
- {LogicalAnnotation::None(), Type::BOOLEAN, -1}};
+ {LogicalType::String(), Type::BYTE_ARRAY, -1},
+ {LogicalType::Enum(), Type::BYTE_ARRAY, -1},
+ {LogicalType::Decimal(16, 6), Type::INT64, -1},
+ {LogicalType::Date(), Type::INT32, -1},
+ {LogicalType::Time(true, LogicalType::TimeUnit::MILLIS), Type::INT32, -1},
+ {LogicalType::Time(true, LogicalType::TimeUnit::MICROS), Type::INT64, -1},
+ {LogicalType::Time(true, LogicalType::TimeUnit::NANOS), Type::INT64, -1},
+ {LogicalType::Time(false, LogicalType::TimeUnit::MILLIS), Type::INT32, -1},
+ {LogicalType::Time(false, LogicalType::TimeUnit::MICROS), Type::INT64, -1},
+ {LogicalType::Time(false, LogicalType::TimeUnit::NANOS), Type::INT64, -1},
+ {LogicalType::Timestamp(true, LogicalType::TimeUnit::MILLIS), Type::INT64, -1},
+ {LogicalType::Timestamp(true, LogicalType::TimeUnit::MICROS), Type::INT64, -1},
+ {LogicalType::Timestamp(true, LogicalType::TimeUnit::NANOS), Type::INT64, -1},
+ {LogicalType::Timestamp(false, LogicalType::TimeUnit::MILLIS), Type::INT64, -1},
+ {LogicalType::Timestamp(false, LogicalType::TimeUnit::MICROS), Type::INT64, -1},
+ {LogicalType::Timestamp(false, LogicalType::TimeUnit::NANOS), Type::INT64, -1},
+ {LogicalType::Interval(), Type::FIXED_LEN_BYTE_ARRAY, 12},
+ {LogicalType::Int(8, false), Type::INT32, -1},
+ {LogicalType::Int(16, false), Type::INT32, -1},
+ {LogicalType::Int(32, false), Type::INT32, -1},
+ {LogicalType::Int(64, false), Type::INT64, -1},
+ {LogicalType::Int(8, true), Type::INT32, -1},
+ {LogicalType::Int(16, true), Type::INT32, -1},
+ {LogicalType::Int(32, true), Type::INT32, -1},
+ {LogicalType::Int(64, true), Type::INT64, -1},
+ {LogicalType::Null(), Type::BOOLEAN, -1},
+ {LogicalType::JSON(), Type::BYTE_ARRAY, -1},
+ {LogicalType::BSON(), Type::BYTE_ARRAY, -1},
+ {LogicalType::UUID(), Type::FIXED_LEN_BYTE_ARRAY, 16},
+ {LogicalType::None(), Type::BOOLEAN, -1}};
for (const AnnotatedPrimitiveNodeFactoryArguments& c : cases) {
- ConfirmPrimitiveNodeRoundtrip(c.annotation, c.physical_type, c.physical_length);
+ ConfirmPrimitiveNodeRoundtrip(c.logical_type, c.physical_type, c.physical_length);
}
// Group nodes ...
- ConfirmGroupNodeRoundtrip("map", LogicalAnnotation::Map());
- ConfirmGroupNodeRoundtrip("list", LogicalAnnotation::List());
+ ConfirmGroupNodeRoundtrip("map", LogicalType::Map());
+ ConfirmGroupNodeRoundtrip("list", LogicalType::List());
}
} // namespace schema
diff --git a/cpp/src/parquet/schema.cc b/cpp/src/parquet/schema.cc
index 6e5d62f..cd2303a 100644
--- a/cpp/src/parquet/schema.cc
+++ b/cpp/src/parquet/schema.cc
@@ -95,8 +95,8 @@ const std::shared_ptr<ColumnPath> Node::path() const {
bool Node::EqualsInternal(const Node* other) const {
return type_ == other->type_ && name_ == other->name_ &&
- repetition_ == other->repetition_ && logical_type_ == other->logical_type_ &&
- logical_annotation_->Equals(*(other->logical_annotation()));
+ repetition_ == other->repetition_ && converted_type_ == other->converted_type_ &&
+ logical_type_->Equals(*(other->logical_type()));
}
void Node::SetParent(const Node* parent) { parent_ = parent; }
@@ -105,9 +105,9 @@ void Node::SetParent(const Node* parent) { parent_ = parent; }
// Primitive node
PrimitiveNode::PrimitiveNode(const std::string& name, Repetition::type repetition,
- Type::type type, LogicalType::type logical_type, int length,
- int precision, int scale, int id)
- : Node(Node::PRIMITIVE, name, repetition, logical_type, id),
+ Type::type type, ConvertedType::type converted_type,
+ int length, int precision, int scale, int id)
+ : Node(Node::PRIMITIVE, name, repetition, converted_type, id),
physical_type_(type),
type_length_(length) {
std::stringstream ss;
@@ -118,20 +118,20 @@ PrimitiveNode::PrimitiveNode(const std::string& name, Repetition::type repetitio
// Check if the physical and logical types match
// Mapping referred from Apache parquet-mr as on 2016-02-22
- switch (logical_type) {
- case LogicalType::NONE:
+ switch (converted_type) {
+ case ConvertedType::NONE:
// Logical type not set
break;
- case LogicalType::UTF8:
- case LogicalType::JSON:
- case LogicalType::BSON:
+ case ConvertedType::UTF8:
+ case ConvertedType::JSON:
+ case ConvertedType::BSON:
if (type != Type::BYTE_ARRAY) {
- ss << LogicalTypeToString(logical_type);
+ ss << ConvertedTypeToString(converted_type);
ss << " can only annotate BYTE_ARRAY fields";
throw ParquetException(ss.str());
}
break;
- case LogicalType::DECIMAL:
+ case ConvertedType::DECIMAL:
if ((type != Type::INT32) && (type != Type::INT64) && (type != Type::BYTE_ARRAY) &&
(type != Type::FIXED_LEN_BYTE_ARRAY)) {
ss << "DECIMAL can only annotate INT32, INT64, BYTE_ARRAY, and FIXED";
@@ -156,56 +156,55 @@ PrimitiveNode::PrimitiveNode(const std::string& name, Repetition::type repetitio
decimal_metadata_.precision = precision;
decimal_metadata_.scale = scale;
break;
- case LogicalType::DATE:
- case LogicalType::TIME_MILLIS:
- case LogicalType::UINT_8:
- case LogicalType::UINT_16:
- case LogicalType::UINT_32:
- case LogicalType::INT_8:
- case LogicalType::INT_16:
- case LogicalType::INT_32:
+ case ConvertedType::DATE:
+ case ConvertedType::TIME_MILLIS:
+ case ConvertedType::UINT_8:
+ case ConvertedType::UINT_16:
+ case ConvertedType::UINT_32:
+ case ConvertedType::INT_8:
+ case ConvertedType::INT_16:
+ case ConvertedType::INT_32:
if (type != Type::INT32) {
- ss << LogicalTypeToString(logical_type);
+ ss << ConvertedTypeToString(converted_type);
ss << " can only annotate INT32";
throw ParquetException(ss.str());
}
break;
- case LogicalType::TIME_MICROS:
- case LogicalType::TIMESTAMP_MILLIS:
- case LogicalType::TIMESTAMP_MICROS:
- case LogicalType::UINT_64:
- case LogicalType::INT_64:
+ case ConvertedType::TIME_MICROS:
+ case ConvertedType::TIMESTAMP_MILLIS:
+ case ConvertedType::TIMESTAMP_MICROS:
+ case ConvertedType::UINT_64:
+ case ConvertedType::INT_64:
if (type != Type::INT64) {
- ss << LogicalTypeToString(logical_type);
+ ss << ConvertedTypeToString(converted_type);
ss << " can only annotate INT64";
throw ParquetException(ss.str());
}
break;
- case LogicalType::INTERVAL:
+ case ConvertedType::INTERVAL:
if ((type != Type::FIXED_LEN_BYTE_ARRAY) || (length != 12)) {
ss << "INTERVAL can only annotate FIXED_LEN_BYTE_ARRAY(12)";
throw ParquetException(ss.str());
}
break;
- case LogicalType::ENUM:
+ case ConvertedType::ENUM:
if (type != Type::BYTE_ARRAY) {
ss << "ENUM can only annotate BYTE_ARRAY fields";
throw ParquetException(ss.str());
}
break;
- case LogicalType::NA:
+ case ConvertedType::NA:
// NA can annotate any type
break;
default:
- ss << LogicalTypeToString(logical_type);
+ ss << ConvertedTypeToString(converted_type);
ss << " can not be applied to a primitive type";
throw ParquetException(ss.str());
}
- // For forward compatibility, create an equivalent logical annotation
- logical_annotation_ =
- LogicalAnnotation::FromConvertedType(logical_type_, decimal_metadata_);
- DCHECK(logical_annotation_ && !logical_annotation_->is_nested() &&
- logical_annotation_->is_compatible(logical_type_, decimal_metadata_));
+ // For forward compatibility, create an equivalent logical type
+ logical_type_ = LogicalType::FromConvertedType(converted_type_, decimal_metadata_);
+ DCHECK(logical_type_ && !logical_type_->is_nested() &&
+ logical_type_->is_compatible(converted_type_, decimal_metadata_));
if (type == Type::FIXED_LEN_BYTE_ARRAY) {
if (length <= 0) {
@@ -217,38 +216,38 @@ PrimitiveNode::PrimitiveNode(const std::string& name, Repetition::type repetitio
}
PrimitiveNode::PrimitiveNode(const std::string& name, Repetition::type repetition,
- std::shared_ptr<const LogicalAnnotation> logical_annotation,
+ std::shared_ptr<const LogicalType> logical_type,
Type::type physical_type, int physical_length, int id)
- : Node(Node::PRIMITIVE, name, repetition, logical_annotation, id),
+ : Node(Node::PRIMITIVE, name, repetition, logical_type, id),
physical_type_(physical_type),
type_length_(physical_length) {
std::stringstream error;
- if (logical_annotation_) {
- // Check for annotation type <=> node type consistency
- if (!logical_annotation_->is_nested()) {
- // Check for annotation type <=> physical type consistency
- if (logical_annotation_->is_applicable(physical_type, physical_length)) {
+ if (logical_type_) {
+ // Check for logical type <=> node type consistency
+ if (!logical_type_->is_nested()) {
+ // Check for logical type <=> physical type consistency
+ if (logical_type_->is_applicable(physical_type, physical_length)) {
// For backward compatibility, assign equivalent legacy
// converted type (if possible)
- logical_type_ = logical_annotation_->ToConvertedType(&decimal_metadata_);
+ converted_type_ = logical_type_->ToConvertedType(&decimal_metadata_);
} else {
- error << logical_annotation_->ToString();
+ error << logical_type_->ToString();
error << " can not be applied to primitive type ";
error << TypeToString(physical_type);
throw ParquetException(error.str());
}
} else {
- error << "Nested annotation type ";
- error << logical_annotation_->ToString();
+ error << "Nested logical type ";
+ error << logical_type_->ToString();
error << " can not be applied to non-group node";
throw ParquetException(error.str());
}
} else {
- logical_annotation_ = NoAnnotation::Make();
- logical_type_ = logical_annotation_->ToConvertedType(&decimal_metadata_);
+ logical_type_ = NoLogicalType::Make();
+ converted_type_ = logical_type_->ToConvertedType(&decimal_metadata_);
}
- DCHECK(logical_annotation_ && !logical_annotation_->is_nested() &&
- logical_annotation_->is_compatible(logical_type_, decimal_metadata_));
+ DCHECK(logical_type_ && !logical_type_->is_nested() &&
+ logical_type_->is_compatible(converted_type_, decimal_metadata_));
if (physical_type == Type::FIXED_LEN_BYTE_ARRAY) {
if (physical_length <= 0) {
@@ -263,7 +262,7 @@ bool PrimitiveNode::EqualsInternal(const PrimitiveNode* other) const {
if (physical_type_ != other->physical_type_) {
return false;
}
- if (logical_type_ == LogicalType::DECIMAL) {
+ if (converted_type_ == ConvertedType::DECIMAL) {
is_equal &= (decimal_metadata_.precision == other->decimal_metadata_.precision) &&
(decimal_metadata_.scale == other->decimal_metadata_.scale);
}
@@ -290,13 +289,12 @@ void PrimitiveNode::VisitConst(Node::ConstVisitor* visitor) const {
// Group node
GroupNode::GroupNode(const std::string& name, Repetition::type repetition,
- const NodeVector& fields, LogicalType::type logical_type, int id)
- : Node(Node::GROUP, name, repetition, logical_type, id), fields_(fields) {
- // For forward compatibility, create an equivalent logical annotation
- logical_annotation_ = LogicalAnnotation::FromConvertedType(logical_type_);
- DCHECK(logical_annotation_ &&
- (logical_annotation_->is_nested() || logical_annotation_->is_none()) &&
- logical_annotation_->is_compatible(logical_type_));
+ const NodeVector& fields, ConvertedType::type converted_type, int id)
+ : Node(Node::GROUP, name, repetition, converted_type, id), fields_(fields) {
+ // For forward compatibility, create an equivalent logical type
+ logical_type_ = LogicalType::FromConvertedType(converted_type_);
+ DCHECK(logical_type_ && (logical_type_->is_nested() || logical_type_->is_none()) &&
+ logical_type_->is_compatible(converted_type_));
field_name_to_idx_.clear();
auto field_idx = 0;
@@ -308,27 +306,26 @@ GroupNode::GroupNode(const std::string& name, Repetition::type repetition,
GroupNode::GroupNode(const std::string& name, Repetition::type repetition,
const NodeVector& fields,
- std::shared_ptr<const LogicalAnnotation> logical_annotation, int id)
- : Node(Node::GROUP, name, repetition, logical_annotation, id), fields_(fields) {
- if (logical_annotation_) {
- // Check for annotation type <=> node type consistency
- if (logical_annotation_->is_nested()) {
+ std::shared_ptr<const LogicalType> logical_type, int id)
+ : Node(Node::GROUP, name, repetition, logical_type, id), fields_(fields) {
+ if (logical_type_) {
+ // Check for logical type <=> node type consistency
+ if (logical_type_->is_nested()) {
// For backward compatibility, assign equivalent legacy converted type (if possible)
- logical_type_ = logical_annotation_->ToConvertedType(nullptr);
+ converted_type_ = logical_type_->ToConvertedType(nullptr);
} else {
std::stringstream error;
- error << "Annotation type ";
- error << logical_annotation_->ToString();
+ error << "Logical type ";
+ error << logical_type_->ToString();
error << " can not be applied to group node";
throw ParquetException(error.str());
}
} else {
- logical_annotation_ = NoAnnotation::Make();
- logical_type_ = logical_annotation_->ToConvertedType(nullptr);
+ logical_type_ = NoLogicalType::Make();
+ converted_type_ = logical_type_->ToConvertedType(nullptr);
}
- DCHECK(logical_annotation_ &&
- (logical_annotation_->is_nested() || logical_annotation_->is_none()) &&
- logical_annotation_->is_compatible(logical_type_));
+ DCHECK(logical_type_ && (logical_type_->is_nested() || logical_type_->is_none()) &&
+ logical_type_->is_compatible(converted_type_));
field_name_to_idx_.clear();
auto field_idx = 0;
@@ -397,12 +394,12 @@ std::unique_ptr<Node> GroupNode::FromParquet(const void* opaque_element, int nod
// updated writer with logical type present
group_node = std::unique_ptr<GroupNode>(
new GroupNode(element->name, FromThrift(element->repetition_type), fields,
- LogicalAnnotation::FromThrift(element->logicalType), node_id));
+ LogicalType::FromThrift(element->logicalType), node_id));
} else {
group_node = std::unique_ptr<GroupNode>(new GroupNode(
element->name, FromThrift(element->repetition_type), fields,
(element->__isset.converted_type ? FromThrift(element->converted_type)
- : LogicalType::NONE),
+ : ConvertedType::NONE),
node_id));
}
@@ -471,20 +468,20 @@ std::unique_ptr<Node> PrimitiveNode::FromParquet(const void* opaque_element,
// updated writer with logical type present
primitive_node = std::unique_ptr<PrimitiveNode>(new PrimitiveNode(
element->name, SafeLoader<Repetition>::Load(&(element->repetition_type)),
- LogicalAnnotation::FromThrift(element->logicalType),
+ LogicalType::FromThrift(element->logicalType),
SafeLoader<Type>::Load(&(element->type)), element->type_length, node_id));
} else if (element->__isset.converted_type) {
// legacy writer with logical type present
primitive_node = std::unique_ptr<PrimitiveNode>(new PrimitiveNode(
element->name, SafeLoader<Repetition>::Load(&(element->repetition_type)),
SafeLoader<Type>::Load(&(element->type)),
- SafeLoader<LogicalType>::Load(&(element->converted_type)), element->type_length,
+ SafeLoader<ConvertedType>::Load(&(element->converted_type)), element->type_length,
element->precision, element->scale, node_id));
} else {
// logical type not present
primitive_node = std::unique_ptr<PrimitiveNode>(new PrimitiveNode(
element->name, SafeLoader<Repetition>::Load(&(element->repetition_type)),
- NoAnnotation::Make(), SafeLoader<Type>::Load(&(element->type)),
+ NoLogicalType::Make(), SafeLoader<Type>::Load(&(element->type)),
element->type_length, node_id));
}
@@ -497,11 +494,11 @@ void GroupNode::ToParquet(void* opaque_element) const {
element->__set_name(name_);
element->__set_num_children(field_count());
element->__set_repetition_type(ToThrift(repetition_));
- if (logical_type_ != LogicalType::NONE) {
- element->__set_converted_type(ToThrift(logical_type_));
+ if (converted_type_ != ConvertedType::NONE) {
+ element->__set_converted_type(ToThrift(converted_type_));
}
- if (logical_annotation_ && logical_annotation_->is_serialized()) {
- element->__set_logicalType(logical_annotation_->ToThrift());
+ if (logical_type_ && logical_type_->is_serialized()) {
+ element->__set_logicalType(logical_type_->ToThrift());
}
return;
}
@@ -510,14 +507,14 @@ void PrimitiveNode::ToParquet(void* opaque_element) const {
format::SchemaElement* element = static_cast<format::SchemaElement*>(opaque_element);
element->__set_name(name_);
element->__set_repetition_type(ToThrift(repetition_));
- if (logical_type_ != LogicalType::NONE) {
- element->__set_converted_type(ToThrift(logical_type_));
+ if (converted_type_ != ConvertedType::NONE) {
+ element->__set_converted_type(ToThrift(converted_type_));
}
- if (logical_annotation_ && logical_annotation_->is_serialized() &&
+ if (logical_type_ && logical_type_->is_serialized() &&
// TODO(tpboudreau): remove the following conjunct to enable serialization
// of IntervalTypes after parquet.thrift recognizes them
- !logical_annotation_->is_interval()) {
- element->__set_logicalType(logical_annotation_->ToThrift());
+ !logical_type_->is_interval()) {
+ element->__set_logicalType(logical_type_->ToThrift());
}
element->__set_type(ToThrift(physical_type_));
if (physical_type_ == Type::FIXED_LEN_BYTE_ARRAY) {
@@ -697,16 +694,17 @@ static void PrintType(const PrimitiveNode* node, std::ostream& stream) {
}
}
-static void PrintLogicalType(const PrimitiveNode* node, std::ostream& stream) {
- auto lt = node->logical_type();
- auto la = node->logical_annotation();
+static void PrintConvertedType(const PrimitiveNode* node, std::ostream& stream) {
+ auto lt = node->converted_type();
+ auto la = node->logical_type();
if (la && la->is_valid() && !la->is_none()) {
stream << " (" << la->ToString() << ")";
- } else if (lt == LogicalType::DECIMAL) {
- stream << " (" << LogicalTypeToString(lt) << "(" << node->decimal_metadata().precision
- << "," << node->decimal_metadata().scale << "))";
- } else if (lt != LogicalType::NONE) {
- stream << " (" << LogicalTypeToString(lt) << ")";
+ } else if (lt == ConvertedType::DECIMAL) {
+ stream << " (" << ConvertedTypeToString(lt) << "("
+ << node->decimal_metadata().precision << "," << node->decimal_metadata().scale
+ << "))";
+ } else if (lt != ConvertedType::NONE) {
+ stream << " (" << ConvertedTypeToString(lt) << ")";
}
}
@@ -715,7 +713,7 @@ void SchemaPrinter::Visit(const PrimitiveNode* node) {
stream_ << " ";
PrintType(node, stream_);
stream_ << " " << node->name();
- PrintLogicalType(node, stream_);
+ PrintConvertedType(node, stream_);
stream_ << ";" << std::endl;
}
@@ -725,12 +723,12 @@ void SchemaPrinter::Visit(const GroupNode* node) {
} else {
PrintRepLevel(node->repetition(), stream_);
stream_ << " group " << node->name();
- auto lt = node->logical_type();
- auto la = node->logical_annotation();
+ auto lt = node->converted_type();
+ auto la = node->logical_type();
if (la && la->is_valid() && !la->is_none()) {
stream_ << " (" << la->ToString() << ")";
- } else if (lt != LogicalType::NONE) {
- stream_ << " (" << LogicalTypeToString(lt) << ")";
+ } else if (lt != ConvertedType::NONE) {
+ stream_ << " (" << ConvertedTypeToString(lt) << ")";
}
stream_ << " {" << std::endl;
}
@@ -924,8 +922,9 @@ std::string ColumnDescriptor::ToString() const {
<< " name: " << name() << "," << std::endl
<< " path: " << path()->ToDotString() << "," << std::endl
<< " physical_type: " << TypeToString(physical_type()) << "," << std::endl
- << " logical_type: " << LogicalTypeToString(logical_type()) << "," << std::endl
- << " logical_annotation: " << logical_annotation()->ToString() << "," << std::endl
+ << " converted_type: " << ConvertedTypeToString(converted_type()) << ","
+ << std::endl
+ << " logical_type: " << logical_type()->ToString() << "," << std::endl
<< " max_definition_level: " << max_definition_level() << "," << std::endl
<< " max_repetition_level: " << max_repetition_level() << "," << std::endl;
@@ -933,7 +932,7 @@ std::string ColumnDescriptor::ToString() const {
ss << " length: " << type_length() << "," << std::endl;
}
- if (logical_type() == parquet::LogicalType::DECIMAL) {
+ if (converted_type() == parquet::ConvertedType::DECIMAL) {
ss << " precision: " << type_precision() << "," << std::endl
<< " scale: " << type_scale() << "," << std::endl;
}
diff --git a/cpp/src/parquet/schema.h b/cpp/src/parquet/schema.h
index e35d659..740edbc 100644
--- a/cpp/src/parquet/schema.h
+++ b/cpp/src/parquet/schema.h
@@ -103,20 +103,20 @@ class PARQUET_EXPORT Node {
enum type { PRIMITIVE, GROUP };
Node(Node::type type, const std::string& name, Repetition::type repetition,
- LogicalType::type logical_type = LogicalType::NONE, int id = -1)
+ ConvertedType::type converted_type = ConvertedType::NONE, int id = -1)
: type_(type),
name_(name),
repetition_(repetition),
- logical_type_(logical_type),
+ converted_type_(converted_type),
id_(id),
parent_(NULLPTR) {}
Node(Node::type type, const std::string& name, Repetition::type repetition,
- std::shared_ptr<const LogicalAnnotation> logical_annotation, int id = -1)
+ std::shared_ptr<const LogicalType> logical_type, int id = -1)
: type_(type),
name_(name),
repetition_(repetition),
- logical_annotation_(logical_annotation),
+ logical_type_(logical_type),
id_(id),
parent_(NULLPTR) {}
@@ -140,11 +140,9 @@ class PARQUET_EXPORT Node {
Repetition::type repetition() const { return repetition_; }
- LogicalType::type logical_type() const { return logical_type_; }
+ ConvertedType::type converted_type() const { return converted_type_; }
- const std::shared_ptr<const LogicalAnnotation>& logical_annotation() const {
- return logical_annotation_;
- }
+ const std::shared_ptr<const LogicalType>& logical_type() const { return logical_type_; }
int id() const { return id_; }
@@ -177,8 +175,8 @@ class PARQUET_EXPORT Node {
Node::type type_;
std::string name_;
Repetition::type repetition_;
- LogicalType::type logical_type_;
- std::shared_ptr<const LogicalAnnotation> logical_annotation_;
+ ConvertedType::type converted_type_;
+ std::shared_ptr<const LogicalType> logical_type_;
int id_;
// Nodes should not be shared, they have a single parent.
const Node* parent_;
@@ -204,16 +202,16 @@ class PARQUET_EXPORT PrimitiveNode : public Node {
static inline NodePtr Make(const std::string& name, Repetition::type repetition,
Type::type type,
- LogicalType::type logical_type = LogicalType::NONE,
+ ConvertedType::type converted_type = ConvertedType::NONE,
int length = -1, int precision = -1, int scale = -1) {
- return NodePtr(new PrimitiveNode(name, repetition, type, logical_type, length,
+ return NodePtr(new PrimitiveNode(name, repetition, type, converted_type, length,
precision, scale));
}
static inline NodePtr Make(const std::string& name, Repetition::type repetition,
- std::shared_ptr<const LogicalAnnotation> logical_annotation,
+ std::shared_ptr<const LogicalType> logical_type,
Type::type primitive_type, int primitive_length = -1) {
- return NodePtr(new PrimitiveNode(name, repetition, logical_annotation, primitive_type,
+ return NodePtr(new PrimitiveNode(name, repetition, logical_type, primitive_type,
primitive_length));
}
@@ -235,11 +233,11 @@ class PARQUET_EXPORT PrimitiveNode : public Node {
private:
PrimitiveNode(const std::string& name, Repetition::type repetition, Type::type type,
- LogicalType::type logical_type = LogicalType::NONE, int length = -1,
+ ConvertedType::type converted_type = ConvertedType::NONE, int length = -1,
int precision = -1, int scale = -1, int id = -1);
PrimitiveNode(const std::string& name, Repetition::type repetition,
- std::shared_ptr<const LogicalAnnotation> logical_annotation,
+ std::shared_ptr<const LogicalType> logical_type,
Type::type primitive_type, int primitive_length = -1, int id = -1);
Type::type physical_type_;
@@ -265,14 +263,14 @@ class PARQUET_EXPORT GroupNode : public Node {
static inline NodePtr Make(const std::string& name, Repetition::type repetition,
const NodeVector& fields,
- LogicalType::type logical_type = LogicalType::NONE) {
- return NodePtr(new GroupNode(name, repetition, fields, logical_type));
+ ConvertedType::type converted_type = ConvertedType::NONE) {
+ return NodePtr(new GroupNode(name, repetition, fields, converted_type));
}
- static inline NodePtr Make(
- const std::string& name, Repetition::type repetition, const NodeVector& fields,
- std::shared_ptr<const LogicalAnnotation> logical_annotation) {
- return NodePtr(new GroupNode(name, repetition, fields, logical_annotation));
+ static inline NodePtr Make(const std::string& name, Repetition::type repetition,
+ const NodeVector& fields,
+ std::shared_ptr<const LogicalType> logical_type) {
+ return NodePtr(new GroupNode(name, repetition, fields, logical_type));
}
bool Equals(const Node* other) const override;
@@ -293,12 +291,12 @@ class PARQUET_EXPORT GroupNode : public Node {
private:
GroupNode(const std::string& name, Repetition::type repetition,
- const NodeVector& fields, LogicalType::type logical_type = LogicalType::NONE,
- int id = -1);
+ const NodeVector& fields,
+ ConvertedType::type converted_type = ConvertedType::NONE, int id = -1);
GroupNode(const std::string& name, Repetition::type repetition,
- const NodeVector& fields,
- std::shared_ptr<const LogicalAnnotation> logical_annotation, int id = -1);
+ const NodeVector& fields, std::shared_ptr<const LogicalType> logical_type,
+ int id = -1);
NodeVector fields_;
bool EqualsInternal(const GroupNode* other) const;
@@ -353,18 +351,18 @@ class PARQUET_EXPORT ColumnDescriptor {
Type::type physical_type() const { return primitive_node_->physical_type(); }
- LogicalType::type logical_type() const { return primitive_node_->logical_type(); }
+ ConvertedType::type converted_type() const { return primitive_node_->converted_type(); }
- const std::shared_ptr<const LogicalAnnotation>& logical_annotation() const {
- return primitive_node_->logical_annotation();
+ const std::shared_ptr<const LogicalType>& logical_type() const {
+ return primitive_node_->logical_type();
}
ColumnOrder column_order() const { return primitive_node_->column_order(); }
SortOrder::type sort_order() const {
- auto la = logical_annotation();
+ auto la = logical_type();
auto pt = physical_type();
- return la ? GetSortOrder(la, pt) : GetSortOrder(logical_type(), pt);
+ return la ? GetSortOrder(la, pt) : GetSortOrder(converted_type(), pt);
}
const std::string& name() const { return primitive_node_->name(); }
diff --git a/cpp/src/parquet/statistics-test.cc b/cpp/src/parquet/statistics-test.cc
index 2a82cb7..fa1caa9 100644
--- a/cpp/src/parquet/statistics-test.cc
+++ b/cpp/src/parquet/statistics-test.cc
@@ -210,7 +210,7 @@ TEST(Comparison, UnsignedInt64) {
uint64_t aaa = 1, bbb = -1;
NodePtr node = PrimitiveNode::Make("UnsignedInt64", Repetition::REQUIRED, Type::INT64,
- LogicalType::UINT_64);
+ ConvertedType::UINT_64);
ColumnDescriptor descr(node, 0, 0);
ASSERT_EQ(SortOrder::UNSIGNED, descr.sort_order());
@@ -227,7 +227,7 @@ TEST(Comparison, UnsignedInt32) {
uint32_t aaa = 1, bbb = -1;
NodePtr node = PrimitiveNode::Make("UnsignedInt32", Repetition::REQUIRED, Type::INT32,
- LogicalType::UINT_32);
+ ConvertedType::UINT_32);
ColumnDescriptor descr(node, 0, 0);
ASSERT_EQ(SortOrder::UNSIGNED, descr.sort_order());
@@ -241,7 +241,7 @@ TEST(Comparison, UnsignedInt32) {
TEST(Comparison, UnknownSortOrder) {
NodePtr node =
PrimitiveNode::Make("Unknown", Repetition::REQUIRED, Type::FIXED_LEN_BYTE_ARRAY,
- LogicalType::INTERVAL, 12);
+ ConvertedType::INTERVAL, 12);
ColumnDescriptor descr(node, 0, 0);
ASSERT_THROW(Comparator::Make(&descr), ParquetException);
@@ -535,19 +535,19 @@ TEST(CorruptStatistics, Basics) {
std::vector<schema::NodePtr> fields;
// Test Physical Types
fields.push_back(schema::PrimitiveNode::Make("col1", Repetition::OPTIONAL, Type::INT32,
- LogicalType::NONE));
+ ConvertedType::NONE));
fields.push_back(schema::PrimitiveNode::Make("col2", Repetition::OPTIONAL,
- Type::BYTE_ARRAY, LogicalType::NONE));
+ Type::BYTE_ARRAY, ConvertedType::NONE));
// Test Logical Types
fields.push_back(schema::PrimitiveNode::Make("col3", Repetition::OPTIONAL, Type::INT32,
- LogicalType::DATE));
+ ConvertedType::DATE));
fields.push_back(schema::PrimitiveNode::Make("col4", Repetition::OPTIONAL, Type::INT32,
- LogicalType::UINT_32));
+ ConvertedType::UINT_32));
fields.push_back(schema::PrimitiveNode::Make("col5", Repetition::OPTIONAL,
Type::FIXED_LEN_BYTE_ARRAY,
- LogicalType::INTERVAL, 12));
+ ConvertedType::INTERVAL, 12));
fields.push_back(schema::PrimitiveNode::Make("col6", Repetition::OPTIONAL,
- Type::BYTE_ARRAY, LogicalType::UTF8));
+ Type::BYTE_ARRAY, ConvertedType::UTF8));
node = schema::GroupNode::Make("schema", Repetition::REQUIRED, fields);
schema.Init(node);
@@ -572,19 +572,19 @@ TEST(CorrectStatistics, Basics) {
std::vector<schema::NodePtr> fields;
// Test Physical Types
fields.push_back(schema::PrimitiveNode::Make("col1", Repetition::OPTIONAL, Type::INT32,
- LogicalType::NONE));
+ ConvertedType::NONE));
fields.push_back(schema::PrimitiveNode::Make("col2", Repetition::OPTIONAL,
- Type::BYTE_ARRAY, LogicalType::NONE));
+ Type::BYTE_ARRAY, ConvertedType::NONE));
// Test Logical Types
fields.push_back(schema::PrimitiveNode::Make("col3", Repetition::OPTIONAL, Type::INT32,
- LogicalType::DATE));
+ ConvertedType::DATE));
fields.push_back(schema::PrimitiveNode::Make("col4", Repetition::OPTIONAL, Type::INT32,
- LogicalType::UINT_32));
+ ConvertedType::UINT_32));
fields.push_back(schema::PrimitiveNode::Make("col5", Repetition::OPTIONAL,
Type::FIXED_LEN_BYTE_ARRAY,
- LogicalType::INTERVAL, 12));
+ ConvertedType::INTERVAL, 12));
fields.push_back(schema::PrimitiveNode::Make("col6", Repetition::OPTIONAL,
- Type::BYTE_ARRAY, LogicalType::UTF8));
+ Type::BYTE_ARRAY, ConvertedType::UTF8));
node = schema::GroupNode::Make("schema", Repetition::REQUIRED, fields);
schema.Init(node);
@@ -609,8 +609,8 @@ class TestStatisticsSortOrder : public ::testing::Test {
typedef typename TestType::c_type T;
void AddNodes(std::string name) {
- fields_.push_back(schema::PrimitiveNode::Make(name, Repetition::REQUIRED,
- TestType::type_num, LogicalType::NONE));
+ fields_.push_back(schema::PrimitiveNode::Make(
+ name, Repetition::REQUIRED, TestType::type_num, ConvertedType::NONE));
}
void SetUpSchema() {
@@ -684,10 +684,10 @@ template <>
void TestStatisticsSortOrder<Int32Type>::AddNodes(std::string name) {
// UINT_32 logical type to set Unsigned Statistics
fields_.push_back(schema::PrimitiveNode::Make(name, Repetition::REQUIRED, Type::INT32,
- LogicalType::UINT_32));
+ ConvertedType::UINT_32));
// INT_32 logical type to set Signed Statistics
fields_.push_back(schema::PrimitiveNode::Make(name, Repetition::REQUIRED, Type::INT32,
- LogicalType::INT_32));
+ ConvertedType::INT_32));
}
template <>
@@ -712,10 +712,10 @@ template <>
void TestStatisticsSortOrder<Int64Type>::AddNodes(std::string name) {
// UINT_64 logical type to set Unsigned Statistics
fields_.push_back(schema::PrimitiveNode::Make(name, Repetition::REQUIRED, Type::INT64,
- LogicalType::UINT_64));
+ ConvertedType::UINT_64));
// INT_64 logical type to set Signed Statistics
fields_.push_back(schema::PrimitiveNode::Make(name, Repetition::REQUIRED, Type::INT64,
- LogicalType::INT_64));
+ ConvertedType::INT_64));
}
template <>
@@ -768,7 +768,7 @@ template <>
void TestStatisticsSortOrder<ByteArrayType>::AddNodes(std::string name) {
// UTF8 logical type to set Unsigned Statistics
fields_.push_back(schema::PrimitiveNode::Make(name, Repetition::REQUIRED,
- Type::BYTE_ARRAY, LogicalType::UTF8));
+ Type::BYTE_ARRAY, ConvertedType::UTF8));
}
template <>
@@ -801,7 +801,7 @@ void TestStatisticsSortOrder<FLBAType>::AddNodes(std::string name) {
// FLBA has only Unsigned Statistics
fields_.push_back(schema::PrimitiveNode::Make(name, Repetition::REQUIRED,
Type::FIXED_LEN_BYTE_ARRAY,
- LogicalType::NONE, FLBA_LENGTH));
+ ConvertedType::NONE, FLBA_LENGTH));
}
template <>
@@ -838,8 +838,8 @@ using TestStatisticsSortOrderFLBA = TestStatisticsSortOrder<FLBAType>;
TEST_F(TestStatisticsSortOrderFLBA, UnknownSortOrder) {
this->fields_.push_back(schema::PrimitiveNode::Make(
- "Column 0", Repetition::REQUIRED, Type::FIXED_LEN_BYTE_ARRAY, LogicalType::INTERVAL,
- FLBA_LENGTH));
+ "Column 0", Repetition::REQUIRED, Type::FIXED_LEN_BYTE_ARRAY,
+ ConvertedType::INTERVAL, FLBA_LENGTH));
this->SetUpSchema();
this->WriteParquet();
diff --git a/cpp/src/parquet/test-util.h b/cpp/src/parquet/test-util.h
index c49dcda..9b78fad 100644
--- a/cpp/src/parquet/test-util.h
+++ b/cpp/src/parquet/test-util.h
@@ -620,7 +620,7 @@ class PrimitiveTypedTest : public ::testing::Test {
for (int i = 0; i < num_columns; ++i) {
std::string name = TestColumnName(i);
fields.push_back(schema::PrimitiveNode::Make(name, repetition, TestType::type_num,
- LogicalType::NONE, FLBA_LENGTH));
+ ConvertedType::NONE, FLBA_LENGTH));
}
node_ = schema::GroupNode::Make("schema", Repetition::REQUIRED, fields);
schema_.Init(node_);
diff --git a/cpp/src/parquet/thrift.h b/cpp/src/parquet/thrift.h
index ffefd12..c7b6207 100644
--- a/cpp/src/parquet/thrift.h
+++ b/cpp/src/parquet/thrift.h
@@ -64,9 +64,9 @@ static inline Type::type FromThrift(format::Type::type type) {
return static_cast<Type::type>(type);
}
-static inline LogicalType::type FromThrift(format::ConvertedType::type type) {
+static inline ConvertedType::type FromThrift(format::ConvertedType::type type) {
// item 0 is NONE
- return static_cast<LogicalType::type>(static_cast<int>(type) + 1);
+ return static_cast<ConvertedType::type>(static_cast<int>(type) + 1);
}
static inline Repetition::type FromThrift(format::FieldRepetitionType::type type) {
@@ -85,9 +85,9 @@ static inline format::Type::type ToThrift(Type::type type) {
return static_cast<format::Type::type>(type);
}
-static inline format::ConvertedType::type ToThrift(LogicalType::type type) {
+static inline format::ConvertedType::type ToThrift(ConvertedType::type type) {
// item 0 is NONE
- DCHECK_NE(type, LogicalType::NONE);
+ DCHECK_NE(type, ConvertedType::NONE);
return static_cast<format::ConvertedType::type>(static_cast<int>(type) - 1);
}
diff --git a/cpp/src/parquet/types-test.cc b/cpp/src/parquet/types-test.cc
index 30ce30c..9da5642 100644
--- a/cpp/src/parquet/types-test.cc
+++ b/cpp/src/parquet/types-test.cc
@@ -34,32 +34,33 @@ TEST(TestTypeToString, PhysicalTypes) {
ASSERT_STREQ("FIXED_LEN_BYTE_ARRAY", TypeToString(Type::FIXED_LEN_BYTE_ARRAY).c_str());
}
-TEST(TestLogicalTypeToString, LogicalTypes) {
- ASSERT_STREQ("NONE", LogicalTypeToString(LogicalType::NONE).c_str());
- ASSERT_STREQ("UTF8", LogicalTypeToString(LogicalType::UTF8).c_str());
- ASSERT_STREQ("MAP", LogicalTypeToString(LogicalType::MAP).c_str());
- ASSERT_STREQ("MAP_KEY_VALUE", LogicalTypeToString(LogicalType::MAP_KEY_VALUE).c_str());
- ASSERT_STREQ("LIST", LogicalTypeToString(LogicalType::LIST).c_str());
- ASSERT_STREQ("ENUM", LogicalTypeToString(LogicalType::ENUM).c_str());
- ASSERT_STREQ("DECIMAL", LogicalTypeToString(LogicalType::DECIMAL).c_str());
- ASSERT_STREQ("DATE", LogicalTypeToString(LogicalType::DATE).c_str());
- ASSERT_STREQ("TIME_MILLIS", LogicalTypeToString(LogicalType::TIME_MILLIS).c_str());
- ASSERT_STREQ("TIME_MICROS", LogicalTypeToString(LogicalType::TIME_MICROS).c_str());
+TEST(TestConvertedTypeToString, ConvertedTypes) {
+ ASSERT_STREQ("NONE", ConvertedTypeToString(ConvertedType::NONE).c_str());
+ ASSERT_STREQ("UTF8", ConvertedTypeToString(ConvertedType::UTF8).c_str());
+ ASSERT_STREQ("MAP", ConvertedTypeToString(ConvertedType::MAP).c_str());
+ ASSERT_STREQ("MAP_KEY_VALUE",
+ ConvertedTypeToString(ConvertedType::MAP_KEY_VALUE).c_str());
+ ASSERT_STREQ("LIST", ConvertedTypeToString(ConvertedType::LIST).c_str());
+ ASSERT_STREQ("ENUM", ConvertedTypeToString(ConvertedType::ENUM).c_str());
+ ASSERT_STREQ("DECIMAL", ConvertedTypeToString(ConvertedType::DECIMAL).c_str());
+ ASSERT_STREQ("DATE", ConvertedTypeToString(ConvertedType::DATE).c_str());
+ ASSERT_STREQ("TIME_MILLIS", ConvertedTypeToString(ConvertedType::TIME_MILLIS).c_str());
+ ASSERT_STREQ("TIME_MICROS", ConvertedTypeToString(ConvertedType::TIME_MICROS).c_str());
ASSERT_STREQ("TIMESTAMP_MILLIS",
- LogicalTypeToString(LogicalType::TIMESTAMP_MILLIS).c_str());
+ ConvertedTypeToString(ConvertedType::TIMESTAMP_MILLIS).c_str());
ASSERT_STREQ("TIMESTAMP_MICROS",
- LogicalTypeToString(LogicalType::TIMESTAMP_MICROS).c_str());
- ASSERT_STREQ("UINT_8", LogicalTypeToString(LogicalType::UINT_8).c_str());
- ASSERT_STREQ("UINT_16", LogicalTypeToString(LogicalType::UINT_16).c_str());
- ASSERT_STREQ("UINT_32", LogicalTypeToString(LogicalType::UINT_32).c_str());
- ASSERT_STREQ("UINT_64", LogicalTypeToString(LogicalType::UINT_64).c_str());
- ASSERT_STREQ("INT_8", LogicalTypeToString(LogicalType::INT_8).c_str());
- ASSERT_STREQ("INT_16", LogicalTypeToString(LogicalType::INT_16).c_str());
- ASSERT_STREQ("INT_32", LogicalTypeToString(LogicalType::INT_32).c_str());
- ASSERT_STREQ("INT_64", LogicalTypeToString(LogicalType::INT_64).c_str());
- ASSERT_STREQ("JSON", LogicalTypeToString(LogicalType::JSON).c_str());
- ASSERT_STREQ("BSON", LogicalTypeToString(LogicalType::BSON).c_str());
- ASSERT_STREQ("INTERVAL", LogicalTypeToString(LogicalType::INTERVAL).c_str());
+ ConvertedTypeToString(ConvertedType::TIMESTAMP_MICROS).c_str());
+ ASSERT_STREQ("UINT_8", ConvertedTypeToString(ConvertedType::UINT_8).c_str());
+ ASSERT_STREQ("UINT_16", ConvertedTypeToString(ConvertedType::UINT_16).c_str());
+ ASSERT_STREQ("UINT_32", ConvertedTypeToString(ConvertedType::UINT_32).c_str());
+ ASSERT_STREQ("UINT_64", ConvertedTypeToString(ConvertedType::UINT_64).c_str());
+ ASSERT_STREQ("INT_8", ConvertedTypeToString(ConvertedType::INT_8).c_str());
+ ASSERT_STREQ("INT_16", ConvertedTypeToString(ConvertedType::INT_16).c_str());
+ ASSERT_STREQ("INT_32", ConvertedTypeToString(ConvertedType::INT_32).c_str());
+ ASSERT_STREQ("INT_64", ConvertedTypeToString(ConvertedType::INT_64).c_str());
+ ASSERT_STREQ("JSON", ConvertedTypeToString(ConvertedType::JSON).c_str());
+ ASSERT_STREQ("BSON", ConvertedTypeToString(ConvertedType::BSON).c_str());
+ ASSERT_STREQ("INTERVAL", ConvertedTypeToString(ConvertedType::INTERVAL).c_str());
}
TEST(TestCompressionToString, Compression) {
diff --git a/cpp/src/parquet/types.cc b/cpp/src/parquet/types.cc
index ee81af3..644e28f 100644
--- a/cpp/src/parquet/types.cc
+++ b/cpp/src/parquet/types.cc
@@ -208,55 +208,55 @@ std::string TypeToString(Type::type t) {
}
}
-std::string LogicalTypeToString(LogicalType::type t) {
+std::string ConvertedTypeToString(ConvertedType::type t) {
switch (t) {
- case LogicalType::NONE:
+ case ConvertedType::NONE:
return "NONE";
- case LogicalType::UTF8:
+ case ConvertedType::UTF8:
return "UTF8";
- case LogicalType::MAP:
+ case ConvertedType::MAP:
return "MAP";
- case LogicalType::MAP_KEY_VALUE:
+ case ConvertedType::MAP_KEY_VALUE:
return "MAP_KEY_VALUE";
- case LogicalType::LIST:
+ case ConvertedType::LIST:
return "LIST";
- case LogicalType::ENUM:
+ case ConvertedType::ENUM:
return "ENUM";
- case LogicalType::DECIMAL:
+ case ConvertedType::DECIMAL:
return "DECIMAL";
- case LogicalType::DATE:
+ case ConvertedType::DATE:
return "DATE";
- case LogicalType::TIME_MILLIS:
+ case ConvertedType::TIME_MILLIS:
return "TIME_MILLIS";
- case LogicalType::TIME_MICROS:
+ case ConvertedType::TIME_MICROS:
return "TIME_MICROS";
- case LogicalType::TIMESTAMP_MILLIS:
+ case ConvertedType::TIMESTAMP_MILLIS:
return "TIMESTAMP_MILLIS";
- case LogicalType::TIMESTAMP_MICROS:
+ case ConvertedType::TIMESTAMP_MICROS:
return "TIMESTAMP_MICROS";
- case LogicalType::UINT_8:
+ case ConvertedType::UINT_8:
return "UINT_8";
- case LogicalType::UINT_16:
+ case ConvertedType::UINT_16:
return "UINT_16";
- case LogicalType::UINT_32:
+ case ConvertedType::UINT_32:
return "UINT_32";
- case LogicalType::UINT_64:
+ case ConvertedType::UINT_64:
return "UINT_64";
- case LogicalType::INT_8:
+ case ConvertedType::INT_8:
return "INT_8";
- case LogicalType::INT_16:
+ case ConvertedType::INT_16:
return "INT_16";
- case LogicalType::INT_32:
+ case ConvertedType::INT_32:
return "INT_32";
- case LogicalType::INT_64:
+ case ConvertedType::INT_64:
return "INT_64";
- case LogicalType::JSON:
+ case ConvertedType::JSON:
return "JSON";
- case LogicalType::BSON:
+ case ConvertedType::BSON:
return "BSON";
- case LogicalType::INTERVAL:
+ case ConvertedType::INTERVAL:
return "INTERVAL";
- case LogicalType::UNDEFINED:
+ case ConvertedType::UNDEFINED:
default:
return "UNKNOWN";
}
@@ -307,46 +307,47 @@ SortOrder::type DefaultSortOrder(Type::type primitive) {
}
// Return the SortOrder of the Parquet Types using Logical or Physical Types
-SortOrder::type GetSortOrder(LogicalType::type converted, Type::type primitive) {
- if (converted == LogicalType::NONE) return DefaultSortOrder(primitive);
+SortOrder::type GetSortOrder(ConvertedType::type converted, Type::type primitive) {
+ if (converted == ConvertedType::NONE) return DefaultSortOrder(primitive);
switch (converted) {
- case LogicalType::INT_8:
- case LogicalType::INT_16:
- case LogicalType::INT_32:
- case LogicalType::INT_64:
- case LogicalType::DATE:
- case LogicalType::TIME_MICROS:
- case LogicalType::TIME_MILLIS:
- case LogicalType::TIMESTAMP_MICROS:
- case LogicalType::TIMESTAMP_MILLIS:
+ case ConvertedType::INT_8:
+ case ConvertedType::INT_16:
+ case ConvertedType::INT_32:
+ case ConvertedType::INT_64:
+ case ConvertedType::DATE:
+ case ConvertedType::TIME_MICROS:
+ case ConvertedType::TIME_MILLIS:
+ case ConvertedType::TIMESTAMP_MICROS:
+ case ConvertedType::TIMESTAMP_MILLIS:
return SortOrder::SIGNED;
- case LogicalType::UINT_8:
- case LogicalType::UINT_16:
- case LogicalType::UINT_32:
- case LogicalType::UINT_64:
- case LogicalType::ENUM:
- case LogicalType::UTF8:
- case LogicalType::BSON:
- case LogicalType::JSON:
+ case ConvertedType::UINT_8:
+ case ConvertedType::UINT_16:
+ case ConvertedType::UINT_32:
+ case ConvertedType::UINT_64:
+ case ConvertedType::ENUM:
+ case ConvertedType::UTF8:
+ case ConvertedType::BSON:
+ case ConvertedType::JSON:
return SortOrder::UNSIGNED;
- case LogicalType::DECIMAL:
- case LogicalType::LIST:
- case LogicalType::MAP:
- case LogicalType::MAP_KEY_VALUE:
- case LogicalType::INTERVAL:
- case LogicalType::NONE: // required instead of default
- case LogicalType::NA: // required instead of default
- case LogicalType::UNDEFINED:
+ case ConvertedType::DECIMAL:
+ case ConvertedType::LIST:
+ case ConvertedType::MAP:
+ case ConvertedType::MAP_KEY_VALUE:
+ case ConvertedType::INTERVAL:
+ case ConvertedType::NONE: // required instead of default
+ case ConvertedType::NA: // required instead of default
+ case ConvertedType::UNDEFINED:
return SortOrder::UNKNOWN;
}
return SortOrder::UNKNOWN;
}
-SortOrder::type GetSortOrder(const std::shared_ptr<const LogicalAnnotation>& annotation,
+SortOrder::type GetSortOrder(const std::shared_ptr<const LogicalType>& logical_type,
Type::type primitive) {
SortOrder::type o = SortOrder::UNKNOWN;
- if (annotation && annotation->is_valid()) {
- o = (annotation->is_none() ? DefaultSortOrder(primitive) : annotation->sort_order());
+ if (logical_type && logical_type->is_valid()) {
+ o = (logical_type->is_none() ? DefaultSortOrder(primitive)
+ : logical_type->sort_order());
}
return o;
}
@@ -354,218 +355,199 @@ SortOrder::type GetSortOrder(const std::shared_ptr<const LogicalAnnotation>& ann
ColumnOrder ColumnOrder::undefined_ = ColumnOrder(ColumnOrder::UNDEFINED);
ColumnOrder ColumnOrder::type_defined_ = ColumnOrder(ColumnOrder::TYPE_DEFINED_ORDER);
-// Static methods for LogicalAnnotation class
+// Static methods for LogicalType class
-std::shared_ptr<const LogicalAnnotation> LogicalAnnotation::FromConvertedType(
- const LogicalType::type converted_type,
+std::shared_ptr<const LogicalType> LogicalType::FromConvertedType(
+ const ConvertedType::type converted_type,
const schema::DecimalMetadata converted_decimal_metadata) {
switch (converted_type) {
- case LogicalType::UTF8:
- return StringAnnotation::Make();
- case LogicalType::MAP_KEY_VALUE:
- case LogicalType::MAP:
- return MapAnnotation::Make();
- case LogicalType::LIST:
- return ListAnnotation::Make();
- case LogicalType::ENUM:
- return EnumAnnotation::Make();
- case LogicalType::DECIMAL:
- return DecimalAnnotation::Make(converted_decimal_metadata.precision,
- converted_decimal_metadata.scale);
- case LogicalType::DATE:
- return DateAnnotation::Make();
- case LogicalType::TIME_MILLIS:
- return TimeAnnotation::Make(true, LogicalAnnotation::TimeUnit::MILLIS);
- case LogicalType::TIME_MICROS:
- return TimeAnnotation::Make(true, LogicalAnnotation::TimeUnit::MICROS);
- case LogicalType::TIMESTAMP_MILLIS:
- return TimestampAnnotation::Make(true, LogicalAnnotation::TimeUnit::MILLIS);
- case LogicalType::TIMESTAMP_MICROS:
- return TimestampAnnotation::Make(true, LogicalAnnotation::TimeUnit::MICROS);
- case LogicalType::INTERVAL:
- return IntervalAnnotation::Make();
- case LogicalType::INT_8:
- return IntAnnotation::Make(8, true);
- case LogicalType::INT_16:
- return IntAnnotation::Make(16, true);
- case LogicalType::INT_32:
- return IntAnnotation::Make(32, true);
- case LogicalType::INT_64:
- return IntAnnotation::Make(64, true);
- case LogicalType::UINT_8:
- return IntAnnotation::Make(8, false);
- case LogicalType::UINT_16:
- return IntAnnotation::Make(16, false);
- case LogicalType::UINT_32:
- return IntAnnotation::Make(32, false);
- case LogicalType::UINT_64:
- return IntAnnotation::Make(64, false);
- case LogicalType::JSON:
- return JSONAnnotation::Make();
- case LogicalType::BSON:
- return BSONAnnotation::Make();
- case LogicalType::NONE:
- return NoAnnotation::Make();
- case LogicalType::NA:
- case LogicalType::UNDEFINED:
- return UnknownAnnotation::Make();
+ case ConvertedType::UTF8:
+ return StringLogicalType::Make();
+ case ConvertedType::MAP_KEY_VALUE:
+ case ConvertedType::MAP:
+ return MapLogicalType::Make();
+ case ConvertedType::LIST:
+ return ListLogicalType::Make();
+ case ConvertedType::ENUM:
+ return EnumLogicalType::Make();
+ case ConvertedType::DECIMAL:
+ return DecimalLogicalType::Make(converted_decimal_metadata.precision,
+ converted_decimal_metadata.scale);
+ case ConvertedType::DATE:
+ return DateLogicalType::Make();
+ case ConvertedType::TIME_MILLIS:
+ return TimeLogicalType::Make(true, LogicalType::TimeUnit::MILLIS);
+ case ConvertedType::TIME_MICROS:
+ return TimeLogicalType::Make(true, LogicalType::TimeUnit::MICROS);
+ case ConvertedType::TIMESTAMP_MILLIS:
+ return TimestampLogicalType::Make(true, LogicalType::TimeUnit::MILLIS);
+ case ConvertedType::TIMESTAMP_MICROS:
+ return TimestampLogicalType::Make(true, LogicalType::TimeUnit::MICROS);
+ case ConvertedType::INTERVAL:
+ return IntervalLogicalType::Make();
+ case ConvertedType::INT_8:
+ return IntLogicalType::Make(8, true);
+ case ConvertedType::INT_16:
+ return IntLogicalType::Make(16, true);
+ case ConvertedType::INT_32:
+ return IntLogicalType::Make(32, true);
+ case ConvertedType::INT_64:
+ return IntLogicalType::Make(64, true);
+ case ConvertedType::UINT_8:
+ return IntLogicalType::Make(8, false);
+ case ConvertedType::UINT_16:
+ return IntLogicalType::Make(16, false);
+ case ConvertedType::UINT_32:
+ return IntLogicalType::Make(32, false);
+ case ConvertedType::UINT_64:
+ return IntLogicalType::Make(64, false);
+ case ConvertedType::JSON:
+ return JSONLogicalType::Make();
+ case ConvertedType::BSON:
+ return BSONLogicalType::Make();
+ case ConvertedType::NONE:
+ return NoLogicalType::Make();
+ case ConvertedType::NA:
+ case ConvertedType::UNDEFINED:
+ return UnknownLogicalType::Make();
}
- return UnknownAnnotation::Make();
+ return UnknownLogicalType::Make();
}
-std::shared_ptr<const LogicalAnnotation> LogicalAnnotation::FromThrift(
+std::shared_ptr<const LogicalType> LogicalType::FromThrift(
const format::LogicalType& type) {
if (type.__isset.STRING) {
- return StringAnnotation::Make();
+ return StringLogicalType::Make();
} else if (type.__isset.MAP) {
- return MapAnnotation::Make();
+ return MapLogicalType::Make();
} else if (type.__isset.LIST) {
- return ListAnnotation::Make();
+ return ListLogicalType::Make();
} else if (type.__isset.ENUM) {
- return EnumAnnotation::Make();
+ return EnumLogicalType::Make();
} else if (type.__isset.DECIMAL) {
- return DecimalAnnotation::Make(type.DECIMAL.precision, type.DECIMAL.scale);
+ return DecimalLogicalType::Make(type.DECIMAL.precision, type.DECIMAL.scale);
} else if (type.__isset.DATE) {
- return DateAnnotation::Make();
+ return DateLogicalType::Make();
} else if (type.__isset.TIME) {
- LogicalAnnotation::TimeUnit::unit unit;
+ LogicalType::TimeUnit::unit unit;
if (type.TIME.unit.__isset.MILLIS) {
- unit = LogicalAnnotation::TimeUnit::MILLIS;
+ unit = LogicalType::TimeUnit::MILLIS;
} else if (type.TIME.unit.__isset.MICROS) {
- unit = LogicalAnnotation::TimeUnit::MICROS;
+ unit = LogicalType::TimeUnit::MICROS;
} else if (type.TIME.unit.__isset.NANOS) {
- unit = LogicalAnnotation::TimeUnit::NANOS;
+ unit = LogicalType::TimeUnit::NANOS;
} else {
- unit = LogicalAnnotation::TimeUnit::UNKNOWN;
+ unit = LogicalType::TimeUnit::UNKNOWN;
}
- return TimeAnnotation::Make(type.TIME.isAdjustedToUTC, unit);
+ return TimeLogicalType::Make(type.TIME.isAdjustedToUTC, unit);
} else if (type.__isset.TIMESTAMP) {
- LogicalAnnotation::TimeUnit::unit unit;
+ LogicalType::TimeUnit::unit unit;
if (type.TIMESTAMP.unit.__isset.MILLIS) {
- unit = LogicalAnnotation::TimeUnit::MILLIS;
+ unit = LogicalType::TimeUnit::MILLIS;
} else if (type.TIMESTAMP.unit.__isset.MICROS) {
- unit = LogicalAnnotation::TimeUnit::MICROS;
+ unit = LogicalType::TimeUnit::MICROS;
} else if (type.TIMESTAMP.unit.__isset.NANOS) {
- unit = LogicalAnnotation::TimeUnit::NANOS;
+ unit = LogicalType::TimeUnit::NANOS;
} else {
- unit = LogicalAnnotation::TimeUnit::UNKNOWN;
+ unit = LogicalType::TimeUnit::UNKNOWN;
}
- return TimestampAnnotation::Make(type.TIMESTAMP.isAdjustedToUTC, unit);
+ return TimestampLogicalType::Make(type.TIMESTAMP.isAdjustedToUTC, unit);
// TODO(tpboudreau): activate the commented code after parquet.thrift
// recognizes IntervalType as a LogicalType
//} else if (type.__isset.INTERVAL) {
- // return IntervalAnnotation::Make();
+ // return IntervalLogicalType::Make();
} else if (type.__isset.INTEGER) {
- return IntAnnotation::Make(static_cast<int>(type.INTEGER.bitWidth),
- type.INTEGER.isSigned);
+ return IntLogicalType::Make(static_cast<int>(type.INTEGER.bitWidth),
+ type.INTEGER.isSigned);
} else if (type.__isset.UNKNOWN) {
- return NullAnnotation::Make();
+ return NullLogicalType::Make();
} else if (type.__isset.JSON) {
- return JSONAnnotation::Make();
+ return JSONLogicalType::Make();
} else if (type.__isset.BSON) {
- return BSONAnnotation::Make();
+ return BSONLogicalType::Make();
} else if (type.__isset.UUID) {
- return UUIDAnnotation::Make();
+ return UUIDLogicalType::Make();
} else {
throw ParquetException("Metadata contains Thrift LogicalType that is not recognized");
}
}
-std::shared_ptr<const LogicalAnnotation> LogicalAnnotation::String() {
- return StringAnnotation::Make();
+std::shared_ptr<const LogicalType> LogicalType::String() {
+ return StringLogicalType::Make();
}
-std::shared_ptr<const LogicalAnnotation> LogicalAnnotation::Map() {
- return MapAnnotation::Make();
-}
+std::shared_ptr<const LogicalType> LogicalType::Map() { return MapLogicalType::Make(); }
-std::shared_ptr<const LogicalAnnotation> LogicalAnnotation::List() {
- return ListAnnotation::Make();
-}
+std::shared_ptr<const LogicalType> LogicalType::List() { return ListLogicalType::Make(); }
-std::shared_ptr<const LogicalAnnotation> LogicalAnnotation::Enum() {
- return EnumAnnotation::Make();
-}
+std::shared_ptr<const LogicalType> LogicalType::Enum() { return EnumLogicalType::Make(); }
-std::shared_ptr<const LogicalAnnotation> LogicalAnnotation::Decimal(int32_t precision,
- int32_t scale) {
- return DecimalAnnotation::Make(precision, scale);
+std::shared_ptr<const LogicalType> LogicalType::Decimal(int32_t precision,
+ int32_t scale) {
+ return DecimalLogicalType::Make(precision, scale);
}
-std::shared_ptr<const LogicalAnnotation> LogicalAnnotation::Date() {
- return DateAnnotation::Make();
-}
+std::shared_ptr<const LogicalType> LogicalType::Date() { return DateLogicalType::Make(); }
-std::shared_ptr<const LogicalAnnotation> LogicalAnnotation::Time(
- bool is_adjusted_to_utc, LogicalAnnotation::TimeUnit::unit time_unit) {
- DCHECK(time_unit != LogicalAnnotation::TimeUnit::UNKNOWN);
- return TimeAnnotation::Make(is_adjusted_to_utc, time_unit);
+std::shared_ptr<const LogicalType> LogicalType::Time(
+ bool is_adjusted_to_utc, LogicalType::TimeUnit::unit time_unit) {
+ DCHECK(time_unit != LogicalType::TimeUnit::UNKNOWN);
+ return TimeLogicalType::Make(is_adjusted_to_utc, time_unit);
}
-std::shared_ptr<const LogicalAnnotation> LogicalAnnotation::Timestamp(
- bool is_adjusted_to_utc, LogicalAnnotation::TimeUnit::unit time_unit) {
- DCHECK(time_unit != LogicalAnnotation::TimeUnit::UNKNOWN);
- return TimestampAnnotation::Make(is_adjusted_to_utc, time_unit);
+std::shared_ptr<const LogicalType> LogicalType::Timestamp(
+ bool is_adjusted_to_utc, LogicalType::TimeUnit::unit time_unit) {
+ DCHECK(time_unit != LogicalType::TimeUnit::UNKNOWN);
+ return TimestampLogicalType::Make(is_adjusted_to_utc, time_unit);
}
-std::shared_ptr<const LogicalAnnotation> LogicalAnnotation::Interval() {
- return IntervalAnnotation::Make();
+std::shared_ptr<const LogicalType> LogicalType::Interval() {
+ return IntervalLogicalType::Make();
}
-std::shared_ptr<const LogicalAnnotation> LogicalAnnotation::Int(int bit_width,
- bool is_signed) {
+std::shared_ptr<const LogicalType> LogicalType::Int(int bit_width, bool is_signed) {
DCHECK(bit_width == 64 || bit_width == 32 || bit_width == 16 || bit_width == 8);
- return IntAnnotation::Make(bit_width, is_signed);
+ return IntLogicalType::Make(bit_width, is_signed);
}
-std::shared_ptr<const LogicalAnnotation> LogicalAnnotation::Null() {
- return NullAnnotation::Make();
-}
+std::shared_ptr<const LogicalType> LogicalType::Null() { return NullLogicalType::Make(); }
-std::shared_ptr<const LogicalAnnotation> LogicalAnnotation::JSON() {
- return JSONAnnotation::Make();
-}
+std::shared_ptr<const LogicalType> LogicalType::JSON() { return JSONLogicalType::Make(); }
-std::shared_ptr<const LogicalAnnotation> LogicalAnnotation::BSON() {
- return BSONAnnotation::Make();
-}
+std::shared_ptr<const LogicalType> LogicalType::BSON() { return BSONLogicalType::Make(); }
-std::shared_ptr<const LogicalAnnotation> LogicalAnnotation::UUID() {
- return UUIDAnnotation::Make();
-}
+std::shared_ptr<const LogicalType> LogicalType::UUID() { return UUIDLogicalType::Make(); }
-std::shared_ptr<const LogicalAnnotation> LogicalAnnotation::None() {
- return NoAnnotation::Make();
-}
+std::shared_ptr<const LogicalType> LogicalType::None() { return NoLogicalType::Make(); }
-std::shared_ptr<const LogicalAnnotation> LogicalAnnotation::Unknown() {
- return UnknownAnnotation::Make();
+std::shared_ptr<const LogicalType> LogicalType::Unknown() {
+ return UnknownLogicalType::Make();
}
/*
- * The annotation implementation classes are built in four layers: (1) the base
+ * The logical type implementation classes are built in four layers: (1) the base
* layer, which establishes the interface and provides generally reusable implementations
* for the ToJSON() and Equals() methods; (2) an intermediate derived layer for the
* "compatibility" methods, which provides implementations for is_compatible() and
* ToConvertedType(); (3) another intermediate layer for the "applicability" methods
* that provides several implementations for the is_applicable() method; and (4) the
- * final derived classes, one for each annotation type, which supply implementations
+ * final derived classes, one for each logical type, which supply implementations
* for those methods that remain virtual (usually just ToString() and ToThrift()) or
* otherwise need to be overridden.
*/
-// LogicalAnnotationImpl base class
+// LogicalTypeImpl base class
-class LogicalAnnotation::Impl {
+class LogicalType::Impl {
public:
virtual bool is_applicable(parquet::Type::type primitive_type,
int32_t primitive_length = -1) const = 0;
- virtual bool is_compatible(LogicalType::type converted_type,
+ virtual bool is_compatible(ConvertedType::type converted_type,
schema::DecimalMetadata converted_decimal_metadata = {
false, -1, -1}) const = 0;
- virtual LogicalType::type ToConvertedType(
+ virtual ConvertedType::type ToConvertedType(
schema::DecimalMetadata* out_decimal_metadata) const = 0;
virtual std::string ToString() const = 0;
@@ -577,17 +559,15 @@ class LogicalAnnotation::Impl {
}
virtual format::LogicalType ToThrift() const {
- // annotation types inheriting this method should never be serialized
+ // logical types inheriting this method should never be serialized
std::stringstream ss;
- ss << "Annotation type " << ToString() << " should not be serialized";
+ ss << "Logical type " << ToString() << " should not be serialized";
throw ParquetException(ss.str());
}
- virtual bool Equals(const LogicalAnnotation& other) const {
- return other.type() == type_;
- }
+ virtual bool Equals(const LogicalType& other) const { return other.type() == type_; }
- LogicalAnnotation::Type::type type() const { return type_; }
+ LogicalType::Type::type type() const { return type_; }
SortOrder::type sort_order() const { return order_; }
@@ -623,115 +603,87 @@ class LogicalAnnotation::Impl {
class Unknown;
protected:
- Impl(LogicalAnnotation::Type::type t, SortOrder::type o) : type_(t), order_(o) {}
+ Impl(LogicalType::Type::type t, SortOrder::type o) : type_(t), order_(o) {}
Impl() = default;
private:
- LogicalAnnotation::Type::type type_ = LogicalAnnotation::Type::UNKNOWN;
+ LogicalType::Type::type type_ = LogicalType::Type::UNKNOWN;
SortOrder::type order_ = SortOrder::UNKNOWN;
};
-// Special methods for public LogicalAnnotation class
+// Special methods for public LogicalType class
-LogicalAnnotation::LogicalAnnotation() = default;
-LogicalAnnotation::~LogicalAnnotation() noexcept = default;
+LogicalType::LogicalType() = default;
+LogicalType::~LogicalType() noexcept = default;
-// Delegating methods for public LogicalAnnotation class
+// Delegating methods for public LogicalType class
-bool LogicalAnnotation::is_applicable(parquet::Type::type primitive_type,
- int32_t primitive_length) const {
+bool LogicalType::is_applicable(parquet::Type::type primitive_type,
+ int32_t primitive_length) const {
return impl_->is_applicable(primitive_type, primitive_length);
}
-bool LogicalAnnotation::is_compatible(
- LogicalType::type converted_type,
+bool LogicalType::is_compatible(
+ ConvertedType::type converted_type,
schema::DecimalMetadata converted_decimal_metadata) const {
return impl_->is_compatible(converted_type, converted_decimal_metadata);
}
-LogicalType::type LogicalAnnotation::ToConvertedType(
+ConvertedType::type LogicalType::ToConvertedType(
schema::DecimalMetadata* out_decimal_metadata) const {
return impl_->ToConvertedType(out_decimal_metadata);
}
-std::string LogicalAnnotation::ToString() const { return impl_->ToString(); }
+std::string LogicalType::ToString() const { return impl_->ToString(); }
-std::string LogicalAnnotation::ToJSON() const { return impl_->ToJSON(); }
+std::string LogicalType::ToJSON() const { return impl_->ToJSON(); }
-format::LogicalType LogicalAnnotation::ToThrift() const { return impl_->ToThrift(); }
+format::LogicalType LogicalType::ToThrift() const { return impl_->ToThrift(); }
-bool LogicalAnnotation::Equals(const LogicalAnnotation& other) const {
- return impl_->Equals(other);
-}
+bool LogicalType::Equals(const LogicalType& other) const { return impl_->Equals(other); }
-LogicalAnnotation::Type::type LogicalAnnotation::type() const { return impl_->type(); }
+LogicalType::Type::type LogicalType::type() const { return impl_->type(); }
-SortOrder::type LogicalAnnotation::sort_order() const { return impl_->sort_order(); }
+SortOrder::type LogicalType::sort_order() const { return impl_->sort_order(); }
-// Type checks for public LogicalAnnotation class
+// Type checks for public LogicalType class
-bool LogicalAnnotation::is_string() const {
- return impl_->type() == LogicalAnnotation::Type::STRING;
-}
-bool LogicalAnnotation::is_map() const {
- return impl_->type() == LogicalAnnotation::Type::MAP;
-}
-bool LogicalAnnotation::is_list() const {
- return impl_->type() == LogicalAnnotation::Type::LIST;
+bool LogicalType::is_string() const { return impl_->type() == LogicalType::Type::STRING; }
+bool LogicalType::is_map() const { return impl_->type() == LogicalType::Type::MAP; }
+bool LogicalType::is_list() const { return impl_->type() == LogicalType::Type::LIST; }
+bool LogicalType::is_enum() const { return impl_->type() == LogicalType::Type::ENUM; }
+bool LogicalType::is_decimal() const {
+ return impl_->type() == LogicalType::Type::DECIMAL;
}
-bool LogicalAnnotation::is_enum() const {
- return impl_->type() == LogicalAnnotation::Type::ENUM;
+bool LogicalType::is_date() const { return impl_->type() == LogicalType::Type::DATE; }
+bool LogicalType::is_time() const { return impl_->type() == LogicalType::Type::TIME; }
+bool LogicalType::is_timestamp() const {
+ return impl_->type() == LogicalType::Type::TIMESTAMP;
}
-bool LogicalAnnotation::is_decimal() const {
- return impl_->type() == LogicalAnnotation::Type::DECIMAL;
+bool LogicalType::is_interval() const {
+ return impl_->type() == LogicalType::Type::INTERVAL;
}
-bool LogicalAnnotation::is_date() const {
- return impl_->type() == LogicalAnnotation::Type::DATE;
+bool LogicalType::is_int() const { return impl_->type() == LogicalType::Type::INT; }
+bool LogicalType::is_null() const { return impl_->type() == LogicalType::Type::NIL; }
+bool LogicalType::is_JSON() const { return impl_->type() == LogicalType::Type::JSON; }
+bool LogicalType::is_BSON() const { return impl_->type() == LogicalType::Type::BSON; }
+bool LogicalType::is_UUID() const { return impl_->type() == LogicalType::Type::UUID; }
+bool LogicalType::is_none() const { return impl_->type() == LogicalType::Type::NONE; }
+bool LogicalType::is_valid() const { return impl_->type() != LogicalType::Type::UNKNOWN; }
+bool LogicalType::is_invalid() const { return !is_valid(); }
+bool LogicalType::is_nested() const {
+ return (impl_->type() == LogicalType::Type::LIST) ||
+ (impl_->type() == LogicalType::Type::MAP);
}
-bool LogicalAnnotation::is_time() const {
- return impl_->type() == LogicalAnnotation::Type::TIME;
-}
-bool LogicalAnnotation::is_timestamp() const {
- return impl_->type() == LogicalAnnotation::Type::TIMESTAMP;
-}
-bool LogicalAnnotation::is_interval() const {
- return impl_->type() == LogicalAnnotation::Type::INTERVAL;
-}
-bool LogicalAnnotation::is_int() const {
- return impl_->type() == LogicalAnnotation::Type::INT;
-}
-bool LogicalAnnotation::is_null() const {
- return impl_->type() == LogicalAnnotation::Type::NIL;
-}
-bool LogicalAnnotation::is_JSON() const {
- return impl_->type() == LogicalAnnotation::Type::JSON;
-}
-bool LogicalAnnotation::is_BSON() const {
- return impl_->type() == LogicalAnnotation::Type::BSON;
-}
-bool LogicalAnnotation::is_UUID() const {
- return impl_->type() == LogicalAnnotation::Type::UUID;
-}
-bool LogicalAnnotation::is_none() const {
- return impl_->type() == LogicalAnnotation::Type::NONE;
-}
-bool LogicalAnnotation::is_valid() const {
- return impl_->type() != LogicalAnnotation::Type::UNKNOWN;
-}
-bool LogicalAnnotation::is_invalid() const { return !is_valid(); }
-bool LogicalAnnotation::is_nested() const {
- return (impl_->type() == LogicalAnnotation::Type::LIST) ||
- (impl_->type() == LogicalAnnotation::Type::MAP);
-}
-bool LogicalAnnotation::is_nonnested() const { return !is_nested(); }
-bool LogicalAnnotation::is_serialized() const {
- return !((impl_->type() == LogicalAnnotation::Type::NONE) ||
- (impl_->type() == LogicalAnnotation::Type::UNKNOWN));
+bool LogicalType::is_nonnested() const { return !is_nested(); }
+bool LogicalType::is_serialized() const {
+ return !((impl_->type() == LogicalType::Type::NONE) ||
+ (impl_->type() == LogicalType::Type::UNKNOWN));
}
-// LogicalAnnotationImpl intermediate "compatibility" classes
+// LogicalTypeImpl intermediate "compatibility" classes
-class LogicalAnnotation::Impl::Compatible : public virtual LogicalAnnotation::Impl {
+class LogicalType::Impl::Compatible : public virtual LogicalType::Impl {
protected:
Compatible() = default;
};
@@ -748,58 +700,57 @@ class LogicalAnnotation::Impl::Compatible : public virtual LogicalAnnotation::Im
#define reset_decimal_metadata(m___) \
{ set_decimal_metadata(m___, false, -1, -1); }
-// For logical annotation types that always translate to the same converted type
-class LogicalAnnotation::Impl::SimpleCompatible
- : public virtual LogicalAnnotation::Impl::Compatible {
+// For logical types that always translate to the same converted type
+class LogicalType::Impl::SimpleCompatible : public virtual LogicalType::Impl::Compatible {
public:
- bool is_compatible(LogicalType::type converted_type,
+ bool is_compatible(ConvertedType::type converted_type,
schema::DecimalMetadata converted_decimal_metadata) const override {
return (converted_type == converted_type_) && !converted_decimal_metadata.isset;
}
- LogicalType::type ToConvertedType(
+ ConvertedType::type ToConvertedType(
schema::DecimalMetadata* out_decimal_metadata) const override {
reset_decimal_metadata(out_decimal_metadata);
return converted_type_;
}
protected:
- explicit SimpleCompatible(LogicalType::type c) : converted_type_(c) {}
+ explicit SimpleCompatible(ConvertedType::type c) : converted_type_(c) {}
private:
- LogicalType::type converted_type_ = LogicalType::NA;
+ ConvertedType::type converted_type_ = ConvertedType::NA;
};
-// For logical annotations that have no corresponding converted type
-class LogicalAnnotation::Impl::Incompatible : public virtual LogicalAnnotation::Impl {
+// For logical types that have no corresponding converted type
+class LogicalType::Impl::Incompatible : public virtual LogicalType::Impl {
public:
- bool is_compatible(LogicalType::type converted_type,
+ bool is_compatible(ConvertedType::type converted_type,
schema::DecimalMetadata converted_decimal_metadata) const override {
- return (converted_type == LogicalType::NONE || converted_type == LogicalType::NA) &&
+ return (converted_type == ConvertedType::NONE ||
+ converted_type == ConvertedType::NA) &&
!converted_decimal_metadata.isset;
}
- LogicalType::type ToConvertedType(
+ ConvertedType::type ToConvertedType(
schema::DecimalMetadata* out_decimal_metadata) const override {
reset_decimal_metadata(out_decimal_metadata);
- return LogicalType::NONE;
+ return ConvertedType::NONE;
}
protected:
Incompatible() = default;
};
-// LogicalAnnotationImpl intermediate "applicability" classes
+// LogicalTypeImpl intermediate "applicability" classes
-class LogicalAnnotation::Impl::Applicable : public virtual LogicalAnnotation::Impl {
+class LogicalType::Impl::Applicable : public virtual LogicalType::Impl {
protected:
Applicable() = default;
};
-// For logical annotations that can apply only to a single
+// For logical types that can apply only to a single
// physical type
-class LogicalAnnotation::Impl::SimpleApplicable
- : public virtual LogicalAnnotation::Impl::Applicable {
+class LogicalType::Impl::SimpleApplicable : public virtual LogicalType::Impl::Applicable {
public:
bool is_applicable(parquet::Type::type primitive_type,
int32_t primitive_length = -1) const override {
@@ -813,10 +764,10 @@ class LogicalAnnotation::Impl::SimpleApplicable
parquet::Type::type type_;
};
-// For logical annotations that can apply only to a particular
+// For logical types that can apply only to a particular
// physical type and physical length combination
-class LogicalAnnotation::Impl::TypeLengthApplicable
- : public virtual LogicalAnnotation::Impl::Applicable {
+class LogicalType::Impl::TypeLengthApplicable
+ : public virtual LogicalType::Impl::Applicable {
public:
bool is_applicable(parquet::Type::type primitive_type,
int32_t primitive_length = -1) const override {
@@ -831,9 +782,9 @@ class LogicalAnnotation::Impl::TypeLengthApplicable
int32_t length_;
};
-// For logical annotations that can apply to any physical type
-class LogicalAnnotation::Impl::UniversalApplicable
- : public virtual LogicalAnnotation::Impl::Applicable {
+// For logical types that can apply to any physical type
+class LogicalType::Impl::UniversalApplicable
+ : public virtual LogicalType::Impl::Applicable {
public:
bool is_applicable(parquet::Type::type primitive_type,
int32_t primitive_length = -1) const override {
@@ -844,9 +795,9 @@ class LogicalAnnotation::Impl::UniversalApplicable
UniversalApplicable() = default;
};
-// For logical annotations that can never apply to any primitive
+// For logical types that can never apply to any primitive
// physical type
-class LogicalAnnotation::Impl::Inapplicable : public virtual LogicalAnnotation::Impl {
+class LogicalType::Impl::Inapplicable : public virtual LogicalType::Impl {
public:
bool is_applicable(parquet::Type::type primitive_type,
int32_t primitive_length = -1) const override {
@@ -857,7 +808,7 @@ class LogicalAnnotation::Impl::Inapplicable : public virtual LogicalAnnotation::
Inapplicable() = default;
};
-// LogicalAnnotation implementation final classes
+// LogicalType implementation final classes
#define OVERRIDE_TOSTRING(n___) \
std::string ToString() const override { return #n___; }
@@ -870,45 +821,43 @@ class LogicalAnnotation::Impl::Inapplicable : public virtual LogicalAnnotation::
return type; \
}
-class LogicalAnnotation::Impl::String final
- : public LogicalAnnotation::Impl::SimpleCompatible,
- public LogicalAnnotation::Impl::SimpleApplicable {
+class LogicalType::Impl::String final : public LogicalType::Impl::SimpleCompatible,
+ public LogicalType::Impl::SimpleApplicable {
public:
- friend class StringAnnotation;
+ friend class StringLogicalType;
OVERRIDE_TOSTRING(String)
OVERRIDE_TOTHRIFT(StringType, STRING)
private:
String()
- : LogicalAnnotation::Impl(LogicalAnnotation::Type::STRING, SortOrder::UNSIGNED),
- LogicalAnnotation::Impl::SimpleCompatible(LogicalType::UTF8),
- LogicalAnnotation::Impl::SimpleApplicable(parquet::Type::BYTE_ARRAY) {}
+ : LogicalType::Impl(LogicalType::Type::STRING, SortOrder::UNSIGNED),
+ LogicalType::Impl::SimpleCompatible(ConvertedType::UTF8),
+ LogicalType::Impl::SimpleApplicable(parquet::Type::BYTE_ARRAY) {}
};
-// Each public annotation class's Make() creation method instantiates a corresponding
-// LogicalAnnotation::Impl::* object and installs that implementation in the annotation
+// Each public logical type class's Make() creation method instantiates a corresponding
+// LogicalType::Impl::* object and installs that implementation in the logical type
// it returns.
-#define GENERATE_MAKE(a___) \
- std::shared_ptr<const LogicalAnnotation> a___##Annotation::Make() { \
- auto* annotation = new a___##Annotation(); \
- annotation->impl_.reset(new LogicalAnnotation::Impl::a___()); \
- return std::shared_ptr<const LogicalAnnotation>(annotation); \
+#define GENERATE_MAKE(a___) \
+ std::shared_ptr<const LogicalType> a___##LogicalType::Make() { \
+ auto* logical_type = new a___##LogicalType(); \
+ logical_type->impl_.reset(new LogicalType::Impl::a___()); \
+ return std::shared_ptr<const LogicalType>(logical_type); \
}
GENERATE_MAKE(String)
-class LogicalAnnotation::Impl::Map final
- : public LogicalAnnotation::Impl::SimpleCompatible,
- public LogicalAnnotation::Impl::Inapplicable {
+class LogicalType::Impl::Map final : public LogicalType::Impl::SimpleCompatible,
+ public LogicalType::Impl::Inapplicable {
public:
- friend class MapAnnotation;
+ friend class MapLogicalType;
- bool is_compatible(LogicalType::type converted_type,
+ bool is_compatible(ConvertedType::type converted_type,
schema::DecimalMetadata converted_decimal_metadata) const override {
- return (converted_type == LogicalType::MAP ||
- converted_type == LogicalType::MAP_KEY_VALUE) &&
+ return (converted_type == ConvertedType::MAP ||
+ converted_type == ConvertedType::MAP_KEY_VALUE) &&
!converted_decimal_metadata.isset;
}
@@ -917,82 +866,79 @@ class LogicalAnnotation::Impl::Map final
private:
Map()
- : LogicalAnnotation::Impl(LogicalAnnotation::Type::MAP, SortOrder::UNKNOWN),
- LogicalAnnotation::Impl::SimpleCompatible(LogicalType::MAP) {}
+ : LogicalType::Impl(LogicalType::Type::MAP, SortOrder::UNKNOWN),
+ LogicalType::Impl::SimpleCompatible(ConvertedType::MAP) {}
};
GENERATE_MAKE(Map)
-class LogicalAnnotation::Impl::List final
- : public LogicalAnnotation::Impl::SimpleCompatible,
- public LogicalAnnotation::Impl::Inapplicable {
+class LogicalType::Impl::List final : public LogicalType::Impl::SimpleCompatible,
+ public LogicalType::Impl::Inapplicable {
public:
- friend class ListAnnotation;
+ friend class ListLogicalType;
OVERRIDE_TOSTRING(List)
OVERRIDE_TOTHRIFT(ListType, LIST)
private:
List()
- : LogicalAnnotation::Impl(LogicalAnnotation::Type::LIST, SortOrder::UNKNOWN),
- LogicalAnnotation::Impl::SimpleCompatible(LogicalType::LIST) {}
+ : LogicalType::Impl(LogicalType::Type::LIST, SortOrder::UNKNOWN),
+ LogicalType::Impl::SimpleCompatible(ConvertedType::LIST) {}
};
GENERATE_MAKE(List)
-class LogicalAnnotation::Impl::Enum final
- : public LogicalAnnotation::Impl::SimpleCompatible,
- public LogicalAnnotation::Impl::SimpleApplicable {
+class LogicalType::Impl::Enum final : public LogicalType::Impl::SimpleCompatible,
+ public LogicalType::Impl::SimpleApplicable {
public:
- friend class EnumAnnotation;
+ friend class EnumLogicalType;
OVERRIDE_TOSTRING(Enum)
OVERRIDE_TOTHRIFT(EnumType, ENUM)
private:
Enum()
- : LogicalAnnotation::Impl(LogicalAnnotation::Type::ENUM, SortOrder::UNSIGNED),
- LogicalAnnotation::Impl::SimpleCompatible(LogicalType::ENUM),
- LogicalAnnotation::Impl::SimpleApplicable(parquet::Type::BYTE_ARRAY) {}
+ : LogicalType::Impl(LogicalType::Type::ENUM, SortOrder::UNSIGNED),
+ LogicalType::Impl::SimpleCompatible(ConvertedType::ENUM),
+ LogicalType::Impl::SimpleApplicable(parquet::Type::BYTE_ARRAY) {}
};
GENERATE_MAKE(Enum)
-// The parameterized annotation types (currently Decimal, Time, Timestamp, and Int)
+// The parameterized logical types (currently Decimal, Time, Timestamp, and Int)
// generally can't reuse the simple method implementations available in the base and
// intermediate classes and must (re)implement them all
-class LogicalAnnotation::Impl::Decimal final
- : public LogicalAnnotation::Impl::Compatible,
- public LogicalAnnotation::Impl::Applicable {
+class LogicalType::Impl::Decimal final : public LogicalType::Impl::Compatible,
+ public LogicalType::Impl::Applicable {
public:
- friend class DecimalAnnotation;
+ friend class DecimalLogicalType;
bool is_applicable(parquet::Type::type primitive_type,
int32_t primitive_length = -1) const override;
- bool is_compatible(LogicalType::type converted_type,
+ bool is_compatible(ConvertedType::type converted_type,
schema::DecimalMetadata converted_decimal_metadata) const override;
- LogicalType::type ToConvertedType(
+ ConvertedType::type ToConvertedType(
schema::DecimalMetadata* out_decimal_metadata) const override;
std::string ToString() const override;
std::string ToJSON() const override;
format::LogicalType ToThrift() const override;
- bool Equals(const LogicalAnnotation& other) const override;
+ bool Equals(const LogicalType& other) const override;
int32_t precision() const { return precision_; }
int32_t scale() const { return scale_; }
private:
Decimal(int32_t p, int32_t s)
- : LogicalAnnotation::Impl(LogicalAnnotation::Type::DECIMAL, SortOrder::SIGNED),
+ : LogicalType::Impl(LogicalType::Type::DECIMAL, SortOrder::SIGNED),
precision_(p),
scale_(s) {}
int32_t precision_ = -1;
int32_t scale_ = -1;
};
-bool LogicalAnnotation::Impl::Decimal::is_applicable(parquet::Type::type primitive_type,
- int32_t primitive_length) const {
+bool LogicalType::Impl::Decimal::is_applicable(parquet::Type::type primitive_type,
+ int32_t primitive_length) const {
bool ok = false;
switch (primitive_type) {
case parquet::Type::INT32: {
@@ -1015,35 +961,35 @@ bool LogicalAnnotation::Impl::Decimal::is_applicable(parquet::Type::type primiti
return ok;
}
-bool LogicalAnnotation::Impl::Decimal::is_compatible(
- LogicalType::type converted_type,
+bool LogicalType::Impl::Decimal::is_compatible(
+ ConvertedType::type converted_type,
schema::DecimalMetadata converted_decimal_metadata) const {
- return converted_type == LogicalType::DECIMAL &&
+ return converted_type == ConvertedType::DECIMAL &&
(converted_decimal_metadata.isset &&
converted_decimal_metadata.scale == scale_ &&
converted_decimal_metadata.precision == precision_);
}
-LogicalType::type LogicalAnnotation::Impl::Decimal::ToConvertedType(
+ConvertedType::type LogicalType::Impl::Decimal::ToConvertedType(
schema::DecimalMetadata* out_decimal_metadata) const {
set_decimal_metadata(out_decimal_metadata, true, precision_, scale_);
- return LogicalType::DECIMAL;
+ return ConvertedType::DECIMAL;
}
-std::string LogicalAnnotation::Impl::Decimal::ToString() const {
+std::string LogicalType::Impl::Decimal::ToString() const {
std::stringstream type;
type << "Decimal(precision=" << precision_ << ", scale=" << scale_ << ")";
return type.str();
}
-std::string LogicalAnnotation::Impl::Decimal::ToJSON() const {
+std::string LogicalType::Impl::Decimal::ToJSON() const {
std::stringstream json;
json << R"({"Type": "Decimal", "precision": )" << precision_ << R"(, "scale": )"
<< scale_ << "}";
return json.str();
}
-format::LogicalType LogicalAnnotation::Impl::Decimal::ToThrift() const {
+format::LogicalType LogicalType::Impl::Decimal::ToThrift() const {
format::LogicalType type;
format::DecimalType decimal_type;
decimal_type.__set_precision(precision_);
@@ -1052,155 +998,154 @@ format::LogicalType LogicalAnnotation::Impl::Decimal::ToThrift() const {
return type;
}
-bool LogicalAnnotation::Impl::Decimal::Equals(const LogicalAnnotation& other) const {
+bool LogicalType::Impl::Decimal::Equals(const LogicalType& other) const {
bool eq = false;
if (other.is_decimal()) {
- const auto& other_decimal = checked_cast<const DecimalAnnotation&>(other);
+ const auto& other_decimal = checked_cast<const DecimalLogicalType&>(other);
eq = (precision_ == other_decimal.precision() && scale_ == other_decimal.scale());
}
return eq;
}
-std::shared_ptr<const LogicalAnnotation> DecimalAnnotation::Make(int32_t precision,
- int32_t scale) {
+std::shared_ptr<const LogicalType> DecimalLogicalType::Make(int32_t precision,
+ int32_t scale) {
if (precision < 1) {
throw ParquetException(
- "Precision must be greater than or equal to 1 for Decimal annotation");
+ "Precision must be greater than or equal to 1 for Decimal logical type");
}
if (scale < 0 || scale > precision) {
throw ParquetException(
"Scale must be a non-negative integer that does not exceed precision for "
- "Decimal annotation");
+ "Decimal logical type");
}
- auto* annotation = new DecimalAnnotation();
- annotation->impl_.reset(new LogicalAnnotation::Impl::Decimal(precision, scale));
- return std::shared_ptr<const LogicalAnnotation>(annotation);
+ auto* logical_type = new DecimalLogicalType();
+ logical_type->impl_.reset(new LogicalType::Impl::Decimal(precision, scale));
+ return std::shared_ptr<const LogicalType>(logical_type);
}
-int32_t DecimalAnnotation::precision() const {
- return (dynamic_cast<const LogicalAnnotation::Impl::Decimal&>(*impl_)).precision();
+int32_t DecimalLogicalType::precision() const {
+ return (dynamic_cast<const LogicalType::Impl::Decimal&>(*impl_)).precision();
}
-int32_t DecimalAnnotation::scale() const {
- return (dynamic_cast<const LogicalAnnotation::Impl::Decimal&>(*impl_)).scale();
+int32_t DecimalLogicalType::scale() const {
+ return (dynamic_cast<const LogicalType::Impl::Decimal&>(*impl_)).scale();
}
-class LogicalAnnotation::Impl::Date final
- : public LogicalAnnotation::Impl::SimpleCompatible,
- public LogicalAnnotation::Impl::SimpleApplicable {
+class LogicalType::Impl::Date final : public LogicalType::Impl::SimpleCompatible,
+ public LogicalType::Impl::SimpleApplicable {
public:
- friend class DateAnnotation;
+ friend class DateLogicalType;
OVERRIDE_TOSTRING(Date)
OVERRIDE_TOTHRIFT(DateType, DATE)
private:
Date()
- : LogicalAnnotation::Impl(LogicalAnnotation::Type::DATE, SortOrder::SIGNED),
- LogicalAnnotation::Impl::SimpleCompatible(LogicalType::DATE),
- LogicalAnnotation::Impl::SimpleApplicable(parquet::Type::INT32) {}
+ : LogicalType::Impl(LogicalType::Type::DATE, SortOrder::SIGNED),
+ LogicalType::Impl::SimpleCompatible(ConvertedType::DATE),
+ LogicalType::Impl::SimpleApplicable(parquet::Type::INT32) {}
};
GENERATE_MAKE(Date)
-#define time_unit_string(u___) \
- ((u___) == LogicalAnnotation::TimeUnit::MILLIS \
- ? "milliseconds" \
- : ((u___) == LogicalAnnotation::TimeUnit::MICROS \
- ? "microseconds" \
- : ((u___) == LogicalAnnotation::TimeUnit::NANOS ? "nanoseconds" \
- : "unknown")))
+#define time_unit_string(u___) \
+ ((u___) == LogicalType::TimeUnit::MILLIS \
+ ? "milliseconds" \
+ : ((u___) == LogicalType::TimeUnit::MICROS \
+ ? "microseconds" \
+ : ((u___) == LogicalType::TimeUnit::NANOS ? "nanoseconds" : "unknown")))
-class LogicalAnnotation::Impl::Time final : public LogicalAnnotation::Impl::Compatible,
- public LogicalAnnotation::Impl::Applicable {
+class LogicalType::Impl::Time final : public LogicalType::Impl::Compatible,
+ public LogicalType::Impl::Applicable {
public:
- friend class TimeAnnotation;
+ friend class TimeLogicalType;
bool is_applicable(parquet::Type::type primitive_type,
int32_t primitive_length = -1) const override;
- bool is_compatible(LogicalType::type converted_type,
+ bool is_compatible(ConvertedType::type converted_type,
schema::DecimalMetadata converted_decimal_metadata) const override;
- LogicalType::type ToConvertedType(
+ ConvertedType::type ToConvertedType(
schema::DecimalMetadata* out_decimal_metadata) const override;
std::string ToString() const override;
std::string ToJSON() const override;
format::LogicalType ToThrift() const override;
- bool Equals(const LogicalAnnotation& other) const override;
+ bool Equals(const LogicalType& other) const override;
bool is_adjusted_to_utc() const { return adjusted_; }
- LogicalAnnotation::TimeUnit::unit time_unit() const { return unit_; }
+ LogicalType::TimeUnit::unit time_unit() const { return unit_; }
private:
- Time(bool a, LogicalAnnotation::TimeUnit::unit u)
- : LogicalAnnotation::Impl(LogicalAnnotation::Type::TIME, SortOrder::SIGNED),
+ Time(bool a, LogicalType::TimeUnit::unit u)
+ : LogicalType::Impl(LogicalType::Type::TIME, SortOrder::SIGNED),
adjusted_(a),
unit_(u) {}
bool adjusted_ = false;
- LogicalAnnotation::TimeUnit::unit unit_;
+ LogicalType::TimeUnit::unit unit_;
};
-bool LogicalAnnotation::Impl::Time::is_applicable(parquet::Type::type primitive_type,
- int32_t primitive_length) const {
+bool LogicalType::Impl::Time::is_applicable(parquet::Type::type primitive_type,
+ int32_t primitive_length) const {
return (primitive_type == parquet::Type::INT32 &&
- unit_ == LogicalAnnotation::TimeUnit::MILLIS) ||
+ unit_ == LogicalType::TimeUnit::MILLIS) ||
(primitive_type == parquet::Type::INT64 &&
- (unit_ == LogicalAnnotation::TimeUnit::MICROS ||
- unit_ == LogicalAnnotation::TimeUnit::NANOS));
+ (unit_ == LogicalType::TimeUnit::MICROS ||
+ unit_ == LogicalType::TimeUnit::NANOS));
}
-bool LogicalAnnotation::Impl::Time::is_compatible(
- LogicalType::type converted_type,
+bool LogicalType::Impl::Time::is_compatible(
+ ConvertedType::type converted_type,
schema::DecimalMetadata converted_decimal_metadata) const {
if (converted_decimal_metadata.isset) {
return false;
- } else if (adjusted_ && unit_ == LogicalAnnotation::TimeUnit::MILLIS) {
- return converted_type == LogicalType::TIME_MILLIS;
- } else if (adjusted_ && unit_ == LogicalAnnotation::TimeUnit::MICROS) {
- return converted_type == LogicalType::TIME_MICROS;
+ } else if (adjusted_ && unit_ == LogicalType::TimeUnit::MILLIS) {
+ return converted_type == ConvertedType::TIME_MILLIS;
+ } else if (adjusted_ && unit_ == LogicalType::TimeUnit::MICROS) {
+ return converted_type == ConvertedType::TIME_MICROS;
} else {
- return (converted_type == LogicalType::NONE) || (converted_type == LogicalType::NA);
+ return (converted_type == ConvertedType::NONE) ||
+ (converted_type == ConvertedType::NA);
}
}
-LogicalType::type LogicalAnnotation::Impl::Time::ToConvertedType(
+ConvertedType::type LogicalType::Impl::Time::ToConvertedType(
schema::DecimalMetadata* out_decimal_metadata) const {
reset_decimal_metadata(out_decimal_metadata);
if (adjusted_) {
- if (unit_ == LogicalAnnotation::TimeUnit::MILLIS) {
- return LogicalType::TIME_MILLIS;
- } else if (unit_ == LogicalAnnotation::TimeUnit::MICROS) {
- return LogicalType::TIME_MICROS;
+ if (unit_ == LogicalType::TimeUnit::MILLIS) {
+ return ConvertedType::TIME_MILLIS;
+ } else if (unit_ == LogicalType::TimeUnit::MICROS) {
+ return ConvertedType::TIME_MICROS;
}
}
- return LogicalType::NONE;
+ return ConvertedType::NONE;
}
-std::string LogicalAnnotation::Impl::Time::ToString() const {
+std::string LogicalType::Impl::Time::ToString() const {
std::stringstream type;
type << "Time(isAdjustedToUTC=" << std::boolalpha << adjusted_
<< ", timeUnit=" << time_unit_string(unit_) << ")";
return type.str();
}
-std::string LogicalAnnotation::Impl::Time::ToJSON() const {
+std::string LogicalType::Impl::Time::ToJSON() const {
std::stringstream json;
json << R"({"Type": "Time", "isAdjustedToUTC": )" << std::boolalpha << adjusted_
<< R"(, "timeUnit": ")" << time_unit_string(unit_) << R"("})";
return json.str();
}
-format::LogicalType LogicalAnnotation::Impl::Time::ToThrift() const {
+format::LogicalType LogicalType::Impl::Time::ToThrift() const {
format::LogicalType type;
format::TimeType time_type;
format::TimeUnit time_unit;
- DCHECK(unit_ != LogicalAnnotation::TimeUnit::UNKNOWN);
- if (unit_ == LogicalAnnotation::TimeUnit::MILLIS) {
+ DCHECK(unit_ != LogicalType::TimeUnit::UNKNOWN);
+ if (unit_ == LogicalType::TimeUnit::MILLIS) {
format::MilliSeconds millis;
time_unit.__set_MILLIS(millis);
- } else if (unit_ == LogicalAnnotation::TimeUnit::MICROS) {
+ } else if (unit_ == LogicalType::TimeUnit::MICROS) {
format::MicroSeconds micros;
time_unit.__set_MICROS(micros);
- } else if (unit_ == LogicalAnnotation::TimeUnit::NANOS) {
+ } else if (unit_ == LogicalType::TimeUnit::NANOS) {
format::NanoSeconds nanos;
time_unit.__set_NANOS(nanos);
}
@@ -1210,121 +1155,119 @@ format::LogicalType LogicalAnnotation::Impl::Time::ToThrift() const {
return type;
}
-bool LogicalAnnotation::Impl::Time::Equals(const LogicalAnnotation& other) const {
+bool LogicalType::Impl::Time::Equals(const LogicalType& other) const {
bool eq = false;
if (other.is_time()) {
- const auto& other_time = checked_cast<const TimeAnnotation&>(other);
+ const auto& other_time = checked_cast<const TimeLogicalType&>(other);
eq =
(adjusted_ == other_time.is_adjusted_to_utc() && unit_ == other_time.time_unit());
}
return eq;
}
-std::shared_ptr<const LogicalAnnotation> TimeAnnotation::Make(
- bool is_adjusted_to_utc, LogicalAnnotation::TimeUnit::unit time_unit) {
- if (time_unit == LogicalAnnotation::TimeUnit::MILLIS ||
- time_unit == LogicalAnnotation::TimeUnit::MICROS ||
- time_unit == LogicalAnnotation::TimeUnit::NANOS) {
- auto* annotation = new TimeAnnotation();
- annotation->impl_.reset(
- new LogicalAnnotation::Impl::Time(is_adjusted_to_utc, time_unit));
- return std::shared_ptr<const LogicalAnnotation>(annotation);
+std::shared_ptr<const LogicalType> TimeLogicalType::Make(
+ bool is_adjusted_to_utc, LogicalType::TimeUnit::unit time_unit) {
+ if (time_unit == LogicalType::TimeUnit::MILLIS ||
+ time_unit == LogicalType::TimeUnit::MICROS ||
+ time_unit == LogicalType::TimeUnit::NANOS) {
+ auto* logical_type = new TimeLogicalType();
+ logical_type->impl_.reset(new LogicalType::Impl::Time(is_adjusted_to_utc, time_unit));
+ return std::shared_ptr<const LogicalType>(logical_type);
} else {
throw ParquetException(
- "TimeUnit must be one of MILLIS, MICROS, or NANOS for Time annotation");
+ "TimeUnit must be one of MILLIS, MICROS, or NANOS for Time logical type");
}
}
-bool TimeAnnotation::is_adjusted_to_utc() const {
- return (dynamic_cast<const LogicalAnnotation::Impl::Time&>(*impl_))
- .is_adjusted_to_utc();
+bool TimeLogicalType::is_adjusted_to_utc() const {
+ return (dynamic_cast<const LogicalType::Impl::Time&>(*impl_)).is_adjusted_to_utc();
}
-LogicalAnnotation::TimeUnit::unit TimeAnnotation::time_unit() const {
- return (dynamic_cast<const LogicalAnnotation::Impl::Time&>(*impl_)).time_unit();
+LogicalType::TimeUnit::unit TimeLogicalType::time_unit() const {
+ return (dynamic_cast<const LogicalType::Impl::Time&>(*impl_)).time_unit();
}
-class LogicalAnnotation::Impl::Timestamp final
- : public LogicalAnnotation::Impl::Compatible,
- public LogicalAnnotation::Impl::SimpleApplicable {
+class LogicalType::Impl::Timestamp final : public LogicalType::Impl::Compatible,
+ public LogicalType::Impl::SimpleApplicable {
public:
- friend class TimestampAnnotation;
+ friend class TimestampLogicalType;
- bool is_compatible(LogicalType::type converted_type,
+ bool is_compatible(ConvertedType::type converted_type,
schema::DecimalMetadata converted_decimal_metadata) const override;
- LogicalType::type ToConvertedType(
+ ConvertedType::type ToConvertedType(
schema::DecimalMetadata* out_decimal_metadata) const override;
std::string ToString() const override;
std::string ToJSON() const override;
format::LogicalType ToThrift() const override;
- bool Equals(const LogicalAnnotation& other) const override;
+ bool Equals(const LogicalType& other) const override;
bool is_adjusted_to_utc() const { return adjusted_; }
- LogicalAnnotation::TimeUnit::unit time_unit() const { return unit_; }
+ LogicalType::TimeUnit::unit time_unit() const { return unit_; }
private:
- Timestamp(bool a, LogicalAnnotation::TimeUnit::unit u)
- : LogicalAnnotation::Impl(LogicalAnnotation::Type::TIMESTAMP, SortOrder::SIGNED),
- LogicalAnnotation::Impl::SimpleApplicable(parquet::Type::INT64),
+ Timestamp(bool a, LogicalType::TimeUnit::unit u)
+ : LogicalType::Impl(LogicalType::Type::TIMESTAMP, SortOrder::SIGNED),
+ LogicalType::Impl::SimpleApplicable(parquet::Type::INT64),
adjusted_(a),
unit_(u) {}
bool adjusted_ = false;
- LogicalAnnotation::TimeUnit::unit unit_;
+ LogicalType::TimeUnit::unit unit_;
};
-bool LogicalAnnotation::Impl::Timestamp::is_compatible(
- LogicalType::type converted_type,
+bool LogicalType::Impl::Timestamp::is_compatible(
+ ConvertedType::type converted_type,
schema::DecimalMetadata converted_decimal_metadata) const {
if (converted_decimal_metadata.isset) {
return false;
- } else if (adjusted_ && unit_ == LogicalAnnotation::TimeUnit::MILLIS) {
- return converted_type == LogicalType::TIMESTAMP_MILLIS;
- } else if (adjusted_ && unit_ == LogicalAnnotation::TimeUnit::MICROS) {
- return converted_type == LogicalType::TIMESTAMP_MICROS;
+ } else if (adjusted_ && unit_ == LogicalType::TimeUnit::MILLIS) {
+ return converted_type == ConvertedType::TIMESTAMP_MILLIS;
+ } else if (adjusted_ && unit_ == LogicalType::TimeUnit::MICROS) {
+ return converted_type == ConvertedType::TIMESTAMP_MICROS;
} else {
- return (converted_type == LogicalType::NONE) || (converted_type == LogicalType::NA);
+ return (converted_type == ConvertedType::NONE) ||
+ (converted_type == ConvertedType::NA);
}
}
-LogicalType::type LogicalAnnotation::Impl::Timestamp::ToConvertedType(
+ConvertedType::type LogicalType::Impl::Timestamp::ToConvertedType(
schema::DecimalMetadata* out_decimal_metadata) const {
reset_decimal_metadata(out_decimal_metadata);
if (adjusted_) {
- if (unit_ == LogicalAnnotation::TimeUnit::MILLIS) {
- return LogicalType::TIMESTAMP_MILLIS;
- } else if (unit_ == LogicalAnnotation::TimeUnit::MICROS) {
- return LogicalType::TIMESTAMP_MICROS;
+ if (unit_ == LogicalType::TimeUnit::MILLIS) {
+ return ConvertedType::TIMESTAMP_MILLIS;
+ } else if (unit_ == LogicalType::TimeUnit::MICROS) {
+ return ConvertedType::TIMESTAMP_MICROS;
}
}
- return LogicalType::NONE;
+ return ConvertedType::NONE;
}
-std::string LogicalAnnotation::Impl::Timestamp::ToString() const {
+std::string LogicalType::Impl::Timestamp::ToString() const {
std::stringstream type;
type << "Timestamp(isAdjustedToUTC=" << std::boolalpha << adjusted_
<< ", timeUnit=" << time_unit_string(unit_) << ")";
return type.str();
}
-std::string LogicalAnnotation::Impl::Timestamp::ToJSON() const {
+std::string LogicalType::Impl::Timestamp::ToJSON() const {
std::stringstream json;
json << R"({"Type": "Timestamp", "isAdjustedToUTC": )" << std::boolalpha << adjusted_
<< R"(, "timeUnit": ")" << time_unit_string(unit_) << R"("})";
return json.str();
}
-format::LogicalType LogicalAnnotation::Impl::Timestamp::ToThrift() const {
+format::LogicalType LogicalType::Impl::Timestamp::ToThrift() const {
format::LogicalType type;
format::TimestampType timestamp_type;
format::TimeUnit time_unit;
- DCHECK(unit_ != LogicalAnnotation::TimeUnit::UNKNOWN);
- if (unit_ == LogicalAnnotation::TimeUnit::MILLIS) {
+ DCHECK(unit_ != LogicalType::TimeUnit::UNKNOWN);
+ if (unit_ == LogicalType::TimeUnit::MILLIS) {
format::MilliSeconds millis;
time_unit.__set_MILLIS(millis);
- } else if (unit_ == LogicalAnnotation::TimeUnit::MICROS) {
+ } else if (unit_ == LogicalType::TimeUnit::MICROS) {
format::MicroSeconds micros;
time_unit.__set_MICROS(micros);
- } else if (unit_ == LogicalAnnotation::TimeUnit::NANOS) {
+ } else if (unit_ == LogicalType::TimeUnit::NANOS) {
format::NanoSeconds nanos;
time_unit.__set_NANOS(nanos);
}
@@ -1334,165 +1277,163 @@ format::LogicalType LogicalAnnotation::Impl::Timestamp::ToThrift() const {
return type;
}
-bool LogicalAnnotation::Impl::Timestamp::Equals(const LogicalAnnotation& other) const {
+bool LogicalType::Impl::Timestamp::Equals(const LogicalType& other) const {
bool eq = false;
if (other.is_timestamp()) {
- const auto& other_timestamp = checked_cast<const TimestampAnnotation&>(other);
+ const auto& other_timestamp = checked_cast<const TimestampLogicalType&>(other);
eq = (adjusted_ == other_timestamp.is_adjusted_to_utc() &&
unit_ == other_timestamp.time_unit());
}
return eq;
}
-std::shared_ptr<const LogicalAnnotation> TimestampAnnotation::Make(
- bool is_adjusted_to_utc, LogicalAnnotation::TimeUnit::unit time_unit) {
- if (time_unit == LogicalAnnotation::TimeUnit::MILLIS ||
- time_unit == LogicalAnnotation::TimeUnit::MICROS ||
- time_unit == LogicalAnnotation::TimeUnit::NANOS) {
- auto* annotation = new TimestampAnnotation();
- annotation->impl_.reset(
- new LogicalAnnotation::Impl::Timestamp(is_adjusted_to_utc, time_unit));
- return std::shared_ptr<const LogicalAnnotation>(annotation);
+std::shared_ptr<const LogicalType> TimestampLogicalType::Make(
+ bool is_adjusted_to_utc, LogicalType::TimeUnit::unit time_unit) {
+ if (time_unit == LogicalType::TimeUnit::MILLIS ||
+ time_unit == LogicalType::TimeUnit::MICROS ||
+ time_unit == LogicalType::TimeUnit::NANOS) {
+ auto* logical_type = new TimestampLogicalType();
+ logical_type->impl_.reset(
+ new LogicalType::Impl::Timestamp(is_adjusted_to_utc, time_unit));
+ return std::shared_ptr<const LogicalType>(logical_type);
} else {
throw ParquetException(
- "TimeUnit must be one of MILLIS, MICROS, or NANOS for Timestamp annotation");
+ "TimeUnit must be one of MILLIS, MICROS, or NANOS for Timestamp logical type");
}
}
-bool TimestampAnnotation::is_adjusted_to_utc() const {
- return (dynamic_cast<const LogicalAnnotation::Impl::Timestamp&>(*impl_))
- .is_adjusted_to_utc();
+bool TimestampLogicalType::is_adjusted_to_utc() const {
+ return (dynamic_cast<const LogicalType::Impl::Timestamp&>(*impl_)).is_adjusted_to_utc();
}
-LogicalAnnotation::TimeUnit::unit TimestampAnnotation::time_unit() const {
- return (dynamic_cast<const LogicalAnnotation::Impl::Timestamp&>(*impl_)).time_unit();
+LogicalType::TimeUnit::unit TimestampLogicalType::time_unit() const {
+ return (dynamic_cast<const LogicalType::Impl::Timestamp&>(*impl_)).time_unit();
}
-class LogicalAnnotation::Impl::Interval final
- : public LogicalAnnotation::Impl::SimpleCompatible,
- public LogicalAnnotation::Impl::TypeLengthApplicable {
+class LogicalType::Impl::Interval final : public LogicalType::Impl::SimpleCompatible,
+ public LogicalType::Impl::TypeLengthApplicable {
public:
- friend class IntervalAnnotation;
+ friend class IntervalLogicalType;
OVERRIDE_TOSTRING(Interval)
// TODO(tpboudreau): uncomment the following line to enable serialization after
- // parquet.thrift recognizes IntervalType as a LogicalType
+ // parquet.thrift recognizes IntervalType as a ConvertedType
// OVERRIDE_TOTHRIFT(IntervalType, INTERVAL)
private:
Interval()
- : LogicalAnnotation::Impl(LogicalAnnotation::Type::INTERVAL, SortOrder::UNKNOWN),
- LogicalAnnotation::Impl::SimpleCompatible(LogicalType::INTERVAL),
- LogicalAnnotation::Impl::TypeLengthApplicable(parquet::Type::FIXED_LEN_BYTE_ARRAY,
- 12) {}
+ : LogicalType::Impl(LogicalType::Type::INTERVAL, SortOrder::UNKNOWN),
+ LogicalType::Impl::SimpleCompatible(ConvertedType::INTERVAL),
+ LogicalType::Impl::TypeLengthApplicable(parquet::Type::FIXED_LEN_BYTE_ARRAY, 12) {
+ }
};
GENERATE_MAKE(Interval)
-class LogicalAnnotation::Impl::Int final : public LogicalAnnotation::Impl::Compatible,
- public LogicalAnnotation::Impl::Applicable {
+class LogicalType::Impl::Int final : public LogicalType::Impl::Compatible,
+ public LogicalType::Impl::Applicable {
public:
- friend class IntAnnotation;
+ friend class IntLogicalType;
bool is_applicable(parquet::Type::type primitive_type,
int32_t primitive_length = -1) const override;
- bool is_compatible(LogicalType::type converted_type,
+ bool is_compatible(ConvertedType::type converted_type,
schema::DecimalMetadata converted_decimal_metadata) const override;
- LogicalType::type ToConvertedType(
+ ConvertedType::type ToConvertedType(
schema::DecimalMetadata* out_decimal_metadata) const override;
std::string ToString() const override;
std::string ToJSON() const override;
format::LogicalType ToThrift() const override;
- bool Equals(const LogicalAnnotation& other) const override;
+ bool Equals(const LogicalType& other) const override;
int bit_width() const { return width_; }
bool is_signed() const { return signed_; }
private:
Int(int w, bool s)
- : LogicalAnnotation::Impl(LogicalAnnotation::Type::INT,
- (s ? SortOrder::SIGNED : SortOrder::UNSIGNED)),
+ : LogicalType::Impl(LogicalType::Type::INT,
+ (s ? SortOrder::SIGNED : SortOrder::UNSIGNED)),
width_(w),
signed_(s) {}
int width_ = 0;
bool signed_ = false;
};
-bool LogicalAnnotation::Impl::Int::is_applicable(parquet::Type::type primitive_type,
- int32_t primitive_length) const {
+bool LogicalType::Impl::Int::is_applicable(parquet::Type::type primitive_type,
+ int32_t primitive_length) const {
return (primitive_type == parquet::Type::INT32 && width_ <= 32) ||
(primitive_type == parquet::Type::INT64 && width_ == 64);
}
-bool LogicalAnnotation::Impl::Int::is_compatible(
- LogicalType::type converted_type,
+bool LogicalType::Impl::Int::is_compatible(
+ ConvertedType::type converted_type,
schema::DecimalMetadata converted_decimal_metadata) const {
if (converted_decimal_metadata.isset) {
return false;
} else if (signed_ && width_ == 8) {
- return converted_type == LogicalType::INT_8;
+ return converted_type == ConvertedType::INT_8;
} else if (signed_ && width_ == 16) {
- return converted_type == LogicalType::INT_16;
+ return converted_type == ConvertedType::INT_16;
} else if (signed_ && width_ == 32) {
- return converted_type == LogicalType::INT_32;
+ return converted_type == ConvertedType::INT_32;
} else if (signed_ && width_ == 64) {
- return converted_type == LogicalType::INT_64;
+ return converted_type == ConvertedType::INT_64;
} else if (!signed_ && width_ == 8) {
- return converted_type == LogicalType::UINT_8;
+ return converted_type == ConvertedType::UINT_8;
} else if (!signed_ && width_ == 16) {
- return converted_type == LogicalType::UINT_16;
+ return converted_type == ConvertedType::UINT_16;
} else if (!signed_ && width_ == 32) {
- return converted_type == LogicalType::UINT_32;
+ return converted_type == ConvertedType::UINT_32;
} else if (!signed_ && width_ == 64) {
- return converted_type == LogicalType::UINT_64;
+ return converted_type == ConvertedType::UINT_64;
} else {
return false;
}
}
-LogicalType::type LogicalAnnotation::Impl::Int::ToConvertedType(
+ConvertedType::type LogicalType::Impl::Int::ToConvertedType(
schema::DecimalMetadata* out_decimal_metadata) const {
reset_decimal_metadata(out_decimal_metadata);
if (signed_) {
switch (width_) {
case 8:
- return LogicalType::INT_8;
+ return ConvertedType::INT_8;
case 16:
- return LogicalType::INT_16;
+ return ConvertedType::INT_16;
case 32:
- return LogicalType::INT_32;
+ return ConvertedType::INT_32;
case 64:
- return LogicalType::INT_64;
+ return ConvertedType::INT_64;
}
} else { // unsigned
switch (width_) {
case 8:
- return LogicalType::UINT_8;
+ return ConvertedType::UINT_8;
case 16:
- return LogicalType::UINT_16;
+ return ConvertedType::UINT_16;
case 32:
- return LogicalType::UINT_32;
+ return ConvertedType::UINT_32;
case 64:
- return LogicalType::UINT_64;
+ return ConvertedType::UINT_64;
}
}
- return LogicalType::NONE;
+ return ConvertedType::NONE;
}
-std::string LogicalAnnotation::Impl::Int::ToString() const {
+std::string LogicalType::Impl::Int::ToString() const {
std::stringstream type;
type << "Int(bitWidth=" << width_ << ", isSigned=" << std::boolalpha << signed_ << ")";
return type.str();
}
-std::string LogicalAnnotation::Impl::Int::ToJSON() const {
+std::string LogicalType::Impl::Int::ToJSON() const {
std::stringstream json;
json << R"({"Type": "Int", "bitWidth": )" << width_ << R"(, "isSigned": )"
<< std::boolalpha << signed_ << "}";
return json.str();
}
-format::LogicalType LogicalAnnotation::Impl::Int::ToThrift() const {
+format::LogicalType LogicalType::Impl::Int::ToThrift() const {
format::LogicalType type;
format::IntType int_type;
DCHECK(width_ == 64 || width_ == 32 || width_ == 16 || width_ == 8);
@@ -1502,132 +1443,125 @@ format::LogicalType LogicalAnnotation::Impl::Int::ToThrift() const {
return type;
}
-bool LogicalAnnotation::Impl::Int::Equals(const LogicalAnnotation& other) const {
+bool LogicalType::Impl::Int::Equals(const LogicalType& other) const {
bool eq = false;
if (other.is_int()) {
- const auto& other_int = checked_cast<const IntAnnotation&>(other);
+ const auto& other_int = checked_cast<const IntLogicalType&>(other);
eq = (width_ == other_int.bit_width() && signed_ == other_int.is_signed());
}
return eq;
}
-std::shared_ptr<const LogicalAnnotation> IntAnnotation::Make(int bit_width,
- bool is_signed) {
+std::shared_ptr<const LogicalType> IntLogicalType::Make(int bit_width, bool is_signed) {
if (bit_width == 8 || bit_width == 16 || bit_width == 32 || bit_width == 64) {
- auto* annotation = new IntAnnotation();
- annotation->impl_.reset(new LogicalAnnotation::Impl::Int(bit_width, is_signed));
- return std::shared_ptr<const LogicalAnnotation>(annotation);
+ auto* logical_type = new IntLogicalType();
+ logical_type->impl_.reset(new LogicalType::Impl::Int(bit_width, is_signed));
+ return std::shared_ptr<const LogicalType>(logical_type);
} else {
throw ParquetException(
- "Bit width must be exactly 8, 16, 32, or 64 for Int annotation");
+ "Bit width must be exactly 8, 16, 32, or 64 for Int logical type");
}
}
-int IntAnnotation::bit_width() const {
- return (dynamic_cast<const LogicalAnnotation::Impl::Int&>(*impl_)).bit_width();
+int IntLogicalType::bit_width() const {
+ return (dynamic_cast<const LogicalType::Impl::Int&>(*impl_)).bit_width();
}
-bool IntAnnotation::is_signed() const {
- return (dynamic_cast<const LogicalAnnotation::Impl::Int&>(*impl_)).is_signed();
+bool IntLogicalType::is_signed() const {
+ return (dynamic_cast<const LogicalType::Impl::Int&>(*impl_)).is_signed();
}
-class LogicalAnnotation::Impl::Null final
- : public LogicalAnnotation::Impl::Incompatible,
- public LogicalAnnotation::Impl::UniversalApplicable {
+class LogicalType::Impl::Null final : public LogicalType::Impl::Incompatible,
+ public LogicalType::Impl::UniversalApplicable {
public:
- friend class NullAnnotation;
+ friend class NullLogicalType;
OVERRIDE_TOSTRING(Null)
OVERRIDE_TOTHRIFT(NullType, UNKNOWN)
private:
- Null() : LogicalAnnotation::Impl(LogicalAnnotation::Type::NIL, SortOrder::UNKNOWN) {}
+ Null() : LogicalType::Impl(LogicalType::Type::NIL, SortOrder::UNKNOWN) {}
};
GENERATE_MAKE(Null)
-class LogicalAnnotation::Impl::JSON final
- : public LogicalAnnotation::Impl::SimpleCompatible,
- public LogicalAnnotation::Impl::SimpleApplicable {
+class LogicalType::Impl::JSON final : public LogicalType::Impl::SimpleCompatible,
+ public LogicalType::Impl::SimpleApplicable {
public:
- friend class JSONAnnotation;
+ friend class JSONLogicalType;
OVERRIDE_TOSTRING(JSON)
OVERRIDE_TOTHRIFT(JsonType, JSON)
private:
JSON()
- : LogicalAnnotation::Impl(LogicalAnnotation::Type::JSON, SortOrder::UNSIGNED),
- LogicalAnnotation::Impl::SimpleCompatible(LogicalType::JSON),
- LogicalAnnotation::Impl::SimpleApplicable(parquet::Type::BYTE_ARRAY) {}
+ : LogicalType::Impl(LogicalType::Type::JSON, SortOrder::UNSIGNED),
+ LogicalType::Impl::SimpleCompatible(ConvertedType::JSON),
+ LogicalType::Impl::SimpleApplicable(parquet::Type::BYTE_ARRAY) {}
};
GENERATE_MAKE(JSON)
-class LogicalAnnotation::Impl::BSON final
- : public LogicalAnnotation::Impl::SimpleCompatible,
- public LogicalAnnotation::Impl::SimpleApplicable {
+class LogicalType::Impl::BSON final : public LogicalType::Impl::SimpleCompatible,
+ public LogicalType::Impl::SimpleApplicable {
public:
- friend class BSONAnnotation;
+ friend class BSONLogicalType;
OVERRIDE_TOSTRING(BSON)
OVERRIDE_TOTHRIFT(BsonType, BSON)
private:
BSON()
- : LogicalAnnotation::Impl(LogicalAnnotation::Type::BSON, SortOrder::UNSIGNED),
- LogicalAnnotation::Impl::SimpleCompatible(LogicalType::BSON),
- LogicalAnnotation::Impl::SimpleApplicable(parquet::Type::BYTE_ARRAY) {}
+ : LogicalType::Impl(LogicalType::Type::BSON, SortOrder::UNSIGNED),
+ LogicalType::Impl::SimpleCompatible(ConvertedType::BSON),
+ LogicalType::Impl::SimpleApplicable(parquet::Type::BYTE_ARRAY) {}
};
GENERATE_MAKE(BSON)
-class LogicalAnnotation::Impl::UUID final
- : public LogicalAnnotation::Impl::Incompatible,
- public LogicalAnnotation::Impl::TypeLengthApplicable {
+class LogicalType::Impl::UUID final : public LogicalType::Impl::Incompatible,
+ public LogicalType::Impl::TypeLengthApplicable {
public:
- friend class UUIDAnnotation;
+ friend class UUIDLogicalType;
OVERRIDE_TOSTRING(UUID)
OVERRIDE_TOTHRIFT(UUIDType, UUID)
private:
UUID()
- : LogicalAnnotation::Impl(LogicalAnnotation::Type::UUID, SortOrder::UNSIGNED),
- LogicalAnnotation::Impl::TypeLengthApplicable(parquet::Type::FIXED_LEN_BYTE_ARRAY,
- 16) {}
+ : LogicalType::Impl(LogicalType::Type::UUID, SortOrder::UNSIGNED),
+ LogicalType::Impl::TypeLengthApplicable(parquet::Type::FIXED_LEN_BYTE_ARRAY, 16) {
+ }
};
GENERATE_MAKE(UUID)
-class LogicalAnnotation::Impl::No final
- : public LogicalAnnotation::Impl::SimpleCompatible,
- public LogicalAnnotation::Impl::UniversalApplicable {
+class LogicalType::Impl::No final : public LogicalType::Impl::SimpleCompatible,
+ public LogicalType::Impl::UniversalApplicable {
public:
- friend class NoAnnotation;
+ friend class NoLogicalType;
OVERRIDE_TOSTRING(None)
private:
No()
- : LogicalAnnotation::Impl(LogicalAnnotation::Type::NONE, SortOrder::UNKNOWN),
- LogicalAnnotation::Impl::SimpleCompatible(LogicalType::NONE) {}
+ : LogicalType::Impl(LogicalType::Type::NONE, SortOrder::UNKNOWN),
+ LogicalType::Impl::SimpleCompatible(ConvertedType::NONE) {}
};
GENERATE_MAKE(No)
-class LogicalAnnotation::Impl::Unknown final
- : public LogicalAnnotation::Impl::SimpleCompatible,
- public LogicalAnnotation::Impl::UniversalApplicable {
+class LogicalType::Impl::Unknown final : public LogicalType::Impl::SimpleCompatible,
+ public LogicalType::Impl::UniversalApplicable {
public:
- friend class UnknownAnnotation;
+ friend class UnknownLogicalType;
OVERRIDE_TOSTRING(Unknown)
private:
Unknown()
- : LogicalAnnotation::Impl(LogicalAnnotation::Type::UNKNOWN, SortOrder::UNKNOWN),
- LogicalAnnotation::Impl::SimpleCompatible(LogicalType::NA) {}
+ : LogicalType::Impl(LogicalType::Type::UNKNOWN, SortOrder::UNKNOWN),
+ LogicalType::Impl::SimpleCompatible(ConvertedType::NA) {}
};
GENERATE_MAKE(Unknown)
diff --git a/cpp/src/parquet/types.h b/cpp/src/parquet/types.h
index 779ea6b..0bfaf99 100644
--- a/cpp/src/parquet/types.h
+++ b/cpp/src/parquet/types.h
@@ -68,7 +68,7 @@ struct Type {
};
// Mirrors parquet::ConvertedType
-struct LogicalType {
+struct ConvertedType {
enum type {
NONE,
UTF8,
@@ -99,6 +99,7 @@ struct LogicalType {
};
};
+// forward declaration
namespace format {
class LogicalType;
@@ -133,8 +134,8 @@ struct DecimalMetadata {
} // namespace schema
-/// \brief Implementation of parquet.thrift LogicalType annotations.
-class PARQUET_EXPORT LogicalAnnotation {
+/// \brief Implementation of parquet.thrift LogicalType types.
+class PARQUET_EXPORT LogicalType {
public:
struct Type {
enum type {
@@ -161,70 +162,69 @@ class PARQUET_EXPORT LogicalAnnotation {
enum unit { UNKNOWN = 0, MILLIS = 1, MICROS, NANOS };
};
- /// \brief If possible, return an annotation equivalent to the given legacy converted
- /// type (and decimal metadata if applicable).
- static std::shared_ptr<const LogicalAnnotation> FromConvertedType(
- const parquet::LogicalType::type converted_type,
+ /// \brief If possible, return a logical type equivalent to the given legacy
+ /// converted type (and decimal metadata if applicable).
+ static std::shared_ptr<const LogicalType> FromConvertedType(
+ const parquet::ConvertedType::type converted_type,
const parquet::schema::DecimalMetadata converted_decimal_metadata = {false, -1,
-1});
- /// \brief Return the annotation represented by the Thrift intermediary object.
- static std::shared_ptr<const LogicalAnnotation> FromThrift(
+ /// \brief Return the logical type represented by the Thrift intermediary object.
+ static std::shared_ptr<const LogicalType> FromThrift(
const parquet::format::LogicalType& thrift_logical_type);
- /// \brief Return the explicitly requested annotation type.
- static std::shared_ptr<const LogicalAnnotation> String();
- static std::shared_ptr<const LogicalAnnotation> Map();
- static std::shared_ptr<const LogicalAnnotation> List();
- static std::shared_ptr<const LogicalAnnotation> Enum();
- static std::shared_ptr<const LogicalAnnotation> Decimal(int32_t precision,
- int32_t scale = 0);
- static std::shared_ptr<const LogicalAnnotation> Date();
- static std::shared_ptr<const LogicalAnnotation> Time(
- bool is_adjusted_to_utc, LogicalAnnotation::TimeUnit::unit time_unit);
- static std::shared_ptr<const LogicalAnnotation> Timestamp(
- bool is_adjusted_to_utc, LogicalAnnotation::TimeUnit::unit time_unit);
- static std::shared_ptr<const LogicalAnnotation> Interval();
- static std::shared_ptr<const LogicalAnnotation> Int(int bit_width, bool is_signed);
- static std::shared_ptr<const LogicalAnnotation> Null();
- static std::shared_ptr<const LogicalAnnotation> JSON();
- static std::shared_ptr<const LogicalAnnotation> BSON();
- static std::shared_ptr<const LogicalAnnotation> UUID();
- static std::shared_ptr<const LogicalAnnotation> None();
- static std::shared_ptr<const LogicalAnnotation> Unknown();
-
- /// \brief Return true if this annotation is consistent with the given underlying
+ /// \brief Return the explicitly requested logical type.
+ static std::shared_ptr<const LogicalType> String();
+ static std::shared_ptr<const LogicalType> Map();
+ static std::shared_ptr<const LogicalType> List();
+ static std::shared_ptr<const LogicalType> Enum();
+ static std::shared_ptr<const LogicalType> Decimal(int32_t precision, int32_t scale = 0);
+ static std::shared_ptr<const LogicalType> Date();
+ static std::shared_ptr<const LogicalType> Time(bool is_adjusted_to_utc,
+ LogicalType::TimeUnit::unit time_unit);
+ static std::shared_ptr<const LogicalType> Timestamp(
+ bool is_adjusted_to_utc, LogicalType::TimeUnit::unit time_unit);
+ static std::shared_ptr<const LogicalType> Interval();
+ static std::shared_ptr<const LogicalType> Int(int bit_width, bool is_signed);
+ static std::shared_ptr<const LogicalType> Null();
+ static std::shared_ptr<const LogicalType> JSON();
+ static std::shared_ptr<const LogicalType> BSON();
+ static std::shared_ptr<const LogicalType> UUID();
+ static std::shared_ptr<const LogicalType> None();
+ static std::shared_ptr<const LogicalType> Unknown();
+
+ /// \brief Return true if this logical type is consistent with the given underlying
/// physical type.
bool is_applicable(parquet::Type::type primitive_type,
int32_t primitive_length = -1) const;
- /// \brief Return true if this annotation is equivalent to the given legacy converted
+ /// \brief Return true if this logical type is equivalent to the given legacy converted
/// type (and decimal metadata if applicable).
- bool is_compatible(parquet::LogicalType::type converted_type,
+ bool is_compatible(parquet::ConvertedType::type converted_type,
parquet::schema::DecimalMetadata converted_decimal_metadata = {
false, -1, -1}) const;
/// \brief If possible, return the legacy converted type (and decimal metadata if
- /// applicable) equivalent to this annotation.
- parquet::LogicalType::type ToConvertedType(
+ /// applicable) equivalent to this logical type.
+ parquet::ConvertedType::type ToConvertedType(
parquet::schema::DecimalMetadata* out_decimal_metadata) const;
- /// \brief Return a printable representation of this annotation.
+ /// \brief Return a printable representation of this logical type.
std::string ToString() const;
- /// \brief Return a JSON representation of this annotation.
+ /// \brief Return a JSON representation of this logical type.
std::string ToJSON() const;
- /// \brief Return a serializable Thrift object for this annotation.
+ /// \brief Return a serializable Thrift object for this logical type.
parquet::format::LogicalType ToThrift() const;
- /// \brief Return true if the given annotation is equivalent to this annotation.
- bool Equals(const LogicalAnnotation& other) const;
+ /// \brief Return true if the given logical type is equivalent to this logical type.
+ bool Equals(const LogicalType& other) const;
- /// \brief Return the enumerated type of this annotation.
- LogicalAnnotation::Type::type type() const;
+ /// \brief Return the enumerated type of this logical type.
+ LogicalType::Type::type type() const;
- /// \brief Return the appropriate sort order for this annotation.
+ /// \brief Return the appropriate sort order for this logical type.
SortOrder::type sort_order() const;
// Type checks ...
@@ -243,182 +243,182 @@ class PARQUET_EXPORT LogicalAnnotation {
bool is_BSON() const;
bool is_UUID() const;
bool is_none() const;
- /// \brief Return true if this annotation is of a known type.
+ /// \brief Return true if this logical type is of a known type.
bool is_valid() const;
bool is_invalid() const;
- /// \brief Return true if this annotation is suitable for a schema GroupNode.
+ /// \brief Return true if this logical type is suitable for a schema GroupNode.
bool is_nested() const;
bool is_nonnested() const;
- /// \brief Return true if this annotation is included in the Thrift output for its node.
+ /// \brief Return true if this logical type is included in the Thrift output for its
+ /// node.
bool is_serialized() const;
- LogicalAnnotation(const LogicalAnnotation&) = delete;
- LogicalAnnotation& operator=(const LogicalAnnotation&) = delete;
- virtual ~LogicalAnnotation() noexcept;
+ LogicalType(const LogicalType&) = delete;
+ LogicalType& operator=(const LogicalType&) = delete;
+ virtual ~LogicalType() noexcept;
protected:
- LogicalAnnotation();
+ LogicalType();
class Impl;
std::unique_ptr<const Impl> impl_;
};
/// \brief Allowed for physical type BYTE_ARRAY, must be encoded as UTF-8.
-class PARQUET_EXPORT StringAnnotation : public LogicalAnnotation {
+class PARQUET_EXPORT StringLogicalType : public LogicalType {
public:
- static std::shared_ptr<const LogicalAnnotation> Make();
+ static std::shared_ptr<const LogicalType> Make();
private:
- StringAnnotation() = default;
+ StringLogicalType() = default;
};
/// \brief Allowed for group nodes only.
-class PARQUET_EXPORT MapAnnotation : public LogicalAnnotation {
+class PARQUET_EXPORT MapLogicalType : public LogicalType {
public:
- static std::shared_ptr<const LogicalAnnotation> Make();
+ static std::shared_ptr<const LogicalType> Make();
private:
- MapAnnotation() = default;
+ MapLogicalType() = default;
};
/// \brief Allowed for group nodes only.
-class PARQUET_EXPORT ListAnnotation : public LogicalAnnotation {
+class PARQUET_EXPORT ListLogicalType : public LogicalType {
public:
- static std::shared_ptr<const LogicalAnnotation> Make();
+ static std::shared_ptr<const LogicalType> Make();
private:
- ListAnnotation() = default;
+ ListLogicalType() = default;
};
/// \brief Allowed for physical type BYTE_ARRAY, must be encoded as UTF-8.
-class PARQUET_EXPORT EnumAnnotation : public LogicalAnnotation {
+class PARQUET_EXPORT EnumLogicalType : public LogicalType {
public:
- static std::shared_ptr<const LogicalAnnotation> Make();
+ static std::shared_ptr<const LogicalType> Make();
private:
- EnumAnnotation() = default;
+ EnumLogicalType() = default;
};
/// \brief Allowed for physical type INT32, INT64, FIXED_LEN_BYTE_ARRAY, or BYTE_ARRAY,
/// depending on the precision.
-class PARQUET_EXPORT DecimalAnnotation : public LogicalAnnotation {
+class PARQUET_EXPORT DecimalLogicalType : public LogicalType {
public:
- static std::shared_ptr<const LogicalAnnotation> Make(int32_t precision,
- int32_t scale = 0);
+ static std::shared_ptr<const LogicalType> Make(int32_t precision, int32_t scale = 0);
int32_t precision() const;
int32_t scale() const;
private:
- DecimalAnnotation() = default;
+ DecimalLogicalType() = default;
};
/// \brief Allowed for physical type INT32.
-class PARQUET_EXPORT DateAnnotation : public LogicalAnnotation {
+class PARQUET_EXPORT DateLogicalType : public LogicalType {
public:
- static std::shared_ptr<const LogicalAnnotation> Make();
+ static std::shared_ptr<const LogicalType> Make();
private:
- DateAnnotation() = default;
+ DateLogicalType() = default;
};
/// \brief Allowed for physical type INT32 (for MILLIS) or INT64 (for MICROS and NANOS).
-class PARQUET_EXPORT TimeAnnotation : public LogicalAnnotation {
+class PARQUET_EXPORT TimeLogicalType : public LogicalType {
public:
- static std::shared_ptr<const LogicalAnnotation> Make(
- bool is_adjusted_to_utc, LogicalAnnotation::TimeUnit::unit time_unit);
+ static std::shared_ptr<const LogicalType> Make(bool is_adjusted_to_utc,
+ LogicalType::TimeUnit::unit time_unit);
bool is_adjusted_to_utc() const;
- LogicalAnnotation::TimeUnit::unit time_unit() const;
+ LogicalType::TimeUnit::unit time_unit() const;
private:
- TimeAnnotation() = default;
+ TimeLogicalType() = default;
};
/// \brief Allowed for physical type INT64.
-class PARQUET_EXPORT TimestampAnnotation : public LogicalAnnotation {
+class PARQUET_EXPORT TimestampLogicalType : public LogicalType {
public:
- static std::shared_ptr<const LogicalAnnotation> Make(
- bool is_adjusted_to_utc, LogicalAnnotation::TimeUnit::unit time_unit);
+ static std::shared_ptr<const LogicalType> Make(bool is_adjusted_to_utc,
+ LogicalType::TimeUnit::unit time_unit);
bool is_adjusted_to_utc() const;
- LogicalAnnotation::TimeUnit::unit time_unit() const;
+ LogicalType::TimeUnit::unit time_unit() const;
private:
- TimestampAnnotation() = default;
+ TimestampLogicalType() = default;
};
/// \brief Allowed for physical type FIXED_LEN_BYTE_ARRAY with length 12
-class PARQUET_EXPORT IntervalAnnotation : public LogicalAnnotation {
+class PARQUET_EXPORT IntervalLogicalType : public LogicalType {
public:
- static std::shared_ptr<const LogicalAnnotation> Make();
+ static std::shared_ptr<const LogicalType> Make();
private:
- IntervalAnnotation() = default;
+ IntervalLogicalType() = default;
};
/// \brief Allowed for physical type INT32 (for bit widths 8, 16, and 32) and INT64
/// (for bit width 64).
-class PARQUET_EXPORT IntAnnotation : public LogicalAnnotation {
+class PARQUET_EXPORT IntLogicalType : public LogicalType {
public:
- static std::shared_ptr<const LogicalAnnotation> Make(int bit_width, bool is_signed);
+ static std::shared_ptr<const LogicalType> Make(int bit_width, bool is_signed);
int bit_width() const;
bool is_signed() const;
private:
- IntAnnotation() = default;
+ IntLogicalType() = default;
};
/// \brief Allowed for any physical type.
-class PARQUET_EXPORT NullAnnotation : public LogicalAnnotation {
+class PARQUET_EXPORT NullLogicalType : public LogicalType {
public:
- static std::shared_ptr<const LogicalAnnotation> Make();
+ static std::shared_ptr<const LogicalType> Make();
private:
- NullAnnotation() = default;
+ NullLogicalType() = default;
};
/// \brief Allowed for physical type BYTE_ARRAY.
-class PARQUET_EXPORT JSONAnnotation : public LogicalAnnotation {
+class PARQUET_EXPORT JSONLogicalType : public LogicalType {
public:
- static std::shared_ptr<const LogicalAnnotation> Make();
+ static std::shared_ptr<const LogicalType> Make();
private:
- JSONAnnotation() = default;
+ JSONLogicalType() = default;
};
/// \brief Allowed for physical type BYTE_ARRAY.
-class PARQUET_EXPORT BSONAnnotation : public LogicalAnnotation {
+class PARQUET_EXPORT BSONLogicalType : public LogicalType {
public:
- static std::shared_ptr<const LogicalAnnotation> Make();
+ static std::shared_ptr<const LogicalType> Make();
private:
- BSONAnnotation() = default;
+ BSONLogicalType() = default;
};
/// \brief Allowed for physical type FIXED_LEN_BYTE_ARRAY with length 16,
/// must encode raw UUID bytes.
-class PARQUET_EXPORT UUIDAnnotation : public LogicalAnnotation {
+class PARQUET_EXPORT UUIDLogicalType : public LogicalType {
public:
- static std::shared_ptr<const LogicalAnnotation> Make();
+ static std::shared_ptr<const LogicalType> Make();
private:
- UUIDAnnotation() = default;
+ UUIDLogicalType() = default;
};
/// \brief Allowed for any physical type.
-class PARQUET_EXPORT NoAnnotation : public LogicalAnnotation {
+class PARQUET_EXPORT NoLogicalType : public LogicalType {
public:
- static std::shared_ptr<const LogicalAnnotation> Make();
+ static std::shared_ptr<const LogicalType> Make();
private:
- NoAnnotation() = default;
+ NoLogicalType() = default;
};
/// \brief Allowed for any type.
-class PARQUET_EXPORT UnknownAnnotation : public LogicalAnnotation {
+class PARQUET_EXPORT UnknownLogicalType : public LogicalType {
public:
- static std::shared_ptr<const LogicalAnnotation> Make();
+ static std::shared_ptr<const LogicalType> Make();
private:
- UnknownAnnotation() = default;
+ UnknownLogicalType() = default;
};
// Data encodings. Mirrors parquet::Encoding
@@ -636,7 +636,7 @@ PARQUET_EXPORT std::string CompressionToString(Compression::type t);
PARQUET_EXPORT std::string EncodingToString(Encoding::type t);
-PARQUET_EXPORT std::string LogicalTypeToString(LogicalType::type t);
+PARQUET_EXPORT std::string ConvertedTypeToString(ConvertedType::type t);
PARQUET_EXPORT std::string TypeToString(Type::type t);
@@ -651,11 +651,11 @@ PARQUET_EXPORT int GetTypeByteSize(Type::type t);
PARQUET_EXPORT SortOrder::type DefaultSortOrder(Type::type primitive);
-PARQUET_EXPORT SortOrder::type GetSortOrder(LogicalType::type converted,
+PARQUET_EXPORT SortOrder::type GetSortOrder(ConvertedType::type converted,
Type::type primitive);
PARQUET_EXPORT SortOrder::type GetSortOrder(
- const std::shared_ptr<const LogicalAnnotation>& annotation, Type::type primitive);
+ const std::shared_ptr<const LogicalType>& logical_type, Type::type primitive);
} // namespace parquet
diff --git a/python/pyarrow/_parquet.pxd b/python/pyarrow/_parquet.pxd
index 53f3297..c34c89e 100644
--- a/python/pyarrow/_parquet.pxd
+++ b/python/pyarrow/_parquet.pxd
@@ -54,32 +54,33 @@ cdef extern from "parquet/api/schema.h" namespace "parquet" nogil:
ParquetType_BYTE_ARRAY" parquet::Type::BYTE_ARRAY"
ParquetType_FIXED_LEN_BYTE_ARRAY" parquet::Type::FIXED_LEN_BYTE_ARRAY"
- enum ParquetLogicalType" parquet::LogicalType::type":
- ParquetLogicalType_NONE" parquet::LogicalType::NONE"
- ParquetLogicalType_UTF8" parquet::LogicalType::UTF8"
- ParquetLogicalType_MAP" parquet::LogicalType::MAP"
- ParquetLogicalType_MAP_KEY_VALUE" parquet::LogicalType::MAP_KEY_VALUE"
- ParquetLogicalType_LIST" parquet::LogicalType::LIST"
- ParquetLogicalType_ENUM" parquet::LogicalType::ENUM"
- ParquetLogicalType_DECIMAL" parquet::LogicalType::DECIMAL"
- ParquetLogicalType_DATE" parquet::LogicalType::DATE"
- ParquetLogicalType_TIME_MILLIS" parquet::LogicalType::TIME_MILLIS"
- ParquetLogicalType_TIME_MICROS" parquet::LogicalType::TIME_MICROS"
- ParquetLogicalType_TIMESTAMP_MILLIS \
- " parquet::LogicalType::TIMESTAMP_MILLIS"
- ParquetLogicalType_TIMESTAMP_MICROS \
- " parquet::LogicalType::TIMESTAMP_MICROS"
- ParquetLogicalType_UINT_8" parquet::LogicalType::UINT_8"
- ParquetLogicalType_UINT_16" parquet::LogicalType::UINT_16"
- ParquetLogicalType_UINT_32" parquet::LogicalType::UINT_32"
- ParquetLogicalType_UINT_64" parquet::LogicalType::UINT_64"
- ParquetLogicalType_INT_8" parquet::LogicalType::INT_8"
- ParquetLogicalType_INT_16" parquet::LogicalType::INT_16"
- ParquetLogicalType_INT_32" parquet::LogicalType::INT_32"
- ParquetLogicalType_INT_64" parquet::LogicalType::INT_64"
- ParquetLogicalType_JSON" parquet::LogicalType::JSON"
- ParquetLogicalType_BSON" parquet::LogicalType::BSON"
- ParquetLogicalType_INTERVAL" parquet::LogicalType::INTERVAL"
+ enum ParquetConvertedType" parquet::ConvertedType::type":
+ ParquetConvertedType_NONE" parquet::ConvertedType::NONE"
+ ParquetConvertedType_UTF8" parquet::ConvertedType::UTF8"
+ ParquetConvertedType_MAP" parquet::ConvertedType::MAP"
+ ParquetConvertedType_MAP_KEY_VALUE \
+ " parquet::ConvertedType::MAP_KEY_VALUE"
+ ParquetConvertedType_LIST" parquet::ConvertedType::LIST"
+ ParquetConvertedType_ENUM" parquet::ConvertedType::ENUM"
+ ParquetConvertedType_DECIMAL" parquet::ConvertedType::DECIMAL"
+ ParquetConvertedType_DATE" parquet::ConvertedType::DATE"
+ ParquetConvertedType_TIME_MILLIS" parquet::ConvertedType::TIME_MILLIS"
+ ParquetConvertedType_TIME_MICROS" parquet::ConvertedType::TIME_MICROS"
+ ParquetConvertedType_TIMESTAMP_MILLIS \
+ " parquet::ConvertedType::TIMESTAMP_MILLIS"
+ ParquetConvertedType_TIMESTAMP_MICROS \
+ " parquet::ConvertedType::TIMESTAMP_MICROS"
+ ParquetConvertedType_UINT_8" parquet::ConvertedType::UINT_8"
+ ParquetConvertedType_UINT_16" parquet::ConvertedType::UINT_16"
+ ParquetConvertedType_UINT_32" parquet::ConvertedType::UINT_32"
+ ParquetConvertedType_UINT_64" parquet::ConvertedType::UINT_64"
+ ParquetConvertedType_INT_8" parquet::ConvertedType::INT_8"
+ ParquetConvertedType_INT_16" parquet::ConvertedType::INT_16"
+ ParquetConvertedType_INT_32" parquet::ConvertedType::INT_32"
+ ParquetConvertedType_INT_64" parquet::ConvertedType::INT_64"
+ ParquetConvertedType_JSON" parquet::ConvertedType::JSON"
+ ParquetConvertedType_BSON" parquet::ConvertedType::BSON"
+ ParquetConvertedType_INTERVAL" parquet::ConvertedType::INTERVAL"
enum ParquetRepetition" parquet::Repetition::type":
ParquetRepetition_REQUIRED" parquet::REPETITION::REQUIRED"
@@ -124,7 +125,7 @@ cdef extern from "parquet/api/schema.h" namespace "parquet" nogil:
int16_t max_repetition_level()
ParquetType physical_type()
- ParquetLogicalType logical_type()
+ ParquetConvertedType converted_type()
const c_string& name()
int type_length()
int type_precision()
diff --git a/python/pyarrow/_parquet.pyx b/python/pyarrow/_parquet.pyx
index 262d485..6cfc104 100644
--- a/python/pyarrow/_parquet.pyx
+++ b/python/pyarrow/_parquet.pyx
@@ -581,10 +581,10 @@ cdef class ParquetSchema:
elements = []
for i in range(self.schema.num_columns()):
col = self.column(i)
- logical_type = col.logical_type
+ converted_type = col.converted_type
formatted = '{0}: {1}'.format(col.path, col.physical_type)
- if logical_type != 'NONE':
- formatted += ' {0}'.format(logical_type)
+ if converted_type != 'NONE':
+ formatted += ' {0}'.format(converted_type)
elements.append(formatted)
return """{0}
@@ -668,13 +668,13 @@ cdef class ColumnSchema:
def __repr__(self):
physical_type = self.physical_type
- logical_type = self.logical_type
- if logical_type == 'DECIMAL':
- logical_type = 'DECIMAL({0}, {1})'.format(self.precision,
- self.scale)
+ converted_type = self.converted_type
+ if converted_type == 'DECIMAL':
+ converted_type = 'DECIMAL({0}, {1})'.format(self.precision,
+ self.scale)
elif physical_type == 'FIXED_LEN_BYTE_ARRAY':
- logical_type = ('FIXED_LEN_BYTE_ARRAY(length={0})'
- .format(self.length))
+ converted_type = ('FIXED_LEN_BYTE_ARRAY(length={0})'
+ .format(self.length))
return """<ParquetColumnSchema>
name: {0}
@@ -682,9 +682,10 @@ cdef class ColumnSchema:
max_definition_level: {2}
max_repetition_level: {3}
physical_type: {4}
- logical_type: {5}""".format(self.name, self.path, self.max_definition_level,
- self.max_repetition_level, physical_type,
- logical_type)
+ converted_type: {5}""".format(self.name, self.path,
+ self.max_definition_level,
+ self.max_repetition_level, physical_type,
+ converted_type)
@property
def name(self):
@@ -707,8 +708,13 @@ cdef class ColumnSchema:
return physical_type_name_from_enum(self.descr.physical_type())
@property
+ def converted_type(self):
+ return converted_type_name_from_enum(self.descr.converted_type())
+
+ @property
def logical_type(self):
- return logical_type_name_from_enum(self.descr.logical_type())
+ # TODO: wrap new LogicalType objects
+ return self.converted_type
# FIXED_LEN_BYTE_ARRAY attribute
@property
@@ -738,31 +744,31 @@ cdef physical_type_name_from_enum(ParquetType type_):
}.get(type_, 'UNKNOWN')
-cdef logical_type_name_from_enum(ParquetLogicalType type_):
+cdef converted_type_name_from_enum(ParquetConvertedType type_):
return {
- ParquetLogicalType_NONE: 'NONE',
- ParquetLogicalType_UTF8: 'UTF8',
- ParquetLogicalType_MAP: 'MAP',
- ParquetLogicalType_MAP_KEY_VALUE: 'MAP_KEY_VALUE',
- ParquetLogicalType_LIST: 'LIST',
- ParquetLogicalType_ENUM: 'ENUM',
- ParquetLogicalType_DECIMAL: 'DECIMAL',
- ParquetLogicalType_DATE: 'DATE',
- ParquetLogicalType_TIME_MILLIS: 'TIME_MILLIS',
- ParquetLogicalType_TIME_MICROS: 'TIME_MICROS',
- ParquetLogicalType_TIMESTAMP_MILLIS: 'TIMESTAMP_MILLIS',
- ParquetLogicalType_TIMESTAMP_MICROS: 'TIMESTAMP_MICROS',
- ParquetLogicalType_UINT_8: 'UINT_8',
- ParquetLogicalType_UINT_16: 'UINT_16',
- ParquetLogicalType_UINT_32: 'UINT_32',
- ParquetLogicalType_UINT_64: 'UINT_64',
- ParquetLogicalType_INT_8: 'INT_8',
- ParquetLogicalType_INT_16: 'INT_16',
- ParquetLogicalType_INT_32: 'INT_32',
- ParquetLogicalType_INT_64: 'UINT_64',
- ParquetLogicalType_JSON: 'JSON',
- ParquetLogicalType_BSON: 'BSON',
- ParquetLogicalType_INTERVAL: 'INTERVAL',
+ ParquetConvertedType_NONE: 'NONE',
+ ParquetConvertedType_UTF8: 'UTF8',
+ ParquetConvertedType_MAP: 'MAP',
+ ParquetConvertedType_MAP_KEY_VALUE: 'MAP_KEY_VALUE',
+ ParquetConvertedType_LIST: 'LIST',
+ ParquetConvertedType_ENUM: 'ENUM',
+ ParquetConvertedType_DECIMAL: 'DECIMAL',
+ ParquetConvertedType_DATE: 'DATE',
+ ParquetConvertedType_TIME_MILLIS: 'TIME_MILLIS',
+ ParquetConvertedType_TIME_MICROS: 'TIME_MICROS',
+ ParquetConvertedType_TIMESTAMP_MILLIS: 'TIMESTAMP_MILLIS',
+ ParquetConvertedType_TIMESTAMP_MICROS: 'TIMESTAMP_MICROS',
+ ParquetConvertedType_UINT_8: 'UINT_8',
+ ParquetConvertedType_UINT_16: 'UINT_16',
+ ParquetConvertedType_UINT_32: 'UINT_32',
+ ParquetConvertedType_UINT_64: 'UINT_64',
+ ParquetConvertedType_INT_8: 'INT_8',
+ ParquetConvertedType_INT_16: 'INT_16',
+ ParquetConvertedType_INT_32: 'INT_32',
+ ParquetConvertedType_INT_64: 'INT_64',
+ ParquetConvertedType_JSON: 'JSON',
+ ParquetConvertedType_BSON: 'BSON',
+ ParquetConvertedType_INTERVAL: 'INTERVAL',
}.get(type_, 'UNKNOWN')
diff --git a/python/pyarrow/tests/test_parquet.py b/python/pyarrow/tests/test_parquet.py
index 9db04f7..b622e40 100644
--- a/python/pyarrow/tests/test_parquet.py
+++ b/python/pyarrow/tests/test_parquet.py
@@ -638,7 +638,7 @@ def test_parquet_metadata_api():
assert col.max_repetition_level == 0
assert col.physical_type == 'BOOLEAN'
- assert col.logical_type == 'NONE'
+ assert col.converted_type == 'NONE'
with pytest.raises(IndexError):
schema[ncols + 1] # +1 for index