You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by ks...@apache.org on 2019/07/16 18:37:13 UTC
[arrow] 01/02: ARROW-5889: [C++][Parquet] Add property to indicate
origin from converted type to TimestampLogicalType
This is an automated email from the ASF dual-hosted git repository.
kszucs pushed a commit to branch maint-0.14.x
in repository https://gitbox.apache.org/repos/asf/arrow.git
commit 2752c942db35f3ae5d12c9ae6b77fc8d241c8669
Author: TP Boudreau <tp...@gmail.com>
AuthorDate: Sat Jul 13 12:55:33 2019 -0500
ARROW-5889: [C++][Parquet] Add property to indicate origin from converted type to TimestampLogicalType
This patch:
- adds a new boolean member isFromConvertedType() to the TimestampLogicalType class that is set to "true" if the LogicalType was created from a converted type of TIMESTAMP_MILLIS or TIMESTAMP_MICROS
- prevents writing the TimestampLogicalType in the Parquet schema if this new property is true
- changes the Arrow reader to ignore the isAdjustedToUTC() property of the TimestampLogicalType if the type annotation came from a converted type
Author: TP Boudreau <tp...@gmail.com>
Closes #4856 from tpboudreau/ARROW-5889 and squashes the following commits:
458f06382 <TP Boudreau> Add property showing converted type origin to TimestampLogicalType
---
cpp/src/arrow/ipc/metadata-internal.cc | 16 +++----
cpp/src/parquet/arrow/arrow-schema-test.cc | 9 ++--
cpp/src/parquet/arrow/schema.cc | 21 ++++-----
cpp/src/parquet/schema-test.cc | 72 ++++++++++++++++++++++++------
cpp/src/parquet/types.cc | 46 +++++++++++++------
cpp/src/parquet/types.h | 9 +++-
6 files changed, 124 insertions(+), 49 deletions(-)
diff --git a/cpp/src/arrow/ipc/metadata-internal.cc b/cpp/src/arrow/ipc/metadata-internal.cc
index f9b73df..e505dde 100644
--- a/cpp/src/arrow/ipc/metadata-internal.cc
+++ b/cpp/src/arrow/ipc/metadata-internal.cc
@@ -360,10 +360,9 @@ static Status TypeFromFlatbuffer(const flatbuf::Field* field,
auto type_data = field->type();
if (type_data == nullptr) {
return Status::IOError(
- "Type-pointer in custom metadata of flatbuffer-encoded Field is null.");
+ "Type-pointer in custom metadata of flatbuffer-encoded Field is null.");
}
- RETURN_NOT_OK(
- ConcreteTypeFromFlatbuffer(field->type_type(), type_data, children, out));
+ RETURN_NOT_OK(ConcreteTypeFromFlatbuffer(field->type_type(), type_data, children, out));
// Look for extension metadata in custom_metadata field
// TODO(wesm): Should this be part of the Field Flatbuffers table?
@@ -766,8 +765,8 @@ Status FieldFromFlatbuffer(const flatbuf::Field* field, DictionaryMemo* dictiona
auto int_data = encoding->indexType();
if (int_data == nullptr) {
return Status::IOError(
- "indexType-pointer in custom metadata of flatbuffer-encoded DictionaryEncoding "
- "is null.");
+ "indexType-pointer in custom metadata of flatbuffer-encoded DictionaryEncoding "
+ "is null.");
}
RETURN_NOT_OK(IntFromFlatbuffer(int_data, &index_type));
type = ::arrow::dictionary(index_type, type, encoding->isOrdered());
@@ -1155,7 +1154,7 @@ Status GetTensorMetadata(const Buffer& metadata, std::shared_ptr<DataType>* type
auto type_data = tensor->type();
if (type_data == nullptr) {
return Status::IOError(
- "Type-pointer in custom metadata of flatbuffer-encoded Tensor is null.");
+ "Type-pointer in custom metadata of flatbuffer-encoded Tensor is null.");
}
return ConcreteTypeFromFlatbuffer(tensor->type_type(), type_data, {}, type);
}
@@ -1204,10 +1203,9 @@ Status GetSparseTensorMetadata(const Buffer& metadata, std::shared_ptr<DataType>
auto type_data = sparse_tensor->type();
if (type_data == nullptr) {
return Status::IOError(
- "Type-pointer in custom metadata of flatbuffer-encoded SparseTensor is null.");
+ "Type-pointer in custom metadata of flatbuffer-encoded SparseTensor is null.");
}
- return ConcreteTypeFromFlatbuffer(sparse_tensor->type_type(), type_data, {},
- type);
+ return ConcreteTypeFromFlatbuffer(sparse_tensor->type_type(), type_data, {}, type);
}
// ----------------------------------------------------------------------
diff --git a/cpp/src/parquet/arrow/arrow-schema-test.cc b/cpp/src/parquet/arrow/arrow-schema-test.cc
index 6972620..dc0a02a 100644
--- a/cpp/src/parquet/arrow/arrow-schema-test.cc
+++ b/cpp/src/parquet/arrow/arrow-schema-test.cc
@@ -116,14 +116,14 @@ TEST_F(TestConvertParquetSchema, ParquetFlatPrimitives) {
parquet_fields.push_back(PrimitiveNode::Make("timestamp", Repetition::REQUIRED,
ParquetType::INT64,
ConvertedType::TIMESTAMP_MILLIS));
- arrow_fields.push_back(std::make_shared<Field>(
- "timestamp", ::arrow::timestamp(TimeUnit::MILLI, "UTC"), false));
+ arrow_fields.push_back(
+ std::make_shared<Field>("timestamp", ::arrow::timestamp(TimeUnit::MILLI), false));
parquet_fields.push_back(PrimitiveNode::Make("timestamp[us]", Repetition::REQUIRED,
ParquetType::INT64,
ConvertedType::TIMESTAMP_MICROS));
arrow_fields.push_back(std::make_shared<Field>(
- "timestamp[us]", ::arrow::timestamp(TimeUnit::MICRO, "UTC"), false));
+ "timestamp[us]", ::arrow::timestamp(TimeUnit::MICRO), false));
parquet_fields.push_back(PrimitiveNode::Make("date", Repetition::REQUIRED,
ParquetType::INT32, ConvertedType::DATE));
@@ -856,15 +856,18 @@ TEST_F(TestConvertArrowSchema, ArrowFields) {
LogicalType::Time(true, LogicalType::TimeUnit::NANOS), ParquetType::INT64, -1},
{"timestamp(millisecond)", ::arrow::timestamp(::arrow::TimeUnit::MILLI),
LogicalType::Timestamp(false, LogicalType::TimeUnit::MILLIS,
+ /*is_from_converted_type=*/false,
/*force_set_converted_type=*/true),
ParquetType::INT64, -1},
{"timestamp(microsecond)", ::arrow::timestamp(::arrow::TimeUnit::MICRO),
LogicalType::Timestamp(false, LogicalType::TimeUnit::MICROS,
+ /*is_from_converted_type=*/false,
/*force_set_converted_type=*/true),
ParquetType::INT64, -1},
// Parquet v1, values converted to microseconds
{"timestamp(nanosecond)", ::arrow::timestamp(::arrow::TimeUnit::NANO),
LogicalType::Timestamp(false, LogicalType::TimeUnit::MICROS,
+ /*is_from_converted_type=*/false,
/*force_set_converted_type=*/true),
ParquetType::INT64, -1},
{"timestamp(millisecond, UTC)", ::arrow::timestamp(::arrow::TimeUnit::MILLI, "UTC"),
diff --git a/cpp/src/parquet/arrow/schema.cc b/cpp/src/parquet/arrow/schema.cc
index 12d4194..f77bf38 100644
--- a/cpp/src/parquet/arrow/schema.cc
+++ b/cpp/src/parquet/arrow/schema.cc
@@ -134,23 +134,22 @@ static Status MakeArrowTime64(const LogicalType& logical_type,
static Status MakeArrowTimestamp(const LogicalType& logical_type,
std::shared_ptr<ArrowType>* out) {
- static const char* utc = "UTC";
const auto& timestamp = checked_cast<const TimestampLogicalType&>(logical_type);
+ const bool utc_normalized =
+ timestamp.is_from_converted_type() ? false : timestamp.is_adjusted_to_utc();
+ static const char* utc_timezone = "UTC";
switch (timestamp.time_unit()) {
case LogicalType::TimeUnit::MILLIS:
- *out = (timestamp.is_adjusted_to_utc()
- ? ::arrow::timestamp(::arrow::TimeUnit::MILLI, utc)
- : ::arrow::timestamp(::arrow::TimeUnit::MILLI));
+ *out = (utc_normalized ? ::arrow::timestamp(::arrow::TimeUnit::MILLI, utc_timezone)
+ : ::arrow::timestamp(::arrow::TimeUnit::MILLI));
break;
case LogicalType::TimeUnit::MICROS:
- *out = (timestamp.is_adjusted_to_utc()
- ? ::arrow::timestamp(::arrow::TimeUnit::MICRO, utc)
- : ::arrow::timestamp(::arrow::TimeUnit::MICRO));
+ *out = (utc_normalized ? ::arrow::timestamp(::arrow::TimeUnit::MICRO, utc_timezone)
+ : ::arrow::timestamp(::arrow::TimeUnit::MICRO));
break;
case LogicalType::TimeUnit::NANOS:
- *out = (timestamp.is_adjusted_to_utc()
- ? ::arrow::timestamp(::arrow::TimeUnit::NANO, utc)
- : ::arrow::timestamp(::arrow::TimeUnit::NANO));
+ *out = (utc_normalized ? ::arrow::timestamp(::arrow::TimeUnit::NANO, utc_timezone)
+ : ::arrow::timestamp(::arrow::TimeUnit::NANO));
break;
default:
return Status::TypeError("Unrecognized time unit in timestamp logical_type: ",
@@ -530,9 +529,11 @@ static std::shared_ptr<const LogicalType> TimestampLogicalTypeFromArrowTimestamp
switch (time_unit) {
case ::arrow::TimeUnit::MILLI:
return LogicalType::Timestamp(utc, LogicalType::TimeUnit::MILLIS,
+ /*is_from_converted_type=*/false,
/*force_set_converted_type=*/true);
case ::arrow::TimeUnit::MICRO:
return LogicalType::Timestamp(utc, LogicalType::TimeUnit::MICROS,
+ /*is_from_converted_type=*/false,
/*force_set_converted_type=*/true);
case ::arrow::TimeUnit::NANO:
return LogicalType::Timestamp(utc, LogicalType::TimeUnit::NANOS);
diff --git a/cpp/src/parquet/schema-test.cc b/cpp/src/parquet/schema-test.cc
index badd997..023a1b0 100644
--- a/cpp/src/parquet/schema-test.cc
+++ b/cpp/src/parquet/schema-test.cc
@@ -1398,28 +1398,34 @@ TEST(TestLogicalTypeOperation, LogicalTypeRepresentation) {
R"({"Type": "Time", "isAdjustedToUTC": false, "timeUnit": "nanoseconds"})"},
{LogicalType::Timestamp(true, LogicalType::TimeUnit::MILLIS),
"Timestamp(isAdjustedToUTC=true, timeUnit=milliseconds, "
- "force_set_converted_type=false)",
- R"({"Type": "Timestamp", "isAdjustedToUTC": true, "timeUnit": "milliseconds", "force_set_converted_type": false})"},
+ "is_from_converted_type=false, force_set_converted_type=false)",
+ R"({"Type": "Timestamp", "isAdjustedToUTC": true, "timeUnit": "milliseconds", )"
+ R"("is_from_converted_type": false, "force_set_converted_type": false})"},
{LogicalType::Timestamp(true, LogicalType::TimeUnit::MICROS),
"Timestamp(isAdjustedToUTC=true, timeUnit=microseconds, "
- "force_set_converted_type=false)",
- R"({"Type": "Timestamp", "isAdjustedToUTC": true, "timeUnit": "microseconds", "force_set_converted_type": false})"},
+ "is_from_converted_type=false, force_set_converted_type=false)",
+ R"({"Type": "Timestamp", "isAdjustedToUTC": true, "timeUnit": "microseconds", )"
+ R"("is_from_converted_type": false, "force_set_converted_type": false})"},
{LogicalType::Timestamp(true, LogicalType::TimeUnit::NANOS),
"Timestamp(isAdjustedToUTC=true, timeUnit=nanoseconds, "
- "force_set_converted_type=false)",
- R"({"Type": "Timestamp", "isAdjustedToUTC": true, "timeUnit": "nanoseconds", "force_set_converted_type": false})"},
- {LogicalType::Timestamp(false, LogicalType::TimeUnit::MILLIS, true),
+ "is_from_converted_type=false, force_set_converted_type=false)",
+ R"({"Type": "Timestamp", "isAdjustedToUTC": true, "timeUnit": "nanoseconds", )"
+ R"("is_from_converted_type": false, "force_set_converted_type": false})"},
+ {LogicalType::Timestamp(false, LogicalType::TimeUnit::MILLIS, true, true),
"Timestamp(isAdjustedToUTC=false, timeUnit=milliseconds, "
- "force_set_converted_type=true)",
- R"({"Type": "Timestamp", "isAdjustedToUTC": false, "timeUnit": "milliseconds", "force_set_converted_type": true})"},
+ "is_from_converted_type=true, force_set_converted_type=true)",
+ R"({"Type": "Timestamp", "isAdjustedToUTC": false, "timeUnit": "milliseconds", )"
+ R"("is_from_converted_type": true, "force_set_converted_type": true})"},
{LogicalType::Timestamp(false, LogicalType::TimeUnit::MICROS),
"Timestamp(isAdjustedToUTC=false, timeUnit=microseconds, "
- "force_set_converted_type=false)",
- R"({"Type": "Timestamp", "isAdjustedToUTC": false, "timeUnit": "microseconds", "force_set_converted_type": false})"},
+ "is_from_converted_type=false, force_set_converted_type=false)",
+ R"({"Type": "Timestamp", "isAdjustedToUTC": false, "timeUnit": "microseconds", )"
+ R"("is_from_converted_type": false, "force_set_converted_type": false})"},
{LogicalType::Timestamp(false, LogicalType::TimeUnit::NANOS),
"Timestamp(isAdjustedToUTC=false, timeUnit=nanoseconds, "
- "force_set_converted_type=false)",
- R"({"Type": "Timestamp", "isAdjustedToUTC": false, "timeUnit": "nanoseconds", "force_set_converted_type": false})"},
+ "is_from_converted_type=false, force_set_converted_type=false)",
+ R"({"Type": "Timestamp", "isAdjustedToUTC": false, "timeUnit": "nanoseconds", )"
+ R"("is_from_converted_type": false, "force_set_converted_type": false})"},
{LogicalType::Interval(), "Interval", R"({"Type": "Interval"})"},
{LogicalType::Int(8, false), "Int(bitWidth=8, isSigned=false)",
R"({"Type": "Int", "bitWidth": 8, "isSigned": false})"},
@@ -1673,6 +1679,16 @@ struct SchemaElementConstructionArguments {
std::function<bool()> check_logicalType;
};
+struct LegacySchemaElementConstructionArguments {
+ std::string name;
+ Type::type physical_type;
+ int physical_length;
+ bool expect_converted_type;
+ ConvertedType::type converted_type;
+ bool expect_logicalType;
+ std::function<bool()> check_logicalType;
+};
+
class TestSchemaElementConstruction : public ::testing::Test {
public:
TestSchemaElementConstruction* Reconstruct(
@@ -1692,6 +1708,23 @@ class TestSchemaElementConstruction : public ::testing::Test {
return this;
}
+ TestSchemaElementConstruction* LegacyReconstruct(
+ const LegacySchemaElementConstructionArguments& c) {
+ // Make node, create serializable Thrift object from it ...
+ node_ = PrimitiveNode::Make(c.name, Repetition::REQUIRED, c.physical_type,
+ c.converted_type, c.physical_length);
+ element_.reset(new format::SchemaElement);
+ node_->ToParquet(element_.get());
+
+ // ... then set aside some values for later inspection.
+ name_ = c.name;
+ expect_converted_type_ = c.expect_converted_type;
+ converted_type_ = c.converted_type;
+ expect_logicalType_ = c.expect_logicalType;
+ check_logicalType_ = c.check_logicalType;
+ return this;
+ }
+
void Inspect() {
ASSERT_EQ(element_->name, name_);
if (expect_converted_type_) {
@@ -1777,6 +1810,17 @@ TEST_F(TestSchemaElementConstruction, SimpleCases) {
for (const SchemaElementConstructionArguments& c : cases) {
this->Reconstruct(c)->Inspect();
}
+
+ std::vector<LegacySchemaElementConstructionArguments> legacy_cases = {
+ {"timestamp_ms", Type::INT64, -1, true, ConvertedType::TIMESTAMP_MILLIS, false,
+ check_nothing},
+ {"timestamp_us", Type::INT64, -1, true, ConvertedType::TIMESTAMP_MICROS, false,
+ check_nothing},
+ };
+
+ for (const LegacySchemaElementConstructionArguments& c : legacy_cases) {
+ this->LegacyReconstruct(c)->Inspect();
+ }
}
class TestDecimalSchemaElementConstruction : public TestSchemaElementConstruction {
@@ -1920,6 +1964,7 @@ TEST_F(TestTemporalSchemaElementConstruction, TemporalCases) {
Type::INT64, -1, false, ConvertedType::NA, true, check_TIMESTAMP},
{"timestamp_F_ms_force",
LogicalType::Timestamp(false, LogicalType::TimeUnit::MILLIS,
+ /*is_from_converted_type=*/false,
/*force_set_converted_type=*/true),
Type::INT64, -1, true, ConvertedType::TIMESTAMP_MILLIS, true, check_TIMESTAMP},
{"timestamp_T_us", LogicalType::Timestamp(true, LogicalType::TimeUnit::MICROS),
@@ -1928,6 +1973,7 @@ TEST_F(TestTemporalSchemaElementConstruction, TemporalCases) {
Type::INT64, -1, false, ConvertedType::NA, true, check_TIMESTAMP},
{"timestamp_F_us_force",
LogicalType::Timestamp(false, LogicalType::TimeUnit::MILLIS,
+ /*is_from_converted_type=*/false,
/*force_set_converted_type=*/true),
Type::INT64, -1, true, ConvertedType::TIMESTAMP_MILLIS, true, check_TIMESTAMP},
{"timestamp_T_ns", LogicalType::Timestamp(true, LogicalType::TimeUnit::NANOS),
diff --git a/cpp/src/parquet/types.cc b/cpp/src/parquet/types.cc
index 2c75439..f89170d 100644
--- a/cpp/src/parquet/types.cc
+++ b/cpp/src/parquet/types.cc
@@ -380,9 +380,13 @@ std::shared_ptr<const LogicalType> LogicalType::FromConvertedType(
case ConvertedType::TIME_MICROS:
return TimeLogicalType::Make(true, LogicalType::TimeUnit::MICROS);
case ConvertedType::TIMESTAMP_MILLIS:
- return TimestampLogicalType::Make(true, LogicalType::TimeUnit::MILLIS);
+ return TimestampLogicalType::Make(true, LogicalType::TimeUnit::MILLIS,
+ /*is_from_converted_type=*/true,
+ /*force_set_converted_type=*/false);
case ConvertedType::TIMESTAMP_MICROS:
- return TimestampLogicalType::Make(true, LogicalType::TimeUnit::MICROS);
+ return TimestampLogicalType::Make(true, LogicalType::TimeUnit::MICROS,
+ /*is_from_converted_type=*/true,
+ /*force_set_converted_type=*/false);
case ConvertedType::INTERVAL:
return IntervalLogicalType::Make();
case ConvertedType::INT_8:
@@ -497,9 +501,9 @@ std::shared_ptr<const LogicalType> LogicalType::Time(
std::shared_ptr<const LogicalType> LogicalType::Timestamp(
bool is_adjusted_to_utc, LogicalType::TimeUnit::unit time_unit,
- bool force_set_converted_type) {
+ bool is_from_converted_type, bool force_set_converted_type) {
DCHECK(time_unit != LogicalType::TimeUnit::UNKNOWN);
- return TimestampLogicalType::Make(is_adjusted_to_utc, time_unit,
+ return TimestampLogicalType::Make(is_adjusted_to_utc, time_unit, is_from_converted_type,
force_set_converted_type);
}
@@ -554,6 +558,10 @@ class LogicalType::Impl {
virtual std::string ToString() const = 0;
+ virtual bool is_serialized() const {
+ return !(type_ == LogicalType::Type::NONE || type_ == LogicalType::Type::UNKNOWN);
+ }
+
virtual std::string ToJSON() const {
std::stringstream json;
json << R"({"Type": ")" << ToString() << R"("})";
@@ -678,10 +686,7 @@ bool LogicalType::is_nested() const {
(impl_->type() == LogicalType::Type::MAP);
}
bool LogicalType::is_nonnested() const { return !is_nested(); }
-bool LogicalType::is_serialized() const {
- return !((impl_->type() == LogicalType::Type::NONE) ||
- (impl_->type() == LogicalType::Type::UNKNOWN));
-}
+bool LogicalType::is_serialized() const { return impl_->is_serialized(); }
// LogicalTypeImpl intermediate "compatibility" classes
@@ -1194,6 +1199,7 @@ class LogicalType::Impl::Timestamp final : public LogicalType::Impl::Compatible,
public:
friend class TimestampLogicalType;
+ bool is_serialized() const override;
bool is_compatible(ConvertedType::type converted_type,
schema::DecimalMetadata converted_decimal_metadata) const override;
ConvertedType::type ToConvertedType(
@@ -1206,21 +1212,28 @@ class LogicalType::Impl::Timestamp final : public LogicalType::Impl::Compatible,
bool is_adjusted_to_utc() const { return adjusted_; }
LogicalType::TimeUnit::unit time_unit() const { return unit_; }
+ bool is_from_converted_type() const { return is_from_converted_type_; }
bool force_set_converted_type() const { return force_set_converted_type_; }
private:
- Timestamp(bool adjusted, LogicalType::TimeUnit::unit unit,
+ Timestamp(bool adjusted, LogicalType::TimeUnit::unit unit, bool is_from_converted_type,
bool force_set_converted_type)
: LogicalType::Impl(LogicalType::Type::TIMESTAMP, SortOrder::SIGNED),
LogicalType::Impl::SimpleApplicable(parquet::Type::INT64),
adjusted_(adjusted),
unit_(unit),
+ is_from_converted_type_(is_from_converted_type),
force_set_converted_type_(force_set_converted_type) {}
bool adjusted_ = false;
LogicalType::TimeUnit::unit unit_;
+ bool is_from_converted_type_ = false;
bool force_set_converted_type_ = false;
};
+bool LogicalType::Impl::Timestamp::is_serialized() const {
+ return !is_from_converted_type_;
+}
+
bool LogicalType::Impl::Timestamp::is_compatible(
ConvertedType::type converted_type,
schema::DecimalMetadata converted_decimal_metadata) const {
@@ -1263,6 +1276,7 @@ std::string LogicalType::Impl::Timestamp::ToString() const {
std::stringstream type;
type << "Timestamp(isAdjustedToUTC=" << std::boolalpha << adjusted_
<< ", timeUnit=" << time_unit_string(unit_)
+ << ", is_from_converted_type=" << is_from_converted_type_
<< ", force_set_converted_type=" << force_set_converted_type_ << ")";
return type.str();
}
@@ -1270,8 +1284,9 @@ std::string LogicalType::Impl::Timestamp::ToString() const {
std::string LogicalType::Impl::Timestamp::ToJSON() const {
std::stringstream json;
json << R"({"Type": "Timestamp", "isAdjustedToUTC": )" << std::boolalpha << adjusted_
- << R"(, "timeUnit": ")" << time_unit_string(unit_)
- << R"(", "force_set_converted_type": )" << force_set_converted_type_ << R"(})";
+ << R"(, "timeUnit": ")" << time_unit_string(unit_) << R"(")"
+ << R"(, "is_from_converted_type": )" << is_from_converted_type_
+ << R"(, "force_set_converted_type": )" << force_set_converted_type_ << R"(})";
return json.str();
}
@@ -1308,13 +1323,13 @@ bool LogicalType::Impl::Timestamp::Equals(const LogicalType& other) const {
std::shared_ptr<const LogicalType> TimestampLogicalType::Make(
bool is_adjusted_to_utc, LogicalType::TimeUnit::unit time_unit,
- bool force_set_converted_type) {
+ bool is_from_converted_type, bool force_set_converted_type) {
if (time_unit == LogicalType::TimeUnit::MILLIS ||
time_unit == LogicalType::TimeUnit::MICROS ||
time_unit == LogicalType::TimeUnit::NANOS) {
auto* logical_type = new TimestampLogicalType();
logical_type->impl_.reset(new LogicalType::Impl::Timestamp(
- is_adjusted_to_utc, time_unit, force_set_converted_type));
+ is_adjusted_to_utc, time_unit, is_from_converted_type, force_set_converted_type));
return std::shared_ptr<const LogicalType>(logical_type);
} else {
throw ParquetException(
@@ -1330,6 +1345,11 @@ LogicalType::TimeUnit::unit TimestampLogicalType::time_unit() const {
return (dynamic_cast<const LogicalType::Impl::Timestamp&>(*impl_)).time_unit();
}
+bool TimestampLogicalType::is_from_converted_type() const {
+ return (dynamic_cast<const LogicalType::Impl::Timestamp&>(*impl_))
+ .is_from_converted_type();
+}
+
bool TimestampLogicalType::force_set_converted_type() const {
return (dynamic_cast<const LogicalType::Impl::Timestamp&>(*impl_))
.force_set_converted_type();
diff --git a/cpp/src/parquet/types.h b/cpp/src/parquet/types.h
index 6f1906f..dfa056e 100644
--- a/cpp/src/parquet/types.h
+++ b/cpp/src/parquet/types.h
@@ -186,12 +186,15 @@ class PARQUET_EXPORT LogicalType {
/// \brief Create a Timestamp logical type
/// \param[in] is_adjusted_to_utc set true if the data is UTC-normalized
/// \param[in] time_unit the resolution of the timestamp
+ /// \param[in] is_from_converted_type if true, the timestamp was generated
+ /// by translating a legacy converted type of TIMESTAMP_MILLIS or
+ /// TIMESTAMP_MICROS. Default is false.
/// \param[in] force_set_converted_type if true, always set the
/// legacy ConvertedType TIMESTAMP_MICROS and TIMESTAMP_MILLIS
/// metadata. Default is false
static std::shared_ptr<const LogicalType> Timestamp(
bool is_adjusted_to_utc, LogicalType::TimeUnit::unit time_unit,
- bool force_set_converted_type = false);
+ bool is_from_converted_type = false, bool force_set_converted_type = false);
static std::shared_ptr<const LogicalType> Interval();
static std::shared_ptr<const LogicalType> Int(int bit_width, bool is_signed);
@@ -347,10 +350,14 @@ class PARQUET_EXPORT TimestampLogicalType : public LogicalType {
public:
static std::shared_ptr<const LogicalType> Make(bool is_adjusted_to_utc,
LogicalType::TimeUnit::unit time_unit,
+ bool is_from_converted_type = false,
bool force_set_converted_type = false);
bool is_adjusted_to_utc() const;
LogicalType::TimeUnit::unit time_unit() const;
+ /// \brief If true, will not set LogicalType in Thrift metadata
+ bool is_from_converted_type() const;
+
/// \brief If true, will set ConvertedType for micros and millis
/// resolution in legacy ConvertedType Thrift metadata
bool force_set_converted_type() const;