You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by tu...@apache.org on 2022/04/24 05:42:50 UTC
[arrow-rs] branch master updated: Update datatypes in parquet::basic::LogicalType (#1612)
This is an automated email from the ASF dual-hosted git repository.
tustvold pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git
The following commit(s) were added to refs/heads/master by this push:
new 5374289fa Update datatypes in parquet::basic::LogicalType (#1612)
5374289fa is described below
commit 5374289fa91c3024931eb69ea5a7837e38556547
Author: tfeda <36...@users.noreply.github.com>
AuthorDate: Sun Apr 24 01:42:45 2022 -0400
Update datatypes in parquet::basic::LogicalType (#1612)
* Convert LogicalType::STRING(StringType) to LogicalType::String
* Convert LogicalType::MAP(MapType) to LogicalType::Map
* Convert LogicalType::LIST(ListType) to LogicalType::List
* Convert LogicalType::ENUM(EnumType) to LogicalType::Enum
* Convert LogicalType::DECIMAL(DecimalType) to LogicalType::Decimal { scale, precision }, Fix String proc macro error
* Convert LogicalType::DATE(DateType) to LogicalType::Date
* Convert LogicalType::TIME(TimeType) to LogicalType::Time { is_adjusted_to_u_t_c: bool, unit: TimeUnit }
* Convert LogicalType::TIMESTAMP(TimestampType) to LogicalType::Timestamp { is_adjusted_to_u_t_c: bool, unit: TimeUnit }
* Convert LogicalType::INTEGER(IntType) to LogicalType::Integer { bit_width: i8, is_signed: bool }
* Convert LogicalType::UNKNOWN,JSON,BSON to LogicalType::Unknown,Json,Bson
* Convert LogicalType::UUID to LogicalType::Uuid
* Add ref t to simplify printing in src/arrow/schema from_int32()
---
parquet/src/arrow/schema.rs | 101 +++++-----
parquet/src/basic.rs | 364 ++++++++++++++++++++----------------
parquet/src/column/writer.rs | 4 +-
parquet/src/file/writer.rs | 6 +-
parquet/src/schema/parser.rs | 76 ++++----
parquet/src/schema/printer.rs | 102 +++++-----
parquet/src/schema/types.rs | 72 ++++---
parquet_derive/src/parquet_field.rs | 38 ++--
8 files changed, 400 insertions(+), 363 deletions(-)
diff --git a/parquet/src/arrow/schema.rs b/parquet/src/arrow/schema.rs
index 239311ade..bfccfe7f9 100644
--- a/parquet/src/arrow/schema.rs
+++ b/parquet/src/arrow/schema.rs
@@ -34,8 +34,8 @@ use crate::file::{metadata::KeyValue, properties::WriterProperties};
use crate::schema::types::{ColumnDescriptor, SchemaDescriptor, Type, TypePtr};
use crate::{
basic::{
- ConvertedType, DecimalType, IntType, LogicalType, Repetition, TimeType,
- TimeUnit as ParquetTimeUnit, TimestampType, Type as PhysicalType,
+ ConvertedType, LogicalType, Repetition,
+ TimeUnit as ParquetTimeUnit, Type as PhysicalType,
},
errors::ParquetError,
};
@@ -324,24 +324,24 @@ fn arrow_to_parquet_type(field: &Field) -> Result<Type> {
// create type from field
match field.data_type() {
DataType::Null => Type::primitive_type_builder(name, PhysicalType::INT32)
- .with_logical_type(Some(LogicalType::UNKNOWN(Default::default())))
+ .with_logical_type(Some(LogicalType::Unknown))
.with_repetition(repetition)
.build(),
DataType::Boolean => Type::primitive_type_builder(name, PhysicalType::BOOLEAN)
.with_repetition(repetition)
.build(),
DataType::Int8 => Type::primitive_type_builder(name, PhysicalType::INT32)
- .with_logical_type(Some(LogicalType::INTEGER(IntType {
+ .with_logical_type(Some(LogicalType::Integer {
bit_width: 8,
is_signed: true,
- })))
+ }))
.with_repetition(repetition)
.build(),
DataType::Int16 => Type::primitive_type_builder(name, PhysicalType::INT32)
- .with_logical_type(Some(LogicalType::INTEGER(IntType {
+ .with_logical_type(Some(LogicalType::Integer {
bit_width: 16,
is_signed: true,
- })))
+ }))
.with_repetition(repetition)
.build(),
DataType::Int32 => Type::primitive_type_builder(name, PhysicalType::INT32)
@@ -351,31 +351,31 @@ fn arrow_to_parquet_type(field: &Field) -> Result<Type> {
.with_repetition(repetition)
.build(),
DataType::UInt8 => Type::primitive_type_builder(name, PhysicalType::INT32)
- .with_logical_type(Some(LogicalType::INTEGER(IntType {
+ .with_logical_type(Some(LogicalType::Integer {
bit_width: 8,
is_signed: false,
- })))
+ }))
.with_repetition(repetition)
.build(),
DataType::UInt16 => Type::primitive_type_builder(name, PhysicalType::INT32)
- .with_logical_type(Some(LogicalType::INTEGER(IntType {
+ .with_logical_type(Some(LogicalType::Integer {
bit_width: 16,
is_signed: false,
- })))
+ }))
.with_repetition(repetition)
.build(),
DataType::UInt32 => Type::primitive_type_builder(name, PhysicalType::INT32)
- .with_logical_type(Some(LogicalType::INTEGER(IntType {
+ .with_logical_type(Some(LogicalType::Integer {
bit_width: 32,
is_signed: false,
- })))
+ }))
.with_repetition(repetition)
.build(),
DataType::UInt64 => Type::primitive_type_builder(name, PhysicalType::INT64)
- .with_logical_type(Some(LogicalType::INTEGER(IntType {
+ .with_logical_type(Some(LogicalType::Integer {
bit_width: 64,
is_signed: false,
- })))
+ }))
.with_repetition(repetition)
.build(),
DataType::Float16 => Err(ArrowError("Float16 arrays not supported".to_string())),
@@ -389,7 +389,7 @@ fn arrow_to_parquet_type(field: &Field) -> Result<Type> {
name,
PhysicalType::INT64,
)
- .with_logical_type(Some(LogicalType::TIMESTAMP(TimestampType {
+ .with_logical_type(Some(LogicalType::Timestamp {
is_adjusted_to_u_t_c: matches!(zone, Some(z) if !z.as_str().is_empty()),
unit: match time_unit {
TimeUnit::Second => ParquetTimeUnit::MILLIS(Default::default()),
@@ -397,34 +397,34 @@ fn arrow_to_parquet_type(field: &Field) -> Result<Type> {
TimeUnit::Microsecond => ParquetTimeUnit::MICROS(Default::default()),
TimeUnit::Nanosecond => ParquetTimeUnit::NANOS(Default::default()),
},
- })))
+ }))
.with_repetition(repetition)
.build(),
DataType::Date32 => Type::primitive_type_builder(name, PhysicalType::INT32)
- .with_logical_type(Some(LogicalType::DATE(Default::default())))
+ .with_logical_type(Some(LogicalType::Date))
.with_repetition(repetition)
.build(),
// date64 is cast to date32
DataType::Date64 => Type::primitive_type_builder(name, PhysicalType::INT32)
- .with_logical_type(Some(LogicalType::DATE(Default::default())))
+ .with_logical_type(Some(LogicalType::Date))
.with_repetition(repetition)
.build(),
DataType::Time32(_) => Type::primitive_type_builder(name, PhysicalType::INT32)
- .with_logical_type(Some(LogicalType::TIME(TimeType {
+ .with_logical_type(Some(LogicalType::Time {
is_adjusted_to_u_t_c: false,
unit: ParquetTimeUnit::MILLIS(Default::default()),
- })))
+ }))
.with_repetition(repetition)
.build(),
DataType::Time64(unit) => Type::primitive_type_builder(name, PhysicalType::INT64)
- .with_logical_type(Some(LogicalType::TIME(TimeType {
+ .with_logical_type(Some(LogicalType::Time {
is_adjusted_to_u_t_c: false,
unit: match unit {
TimeUnit::Microsecond => ParquetTimeUnit::MICROS(Default::default()),
TimeUnit::Nanosecond => ParquetTimeUnit::NANOS(Default::default()),
u => unreachable!("Invalid unit for Time64: {:?}", u),
},
- })))
+ }))
.with_repetition(repetition)
.build(),
DataType::Duration(_) => Err(ArrowError(
@@ -465,17 +465,17 @@ fn arrow_to_parquet_type(field: &Field) -> Result<Type> {
Type::primitive_type_builder(name, PhysicalType::FIXED_LEN_BYTE_ARRAY)
.with_repetition(repetition)
.with_length(decimal_length_from_precision(*precision) as i32)
- .with_logical_type(Some(LogicalType::DECIMAL(DecimalType {
+ .with_logical_type(Some(LogicalType::Decimal {
scale: *scale as i32,
precision: *precision as i32,
- })))
+ }))
.with_precision(*precision as i32)
.with_scale(*scale as i32)
.build()
}
DataType::Utf8 | DataType::LargeUtf8 => {
Type::primitive_type_builder(name, PhysicalType::BYTE_ARRAY)
- .with_logical_type(Some(LogicalType::STRING(Default::default())))
+ .with_logical_type(Some(LogicalType::String))
.with_repetition(repetition)
.build()
}
@@ -487,7 +487,7 @@ fn arrow_to_parquet_type(field: &Field) -> Result<Type> {
.with_repetition(Repetition::REPEATED)
.build()?,
)])
- .with_logical_type(Some(LogicalType::LIST(Default::default())))
+ .with_logical_type(Some(LogicalType::List))
.with_repetition(repetition)
.build()
}
@@ -527,7 +527,7 @@ fn arrow_to_parquet_type(field: &Field) -> Result<Type> {
.with_repetition(Repetition::REPEATED)
.build()?,
)])
- .with_logical_type(Some(LogicalType::MAP(Default::default())))
+ .with_logical_type(Some(LogicalType::Map))
.with_repetition(repetition)
.build()
} else {
@@ -669,7 +669,10 @@ impl ParquetTypeConverter<'_> {
self.schema.get_basic_info().converted_type(),
) {
(None, ConvertedType::NONE) => Ok(DataType::Int32),
- (Some(LogicalType::INTEGER(t)), _) => match (t.bit_width, t.is_signed) {
+ (Some(ref t @ LogicalType::Integer {
+ bit_width,
+ is_signed,
+ }), _) => match (bit_width, is_signed) {
(8, true) => Ok(DataType::Int8),
(16, true) => Ok(DataType::Int16),
(32, true) => Ok(DataType::Int32),
@@ -678,20 +681,20 @@ impl ParquetTypeConverter<'_> {
(32, false) => Ok(DataType::UInt32),
_ => Err(ArrowError(format!(
"Cannot create INT32 physical type from {:?}",
- t
+ t,
))),
},
- (Some(LogicalType::DECIMAL(_)), _) => Ok(self.to_decimal()),
- (Some(LogicalType::DATE(_)), _) => Ok(DataType::Date32),
- (Some(LogicalType::TIME(t)), _) => match t.unit {
+ (Some(LogicalType::Decimal {..}), _) => Ok(self.to_decimal()),
+ (Some(LogicalType::Date), _) => Ok(DataType::Date32),
+ (Some(LogicalType::Time { unit, .. }), _) => match unit {
ParquetTimeUnit::MILLIS(_) => Ok(DataType::Time32(TimeUnit::Millisecond)),
_ => Err(ArrowError(format!(
"Cannot create INT32 physical type from {:?}",
- t.unit
+ unit
))),
},
// https://github.com/apache/parquet-format/blob/master/LogicalTypes.md#unknown-always-null
- (Some(LogicalType::UNKNOWN(_)), _) => Ok(DataType::Null),
+ (Some(LogicalType::Unknown), _) => Ok(DataType::Null),
(None, ConvertedType::UINT_8) => Ok(DataType::UInt8),
(None, ConvertedType::UINT_16) => Ok(DataType::UInt16),
(None, ConvertedType::UINT_32) => Ok(DataType::UInt32),
@@ -717,26 +720,26 @@ impl ParquetTypeConverter<'_> {
self.schema.get_basic_info().converted_type(),
) {
(None, ConvertedType::NONE) => Ok(DataType::Int64),
- (Some(LogicalType::INTEGER(t)), _) if t.bit_width == 64 => {
- match t.is_signed {
+ (Some(LogicalType::Integer { bit_width, is_signed }), _) if bit_width == 64 => {
+ match is_signed {
true => Ok(DataType::Int64),
false => Ok(DataType::UInt64),
}
}
- (Some(LogicalType::TIME(t)), _) => match t.unit {
+ (Some(LogicalType::Time { unit, .. }), _) => match unit {
ParquetTimeUnit::MILLIS(_) => Err(ArrowError(
"Cannot create INT64 from MILLIS time unit".to_string(),
)),
ParquetTimeUnit::MICROS(_) => Ok(DataType::Time64(TimeUnit::Microsecond)),
ParquetTimeUnit::NANOS(_) => Ok(DataType::Time64(TimeUnit::Nanosecond)),
},
- (Some(LogicalType::TIMESTAMP(t)), _) => Ok(DataType::Timestamp(
- match t.unit {
+ (Some(LogicalType::Timestamp { is_adjusted_to_u_t_c, unit }), _) => Ok(DataType::Timestamp(
+ match unit {
ParquetTimeUnit::MILLIS(_) => TimeUnit::Millisecond,
ParquetTimeUnit::MICROS(_) => TimeUnit::Microsecond,
ParquetTimeUnit::NANOS(_) => TimeUnit::Nanosecond,
},
- if t.is_adjusted_to_u_t_c {
+ if is_adjusted_to_u_t_c {
Some("UTC".to_string())
} else {
None
@@ -753,7 +756,7 @@ impl ParquetTypeConverter<'_> {
(None, ConvertedType::TIMESTAMP_MICROS) => {
Ok(DataType::Timestamp(TimeUnit::Microsecond, None))
}
- (Some(LogicalType::DECIMAL(_)), _) => Ok(self.to_decimal()),
+ (Some(LogicalType::Decimal {..}), _) => Ok(self.to_decimal()),
(None, ConvertedType::DECIMAL) => Ok(self.to_decimal()),
(logical, converted) => Err(ArrowError(format!(
"Unable to convert parquet INT64 logical type {:?} or converted type {}",
@@ -768,7 +771,7 @@ impl ParquetTypeConverter<'_> {
self.schema.get_basic_info().logical_type(),
self.schema.get_basic_info().converted_type(),
) {
- (Some(LogicalType::DECIMAL(_)), _) => Ok(self.to_decimal()),
+ (Some(LogicalType::Decimal {..}), _) => Ok(self.to_decimal()),
(None, ConvertedType::DECIMAL) => Ok(self.to_decimal()),
(None, ConvertedType::INTERVAL) => {
// There is currently no reliable way of determining which IntervalUnit
@@ -804,10 +807,10 @@ impl ParquetTypeConverter<'_> {
#[allow(clippy::wrong_self_convention)]
fn from_byte_array(&self) -> Result<DataType> {
match (self.schema.get_basic_info().logical_type(), self.schema.get_basic_info().converted_type()) {
- (Some(LogicalType::STRING(_)), _) => Ok(DataType::Utf8),
- (Some(LogicalType::JSON(_)), _) => Ok(DataType::Binary),
- (Some(LogicalType::BSON(_)), _) => Ok(DataType::Binary),
- (Some(LogicalType::ENUM(_)), _) => Ok(DataType::Binary),
+ (Some(LogicalType::String), _) => Ok(DataType::Utf8),
+ (Some(LogicalType::Json), _) => Ok(DataType::Binary),
+ (Some(LogicalType::Bson), _) => Ok(DataType::Binary),
+ (Some(LogicalType::Enum), _) => Ok(DataType::Binary),
(None, ConvertedType::NONE) => Ok(DataType::Binary),
(None, ConvertedType::JSON) => Ok(DataType::Binary),
(None, ConvertedType::BSON) => Ok(DataType::Binary),
@@ -830,8 +833,8 @@ impl ParquetTypeConverter<'_> {
self.schema.get_basic_info().logical_type(),
self.schema.get_basic_info().converted_type(),
) {
- (Some(LogicalType::LIST(_)), _) | (_, ConvertedType::LIST) => self.to_list(),
- (Some(LogicalType::MAP(_)), _)
+ (Some(LogicalType::List), _) | (_, ConvertedType::LIST) => self.to_list(),
+ (Some(LogicalType::Map), _)
| (_, ConvertedType::MAP)
| (_, ConvertedType::MAP_KEY_VALUE) => self.to_map(),
(_, _) => {
diff --git a/parquet/src/basic.rs b/parquet/src/basic.rs
index 198b6a382..7eff2156f 100644
--- a/parquet/src/basic.rs
+++ b/parquet/src/basic.rs
@@ -165,19 +165,31 @@ pub enum ConvertedType {
/// [`ConvertedType`]. Please see the README.md for more details.
#[derive(Debug, Clone, PartialEq)]
pub enum LogicalType {
- STRING(StringType),
- MAP(MapType),
- LIST(ListType),
- ENUM(EnumType),
- DECIMAL(DecimalType),
- DATE(DateType),
- TIME(TimeType),
- TIMESTAMP(TimestampType),
- INTEGER(IntType),
- UNKNOWN(NullType),
- JSON(JsonType),
- BSON(BsonType),
- UUID(UUIDType),
+ String,
+ Map,
+ List,
+ Enum,
+ Decimal {
+ scale: i32,
+ precision: i32,
+ },
+ Date,
+ Time {
+ is_adjusted_to_u_t_c: bool,
+ unit: TimeUnit,
+ },
+ Timestamp {
+ is_adjusted_to_u_t_c: bool,
+ unit: TimeUnit,
+ },
+ Integer {
+ bit_width: i8,
+ is_signed: bool,
+ },
+ Unknown,
+ Json,
+ Bson,
+ Uuid,
}
// ----------------------------------------------------------------------
@@ -335,21 +347,21 @@ impl ColumnOrder {
// TODO: Should this take converted and logical type, for compatibility?
match logical_type {
Some(logical) => match logical {
- LogicalType::STRING(_)
- | LogicalType::ENUM(_)
- | LogicalType::JSON(_)
- | LogicalType::BSON(_) => SortOrder::UNSIGNED,
- LogicalType::INTEGER(t) => match t.is_signed {
+ LogicalType::String
+ | LogicalType::Enum
+ | LogicalType::Json
+ | LogicalType::Bson => SortOrder::UNSIGNED,
+ LogicalType::Integer { is_signed, .. } => match is_signed {
true => SortOrder::SIGNED,
false => SortOrder::UNSIGNED,
},
- LogicalType::MAP(_) | LogicalType::LIST(_) => SortOrder::UNDEFINED,
- LogicalType::DECIMAL(_) => SortOrder::SIGNED,
- LogicalType::DATE(_) => SortOrder::SIGNED,
- LogicalType::TIME(_) => SortOrder::SIGNED,
- LogicalType::TIMESTAMP(_) => SortOrder::SIGNED,
- LogicalType::UNKNOWN(_) => SortOrder::UNDEFINED,
- LogicalType::UUID(_) => SortOrder::UNSIGNED,
+ LogicalType::Map | LogicalType::List => SortOrder::UNDEFINED,
+ LogicalType::Decimal { .. } => SortOrder::SIGNED,
+ LogicalType::Date => SortOrder::SIGNED,
+ LogicalType::Time { .. } => SortOrder::SIGNED,
+ LogicalType::Timestamp { .. } => SortOrder::SIGNED,
+ LogicalType::Unknown => SortOrder::UNDEFINED,
+ LogicalType::Uuid => SortOrder::UNSIGNED,
},
// Fall back to converted type
None => Self::get_converted_sort_order(converted_type, physical_type),
@@ -586,19 +598,31 @@ impl convert::From<ConvertedType> for Option<parquet::ConvertedType> {
impl convert::From<parquet::LogicalType> for LogicalType {
fn from(value: parquet::LogicalType) -> Self {
match value {
- parquet::LogicalType::STRING(t) => LogicalType::STRING(t),
- parquet::LogicalType::MAP(t) => LogicalType::MAP(t),
- parquet::LogicalType::LIST(t) => LogicalType::LIST(t),
- parquet::LogicalType::ENUM(t) => LogicalType::ENUM(t),
- parquet::LogicalType::DECIMAL(t) => LogicalType::DECIMAL(t),
- parquet::LogicalType::DATE(t) => LogicalType::DATE(t),
- parquet::LogicalType::TIME(t) => LogicalType::TIME(t),
- parquet::LogicalType::TIMESTAMP(t) => LogicalType::TIMESTAMP(t),
- parquet::LogicalType::INTEGER(t) => LogicalType::INTEGER(t),
- parquet::LogicalType::UNKNOWN(t) => LogicalType::UNKNOWN(t),
- parquet::LogicalType::JSON(t) => LogicalType::JSON(t),
- parquet::LogicalType::BSON(t) => LogicalType::BSON(t),
- parquet::LogicalType::UUID(t) => LogicalType::UUID(t),
+ parquet::LogicalType::STRING(_) => LogicalType::String,
+ parquet::LogicalType::MAP(_) => LogicalType::Map,
+ parquet::LogicalType::LIST(_) => LogicalType::List,
+ parquet::LogicalType::ENUM(_) => LogicalType::Enum,
+ parquet::LogicalType::DECIMAL(t) => LogicalType::Decimal {
+ scale: t.scale,
+ precision: t.precision,
+ },
+ parquet::LogicalType::DATE(_) => LogicalType::Date,
+ parquet::LogicalType::TIME(t) => LogicalType::Time {
+ is_adjusted_to_u_t_c: t.is_adjusted_to_u_t_c,
+ unit: t.unit,
+ },
+ parquet::LogicalType::TIMESTAMP(t) => LogicalType::Timestamp {
+ is_adjusted_to_u_t_c: t.is_adjusted_to_u_t_c,
+ unit: t.unit,
+ },
+ parquet::LogicalType::INTEGER(t) => LogicalType::Integer {
+ bit_width: t.bit_width,
+ is_signed: t.is_signed,
+ },
+ parquet::LogicalType::UNKNOWN(_) => LogicalType::Unknown,
+ parquet::LogicalType::JSON(_) => LogicalType::Json,
+ parquet::LogicalType::BSON(_) => LogicalType::Bson,
+ parquet::LogicalType::UUID(_) => LogicalType::Uuid,
}
}
}
@@ -606,19 +630,39 @@ impl convert::From<parquet::LogicalType> for LogicalType {
impl convert::From<LogicalType> for parquet::LogicalType {
fn from(value: LogicalType) -> Self {
match value {
- LogicalType::STRING(t) => parquet::LogicalType::STRING(t),
- LogicalType::MAP(t) => parquet::LogicalType::MAP(t),
- LogicalType::LIST(t) => parquet::LogicalType::LIST(t),
- LogicalType::ENUM(t) => parquet::LogicalType::ENUM(t),
- LogicalType::DECIMAL(t) => parquet::LogicalType::DECIMAL(t),
- LogicalType::DATE(t) => parquet::LogicalType::DATE(t),
- LogicalType::TIME(t) => parquet::LogicalType::TIME(t),
- LogicalType::TIMESTAMP(t) => parquet::LogicalType::TIMESTAMP(t),
- LogicalType::INTEGER(t) => parquet::LogicalType::INTEGER(t),
- LogicalType::UNKNOWN(t) => parquet::LogicalType::UNKNOWN(t),
- LogicalType::JSON(t) => parquet::LogicalType::JSON(t),
- LogicalType::BSON(t) => parquet::LogicalType::BSON(t),
- LogicalType::UUID(t) => parquet::LogicalType::UUID(t),
+ LogicalType::String => parquet::LogicalType::STRING(Default::default()),
+ LogicalType::Map => parquet::LogicalType::MAP(Default::default()),
+ LogicalType::List => parquet::LogicalType::LIST(Default::default()),
+ LogicalType::Enum => parquet::LogicalType::ENUM(Default::default()),
+ LogicalType::Decimal { scale, precision } => {
+ parquet::LogicalType::DECIMAL(DecimalType { scale, precision })
+ }
+ LogicalType::Date => parquet::LogicalType::DATE(Default::default()),
+ LogicalType::Time {
+ is_adjusted_to_u_t_c,
+ unit,
+ } => parquet::LogicalType::TIME(TimeType {
+ is_adjusted_to_u_t_c,
+ unit,
+ }),
+ LogicalType::Timestamp {
+ is_adjusted_to_u_t_c,
+ unit,
+ } => parquet::LogicalType::TIMESTAMP(TimestampType {
+ is_adjusted_to_u_t_c,
+ unit,
+ }),
+ LogicalType::Integer {
+ bit_width,
+ is_signed,
+ } => parquet::LogicalType::INTEGER(IntType {
+ bit_width,
+ is_signed,
+ }),
+ LogicalType::Unknown => parquet::LogicalType::UNKNOWN(Default::default()),
+ LogicalType::Json => parquet::LogicalType::JSON(Default::default()),
+ LogicalType::Bson => parquet::LogicalType::BSON(Default::default()),
+ LogicalType::Uuid => parquet::LogicalType::UUID(Default::default()),
}
}
}
@@ -636,23 +680,26 @@ impl From<Option<LogicalType>> for ConvertedType {
fn from(value: Option<LogicalType>) -> Self {
match value {
Some(value) => match value {
- LogicalType::STRING(_) => ConvertedType::UTF8,
- LogicalType::MAP(_) => ConvertedType::MAP,
- LogicalType::LIST(_) => ConvertedType::LIST,
- LogicalType::ENUM(_) => ConvertedType::ENUM,
- LogicalType::DECIMAL(_) => ConvertedType::DECIMAL,
- LogicalType::DATE(_) => ConvertedType::DATE,
- LogicalType::TIME(t) => match t.unit {
+ LogicalType::String => ConvertedType::UTF8,
+ LogicalType::Map => ConvertedType::MAP,
+ LogicalType::List => ConvertedType::LIST,
+ LogicalType::Enum => ConvertedType::ENUM,
+ LogicalType::Decimal { .. } => ConvertedType::DECIMAL,
+ LogicalType::Date => ConvertedType::DATE,
+ LogicalType::Time { unit, .. } => match unit {
TimeUnit::MILLIS(_) => ConvertedType::TIME_MILLIS,
TimeUnit::MICROS(_) => ConvertedType::TIME_MICROS,
TimeUnit::NANOS(_) => ConvertedType::NONE,
},
- LogicalType::TIMESTAMP(t) => match t.unit {
+ LogicalType::Timestamp { unit, .. } => match unit {
TimeUnit::MILLIS(_) => ConvertedType::TIMESTAMP_MILLIS,
TimeUnit::MICROS(_) => ConvertedType::TIMESTAMP_MICROS,
TimeUnit::NANOS(_) => ConvertedType::NONE,
},
- LogicalType::INTEGER(t) => match (t.bit_width, t.is_signed) {
+ LogicalType::Integer {
+ bit_width,
+ is_signed,
+ } => match (bit_width, is_signed) {
(8, true) => ConvertedType::INT_8,
(16, true) => ConvertedType::INT_16,
(32, true) => ConvertedType::INT_32,
@@ -663,10 +710,10 @@ impl From<Option<LogicalType>> for ConvertedType {
(64, false) => ConvertedType::UINT_64,
t => panic!("Integer type {:?} is not supported", t),
},
- LogicalType::UNKNOWN(_) => ConvertedType::NONE,
- LogicalType::JSON(_) => ConvertedType::JSON,
- LogicalType::BSON(_) => ConvertedType::BSON,
- LogicalType::UUID(_) => ConvertedType::NONE,
+ LogicalType::Unknown => ConvertedType::NONE,
+ LogicalType::Json => ConvertedType::JSON,
+ LogicalType::Bson => ConvertedType::BSON,
+ LogicalType::Uuid => ConvertedType::NONE,
},
None => ConvertedType::NONE,
}
@@ -860,31 +907,31 @@ impl str::FromStr for LogicalType {
fn from_str(s: &str) -> result::Result<Self, Self::Err> {
match s {
// The type is a placeholder that gets updated elsewhere
- "INTEGER" => Ok(LogicalType::INTEGER(IntType {
+ "INTEGER" => Ok(LogicalType::Integer {
bit_width: 8,
is_signed: false,
- })),
- "MAP" => Ok(LogicalType::MAP(MapType {})),
- "LIST" => Ok(LogicalType::LIST(ListType {})),
- "ENUM" => Ok(LogicalType::ENUM(EnumType {})),
- "DECIMAL" => Ok(LogicalType::DECIMAL(DecimalType {
+ }),
+ "MAP" => Ok(LogicalType::Map),
+ "LIST" => Ok(LogicalType::List),
+ "ENUM" => Ok(LogicalType::Enum),
+ "DECIMAL" => Ok(LogicalType::Decimal {
precision: -1,
scale: -1,
- })),
- "DATE" => Ok(LogicalType::DATE(DateType {})),
- "TIME" => Ok(LogicalType::TIME(TimeType {
+ }),
+ "DATE" => Ok(LogicalType::Date),
+ "TIME" => Ok(LogicalType::Time {
is_adjusted_to_u_t_c: false,
unit: TimeUnit::MILLIS(parquet::MilliSeconds {}),
- })),
- "TIMESTAMP" => Ok(LogicalType::TIMESTAMP(TimestampType {
+ }),
+ "TIMESTAMP" => Ok(LogicalType::Timestamp {
is_adjusted_to_u_t_c: false,
unit: TimeUnit::MILLIS(parquet::MilliSeconds {}),
- })),
- "STRING" => Ok(LogicalType::STRING(StringType {})),
- "JSON" => Ok(LogicalType::JSON(JsonType {})),
- "BSON" => Ok(LogicalType::BSON(BsonType {})),
- "UUID" => Ok(LogicalType::UUID(UUIDType {})),
- "UNKNOWN" => Ok(LogicalType::UNKNOWN(NullType {})),
+ }),
+ "STRING" => Ok(LogicalType::String),
+ "JSON" => Ok(LogicalType::Json),
+ "BSON" => Ok(LogicalType::Bson),
+ "UUID" => Ok(LogicalType::Uuid),
+ "UNKNOWN" => Ok(LogicalType::Unknown),
"INTERVAL" => Err(general_err!("Interval logical type not yet supported")),
other => Err(general_err!("Invalid logical type {}", other)),
}
@@ -1369,144 +1416,144 @@ mod tests {
let logical_none: Option<LogicalType> = None;
assert_eq!(ConvertedType::from(logical_none), ConvertedType::NONE);
assert_eq!(
- ConvertedType::from(Some(LogicalType::DECIMAL(DecimalType {
+ ConvertedType::from(Some(LogicalType::Decimal {
precision: 20,
scale: 5
- }))),
+ })),
ConvertedType::DECIMAL
);
assert_eq!(
- ConvertedType::from(Some(LogicalType::BSON(Default::default()))),
+ ConvertedType::from(Some(LogicalType::Bson)),
ConvertedType::BSON
);
assert_eq!(
- ConvertedType::from(Some(LogicalType::JSON(Default::default()))),
+ ConvertedType::from(Some(LogicalType::Json)),
ConvertedType::JSON
);
assert_eq!(
- ConvertedType::from(Some(LogicalType::STRING(Default::default()))),
+ ConvertedType::from(Some(LogicalType::String)),
ConvertedType::UTF8
);
assert_eq!(
- ConvertedType::from(Some(LogicalType::DATE(Default::default()))),
+ ConvertedType::from(Some(LogicalType::Date)),
ConvertedType::DATE
);
assert_eq!(
- ConvertedType::from(Some(LogicalType::TIME(TimeType {
+ ConvertedType::from(Some(LogicalType::Time {
unit: TimeUnit::MILLIS(Default::default()),
is_adjusted_to_u_t_c: true,
- }))),
+ })),
ConvertedType::TIME_MILLIS
);
assert_eq!(
- ConvertedType::from(Some(LogicalType::TIME(TimeType {
+ ConvertedType::from(Some(LogicalType::Time {
unit: TimeUnit::MICROS(Default::default()),
is_adjusted_to_u_t_c: true,
- }))),
+ })),
ConvertedType::TIME_MICROS
);
assert_eq!(
- ConvertedType::from(Some(LogicalType::TIME(TimeType {
+ ConvertedType::from(Some(LogicalType::Time {
unit: TimeUnit::NANOS(Default::default()),
is_adjusted_to_u_t_c: false,
- }))),
+ })),
ConvertedType::NONE
);
assert_eq!(
- ConvertedType::from(Some(LogicalType::TIMESTAMP(TimestampType {
+ ConvertedType::from(Some(LogicalType::Timestamp {
unit: TimeUnit::MILLIS(Default::default()),
is_adjusted_to_u_t_c: true,
- }))),
+ })),
ConvertedType::TIMESTAMP_MILLIS
);
assert_eq!(
- ConvertedType::from(Some(LogicalType::TIMESTAMP(TimestampType {
+ ConvertedType::from(Some(LogicalType::Timestamp {
unit: TimeUnit::MICROS(Default::default()),
is_adjusted_to_u_t_c: false,
- }))),
+ })),
ConvertedType::TIMESTAMP_MICROS
);
assert_eq!(
- ConvertedType::from(Some(LogicalType::TIMESTAMP(TimestampType {
+ ConvertedType::from(Some(LogicalType::Timestamp {
unit: TimeUnit::NANOS(Default::default()),
is_adjusted_to_u_t_c: false,
- }))),
+ })),
ConvertedType::NONE
);
assert_eq!(
- ConvertedType::from(Some(LogicalType::INTEGER(IntType {
+ ConvertedType::from(Some(LogicalType::Integer {
bit_width: 8,
is_signed: false
- }))),
+ })),
ConvertedType::UINT_8
);
assert_eq!(
- ConvertedType::from(Some(LogicalType::INTEGER(IntType {
+ ConvertedType::from(Some(LogicalType::Integer {
bit_width: 8,
is_signed: true
- }))),
+ })),
ConvertedType::INT_8
);
assert_eq!(
- ConvertedType::from(Some(LogicalType::INTEGER(IntType {
+ ConvertedType::from(Some(LogicalType::Integer {
bit_width: 16,
is_signed: false
- }))),
+ })),
ConvertedType::UINT_16
);
assert_eq!(
- ConvertedType::from(Some(LogicalType::INTEGER(IntType {
+ ConvertedType::from(Some(LogicalType::Integer {
bit_width: 16,
is_signed: true
- }))),
+ })),
ConvertedType::INT_16
);
assert_eq!(
- ConvertedType::from(Some(LogicalType::INTEGER(IntType {
+ ConvertedType::from(Some(LogicalType::Integer {
bit_width: 32,
is_signed: false
- }))),
+ })),
ConvertedType::UINT_32
);
assert_eq!(
- ConvertedType::from(Some(LogicalType::INTEGER(IntType {
+ ConvertedType::from(Some(LogicalType::Integer {
bit_width: 32,
is_signed: true
- }))),
+ })),
ConvertedType::INT_32
);
assert_eq!(
- ConvertedType::from(Some(LogicalType::INTEGER(IntType {
+ ConvertedType::from(Some(LogicalType::Integer {
bit_width: 64,
is_signed: false
- }))),
+ })),
ConvertedType::UINT_64
);
assert_eq!(
- ConvertedType::from(Some(LogicalType::INTEGER(IntType {
+ ConvertedType::from(Some(LogicalType::Integer {
bit_width: 64,
is_signed: true
- }))),
+ })),
ConvertedType::INT_64
);
assert_eq!(
- ConvertedType::from(Some(LogicalType::LIST(Default::default()))),
+ ConvertedType::from(Some(LogicalType::List)),
ConvertedType::LIST
);
assert_eq!(
- ConvertedType::from(Some(LogicalType::MAP(Default::default()))),
+ ConvertedType::from(Some(LogicalType::Map)),
ConvertedType::MAP
);
assert_eq!(
- ConvertedType::from(Some(LogicalType::UUID(Default::default()))),
+ ConvertedType::from(Some(LogicalType::Uuid)),
ConvertedType::NONE
);
assert_eq!(
- ConvertedType::from(Some(LogicalType::ENUM(Default::default()))),
+ ConvertedType::from(Some(LogicalType::Enum)),
ConvertedType::ENUM
);
assert_eq!(
- ConvertedType::from(Some(LogicalType::UNKNOWN(Default::default()))),
+ ConvertedType::from(Some(LogicalType::Unknown)),
ConvertedType::NONE
);
}
@@ -1787,85 +1834,82 @@ mod tests {
// Unsigned comparison (physical type does not matter)
let unsigned = vec![
- LogicalType::STRING(Default::default()),
- LogicalType::JSON(Default::default()),
- LogicalType::BSON(Default::default()),
- LogicalType::ENUM(Default::default()),
- LogicalType::UUID(Default::default()),
- LogicalType::INTEGER(IntType {
+ LogicalType::String,
+ LogicalType::Json,
+ LogicalType::Bson,
+ LogicalType::Enum,
+ LogicalType::Uuid,
+ LogicalType::Integer {
bit_width: 8,
is_signed: false,
- }),
- LogicalType::INTEGER(IntType {
+ },
+ LogicalType::Integer {
bit_width: 16,
is_signed: false,
- }),
- LogicalType::INTEGER(IntType {
+ },
+ LogicalType::Integer {
bit_width: 32,
is_signed: false,
- }),
- LogicalType::INTEGER(IntType {
+ },
+ LogicalType::Integer {
bit_width: 64,
is_signed: false,
- }),
+ },
];
check_sort_order(unsigned, SortOrder::UNSIGNED);
// Signed comparison (physical type does not matter)
let signed = vec![
- LogicalType::INTEGER(IntType {
+ LogicalType::Integer {
bit_width: 8,
is_signed: true,
- }),
- LogicalType::INTEGER(IntType {
+ },
+ LogicalType::Integer {
bit_width: 8,
is_signed: true,
- }),
- LogicalType::INTEGER(IntType {
+ },
+ LogicalType::Integer {
bit_width: 8,
is_signed: true,
- }),
- LogicalType::INTEGER(IntType {
+ },
+ LogicalType::Integer {
bit_width: 8,
is_signed: true,
- }),
- LogicalType::DECIMAL(DecimalType {
+ },
+ LogicalType::Decimal {
scale: 20,
precision: 4,
- }),
- LogicalType::DATE(Default::default()),
- LogicalType::TIME(TimeType {
+ },
+ LogicalType::Date,
+ LogicalType::Time {
is_adjusted_to_u_t_c: false,
unit: TimeUnit::MILLIS(Default::default()),
- }),
- LogicalType::TIME(TimeType {
+ },
+ LogicalType::Time {
is_adjusted_to_u_t_c: false,
unit: TimeUnit::MICROS(Default::default()),
- }),
- LogicalType::TIME(TimeType {
+ },
+ LogicalType::Time {
is_adjusted_to_u_t_c: true,
unit: TimeUnit::NANOS(Default::default()),
- }),
- LogicalType::TIMESTAMP(TimestampType {
+ },
+ LogicalType::Timestamp {
is_adjusted_to_u_t_c: false,
unit: TimeUnit::MILLIS(Default::default()),
- }),
- LogicalType::TIMESTAMP(TimestampType {
+ },
+ LogicalType::Timestamp {
is_adjusted_to_u_t_c: false,
unit: TimeUnit::MICROS(Default::default()),
- }),
- LogicalType::TIMESTAMP(TimestampType {
+ },
+ LogicalType::Timestamp {
is_adjusted_to_u_t_c: true,
unit: TimeUnit::NANOS(Default::default()),
- }),
+ },
];
check_sort_order(signed, SortOrder::SIGNED);
// Undefined comparison
- let undefined = vec![
- LogicalType::LIST(Default::default()),
- LogicalType::MAP(Default::default()),
- ];
+ let undefined = vec![LogicalType::List, LogicalType::Map];
check_sort_order(undefined, SortOrder::UNDEFINED);
}
diff --git a/parquet/src/column/writer.rs b/parquet/src/column/writer.rs
index 27814669c..016d72e92 100644
--- a/parquet/src/column/writer.rs
+++ b/parquet/src/column/writer.rs
@@ -1000,8 +1000,8 @@ impl<T: DataType> ColumnWriterImpl<T> {
/// Evaluate `a > b` according to underlying logical type.
fn compare_greater(&self, a: &T::T, b: &T::T) -> bool {
- if let Some(LogicalType::INTEGER(int_type)) = self.descr.logical_type() {
- if !int_type.is_signed {
+ if let Some(LogicalType::Integer { is_signed, .. }) = self.descr.logical_type() {
+ if !is_signed {
// need to compare unsigned
return a.as_u64().unwrap() > b.as_u64().unwrap();
}
diff --git a/parquet/src/file/writer.rs b/parquet/src/file/writer.rs
index 754ce1e94..c6d0d1066 100644
--- a/parquet/src/file/writer.rs
+++ b/parquet/src/file/writer.rs
@@ -541,7 +541,7 @@ mod tests {
use std::{fs::File, io::Cursor};
- use crate::basic::{Compression, Encoding, IntType, LogicalType, Repetition, Type};
+ use crate::basic::{Compression, Encoding, LogicalType, Repetition, Type};
use crate::column::page::PageReader;
use crate::compression::{create_codec, Codec};
use crate::file::{
@@ -732,10 +732,10 @@ mod tests {
#[test]
fn test_file_writer_v2_with_metadata() {
let file = tempfile::tempfile().unwrap();
- let field_logical_type = Some(LogicalType::INTEGER(IntType {
+ let field_logical_type = Some(LogicalType::Integer {
bit_width: 8,
is_signed: false,
- }));
+ });
let field = Arc::new(
types::Type::primitive_type_builder("col1", Type::INT32)
.with_logical_type(field_logical_type.clone())
diff --git a/parquet/src/schema/parser.rs b/parquet/src/schema/parser.rs
index 3a840d209..140e3e085 100644
--- a/parquet/src/schema/parser.rs
+++ b/parquet/src/schema/parser.rs
@@ -45,8 +45,7 @@
use std::sync::Arc;
use crate::basic::{
- ConvertedType, DecimalType, IntType, LogicalType, Repetition, TimeType, TimeUnit,
- TimestampType, Type as PhysicalType,
+ ConvertedType, LogicalType, Repetition, TimeUnit, Type as PhysicalType,
};
use crate::errors::{ParquetError, Result};
use crate::schema::types::{Type, TypePtr};
@@ -357,7 +356,7 @@ impl<'a> Parser<'a> {
// Parse the concrete logical type
if let Some(tpe) = &logical {
match tpe {
- LogicalType::DECIMAL(_) => {
+ LogicalType::Decimal { .. } => {
if let Some("(") = self.tokenizer.next() {
precision = parse_i32(
self.tokenizer.next(),
@@ -374,14 +373,11 @@ impl<'a> Parser<'a> {
} else {
scale = 0
}
- logical = Some(LogicalType::DECIMAL(DecimalType {
- scale,
- precision,
- }));
+ logical = Some(LogicalType::Decimal { scale, precision });
converted = ConvertedType::from(logical.clone());
}
}
- LogicalType::TIME(_) => {
+ LogicalType::Time { .. } => {
if let Some("(") = self.tokenizer.next() {
let unit = parse_timeunit(
self.tokenizer.next(),
@@ -395,10 +391,10 @@ impl<'a> Parser<'a> {
"Failed to parse timezone info for TIME type",
)?;
assert_token(self.tokenizer.next(), ")")?;
- logical = Some(LogicalType::TIME(TimeType {
+ logical = Some(LogicalType::Time {
is_adjusted_to_u_t_c,
unit,
- }));
+ });
converted = ConvertedType::from(logical.clone());
} else {
// Invalid token for unit
@@ -406,7 +402,7 @@ impl<'a> Parser<'a> {
}
}
}
- LogicalType::TIMESTAMP(_) => {
+ LogicalType::Timestamp { .. } => {
if let Some("(") = self.tokenizer.next() {
let unit = parse_timeunit(
self.tokenizer.next(),
@@ -420,10 +416,10 @@ impl<'a> Parser<'a> {
"Failed to parse timezone info for TIMESTAMP type",
)?;
assert_token(self.tokenizer.next(), ")")?;
- logical = Some(LogicalType::TIMESTAMP(TimestampType {
+ logical = Some(LogicalType::Timestamp {
is_adjusted_to_u_t_c,
unit,
- }));
+ });
converted = ConvertedType::from(logical.clone());
} else {
// Invalid token for unit
@@ -431,7 +427,7 @@ impl<'a> Parser<'a> {
}
}
}
- LogicalType::INTEGER(_) => {
+ LogicalType::Integer { .. } => {
if let Some("(") = self.tokenizer.next() {
let bit_width = parse_i32(
self.tokenizer.next(),
@@ -453,7 +449,7 @@ impl<'a> Parser<'a> {
}
}
_ => {
- return Err(general_err!("Logical type INTEGER cannot be used with physical type {}", physical_type))
+ return Err(general_err!("Logical type Integer cannot be used with physical type {}", physical_type))
}
}
if let Some(",") = self.tokenizer.next() {
@@ -463,10 +459,10 @@ impl<'a> Parser<'a> {
"Failed to parse is_signed for INTEGER type",
)?;
assert_token(self.tokenizer.next(), ")")?;
- logical = Some(LogicalType::INTEGER(IntType {
+ logical = Some(LogicalType::Integer {
bit_width,
is_signed,
- }));
+ });
converted = ConvertedType::from(logical.clone());
} else {
// Invalid token for unit
@@ -925,10 +921,10 @@ mod tests {
"f1",
PhysicalType::FIXED_LEN_BYTE_ARRAY,
)
- .with_logical_type(Some(LogicalType::DECIMAL(DecimalType {
+ .with_logical_type(Some(LogicalType::Decimal {
precision: 9,
scale: 3,
- })))
+ }))
.with_converted_type(ConvertedType::DECIMAL)
.with_length(5)
.with_precision(9)
@@ -941,10 +937,10 @@ mod tests {
"f2",
PhysicalType::FIXED_LEN_BYTE_ARRAY,
)
- .with_logical_type(Some(LogicalType::DECIMAL(DecimalType {
+ .with_logical_type(Some(LogicalType::Decimal {
precision: 38,
scale: 18,
- })))
+ }))
.with_converted_type(ConvertedType::DECIMAL)
.with_length(16)
.with_precision(38)
@@ -992,9 +988,7 @@ mod tests {
Arc::new(
Type::group_type_builder("a1")
.with_repetition(Repetition::OPTIONAL)
- .with_logical_type(Some(LogicalType::LIST(
- Default::default(),
- )))
+ .with_logical_type(Some(LogicalType::List))
.with_converted_type(ConvertedType::LIST)
.with_fields(&mut vec![Arc::new(
Type::primitive_type_builder(
@@ -1012,9 +1006,7 @@ mod tests {
Arc::new(
Type::group_type_builder("b1")
.with_repetition(Repetition::OPTIONAL)
- .with_logical_type(Some(LogicalType::LIST(
- Default::default(),
- )))
+ .with_logical_type(Some(LogicalType::List))
.with_converted_type(ConvertedType::LIST)
.with_fields(&mut vec![Arc::new(
Type::group_type_builder("b2")
@@ -1101,7 +1093,7 @@ mod tests {
),
Arc::new(
Type::primitive_type_builder("_5", PhysicalType::INT32)
- .with_logical_type(Some(LogicalType::DATE(Default::default())))
+ .with_logical_type(Some(LogicalType::Date))
.with_converted_type(ConvertedType::DATE)
.build()
.unwrap(),
@@ -1148,20 +1140,20 @@ mod tests {
Arc::new(
Type::primitive_type_builder("_1", PhysicalType::INT32)
.with_repetition(Repetition::REQUIRED)
- .with_logical_type(Some(LogicalType::INTEGER(IntType {
+ .with_logical_type(Some(LogicalType::Integer {
bit_width: 8,
is_signed: true,
- })))
+ }))
.build()
.unwrap(),
),
Arc::new(
Type::primitive_type_builder("_2", PhysicalType::INT32)
.with_repetition(Repetition::REQUIRED)
- .with_logical_type(Some(LogicalType::INTEGER(IntType {
+ .with_logical_type(Some(LogicalType::Integer {
bit_width: 16,
is_signed: false,
- })))
+ }))
.build()
.unwrap(),
),
@@ -1179,49 +1171,49 @@ mod tests {
),
Arc::new(
Type::primitive_type_builder("_5", PhysicalType::INT32)
- .with_logical_type(Some(LogicalType::DATE(Default::default())))
+ .with_logical_type(Some(LogicalType::Date))
.build()
.unwrap(),
),
Arc::new(
Type::primitive_type_builder("_6", PhysicalType::INT32)
- .with_logical_type(Some(LogicalType::TIME(TimeType {
+ .with_logical_type(Some(LogicalType::Time {
unit: TimeUnit::MILLIS(Default::default()),
is_adjusted_to_u_t_c: false,
- })))
+ }))
.build()
.unwrap(),
),
Arc::new(
Type::primitive_type_builder("_7", PhysicalType::INT64)
- .with_logical_type(Some(LogicalType::TIME(TimeType {
+ .with_logical_type(Some(LogicalType::Time {
unit: TimeUnit::MICROS(Default::default()),
is_adjusted_to_u_t_c: true,
- })))
+ }))
.build()
.unwrap(),
),
Arc::new(
Type::primitive_type_builder("_8", PhysicalType::INT64)
- .with_logical_type(Some(LogicalType::TIMESTAMP(TimestampType {
+ .with_logical_type(Some(LogicalType::Timestamp {
unit: TimeUnit::MILLIS(Default::default()),
is_adjusted_to_u_t_c: true,
- })))
+ }))
.build()
.unwrap(),
),
Arc::new(
Type::primitive_type_builder("_9", PhysicalType::INT64)
- .with_logical_type(Some(LogicalType::TIMESTAMP(TimestampType {
+ .with_logical_type(Some(LogicalType::Timestamp {
unit: TimeUnit::NANOS(Default::default()),
is_adjusted_to_u_t_c: false,
- })))
+ }))
.build()
.unwrap(),
),
Arc::new(
Type::primitive_type_builder("_10", PhysicalType::BYTE_ARRAY)
- .with_logical_type(Some(LogicalType::STRING(Default::default())))
+ .with_logical_type(Some(LogicalType::String))
.build()
.unwrap(),
),
diff --git a/parquet/src/schema/printer.rs b/parquet/src/schema/printer.rs
index 2c079d2e9..5cfd30dd9 100644
--- a/parquet/src/schema/printer.rs
+++ b/parquet/src/schema/printer.rs
@@ -238,35 +238,40 @@ fn print_logical_and_converted(
) -> String {
match logical_type {
Some(logical_type) => match logical_type {
- LogicalType::INTEGER(t) => {
- format!("INTEGER({},{})", t.bit_width, t.is_signed)
+ LogicalType::Integer {
+ bit_width,
+ is_signed,
+ } => {
+ format!("INTEGER({},{})", bit_width, is_signed)
}
- LogicalType::DECIMAL(t) => {
- format!("DECIMAL({},{})", t.precision, t.scale)
+ LogicalType::Decimal { scale, precision } => {
+ format!("DECIMAL({},{})", precision, scale)
}
- LogicalType::TIMESTAMP(t) => {
+ LogicalType::Timestamp {
+ is_adjusted_to_u_t_c,
+ unit,
+ } => {
format!(
"TIMESTAMP({},{})",
- print_timeunit(&t.unit),
- t.is_adjusted_to_u_t_c
+ print_timeunit(unit),
+ is_adjusted_to_u_t_c
)
}
- LogicalType::TIME(t) => {
- format!(
- "TIME({},{})",
- print_timeunit(&t.unit),
- t.is_adjusted_to_u_t_c
- )
+ LogicalType::Time {
+ is_adjusted_to_u_t_c,
+ unit,
+ } => {
+ format!("TIME({},{})", print_timeunit(unit), is_adjusted_to_u_t_c)
}
- LogicalType::DATE(_) => "DATE".to_string(),
- LogicalType::BSON(_) => "BSON".to_string(),
- LogicalType::JSON(_) => "JSON".to_string(),
- LogicalType::STRING(_) => "STRING".to_string(),
- LogicalType::UUID(_) => "UUID".to_string(),
- LogicalType::ENUM(_) => "ENUM".to_string(),
- LogicalType::LIST(_) => "LIST".to_string(),
- LogicalType::MAP(_) => "MAP".to_string(),
- LogicalType::UNKNOWN(_) => "UNKNOWN".to_string(),
+ LogicalType::Date => "DATE".to_string(),
+ LogicalType::Bson => "BSON".to_string(),
+ LogicalType::Json => "JSON".to_string(),
+ LogicalType::String => "STRING".to_string(),
+ LogicalType::Uuid => "UUID".to_string(),
+ LogicalType::Enum => "ENUM".to_string(),
+ LogicalType::List => "LIST".to_string(),
+ LogicalType::Map => "MAP".to_string(),
+ LogicalType::Unknown => "UNKNOWN".to_string(),
},
None => {
// Also print converted type if it is available
@@ -379,10 +384,7 @@ mod tests {
use std::sync::Arc;
- use crate::basic::{
- DateType, DecimalType, IntType, LogicalType, Repetition, TimeType, TimestampType,
- Type as PhysicalType,
- };
+ use crate::basic::{LogicalType, Repetition, Type as PhysicalType};
use crate::errors::Result;
use crate::schema::{parser::parse_message_type, types::Type};
@@ -434,10 +436,10 @@ mod tests {
build_primitive_type(
"field",
PhysicalType::INT32,
- Some(LogicalType::INTEGER(IntType {
+ Some(LogicalType::Integer {
bit_width: 32,
is_signed: true,
- })),
+ }),
ConvertedType::NONE,
Repetition::REQUIRED,
)
@@ -448,10 +450,10 @@ mod tests {
build_primitive_type(
"field",
PhysicalType::INT32,
- Some(LogicalType::INTEGER(IntType {
+ Some(LogicalType::Integer {
bit_width: 8,
is_signed: false,
- })),
+ }),
ConvertedType::NONE,
Repetition::OPTIONAL,
)
@@ -462,10 +464,10 @@ mod tests {
build_primitive_type(
"field",
PhysicalType::INT32,
- Some(LogicalType::INTEGER(IntType {
+ Some(LogicalType::Integer {
bit_width: 16,
is_signed: true,
- })),
+ }),
ConvertedType::INT_16,
Repetition::REPEATED,
)
@@ -509,10 +511,10 @@ mod tests {
build_primitive_type(
"field",
PhysicalType::INT64,
- Some(LogicalType::TIMESTAMP(TimestampType {
+ Some(LogicalType::Timestamp {
is_adjusted_to_u_t_c: true,
unit: TimeUnit::MILLIS(Default::default()),
- })),
+ }),
ConvertedType::NONE,
Repetition::REQUIRED,
)
@@ -523,7 +525,7 @@ mod tests {
build_primitive_type(
"field",
PhysicalType::INT32,
- Some(LogicalType::DATE(DateType {})),
+ Some(LogicalType::Date),
ConvertedType::NONE,
Repetition::OPTIONAL,
)
@@ -534,10 +536,10 @@ mod tests {
build_primitive_type(
"field",
PhysicalType::INT32,
- Some(LogicalType::TIME(TimeType {
+ Some(LogicalType::Time {
unit: TimeUnit::MILLIS(Default::default()),
is_adjusted_to_u_t_c: false,
- })),
+ }),
ConvertedType::TIME_MILLIS,
Repetition::REQUIRED,
)
@@ -570,7 +572,7 @@ mod tests {
build_primitive_type(
"field",
PhysicalType::BYTE_ARRAY,
- Some(LogicalType::JSON(Default::default())),
+ Some(LogicalType::Json),
ConvertedType::JSON,
Repetition::REQUIRED,
)
@@ -581,7 +583,7 @@ mod tests {
build_primitive_type(
"field",
PhysicalType::BYTE_ARRAY,
- Some(LogicalType::BSON(Default::default())),
+ Some(LogicalType::Bson),
ConvertedType::BSON,
Repetition::REQUIRED,
)
@@ -592,7 +594,7 @@ mod tests {
build_primitive_type(
"field",
PhysicalType::BYTE_ARRAY,
- Some(LogicalType::STRING(Default::default())),
+ Some(LogicalType::String),
ConvertedType::NONE,
Repetition::REQUIRED,
)
@@ -634,7 +636,7 @@ mod tests {
),
(
Type::primitive_type_builder("field", PhysicalType::FIXED_LEN_BYTE_ARRAY)
- .with_logical_type(Some(LogicalType::UUID(Default::default())))
+ .with_logical_type(Some(LogicalType::Uuid))
.with_length(16)
.with_repetition(Repetition::REQUIRED)
.build()
@@ -646,10 +648,10 @@ mod tests {
"decimal",
PhysicalType::FIXED_LEN_BYTE_ARRAY,
)
- .with_logical_type(Some(LogicalType::DECIMAL(DecimalType {
+ .with_logical_type(Some(LogicalType::Decimal {
precision: 32,
scale: 20,
- })))
+ }))
.with_precision(32)
.with_scale(20)
.with_length(decimal_length_from_precision(32))
@@ -699,7 +701,7 @@ mod tests {
.with_id(1)
.build();
let f3 = Type::primitive_type_builder("f3", PhysicalType::BYTE_ARRAY)
- .with_logical_type(Some(LogicalType::STRING(Default::default())))
+ .with_logical_type(Some(LogicalType::String))
.with_id(1)
.build();
let f4 =
@@ -751,7 +753,7 @@ mod tests {
let a1 = Type::group_type_builder("a1")
.with_repetition(Repetition::OPTIONAL)
- .with_logical_type(Some(LogicalType::LIST(Default::default())))
+ .with_logical_type(Some(LogicalType::List))
.with_converted_type(ConvertedType::LIST)
.with_fields(&mut vec![Arc::new(a2)])
.build()
@@ -776,7 +778,7 @@ mod tests {
let b1 = Type::group_type_builder("b1")
.with_repetition(Repetition::OPTIONAL)
- .with_logical_type(Some(LogicalType::LIST(Default::default())))
+ .with_logical_type(Some(LogicalType::List))
.with_converted_type(ConvertedType::LIST)
.with_fields(&mut vec![Arc::new(b2)])
.build()
@@ -835,10 +837,10 @@ mod tests {
fn test_print_and_parse_decimal() {
let f1 = Type::primitive_type_builder("f1", PhysicalType::INT32)
.with_repetition(Repetition::OPTIONAL)
- .with_logical_type(Some(LogicalType::DECIMAL(DecimalType {
+ .with_logical_type(Some(LogicalType::Decimal {
precision: 9,
scale: 2,
- })))
+ }))
.with_converted_type(ConvertedType::DECIMAL)
.with_precision(9)
.with_scale(2)
@@ -847,10 +849,10 @@ mod tests {
let f2 = Type::primitive_type_builder("f2", PhysicalType::INT32)
.with_repetition(Repetition::OPTIONAL)
- .with_logical_type(Some(LogicalType::DECIMAL(DecimalType {
+ .with_logical_type(Some(LogicalType::Decimal {
precision: 9,
scale: 0,
- })))
+ }))
.with_converted_type(ConvertedType::DECIMAL)
.with_precision(9)
.with_scale(0)
diff --git a/parquet/src/schema/types.rs b/parquet/src/schema/types.rs
index b156bb671..1785d2950 100644
--- a/parquet/src/schema/types.rs
+++ b/parquet/src/schema/types.rs
@@ -22,7 +22,7 @@ use std::{collections::HashMap, convert::From, fmt, sync::Arc};
use parquet_format::SchemaElement;
use crate::basic::{
- ConvertedType, LogicalType, Repetition, TimeType, TimeUnit, Type as PhysicalType,
+ ConvertedType, LogicalType, Repetition, TimeUnit, Type as PhysicalType,
};
use crate::errors::{ParquetError, Result};
@@ -306,57 +306,57 @@ impl<'a> PrimitiveTypeBuilder<'a> {
}
// Check that logical type and physical type are compatible
match (logical_type, self.physical_type) {
- (LogicalType::MAP(_), _) | (LogicalType::LIST(_), _) => {
+ (LogicalType::Map, _) | (LogicalType::List, _) => {
return Err(general_err!(
"{:?} cannot be applied to a primitive type",
logical_type
));
}
- (LogicalType::ENUM(_), PhysicalType::BYTE_ARRAY) => {}
- (LogicalType::DECIMAL(t), _) => {
+ (LogicalType::Enum, PhysicalType::BYTE_ARRAY) => {}
+ (LogicalType::Decimal { scale, precision }, _) => {
// Check that scale and precision are consistent with legacy values
- if t.scale != self.scale {
+ if *scale != self.scale {
return Err(general_err!(
"DECIMAL logical type scale {} must match self.scale {}",
- t.scale,
+ scale,
self.scale
));
}
- if t.precision != self.precision {
+ if *precision != self.precision {
return Err(general_err!(
"DECIMAL logical type precision {} must match self.precision {}",
- t.precision,
+ precision,
self.precision
));
}
self.check_decimal_precision_scale()?;
}
- (LogicalType::DATE(_), PhysicalType::INT32) => {}
+ (LogicalType::Date, PhysicalType::INT32) => {}
(
- LogicalType::TIME(TimeType {
+ LogicalType::Time {
unit: TimeUnit::MILLIS(_),
..
- }),
+ },
PhysicalType::INT32,
) => {}
- (LogicalType::TIME(t), PhysicalType::INT64) => {
- if t.unit == TimeUnit::MILLIS(Default::default()) {
+ (LogicalType::Time { unit, .. }, PhysicalType::INT64) => {
+ if *unit == TimeUnit::MILLIS(Default::default()) {
return Err(general_err!(
"Cannot use millisecond unit on INT64 type"
));
}
}
- (LogicalType::TIMESTAMP(_), PhysicalType::INT64) => {}
- (LogicalType::INTEGER(t), PhysicalType::INT32)
- if t.bit_width <= 32 => {}
- (LogicalType::INTEGER(t), PhysicalType::INT64)
- if t.bit_width == 64 => {}
+ (LogicalType::Timestamp { .. }, PhysicalType::INT64) => {}
+ (LogicalType::Integer { bit_width, .. }, PhysicalType::INT32)
+ if *bit_width <= 32 => {}
+ (LogicalType::Integer { bit_width, .. }, PhysicalType::INT64)
+ if *bit_width == 64 => {}
// Null type
- (LogicalType::UNKNOWN(_), PhysicalType::INT32) => {}
- (LogicalType::STRING(_), PhysicalType::BYTE_ARRAY) => {}
- (LogicalType::JSON(_), PhysicalType::BYTE_ARRAY) => {}
- (LogicalType::BSON(_), PhysicalType::BYTE_ARRAY) => {}
- (LogicalType::UUID(_), PhysicalType::FIXED_LEN_BYTE_ARRAY) => {}
+ (LogicalType::Unknown, PhysicalType::INT32) => {}
+ (LogicalType::String, PhysicalType::BYTE_ARRAY) => {}
+ (LogicalType::Json, PhysicalType::BYTE_ARRAY) => {}
+ (LogicalType::Bson, PhysicalType::BYTE_ARRAY) => {}
+ (LogicalType::Uuid, PhysicalType::FIXED_LEN_BYTE_ARRAY) => {}
(a, b) => {
return Err(general_err!(
"Cannot annotate {:?} from {} fields",
@@ -1198,7 +1198,6 @@ fn to_thrift_helper(schema: &Type, elements: &mut Vec<SchemaElement>) {
mod tests {
use super::*;
- use crate::basic::{DecimalType, IntType};
use crate::schema::parser::parse_message_type;
// TODO: add tests for v2 types
@@ -1206,10 +1205,10 @@ mod tests {
#[test]
fn test_primitive_type() {
let mut result = Type::primitive_type_builder("foo", PhysicalType::INT32)
- .with_logical_type(Some(LogicalType::INTEGER(IntType {
+ .with_logical_type(Some(LogicalType::Integer {
bit_width: 32,
is_signed: true,
- })))
+ }))
.with_id(0)
.build();
assert!(result.is_ok());
@@ -1221,10 +1220,10 @@ mod tests {
assert_eq!(basic_info.repetition(), Repetition::OPTIONAL);
assert_eq!(
basic_info.logical_type(),
- Some(LogicalType::INTEGER(IntType {
+ Some(LogicalType::Integer {
bit_width: 32,
is_signed: true
- }))
+ })
);
assert_eq!(basic_info.converted_type(), ConvertedType::INT_32);
assert_eq!(basic_info.id(), 0);
@@ -1239,16 +1238,16 @@ mod tests {
// Test illegal inputs with logical type
result = Type::primitive_type_builder("foo", PhysicalType::INT64)
.with_repetition(Repetition::REPEATED)
- .with_logical_type(Some(LogicalType::INTEGER(IntType {
+ .with_logical_type(Some(LogicalType::Integer {
is_signed: true,
bit_width: 8,
- })))
+ }))
.build();
assert!(result.is_err());
if let Err(e) = result {
assert_eq!(
format!("{}", e),
- "Parquet error: Cannot annotate INTEGER(IntType { bit_width: 8, is_signed: true }) from INT64 fields"
+ "Parquet error: Cannot annotate Integer { bit_width: 8, is_signed: true } from INT64 fields"
);
}
@@ -1281,10 +1280,10 @@ mod tests {
result = Type::primitive_type_builder("foo", PhysicalType::BYTE_ARRAY)
.with_repetition(Repetition::REQUIRED)
- .with_logical_type(Some(LogicalType::DECIMAL(DecimalType {
+ .with_logical_type(Some(LogicalType::Decimal {
scale: 32,
precision: 12,
- })))
+ }))
.with_precision(-1)
.with_scale(-1)
.build();
@@ -1505,7 +1504,7 @@ mod tests {
let result = Type::group_type_builder("foo")
.with_repetition(Repetition::REPEATED)
- .with_logical_type(Some(LogicalType::LIST(Default::default())))
+ .with_logical_type(Some(LogicalType::List))
.with_fields(&mut fields)
.with_id(1)
.build();
@@ -1516,10 +1515,7 @@ mod tests {
assert!(tp.is_group());
assert!(!tp.is_primitive());
assert_eq!(basic_info.repetition(), Repetition::REPEATED);
- assert_eq!(
- basic_info.logical_type(),
- Some(LogicalType::LIST(Default::default()))
- );
+ assert_eq!(basic_info.logical_type(), Some(LogicalType::List));
assert_eq!(basic_info.converted_type(), ConvertedType::LIST);
assert_eq!(basic_info.id(), 1);
assert_eq!(tp.get_fields().len(), 2);
diff --git a/parquet_derive/src/parquet_field.rs b/parquet_derive/src/parquet_field.rs
index 8658f59b9..be2e6efaa 100644
--- a/parquet_derive/src/parquet_field.rs
+++ b/parquet_derive/src/parquet_field.rs
@@ -507,48 +507,48 @@ impl Type {
match last_part.trim() {
"bool" => quote! { None },
- "u8" => quote! { Some(LogicalType::INTEGER(IntType {
+ "u8" => quote! { Some(LogicalType::Integer {
bit_width: 8,
is_signed: false,
- })) },
- "u16" => quote! { Some(LogicalType::INTEGER(IntType {
+ }) },
+ "u16" => quote! { Some(LogicalType::Integer {
bit_width: 16,
is_signed: false,
- })) },
- "u32" => quote! { Some(LogicalType::INTEGER(IntType {
+ }) },
+ "u32" => quote! { Some(LogicalType::Integer {
bit_width: 32,
is_signed: false,
- })) },
- "u64" => quote! { Some(LogicalType::INTEGER(IntType {
+ }) },
+ "u64" => quote! { Some(LogicalType::Integer {
bit_width: 64,
is_signed: false,
- })) },
- "i8" => quote! { Some(LogicalType::INTEGER(IntType {
+ }) },
+ "i8" => quote! { Some(LogicalType::Integer {
bit_width: 8,
is_signed: true,
- })) },
- "i16" => quote! { Some(LogicalType::INTEGER(IntType {
+ }) },
+ "i16" => quote! { Some(LogicalType::Integer {
bit_width: 16,
is_signed: true,
- })) },
+ }) },
"i32" | "i64" => quote! { None },
"usize" => {
- quote! { Some(LogicalType::INTEGER(IntType {
+ quote! { Some(LogicalType::Integer {
bit_width: usize::BITS as i8,
is_signed: false
- })) }
+ }) }
}
"isize" => {
- quote! { Some(LogicalType::INTEGER(IntType {
+ quote! { Some(LogicalType::Integer {
bit_width: usize::BITS as i8,
is_signed: true
- })) }
+ }) }
}
- "NaiveDate" => quote! { Some(LogicalType::DATE(Default::default())) },
+ "NaiveDate" => quote! { Some(LogicalType::Date) },
"NaiveDateTime" => quote! { None },
"f32" | "f64" => quote! { None },
- "String" | "str" => quote! { Some(LogicalType::STRING(Default::default())) },
- "Uuid" => quote! { Some(LogicalType::UUID(Default::default())) },
+ "String" | "str" => quote! { Some(LogicalType::String) },
+ "Uuid" => quote! { Some(LogicalType::Uuid) },
f => unimplemented!("{} currently is not supported", f),
}
}