You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by al...@apache.org on 2021/12/20 14:41:06 UTC
[arrow-rs] branch master updated: Add MONTH_DAY_NANO interval type, impl `ArrowNativeType` for `i128` (#779)
This is an automated email from the ASF dual-hosted git repository.
alamb pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git
The following commit(s) were added to refs/heads/master by this push:
new 8c17cff Add MONTH_DAY_NANO interval type, impl `ArrowNativeType` for `i128` (#779)
8c17cff is described below
commit 8c17cff0dab80b55e4e47a0eda0c49b17722b205
Author: baishen <ba...@gmail.com>
AuthorDate: Mon Dec 20 22:41:00 2021 +0800
Add MONTH_DAY_NANO interval type, impl `ArrowNativeType` for `i128` (#779)
* support interval MonthDayNano
* fix
* fix
* fix
* fix test
* add IPC integration test
* fix rat
* update patch
* fix
* fmt
* fix
* fix
* fix
* fix
* fix
* fix
* remove integration-testing/unskip.patch
Co-authored-by: Andrew Lamb <an...@nerdnetworks.org>
---
arrow/Cargo.toml | 2 +-
arrow/src/array/array.rs | 6 +++
arrow/src/array/array_primitive.rs | 18 +++++++++
arrow/src/array/builder.rs | 4 ++
arrow/src/array/data.rs | 7 ++++
arrow/src/array/equal/mod.rs | 3 ++
arrow/src/array/mod.rs | 3 ++
arrow/src/array/ord.rs | 3 ++
arrow/src/array/transform/mod.rs | 4 ++
arrow/src/compute/kernels/cast.rs | 2 +
arrow/src/compute/kernels/sort.rs | 5 +++
arrow/src/compute/kernels/take.rs | 12 ++++++
arrow/src/datatypes/datatype.rs | 14 ++++++-
arrow/src/datatypes/mod.rs | 79 ++++++++++++++++++++++++--------------
arrow/src/datatypes/native.rs | 35 +++++++++++++++++
arrow/src/datatypes/numeric.rs | 1 +
arrow/src/datatypes/types.rs | 6 +++
arrow/src/ipc/convert.rs | 9 +++++
arrow/src/ipc/gen/Schema.rs | 14 +++++--
arrow/src/ipc/reader.rs | 3 +-
arrow/src/util/display.rs | 42 ++++++++++++++++++++
arrow/src/util/integration_util.rs | 4 ++
format/Schema.fbs | 13 ++++++-
integration-testing/src/lib.rs | 43 +++++++++++++++++++++
parquet/src/arrow/arrow_writer.rs | 19 +++++++++
25 files changed, 313 insertions(+), 38 deletions(-)
diff --git a/arrow/Cargo.toml b/arrow/Cargo.toml
index 12c141c..d790387 100644
--- a/arrow/Cargo.toml
+++ b/arrow/Cargo.toml
@@ -39,7 +39,7 @@ path = "src/lib.rs"
[dependencies]
serde = { version = "1.0" }
serde_derive = "1.0"
-serde_json = { version = "1.0", features = ["preserve_order"] }
+serde_json = { version = "1.0", features = ["preserve_order", "arbitrary_precision"] }
indexmap = { version = "1.6", features = ["std"] }
rand = { version = "0.8", optional = true }
num = "0.4"
diff --git a/arrow/src/array/array.rs b/arrow/src/array/array.rs
index 34cdb73..7f790ef 100644
--- a/arrow/src/array/array.rs
+++ b/arrow/src/array/array.rs
@@ -275,6 +275,9 @@ pub fn make_array(data: ArrayData) -> ArrayRef {
DataType::Interval(IntervalUnit::DayTime) => {
Arc::new(IntervalDayTimeArray::from(data)) as ArrayRef
}
+ DataType::Interval(IntervalUnit::MonthDayNano) => {
+ Arc::new(IntervalMonthDayNanoArray::from(data)) as ArrayRef
+ }
DataType::Duration(TimeUnit::Second) => {
Arc::new(DurationSecondArray::from(data)) as ArrayRef
}
@@ -415,6 +418,9 @@ pub fn new_null_array(data_type: &DataType, length: usize) -> ArrayRef {
IntervalUnit::DayTime => {
new_null_sized_array::<IntervalDayTimeType>(data_type, length)
}
+ IntervalUnit::MonthDayNano => {
+ new_null_sized_array::<IntervalMonthDayNanoType>(data_type, length)
+ }
},
DataType::FixedSizeBinary(value_len) => make_array(unsafe {
ArrayData::new_unchecked(
diff --git a/arrow/src/array/array_primitive.rs b/arrow/src/array/array_primitive.rs
index ac56b4a..a9e1ba8 100644
--- a/arrow/src/array/array_primitive.rs
+++ b/arrow/src/array/array_primitive.rs
@@ -444,6 +444,7 @@ def_numeric_from_vec!(Time64MicrosecondType);
def_numeric_from_vec!(Time64NanosecondType);
def_numeric_from_vec!(IntervalYearMonthType);
def_numeric_from_vec!(IntervalDayTimeType);
+def_numeric_from_vec!(IntervalMonthDayNanoType);
def_numeric_from_vec!(DurationSecondType);
def_numeric_from_vec!(DurationMillisecondType);
def_numeric_from_vec!(DurationMicrosecondType);
@@ -649,6 +650,23 @@ mod tests {
assert!(arr.is_null(1));
assert_eq!(-5, arr.value(2));
assert_eq!(-5, arr.values()[2]);
+
+ // a month_day_nano interval contains months, days and nanoseconds,
+ // but we do not yet have accessors for the values.
+ // TODO: implement month, day, and nanos access method for month_day_nano.
+ let arr = IntervalMonthDayNanoArray::from(vec![
+ Some(100000000000000000000),
+ None,
+ Some(-500000000000000000000),
+ ]);
+ assert_eq!(3, arr.len());
+ assert_eq!(0, arr.offset());
+ assert_eq!(1, arr.null_count());
+ assert_eq!(100000000000000000000, arr.value(0));
+ assert_eq!(100000000000000000000, arr.values()[0]);
+ assert!(arr.is_null(1));
+ assert_eq!(-500000000000000000000, arr.value(2));
+ assert_eq!(-500000000000000000000, arr.values()[2]);
}
#[test]
diff --git a/arrow/src/array/builder.rs b/arrow/src/array/builder.rs
index 94c05c1..8a5ef6c 100644
--- a/arrow/src/array/builder.rs
+++ b/arrow/src/array/builder.rs
@@ -1686,6 +1686,9 @@ pub fn make_builder(datatype: &DataType, capacity: usize) -> Box<dyn ArrayBuilde
DataType::Interval(IntervalUnit::DayTime) => {
Box::new(IntervalDayTimeBuilder::new(capacity))
}
+ DataType::Interval(IntervalUnit::MonthDayNano) => {
+ Box::new(IntervalMonthDayNanoBuilder::new(capacity))
+ }
DataType::Duration(TimeUnit::Second) => {
Box::new(DurationSecondBuilder::new(capacity))
}
@@ -2031,6 +2034,7 @@ impl FieldData {
| DataType::Time64(_)
| DataType::Interval(IntervalUnit::DayTime)
| DataType::Duration(_) => self.append_null::<Int64Type>()?,
+ DataType::Interval(IntervalUnit::MonthDayNano) => self.append_null::<IntervalMonthDayNanoType>()?,
DataType::UInt8 => self.append_null::<UInt8Type>()?,
DataType::UInt16 => self.append_null::<UInt16Type>()?,
DataType::UInt32 => self.append_null::<UInt32Type>()?,
diff --git a/arrow/src/array/data.rs b/arrow/src/array/data.rs
index 2dc694c..9b46a79 100644
--- a/arrow/src/array/data.rs
+++ b/arrow/src/array/data.rs
@@ -121,6 +121,10 @@ pub(crate) fn new_buffers(data_type: &DataType, capacity: usize) -> [MutableBuff
MutableBuffer::new(capacity * mem::size_of::<i64>()),
empty_buffer,
],
+ DataType::Interval(IntervalUnit::MonthDayNano) => [
+ MutableBuffer::new(capacity * mem::size_of::<i128>()),
+ empty_buffer,
+ ],
DataType::Utf8 | DataType::Binary => {
let mut buffer = MutableBuffer::new((1 + capacity) * mem::size_of::<i32>());
// safety: `unsafe` code assumes that this buffer is initialized with one element
@@ -1178,6 +1182,9 @@ fn layout(data_type: &DataType) -> DataTypeLayout {
DataType::Interval(IntervalUnit::DayTime) => {
DataTypeLayout::new_fixed_width(size_of::<i64>())
}
+ DataType::Interval(IntervalUnit::MonthDayNano) => {
+ DataTypeLayout::new_fixed_width(size_of::<i128>())
+ }
DataType::Duration(_) => DataTypeLayout::new_fixed_width(size_of::<i64>()),
DataType::Binary => DataTypeLayout::new_binary(size_of::<i32>()),
DataType::FixedSizeBinary(bytes_per_value) => {
diff --git a/arrow/src/array/equal/mod.rs b/arrow/src/array/equal/mod.rs
index 0e8d8bb..9a044e6 100644
--- a/arrow/src/array/equal/mod.rs
+++ b/arrow/src/array/equal/mod.rs
@@ -199,6 +199,9 @@ fn equal_values(
| DataType::Duration(_) => primitive_equal::<i64>(
lhs, rhs, lhs_nulls, rhs_nulls, lhs_start, rhs_start, len,
),
+ DataType::Interval(IntervalUnit::MonthDayNano) => primitive_equal::<i128>(
+ lhs, rhs, lhs_nulls, rhs_nulls, lhs_start, rhs_start, len,
+ ),
DataType::Utf8 | DataType::Binary => variable_sized_equal::<i32>(
lhs, rhs, lhs_nulls, rhs_nulls, lhs_start, rhs_start, len,
),
diff --git a/arrow/src/array/mod.rs b/arrow/src/array/mod.rs
index 26db3e6..1145e83 100644
--- a/arrow/src/array/mod.rs
+++ b/arrow/src/array/mod.rs
@@ -385,6 +385,7 @@ pub type Time64MicrosecondArray = PrimitiveArray<Time64MicrosecondType>;
pub type Time64NanosecondArray = PrimitiveArray<Time64NanosecondType>;
pub type IntervalYearMonthArray = PrimitiveArray<IntervalYearMonthType>;
pub type IntervalDayTimeArray = PrimitiveArray<IntervalDayTimeType>;
+pub type IntervalMonthDayNanoArray = PrimitiveArray<IntervalMonthDayNanoType>;
pub type DurationSecondArray = PrimitiveArray<DurationSecondType>;
pub type DurationMillisecondArray = PrimitiveArray<DurationMillisecondType>;
pub type DurationMicrosecondArray = PrimitiveArray<DurationMicrosecondType>;
@@ -425,6 +426,7 @@ pub type Time64MicrosecondBufferBuilder = BufferBuilder<Time64MicrosecondType>;
pub type Time64NanosecondBufferBuilder = BufferBuilder<Time64NanosecondType>;
pub type IntervalYearMonthBufferBuilder = BufferBuilder<IntervalYearMonthType>;
pub type IntervalDayTimeBufferBuilder = BufferBuilder<IntervalDayTimeType>;
+pub type IntervalMonthDayNanoBufferBuilder = BufferBuilder<IntervalMonthDayNanoType>;
pub type DurationSecondBufferBuilder = BufferBuilder<DurationSecondType>;
pub type DurationMillisecondBufferBuilder = BufferBuilder<DurationMillisecondType>;
pub type DurationMicrosecondBufferBuilder = BufferBuilder<DurationMicrosecondType>;
@@ -473,6 +475,7 @@ pub type Time64MicrosecondBuilder = PrimitiveBuilder<Time64MicrosecondType>;
pub type Time64NanosecondBuilder = PrimitiveBuilder<Time64NanosecondType>;
pub type IntervalYearMonthBuilder = PrimitiveBuilder<IntervalYearMonthType>;
pub type IntervalDayTimeBuilder = PrimitiveBuilder<IntervalDayTimeType>;
+pub type IntervalMonthDayNanoBuilder = PrimitiveBuilder<IntervalMonthDayNanoType>;
pub type DurationSecondBuilder = PrimitiveBuilder<DurationSecondType>;
pub type DurationMillisecondBuilder = PrimitiveBuilder<DurationMillisecondType>;
pub type DurationMicrosecondBuilder = PrimitiveBuilder<DurationMicrosecondType>;
diff --git a/arrow/src/array/ord.rs b/arrow/src/array/ord.rs
index d6534ef..37bd009 100644
--- a/arrow/src/array/ord.rs
+++ b/arrow/src/array/ord.rs
@@ -174,6 +174,9 @@ pub fn build_compare(left: &dyn Array, right: &dyn Array) -> Result<DynComparato
(Interval(DayTime), Interval(DayTime)) => {
compare_primitives::<IntervalDayTimeType>(left, right)
}
+ (Interval(MonthDayNano), Interval(MonthDayNano)) => {
+ compare_primitives::<IntervalMonthDayNanoType>(left, right)
+ }
(Duration(Second), Duration(Second)) => {
compare_primitives::<DurationSecondType>(left, right)
}
diff --git a/arrow/src/array/transform/mod.rs b/arrow/src/array/transform/mod.rs
index 9ad3dbf..4d2d139 100644
--- a/arrow/src/array/transform/mod.rs
+++ b/arrow/src/array/transform/mod.rs
@@ -257,6 +257,9 @@ fn build_extend(array: &ArrayData) -> Extend {
| DataType::Interval(IntervalUnit::DayTime) => {
primitive::build_extend::<i64>(array)
}
+ DataType::Interval(IntervalUnit::MonthDayNano) => {
+ primitive::build_extend::<i128>(array)
+ }
DataType::Utf8 | DataType::Binary => variable_size::build_extend::<i32>(array),
DataType::LargeUtf8 | DataType::LargeBinary => {
variable_size::build_extend::<i64>(array)
@@ -298,6 +301,7 @@ fn build_extend_nulls(data_type: &DataType) -> ExtendNulls {
| DataType::Timestamp(_, _)
| DataType::Duration(_)
| DataType::Interval(IntervalUnit::DayTime) => primitive::extend_nulls::<i64>,
+ DataType::Interval(IntervalUnit::MonthDayNano) => primitive::extend_nulls::<i128>,
DataType::Utf8 | DataType::Binary => variable_size::extend_nulls::<i32>,
DataType::LargeUtf8 | DataType::LargeBinary => variable_size::extend_nulls::<i64>,
DataType::List(_) => list::extend_nulls::<i32>,
diff --git a/arrow/src/compute/kernels/cast.rs b/arrow/src/compute/kernels/cast.rs
index 2c69424..4807363 100644
--- a/arrow/src/compute/kernels/cast.rs
+++ b/arrow/src/compute/kernels/cast.rs
@@ -3785,6 +3785,7 @@ mod tests {
Arc::new(Time64NanosecondArray::from(vec![1000, 2000])),
Arc::new(IntervalYearMonthArray::from(vec![1000, 2000])),
Arc::new(IntervalDayTimeArray::from(vec![1000, 2000])),
+ Arc::new(IntervalMonthDayNanoArray::from(vec![1000, 2000])),
Arc::new(DurationSecondArray::from(vec![1000, 2000])),
Arc::new(DurationMillisecondArray::from(vec![1000, 2000])),
Arc::new(DurationMicrosecondArray::from(vec![1000, 2000])),
@@ -3940,6 +3941,7 @@ mod tests {
Duration(TimeUnit::Nanosecond),
Interval(IntervalUnit::YearMonth),
Interval(IntervalUnit::DayTime),
+ Interval(IntervalUnit::MonthDayNano),
Binary,
FixedSizeBinary(10),
LargeBinary,
diff --git a/arrow/src/compute/kernels/sort.rs b/arrow/src/compute/kernels/sort.rs
index 1046853..5aa1fdf 100644
--- a/arrow/src/compute/kernels/sort.rs
+++ b/arrow/src/compute/kernels/sort.rs
@@ -243,6 +243,11 @@ pub fn sort_to_indices(
DataType::Interval(IntervalUnit::DayTime) => {
sort_primitive::<IntervalDayTimeType, _>(values, v, n, cmp, &options, limit)
}
+ DataType::Interval(IntervalUnit::MonthDayNano) => {
+ sort_primitive::<IntervalMonthDayNanoType, _>(
+ values, v, n, cmp, &options, limit,
+ )
+ }
DataType::Duration(TimeUnit::Second) => {
sort_primitive::<DurationSecondType, _>(values, v, n, cmp, &options, limit)
}
diff --git a/arrow/src/compute/kernels/take.rs b/arrow/src/compute/kernels/take.rs
index 5248629..4e14fac 100644
--- a/arrow/src/compute/kernels/take.rs
+++ b/arrow/src/compute/kernels/take.rs
@@ -171,6 +171,9 @@ where
DataType::Interval(IntervalUnit::DayTime) => {
downcast_take!(IntervalDayTimeType, values, indices)
}
+ DataType::Interval(IntervalUnit::MonthDayNano) => {
+ downcast_take!(IntervalMonthDayNanoType, values, indices)
+ }
DataType::Duration(TimeUnit::Second) => {
downcast_take!(DurationSecondType, values, indices)
}
@@ -1186,6 +1189,15 @@ mod tests {
)
.unwrap();
+ // interval_month_day_nano
+ test_take_primitive_arrays::<IntervalMonthDayNanoType>(
+ vec![Some(0), None, Some(2), Some(-15), None],
+ &index,
+ None,
+ vec![Some(-15), None, None, Some(-15), Some(2)],
+ )
+ .unwrap();
+
// duration_second
test_take_primitive_arrays::<DurationSecondType>(
vec![Some(0), None, Some(2), Some(-15), None],
diff --git a/arrow/src/datatypes/datatype.rs b/arrow/src/datatypes/datatype.rs
index 96fb18b..ae61f08 100644
--- a/arrow/src/datatypes/datatype.rs
+++ b/arrow/src/datatypes/datatype.rs
@@ -158,7 +158,7 @@ pub enum TimeUnit {
Nanosecond,
}
-/// YEAR_MONTH or DAY_TIME interval in SQL style.
+/// YEAR_MONTH, DAY_TIME, or MONTH_DAY_NANO interval in SQL style.
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)]
pub enum IntervalUnit {
/// Indicates the number of elapsed whole months, stored as 4-byte integers.
@@ -166,6 +166,14 @@ pub enum IntervalUnit {
/// Indicates the number of elapsed days and milliseconds,
/// stored as 2 contiguous 32-bit integers (days, milliseconds) (8-bytes in total).
DayTime,
+ /// A triple of the number of elapsed months, days, and nanoseconds.
+ /// The values are stored contiguously in 16 byte blocks. Months and
+ /// days are encoded as 32 bit integers and nanoseconds is encoded as a
+ /// 64 bit integer. All integers are signed. Each field is independent
+ /// (e.g. there is no constraint that nanoseconds have the same sign
+ /// as days or that the quantity of nanoseconds represents less
+ /// than a day's worth of time).
+ MonthDayNano,
}
impl fmt::Display for DataType {
@@ -287,6 +295,9 @@ impl DataType {
Some(p) if p == "YEAR_MONTH" => {
Ok(DataType::Interval(IntervalUnit::YearMonth))
}
+ Some(p) if p == "MONTH_DAY_NANO" => {
+ Ok(DataType::Interval(IntervalUnit::MonthDayNano))
+ }
_ => Err(ArrowError::ParseError(
"interval unit missing or invalid".to_string(),
)),
@@ -442,6 +453,7 @@ impl DataType {
DataType::Interval(unit) => json!({"name": "interval", "unit": match unit {
IntervalUnit::YearMonth => "YEAR_MONTH",
IntervalUnit::DayTime => "DAY_TIME",
+ IntervalUnit::MonthDayNano => "MONTH_DAY_NANO",
}}),
DataType::Duration(unit) => json!({"name": "duration", "unit": match unit {
TimeUnit::Second => "SECOND",
diff --git a/arrow/src/datatypes/mod.rs b/arrow/src/datatypes/mod.rs
index 9920cf9..bc866b0 100644
--- a/arrow/src/datatypes/mod.rs
+++ b/arrow/src/datatypes/mod.rs
@@ -454,13 +454,14 @@ mod tests {
),
Field::new("c19", DataType::Interval(IntervalUnit::DayTime), false),
Field::new("c20", DataType::Interval(IntervalUnit::YearMonth), false),
+ Field::new("c21", DataType::Interval(IntervalUnit::MonthDayNano), false),
Field::new(
- "c21",
+ "c22",
DataType::List(Box::new(Field::new("item", DataType::Boolean, true))),
false,
),
Field::new(
- "c22",
+ "c23",
DataType::FixedSizeList(
Box::new(Field::new("bools", DataType::Boolean, false)),
5,
@@ -468,7 +469,7 @@ mod tests {
false,
),
Field::new(
- "c23",
+ "c24",
DataType::List(Box::new(Field::new(
"inner_list",
DataType::List(Box::new(Field::new(
@@ -481,21 +482,22 @@ mod tests {
true,
),
Field::new(
- "c24",
+ "c25",
DataType::Struct(vec![
Field::new("a", DataType::Utf8, false),
Field::new("b", DataType::UInt16, false),
]),
false,
),
- Field::new("c25", DataType::Interval(IntervalUnit::YearMonth), true),
- Field::new("c26", DataType::Interval(IntervalUnit::DayTime), true),
- Field::new("c27", DataType::Duration(TimeUnit::Second), false),
- Field::new("c28", DataType::Duration(TimeUnit::Millisecond), false),
- Field::new("c29", DataType::Duration(TimeUnit::Microsecond), false),
- Field::new("c30", DataType::Duration(TimeUnit::Nanosecond), false),
+ Field::new("c26", DataType::Interval(IntervalUnit::YearMonth), true),
+ Field::new("c27", DataType::Interval(IntervalUnit::DayTime), true),
+ Field::new("c28", DataType::Interval(IntervalUnit::MonthDayNano), true),
+ Field::new("c29", DataType::Duration(TimeUnit::Second), false),
+ Field::new("c30", DataType::Duration(TimeUnit::Millisecond), false),
+ Field::new("c31", DataType::Duration(TimeUnit::Microsecond), false),
+ Field::new("c32", DataType::Duration(TimeUnit::Nanosecond), false),
Field::new_dict(
- "c31",
+ "c33",
DataType::Dictionary(
Box::new(DataType::Int32),
Box::new(DataType::Utf8),
@@ -504,10 +506,10 @@ mod tests {
123,
true,
),
- Field::new("c32", DataType::LargeBinary, true),
- Field::new("c33", DataType::LargeUtf8, true),
+ Field::new("c34", DataType::LargeBinary, true),
+ Field::new("c35", DataType::LargeUtf8, true),
Field::new(
- "c34",
+ "c36",
DataType::LargeList(Box::new(Field::new(
"inner_large_list",
DataType::LargeList(Box::new(Field::new(
@@ -520,7 +522,7 @@ mod tests {
true,
),
Field::new(
- "c35",
+ "c37",
DataType::Map(
Box::new(Field::new(
"my_entries",
@@ -732,6 +734,15 @@ mod tests {
"name": "c21",
"nullable": false,
"type": {
+ "name": "interval",
+ "unit": "MONTH_DAY_NANO"
+ },
+ "children": []
+ },
+ {
+ "name": "c22",
+ "nullable": false,
+ "type": {
"name": "list"
},
"children": [
@@ -746,7 +757,7 @@ mod tests {
]
},
{
- "name": "c22",
+ "name": "c23",
"nullable": false,
"type": {
"name": "fixedsizelist",
@@ -764,7 +775,7 @@ mod tests {
]
},
{
- "name": "c23",
+ "name": "c24",
"nullable": true,
"type": {
"name": "list"
@@ -790,7 +801,7 @@ mod tests {
]
},
{
- "name": "c24",
+ "name": "c25",
"nullable": false,
"type": {
"name": "struct"
@@ -817,7 +828,7 @@ mod tests {
]
},
{
- "name": "c25",
+ "name": "c26",
"nullable": true,
"type": {
"name": "interval",
@@ -826,7 +837,7 @@ mod tests {
"children": []
},
{
- "name": "c26",
+ "name": "c27",
"nullable": true,
"type": {
"name": "interval",
@@ -835,7 +846,16 @@ mod tests {
"children": []
},
{
- "name": "c27",
+ "name": "c28",
+ "nullable": true,
+ "type": {
+ "name": "interval",
+ "unit": "MONTH_DAY_NANO"
+ },
+ "children": []
+ },
+ {
+ "name": "c29",
"nullable": false,
"type": {
"name": "duration",
@@ -844,7 +864,7 @@ mod tests {
"children": []
},
{
- "name": "c28",
+ "name": "c30",
"nullable": false,
"type": {
"name": "duration",
@@ -853,7 +873,7 @@ mod tests {
"children": []
},
{
- "name": "c29",
+ "name": "c31",
"nullable": false,
"type": {
"name": "duration",
@@ -862,7 +882,7 @@ mod tests {
"children": []
},
{
- "name": "c30",
+ "name": "c32",
"nullable": false,
"type": {
"name": "duration",
@@ -871,7 +891,7 @@ mod tests {
"children": []
},
{
- "name": "c31",
+ "name": "c33",
"nullable": true,
"children": [],
"type": {
@@ -888,7 +908,7 @@ mod tests {
}
},
{
- "name": "c32",
+ "name": "c34",
"nullable": true,
"type": {
"name": "largebinary"
@@ -896,7 +916,7 @@ mod tests {
"children": []
},
{
- "name": "c33",
+ "name": "c35",
"nullable": true,
"type": {
"name": "largeutf8"
@@ -904,7 +924,7 @@ mod tests {
"children": []
},
{
- "name": "c34",
+ "name": "c36",
"nullable": true,
"type": {
"name": "largelist"
@@ -930,7 +950,7 @@ mod tests {
]
},
{
- "name": "c35",
+ "name": "c37",
"nullable": false,
"type": {
"name": "map",
@@ -1156,6 +1176,7 @@ mod tests {
assert_eq!(Some(VNumber(Number::from(1))), 1i16.into_json_value());
assert_eq!(Some(VNumber(Number::from(1))), 1i32.into_json_value());
assert_eq!(Some(VNumber(Number::from(1))), 1i64.into_json_value());
+ assert_eq!(Some(VNumber(Number::from(1))), 1i128.into_json_value());
assert_eq!(Some(VNumber(Number::from(1))), 1u8.into_json_value());
assert_eq!(Some(VNumber(Number::from(1))), 1u16.into_json_value());
assert_eq!(Some(VNumber(Number::from(1))), 1u32.into_json_value());
diff --git a/arrow/src/datatypes/native.rs b/arrow/src/datatypes/native.rs
index 18d593b..019a1f2 100644
--- a/arrow/src/datatypes/native.rs
+++ b/arrow/src/datatypes/native.rs
@@ -67,6 +67,12 @@ pub trait ArrowNativeType:
fn from_i64(_: i64) -> Option<Self> {
None
}
+
+ /// Convert native type from i128.
+ #[inline]
+ fn from_i128(_: i128) -> Option<Self> {
+ None
+ }
}
/// Trait bridging the dynamic-typed nature of Arrow (via [`DataType`]) with the
@@ -201,6 +207,35 @@ impl ArrowNativeType for i64 {
}
}
+impl JsonSerializable for i128 {
+ fn into_json_value(self) -> Option<Value> {
+ Some(self.into())
+ }
+}
+
+impl ArrowNativeType for i128 {
+ #[inline]
+ fn from_usize(v: usize) -> Option<Self> {
+ num::FromPrimitive::from_usize(v)
+ }
+
+ #[inline]
+ fn to_usize(&self) -> Option<usize> {
+ num::ToPrimitive::to_usize(self)
+ }
+
+ #[inline]
+ fn to_isize(&self) -> Option<isize> {
+ num::ToPrimitive::to_isize(self)
+ }
+
+ /// Identity conversion: the input is already an `i128`, so this always returns `Some(val)`.
+ #[inline]
+ fn from_i128(val: i128) -> Option<Self> {
+ Some(val)
+ }
+}
+
impl JsonSerializable for u8 {
fn into_json_value(self) -> Option<Value> {
Some(self.into())
diff --git a/arrow/src/datatypes/numeric.rs b/arrow/src/datatypes/numeric.rs
index 39c6732..cbb953c 100644
--- a/arrow/src/datatypes/numeric.rs
+++ b/arrow/src/datatypes/numeric.rs
@@ -348,6 +348,7 @@ make_numeric_type!(Time64MicrosecondType, i64, i64x8, m64x8);
make_numeric_type!(Time64NanosecondType, i64, i64x8, m64x8);
make_numeric_type!(IntervalYearMonthType, i32, i32x16, m32x16);
make_numeric_type!(IntervalDayTimeType, i64, i64x8, m64x8);
+make_numeric_type!(IntervalMonthDayNanoType, i128, i128x4, m128x4);
make_numeric_type!(DurationSecondType, i64, i64x8, m64x8);
make_numeric_type!(DurationMillisecondType, i64, i64x8, m64x8);
make_numeric_type!(DurationMicrosecondType, i64, i64x8, m64x8);
diff --git a/arrow/src/datatypes/types.rs b/arrow/src/datatypes/types.rs
index 2731e3d..0937c3b 100644
--- a/arrow/src/datatypes/types.rs
+++ b/arrow/src/datatypes/types.rs
@@ -99,6 +99,11 @@ make_type!(
DataType::Interval(IntervalUnit::DayTime)
);
make_type!(
+ IntervalMonthDayNanoType,
+ i128,
+ DataType::Interval(IntervalUnit::MonthDayNano)
+);
+make_type!(
DurationSecondType,
i64,
DataType::Duration(TimeUnit::Second)
@@ -154,6 +159,7 @@ impl ArrowTemporalType for Time64MicrosecondType {}
impl ArrowTemporalType for Time64NanosecondType {}
// impl ArrowTemporalType for IntervalYearMonthType {}
// impl ArrowTemporalType for IntervalDayTimeType {}
+// impl ArrowTemporalType for IntervalMonthDayNanoType {}
impl ArrowTemporalType for DurationSecondType {}
impl ArrowTemporalType for DurationMillisecondType {}
impl ArrowTemporalType for DurationMicrosecondType {}
diff --git a/arrow/src/ipc/convert.rs b/arrow/src/ipc/convert.rs
index dcc9fcc..4d64583 100644
--- a/arrow/src/ipc/convert.rs
+++ b/arrow/src/ipc/convert.rs
@@ -263,6 +263,9 @@ pub(crate) fn get_data_type(field: ipc::Field, may_be_dictionary: bool) -> DataT
DataType::Interval(IntervalUnit::YearMonth)
}
ipc::IntervalUnit::DAY_TIME => DataType::Interval(IntervalUnit::DayTime),
+ ipc::IntervalUnit::MONTH_DAY_NANO => {
+ DataType::Interval(IntervalUnit::MonthDayNano)
+ }
z => panic!("Interval type with unit of {:?} unsupported", z),
}
}
@@ -557,6 +560,7 @@ pub(crate) fn get_fb_field_type<'a>(
let interval_unit = match unit {
IntervalUnit::YearMonth => ipc::IntervalUnit::YEAR_MONTH,
IntervalUnit::DayTime => ipc::IntervalUnit::DAY_TIME,
+ IntervalUnit::MonthDayNano => ipc::IntervalUnit::MONTH_DAY_NANO,
};
builder.add_unit(interval_unit);
FBFieldType {
@@ -757,6 +761,11 @@ mod tests {
DataType::Interval(IntervalUnit::DayTime),
true,
),
+ Field::new(
+ "interval[mdn]",
+ DataType::Interval(IntervalUnit::MonthDayNano),
+ true,
+ ),
Field::new("utf8", DataType::Utf8, false),
Field::new("binary", DataType::Binary, false),
Field::new(
diff --git a/arrow/src/ipc/gen/Schema.rs b/arrow/src/ipc/gen/Schema.rs
index 12af5b5..dd204e0 100644
--- a/arrow/src/ipc/gen/Schema.rs
+++ b/arrow/src/ipc/gen/Schema.rs
@@ -639,8 +639,11 @@ pub const ENUM_MAX_INTERVAL_UNIT: i16 = 1;
note = "Use associated constants instead. This will no longer be generated in 2021."
)]
#[allow(non_camel_case_types)]
-pub const ENUM_VALUES_INTERVAL_UNIT: [IntervalUnit; 2] =
- [IntervalUnit::YEAR_MONTH, IntervalUnit::DAY_TIME];
+pub const ENUM_VALUES_INTERVAL_UNIT: [IntervalUnit; 3] = [
+ IntervalUnit::YEAR_MONTH,
+ IntervalUnit::DAY_TIME,
+ IntervalUnit::MONTH_DAY_NANO,
+];
#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
#[repr(transparent)]
@@ -649,15 +652,18 @@ pub struct IntervalUnit(pub i16);
impl IntervalUnit {
pub const YEAR_MONTH: Self = Self(0);
pub const DAY_TIME: Self = Self(1);
+ pub const MONTH_DAY_NANO: Self = Self(2);
pub const ENUM_MIN: i16 = 0;
- pub const ENUM_MAX: i16 = 1;
- pub const ENUM_VALUES: &'static [Self] = &[Self::YEAR_MONTH, Self::DAY_TIME];
+ pub const ENUM_MAX: i16 = 2;
+ pub const ENUM_VALUES: &'static [Self] =
+ &[Self::YEAR_MONTH, Self::DAY_TIME, Self::MONTH_DAY_NANO];
/// Returns the variant's name or "" if unknown.
pub fn variant_name(self) -> Option<&'static str> {
match self {
Self::YEAR_MONTH => Some("YEAR_MONTH"),
Self::DAY_TIME => Some("DAY_TIME"),
+ Self::MONTH_DAY_NANO => Some("MONTH_DAY_NANO"),
_ => None,
}
}
diff --git a/arrow/src/ipc/reader.rs b/arrow/src/ipc/reader.rs
index 3878586..1d9f36d 100644
--- a/arrow/src/ipc/reader.rs
+++ b/arrow/src/ipc/reader.rs
@@ -312,7 +312,8 @@ fn create_primitive_array(
| Timestamp(_, _)
| Date64
| Duration(_)
- | Interval(IntervalUnit::DayTime) => {
+ | Interval(IntervalUnit::DayTime)
+ | Interval(IntervalUnit::MonthDayNano) => {
let mut builder = ArrayData::builder(data_type.clone())
.len(length)
.buffers(buffers[1..].to_vec())
diff --git a/arrow/src/util/display.rs b/arrow/src/util/display.rs
index 09872a7..e427dcc 100644
--- a/arrow/src/util/display.rs
+++ b/arrow/src/util/display.rs
@@ -106,6 +106,45 @@ macro_rules! make_string_interval_day_time {
}};
}
+macro_rules! make_string_interval_month_day_nano {
+    ($column: ident, $row: ident) => {{
+        let array = $column
+            .as_any()
+            .downcast_ref::<array::IntervalMonthDayNanoArray>()
+            .unwrap();
+        // Yields `Ok(String)`: "NULL" for a null slot, else the formatted interval.
+        let s = if array.is_null($row) {
+            "NULL".to_string()
+        } else {
+            let value: u128 = array.value($row) as u128;
+            // NOTE(review): months come from the top 32 bits; confirm the producer's bit order.
+            let months_part: i32 =
+                ((value & 0xFFFFFFFF000000000000000000000000) >> 96) as i32;
+            let days_part: i32 = ((value & 0xFFFFFFFF0000000000000000) >> 64) as i32;
+            let nanoseconds_part: i64 = (value & 0xFFFFFFFFFFFFFFFF) as i64;
+            let secs = nanoseconds_part / 1000000000;
+            let mins = secs / 60;
+            let hours = mins / 60;
+            let secs = secs - (mins * 60);
+            let mins = mins - (hours * 60);
+            let frac = nanoseconds_part % 1000000000;
+            // `/` and `%` truncate toward zero: emit the sign once, pad the fraction to 9 digits.
+            let sign = if secs < 0 || frac < 0 { "-" } else { "" };
+            format!(
+                "0 years {} mons {} days {} hours {} mins {}{}.{:09} secs",
+                months_part,
+                days_part,
+                hours,
+                mins,
+                sign,
+                secs.abs(),
+                frac.abs(),
+            )
+        };
+        Ok(s)
+    }};
+}
+
macro_rules! make_string_date {
($array_type:ty, $column: ident, $row: ident) => {{
let array = $column.as_any().downcast_ref::<$array_type>().unwrap();
@@ -308,6 +347,9 @@ pub fn array_value_to_string(column: &array::ArrayRef, row: usize) -> Result<Str
IntervalUnit::YearMonth => {
make_string_interval_year_month!(column, row)
}
+ IntervalUnit::MonthDayNano => {
+ make_string_interval_month_day_nano!(column, row)
+ }
},
DataType::List(_) => make_string_from_list!(column, row),
DataType::Dictionary(index_type, _value_type) => match **index_type {
diff --git a/arrow/src/util/integration_util.rs b/arrow/src/util/integration_util.rs
index 1a402bc..e10de76 100644
--- a/arrow/src/util/integration_util.rs
+++ b/arrow/src/util/integration_util.rs
@@ -286,6 +286,10 @@ impl ArrowJsonBatch {
.collect::<Vec<Value>>();
arr.equals_json(&x.iter().collect::<Vec<&Value>>()[..])
}
+ DataType::Interval(IntervalUnit::MonthDayNano) => {
+ let arr = IntervalMonthDayNanoArray::from(arr.data().clone());
+ arr.equals_json(&json_array.iter().collect::<Vec<&Value>>()[..])
+ }
DataType::UInt8 => {
let arr = arr.as_any().downcast_ref::<UInt8Array>().unwrap();
arr.equals_json(&json_array.iter().collect::<Vec<&Value>>()[..])
diff --git a/format/Schema.fbs b/format/Schema.fbs
index 3b00dd4..9da0951 100644
--- a/format/Schema.fbs
+++ b/format/Schema.fbs
@@ -246,15 +246,24 @@ table Timestamp {
timezone: string;
}
-enum IntervalUnit: short { YEAR_MONTH, DAY_TIME}
+enum IntervalUnit: short { YEAR_MONTH, DAY_TIME, MONTH_DAY_NANO}
// A "calendar" interval which models types that don't necessarily
// have a precise duration without the context of a base timestamp (e.g.
// days can differ in length during day light savings time transitions).
+// All integers in the types below are stored in the endianness indicated
+// by the schema.
// YEAR_MONTH - Indicates the number of elapsed whole months, stored as
-// 4-byte integers.
+// 4-byte signed integers.
// DAY_TIME - Indicates the number of elapsed days and milliseconds,
// stored as 2 contiguous 32-bit integers (8-bytes in total). Support
// of this IntervalUnit is not required for full arrow compatibility.
+// MONTH_DAY_NANO - A triple of the number of elapsed months, days, and nanoseconds.
+// The values are stored contiguously in 16 byte blocks. Months and
+// days are encoded as 32 bit integers and nanoseconds is encoded as a
+// 64 bit integer. All integers are signed. Each field is independent
+// (e.g. there is no constraint that nanoseconds have the same sign
+// as days or that the quantity of nanoseconds represents less
+// than a day's worth of time).
table Interval {
unit: IntervalUnit;
}
diff --git a/integration-testing/src/lib.rs b/integration-testing/src/lib.rs
index f25157f..cb57ffc 100644
--- a/integration-testing/src/lib.rs
+++ b/integration-testing/src/lib.rs
@@ -280,6 +280,49 @@ fn array_from_json(
}
Ok(Arc::new(b.finish()))
}
+ DataType::Interval(IntervalUnit::MonthDayNano) => {
+ let mut b = IntervalMonthDayNanoBuilder::new(json_col.count);
+ for (is_valid, value) in json_col
+ .validity
+ .as_ref()
+ .unwrap()
+ .iter()
+ .zip(json_col.data.unwrap())
+ {
+ match is_valid {
+ 1 => b.append_value(match value {
+ Value::Object(v) => {
+ let months = v.get("months").unwrap();
+ let days = v.get("days").unwrap();
+ let nanoseconds = v.get("nanoseconds").unwrap();
+ match (months, days, nanoseconds) {
+ (
+ Value::Number(months),
+ Value::Number(days),
+ Value::Number(nanoseconds),
+ ) => {
+ let months = months.as_i64().unwrap() as i32;
+ let days = days.as_i64().unwrap() as i32;
+ let nanoseconds = nanoseconds.as_i64().unwrap();
+ let months_days_ns: i128 = ((nanoseconds as i128)
+ & 0xFFFFFFFFFFFFFFFF)
+ << 64
+ | ((days as i128) & 0xFFFFFFFF) << 32
+ | ((months as i128) & 0xFFFFFFFF);
+ months_days_ns
+ }
+ (_, _, _) => {
+ panic!("Unable to parse {:?} as MonthDayNano", v)
+ }
+ }
+ }
+ _ => panic!("Unable to parse {:?} as MonthDayNano", value),
+ }),
+ _ => b.append_null(),
+ }?;
+ }
+ Ok(Arc::new(b.finish()))
+ }
DataType::Float32 => {
let mut b = Float32Builder::new(json_col.count);
for (is_valid, value) in json_col
diff --git a/parquet/src/arrow/arrow_writer.rs b/parquet/src/arrow/arrow_writer.rs
index 643f5a2..9f87428 100644
--- a/parquet/src/arrow/arrow_writer.rs
+++ b/parquet/src/arrow/arrow_writer.rs
@@ -426,6 +426,14 @@ fn write_leaf(
.unwrap();
get_interval_dt_array_slice(array, &indices)
}
+ _ => {
+ return Err(ParquetError::NYI(
+ format!(
+ "Attempting to write an Arrow interval type {:?} to parquet that is not yet implemented",
+ interval_unit
+ )
+ ));
+ }
},
ArrowDataType::FixedSizeBinary(_) => {
let array = column
@@ -1463,6 +1471,17 @@ mod tests {
}
#[test]
+ #[should_panic(
+ expected = "Attempting to write an Arrow interval type MonthDayNano to parquet that is not yet implemented"
+ )]
+ fn interval_month_day_nano_single_column() {
+ required_and_optional::<IntervalMonthDayNanoArray, _>(
+ 0..SMALL_SIZE as i128,
+ "interval_month_day_nano_single_column",
+ );
+ }
+
+ #[test]
fn binary_single_column() {
let one_vec: Vec<u8> = (0..SMALL_SIZE as u8).collect();
let many_vecs: Vec<_> = std::iter::repeat(one_vec).take(SMALL_SIZE).collect();