You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by tu...@apache.org on 2022/11/10 00:15:40 UTC
[arrow-rs] branch master updated: Support cast timestamp to time (#3016)
This is an automated email from the ASF dual-hosted git repository.
tustvold pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git
The following commit(s) were added to refs/heads/master by this push:
new d76a0d634 Support cast timestamp to time (#3016)
d76a0d634 is described below
commit d76a0d634521bf051d3bb2774af2006006bab999
Author: 哇呜哇呜呀咦耶 <pi...@gmail.com>
AuthorDate: Thu Nov 10 08:15:34 2022 +0800
Support cast timestamp to time (#3016)
* Support cast timestamp to time
* support timestamp to time and add more test cases
* organize imports
* format code
* comment test
* support timezone
* code format
* support both no timezone and a timezone
---
arrow-array/src/temporal_conversions.rs | 34 ++-
arrow-cast/src/cast.rs | 359 +++++++++++++++++++++++++++++++-
2 files changed, 390 insertions(+), 3 deletions(-)
diff --git a/arrow-array/src/temporal_conversions.rs b/arrow-array/src/temporal_conversions.rs
index 8b1064115..a4d910cc8 100644
--- a/arrow-array/src/temporal_conversions.rs
+++ b/arrow-array/src/temporal_conversions.rs
@@ -20,7 +20,9 @@
use crate::timezone::Tz;
use crate::ArrowPrimitiveType;
use arrow_schema::{DataType, TimeUnit};
-use chrono::{DateTime, Duration, NaiveDate, NaiveDateTime, NaiveTime, TimeZone, Utc};
+use chrono::{
+ DateTime, Duration, NaiveDate, NaiveDateTime, NaiveTime, TimeZone, Timelike, Utc,
+};
/// Number of seconds in a day
pub const SECONDS_IN_DAY: i64 = 86_400;
@@ -33,6 +35,10 @@ pub const NANOSECONDS: i64 = 1_000_000_000;
/// Number of milliseconds in a day
pub const MILLISECONDS_IN_DAY: i64 = SECONDS_IN_DAY * MILLISECONDS;
+/// Number of microseconds in a day
+pub const MICROSECONDS_IN_DAY: i64 = SECONDS_IN_DAY * MICROSECONDS;
+/// Number of nanoseconds in a day
+pub const NANOSECONDS_IN_DAY: i64 = SECONDS_IN_DAY * NANOSECONDS;
/// Number of days between 0001-01-01 and 1970-01-01
pub const EPOCH_DAYS_FROM_CE: i32 = 719_163;
@@ -97,6 +103,32 @@ pub fn time64ns_to_time(v: i64) -> Option<NaiveTime> {
)
}
+/// Converts a [`NaiveTime`] to the `i32` used by `time32(s)`: whole seconds since midnight (the nanosecond field is not read)
+#[inline]
+pub fn time_to_time32s(v: NaiveTime) -> i32 {
+ v.num_seconds_from_midnight() as i32
+}
+
+/// Converts a [`NaiveTime`] to the `i32` used by `time32(ms)`: seconds since midnight scaled by `MILLISECONDS`, plus the nanosecond field scaled down by integer division
+#[inline]
+pub fn time_to_time32ms(v: NaiveTime) -> i32 {
+ (v.num_seconds_from_midnight() as i64 * MILLISECONDS
+ + v.nanosecond() as i64 * MILLISECONDS / NANOSECONDS) as i32 // the sum is formed in i64 and only then narrowed to i32
+}
+
+/// Converts a [`NaiveTime`] to the `i64` used by `time64(us)`: seconds since midnight scaled by `MICROSECONDS`, plus the nanosecond field scaled down by integer division
+#[inline]
+pub fn time_to_time64us(v: NaiveTime) -> i64 {
+ v.num_seconds_from_midnight() as i64 * MICROSECONDS
+ + v.nanosecond() as i64 * MICROSECONDS / NANOSECONDS // integer division drops any remainder below one microsecond
+}
+
+/// Converts a [`NaiveTime`] to the `i64` used by `time64(ns)`: seconds since midnight scaled by `NANOSECONDS`, plus the nanosecond field unchanged
+#[inline]
+pub fn time_to_time64ns(v: NaiveTime) -> i64 {
+ v.num_seconds_from_midnight() as i64 * NANOSECONDS + v.nanosecond() as i64
+}
+
/// converts a `i64` representing a `timestamp(s)` to [`NaiveDateTime`]
#[inline]
pub fn timestamp_s_to_datetime(v: i64) -> Option<NaiveDateTime> {
diff --git a/arrow-cast/src/cast.rs b/arrow-cast/src/cast.rs
index 1cc814730..bbd38fbc0 100644
--- a/arrow-cast/src/cast.rs
+++ b/arrow-cast/src/cast.rs
@@ -35,7 +35,7 @@
//! assert_eq!(7.0, c.value(2));
//! ```
-use chrono::{DateTime, NaiveDateTime, Timelike};
+use chrono::{DateTime, NaiveDateTime, NaiveTime, Timelike};
use std::sync::Arc;
use crate::display::{array_value_to_string, lexical_to_string};
@@ -244,8 +244,15 @@ pub fn can_cast_types(from_type: &DataType, to_type: &DataType) -> bool {
}
(Timestamp(_, _), Int64) => true,
(Int64, Timestamp(_, _)) => true,
- (Timestamp(_, _), Timestamp(_, _) | Date32 | Date64) => true,
(Date64, Timestamp(_, None)) => true,
+ (Timestamp(_, _),
+ Timestamp(_, _)
+ | Date32
+ | Date64
+ | Time32(TimeUnit::Second)
+ | Time32(TimeUnit::Millisecond)
+ | Time64(TimeUnit::Microsecond)
+ | Time64(TimeUnit::Nanosecond)) => true,
(Int64, Duration(_)) => true,
(Duration(_), Int64) => true,
(Interval(from_type), Int64) => {
@@ -559,6 +566,24 @@ fn make_timestamp_array(
}
}
+fn as_time_res_with_timezone<T: ArrowPrimitiveType>( // turns raw value `v`, read as type `T`, into a time of day or a `CastError`
+ v: i64,
+ tz: Option<Tz>,
+) -> Result<NaiveTime, ArrowError> {
+ let time = match tz {
+ Some(tz) => as_datetime_with_timezone::<T>(v, tz).map(|d| d.time()), // timezone given: take the time part of the zoned datetime
+ None => as_datetime::<T>(v).map(|d| d.time()), // no timezone: take the time part of the naive datetime
+ };
+
+ time.ok_or_else(|| { // a `None` from either helper is reported as a cast failure naming the type and value
+ ArrowError::CastError(format!(
+ "Failed to create naive time with {} {}",
+ std::any::type_name::<T>(),
+ v
+ ))
+ })
+}
+
/// Cast `array` to the provided data type and return a new Array with
/// type `to_type`, if possible. It accepts `CastOptions` to allow consumers
/// to configure cast behavior.
@@ -1561,6 +1586,182 @@ pub fn cast_with_options(
as_primitive_array::<TimestampNanosecondType>(array)
.unary::<_, Date64Type>(|x| x / (NANOSECONDS / MILLISECONDS)),
)),
+ (Timestamp(TimeUnit::Second, tz), Time64(TimeUnit::Microsecond)) => {
+ let tz = tz.as_ref().map(|tz| tz.parse()).transpose()?;
+ Ok(Arc::new(
+ as_primitive_array::<TimestampSecondType>(array)
+ .try_unary::<_, Time64MicrosecondType, ArrowError>(|x| {
+ Ok(time_to_time64us(as_time_res_with_timezone::<
+ TimestampSecondType,
+ >(x, tz)?))
+ })?,
+ ))
+ }
+ (Timestamp(TimeUnit::Second, tz), Time64(TimeUnit::Nanosecond)) => {
+ let tz = tz.as_ref().map(|tz| tz.parse()).transpose()?;
+ Ok(Arc::new(
+ as_primitive_array::<TimestampSecondType>(array)
+ .try_unary::<_, Time64NanosecondType, ArrowError>(|x| {
+ Ok(time_to_time64ns(as_time_res_with_timezone::<
+ TimestampSecondType,
+ >(x, tz)?))
+ })?,
+ ))
+ }
+ (Timestamp(TimeUnit::Millisecond, tz), Time64(TimeUnit::Microsecond)) => {
+ let tz = tz.as_ref().map(|tz| tz.parse()).transpose()?;
+ Ok(Arc::new(
+ as_primitive_array::<TimestampMillisecondType>(array)
+ .try_unary::<_, Time64MicrosecondType, ArrowError>(|x| {
+ Ok(time_to_time64us(as_time_res_with_timezone::<
+ TimestampMillisecondType,
+ >(x, tz)?))
+ })?,
+ ))
+ }
+ (Timestamp(TimeUnit::Millisecond, tz), Time64(TimeUnit::Nanosecond)) => {
+ let tz = tz.as_ref().map(|tz| tz.parse()).transpose()?;
+ Ok(Arc::new(
+ as_primitive_array::<TimestampMillisecondType>(array)
+ .try_unary::<_, Time64NanosecondType, ArrowError>(|x| {
+ Ok(time_to_time64ns(as_time_res_with_timezone::<
+ TimestampMillisecondType,
+ >(x, tz)?))
+ })?,
+ ))
+ }
+ (Timestamp(TimeUnit::Microsecond, tz), Time64(TimeUnit::Microsecond)) => {
+ let tz = tz.as_ref().map(|tz| tz.parse()).transpose()?;
+ Ok(Arc::new(
+ as_primitive_array::<TimestampMicrosecondType>(array)
+ .try_unary::<_, Time64MicrosecondType, ArrowError>(|x| {
+ Ok(time_to_time64us(as_time_res_with_timezone::<
+ TimestampMicrosecondType,
+ >(x, tz)?))
+ })?,
+ ))
+ }
+ (Timestamp(TimeUnit::Microsecond, tz), Time64(TimeUnit::Nanosecond)) => {
+ let tz = tz.as_ref().map(|tz| tz.parse()).transpose()?;
+ Ok(Arc::new(
+ as_primitive_array::<TimestampMicrosecondType>(array)
+ .try_unary::<_, Time64NanosecondType, ArrowError>(|x| {
+ Ok(time_to_time64ns(as_time_res_with_timezone::<
+ TimestampMicrosecondType,
+ >(x, tz)?))
+ })?,
+ ))
+ }
+ (Timestamp(TimeUnit::Nanosecond, tz), Time64(TimeUnit::Microsecond)) => {
+ let tz = tz.as_ref().map(|tz| tz.parse()).transpose()?;
+ Ok(Arc::new(
+ as_primitive_array::<TimestampNanosecondType>(array)
+ .try_unary::<_, Time64MicrosecondType, ArrowError>(|x| {
+ Ok(time_to_time64us(as_time_res_with_timezone::<
+ TimestampNanosecondType,
+ >(x, tz)?))
+ })?,
+ ))
+ }
+ (Timestamp(TimeUnit::Nanosecond, tz), Time64(TimeUnit::Nanosecond)) => {
+ let tz = tz.as_ref().map(|tz| tz.parse()).transpose()?;
+ Ok(Arc::new(
+ as_primitive_array::<TimestampNanosecondType>(array)
+ .try_unary::<_, Time64NanosecondType, ArrowError>(|x| {
+ Ok(time_to_time64ns(as_time_res_with_timezone::<
+ TimestampNanosecondType,
+ >(x, tz)?))
+ })?,
+ ))
+ }
+ (Timestamp(TimeUnit::Second, tz), Time32(TimeUnit::Second)) => {
+ let tz = tz.as_ref().map(|tz| tz.parse()).transpose()?;
+ Ok(Arc::new(
+ as_primitive_array::<TimestampSecondType>(array)
+ .try_unary::<_, Time32SecondType, ArrowError>(|x| {
+ Ok(time_to_time32s(as_time_res_with_timezone::<
+ TimestampSecondType,
+ >(x, tz)?))
+ })?,
+ ))
+ }
+ (Timestamp(TimeUnit::Second, tz), Time32(TimeUnit::Millisecond)) => {
+ let tz = tz.as_ref().map(|tz| tz.parse()).transpose()?;
+ Ok(Arc::new(
+ as_primitive_array::<TimestampSecondType>(array)
+ .try_unary::<_, Time32MillisecondType, ArrowError>(|x| {
+ Ok(time_to_time32ms(as_time_res_with_timezone::<
+ TimestampSecondType,
+ >(x, tz)?))
+ })?,
+ ))
+ }
+ (Timestamp(TimeUnit::Millisecond, tz), Time32(TimeUnit::Second)) => {
+ let tz = tz.as_ref().map(|tz| tz.parse()).transpose()?;
+ Ok(Arc::new(
+ as_primitive_array::<TimestampMillisecondType>(array)
+ .try_unary::<_, Time32SecondType, ArrowError>(|x| {
+ Ok(time_to_time32s(as_time_res_with_timezone::<
+ TimestampMillisecondType,
+ >(x, tz)?))
+ })?,
+ ))
+ }
+ (Timestamp(TimeUnit::Millisecond, tz), Time32(TimeUnit::Millisecond)) => {
+ let tz = tz.as_ref().map(|tz| tz.parse()).transpose()?;
+ Ok(Arc::new(
+ as_primitive_array::<TimestampMillisecondType>(array)
+ .try_unary::<_, Time32MillisecondType, ArrowError>(|x| {
+ Ok(time_to_time32ms(as_time_res_with_timezone::<
+ TimestampMillisecondType,
+ >(x, tz)?))
+ })?,
+ ))
+ }
+ (Timestamp(TimeUnit::Microsecond, tz), Time32(TimeUnit::Second)) => {
+ let tz = tz.as_ref().map(|tz| tz.parse()).transpose()?;
+ Ok(Arc::new(
+ as_primitive_array::<TimestampMicrosecondType>(array)
+ .try_unary::<_, Time32SecondType, ArrowError>(|x| {
+ Ok(time_to_time32s(as_time_res_with_timezone::<
+ TimestampMicrosecondType,
+ >(x, tz)?))
+ })?,
+ ))
+ }
+ (Timestamp(TimeUnit::Microsecond, tz), Time32(TimeUnit::Millisecond)) => {
+ let tz = tz.as_ref().map(|tz| tz.parse()).transpose()?;
+ Ok(Arc::new(
+ as_primitive_array::<TimestampMicrosecondType>(array)
+ .try_unary::<_, Time32MillisecondType, ArrowError>(|x| {
+ Ok(time_to_time32ms(as_time_res_with_timezone::<
+ TimestampMicrosecondType,
+ >(x, tz)?))
+ })?,
+ ))
+ }
+ (Timestamp(TimeUnit::Nanosecond, tz), Time32(TimeUnit::Second)) => {
+ let tz = tz.as_ref().map(|tz| tz.parse()).transpose()?;
+ Ok(Arc::new(
+ as_primitive_array::<TimestampNanosecondType>(array)
+ .try_unary::<_, Time32SecondType, ArrowError>(|x| {
+ Ok(time_to_time32s(as_time_res_with_timezone::<
+ TimestampNanosecondType,
+ >(x, tz)?))
+ })?,
+ ))
+ }
+ (Timestamp(TimeUnit::Nanosecond, tz), Time32(TimeUnit::Millisecond)) => {
+ let tz = tz.as_ref().map(|tz| tz.parse()).transpose()?;
+ Ok(Arc::new(
+ as_primitive_array::<TimestampNanosecondType>(array)
+ .try_unary::<_, Time32MillisecondType, ArrowError>(|x| {
+ Ok(time_to_time32ms(as_time_res_with_timezone::<
+ TimestampNanosecondType,
+ >(x, tz)?))
+ })?,
+ ))
+ }
(Date64, Timestamp(TimeUnit::Second, None)) => Ok(Arc::new(
as_primitive_array::<Date64Type>(array)
@@ -4234,6 +4435,160 @@ mod tests {
assert!(c.is_null(2));
}
+ #[test]
+ fn test_cast_timestamp_to_time64() {
+ // timestamps in seconds at +01:00: 86405 s is one day plus 5 s, so the expected local time of day is 01:00:05 (3605 s)
+ let a = TimestampSecondArray::from(vec![Some(86405), Some(1), None])
+ .with_timezone("+01:00".to_string());
+ let array = Arc::new(a) as ArrayRef;
+ let b = cast(&array, &DataType::Time64(TimeUnit::Microsecond)).unwrap();
+ let c = b.as_any().downcast_ref::<Time64MicrosecondArray>().unwrap();
+ assert_eq!(3605000000, c.value(0)); // 01:00:05 in microseconds
+ assert_eq!(3601000000, c.value(1)); // 01:00:01 in microseconds
+ assert!(c.is_null(2)); // a null input stays null
+ let b = cast(&array, &DataType::Time64(TimeUnit::Nanosecond)).unwrap();
+ let c = b.as_any().downcast_ref::<Time64NanosecondArray>().unwrap();
+ assert_eq!(3605000000000, c.value(0)); // 01:00:05 in nanoseconds
+ assert_eq!(3601000000000, c.value(1)); // 01:00:01 in nanoseconds
+ assert!(c.is_null(2));
+
+ // same instants expressed as millisecond timestamps; expected times of day are unchanged
+ let a = TimestampMillisecondArray::from(vec![Some(86405000), Some(1000), None])
+ .with_timezone("+01:00".to_string());
+ let array = Arc::new(a) as ArrayRef;
+ let b = cast(&array, &DataType::Time64(TimeUnit::Microsecond)).unwrap();
+ let c = b.as_any().downcast_ref::<Time64MicrosecondArray>().unwrap();
+ assert_eq!(3605000000, c.value(0));
+ assert_eq!(3601000000, c.value(1));
+ assert!(c.is_null(2));
+ let b = cast(&array, &DataType::Time64(TimeUnit::Nanosecond)).unwrap();
+ let c = b.as_any().downcast_ref::<Time64NanosecondArray>().unwrap();
+ assert_eq!(3605000000000, c.value(0));
+ assert_eq!(3601000000000, c.value(1));
+ assert!(c.is_null(2));
+
+ // same instants expressed as microsecond timestamps
+ let a =
+ TimestampMicrosecondArray::from(vec![Some(86405000000), Some(1000000), None])
+ .with_timezone("+01:00".to_string());
+ let array = Arc::new(a) as ArrayRef;
+ let b = cast(&array, &DataType::Time64(TimeUnit::Microsecond)).unwrap();
+ let c = b.as_any().downcast_ref::<Time64MicrosecondArray>().unwrap();
+ assert_eq!(3605000000, c.value(0));
+ assert_eq!(3601000000, c.value(1));
+ assert!(c.is_null(2));
+ let b = cast(&array, &DataType::Time64(TimeUnit::Nanosecond)).unwrap();
+ let c = b.as_any().downcast_ref::<Time64NanosecondArray>().unwrap();
+ assert_eq!(3605000000000, c.value(0));
+ assert_eq!(3601000000000, c.value(1));
+ assert!(c.is_null(2));
+
+ // same instants expressed as nanosecond timestamps
+ let a = TimestampNanosecondArray::from(vec![
+ Some(86405000000000),
+ Some(1000000000),
+ None,
+ ])
+ .with_timezone("+01:00".to_string());
+ let array = Arc::new(a) as ArrayRef;
+ let b = cast(&array, &DataType::Time64(TimeUnit::Microsecond)).unwrap();
+ let c = b.as_any().downcast_ref::<Time64MicrosecondArray>().unwrap();
+ assert_eq!(3605000000, c.value(0));
+ assert_eq!(3601000000, c.value(1));
+ assert!(c.is_null(2));
+ let b = cast(&array, &DataType::Time64(TimeUnit::Nanosecond)).unwrap();
+ let c = b.as_any().downcast_ref::<Time64NanosecondArray>().unwrap();
+ assert_eq!(3605000000000, c.value(0));
+ assert_eq!(3601000000000, c.value(1));
+ assert!(c.is_null(2));
+
+ // out-of-range input: i64::MAX seconds is expected to make the cast return an error instead of a value
+ let a = TimestampSecondArray::from(vec![Some(i64::MAX)])
+ .with_timezone("+01:00".to_string());
+ let array = Arc::new(a) as ArrayRef;
+ let b = cast(&array, &DataType::Time64(TimeUnit::Microsecond));
+ assert!(b.is_err());
+ let b = cast(&array, &DataType::Time64(TimeUnit::Nanosecond));
+ assert!(b.is_err());
+ }
+
+ #[test]
+ fn test_cast_timestamp_to_time32() {
+ // timestamps in seconds at +01:00: 86405 s is one day plus 5 s, so the expected local time of day is 01:00:05 (3605 s)
+ let a = TimestampSecondArray::from(vec![Some(86405), Some(1), None])
+ .with_timezone("+01:00".to_string());
+ let array = Arc::new(a) as ArrayRef;
+ let b = cast(&array, &DataType::Time32(TimeUnit::Second)).unwrap();
+ let c = b.as_any().downcast_ref::<Time32SecondArray>().unwrap();
+ assert_eq!(3605, c.value(0)); // 01:00:05 in seconds
+ assert_eq!(3601, c.value(1)); // 01:00:01 in seconds
+ assert!(c.is_null(2)); // a null input stays null
+ let b = cast(&array, &DataType::Time32(TimeUnit::Millisecond)).unwrap();
+ let c = b.as_any().downcast_ref::<Time32MillisecondArray>().unwrap();
+ assert_eq!(3605000, c.value(0)); // 01:00:05 in milliseconds
+ assert_eq!(3601000, c.value(1)); // 01:00:01 in milliseconds
+ assert!(c.is_null(2));
+
+ // same instants expressed as millisecond timestamps; expected times of day are unchanged
+ let a = TimestampMillisecondArray::from(vec![Some(86405000), Some(1000), None])
+ .with_timezone("+01:00".to_string());
+ let array = Arc::new(a) as ArrayRef;
+ let b = cast(&array, &DataType::Time32(TimeUnit::Second)).unwrap();
+ let c = b.as_any().downcast_ref::<Time32SecondArray>().unwrap();
+ assert_eq!(3605, c.value(0));
+ assert_eq!(3601, c.value(1));
+ assert!(c.is_null(2));
+ let b = cast(&array, &DataType::Time32(TimeUnit::Millisecond)).unwrap();
+ let c = b.as_any().downcast_ref::<Time32MillisecondArray>().unwrap();
+ assert_eq!(3605000, c.value(0));
+ assert_eq!(3601000, c.value(1));
+ assert!(c.is_null(2));
+
+ // same instants expressed as microsecond timestamps
+ let a =
+ TimestampMicrosecondArray::from(vec![Some(86405000000), Some(1000000), None])
+ .with_timezone("+01:00".to_string());
+ let array = Arc::new(a) as ArrayRef;
+ let b = cast(&array, &DataType::Time32(TimeUnit::Second)).unwrap();
+ let c = b.as_any().downcast_ref::<Time32SecondArray>().unwrap();
+ assert_eq!(3605, c.value(0));
+ assert_eq!(3601, c.value(1));
+ assert!(c.is_null(2));
+ let b = cast(&array, &DataType::Time32(TimeUnit::Millisecond)).unwrap();
+ let c = b.as_any().downcast_ref::<Time32MillisecondArray>().unwrap();
+ assert_eq!(3605000, c.value(0));
+ assert_eq!(3601000, c.value(1));
+ assert!(c.is_null(2));
+
+ // same instants expressed as nanosecond timestamps
+ let a = TimestampNanosecondArray::from(vec![
+ Some(86405000000000),
+ Some(1000000000),
+ None,
+ ])
+ .with_timezone("+01:00".to_string());
+ let array = Arc::new(a) as ArrayRef;
+ let b = cast(&array, &DataType::Time32(TimeUnit::Second)).unwrap();
+ let c = b.as_any().downcast_ref::<Time32SecondArray>().unwrap();
+ assert_eq!(3605, c.value(0));
+ assert_eq!(3601, c.value(1));
+ assert!(c.is_null(2));
+ let b = cast(&array, &DataType::Time32(TimeUnit::Millisecond)).unwrap();
+ let c = b.as_any().downcast_ref::<Time32MillisecondArray>().unwrap();
+ assert_eq!(3605000, c.value(0));
+ assert_eq!(3601000, c.value(1));
+ assert!(c.is_null(2));
+
+ // out-of-range input: i64::MAX seconds is expected to make the cast return an error instead of a value
+ let a = TimestampSecondArray::from(vec![Some(i64::MAX)])
+ .with_timezone("+01:00".to_string());
+ let array = Arc::new(a) as ArrayRef;
+ let b = cast(&array, &DataType::Time32(TimeUnit::Second));
+ assert!(b.is_err());
+ let b = cast(&array, &DataType::Time32(TimeUnit::Millisecond));
+ assert!(b.is_err());
+ }
+
#[test]
fn test_cast_date64_to_timestamp() {
let a = Date64Array::from(vec![Some(864000000005), Some(1545696000001), None]);