You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by tu...@apache.org on 2022/11/10 00:15:40 UTC

[arrow-rs] branch master updated: Support cast timestamp to time (#3016)

This is an automated email from the ASF dual-hosted git repository.

tustvold pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git


The following commit(s) were added to refs/heads/master by this push:
     new d76a0d634 Support cast timestamp to time (#3016)
d76a0d634 is described below

commit d76a0d634521bf051d3bb2774af2006006bab999
Author: 哇呜哇呜呀咦耶 <pi...@gmail.com>
AuthorDate: Thu Nov 10 08:15:34 2022 +0800

    Support cast timestamp to time (#3016)
    
    * Support cast timestamp to time
    
    * support timestamp to time and add more test cases
    
    * organize imports
    
    * format code
    
    * comment test
    
    * support timezone
    
    * code format
    
    * support both no timezone and a timezone
---
 arrow-array/src/temporal_conversions.rs |  34 ++-
 arrow-cast/src/cast.rs                  | 359 +++++++++++++++++++++++++++++++-
 2 files changed, 390 insertions(+), 3 deletions(-)

diff --git a/arrow-array/src/temporal_conversions.rs b/arrow-array/src/temporal_conversions.rs
index 8b1064115..a4d910cc8 100644
--- a/arrow-array/src/temporal_conversions.rs
+++ b/arrow-array/src/temporal_conversions.rs
@@ -20,7 +20,9 @@
 use crate::timezone::Tz;
 use crate::ArrowPrimitiveType;
 use arrow_schema::{DataType, TimeUnit};
-use chrono::{DateTime, Duration, NaiveDate, NaiveDateTime, NaiveTime, TimeZone, Utc};
+use chrono::{
+    DateTime, Duration, NaiveDate, NaiveDateTime, NaiveTime, TimeZone, Timelike, Utc,
+};
 
 /// Number of seconds in a day
 pub const SECONDS_IN_DAY: i64 = 86_400;
@@ -33,6 +35,10 @@ pub const NANOSECONDS: i64 = 1_000_000_000;
 
 /// Number of milliseconds in a day
 pub const MILLISECONDS_IN_DAY: i64 = SECONDS_IN_DAY * MILLISECONDS;
+/// Number of microseconds in a day
+pub const MICROSECONDS_IN_DAY: i64 = SECONDS_IN_DAY * MICROSECONDS;
+/// Number of nanoseconds in a day
+pub const NANOSECONDS_IN_DAY: i64 = SECONDS_IN_DAY * NANOSECONDS;
 /// Number of days between 0001-01-01 and 1970-01-01
 pub const EPOCH_DAYS_FROM_CE: i32 = 719_163;
 
@@ -97,6 +103,32 @@ pub fn time64ns_to_time(v: i64) -> Option<NaiveTime> {
     )
 }
 
+/// converts [`NaiveTime`] to whole seconds since midnight as an `i32` (`time32(s)`)
+#[inline]
+pub fn time_to_time32s(v: NaiveTime) -> i32 {
+    v.num_seconds_from_midnight() as i32
+}
+
+/// converts [`NaiveTime`] to whole milliseconds since midnight as an `i32` (`time32(ms)`)
+#[inline]
+pub fn time_to_time32ms(v: NaiveTime) -> i32 {
+    (v.num_seconds_from_midnight() as i64 * MILLISECONDS
+        + v.nanosecond() as i64 * MILLISECONDS / NANOSECONDS) as i32
+}
+
+/// converts [`NaiveTime`] to whole microseconds since midnight as an `i64` (`time64(us)`)
+#[inline]
+pub fn time_to_time64us(v: NaiveTime) -> i64 {
+    v.num_seconds_from_midnight() as i64 * MICROSECONDS
+        + v.nanosecond() as i64 * MICROSECONDS / NANOSECONDS
+}
+
+/// converts [`NaiveTime`] to nanoseconds since midnight as an `i64` (`time64(ns)`)
+#[inline]
+pub fn time_to_time64ns(v: NaiveTime) -> i64 {
+    v.num_seconds_from_midnight() as i64 * NANOSECONDS + v.nanosecond() as i64
+}
+
 /// converts a `i64` representing a `timestamp(s)` to [`NaiveDateTime`]
 #[inline]
 pub fn timestamp_s_to_datetime(v: i64) -> Option<NaiveDateTime> {
diff --git a/arrow-cast/src/cast.rs b/arrow-cast/src/cast.rs
index 1cc814730..bbd38fbc0 100644
--- a/arrow-cast/src/cast.rs
+++ b/arrow-cast/src/cast.rs
@@ -35,7 +35,7 @@
 //! assert_eq!(7.0, c.value(2));
 //! ```
 
-use chrono::{DateTime, NaiveDateTime, Timelike};
+use chrono::{DateTime, NaiveDateTime, NaiveTime, Timelike};
 use std::sync::Arc;
 
 use crate::display::{array_value_to_string, lexical_to_string};
@@ -244,8 +244,15 @@ pub fn can_cast_types(from_type: &DataType, to_type: &DataType) -> bool {
         }
         (Timestamp(_, _), Int64) => true,
         (Int64, Timestamp(_, _)) => true,
-        (Timestamp(_, _), Timestamp(_, _) | Date32 | Date64) => true,
         (Date64, Timestamp(_, None)) => true,
+        (Timestamp(_, _),
+            Timestamp(_, _)
+            | Date32
+            | Date64
+            | Time32(TimeUnit::Second)
+            | Time32(TimeUnit::Millisecond)
+            | Time64(TimeUnit::Microsecond)
+            | Time64(TimeUnit::Nanosecond)) => true,
         (Int64, Duration(_)) => true,
         (Duration(_), Int64) => true,
         (Interval(from_type), Int64) => {
@@ -559,6 +566,24 @@ fn make_timestamp_array(
     }
 }
 
+fn as_time_res_with_timezone<T: ArrowPrimitiveType>(
+    v: i64,
+    tz: Option<Tz>,
+) -> Result<NaiveTime, ArrowError> {
+    let time = match tz {
+        Some(tz) => as_datetime_with_timezone::<T>(v, tz).map(|d| d.time()),
+        None => as_datetime::<T>(v).map(|d| d.time()),
+    };
+    // `None` means `v` yielded no datetime for `T`; report that as a `CastError`
+    time.ok_or_else(|| {
+        ArrowError::CastError(format!(
+            "Failed to create naive time with {} {}",
+            std::any::type_name::<T>(),
+            v
+        ))
+    })
+}
+
 /// Cast `array` to the provided data type and return a new Array with
 /// type `to_type`, if possible. It accepts `CastOptions` to allow consumers
 /// to configure cast behavior.
@@ -1561,6 +1586,182 @@ pub fn cast_with_options(
             as_primitive_array::<TimestampNanosecondType>(array)
                 .unary::<_, Date64Type>(|x| x / (NANOSECONDS / MILLISECONDS)),
         )),
+        (Timestamp(TimeUnit::Second, tz), Time64(TimeUnit::Microsecond)) => {
+            let tz = tz.as_ref().map(|tz| tz.parse()).transpose()?;
+            Ok(Arc::new(
+                as_primitive_array::<TimestampSecondType>(array)
+                    .try_unary::<_, Time64MicrosecondType, ArrowError>(|x| {
+                        Ok(time_to_time64us(as_time_res_with_timezone::<
+                            TimestampSecondType,
+                        >(x, tz)?))
+                    })?,
+            ))
+        }
+        (Timestamp(TimeUnit::Second, tz), Time64(TimeUnit::Nanosecond)) => {
+            let tz = tz.as_ref().map(|tz| tz.parse()).transpose()?;
+            Ok(Arc::new(
+                as_primitive_array::<TimestampSecondType>(array)
+                    .try_unary::<_, Time64NanosecondType, ArrowError>(|x| {
+                        Ok(time_to_time64ns(as_time_res_with_timezone::<
+                            TimestampSecondType,
+                        >(x, tz)?))
+                    })?,
+            ))
+        }
+        (Timestamp(TimeUnit::Millisecond, tz), Time64(TimeUnit::Microsecond)) => {
+            let tz = tz.as_ref().map(|tz| tz.parse()).transpose()?;
+            Ok(Arc::new(
+                as_primitive_array::<TimestampMillisecondType>(array)
+                    .try_unary::<_, Time64MicrosecondType, ArrowError>(|x| {
+                    Ok(time_to_time64us(as_time_res_with_timezone::<
+                        TimestampMillisecondType,
+                    >(x, tz)?))
+                })?,
+            ))
+        }
+        (Timestamp(TimeUnit::Millisecond, tz), Time64(TimeUnit::Nanosecond)) => {
+            let tz = tz.as_ref().map(|tz| tz.parse()).transpose()?;
+            Ok(Arc::new(
+                as_primitive_array::<TimestampMillisecondType>(array)
+                    .try_unary::<_, Time64NanosecondType, ArrowError>(|x| {
+                    Ok(time_to_time64ns(as_time_res_with_timezone::<
+                        TimestampMillisecondType,
+                    >(x, tz)?))
+                })?,
+            ))
+        }
+        (Timestamp(TimeUnit::Microsecond, tz), Time64(TimeUnit::Microsecond)) => {
+            let tz = tz.as_ref().map(|tz| tz.parse()).transpose()?;
+            Ok(Arc::new(
+                as_primitive_array::<TimestampMicrosecondType>(array)
+                    .try_unary::<_, Time64MicrosecondType, ArrowError>(|x| {
+                    Ok(time_to_time64us(as_time_res_with_timezone::<
+                        TimestampMicrosecondType,
+                    >(x, tz)?))
+                })?,
+            ))
+        }
+        (Timestamp(TimeUnit::Microsecond, tz), Time64(TimeUnit::Nanosecond)) => {
+            let tz = tz.as_ref().map(|tz| tz.parse()).transpose()?;
+            Ok(Arc::new(
+                as_primitive_array::<TimestampMicrosecondType>(array)
+                    .try_unary::<_, Time64NanosecondType, ArrowError>(|x| {
+                    Ok(time_to_time64ns(as_time_res_with_timezone::<
+                        TimestampMicrosecondType,
+                    >(x, tz)?))
+                })?,
+            ))
+        }
+        (Timestamp(TimeUnit::Nanosecond, tz), Time64(TimeUnit::Microsecond)) => {
+            let tz = tz.as_ref().map(|tz| tz.parse()).transpose()?;
+            Ok(Arc::new(
+                as_primitive_array::<TimestampNanosecondType>(array)
+                    .try_unary::<_, Time64MicrosecondType, ArrowError>(|x| {
+                    Ok(time_to_time64us(as_time_res_with_timezone::<
+                        TimestampNanosecondType,
+                    >(x, tz)?))
+                })?,
+            ))
+        }
+        (Timestamp(TimeUnit::Nanosecond, tz), Time64(TimeUnit::Nanosecond)) => {
+            let tz = tz.as_ref().map(|tz| tz.parse()).transpose()?;
+            Ok(Arc::new(
+                as_primitive_array::<TimestampNanosecondType>(array)
+                    .try_unary::<_, Time64NanosecondType, ArrowError>(|x| {
+                    Ok(time_to_time64ns(as_time_res_with_timezone::<
+                        TimestampNanosecondType,
+                    >(x, tz)?))
+                })?,
+            ))
+        }
+        (Timestamp(TimeUnit::Second, tz), Time32(TimeUnit::Second)) => {
+            let tz = tz.as_ref().map(|tz| tz.parse()).transpose()?;
+            Ok(Arc::new(
+                as_primitive_array::<TimestampSecondType>(array)
+                    .try_unary::<_, Time32SecondType, ArrowError>(|x| {
+                        Ok(time_to_time32s(as_time_res_with_timezone::<
+                            TimestampSecondType,
+                        >(x, tz)?))
+                    })?,
+            ))
+        }
+        (Timestamp(TimeUnit::Second, tz), Time32(TimeUnit::Millisecond)) => {
+            let tz = tz.as_ref().map(|tz| tz.parse()).transpose()?;
+            Ok(Arc::new(
+                as_primitive_array::<TimestampSecondType>(array)
+                    .try_unary::<_, Time32MillisecondType, ArrowError>(|x| {
+                        Ok(time_to_time32ms(as_time_res_with_timezone::<
+                            TimestampSecondType,
+                        >(x, tz)?))
+                    })?,
+            ))
+        }
+        (Timestamp(TimeUnit::Millisecond, tz), Time32(TimeUnit::Second)) => {
+            let tz = tz.as_ref().map(|tz| tz.parse()).transpose()?;
+            Ok(Arc::new(
+                as_primitive_array::<TimestampMillisecondType>(array)
+                    .try_unary::<_, Time32SecondType, ArrowError>(|x| {
+                        Ok(time_to_time32s(as_time_res_with_timezone::<
+                            TimestampMillisecondType,
+                        >(x, tz)?))
+                    })?,
+            ))
+        }
+        (Timestamp(TimeUnit::Millisecond, tz), Time32(TimeUnit::Millisecond)) => {
+            let tz = tz.as_ref().map(|tz| tz.parse()).transpose()?;
+            Ok(Arc::new(
+                as_primitive_array::<TimestampMillisecondType>(array)
+                    .try_unary::<_, Time32MillisecondType, ArrowError>(|x| {
+                    Ok(time_to_time32ms(as_time_res_with_timezone::<
+                        TimestampMillisecondType,
+                    >(x, tz)?))
+                })?,
+            ))
+        }
+        (Timestamp(TimeUnit::Microsecond, tz), Time32(TimeUnit::Second)) => {
+            let tz = tz.as_ref().map(|tz| tz.parse()).transpose()?;
+            Ok(Arc::new(
+                as_primitive_array::<TimestampMicrosecondType>(array)
+                    .try_unary::<_, Time32SecondType, ArrowError>(|x| {
+                        Ok(time_to_time32s(as_time_res_with_timezone::<
+                            TimestampMicrosecondType,
+                        >(x, tz)?))
+                    })?,
+            ))
+        }
+        (Timestamp(TimeUnit::Microsecond, tz), Time32(TimeUnit::Millisecond)) => {
+            let tz = tz.as_ref().map(|tz| tz.parse()).transpose()?;
+            Ok(Arc::new(
+                as_primitive_array::<TimestampMicrosecondType>(array)
+                    .try_unary::<_, Time32MillisecondType, ArrowError>(|x| {
+                    Ok(time_to_time32ms(as_time_res_with_timezone::<
+                        TimestampMicrosecondType,
+                    >(x, tz)?))
+                })?,
+            ))
+        }
+        (Timestamp(TimeUnit::Nanosecond, tz), Time32(TimeUnit::Second)) => {
+            let tz = tz.as_ref().map(|tz| tz.parse()).transpose()?;
+            Ok(Arc::new(
+                as_primitive_array::<TimestampNanosecondType>(array)
+                    .try_unary::<_, Time32SecondType, ArrowError>(|x| {
+                        Ok(time_to_time32s(as_time_res_with_timezone::<
+                            TimestampNanosecondType,
+                        >(x, tz)?))
+                    })?,
+            ))
+        }
+        (Timestamp(TimeUnit::Nanosecond, tz), Time32(TimeUnit::Millisecond)) => {
+            let tz = tz.as_ref().map(|tz| tz.parse()).transpose()?;
+            Ok(Arc::new(
+                as_primitive_array::<TimestampNanosecondType>(array)
+                    .try_unary::<_, Time32MillisecondType, ArrowError>(|x| {
+                    Ok(time_to_time32ms(as_time_res_with_timezone::<
+                        TimestampNanosecondType,
+                    >(x, tz)?))
+                })?,
+            ))
+        }
 
         (Date64, Timestamp(TimeUnit::Second, None)) => Ok(Arc::new(
             as_primitive_array::<Date64Type>(array)
@@ -4234,6 +4435,160 @@ mod tests {
         assert!(c.is_null(2));
     }
 
+    #[test]
+    fn test_cast_timestamp_to_time64() {
+        // seconds input at +01:00: expect local times 01:00:05 and 01:00:01
+        let a = TimestampSecondArray::from(vec![Some(86405), Some(1), None])
+            .with_timezone("+01:00".to_string());
+        let array = Arc::new(a) as ArrayRef;
+        let b = cast(&array, &DataType::Time64(TimeUnit::Microsecond)).unwrap();
+        let c = b.as_any().downcast_ref::<Time64MicrosecondArray>().unwrap();
+        assert_eq!(3605000000, c.value(0));
+        assert_eq!(3601000000, c.value(1));
+        assert!(c.is_null(2));
+        let b = cast(&array, &DataType::Time64(TimeUnit::Nanosecond)).unwrap();
+        let c = b.as_any().downcast_ref::<Time64NanosecondArray>().unwrap();
+        assert_eq!(3605000000000, c.value(0));
+        assert_eq!(3601000000000, c.value(1));
+        assert!(c.is_null(2));
+
+        // milliseconds input: same instants, so the same expected times
+        let a = TimestampMillisecondArray::from(vec![Some(86405000), Some(1000), None])
+            .with_timezone("+01:00".to_string());
+        let array = Arc::new(a) as ArrayRef;
+        let b = cast(&array, &DataType::Time64(TimeUnit::Microsecond)).unwrap();
+        let c = b.as_any().downcast_ref::<Time64MicrosecondArray>().unwrap();
+        assert_eq!(3605000000, c.value(0));
+        assert_eq!(3601000000, c.value(1));
+        assert!(c.is_null(2));
+        let b = cast(&array, &DataType::Time64(TimeUnit::Nanosecond)).unwrap();
+        let c = b.as_any().downcast_ref::<Time64NanosecondArray>().unwrap();
+        assert_eq!(3605000000000, c.value(0));
+        assert_eq!(3601000000000, c.value(1));
+        assert!(c.is_null(2));
+
+        // microseconds input: same instants, so the same expected times
+        let a =
+            TimestampMicrosecondArray::from(vec![Some(86405000000), Some(1000000), None])
+                .with_timezone("+01:00".to_string());
+        let array = Arc::new(a) as ArrayRef;
+        let b = cast(&array, &DataType::Time64(TimeUnit::Microsecond)).unwrap();
+        let c = b.as_any().downcast_ref::<Time64MicrosecondArray>().unwrap();
+        assert_eq!(3605000000, c.value(0));
+        assert_eq!(3601000000, c.value(1));
+        assert!(c.is_null(2));
+        let b = cast(&array, &DataType::Time64(TimeUnit::Nanosecond)).unwrap();
+        let c = b.as_any().downcast_ref::<Time64NanosecondArray>().unwrap();
+        assert_eq!(3605000000000, c.value(0));
+        assert_eq!(3601000000000, c.value(1));
+        assert!(c.is_null(2));
+
+        // nanoseconds input: same instants, so the same expected times
+        let a = TimestampNanosecondArray::from(vec![
+            Some(86405000000000),
+            Some(1000000000),
+            None,
+        ])
+        .with_timezone("+01:00".to_string());
+        let array = Arc::new(a) as ArrayRef;
+        let b = cast(&array, &DataType::Time64(TimeUnit::Microsecond)).unwrap();
+        let c = b.as_any().downcast_ref::<Time64MicrosecondArray>().unwrap();
+        assert_eq!(3605000000, c.value(0));
+        assert_eq!(3601000000, c.value(1));
+        assert!(c.is_null(2));
+        let b = cast(&array, &DataType::Time64(TimeUnit::Nanosecond)).unwrap();
+        let c = b.as_any().downcast_ref::<Time64NanosecondArray>().unwrap();
+        assert_eq!(3605000000000, c.value(0));
+        assert_eq!(3601000000000, c.value(1));
+        assert!(c.is_null(2));
+
+        // overflow: i64::MAX seconds should not convert to a datetime, so the cast must fail
+        let a = TimestampSecondArray::from(vec![Some(i64::MAX)])
+            .with_timezone("+01:00".to_string());
+        let array = Arc::new(a) as ArrayRef;
+        let b = cast(&array, &DataType::Time64(TimeUnit::Microsecond));
+        assert!(b.is_err());
+        let b = cast(&array, &DataType::Time64(TimeUnit::Nanosecond));
+        assert!(b.is_err());
+    }
+
+    #[test]
+    fn test_cast_timestamp_to_time32() {
+        // seconds input at +01:00: expect local times 01:00:05 and 01:00:01
+        let a = TimestampSecondArray::from(vec![Some(86405), Some(1), None])
+            .with_timezone("+01:00".to_string());
+        let array = Arc::new(a) as ArrayRef;
+        let b = cast(&array, &DataType::Time32(TimeUnit::Second)).unwrap();
+        let c = b.as_any().downcast_ref::<Time32SecondArray>().unwrap();
+        assert_eq!(3605, c.value(0));
+        assert_eq!(3601, c.value(1));
+        assert!(c.is_null(2));
+        let b = cast(&array, &DataType::Time32(TimeUnit::Millisecond)).unwrap();
+        let c = b.as_any().downcast_ref::<Time32MillisecondArray>().unwrap();
+        assert_eq!(3605000, c.value(0));
+        assert_eq!(3601000, c.value(1));
+        assert!(c.is_null(2));
+
+        // milliseconds input: same instants, so the same expected times
+        let a = TimestampMillisecondArray::from(vec![Some(86405000), Some(1000), None])
+            .with_timezone("+01:00".to_string());
+        let array = Arc::new(a) as ArrayRef;
+        let b = cast(&array, &DataType::Time32(TimeUnit::Second)).unwrap();
+        let c = b.as_any().downcast_ref::<Time32SecondArray>().unwrap();
+        assert_eq!(3605, c.value(0));
+        assert_eq!(3601, c.value(1));
+        assert!(c.is_null(2));
+        let b = cast(&array, &DataType::Time32(TimeUnit::Millisecond)).unwrap();
+        let c = b.as_any().downcast_ref::<Time32MillisecondArray>().unwrap();
+        assert_eq!(3605000, c.value(0));
+        assert_eq!(3601000, c.value(1));
+        assert!(c.is_null(2));
+
+        // microseconds input: same instants, so the same expected times
+        let a =
+            TimestampMicrosecondArray::from(vec![Some(86405000000), Some(1000000), None])
+                .with_timezone("+01:00".to_string());
+        let array = Arc::new(a) as ArrayRef;
+        let b = cast(&array, &DataType::Time32(TimeUnit::Second)).unwrap();
+        let c = b.as_any().downcast_ref::<Time32SecondArray>().unwrap();
+        assert_eq!(3605, c.value(0));
+        assert_eq!(3601, c.value(1));
+        assert!(c.is_null(2));
+        let b = cast(&array, &DataType::Time32(TimeUnit::Millisecond)).unwrap();
+        let c = b.as_any().downcast_ref::<Time32MillisecondArray>().unwrap();
+        assert_eq!(3605000, c.value(0));
+        assert_eq!(3601000, c.value(1));
+        assert!(c.is_null(2));
+
+        // nanoseconds input: same instants, so the same expected times
+        let a = TimestampNanosecondArray::from(vec![
+            Some(86405000000000),
+            Some(1000000000),
+            None,
+        ])
+        .with_timezone("+01:00".to_string());
+        let array = Arc::new(a) as ArrayRef;
+        let b = cast(&array, &DataType::Time32(TimeUnit::Second)).unwrap();
+        let c = b.as_any().downcast_ref::<Time32SecondArray>().unwrap();
+        assert_eq!(3605, c.value(0));
+        assert_eq!(3601, c.value(1));
+        assert!(c.is_null(2));
+        let b = cast(&array, &DataType::Time32(TimeUnit::Millisecond)).unwrap();
+        let c = b.as_any().downcast_ref::<Time32MillisecondArray>().unwrap();
+        assert_eq!(3605000, c.value(0));
+        assert_eq!(3601000, c.value(1));
+        assert!(c.is_null(2));
+
+        // overflow: i64::MAX seconds should not convert to a datetime, so the cast must fail
+        let a = TimestampSecondArray::from(vec![Some(i64::MAX)])
+            .with_timezone("+01:00".to_string());
+        let array = Arc::new(a) as ArrayRef;
+        let b = cast(&array, &DataType::Time32(TimeUnit::Second));
+        assert!(b.is_err());
+        let b = cast(&array, &DataType::Time32(TimeUnit::Millisecond));
+        assert!(b.is_err());
+    }
+
     #[test]
     fn test_cast_date64_to_timestamp() {
         let a = Date64Array::from(vec![Some(864000000005), Some(1545696000001), None]);