You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by al...@apache.org on 2021/08/16 21:09:53 UTC
[arrow-rs] branch master updated: allow casting from Timestamp
based arrays to utf8 (#664)
This is an automated email from the ASF dual-hosted git repository.
alamb pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git
The following commit(s) were added to refs/heads/master by this push:
new f569e8f allow casting from Timestamp based arrays to utf8 (#664)
f569e8f is described below
commit f569e8f8541764669d661f1bfa314da2eeeb2678
Author: Sumit <su...@users.noreply.github.com>
AuthorDate: Mon Aug 16 23:09:46 2021 +0200
allow casting from Timestamp based arrays to utf8 (#664)
This change uses the existing `PrimitiveArray::value_as_datetime` to
support casting from `Timestamp(_,_)` to `[Large]Utf8`.
---
arrow/src/compute/kernels/cast.rs | 67 +++++++++++++++++++++++++++++++++++++++
1 file changed, 67 insertions(+)
diff --git a/arrow/src/compute/kernels/cast.rs b/arrow/src/compute/kernels/cast.rs
index e4a7a45..593adec 100644
--- a/arrow/src/compute/kernels/cast.rs
+++ b/arrow/src/compute/kernels/cast.rs
@@ -101,6 +101,7 @@ pub fn can_cast_types(from_type: &DataType, to_type: &DataType) -> bool {
(LargeUtf8, Date64) => true,
(LargeUtf8, Timestamp(TimeUnit::Nanosecond, None)) => true,
(LargeUtf8, _) => DataType::is_numeric(to_type),
+ (Timestamp(_, _), Utf8) | (Timestamp(_, _), LargeUtf8) => true,
(_, Utf8) | (_, LargeUtf8) => {
DataType::is_numeric(from_type) || from_type == &Binary
}
@@ -468,6 +469,20 @@ pub fn cast_with_options(
Int64 => cast_numeric_to_string::<Int64Type, i32>(array),
Float32 => cast_numeric_to_string::<Float32Type, i32>(array),
Float64 => cast_numeric_to_string::<Float64Type, i32>(array),
+ Timestamp(unit, _) => match unit {
+ TimeUnit::Nanosecond => {
+ cast_timestamp_to_string::<TimestampNanosecondType, i32>(array)
+ }
+ TimeUnit::Microsecond => {
+ cast_timestamp_to_string::<TimestampMicrosecondType, i32>(array)
+ }
+ TimeUnit::Millisecond => {
+ cast_timestamp_to_string::<TimestampMillisecondType, i32>(array)
+ }
+ TimeUnit::Second => {
+ cast_timestamp_to_string::<TimestampSecondType, i32>(array)
+ }
+ },
Binary => {
let array = array.as_any().downcast_ref::<BinaryArray>().unwrap();
Ok(Arc::new(
@@ -508,6 +523,20 @@ pub fn cast_with_options(
Int64 => cast_numeric_to_string::<Int64Type, i64>(array),
Float32 => cast_numeric_to_string::<Float32Type, i64>(array),
Float64 => cast_numeric_to_string::<Float64Type, i64>(array),
+ Timestamp(unit, _) => match unit {
+ TimeUnit::Nanosecond => {
+ cast_timestamp_to_string::<TimestampNanosecondType, i64>(array)
+ }
+ TimeUnit::Microsecond => {
+ cast_timestamp_to_string::<TimestampMicrosecondType, i64>(array)
+ }
+ TimeUnit::Millisecond => {
+ cast_timestamp_to_string::<TimestampMillisecondType, i64>(array)
+ }
+ TimeUnit::Second => {
+ cast_timestamp_to_string::<TimestampSecondType, i64>(array)
+ }
+ },
Binary => {
let array = array.as_any().downcast_ref::<BinaryArray>().unwrap();
Ok(Arc::new(
@@ -1003,6 +1032,28 @@ where
unsafe { PrimitiveArray::<R>::from_trusted_len_iter(iter) }
}
+/// Cast timestamp types to Utf8/LargeUtf8
+fn cast_timestamp_to_string<T, OffsetSize>(array: &ArrayRef) -> Result<ArrayRef>
+where
+ T: ArrowTemporalType + ArrowNumericType,
+ i64: From<<T as ArrowPrimitiveType>::Native>,
+ OffsetSize: StringOffsetSizeTrait,
+{
+ let array = array.as_any().downcast_ref::<PrimitiveArray<T>>().unwrap();
+
+ Ok(Arc::new(
+ (0..array.len())
+ .map(|ix| {
+ if array.is_null(ix) {
+ None
+ } else {
+ array.value_as_datetime(ix).map(|v| v.to_string())
+ }
+ })
+ .collect::<GenericStringArray<OffsetSize>>(),
+ ))
+}
+
/// Cast numeric types to Utf8
fn cast_numeric_to_string<FROM, OffsetSize>(array: &ArrayRef) -> Result<ArrayRef>
where
@@ -2172,6 +2223,22 @@ mod tests {
}
#[test]
+ fn test_cast_timestamp_to_string() {
+ let a = TimestampMillisecondArray::from_opt_vec(
+ vec![Some(864000000005), Some(1545696000001), None],
+ Some("UTC".to_string()),
+ );
+ let array = Arc::new(a) as ArrayRef;
+ dbg!(&array);
+ let b = cast(&array, &DataType::Utf8).unwrap();
+ let c = b.as_any().downcast_ref::<StringArray>().unwrap();
+ assert_eq!(&DataType::Utf8, c.data_type());
+ assert_eq!("1997-05-19 00:00:00.005", c.value(0));
+ assert_eq!("2018-12-25 00:00:00.001", c.value(1));
+ assert!(c.is_null(2));
+ }
+
+ #[test]
fn test_cast_between_timestamps() {
let a = TimestampMillisecondArray::from_opt_vec(
vec![Some(864000003005), Some(1545696002001), None],