You are viewing a plain text version of this content. The canonical link for it is here.
Posted to github@arrow.apache.org by "tustvold (via GitHub)" <gi...@apache.org> on 2023/03/28 21:17:49 UTC

[GitHub] [arrow-datafusion] tustvold commented on a diff in pull request #5764: timestamp interval arithmetic query

tustvold commented on code in PR #5764:
URL: https://github.com/apache/arrow-datafusion/pull/5764#discussion_r1151156279


##########
datafusion/physical-expr/src/expressions/datetime.rs:
##########
@@ -239,6 +251,542 @@ pub fn evaluate_array(
     Ok(ColumnarValue::Array(ret))
 }
 
+macro_rules! ts_sub_op { // Combines two timestamp arrays element by element with `$op` and yields the result as an `ArrayRef`.
+    ($lhs:ident, $rhs:ident, $lhs_tz:ident, $rhs_tz:ident, $coef:expr, $caster:expr, $op:expr, $ts_unit:expr, $mode:expr, $type_in:ty, $type_out:ty) => {{
+        let prim_array_lhs = $caster(&$lhs)?; // both inputs go through the same `$caster`; `?` returns early from the enclosing function on failure
+        let prim_array_rhs = $caster(&$rhs)?;
+        let ret = Arc::new(try_binary_op::<$type_in, $type_in, _, $type_out>( // inputs share `$type_in`; the output array has `$type_out`
+            prim_array_lhs,
+            prim_array_rhs,
+            |ts1, ts2| {
+                let (lhs_tz, rhs_tz) =
+                    (parse_timezones($lhs_tz), parse_timezones($rhs_tz)); // NOTE(review): parsed again on every closure call although `$lhs_tz`/`$rhs_tz` do not vary per element; looks hoistable
+                Ok($op(
+                    $ts_unit(&with_timezone_to_naive_datetime::<$mode>(
+                        ts1.mul_wrapping($coef), // raw value scaled by `$coef`; presumably wraps silently on overflow (TODO confirm)
+                        &lhs_tz,
+                    )?),
+                    $ts_unit(&with_timezone_to_naive_datetime::<$mode>(
+                        ts2.mul_wrapping($coef),
+                        &rhs_tz,
+                    )?),
+                )) // `$op` receives the two `$ts_unit(..)` results, left operand first
+            },
+        )?) as ArrayRef;
+        ret
+    }};
+}
+macro_rules! interval_op { // Combines two interval arrays of the same type element by element and yields the result as an `ArrayRef`.
+    ($lhs:ident, $rhs:ident, $caster:expr, $op:expr, $sign:ident, $type_in:ty) => {{
+        let prim_array_lhs = $caster(&$lhs)?; // `?` returns early from the enclosing function if the cast fails
+        let prim_array_rhs = $caster(&$rhs)?;
+        let ret = Arc::new(binary::<$type_in, $type_in, _, $type_in>( // inputs and output all use `$type_in`
+            prim_array_lhs,
+            prim_array_rhs,
+            |interval1, interval2| $op(interval1, interval2, $sign), // closure result is used directly (no `Ok`/`?`), unlike the `try_binary_op` based macros
+        )?) as ArrayRef;
+        ret
+    }};
+}
+macro_rules! interval_cross_op { // Combines two interval arrays of different types element by element into an `IntervalMonthDayNanoType` array.
+    ($lhs:ident, $rhs:ident, $caster1:expr, $caster2:expr, $op:expr, $sign:ident, $commute:ident, $type_in1:ty, $type_in2:ty) => {{
+        let prim_array_lhs = $caster1(&$lhs)?; // each side has its own caster because the input types differ
+        let prim_array_rhs = $caster2(&$rhs)?;
+        let ret = Arc::new(binary::<$type_in1, $type_in2, _, IntervalMonthDayNanoType>( // output type is fixed regardless of the input types
+            prim_array_lhs,
+            prim_array_rhs,
+            |interval1, interval2| $op(interval1, interval2, $sign, $commute), // `$commute` is forwarded untouched; presumably tells `$op` the operands were swapped (TODO confirm against `$op`)
+        )?) as ArrayRef;
+        ret
+    }};
+}
+macro_rules! ts_interval_op { // Combines a timestamp array with an interval array element by element; the result keeps the timestamp type.
+    ($lhs:ident, $rhs:ident, $caster1:expr, $caster2:expr, $op:expr, $sign:ident, $type_in1:ty, $type_in2:ty) => {{
+        let prim_array_lhs = $caster1(&$lhs)?; // `$lhs` is bound to `ts` in the closure below, `$rhs` to `interval`
+        let prim_array_rhs = $caster2(&$rhs)?;
+        let ret = Arc::new(try_binary_op::<$type_in1, $type_in2, _, $type_in1>( // output type equals the first input type
+            prim_array_lhs,
+            prim_array_rhs,
+            |ts, interval| Ok($op(ts, interval as i128, $sign)?), // the interval value is `as`-cast to i128 before `$op`; a failing `$op` propagates through `?`
+        )?) as ArrayRef;
+        ret
+    }};
+}
+// Evaluates a temporal operation between two arrays: timestamp - timestamp, interval (+, -) interval,
+// timestamp (+, -) interval, and interval + timestamp. `sign` selects the operation (+1 for addition,
+// -1 for subtraction). The result is always wrapped in `ColumnarValue::Array`; any other combination of
+// operand types (or interval - timestamp) yields a `DataFusionError::Execution`.
+pub fn evaluate_temporal_arrays(
+    array_lhs: &ArrayRef,
+    sign: i32,
+    array_rhs: &ArrayRef,
+) -> Result<ColumnarValue> {
+    let ret = match (array_lhs.data_type(), array_rhs.data_type()) {
+        // Timestamp - Timestamp: the time unit and timezone are not inspected in this match arm.
+        (DataType::Timestamp(_, _), DataType::Timestamp(_, _)) => {
+            ts_array_op(array_lhs, array_rhs)? // NOTE(review): `sign` is not consulted here, so sign == 1 takes the same path; confirm callers reject timestamp + timestamp
+        }
+        // Interval (+, -) Interval: `sign` is forwarded to pick addition or subtraction.
+        (DataType::Interval(_), DataType::Interval(_)) => {
+            interval_array_op(array_lhs, array_rhs, sign)?
+        }
+        // Timestamp (+, -) Interval, and Interval + Timestamp (handled by swapping the operands).
+        // Interval - Timestamp is not a meaningful operation, hence it is not supported.
+        (DataType::Timestamp(_, _), DataType::Interval(_)) => {
+            ts_interval_array_op(array_lhs, sign, array_rhs)?
+        }
+        (DataType::Interval(_), DataType::Timestamp(_, _)) if sign == 1 => { // the guard sends Interval - Timestamp to the error arm below
+            ts_interval_array_op(array_rhs, sign, array_lhs)? // operands swapped so the timestamp array is passed first
+        }
+        (_, _) => Err(DataFusionError::Execution(format!( // `Err(..)?` returns the error from the function immediately
+            "Invalid array types for DateIntervalExpr: {:?} {} {:?}",
+            array_lhs.data_type(),
+            sign, // printed as the raw integer, not as an operator symbol
+            array_rhs.data_type()
+        )))?,
+    };
+    Ok(ColumnarValue::Array(ret))
+}
+
+#[inline]
+unsafe fn build_primitive_array<O: ArrowPrimitiveType>( // Assembles a `PrimitiveArray<O>` from one value buffer plus an optional null buffer.
+    len: usize,
+    buffer: Buffer,
+    null_count: usize,
+    null_buffer: Option<Buffer>,
+) -> PrimitiveArray<O> {
+    PrimitiveArray::from(ArrayData::new_unchecked( // Safety: nothing in this function checks `len`, `null_count` or the buffers against each other; presumably the caller must guarantee they are consistent for `O` (TODO confirm against `ArrayData::new_unchecked`)
+        O::DATA_TYPE,
+        len,
+        Some(null_count),
+        null_buffer,
+        0, // presumably the array offset, i.e. the data starts at the beginning of `buffer` (TODO confirm)
+        vec![buffer], // a single buffer holding the values
+        vec![], // last argument is left empty
+    ))
+}
+
+pub fn try_binary_op<A, B, F, O>(

Review Comment:
   https://docs.rs/arrow-arith/latest/arrow_arith/arity/fn.try_binary.html



##########
datafusion/common/src/scalar.rs:
##########
@@ -885,24 +1011,35 @@ fn ts_sub_to_interval(
     }
 }
 
+// This function parses the timezone from string to Tz.
+// If it cannot parse or timezone field is [`None`], it returns [`None`].
+pub fn parse_timezones(tz: &Option<String>) -> Option<Tz> {
+    if let Some(tz) = tz {
+        let parsed_tz: Option<Tz> = FromStr::from_str(tz)
+            .map_err(|_| {

Review Comment:
   Why map_err only to discard it?



##########
datafusion/common/src/scalar.rs:
##########
@@ -919,12 +1056,14 @@ fn with_timezone_to_naive_datetime(
 /// This function creates the [`NaiveDateTime`] object corresponding to the
 /// given timestamp, whose tick size is specified by `UNIT_NANOS`.
 #[inline]
-fn ticks_to_naive_datetime<const UNIT_NANOS: i64>(ticks: i64) -> Result<NaiveDateTime> {
-    NaiveDateTime::from_timestamp_opt(
-        (ticks * UNIT_NANOS) / 1_000_000_000,
-        ((ticks * UNIT_NANOS) % 1_000_000_000) as u32,
-    )
-    .ok_or_else(|| {
+fn ticks_to_naive_datetime<const UNIT_NANOS: i128>(ticks: i64) -> Result<NaiveDateTime> {

Review Comment:
   https://docs.rs/arrow-array/latest/arrow_array/temporal_conversions/fn.as_datetime.html



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscribe@arrow.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org