You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by al...@apache.org on 2023/06/28 18:45:01 UTC

[arrow-datafusion] branch main updated: Minor: Simplify date_trunc code and add comments (#6783)

This is an automated email from the ASF dual-hosted git repository.

alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git


The following commit(s) were added to refs/heads/main by this push:
     new 0a4714d1da Minor: Simplify date_trunc code and add comments (#6783)
0a4714d1da is described below

commit 0a4714d1da70d61fb03c43d7968ecd91263a9b57
Author: Andrew Lamb <an...@nerdnetworks.org>
AuthorDate: Wed Jun 28 14:44:55 2023 -0400

    Minor: Simplify date_trunc code and add comments (#6783)
---
 .../physical-expr/src/datetime_expressions.rs      | 47 ++++++++--------------
 1 file changed, 16 insertions(+), 31 deletions(-)

diff --git a/datafusion/physical-expr/src/datetime_expressions.rs b/datafusion/physical-expr/src/datetime_expressions.rs
index 3a36e8a489..e429e7bc45 100644
--- a/datafusion/physical-expr/src/datetime_expressions.rs
+++ b/datafusion/physical-expr/src/datetime_expressions.rs
@@ -214,7 +214,11 @@ fn quarter_month(date: &NaiveDateTime) -> u32 {
     1 + 3 * ((date.month() - 1) / 3)
 }
 
-fn date_trunc_single(granularity: &str, value: i64) -> Result<i64> {
+/// Tuncates the single `value`, expressed in nanoseconds since the
+/// epoch, for granularities greater than 1 second, in taking into
+/// account that some granularities are not uniform durations of time
+/// (e.g. months are not always the same lengths, leap seconds, etc)
+fn date_trunc_coarse(granularity: &str, value: i64) -> Result<i64> {
     if granularity == "millisecond" || granularity == "microsecond" {
         return Ok(value);
     }
@@ -266,11 +270,11 @@ fn date_trunc_single(granularity: &str, value: i64) -> Result<i64> {
     Ok(value.unwrap().timestamp_nanos())
 }
 
+// truncates a single value with the given timeunit to the specified granularity
 fn _date_trunc(
     tu: TimeUnit,
     value: &Option<i64>,
     granularity: &str,
-    f: impl Fn(Option<i64>) -> Result<Option<i64>>,
 ) -> Result<Option<i64>, DataFusionError> {
     let scale = match tu {
         TimeUnit::Second => 1_000_000_000,
@@ -284,9 +288,7 @@ fn _date_trunc(
     };
 
     // convert to nanoseconds
-    let Some(nano) = (f)(Some(value * scale))? else {
-        return Ok(None);
-    };
+    let nano = date_trunc_coarse(granularity, scale * value)?;
 
     let result = match tu {
         TimeUnit::Second => match granularity {
@@ -328,29 +330,24 @@ pub fn date_trunc(args: &[ColumnarValue]) -> Result<ColumnarValue> {
             ));
         };
 
-    let f = |x: Option<i64>| {
-        x.map(|x| date_trunc_single(granularity.as_str(), x))
-            .transpose()
-    };
-
     Ok(match array {
         ColumnarValue::Scalar(ScalarValue::TimestampNanosecond(v, tz_opt)) => {
-            let value = _date_trunc(TimeUnit::Nanosecond, v, granularity.as_str(), f)?;
+            let value = _date_trunc(TimeUnit::Nanosecond, v, granularity.as_str())?;
             let value = ScalarValue::TimestampNanosecond(value, tz_opt.clone());
             ColumnarValue::Scalar(value)
         }
         ColumnarValue::Scalar(ScalarValue::TimestampMicrosecond(v, tz_opt)) => {
-            let value = _date_trunc(TimeUnit::Microsecond, v, granularity.as_str(), f)?;
+            let value = _date_trunc(TimeUnit::Microsecond, v, granularity.as_str())?;
             let value = ScalarValue::TimestampMicrosecond(value, tz_opt.clone());
             ColumnarValue::Scalar(value)
         }
         ColumnarValue::Scalar(ScalarValue::TimestampMillisecond(v, tz_opt)) => {
-            let value = _date_trunc(TimeUnit::Millisecond, v, granularity.as_str(), f)?;
+            let value = _date_trunc(TimeUnit::Millisecond, v, granularity.as_str())?;
             let value = ScalarValue::TimestampMillisecond(value, tz_opt.clone());
             ColumnarValue::Scalar(value)
         }
         ColumnarValue::Scalar(ScalarValue::TimestampSecond(v, tz_opt)) => {
-            let value = _date_trunc(TimeUnit::Second, v, granularity.as_str(), f)?;
+            let value = _date_trunc(TimeUnit::Second, v, granularity.as_str())?;
             let value = ScalarValue::TimestampSecond(value, tz_opt.clone());
             ColumnarValue::Scalar(value)
         }
@@ -361,9 +358,7 @@ pub fn date_trunc(args: &[ColumnarValue]) -> Result<ColumnarValue> {
                     let array = as_timestamp_second_array(array)?;
                     let array = array
                         .iter()
-                        .map(|x| {
-                            _date_trunc(TimeUnit::Second, &x, granularity.as_str(), f)
-                        })
+                        .map(|x| _date_trunc(TimeUnit::Second, &x, granularity.as_str()))
                         .collect::<Result<TimestampSecondArray>>()?;
                     ColumnarValue::Array(Arc::new(array))
                 }
@@ -372,12 +367,7 @@ pub fn date_trunc(args: &[ColumnarValue]) -> Result<ColumnarValue> {
                     let array = array
                         .iter()
                         .map(|x| {
-                            _date_trunc(
-                                TimeUnit::Millisecond,
-                                &x,
-                                granularity.as_str(),
-                                f,
-                            )
+                            _date_trunc(TimeUnit::Millisecond, &x, granularity.as_str())
                         })
                         .collect::<Result<TimestampMillisecondArray>>()?;
                     ColumnarValue::Array(Arc::new(array))
@@ -387,12 +377,7 @@ pub fn date_trunc(args: &[ColumnarValue]) -> Result<ColumnarValue> {
                     let array = array
                         .iter()
                         .map(|x| {
-                            _date_trunc(
-                                TimeUnit::Microsecond,
-                                &x,
-                                granularity.as_str(),
-                                f,
-                            )
+                            _date_trunc(TimeUnit::Microsecond, &x, granularity.as_str())
                         })
                         .collect::<Result<TimestampMicrosecondArray>>()?;
                     ColumnarValue::Array(Arc::new(array))
@@ -402,7 +387,7 @@ pub fn date_trunc(args: &[ColumnarValue]) -> Result<ColumnarValue> {
                     let array = array
                         .iter()
                         .map(|x| {
-                            _date_trunc(TimeUnit::Nanosecond, &x, granularity.as_str(), f)
+                            _date_trunc(TimeUnit::Nanosecond, &x, granularity.as_str())
                         })
                         .collect::<Result<TimestampNanosecondArray>>()?;
 
@@ -996,7 +981,7 @@ mod tests {
         cases.iter().for_each(|(original, granularity, expected)| {
             let left = string_to_timestamp_nanos(original).unwrap();
             let right = string_to_timestamp_nanos(expected).unwrap();
-            let result = date_trunc_single(granularity, left).unwrap();
+            let result = date_trunc_coarse(granularity, left).unwrap();
             assert_eq!(result, right, "{original} = {expected}");
         });
     }