You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by al...@apache.org on 2023/06/28 18:45:01 UTC
[arrow-datafusion] branch main updated: Minor: Simplify date_trunc code and add comments (#6783)
This is an automated email from the ASF dual-hosted git repository.
alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git
The following commit(s) were added to refs/heads/main by this push:
new 0a4714d1da Minor: Simplify date_trunc code and add comments (#6783)
0a4714d1da is described below
commit 0a4714d1da70d61fb03c43d7968ecd91263a9b57
Author: Andrew Lamb <an...@nerdnetworks.org>
AuthorDate: Wed Jun 28 14:44:55 2023 -0400
Minor: Simplify date_trunc code and add comments (#6783)
---
.../physical-expr/src/datetime_expressions.rs | 47 ++++++++--------------
1 file changed, 16 insertions(+), 31 deletions(-)
diff --git a/datafusion/physical-expr/src/datetime_expressions.rs b/datafusion/physical-expr/src/datetime_expressions.rs
index 3a36e8a489..e429e7bc45 100644
--- a/datafusion/physical-expr/src/datetime_expressions.rs
+++ b/datafusion/physical-expr/src/datetime_expressions.rs
@@ -214,7 +214,11 @@ fn quarter_month(date: &NaiveDateTime) -> u32 {
1 + 3 * ((date.month() - 1) / 3)
}
-fn date_trunc_single(granularity: &str, value: i64) -> Result<i64> {
+/// Tuncates the single `value`, expressed in nanoseconds since the
+/// epoch, for granularities greater than 1 second, in taking into
+/// account that some granularities are not uniform durations of time
+/// (e.g. months are not always the same lengths, leap seconds, etc)
+fn date_trunc_coarse(granularity: &str, value: i64) -> Result<i64> {
if granularity == "millisecond" || granularity == "microsecond" {
return Ok(value);
}
@@ -266,11 +270,11 @@ fn date_trunc_single(granularity: &str, value: i64) -> Result<i64> {
Ok(value.unwrap().timestamp_nanos())
}
+// truncates a single value with the given timeunit to the specified granularity
fn _date_trunc(
tu: TimeUnit,
value: &Option<i64>,
granularity: &str,
- f: impl Fn(Option<i64>) -> Result<Option<i64>>,
) -> Result<Option<i64>, DataFusionError> {
let scale = match tu {
TimeUnit::Second => 1_000_000_000,
@@ -284,9 +288,7 @@ fn _date_trunc(
};
// convert to nanoseconds
- let Some(nano) = (f)(Some(value * scale))? else {
- return Ok(None);
- };
+ let nano = date_trunc_coarse(granularity, scale * value)?;
let result = match tu {
TimeUnit::Second => match granularity {
@@ -328,29 +330,24 @@ pub fn date_trunc(args: &[ColumnarValue]) -> Result<ColumnarValue> {
));
};
- let f = |x: Option<i64>| {
- x.map(|x| date_trunc_single(granularity.as_str(), x))
- .transpose()
- };
-
Ok(match array {
ColumnarValue::Scalar(ScalarValue::TimestampNanosecond(v, tz_opt)) => {
- let value = _date_trunc(TimeUnit::Nanosecond, v, granularity.as_str(), f)?;
+ let value = _date_trunc(TimeUnit::Nanosecond, v, granularity.as_str())?;
let value = ScalarValue::TimestampNanosecond(value, tz_opt.clone());
ColumnarValue::Scalar(value)
}
ColumnarValue::Scalar(ScalarValue::TimestampMicrosecond(v, tz_opt)) => {
- let value = _date_trunc(TimeUnit::Microsecond, v, granularity.as_str(), f)?;
+ let value = _date_trunc(TimeUnit::Microsecond, v, granularity.as_str())?;
let value = ScalarValue::TimestampMicrosecond(value, tz_opt.clone());
ColumnarValue::Scalar(value)
}
ColumnarValue::Scalar(ScalarValue::TimestampMillisecond(v, tz_opt)) => {
- let value = _date_trunc(TimeUnit::Millisecond, v, granularity.as_str(), f)?;
+ let value = _date_trunc(TimeUnit::Millisecond, v, granularity.as_str())?;
let value = ScalarValue::TimestampMillisecond(value, tz_opt.clone());
ColumnarValue::Scalar(value)
}
ColumnarValue::Scalar(ScalarValue::TimestampSecond(v, tz_opt)) => {
- let value = _date_trunc(TimeUnit::Second, v, granularity.as_str(), f)?;
+ let value = _date_trunc(TimeUnit::Second, v, granularity.as_str())?;
let value = ScalarValue::TimestampSecond(value, tz_opt.clone());
ColumnarValue::Scalar(value)
}
@@ -361,9 +358,7 @@ pub fn date_trunc(args: &[ColumnarValue]) -> Result<ColumnarValue> {
let array = as_timestamp_second_array(array)?;
let array = array
.iter()
- .map(|x| {
- _date_trunc(TimeUnit::Second, &x, granularity.as_str(), f)
- })
+ .map(|x| _date_trunc(TimeUnit::Second, &x, granularity.as_str()))
.collect::<Result<TimestampSecondArray>>()?;
ColumnarValue::Array(Arc::new(array))
}
@@ -372,12 +367,7 @@ pub fn date_trunc(args: &[ColumnarValue]) -> Result<ColumnarValue> {
let array = array
.iter()
.map(|x| {
- _date_trunc(
- TimeUnit::Millisecond,
- &x,
- granularity.as_str(),
- f,
- )
+ _date_trunc(TimeUnit::Millisecond, &x, granularity.as_str())
})
.collect::<Result<TimestampMillisecondArray>>()?;
ColumnarValue::Array(Arc::new(array))
@@ -387,12 +377,7 @@ pub fn date_trunc(args: &[ColumnarValue]) -> Result<ColumnarValue> {
let array = array
.iter()
.map(|x| {
- _date_trunc(
- TimeUnit::Microsecond,
- &x,
- granularity.as_str(),
- f,
- )
+ _date_trunc(TimeUnit::Microsecond, &x, granularity.as_str())
})
.collect::<Result<TimestampMicrosecondArray>>()?;
ColumnarValue::Array(Arc::new(array))
@@ -402,7 +387,7 @@ pub fn date_trunc(args: &[ColumnarValue]) -> Result<ColumnarValue> {
let array = array
.iter()
.map(|x| {
- _date_trunc(TimeUnit::Nanosecond, &x, granularity.as_str(), f)
+ _date_trunc(TimeUnit::Nanosecond, &x, granularity.as_str())
})
.collect::<Result<TimestampNanosecondArray>>()?;
@@ -996,7 +981,7 @@ mod tests {
cases.iter().for_each(|(original, granularity, expected)| {
let left = string_to_timestamp_nanos(original).unwrap();
let right = string_to_timestamp_nanos(expected).unwrap();
- let result = date_trunc_single(granularity, left).unwrap();
+ let result = date_trunc_coarse(granularity, left).unwrap();
assert_eq!(result, right, "{original} = {expected}");
});
}