You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by mn...@apache.org on 2023/05/17 10:03:26 UTC
[arrow-datafusion] branch main updated: feat: min/max agg for bool (#6226)
This is an automated email from the ASF dual-hosted git repository.
mneumann pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git
The following commit(s) were added to refs/heads/main by this push:
new 5c40142722 feat: min/max agg for bool (#6226)
5c40142722 is described below
commit 5c4014272206397919a4a67ee3ca0af011aeb3f8
Author: Marco Neumann <ma...@crepererum.net>
AuthorDate: Wed May 17 12:03:19 2023 +0200
feat: min/max agg for bool (#6226)
---
.../tests/sqllogictests/test_files/aggregate.slt | 30 ++++++++
datafusion/physical-expr/src/aggregate/min_max.rs | 90 +++++++++++++++++++++-
datafusion/physical-expr/src/expressions/mod.rs | 2 +-
datafusion/row/src/accessor.rs | 2 +
4 files changed, 121 insertions(+), 3 deletions(-)
diff --git a/datafusion/core/tests/sqllogictests/test_files/aggregate.slt b/datafusion/core/tests/sqllogictests/test_files/aggregate.slt
index 47d7d031ce..17d89a9f05 100644
--- a/datafusion/core/tests/sqllogictests/test_files/aggregate.slt
+++ b/datafusion/core/tests/sqllogictests/test_files/aggregate.slt
@@ -1838,3 +1838,33 @@ select max(x_dict) from value_dict group by x_dict % 2 order by max(x_dict);
----
4
5
+
+# bool aggregation
+statement ok
+CREATE TABLE value_bool(x boolean, g int) AS VALUES (NULL, 0), (false, 0), (true, 0), (false, 1), (true, 2), (NULL, 3);
+
+query B
+select min(x) from value_bool;
+----
+false
+
+query B
+select max(x) from value_bool;
+----
+true
+
+query B
+select min(x) from value_bool group by g order by g;
+----
+false
+false
+true
+NULL
+
+query B
+select max(x) from value_bool group by g order by g;
+----
+true
+false
+true
+NULL
diff --git a/datafusion/physical-expr/src/aggregate/min_max.rs b/datafusion/physical-expr/src/aggregate/min_max.rs
index 3a3d529839..f811dae7b5 100644
--- a/datafusion/physical-expr/src/aggregate/min_max.rs
+++ b/datafusion/physical-expr/src/aggregate/min_max.rs
@@ -26,8 +26,8 @@ use arrow::compute;
use arrow::datatypes::{DataType, TimeUnit};
use arrow::{
array::{
- ArrayRef, Date32Array, Date64Array, Float32Array, Float64Array, Int16Array,
- Int32Array, Int64Array, Int8Array, LargeStringArray, StringArray,
+ ArrayRef, BooleanArray, Date32Array, Date64Array, Float32Array, Float64Array,
+ Int16Array, Int32Array, Int64Array, Int8Array, LargeStringArray, StringArray,
Time32MillisecondArray, Time32SecondArray, Time64MicrosecondArray,
Time64NanosecondArray, TimestampMicrosecondArray, TimestampMillisecondArray,
TimestampNanosecondArray, TimestampSecondArray, UInt16Array, UInt32Array,
@@ -290,6 +290,9 @@ fn min_batch(values: &ArrayRef) -> Result<ScalarValue> {
DataType::LargeUtf8 => {
typed_min_max_batch_string!(values, LargeStringArray, LargeUtf8, min_string)
}
+ DataType::Boolean => {
+ typed_min_max_batch!(values, BooleanArray, Boolean, min_boolean)
+ }
_ => min_max_batch!(values, min),
})
}
@@ -303,6 +306,9 @@ fn max_batch(values: &ArrayRef) -> Result<ScalarValue> {
DataType::LargeUtf8 => {
typed_min_max_batch_string!(values, LargeStringArray, LargeUtf8, max_string)
}
+ DataType::Boolean => {
+ typed_min_max_batch!(values, BooleanArray, Boolean, max_boolean)
+ }
_ => min_max_batch!(values, max),
})
}
@@ -386,6 +392,9 @@ macro_rules! min_max {
)));
}
}
+ (ScalarValue::Boolean(lhs), ScalarValue::Boolean(rhs)) => {
+ typed_min_max!(lhs, rhs, Boolean, $OP)
+ }
(ScalarValue::Float64(lhs), ScalarValue::Float64(rhs)) => {
typed_min_max!(lhs, rhs, Float64, $OP)
}
@@ -532,6 +541,9 @@ macro_rules! min_max {
macro_rules! min_max_v2 {
($INDEX:ident, $ACC:ident, $SCALAR:expr, $OP:ident) => {{
Ok(match $SCALAR {
+ ScalarValue::Boolean(rhs) => {
+ typed_min_max_v2!($INDEX, $ACC, rhs, bool, $OP)
+ }
ScalarValue::Float64(rhs) => {
typed_min_max_v2!($INDEX, $ACC, rhs, f64, $OP)
}
@@ -1429,4 +1441,78 @@ mod tests {
ScalarValue::Time64Nanosecond(Some(5))
)
}
+
+ #[test]
+ fn max_bool() -> Result<()> {
+ let a: ArrayRef = Arc::new(BooleanArray::from(vec![false, false]));
+ generic_test_op!(a, DataType::Boolean, Max, ScalarValue::from(false))?;
+
+ let a: ArrayRef = Arc::new(BooleanArray::from(vec![true, true]));
+ generic_test_op!(a, DataType::Boolean, Max, ScalarValue::from(true))?;
+
+ let a: ArrayRef = Arc::new(BooleanArray::from(vec![false, true, false]));
+ generic_test_op!(a, DataType::Boolean, Max, ScalarValue::from(true))?;
+
+ let a: ArrayRef = Arc::new(BooleanArray::from(vec![true, false, true]));
+ generic_test_op!(a, DataType::Boolean, Max, ScalarValue::from(true))?;
+
+ let a: ArrayRef = Arc::new(BooleanArray::from(Vec::<bool>::new()));
+ generic_test_op!(
+ a,
+ DataType::Boolean,
+ Max,
+ ScalarValue::from(None as Option<bool>)
+ )?;
+
+ let a: ArrayRef = Arc::new(BooleanArray::from(vec![None as Option<bool>]));
+ generic_test_op!(
+ a,
+ DataType::Boolean,
+ Max,
+ ScalarValue::from(None as Option<bool>)
+ )?;
+
+ let a: ArrayRef =
+ Arc::new(BooleanArray::from(vec![None, Some(true), Some(false)]));
+ generic_test_op!(a, DataType::Boolean, Max, ScalarValue::from(true))?;
+
+ Ok(())
+ }
+
+ #[test]
+ fn min_bool() -> Result<()> {
+ let a: ArrayRef = Arc::new(BooleanArray::from(vec![false, false]));
+ generic_test_op!(a, DataType::Boolean, Min, ScalarValue::from(false))?;
+
+ let a: ArrayRef = Arc::new(BooleanArray::from(vec![true, true]));
+ generic_test_op!(a, DataType::Boolean, Min, ScalarValue::from(true))?;
+
+ let a: ArrayRef = Arc::new(BooleanArray::from(vec![false, true, false]));
+ generic_test_op!(a, DataType::Boolean, Min, ScalarValue::from(false))?;
+
+ let a: ArrayRef = Arc::new(BooleanArray::from(vec![true, false, true]));
+ generic_test_op!(a, DataType::Boolean, Min, ScalarValue::from(false))?;
+
+ let a: ArrayRef = Arc::new(BooleanArray::from(Vec::<bool>::new()));
+ generic_test_op!(
+ a,
+ DataType::Boolean,
+ Min,
+ ScalarValue::from(None as Option<bool>)
+ )?;
+
+ let a: ArrayRef = Arc::new(BooleanArray::from(vec![None as Option<bool>]));
+ generic_test_op!(
+ a,
+ DataType::Boolean,
+ Min,
+ ScalarValue::from(None as Option<bool>)
+ )?;
+
+ let a: ArrayRef =
+ Arc::new(BooleanArray::from(vec![None, Some(true), Some(false)]));
+ generic_test_op!(a, DataType::Boolean, Min, ScalarValue::from(false))?;
+
+ Ok(())
+ }
}
diff --git a/datafusion/physical-expr/src/expressions/mod.rs b/datafusion/physical-expr/src/expressions/mod.rs
index afe1ccd99f..140556b765 100644
--- a/datafusion/physical-expr/src/expressions/mod.rs
+++ b/datafusion/physical-expr/src/expressions/mod.rs
@@ -128,7 +128,7 @@ pub(crate) mod tests {
assert_eq!(expected, actual);
- Ok(())
+ Ok(()) as Result<(), DataFusionError>
}};
}
diff --git a/datafusion/row/src/accessor.rs b/datafusion/row/src/accessor.rs
index d4db66f367..a0b5a70df9 100644
--- a/datafusion/row/src/accessor.rs
+++ b/datafusion/row/src/accessor.rs
@@ -327,6 +327,7 @@ impl<'a> RowAccessor<'a> {
fn_add_idx!(f64);
fn_add_idx!(i128);
+ fn_max_min_idx!(bool, max);
fn_max_min_idx!(u8, max);
fn_max_min_idx!(u16, max);
fn_max_min_idx!(u32, max);
@@ -339,6 +340,7 @@ impl<'a> RowAccessor<'a> {
fn_max_min_idx!(f64, max);
fn_max_min_idx!(i128, max);
+ fn_max_min_idx!(bool, min);
fn_max_min_idx!(u8, min);
fn_max_min_idx!(u16, min);
fn_max_min_idx!(u32, min);