You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by mn...@apache.org on 2023/05/17 10:03:26 UTC

[arrow-datafusion] branch main updated: feat: min/max agg for bool (#6226)

This is an automated email from the ASF dual-hosted git repository.

mneumann pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git


The following commit(s) were added to refs/heads/main by this push:
     new 5c40142722 feat: min/max agg for bool (#6226)
5c40142722 is described below

commit 5c4014272206397919a4a67ee3ca0af011aeb3f8
Author: Marco Neumann <ma...@crepererum.net>
AuthorDate: Wed May 17 12:03:19 2023 +0200

    feat: min/max agg for bool (#6226)
---
 .../tests/sqllogictests/test_files/aggregate.slt   | 30 ++++++++
 datafusion/physical-expr/src/aggregate/min_max.rs  | 90 +++++++++++++++++++++-
 datafusion/physical-expr/src/expressions/mod.rs    |  2 +-
 datafusion/row/src/accessor.rs                     |  2 +
 4 files changed, 121 insertions(+), 3 deletions(-)

diff --git a/datafusion/core/tests/sqllogictests/test_files/aggregate.slt b/datafusion/core/tests/sqllogictests/test_files/aggregate.slt
index 47d7d031ce..17d89a9f05 100644
--- a/datafusion/core/tests/sqllogictests/test_files/aggregate.slt
+++ b/datafusion/core/tests/sqllogictests/test_files/aggregate.slt
@@ -1838,3 +1838,33 @@ select max(x_dict) from value_dict group by x_dict % 2 order by max(x_dict);
 ----
 4
 5
+
+# bool aggregation
+statement ok
+CREATE TABLE value_bool(x boolean, g int) AS VALUES (NULL, 0), (false, 0), (true, 0), (false, 1), (true, 2), (NULL, 3);
+
+query B
+select min(x) from value_bool;
+----
+false
+
+query B
+select max(x) from value_bool;
+----
+true
+
+query B
+select min(x) from value_bool group by g order by g;
+----
+false
+false
+true
+NULL
+
+query B
+select max(x) from value_bool group by g order by g;
+----
+true
+false
+true
+NULL
diff --git a/datafusion/physical-expr/src/aggregate/min_max.rs b/datafusion/physical-expr/src/aggregate/min_max.rs
index 3a3d529839..f811dae7b5 100644
--- a/datafusion/physical-expr/src/aggregate/min_max.rs
+++ b/datafusion/physical-expr/src/aggregate/min_max.rs
@@ -26,8 +26,8 @@ use arrow::compute;
 use arrow::datatypes::{DataType, TimeUnit};
 use arrow::{
     array::{
-        ArrayRef, Date32Array, Date64Array, Float32Array, Float64Array, Int16Array,
-        Int32Array, Int64Array, Int8Array, LargeStringArray, StringArray,
+        ArrayRef, BooleanArray, Date32Array, Date64Array, Float32Array, Float64Array,
+        Int16Array, Int32Array, Int64Array, Int8Array, LargeStringArray, StringArray,
         Time32MillisecondArray, Time32SecondArray, Time64MicrosecondArray,
         Time64NanosecondArray, TimestampMicrosecondArray, TimestampMillisecondArray,
         TimestampNanosecondArray, TimestampSecondArray, UInt16Array, UInt32Array,
@@ -290,6 +290,9 @@ fn min_batch(values: &ArrayRef) -> Result<ScalarValue> {
         DataType::LargeUtf8 => {
             typed_min_max_batch_string!(values, LargeStringArray, LargeUtf8, min_string)
         }
+        DataType::Boolean => {
+            typed_min_max_batch!(values, BooleanArray, Boolean, min_boolean)
+        }
         _ => min_max_batch!(values, min),
     })
 }
@@ -303,6 +306,9 @@ fn max_batch(values: &ArrayRef) -> Result<ScalarValue> {
         DataType::LargeUtf8 => {
             typed_min_max_batch_string!(values, LargeStringArray, LargeUtf8, max_string)
         }
+        DataType::Boolean => {
+            typed_min_max_batch!(values, BooleanArray, Boolean, max_boolean)
+        }
         _ => min_max_batch!(values, max),
     })
 }
@@ -386,6 +392,9 @@ macro_rules! min_max {
                 )));
                 }
             }
+            (ScalarValue::Boolean(lhs), ScalarValue::Boolean(rhs)) => {
+                typed_min_max!(lhs, rhs, Boolean, $OP)
+            }
             (ScalarValue::Float64(lhs), ScalarValue::Float64(rhs)) => {
                 typed_min_max!(lhs, rhs, Float64, $OP)
             }
@@ -532,6 +541,9 @@ macro_rules! min_max {
 macro_rules! min_max_v2 {
     ($INDEX:ident, $ACC:ident, $SCALAR:expr, $OP:ident) => {{
         Ok(match $SCALAR {
+            ScalarValue::Boolean(rhs) => {
+                typed_min_max_v2!($INDEX, $ACC, rhs, bool, $OP)
+            }
             ScalarValue::Float64(rhs) => {
                 typed_min_max_v2!($INDEX, $ACC, rhs, f64, $OP)
             }
@@ -1429,4 +1441,78 @@ mod tests {
             ScalarValue::Time64Nanosecond(Some(5))
         )
     }
+
+    #[test]
+    fn max_bool() -> Result<()> {
+        let a: ArrayRef = Arc::new(BooleanArray::from(vec![false, false]));
+        generic_test_op!(a, DataType::Boolean, Max, ScalarValue::from(false))?;
+
+        let a: ArrayRef = Arc::new(BooleanArray::from(vec![true, true]));
+        generic_test_op!(a, DataType::Boolean, Max, ScalarValue::from(true))?;
+
+        let a: ArrayRef = Arc::new(BooleanArray::from(vec![false, true, false]));
+        generic_test_op!(a, DataType::Boolean, Max, ScalarValue::from(true))?;
+
+        let a: ArrayRef = Arc::new(BooleanArray::from(vec![true, false, true]));
+        generic_test_op!(a, DataType::Boolean, Max, ScalarValue::from(true))?;
+
+        let a: ArrayRef = Arc::new(BooleanArray::from(Vec::<bool>::new()));
+        generic_test_op!(
+            a,
+            DataType::Boolean,
+            Max,
+            ScalarValue::from(None as Option<bool>)
+        )?;
+
+        let a: ArrayRef = Arc::new(BooleanArray::from(vec![None as Option<bool>]));
+        generic_test_op!(
+            a,
+            DataType::Boolean,
+            Max,
+            ScalarValue::from(None as Option<bool>)
+        )?;
+
+        let a: ArrayRef =
+            Arc::new(BooleanArray::from(vec![None, Some(true), Some(false)]));
+        generic_test_op!(a, DataType::Boolean, Max, ScalarValue::from(true))?;
+
+        Ok(())
+    }
+
+    #[test]
+    fn min_bool() -> Result<()> {
+        let a: ArrayRef = Arc::new(BooleanArray::from(vec![false, false]));
+        generic_test_op!(a, DataType::Boolean, Min, ScalarValue::from(false))?;
+
+        let a: ArrayRef = Arc::new(BooleanArray::from(vec![true, true]));
+        generic_test_op!(a, DataType::Boolean, Min, ScalarValue::from(true))?;
+
+        let a: ArrayRef = Arc::new(BooleanArray::from(vec![false, true, false]));
+        generic_test_op!(a, DataType::Boolean, Min, ScalarValue::from(false))?;
+
+        let a: ArrayRef = Arc::new(BooleanArray::from(vec![true, false, true]));
+        generic_test_op!(a, DataType::Boolean, Min, ScalarValue::from(false))?;
+
+        let a: ArrayRef = Arc::new(BooleanArray::from(Vec::<bool>::new()));
+        generic_test_op!(
+            a,
+            DataType::Boolean,
+            Min,
+            ScalarValue::from(None as Option<bool>)
+        )?;
+
+        let a: ArrayRef = Arc::new(BooleanArray::from(vec![None as Option<bool>]));
+        generic_test_op!(
+            a,
+            DataType::Boolean,
+            Min,
+            ScalarValue::from(None as Option<bool>)
+        )?;
+
+        let a: ArrayRef =
+            Arc::new(BooleanArray::from(vec![None, Some(true), Some(false)]));
+        generic_test_op!(a, DataType::Boolean, Min, ScalarValue::from(false))?;
+
+        Ok(())
+    }
 }
diff --git a/datafusion/physical-expr/src/expressions/mod.rs b/datafusion/physical-expr/src/expressions/mod.rs
index afe1ccd99f..140556b765 100644
--- a/datafusion/physical-expr/src/expressions/mod.rs
+++ b/datafusion/physical-expr/src/expressions/mod.rs
@@ -128,7 +128,7 @@ pub(crate) mod tests {
 
             assert_eq!(expected, actual);
 
-            Ok(())
+            Ok(()) as Result<(), DataFusionError>
         }};
     }
 
diff --git a/datafusion/row/src/accessor.rs b/datafusion/row/src/accessor.rs
index d4db66f367..a0b5a70df9 100644
--- a/datafusion/row/src/accessor.rs
+++ b/datafusion/row/src/accessor.rs
@@ -327,6 +327,7 @@ impl<'a> RowAccessor<'a> {
     fn_add_idx!(f64);
     fn_add_idx!(i128);
 
+    fn_max_min_idx!(bool, max);
     fn_max_min_idx!(u8, max);
     fn_max_min_idx!(u16, max);
     fn_max_min_idx!(u32, max);
@@ -339,6 +340,7 @@ impl<'a> RowAccessor<'a> {
     fn_max_min_idx!(f64, max);
     fn_max_min_idx!(i128, max);
 
+    fn_max_min_idx!(bool, min);
     fn_max_min_idx!(u8, min);
     fn_max_min_idx!(u16, min);
     fn_max_min_idx!(u32, min);