You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by al...@apache.org on 2023/06/11 17:57:02 UTC

[arrow-datafusion] branch main updated: fix: median with even number of decimal128 not working (#6634)

This is an automated email from the ASF dual-hosted git repository.

alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git


The following commit(s) were added to refs/heads/main by this push:
     new 945191fa6c fix: median with even number of decimal128 not working (#6634)
945191fa6c is described below

commit 945191fa6cd573050717b54e52e65d008e183b75
Author: Igor Izvekov <iz...@gmail.com>
AuthorDate: Sun Jun 11 20:56:56 2023 +0300

    fix: median with even number of decimal128 not working (#6634)
---
 .../tests/sqllogictests/test_files/decimal.slt     |   6 +
 datafusion/physical-expr/src/aggregate/median.rs   | 140 +++++++++++++++++++++
 2 files changed, 146 insertions(+)

diff --git a/datafusion/core/tests/sqllogictests/test_files/decimal.slt b/datafusion/core/tests/sqllogictests/test_files/decimal.slt
index a6ec1edfd0..fd4e80e1af 100644
--- a/datafusion/core/tests/sqllogictests/test_files/decimal.slt
+++ b/datafusion/core/tests/sqllogictests/test_files/decimal.slt
@@ -124,6 +124,12 @@ select arrow_typeof(avg(c1)), avg(c1) from decimal_simple;
 Decimal128(14, 10) 0.0000366666
 
 
+query TR
+select arrow_typeof(median(c1)), median(c1) from decimal_simple;
+----
+Decimal128(10, 6) 0.00004
+
+
 query RRIBR rowsort
 select * from decimal_simple where c1=CAST(0.00002 as Decimal(10,8));
 ----
diff --git a/datafusion/physical-expr/src/aggregate/median.rs b/datafusion/physical-expr/src/aggregate/median.rs
index 62114d624c..9c2d85bba0 100644
--- a/datafusion/physical-expr/src/aggregate/median.rs
+++ b/datafusion/physical-expr/src/aggregate/median.rs
@@ -196,6 +196,9 @@ impl Accumulator for MedianAccumulator {
                 ScalarValue::UInt64(Some(v)) => ScalarValue::UInt64(Some(v / 2)),
                 ScalarValue::Float32(Some(v)) => ScalarValue::Float32(Some(v / 2.0)),
                 ScalarValue::Float64(Some(v)) => ScalarValue::Float64(Some(v / 2.0)),
+                ScalarValue::Decimal128(Some(v), p, s) => {
+                    ScalarValue::Decimal128(Some(v / 2), p, s)
+                }
                 v => {
                     return Err(DataFusionError::Internal(format!(
                         "Unsupported type in MedianAccumulator: {v:?}"
@@ -230,3 +233,140 @@ fn scalar_at_index(
         .expect("Convert uint32 to usize");
     ScalarValue::try_from_array(array, array_index)
 }
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use crate::expressions::col;
+    use crate::expressions::tests::aggregate;
+    use crate::generic_test_op;
+    use arrow::record_batch::RecordBatch;
+    use arrow::{array::*, datatypes::*};
+    use datafusion_common::Result;
+
+    #[test]
+    fn median_decimal() -> Result<()> {
+        // even number of values (1 through 6): covers the Decimal128 halving branch
+        let array: ArrayRef = Arc::new(
+            (1..7)
+                .map(Some)
+                .collect::<Decimal128Array>()
+                .with_precision_and_scale(10, 4)?,
+        );
+
+        generic_test_op!(
+            array,
+            DataType::Decimal128(10, 4),
+            Median,
+            ScalarValue::Decimal128(Some(3), 10, 4)
+        )
+    }
+
+    #[test]
+    fn median_decimal_with_nulls() -> Result<()> {
+        let array: ArrayRef = Arc::new(
+            (1..6)
+                .map(|i| if i == 2 { None } else { Some(i) })
+                .collect::<Decimal128Array>()
+                .with_precision_and_scale(10, 4)?,
+        );
+        generic_test_op!(
+            array,
+            DataType::Decimal128(10, 4),
+            Median,
+            ScalarValue::Decimal128(Some(3), 10, 4)
+        )
+    }
+
+    #[test]
+    fn median_decimal_all_nulls() -> Result<()> {
+        // every input value is NULL, so the expected median is a NULL Decimal128
+        let array: ArrayRef = Arc::new(
+            std::iter::repeat::<Option<i128>>(None)
+                .take(6)
+                .collect::<Decimal128Array>()
+                .with_precision_and_scale(10, 4)?,
+        );
+        generic_test_op!(
+            array,
+            DataType::Decimal128(10, 4),
+            Median,
+            ScalarValue::Decimal128(None, 10, 4)
+        )
+    }
+
+    #[test]
+    fn median_i32_odd() -> Result<()> {
+        let a: ArrayRef = Arc::new(Int32Array::from(vec![1, 2, 3, 4, 5]));
+        generic_test_op!(a, DataType::Int32, Median, ScalarValue::from(3_i32))
+    }
+
+    #[test]
+    fn median_i32_even() -> Result<()> {
+        let a: ArrayRef = Arc::new(Int32Array::from(vec![1, 2, 3, 4, 5, 6]));
+        generic_test_op!(a, DataType::Int32, Median, ScalarValue::from(3_i32))
+    }
+
+    #[test]
+    fn median_i32_with_nulls() -> Result<()> {
+        let a: ArrayRef = Arc::new(Int32Array::from(vec![
+            Some(1),
+            None,
+            Some(3),
+            Some(4),
+            Some(5),
+        ]));
+        generic_test_op!(a, DataType::Int32, Median, ScalarValue::from(3i32))
+    }
+
+    #[test]
+    fn median_i32_all_nulls() -> Result<()> {
+        let a: ArrayRef = Arc::new(Int32Array::from(vec![None, None]));
+        generic_test_op!(a, DataType::Int32, Median, ScalarValue::Int32(None))
+    }
+
+    #[test]
+    fn median_u32_odd() -> Result<()> {
+        let a: ArrayRef =
+            Arc::new(UInt32Array::from(vec![1_u32, 2_u32, 3_u32, 4_u32, 5_u32]));
+        generic_test_op!(a, DataType::UInt32, Median, ScalarValue::from(3u32))
+    }
+
+    #[test]
+    fn median_u32_even() -> Result<()> {
+        let a: ArrayRef = Arc::new(UInt32Array::from(vec![
+            1_u32, 2_u32, 3_u32, 4_u32, 5_u32, 6_u32,
+        ]));
+        generic_test_op!(a, DataType::UInt32, Median, ScalarValue::from(3u32))
+    }
+
+    #[test]
+    fn median_f32_odd() -> Result<()> {
+        let a: ArrayRef =
+            Arc::new(Float32Array::from(vec![1_f32, 2_f32, 3_f32, 4_f32, 5_f32]));
+        generic_test_op!(a, DataType::Float32, Median, ScalarValue::from(3_f32))
+    }
+
+    #[test]
+    fn median_f32_even() -> Result<()> {
+        let a: ArrayRef = Arc::new(Float32Array::from(vec![
+            1_f32, 2_f32, 3_f32, 4_f32, 5_f32, 6_f32,
+        ]));
+        generic_test_op!(a, DataType::Float32, Median, ScalarValue::from(3.5_f32))
+    }
+
+    #[test]
+    fn median_f64_odd() -> Result<()> {
+        let a: ArrayRef =
+            Arc::new(Float64Array::from(vec![1_f64, 2_f64, 3_f64, 4_f64, 5_f64]));
+        generic_test_op!(a, DataType::Float64, Median, ScalarValue::from(3_f64))
+    }
+
+    #[test]
+    fn median_f64_even() -> Result<()> {
+        let a: ArrayRef = Arc::new(Float64Array::from(vec![
+            1_f64, 2_f64, 3_f64, 4_f64, 5_f64, 6_f64,
+        ]));
+        generic_test_op!(a, DataType::Float64, Median, ScalarValue::from(3.5_f64))
+    }
+}