You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by al...@apache.org on 2023/06/11 17:57:02 UTC
[arrow-datafusion] branch main updated: fix: median with even number of decimal128 not working (#6634)
This is an automated email from the ASF dual-hosted git repository.
alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git
The following commit(s) were added to refs/heads/main by this push:
new 945191fa6c fix: median with even number of decimal128 not working (#6634)
945191fa6c is described below
commit 945191fa6cd573050717b54e52e65d008e183b75
Author: Igor Izvekov <iz...@gmail.com>
AuthorDate: Sun Jun 11 20:56:56 2023 +0300
fix: median with even number of decimal128 not working (#6634)
---
.../tests/sqllogictests/test_files/decimal.slt | 6 +
datafusion/physical-expr/src/aggregate/median.rs | 140 +++++++++++++++++++++
2 files changed, 146 insertions(+)
diff --git a/datafusion/core/tests/sqllogictests/test_files/decimal.slt b/datafusion/core/tests/sqllogictests/test_files/decimal.slt
index a6ec1edfd0..fd4e80e1af 100644
--- a/datafusion/core/tests/sqllogictests/test_files/decimal.slt
+++ b/datafusion/core/tests/sqllogictests/test_files/decimal.slt
@@ -124,6 +124,12 @@ select arrow_typeof(avg(c1)), avg(c1) from decimal_simple;
Decimal128(14, 10) 0.0000366666
+query TR
+select arrow_typeof(median(c1)), median(c1) from decimal_simple;
+----
+Decimal128(10, 6) 0.00004
+
+
query RRIBR rowsort
select * from decimal_simple where c1=CAST(0.00002 as Decimal(10,8));
----
diff --git a/datafusion/physical-expr/src/aggregate/median.rs b/datafusion/physical-expr/src/aggregate/median.rs
index 62114d624c..9c2d85bba0 100644
--- a/datafusion/physical-expr/src/aggregate/median.rs
+++ b/datafusion/physical-expr/src/aggregate/median.rs
@@ -196,6 +196,9 @@ impl Accumulator for MedianAccumulator {
ScalarValue::UInt64(Some(v)) => ScalarValue::UInt64(Some(v / 2)),
ScalarValue::Float32(Some(v)) => ScalarValue::Float32(Some(v / 2.0)),
ScalarValue::Float64(Some(v)) => ScalarValue::Float64(Some(v / 2.0)),
+ ScalarValue::Decimal128(Some(v), p, s) => {
+ ScalarValue::Decimal128(Some(v / 2), p, s)
+ }
v => {
return Err(DataFusionError::Internal(format!(
"Unsupported type in MedianAccumulator: {v:?}"
@@ -230,3 +233,140 @@ fn scalar_at_index(
.expect("Convert uint32 to usize");
ScalarValue::try_from_array(array, array_index)
}
+
+#[cfg(test)]
+mod tests {
+ use super::*;
+ use crate::expressions::col;
+ use crate::expressions::tests::aggregate;
+ use crate::generic_test_op;
+ use arrow::record_batch::RecordBatch;
+ use arrow::{array::*, datatypes::*};
+ use datafusion_common::Result;
+
+ #[test]
+ fn median_decimal() -> Result<()> {
+        // even count (values 1..=6): expected 3 is (3 + 4) / 2 with integer division
+ let array: ArrayRef = Arc::new(
+ (1..7)
+ .map(Some)
+ .collect::<Decimal128Array>()
+ .with_precision_and_scale(10, 4)?,
+ );
+
+ generic_test_op!(
+ array,
+ DataType::Decimal128(10, 4),
+ Median,
+ ScalarValue::Decimal128(Some(3), 10, 4)
+ )
+ }
+
+ #[test]
+ fn median_decimal_with_nulls() -> Result<()> {
+ let array: ArrayRef = Arc::new(
+ (1..6)
+ .map(|i| if i == 2 { None } else { Some(i) })
+ .collect::<Decimal128Array>()
+ .with_precision_and_scale(10, 4)?,
+ );
+ generic_test_op!(
+ array,
+ DataType::Decimal128(10, 4),
+ Median,
+ ScalarValue::Decimal128(Some(3), 10, 4)
+ )
+ }
+
+ #[test]
+ fn median_decimal_all_nulls() -> Result<()> {
+        // every input is NULL, so the expected median is a NULL decimal
+ let array: ArrayRef = Arc::new(
+ std::iter::repeat::<Option<i128>>(None)
+ .take(6)
+ .collect::<Decimal128Array>()
+ .with_precision_and_scale(10, 4)?,
+ );
+ generic_test_op!(
+ array,
+ DataType::Decimal128(10, 4),
+ Median,
+ ScalarValue::Decimal128(None, 10, 4)
+ )
+ }
+
+ #[test]
+ fn median_i32_odd() -> Result<()> {
+ let a: ArrayRef = Arc::new(Int32Array::from(vec![1, 2, 3, 4, 5]));
+ generic_test_op!(a, DataType::Int32, Median, ScalarValue::from(3_i32))
+ }
+
+ #[test]
+ fn median_i32_even() -> Result<()> {
+ let a: ArrayRef = Arc::new(Int32Array::from(vec![1, 2, 3, 4, 5, 6]));
+ generic_test_op!(a, DataType::Int32, Median, ScalarValue::from(3_i32))
+ }
+
+ #[test]
+ fn median_i32_with_nulls() -> Result<()> {
+ let a: ArrayRef = Arc::new(Int32Array::from(vec![
+ Some(1),
+ None,
+ Some(3),
+ Some(4),
+ Some(5),
+ ]));
+ generic_test_op!(a, DataType::Int32, Median, ScalarValue::from(3i32))
+ }
+
+ #[test]
+ fn median_i32_all_nulls() -> Result<()> {
+ let a: ArrayRef = Arc::new(Int32Array::from(vec![None, None]));
+ generic_test_op!(a, DataType::Int32, Median, ScalarValue::Int32(None))
+ }
+
+ #[test]
+ fn median_u32_odd() -> Result<()> {
+ let a: ArrayRef =
+ Arc::new(UInt32Array::from(vec![1_u32, 2_u32, 3_u32, 4_u32, 5_u32]));
+ generic_test_op!(a, DataType::UInt32, Median, ScalarValue::from(3u32))
+ }
+
+ #[test]
+ fn median_u32_even() -> Result<()> {
+ let a: ArrayRef = Arc::new(UInt32Array::from(vec![
+ 1_u32, 2_u32, 3_u32, 4_u32, 5_u32, 6_u32,
+ ]));
+ generic_test_op!(a, DataType::UInt32, Median, ScalarValue::from(3u32))
+ }
+
+ #[test]
+ fn median_f32_odd() -> Result<()> {
+ let a: ArrayRef =
+ Arc::new(Float32Array::from(vec![1_f32, 2_f32, 3_f32, 4_f32, 5_f32]));
+ generic_test_op!(a, DataType::Float32, Median, ScalarValue::from(3_f32))
+ }
+
+ #[test]
+ fn median_f32_even() -> Result<()> {
+ let a: ArrayRef = Arc::new(Float32Array::from(vec![
+ 1_f32, 2_f32, 3_f32, 4_f32, 5_f32, 6_f32,
+ ]));
+ generic_test_op!(a, DataType::Float32, Median, ScalarValue::from(3.5_f32))
+ }
+
+ #[test]
+ fn median_f64_odd() -> Result<()> {
+ let a: ArrayRef =
+ Arc::new(Float64Array::from(vec![1_f64, 2_f64, 3_f64, 4_f64, 5_f64]));
+ generic_test_op!(a, DataType::Float64, Median, ScalarValue::from(3_f64))
+ }
+
+ #[test]
+ fn median_f64_even() -> Result<()> {
+ let a: ArrayRef = Arc::new(Float64Array::from(vec![
+ 1_f64, 2_f64, 3_f64, 4_f64, 5_f64, 6_f64,
+ ]));
+ generic_test_op!(a, DataType::Float64, Median, ScalarValue::from(3.5_f64))
+ }
+}