You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by al...@apache.org on 2022/11/19 10:21:31 UTC

[arrow-datafusion] branch master updated: Fix the percentile argument for ApproxPercentileCont must be Float64, not Decimal128(2, 1) (#4228)

This is an automated email from the ASF dual-hosted git repository.

alamb pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git


The following commit(s) were added to refs/heads/master by this push:
     new 446c2eafd Fix the percentile argument for ApproxPercentileCont must be Float64, not Decimal128(2, 1) (#4228)
446c2eafd is described below

commit 446c2eafdbff3340e12216ff5daa0f25ee4e2088
Author: comphead <co...@users.noreply.github.com>
AuthorDate: Sat Nov 19 02:21:26 2022 -0800

    Fix the percentile argument for ApproxPercentileCont must be Float64, not Decimal128(2, 1) (#4228)
    
    * Percentile coerce to f64 from decimal
    
    * Added test
    
    * fix message
---
 datafusion/core/tests/sql/aggregates.rs           | 21 +++++++++++++++++++++
 datafusion/expr/src/type_coercion/aggregates.rs   | 19 ++++++++++++-------
 datafusion/physical-expr/src/aggregate/tdigest.rs |  1 -
 3 files changed, 33 insertions(+), 8 deletions(-)

diff --git a/datafusion/core/tests/sql/aggregates.rs b/datafusion/core/tests/sql/aggregates.rs
index 91f75c0eb..88ae4d00d 100644
--- a/datafusion/core/tests/sql/aggregates.rs
+++ b/datafusion/core/tests/sql/aggregates.rs
@@ -2373,3 +2373,24 @@ async fn array_agg_one() -> Result<()> {
     assert_batches_eq!(expected, &actual);
     Ok(())
 }
+
+#[tokio::test]
+async fn test_approx_percentile_cont_decimal_support() -> Result<()> {
+    let ctx = SessionContext::new();
+    register_aggregate_csv(&ctx).await?;
+    let sql = "SELECT c1, approx_percentile_cont(c2, cast(0.85 as decimal(10,2))) apc FROM aggregate_test_100 GROUP BY 1 ORDER BY 1";
+    let actual = execute_to_batches(&ctx, sql).await;
+    let expected = vec![
+        "+----+-----+",
+        "| c1 | apc |",
+        "+----+-----+",
+        "| a  | 4   |",
+        "| b  | 5   |",
+        "| c  | 4   |",
+        "| d  | 4   |",
+        "| e  | 4   |",
+        "+----+-----+",
+    ];
+    assert_batches_eq!(expected, &actual);
+    Ok(())
+}
diff --git a/datafusion/expr/src/type_coercion/aggregates.rs b/datafusion/expr/src/type_coercion/aggregates.rs
index 72d6dc398..14f691d97 100644
--- a/datafusion/expr/src/type_coercion/aggregates.rs
+++ b/datafusion/expr/src/type_coercion/aggregates.rs
@@ -23,6 +23,8 @@ use std::ops::Deref;
 
 use crate::{AggregateFunction, Signature, TypeSignature};
 
+use super::functions::can_coerce_from;
+
 pub static STRINGS: &[DataType] = &[DataType::Utf8, DataType::LargeUtf8];
 
 pub static NUMERICS: &[DataType] = &[
@@ -166,19 +168,22 @@ pub fn coerce_types(
                     agg_fun, input_types[0]
                 )));
             }
-            if !matches!(input_types[1], DataType::Float64) {
-                return Err(DataFusionError::Plan(format!(
-                    "The percentile argument for {:?} must be Float64, not {:?}.",
-                    agg_fun, input_types[1]
-                )));
-            }
             if input_types.len() == 3 && !is_integer_arg_type(&input_types[2]) {
                 return Err(DataFusionError::Plan(format!(
                         "The percentile sample points count for {:?} must be integer, not {:?}.",
                         agg_fun, input_types[2]
                     )));
             }
-            Ok(input_types.to_vec())
+            let mut result = input_types.to_vec();
+            if can_coerce_from(&DataType::Float64, &input_types[1]) {
+                result[1] = DataType::Float64;
+            } else {
+                return Err(DataFusionError::Plan(format!(
+                    "Could not coerce the percent argument for {:?} to Float64. Was  {:?}.",
+                    agg_fun, input_types[1]
+                )));
+            }
+            Ok(result)
         }
         AggregateFunction::ApproxPercentileContWithWeight => {
             if !is_approx_percentile_cont_supported_arg_type(&input_types[0]) {
diff --git a/datafusion/physical-expr/src/aggregate/tdigest.rs b/datafusion/physical-expr/src/aggregate/tdigest.rs
index dc9b5bec6..977678e5b 100644
--- a/datafusion/physical-expr/src/aggregate/tdigest.rs
+++ b/datafusion/physical-expr/src/aggregate/tdigest.rs
@@ -231,7 +231,6 @@ impl TDigest {
     }
 
     pub(crate) fn merge_sorted_f64(&self, sorted_values: &[f64]) -> TDigest {
-        dbg!(&sorted_values);
         #[cfg(debug_assertions)]
         debug_assert!(is_sorted(sorted_values), "unsorted input to TDigest");