You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by al...@apache.org on 2022/11/19 10:21:31 UTC
[arrow-datafusion] branch master updated: Fix the percentile argument for ApproxPercentileCont must be Float64, not Decimal128(2, 1) (#4228)
This is an automated email from the ASF dual-hosted git repository.
alamb pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git
The following commit(s) were added to refs/heads/master by this push:
new 446c2eafd Fix the percentile argument for ApproxPercentileCont must be Float64, not Decimal128(2, 1) (#4228)
446c2eafd is described below
commit 446c2eafdbff3340e12216ff5daa0f25ee4e2088
Author: comphead <co...@users.noreply.github.com>
AuthorDate: Sat Nov 19 02:21:26 2022 -0800
Fix the percentile argument for ApproxPercentileCont must be Float64, not Decimal128(2, 1) (#4228)
* Percentile coerce to f64 from decimal
* Added test
* fix message
---
datafusion/core/tests/sql/aggregates.rs | 21 +++++++++++++++++++++
datafusion/expr/src/type_coercion/aggregates.rs | 19 ++++++++++++-------
datafusion/physical-expr/src/aggregate/tdigest.rs | 1 -
3 files changed, 33 insertions(+), 8 deletions(-)
diff --git a/datafusion/core/tests/sql/aggregates.rs b/datafusion/core/tests/sql/aggregates.rs
index 91f75c0eb..88ae4d00d 100644
--- a/datafusion/core/tests/sql/aggregates.rs
+++ b/datafusion/core/tests/sql/aggregates.rs
@@ -2373,3 +2373,24 @@ async fn array_agg_one() -> Result<()> {
assert_batches_eq!(expected, &actual);
Ok(())
}
+
+#[tokio::test]
+async fn test_approx_percentile_cont_decimal_support() -> Result<()> {
+ let ctx = SessionContext::new();
+ register_aggregate_csv(&ctx).await?;
+ let sql = "SELECT c1, approx_percentile_cont(c2, cast(0.85 as decimal(10,2))) apc FROM aggregate_test_100 GROUP BY 1 ORDER BY 1";
+ let actual = execute_to_batches(&ctx, sql).await;
+ let expected = vec![
+ "+----+-----+",
+ "| c1 | apc |",
+ "+----+-----+",
+ "| a | 4 |",
+ "| b | 5 |",
+ "| c | 4 |",
+ "| d | 4 |",
+ "| e | 4 |",
+ "+----+-----+",
+ ];
+ assert_batches_eq!(expected, &actual);
+ Ok(())
+}
diff --git a/datafusion/expr/src/type_coercion/aggregates.rs b/datafusion/expr/src/type_coercion/aggregates.rs
index 72d6dc398..14f691d97 100644
--- a/datafusion/expr/src/type_coercion/aggregates.rs
+++ b/datafusion/expr/src/type_coercion/aggregates.rs
@@ -23,6 +23,8 @@ use std::ops::Deref;
use crate::{AggregateFunction, Signature, TypeSignature};
+use super::functions::can_coerce_from;
+
pub static STRINGS: &[DataType] = &[DataType::Utf8, DataType::LargeUtf8];
pub static NUMERICS: &[DataType] = &[
@@ -166,19 +168,22 @@ pub fn coerce_types(
agg_fun, input_types[0]
)));
}
- if !matches!(input_types[1], DataType::Float64) {
- return Err(DataFusionError::Plan(format!(
- "The percentile argument for {:?} must be Float64, not {:?}.",
- agg_fun, input_types[1]
- )));
- }
if input_types.len() == 3 && !is_integer_arg_type(&input_types[2]) {
return Err(DataFusionError::Plan(format!(
"The percentile sample points count for {:?} must be integer, not {:?}.",
agg_fun, input_types[2]
)));
}
- Ok(input_types.to_vec())
+ let mut result = input_types.to_vec();
+ if can_coerce_from(&DataType::Float64, &input_types[1]) {
+ result[1] = DataType::Float64;
+ } else {
+ return Err(DataFusionError::Plan(format!(
+ "Could not coerce the percent argument for {:?} to Float64. Was {:?}.",
+ agg_fun, input_types[1]
+ )));
+ }
+ Ok(result)
}
AggregateFunction::ApproxPercentileContWithWeight => {
if !is_approx_percentile_cont_supported_arg_type(&input_types[0]) {
diff --git a/datafusion/physical-expr/src/aggregate/tdigest.rs b/datafusion/physical-expr/src/aggregate/tdigest.rs
index dc9b5bec6..977678e5b 100644
--- a/datafusion/physical-expr/src/aggregate/tdigest.rs
+++ b/datafusion/physical-expr/src/aggregate/tdigest.rs
@@ -231,7 +231,6 @@ impl TDigest {
}
pub(crate) fn merge_sorted_f64(&self, sorted_values: &[f64]) -> TDigest {
- dbg!(&sorted_values);
#[cfg(debug_assertions)]
debug_assert!(is_sorted(sorted_values), "unsorted input to TDigest");