You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by xu...@apache.org on 2023/01/21 09:18:33 UTC
[arrow-datafusion] branch master updated: AVG(null) is NULL (not zero) (#5008)
This is an automated email from the ASF dual-hosted git repository.
xudong963 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git
The following commit(s) were added to refs/heads/master by this push:
new f5439c806 AVG(null) is NULL (not zero) (#5008)
f5439c806 is described below
commit f5439c806912e69ad974706ab37456ac6bac21e4
Author: Andrew Lamb <an...@nerdnetworks.org>
AuthorDate: Sat Jan 21 10:18:27 2023 +0100
AVG(null) is NULL (not zero) (#5008)
* avg(null) should be null
* Fix code
---
.../tests/sqllogictests/test_files/aggregate.slt | 44 ++++++++++++++++++++++
datafusion/physical-expr/src/aggregate/average.rs | 2 +-
2 files changed, 45 insertions(+), 1 deletion(-)
diff --git a/datafusion/core/tests/sqllogictests/test_files/aggregate.slt b/datafusion/core/tests/sqllogictests/test_files/aggregate.slt
index 5ddec784e..5dccb2427 100644
--- a/datafusion/core/tests/sqllogictests/test_files/aggregate.slt
+++ b/datafusion/core/tests/sqllogictests/test_files/aggregate.slt
@@ -1183,3 +1183,47 @@ query RRRR
select var(sq.column1), var_pop(sq.column1), stddev(sq.column1), stddev_pop(sq.column1) from (values (1.0), (3.0)) as sq;
----
2 1 1.4142135623730951 1
+
+
+# sum / count for all nulls
+statement ok
+create table the_nulls as values (null::bigint, 1), (null::bigint, 1), (null::bigint, 2);
+
+# counts should be zeros (even for nulls)
+query II
+SELECT count(column1), column2 from the_nulls group by column2 order by column2;
+----
+0 1
+0 2
+
+# sums should be null
+query II
+SELECT sum(column1), column2 from the_nulls group by column2 order by column2;
+----
+NULL 1
+NULL 2
+
+# avg should be null
+query II
+SELECT avg(column1), column2 from the_nulls group by column2 order by column2;
+----
+NULL 1
+NULL 2
+
+# min should be null
+query II
+SELECT min(column1), column2 from the_nulls group by column2 order by column2;
+----
+NULL 1
+NULL 2
+
+# max should be null
+query II
+SELECT max(column1), column2 from the_nulls group by column2 order by column2;
+----
+NULL 1
+NULL 2
+
+
+statement ok
+drop table the_nulls;
diff --git a/datafusion/physical-expr/src/aggregate/average.rs b/datafusion/physical-expr/src/aggregate/average.rs
index 216bd56af..e0a43aaa3 100644
--- a/datafusion/physical-expr/src/aggregate/average.rs
+++ b/datafusion/physical-expr/src/aggregate/average.rs
@@ -261,7 +261,7 @@ impl RowAccumulator for AvgRowAccumulator {
assert_eq!(self.sum_datatype, DataType::Float64);
Ok(match accessor.get_u64_opt(self.state_index()) {
None => ScalarValue::Float64(None),
- Some(0) => ScalarValue::Float64(Some(0.0)),
+ Some(0) => ScalarValue::Float64(None),
Some(n) => ScalarValue::Float64(
accessor
.get_f64_opt(self.state_index() + 1)