You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by xu...@apache.org on 2023/01/21 09:18:33 UTC

[arrow-datafusion] branch master updated: AVG(null) is NULL (not zero) (#5008)

This is an automated email from the ASF dual-hosted git repository.

xudong963 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git


The following commit(s) were added to refs/heads/master by this push:
     new f5439c806 AVG(null) is NULL (not zero) (#5008)
f5439c806 is described below

commit f5439c806912e69ad974706ab37456ac6bac21e4
Author: Andrew Lamb <an...@nerdnetworks.org>
AuthorDate: Sat Jan 21 10:18:27 2023 +0100

    AVG(null) is NULL (not zero) (#5008)
    
    * avg(null) should be null
    
    * Fix code
---
 .../tests/sqllogictests/test_files/aggregate.slt   | 44 ++++++++++++++++++++++
 datafusion/physical-expr/src/aggregate/average.rs  |  2 +-
 2 files changed, 45 insertions(+), 1 deletion(-)

diff --git a/datafusion/core/tests/sqllogictests/test_files/aggregate.slt b/datafusion/core/tests/sqllogictests/test_files/aggregate.slt
index 5ddec784e..5dccb2427 100644
--- a/datafusion/core/tests/sqllogictests/test_files/aggregate.slt
+++ b/datafusion/core/tests/sqllogictests/test_files/aggregate.slt
@@ -1183,3 +1183,47 @@ query RRRR
 select var(sq.column1), var_pop(sq.column1), stddev(sq.column1), stddev_pop(sq.column1) from (values (1.0), (3.0)) as sq;
 ----
 2 1 1.4142135623730951 1
+
+
+# sum / count for all nulls
+statement ok
+create table the_nulls as values (null::bigint, 1), (null::bigint, 1), (null::bigint, 2);
+
+# counts should be zeros (even for nulls)
+query II
+SELECT count(column1), column2 from the_nulls group by column2 order by column2;
+----
+0 1
+0 2
+
+# sums should be null
+query II
+SELECT sum(column1), column2 from the_nulls group by column2 order by column2;
+----
+NULL 1
+NULL 2
+
+# avg should be null
+query II
+SELECT avg(column1), column2 from the_nulls group by column2 order by column2;
+----
+NULL 1
+NULL 2
+
+# min should be null
+query II
+SELECT min(column1), column2 from the_nulls group by column2 order by column2;
+----
+NULL 1
+NULL 2
+
+# max should be null
+query II
+SELECT max(column1), column2 from the_nulls group by column2 order by column2;
+----
+NULL 1
+NULL 2
+
+
+statement ok
+drop table the_nulls;
diff --git a/datafusion/physical-expr/src/aggregate/average.rs b/datafusion/physical-expr/src/aggregate/average.rs
index 216bd56af..e0a43aaa3 100644
--- a/datafusion/physical-expr/src/aggregate/average.rs
+++ b/datafusion/physical-expr/src/aggregate/average.rs
@@ -261,7 +261,7 @@ impl RowAccumulator for AvgRowAccumulator {
         assert_eq!(self.sum_datatype, DataType::Float64);
         Ok(match accessor.get_u64_opt(self.state_index()) {
             None => ScalarValue::Float64(None),
-            Some(0) => ScalarValue::Float64(Some(0.0)),
+            Some(0) => ScalarValue::Float64(None),
             Some(n) => ScalarValue::Float64(
                 accessor
                     .get_f64_opt(self.state_index() + 1)