You are viewing a plain text version of this content. The canonical link for it is here.
Posted to github@arrow.apache.org by "alamb (via GitHub)" <gi...@apache.org> on 2023/07/13 16:06:27 UTC

[GitHub] [arrow-datafusion] alamb commented on a diff in pull request #6957: Fix incorrect results in `BitAnd` GroupsAccumulator

alamb commented on code in PR #6957:
URL: https://github.com/apache/arrow-datafusion/pull/6957#discussion_r1262772765


##########
datafusion/physical-expr/src/aggregate/bit_and_or_xor.rs:
##########
@@ -279,35 +280,31 @@ impl AggregateExpr for BitAnd {
         use std::ops::BitAndAssign;
         match self.data_type {
             DataType::Int8 => {
-                instantiate_primitive_accumulator!(self, Int8Type, |x, y| x
-                    .bitand_assign(y))
+                instantiate_accumulator!(self, -1, Int8Type, |x, y| x.bitand_assign(y))

Review Comment:
   the fix is passing in `MAX` / `-1` here to get a bitpattern of all `1` initially



##########
datafusion/core/tests/sqllogictests/test_files/aggregate.slt:
##########
@@ -1489,29 +1519,49 @@ create table bit_aggregate_functions (
   c1 SMALLINT NOT NULL,
   c2 SMALLINT NOT NULL,
   c3 SMALLINT,
+  tag varchar
 )
 as values
-  (5, 10, 11),
-  (33, 11, null),
-  (9, 12, null);
-
-# query_bit_and
-query III
-SELECT bit_and(c1), bit_and(c2), bit_and(c3) FROM bit_aggregate_functions
-----
-1 8 11
-
-# query_bit_or
-query III
-SELECT bit_or(c1), bit_or(c2), bit_or(c3) FROM bit_aggregate_functions
-----
-45 15 11
+  (5,  10, 11,   'A'),
+  (33, 11, null, 'B'),
+  (9,  12, null, 'A');
+
+# query_bit_and, query_bit_or, query_bit_xor
+query IIIIIIIII
+SELECT
+  bit_and(c1),
+  bit_and(c2),
+  bit_and(c3),
+  bit_or(c1),
+  bit_or(c2),
+  bit_or(c3),
+  bit_xor(c1),
+  bit_xor(c2),
+  bit_xor(c3)
+FROM bit_aggregate_functions
+----
+1 8 11 45 15 11 45 13 11
+
+# query_bit_and, query_bit_or, query_bit_xor, with group
+query IIIIIIIIIT
+SELECT
+  bit_and(c1),
+  bit_and(c2),
+  bit_and(c3),
+  bit_or(c1),
+  bit_or(c2),
+  bit_or(c3),
+  bit_xor(c1),
+  bit_xor(c2),
+  bit_xor(c3),
+  tag
+FROM bit_aggregate_functions
+GROUP BY tag
+ORDER BY tag
+----
+1 8 11 13 14 11 12 6 11 A

Review Comment:
   before this PR, this test fails like 
   
   ```
   External error: query result mismatch:
   [SQL] SELECT
     bit_and(c1),
     bit_and(c2),
     bit_and(c3),
     bit_or(c1),
     bit_or(c2),
     bit_or(c3),
     bit_xor(c1),
     bit_xor(c2),
     bit_xor(c3),
     tag
   FROM bit_aggregate_functions
   GROUP BY tag
   ORDER BY tag
   [Diff] (-expected|+actual)
   -   1 8 11 13 14 11 12 6 11 A
   -   33 11 NULL 33 11 NULL 33 11 NULL B
   +   0 0 0 13 14 11 12 6 11 A
   +   0 0 NULL 33 11 NULL 33 11 NULL B
   at tests/sqllogictests/test_files/aggregate.slt:1546
   ```



##########
datafusion/physical-expr/src/aggregate/groups_accumulator/prim_op.rs:
##########
@@ -85,7 +95,7 @@ where
         let values = values[0].as_primitive::<T>();
 
         // update values
-        self.values.resize(total_num_groups, T::default_value());

Review Comment:
   previously this always started at zero, now it starts at the specified value.
   
   I think I can now reduce the code in min/max accumulator as well



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscribe@arrow.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org