You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by tu...@apache.org on 2022/11/07 19:33:34 UTC

[arrow-datafusion] branch master updated: Minor: Use upstream BooleanArray::true_count (#4129)

This is an automated email from the ASF dual-hosted git repository.

tustvold pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git


The following commit(s) were added to refs/heads/master by this push:
     new 3892a1fd6 Minor: Use upstream BooleanArray::true_count (#4129)
3892a1fd6 is described below

commit 3892a1fd6862e18cb2c5eafc0aa8ef302f073d8a
Author: Andrew Lamb <an...@nerdnetworks.org>
AuthorDate: Mon Nov 7 14:33:28 2022 -0500

    Minor: Use upstream BooleanArray::true_count (#4129)
---
 .../file_format/parquet/row_filter.rs              | 23 +---------------------
 1 file changed, 1 insertion(+), 22 deletions(-)

diff --git a/datafusion/core/src/physical_plan/file_format/parquet/row_filter.rs b/datafusion/core/src/physical_plan/file_format/parquet/row_filter.rs
index 876891813..1b2ec40ce 100644
--- a/datafusion/core/src/physical_plan/file_format/parquet/row_filter.rs
+++ b/datafusion/core/src/physical_plan/file_format/parquet/row_filter.rs
@@ -133,7 +133,7 @@ impl ArrowPredicate for DatafusionArrowPredicate {
             Ok(array) => {
                 if let Some(mask) = array.as_any().downcast_ref::<BooleanArray>() {
                     let bool_arr = BooleanArray::from(mask.data().clone());
-                    let num_filtered = bool_arr.len() - true_count(&bool_arr);
+                    let num_filtered = bool_arr.len() - bool_arr.true_count();
                     self.rows_filtered.add(num_filtered);
                     timer.stop();
                     Ok(bool_arr)
@@ -151,27 +151,6 @@ impl ArrowPredicate for DatafusionArrowPredicate {
     }
 }
 
-/// Return the number of non null true vaulues in an array
-// TODO remove when https://github.com/apache/arrow-rs/issues/2963 is released
-fn true_count(arr: &BooleanArray) -> usize {
-    match arr.data().null_buffer() {
-        Some(nulls) => {
-            let null_chunks = nulls.bit_chunks(arr.offset(), arr.len());
-            let value_chunks = arr.values().bit_chunks(arr.offset(), arr.len());
-            null_chunks
-                .iter()
-                .zip(value_chunks.iter())
-                .chain(std::iter::once((
-                    null_chunks.remainder_bits(),
-                    value_chunks.remainder_bits(),
-                )))
-                .map(|(a, b)| (a & b).count_ones() as usize)
-                .sum()
-        }
-        None => arr.values().count_set_bits_offset(arr.offset(), arr.len()),
-    }
-}
-
 /// A candidate expression for creating a `RowFilter` contains the
 /// expression as well as data to estimate the cost of evaluating
 /// the resulting expression.