You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by tu...@apache.org on 2022/11/07 19:33:34 UTC
[arrow-datafusion] branch master updated: Minor: Use upstream BooleanArray::true_count (#4129)
This is an automated email from the ASF dual-hosted git repository.
tustvold pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git
The following commit(s) were added to refs/heads/master by this push:
new 3892a1fd6 Minor: Use upstream BooleanArray::true_count (#4129)
3892a1fd6 is described below
commit 3892a1fd6862e18cb2c5eafc0aa8ef302f073d8a
Author: Andrew Lamb <an...@nerdnetworks.org>
AuthorDate: Mon Nov 7 14:33:28 2022 -0500
Minor: Use upstream BooleanArray::true_count (#4129)
---
.../file_format/parquet/row_filter.rs | 23 +---------------------
1 file changed, 1 insertion(+), 22 deletions(-)
diff --git a/datafusion/core/src/physical_plan/file_format/parquet/row_filter.rs b/datafusion/core/src/physical_plan/file_format/parquet/row_filter.rs
index 876891813..1b2ec40ce 100644
--- a/datafusion/core/src/physical_plan/file_format/parquet/row_filter.rs
+++ b/datafusion/core/src/physical_plan/file_format/parquet/row_filter.rs
@@ -133,7 +133,7 @@ impl ArrowPredicate for DatafusionArrowPredicate {
Ok(array) => {
if let Some(mask) = array.as_any().downcast_ref::<BooleanArray>() {
let bool_arr = BooleanArray::from(mask.data().clone());
- let num_filtered = bool_arr.len() - true_count(&bool_arr);
+ let num_filtered = bool_arr.len() - bool_arr.true_count();
self.rows_filtered.add(num_filtered);
timer.stop();
Ok(bool_arr)
@@ -151,27 +151,6 @@ impl ArrowPredicate for DatafusionArrowPredicate {
}
}
-/// Return the number of non null true vaulues in an array
-// TODO remove when https://github.com/apache/arrow-rs/issues/2963 is released
-fn true_count(arr: &BooleanArray) -> usize {
- match arr.data().null_buffer() {
- Some(nulls) => {
- let null_chunks = nulls.bit_chunks(arr.offset(), arr.len());
- let value_chunks = arr.values().bit_chunks(arr.offset(), arr.len());
- null_chunks
- .iter()
- .zip(value_chunks.iter())
- .chain(std::iter::once((
- null_chunks.remainder_bits(),
- value_chunks.remainder_bits(),
- )))
- .map(|(a, b)| (a & b).count_ones() as usize)
- .sum()
- }
- None => arr.values().count_set_bits_offset(arr.offset(), arr.len()),
- }
-}
-
/// A candidate expression for creating a `RowFilter` contains the
/// expression as well as data to estimate the cost of evaluating
/// the resulting expression.