You are viewing a plain text version of this content. The canonical link for it is here.
Posted to github@arrow.apache.org by GitBox <gi...@apache.org> on 2022/01/23 14:40:23 UTC

[GitHub] [arrow-rs] tustvold commented on a change in pull request #1228: Unaligned bit chunk

tustvold commented on a change in pull request #1228:
URL: https://github.com/apache/arrow-rs/pull/1228#discussion_r790285853



##########
File path: arrow/src/compute/kernels/filter.rs
##########
@@ -17,184 +17,117 @@
 
 //! Defines miscellaneous array kernels.
 
+use crate::array::*;
 use crate::buffer::buffer_bin_and;
 use crate::datatypes::DataType;
 use crate::error::Result;
 use crate::record_batch::RecordBatch;
-use crate::{array::*, util::bit_chunk_iterator::BitChunkIterator};
-use std::iter::Enumerate;
+use crate::util::bit_chunk_iterator::{UnalignedBitChunk, UnalignedBitChunkIterator};
 
 /// Function that can filter arbitrary arrays
 pub type Filter<'a> = Box<dyn Fn(&ArrayData) -> ArrayData + 'a>;
 
-/// Internal state of [SlicesIterator]
-#[derive(Debug, PartialEq)]
-enum State {
-    // it is iterating over bits of a mask (`u64`, steps of size of 1 slot)
-    Bits(u64),
-    // it is iterating over chunks (steps of size of 64 slots)
-    Chunks,
-    // it is iterating over the remaining bits (steps of size of 1 slot)
-    Remainder,
-    // nothing more to iterate.
-    Finish,
-}
-
 /// An iterator of `(usize, usize)` each representing an interval `[start,end[` whose
 /// slots of a [BooleanArray] are true. Each interval corresponds to a contiguous region of memory to be
 /// "taken" from an array to be filtered.
 #[derive(Debug)]
 pub struct SlicesIterator<'a> {
-    iter: Enumerate<BitChunkIterator<'a>>,
-    state: State,
-    filter: &'a BooleanArray,
-    remainder_mask: u64,
-    remainder_len: usize,
-    chunk_len: usize,
+    iter: UnalignedBitChunkIterator<'a>,
     len: usize,
-    start: usize,
-    on_region: bool,
-    current_chunk: usize,
-    current_bit: usize,
+    offset: usize,
+    current_chunk: u64,
 }
 
 impl<'a> SlicesIterator<'a> {
     pub fn new(filter: &'a BooleanArray) -> Self {
         let values = &filter.data_ref().buffers()[0];
-        let chunks = values.bit_chunks(filter.offset(), filter.len());
+        let len = filter.len();
+        let chunk = UnalignedBitChunk::new(values.as_slice(), filter.offset(), len);
+        let mut iter = chunk.iter();
+
+        let offset = chunk.lead_padding();
+        let current_chunk = iter.next().unwrap_or(0);
 
         Self {
-            iter: chunks.iter().enumerate(),
-            state: State::Chunks,
-            filter,
-            remainder_len: chunks.remainder_len(),
-            chunk_len: chunks.chunk_len(),
-            remainder_mask: chunks.remainder_bits(),
-            len: 0,
-            start: 0,
-            on_region: false,
-            current_chunk: 0,
-            current_bit: 0,
+            iter,
+            len,
+            offset,
+            current_chunk,
         }
     }
 
-    /// Counts the number of set bits in the filter array.
-    fn filter_count(&self) -> usize {
-        let values = self.filter.values();
-        values.count_set_bits_offset(self.filter.offset(), self.filter.len())
-    }
-
-    #[inline]
-    fn current_start(&self) -> usize {
-        self.current_chunk * 64 + self.current_bit
-    }
-
-    #[inline]
-    fn iterate_bits(&mut self, mask: u64, max: usize) -> Option<(usize, usize)> {
-        while self.current_bit < max {
-            if (mask & (1 << self.current_bit)) != 0 {
-                if !self.on_region {
-                    self.start = self.current_start();
-                    self.on_region = true;
-                }
-                self.len += 1;
-            } else if self.on_region {
-                let result = (self.start, self.start + self.len);
-                self.len = 0;
-                self.on_region = false;
-                self.current_bit += 1;
-                return Some(result);
+    fn advance_to_set_bit(&mut self) -> Option<(usize, u32)> {
+        loop {
+            if self.current_chunk != 0 {
+                // Find the index of the first 1
+                let bit_pos = self.current_chunk.trailing_zeros();

Review comment:
       Tbh this is likely the "trick" that is yielding most of the filter speedup. x86 has a specific instruction for this




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscribe@arrow.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org