You are viewing a plain text version of this content. The canonical link for it is here.
Posted to github@arrow.apache.org by GitBox <gi...@apache.org> on 2020/12/18 21:29:38 UTC

[GitHub] [arrow] yordan-pavlov commented on a change in pull request #8960: ARROW-10540: [Rust] Extended filter kernel to all types and improved performance

yordan-pavlov commented on a change in pull request #8960:
URL: https://github.com/apache/arrow/pull/8960#discussion_r546099922



##########
File path: rust/arrow/src/compute/kernels/filter.rs
##########
@@ -17,841 +17,241 @@
 
 //! Defines miscellaneous array kernels.
 
-use crate::array::*;
-use crate::datatypes::*;
-use crate::error::{ArrowError, Result};
+use crate::error::Result;
 use crate::record_batch::RecordBatch;
-use crate::{
-    bitmap::Bitmap,
-    buffer::{Buffer, MutableBuffer},
-    util::bit_util,
-};
-use std::{mem, sync::Arc};
-
-/// trait for copying filtered null bitmap bits
-trait CopyNullBit {
-    fn copy_null_bit(&mut self, source_index: usize);
-    fn copy_null_bits(&mut self, source_index: usize, count: usize);
-    fn null_count(&self) -> usize;
-    fn null_buffer(&mut self) -> Buffer;
-}
-
-/// no-op null bitmap copy implementation,
-/// used when the filtered data array doesn't have a null bitmap
-struct NullBitNoop {}
-
-impl NullBitNoop {
-    fn new() -> Self {
-        NullBitNoop {}
-    }
-}
-
-impl CopyNullBit for NullBitNoop {
-    #[inline]
-    fn copy_null_bit(&mut self, _source_index: usize) {
-        // do nothing
-    }
-
-    #[inline]
-    fn copy_null_bits(&mut self, _source_index: usize, _count: usize) {
-        // do nothing
-    }
-
-    fn null_count(&self) -> usize {
-        0
-    }
-
-    fn null_buffer(&mut self) -> Buffer {
-        Buffer::from([0u8; 0])
-    }
+use crate::{array::*, util::bit_chunk_iterator::BitChunkIterator};
+use std::{iter::Enumerate, sync::Arc};
+
+/// Function that can filter arbitrary arrays
+pub type Filter<'a> = Box<Fn(&ArrayData) -> ArrayData + 'a>;
+
+/// Internal state of [SlicesIterator]
+#[derive(Debug, PartialEq)]
+enum State {
+    // it is iterating over bits of a mask (`u64`, steps of size of 1 slot)
+    Bits(u64),
+    // it is iterating over chunks (steps of size of 64 slots)
+    Chunks,
+    // it is iterating over the remainding bits (steps of size of 1 slot)
+    Remainder,
+    // nothing more to iterate.
+    Finish,
 }
 
-/// null bitmap copy implementation,
-/// used when the filtered data array has a null bitmap
-struct NullBitSetter<'a> {
-    target_buffer: MutableBuffer,
-    source_bytes: &'a [u8],
-    target_index: usize,
-    null_count: usize,
+/// An iterator of `(usize, usize)` each representing an interval `[start,end[` whose
+/// slots of a [BooleanArray] are true. Each interval corresponds to a contiguous region of memory to be
+/// "taken" from an array to be filtered.
+#[derive(Debug)]
+struct SlicesIterator<'a> {
+    iter: Enumerate<BitChunkIterator<'a>>,
+    state: State,
+    filter_count: usize,
+    remainder_mask: u64,
+    remainder_len: usize,
+    chunk_len: usize,
+    len: usize,
+    start: usize,
+    on_region: bool,
+    current_chunk: usize,
+    current_bit: usize,
 }
 
-impl<'a> NullBitSetter<'a> {
-    fn new(null_bitmap: &'a Bitmap) -> Self {
-        let null_bytes = null_bitmap.buffer_ref().data();
-        // create null bitmap buffer with same length and initialize null bitmap buffer to 1s
-        let null_buffer =
-            MutableBuffer::new(null_bytes.len()).with_bitset(null_bytes.len(), true);
-        NullBitSetter {
-            source_bytes: null_bytes,
-            target_buffer: null_buffer,
-            target_index: 0,
-            null_count: 0,
+impl<'a> SlicesIterator<'a> {
+    fn new(filter: &'a BooleanArray) -> Self {
+        let values = &filter.data_ref().buffers()[0];
+
+        // this operation is performed before iteration
+        // because it is fast and allows reserving all the needed memory
+        let filter_count = values.count_set_bits_offset(filter.offset(), filter.len());
+
+        let chunks = values.bit_chunks(filter.offset(), filter.len());
+
+        Self {
+            iter: chunks.iter().enumerate(),
+            state: State::Chunks,
+            filter_count,
+            remainder_len: chunks.remainder_len(),
+            chunk_len: chunks.chunk_len(),
+            remainder_mask: chunks.remainder_bits(),
+            len: 0,
+            start: 0,
+            on_region: false,
+            current_chunk: 0,
+            current_bit: 0,
         }
     }
-}
 
-impl<'a> CopyNullBit for NullBitSetter<'a> {
     #[inline]
-    fn copy_null_bit(&mut self, source_index: usize) {
-        if !bit_util::get_bit(self.source_bytes, source_index) {
-            bit_util::unset_bit(self.target_buffer.data_mut(), self.target_index);
-            self.null_count += 1;
-        }
-        self.target_index += 1;
+    fn current_start(&self) -> usize {
+        self.current_chunk * 64 + self.current_bit
     }
 
     #[inline]
-    fn copy_null_bits(&mut self, source_index: usize, count: usize) {
-        for i in 0..count {
-            self.copy_null_bit(source_index + i);
+    fn iterate_bits(&mut self, mask: u64, max: usize) -> Option<(usize, usize)> {
+        while self.current_bit < max {
+            if (mask & (1 << self.current_bit)) != 0 {
+                if !self.on_region {
+                    self.start = self.current_start();
+                    self.on_region = true;
+                }
+                self.len += 1;
+            } else if self.on_region {
+                let result = (self.start, self.start + self.len);
+                self.len = 0;
+                self.on_region = false;
+                self.current_bit += 1;
+                return Some(result);
+            }
+            self.current_bit += 1;
         }
+        self.current_bit = 0;
+        None
     }
 
-    fn null_count(&self) -> usize {
-        self.null_count
-    }
-
-    fn null_buffer(&mut self) -> Buffer {
-        self.target_buffer.resize(self.target_index);
-        // use mem::replace to detach self.target_buffer from self so that it can be returned
-        let target_buffer = mem::replace(&mut self.target_buffer, MutableBuffer::new(0));
-        target_buffer.freeze()
-    }
-}
-
-fn get_null_bit_setter<'a>(data_array: &'a impl Array) -> Box<CopyNullBit + 'a> {
-    if let Some(null_bitmap) = data_array.data_ref().null_bitmap() {
-        // only return an actual null bit copy implementation if null_bitmap is set
-        Box::new(NullBitSetter::new(null_bitmap))
-    } else {
-        // otherwise return a no-op copy null bit implementation
-        // for improved performance when the filtered array doesn't contain NULLs
-        Box::new(NullBitNoop::new())
-    }
-}
-
-// transmute filter array to u64
-// - optimize filtering with highly selective filters by skipping entire batches of 64 filter bits
-// - if the data array being filtered doesn't have a null bitmap, no time is wasted to copy a null bitmap
-fn filter_array_impl(
-    filter_context: &FilterContext,
-    data_array: &impl Array,
-    array_type: DataType,
-    value_size: usize,
-) -> Result<ArrayDataBuilder> {
-    if filter_context.filter_len > data_array.len() {
-        return Err(ArrowError::ComputeError(
-            "Filter array cannot be larger than data array".to_string(),
-        ));
-    }
-    let filtered_count = filter_context.filtered_count;
-    let filter_mask = &filter_context.filter_mask;
-    let filter_u64 = &filter_context.filter_u64;
-    let data_bytes = data_array.data_ref().buffers()[0].data();
-    let mut target_buffer = MutableBuffer::new(filtered_count * value_size);
-    target_buffer.resize(filtered_count * value_size);
-    let target_bytes = target_buffer.data_mut();
-    let mut target_byte_index: usize = 0;
-    let mut null_bit_setter = get_null_bit_setter(data_array);
-    let null_bit_setter = null_bit_setter.as_mut();
-    let all_ones_batch = !0u64;
-    let data_array_offset = data_array.offset();
-
-    for (i, filter_batch) in filter_u64.iter().enumerate() {
-        // foreach u64 batch
-        let filter_batch = *filter_batch;
-        if filter_batch == 0 {
-            // if batch == 0, all items are filtered out, so skip entire batch
-            continue;
-        } else if filter_batch == all_ones_batch {
-            // if batch == all 1s: copy all 64 values in one go
-            let data_index = (i * 64) + data_array_offset;
-            null_bit_setter.copy_null_bits(data_index, 64);
-            let data_byte_index = data_index * value_size;
-            let data_len = value_size * 64;
-            target_bytes[target_byte_index..(target_byte_index + data_len)]
-                .copy_from_slice(
-                    &data_bytes[data_byte_index..(data_byte_index + data_len)],
-                );
-            target_byte_index += data_len;
-            continue;
-        }
-        for (j, filter_mask) in filter_mask.iter().enumerate() {
-            // foreach bit in batch:
-            if (filter_batch & *filter_mask) != 0 {
-                let data_index = (i * 64) + j + data_array_offset;
-                null_bit_setter.copy_null_bit(data_index);
-                // if filter bit == 1: copy data value bytes
-                let data_byte_index = data_index * value_size;
-                target_bytes[target_byte_index..(target_byte_index + value_size)]
-                    .copy_from_slice(
-                        &data_bytes[data_byte_index..(data_byte_index + value_size)],
-                    );
-                target_byte_index += value_size;
+    /// iterates over chunks.
+    #[inline]
+    fn iterate_chunks(&mut self) -> Option<(usize, usize)> {
+        while let Some((i, mask)) = self.iter.next() {
+            self.current_chunk = i;
+            if mask == 0 {
+                if self.on_region {
+                    let result = (self.start, self.start + self.len);
+                    self.len = 0;
+                    self.on_region = false;
+                    return Some(result);
+                }
+            } else if mask == 18446744073709551615u64 {

Review comment:
       I get that it might be more performant (although I suspect the compiler optimizes this) to not calculate `!0u64` inside the loop , but isn't this going be more readable and obvious if `!0u64` is put in a suitably named constant instead of `18446744073709551615u64` ?




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org