You are viewing a plain text version of this content. The canonical link for it is here.
Posted to github@arrow.apache.org by GitBox <gi...@apache.org> on 2021/05/26 06:21:22 UTC

[GitHub] [arrow-rs] jorgecarleitao commented on a change in pull request #341: Fix filter UB and add fast path

jorgecarleitao commented on a change in pull request #341:
URL: https://github.com/apache/arrow-rs/pull/341#discussion_r639430911



##########
File path: arrow/src/compute/kernels/filter.rs
##########
@@ -225,38 +248,49 @@ pub fn filter(array: &Array, filter: &BooleanArray) -> Result<ArrayRef> {
     if filter.null_count() > 0 {
         // this greatly simplifies subsequent filtering code
         // now we only have a boolean mask to deal with
-        let array_data = filter.data_ref();
-        let null_bitmap = array_data.null_buffer().unwrap();
-        let mask = filter.values();
-        let offset = filter.offset();
-
-        let new_mask = buffer_bin_and(mask, offset, null_bitmap, offset, filter.len());
-
-        let array_data = ArrayData::builder(DataType::Boolean)
-            .len(filter.len())
-            .add_buffer(new_mask)
-            .build();
-        let filter = BooleanArray::from(array_data);
+        let filter = prep_null_mask_filter(filter);
         // fully qualified syntax, because we have an argument with the same name
         return crate::compute::kernels::filter::filter(array, &filter);
     }
 
     let iter = SlicesIterator::new(filter);
-
-    let mut mutable =
-        MutableArrayData::new(vec![array.data_ref()], false, iter.filter_count);
-    iter.for_each(|(start, end)| mutable.extend(0, start, end));
-    let data = mutable.freeze();
-    Ok(make_array(data))
+    match iter.filter_count {
+        0 => {
+            // return empty
+            Ok(new_empty_array(array.data_type()))
+        }
+        len if len == array.len() => {
+            // return all
+            let data = array.data().clone();
+            Ok(make_array(data))
+        }
+        _ => {
+            // actually filter
+            let mut mutable =
+                MutableArrayData::new(vec![array.data_ref()], false, iter.filter_count);
+            iter.for_each(|(start, end)| mutable.extend(0, start, end));
+            let data = mutable.freeze();
+            Ok(make_array(data))
+        }
+    }
 }
 
 /// Returns a new [RecordBatch] with arrays containing only values matching the filter.
-/// WARNING: the nulls of `filter` are ignored and the value on its slot is considered.
-/// Therefore, it is considered undefined behavior to pass `filter` with null values.
 pub fn filter_record_batch(
     record_batch: &RecordBatch,
     filter: &BooleanArray,

Review comment:
       `predicate`? That removes the need for the fully qualified name below (and changing an argument's name is backward compatible in Rust).




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org