You are viewing a plain text version of this content. The canonical link for it is here.
Posted to github@arrow.apache.org by GitBox <gi...@apache.org> on 2020/09/26 08:27:25 UTC

[GitHub] [arrow] jhorstmann commented on a change in pull request #8280: ARROW-10103: [Rust] Add contains kernel

jhorstmann commented on a change in pull request #8280:
URL: https://github.com/apache/arrow/pull/8280#discussion_r495431850



##########
File path: rust/arrow/src/compute/kernels/comparison.rs
##########
@@ -555,11 +555,159 @@ where
     compare_op_scalar!(left, right, |a, b| a >= b)
 }
 
+/// Checks if a `GenericListArray` contains a value in the `PrimitiveArray`
+pub fn contains<T, OffsetSize>(
+    left: &PrimitiveArray<T>,
+    right: &GenericListArray<OffsetSize>,
+) -> Result<BooleanArray>
+where
+    T: ArrowNumericType,
+    OffsetSize: OffsetSizeTrait,
+{
+    if left.len() != right.len() {
+        return Err(ArrowError::ComputeError(
+            "Cannot perform comparison operation on arrays of different length"
+                .to_string(),
+        ));
+    }
+
+    let not_both_null_bit_buffer =
+        match compare_option_bitmap(left.data_ref(), right.data_ref(), left.len())? {
+            Some(buff) => buff,
+            None => new_all_set_buffer(left.len()),
+        };
+    let not_both_null_bitmap = not_both_null_bit_buffer.data();
+
+    let left_data = left.data();
+    let left_null_bitmap = match left_data.null_bitmap() {
+        Some(bitmap) => bitmap.clone().into_buffer(),
+        _ => new_all_set_buffer(left.len()),
+    };
+    let left_null_bitmap = left_null_bitmap.data();
+
+    let mut result = BooleanBufferBuilder::new(left.len());
+
+    for i in 0..left.len() {
+        let mut is_in = false;
+
+        // contains(null, null) = false
+        if bit_util::get_bit(not_both_null_bitmap, i) {
+            let list = right.value(i);
+
+            // contains(null, [null]) = true
+            if !bit_util::get_bit(left_null_bitmap, i) {
+                if list.null_count() > 0 {
+                    is_in = true;
+                }
+            } else {
+                let list = list.as_any().downcast_ref::<PrimitiveArray<T>>().unwrap();
+
+                for j in 0..list.len() {
+                    if list.is_valid(j) && (left.value(i) == list.value(j)) {
+                        is_in = true;
+                    }
+                }
+            }
+        }
+        result.append(is_in)?;
+    }
+
+    let data = ArrayData::new(
+        DataType::Boolean,
+        left.len(),
+        None,
+        None,
+        left.offset(),

Review comment:
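       The offset should be 0 here rather than `left.offset()`: the result is a newly built buffer that contains only the relevant data, with one entry per element starting at index 0, so the input array's offset does not apply to it.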




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org