You are viewing a plain text version of this content. The canonical link for it is here.
Posted to github@arrow.apache.org by GitBox <gi...@apache.org> on 2020/09/26 01:57:41 UTC

[GitHub] [arrow] nevi-me commented on a change in pull request #8280: ARROW-10103: [Rust] Add contains kernel

nevi-me commented on a change in pull request #8280:
URL: https://github.com/apache/arrow/pull/8280#discussion_r495375649



##########
File path: rust/arrow/src/compute/kernels/comparison.rs
##########
@@ -555,11 +557,159 @@ where
     compare_op_scalar!(left, right, |a, b| a >= b)
 }
 
+/// Checks if a `GenericListArray` contains a value in the `PrimitiveArray`
+pub fn contains<T, OffsetSize>(
+    left: &PrimitiveArray<T>,
+    right: &GenericListArray<OffsetSize>,
+) -> Result<BooleanArray>
+where
+    T: ArrowNumericType,
+    OffsetSize: OffsetSizeTrait,
+{
+    if left.len() != right.len() {
+        return Err(ArrowError::ComputeError(
+            "Cannot perform comparison operation on arrays of different length"
+                .to_string(),
+        ));
+    }
+
+    let not_both_null_bit_buffer =
+        match compare_option_bitmap(left.data_ref(), right.data_ref(), left.len())? {
+            Some(buff) => buff,
+            None => new_all_set_buffer(left.len()),
+        };
+    let not_both_null_bitmap = not_both_null_bit_buffer.data();
+
+    let left_data = left.data();
+    let left_null_bitmap = match left_data.null_bitmap() {
+        Some(bitmap) => bitmap.clone().into_buffer(),
+        _ => new_all_set_buffer(left.len()),
+    };
+    let left_null_bitmap = left_null_bitmap.data();
+
+    let mut result = BooleanBufferBuilder::new(left.len());
+
+    for i in 0..left.len() {
+        let mut is_in = false;
+
+        // contains(null, null) = false
+        if bit_util::get_bit(not_both_null_bitmap, i) {
+            let list = right.value(i);
+
+            // contains(null, [null]) = true
+            if !bit_util::get_bit(left_null_bitmap, i) {
+                if list.null_count() > 0 {
+                    is_in = true;
+                }
+            } else {
+                let list = list.as_any().downcast_ref::<PrimitiveArray<T>>().unwrap();
+
+                for j in 0..list.len() {
+                    if list.is_valid(j) && (left.value(i) == list.value(j)) {
+                        is_in = true;
+                    }
+                }
+            }
+        }
+        result.append(is_in)?;
+    }
+
+    let data = ArrayData::new(
+        DataType::Boolean,
+        left.len(),
+        None,
+        None,
+        left.offset(),
+        vec![result.finish()],
+        vec![],
+    );
+    Ok(PrimitiveArray::<BooleanType>::from(Arc::new(data)))
+}
+
+/// Checks if a `GenericListArray` contains a value in the `GenericStringArray`
+pub fn contains_utf8<OffsetSize>(
+    left: &GenericStringArray<OffsetSize>,
+    right: &ListArray,
+) -> Result<BooleanArray>
+where
+    OffsetSize: OffsetSizeTrait,
+{
+    if left.len() != right.len() {
+        return Err(ArrowError::ComputeError(
+            "Cannot perform comparison operation on arrays of different length"
+                .to_string(),
+        ));
+    }
+
+    let not_both_null_bit_buffer =
+        match compare_option_bitmap(left.data_ref(), right.data_ref(), left.len())? {
+            Some(buff) => buff,
+            None => new_all_set_buffer(left.len()),
+        };
+    let not_both_null_bitmap = not_both_null_bit_buffer.data();
+
+    let left_data = left.data();
+    let left_null_bitmap = match left_data.null_bitmap() {
+        Some(bitmap) => bitmap.clone().into_buffer(),
+        _ => new_all_set_buffer(left.len()),
+    };
+    let left_null_bitmap = left_null_bitmap.data();
+
+    let mut result = BooleanBufferBuilder::new(left.len());
+
+    for i in 0..left.len() {
+        let mut is_in = false;
+
+        // contains(null, null) = false
+        if bit_util::get_bit(not_both_null_bitmap, i) {
+            let list = right.value(i);
+
+            // contains(null, [null]) = true
+            if !bit_util::get_bit(left_null_bitmap, i) {
+                if list.null_count() > 0 {
+                    is_in = true;
+                }
+            } else {
+                let list = list
+                    .as_any()
+                    .downcast_ref::<GenericStringArray<OffsetSize>>()
+                    .unwrap();
+
+                for j in 0..list.len() {
+                    if list.is_valid(j) && (left.value(i) == list.value(j)) {
+                        is_in = true;
+                    }
+                }
+            }
+        }
+        result.append(is_in)?;
+    }
+
+    let data = ArrayData::new(
+        DataType::Boolean,
+        left.len(),
+        None,
+        None,
+        left.offset(),

Review comment:
       Is it correct for us to reuse the input's offset (`left.offset()`) for the result array here, or should this be 0? My intuition says the latter, but I'm too tired to figure it out. The same applies to the `contains` function above.




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org