You are viewing a plain text version of this content. The canonical link for it is here.
Posted to github@arrow.apache.org by GitBox <gi...@apache.org> on 2022/06/04 08:44:36 UTC

[GitHub] [arrow-rs] Ismail-Maj commented on a diff in pull request #1781: Arbitrary size combine option bitmap

Ismail-Maj commented on code in PR #1781:
URL: https://github.com/apache/arrow-rs/pull/1781#discussion_r889505125


##########
arrow/src/compute/util.rs:
##########
@@ -29,33 +29,42 @@ use std::ops::Add;
 /// This function is useful when implementing operations on higher level arrays.
 #[allow(clippy::unnecessary_wraps)]
 pub(super) fn combine_option_bitmap(
-    left_data: &ArrayData,
-    right_data: &ArrayData,
+    arrays: &[&ArrayData],
     len_in_bits: usize,
 ) -> Result<Option<Buffer>> {
-    let left_offset_in_bits = left_data.offset();
-    let right_offset_in_bits = right_data.offset();
-
-    let left = left_data.null_buffer();
-    let right = right_data.null_buffer();
-
-    match left {
-        None => match right {
-            None => Ok(None),
-            Some(r) => Ok(Some(r.bit_slice(right_offset_in_bits, len_in_bits))),
-        },
-        Some(l) => match right {
-            None => Ok(Some(l.bit_slice(left_offset_in_bits, len_in_bits))),
-
-            Some(r) => Ok(Some(buffer_bin_and(
-                l,
-                left_offset_in_bits,
-                r,
-                right_offset_in_bits,
-                len_in_bits,
-            ))),
-        },
+    if arrays.is_empty() {
+        return Err(ArrowError::ComputeError(
+            "Arrays must not be empty".to_string(),
+        ));
     }
+
+    let mut buffers = arrays
+        .iter()
+        .map(|array| (array.null_buffer(), array.offset()));

Review Comment:
   Currently the algorithm clones at most one buffer, I'm suspecting cloning every buffer would make it slower.



##########
arrow/src/compute/util.rs:
##########
@@ -29,33 +29,42 @@ use std::ops::Add;
 /// This function is useful when implementing operations on higher level arrays.
 #[allow(clippy::unnecessary_wraps)]
 pub(super) fn combine_option_bitmap(
-    left_data: &ArrayData,
-    right_data: &ArrayData,
+    arrays: &[&ArrayData],
     len_in_bits: usize,
 ) -> Result<Option<Buffer>> {
-    let left_offset_in_bits = left_data.offset();
-    let right_offset_in_bits = right_data.offset();
-
-    let left = left_data.null_buffer();
-    let right = right_data.null_buffer();
-
-    match left {
-        None => match right {
-            None => Ok(None),
-            Some(r) => Ok(Some(r.bit_slice(right_offset_in_bits, len_in_bits))),
-        },
-        Some(l) => match right {
-            None => Ok(Some(l.bit_slice(left_offset_in_bits, len_in_bits))),
-
-            Some(r) => Ok(Some(buffer_bin_and(
-                l,
-                left_offset_in_bits,
-                r,
-                right_offset_in_bits,
-                len_in_bits,
-            ))),
-        },
+    if arrays.is_empty() {
+        return Err(ArrowError::ComputeError(
+            "Arrays must not be empty".to_string(),
+        ));
     }
+
+    let mut buffers = arrays
+        .iter()
+        .map(|array| (array.null_buffer(), array.offset()));
+
+    let init = buffers.next().unwrap();
+    Ok(buffers
+        .fold((init.0.cloned(), init.1), |acc, buffer| {
+            match (acc, buffer) {
+                ((None, _), (None, _)) => (None, 0),
+                ((Some(buffer), offset), (None, _)) => (Some(buffer), offset),
+                ((None, _), (Some(buffer), offset)) => (Some(buffer.clone()), offset),
+                (
+                    (Some(buffer_left), offset_left),
+                    (Some(buffer_right), offset_right),
+                ) => (
+                    Some(buffer_bin_and(
+                        &buffer_left,
+                        offset_left,
+                        buffer_right,
+                        offset_right,
+                        len_in_bits,
+                    )),
+                    0,
+                ),
+            }
+        })
+        .0)

Review Comment:
   nice catch, thank you!
   I'll push a fix soon.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscribe@arrow.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org