You are viewing a plain text version of this content. The canonical link for it is here.
Posted to github@arrow.apache.org by GitBox <gi...@apache.org> on 2022/06/03 21:22:34 UTC

[GitHub] [arrow-rs] tustvold commented on a diff in pull request #1781: Arbitrary size combine option bitmap

tustvold commented on code in PR #1781:
URL: https://github.com/apache/arrow-rs/pull/1781#discussion_r889358859


##########
arrow/src/compute/util.rs:
##########
@@ -29,33 +29,42 @@ use std::ops::Add;
 /// This function is useful when implementing operations on higher level arrays.
 #[allow(clippy::unnecessary_wraps)]
 pub(super) fn combine_option_bitmap(
-    left_data: &ArrayData,
-    right_data: &ArrayData,
+    arrays: &[&ArrayData],
     len_in_bits: usize,
 ) -> Result<Option<Buffer>> {
-    let left_offset_in_bits = left_data.offset();
-    let right_offset_in_bits = right_data.offset();
-
-    let left = left_data.null_buffer();
-    let right = right_data.null_buffer();
-
-    match left {
-        None => match right {
-            None => Ok(None),
-            Some(r) => Ok(Some(r.bit_slice(right_offset_in_bits, len_in_bits))),
-        },
-        Some(l) => match right {
-            None => Ok(Some(l.bit_slice(left_offset_in_bits, len_in_bits))),
-
-            Some(r) => Ok(Some(buffer_bin_and(
-                l,
-                left_offset_in_bits,
-                r,
-                right_offset_in_bits,
-                len_in_bits,
-            ))),
-        },
+    if arrays.is_empty() {
+        return Err(ArrowError::ComputeError(
+            "Arrays must not be empty".to_string(),
+        ));
     }
+
+    let mut buffers = arrays
+        .iter()
+        .map(|array| (array.null_buffer(), array.offset()));

Review Comment:
   ```suggestion
           .map(|array| (array.null_buffer().cloned(), array.offset()));
   ```
   
   Would allow removing it from the other locations. Again a very minor nit



##########
arrow/src/compute/util.rs:
##########
@@ -29,33 +29,42 @@ use std::ops::Add;
 /// This function is useful when implementing operations on higher level arrays.
 #[allow(clippy::unnecessary_wraps)]
 pub(super) fn combine_option_bitmap(
-    left_data: &ArrayData,
-    right_data: &ArrayData,
+    arrays: &[&ArrayData],
     len_in_bits: usize,
 ) -> Result<Option<Buffer>> {
-    let left_offset_in_bits = left_data.offset();
-    let right_offset_in_bits = right_data.offset();
-
-    let left = left_data.null_buffer();
-    let right = right_data.null_buffer();
-
-    match left {
-        None => match right {
-            None => Ok(None),
-            Some(r) => Ok(Some(r.bit_slice(right_offset_in_bits, len_in_bits))),
-        },
-        Some(l) => match right {
-            None => Ok(Some(l.bit_slice(left_offset_in_bits, len_in_bits))),
-
-            Some(r) => Ok(Some(buffer_bin_and(
-                l,
-                left_offset_in_bits,
-                r,
-                right_offset_in_bits,
-                len_in_bits,
-            ))),
-        },
+    if arrays.is_empty() {
+        return Err(ArrowError::ComputeError(
+            "Arrays must not be empty".to_string(),
+        ));
     }
+
+    let mut buffers = arrays
+        .iter()
+        .map(|array| (array.null_buffer(), array.offset()));
+
+    let init = buffers.next().unwrap();

Review Comment:
   You could theoretically combine this with the check above, e.g. something like
   
   ```
   let init = buffers.next().ok_or_else(|| ArrowError::ComputeError(..))?;
   ```
   
   But definitely not necessary



##########
arrow/src/compute/util.rs:
##########
@@ -29,33 +29,42 @@ use std::ops::Add;
 /// This function is useful when implementing operations on higher level arrays.
 #[allow(clippy::unnecessary_wraps)]
 pub(super) fn combine_option_bitmap(
-    left_data: &ArrayData,
-    right_data: &ArrayData,
+    arrays: &[&ArrayData],
     len_in_bits: usize,
 ) -> Result<Option<Buffer>> {
-    let left_offset_in_bits = left_data.offset();
-    let right_offset_in_bits = right_data.offset();
-
-    let left = left_data.null_buffer();
-    let right = right_data.null_buffer();
-
-    match left {
-        None => match right {
-            None => Ok(None),
-            Some(r) => Ok(Some(r.bit_slice(right_offset_in_bits, len_in_bits))),
-        },
-        Some(l) => match right {
-            None => Ok(Some(l.bit_slice(left_offset_in_bits, len_in_bits))),
-
-            Some(r) => Ok(Some(buffer_bin_and(
-                l,
-                left_offset_in_bits,
-                r,
-                right_offset_in_bits,
-                len_in_bits,
-            ))),
-        },
+    if arrays.is_empty() {
+        return Err(ArrowError::ComputeError(
+            "Arrays must not be empty".to_string(),
+        ));
     }
+
+    let mut buffers = arrays
+        .iter()
+        .map(|array| (array.null_buffer(), array.offset()));
+
+    let init = buffers.next().unwrap();
+    Ok(buffers
+        .fold((init.0.cloned(), init.1), |acc, buffer| {

Review Comment:
   Having the offset here is clever :+1:



##########
arrow/src/compute/util.rs:
##########
@@ -29,33 +29,42 @@ use std::ops::Add;
 /// This function is useful when implementing operations on higher level arrays.
 #[allow(clippy::unnecessary_wraps)]
 pub(super) fn combine_option_bitmap(
-    left_data: &ArrayData,
-    right_data: &ArrayData,
+    arrays: &[&ArrayData],
     len_in_bits: usize,
 ) -> Result<Option<Buffer>> {
-    let left_offset_in_bits = left_data.offset();
-    let right_offset_in_bits = right_data.offset();
-
-    let left = left_data.null_buffer();
-    let right = right_data.null_buffer();
-
-    match left {
-        None => match right {
-            None => Ok(None),
-            Some(r) => Ok(Some(r.bit_slice(right_offset_in_bits, len_in_bits))),
-        },
-        Some(l) => match right {
-            None => Ok(Some(l.bit_slice(left_offset_in_bits, len_in_bits))),
-
-            Some(r) => Ok(Some(buffer_bin_and(
-                l,
-                left_offset_in_bits,
-                r,
-                right_offset_in_bits,
-                len_in_bits,
-            ))),
-        },
+    if arrays.is_empty() {
+        return Err(ArrowError::ComputeError(
+            "Arrays must not be empty".to_string(),
+        ));
     }
+
+    let mut buffers = arrays
+        .iter()
+        .map(|array| (array.null_buffer(), array.offset()));
+
+    let init = buffers.next().unwrap();
+    Ok(buffers
+        .fold((init.0.cloned(), init.1), |acc, buffer| {
+            match (acc, buffer) {
+                ((None, _), (None, _)) => (None, 0),
+                ((Some(buffer), offset), (None, _)) => (Some(buffer), offset),
+                ((None, _), (Some(buffer), offset)) => (Some(buffer.clone()), offset),
+                (
+                    (Some(buffer_left), offset_left),
+                    (Some(buffer_right), offset_right),
+                ) => (
+                    Some(buffer_bin_and(
+                        &buffer_left,
+                        offset_left,
+                        buffer_right,
+                        offset_right,
+                        len_in_bits,
+                    )),
+                    0,
+                ),
+            }
+        })
+        .0)

Review Comment:
   I think there is a bug if there is only a single null buffer, and it has an offset. In particular I think this will not perform the `bit_slice` operation to "normalize" the bitmap. This appears to just discard the offset.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscribe@arrow.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org