You are viewing a plain text version of this content. The canonical link for it is here.
Posted to github@arrow.apache.org by GitBox <gi...@apache.org> on 2021/04/09 16:53:39 UTC

[GitHub] [arrow] jhorstmann commented on a change in pull request #9965: ARROW-12294: [Rust] Fix boolean kleene kernels with no remainder

jhorstmann commented on a change in pull request #9965:
URL: https://github.com/apache/arrow/pull/9965#discussion_r610775158



##########
File path: rust/arrow/src/compute/kernels/boolean.rs
##########
@@ -52,37 +51,16 @@ where
 
     // length and offset of boolean array is measured in bits
     let len = left.len();
-    let left_offset = left.offset();
-    let right_offset = right.offset();
-
-    let left_buffer = left.values();
-    let right_buffer = right.values();
-
-    // If we do not have a validity bitmap, we just use an empty buffer
-    let (left_validity, left_validity_len) = left.data_ref().null_buffer().map_or_else(
-        || (Buffer::from_iter(iter::empty::<bool>()), 0),
-        |buffer| (buffer.clone(), len),
-    );
-    let (right_validity, right_validity_len) =
-        right.data_ref().null_buffer().map_or_else(
-            || (Buffer::from_iter(iter::empty::<bool>()), 0),
-            |buffer| (buffer.clone(), len),
-        );
-
-    let left_chunks = left_buffer.bit_chunks(left_offset, len);
-    let left_valid_chunks = left_validity.bit_chunks(left_offset, left_validity_len);
-    let right_chunks = right_buffer.bit_chunks(right_offset, len);
-    let right_valid_chunks = right_validity.bit_chunks(right_offset, right_validity_len);
 
     // result length measured in bytes (incl. remainder)
     let mut result_len = round_upto_multiple_of_64(len) / 8;
-    // if remainder is absent, the kleene_op code would always resize the result buffers,
-    // which is both unnecessary and expensive. We can prevent the resizing by always
-    // adding 8 additional bytes to the length of both buffers. All bits of these 8 bytes
-    // will always be 0 though.
-    if left_chunks.remainder_len().is_zero() {
+    // if remainder is absent, the kleene_op code would resize the result buffers, which
+    // is both unnecessary and expensive. We can prevent the resizing by adding 8 bytes
+    // to the length of both buffers. All bits of these 8 bytes will be always 0 though.

Review comment:
       We could probably rephrase it and make more clear why we are adding the length.
   
   We are avoiding an edge case later by always appending the and/or of the remainder bits, even there is no actual remainder. In that case the remainder bits will all be zero, but it means we need to allocate 8 bytes more than strictly necessary.




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org