You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by tu...@apache.org on 2022/12/02 19:30:16 UTC
[arrow-rs] branch master updated: Add BooleanArray::from_unary and BooleanArray::from_binary (#3258)
This is an automated email from the ASF dual-hosted git repository.
tustvold pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git
The following commit(s) were added to refs/heads/master by this push:
new ecbb8c237 Add BooleanArray::from_unary and BooleanArray::from_binary (#3258)
ecbb8c237 is described below
commit ecbb8c23765ff6530ca32b5b3139713d6aaebfed
Author: Raphael Taylor-Davies <17...@users.noreply.github.com>
AuthorDate: Fri Dec 2 19:30:11 2022 +0000
Add BooleanArray::from_unary and BooleanArray::from_binary (#3258)
* Add BooleanArray::from_unary and BooleanArray::from_binary
* Add docs
* Tweak signatures
* Remove fallibility from combine_option_bitmap
* Remove unused compare_option_bitmap
* Remove fallibility
* Fix doc
---
arrow-array/src/array/boolean_array.rs | 87 ++++++++++
arrow-data/src/bit_mask.rs | 141 ++++++++++++++++
arrow/src/compute/kernels/arithmetic.rs | 8 +-
arrow/src/compute/kernels/arity.rs | 10 +-
arrow/src/compute/kernels/boolean.rs | 4 +-
arrow/src/compute/kernels/comparison.rs | 56 +-----
arrow/src/compute/kernels/concat_elements.rs | 6 +-
arrow/src/compute/mod.rs | 2 -
arrow/src/compute/util.rs | 243 ---------------------------
9 files changed, 250 insertions(+), 307 deletions(-)
diff --git a/arrow-array/src/array/boolean_array.rs b/arrow-array/src/array/boolean_array.rs
index e166f467a..920fdabc2 100644
--- a/arrow-array/src/array/boolean_array.rs
+++ b/arrow-array/src/array/boolean_array.rs
@@ -20,6 +20,7 @@ use crate::iterator::BooleanIter;
use crate::raw_pointer::RawPtrBox;
use crate::{print_long_array, Array, ArrayAccessor};
use arrow_buffer::{bit_util, Buffer, MutableBuffer};
+use arrow_data::bit_mask::combine_option_bitmap;
use arrow_data::ArrayData;
use arrow_schema::DataType;
use std::any::Any;
@@ -173,6 +174,92 @@ impl BooleanArray {
) -> impl Iterator<Item = Option<bool>> + 'a {
indexes.map(|opt_index| opt_index.map(|index| self.value_unchecked(index)))
}
+
+ /// Create a [`BooleanArray`] by evaluating the operation for
+ /// each element of the provided array, carrying over `left`'s null buffer (if any) to the result
+ ///
+ /// ```
+ /// # use arrow_array::{BooleanArray, Int32Array};
+ ///
+ /// let array = Int32Array::from(vec![1, 2, 3, 4, 5]);
+ /// let r = BooleanArray::from_unary(&array, |x| x > 2);
+ /// assert_eq!(&r, &BooleanArray::from(vec![false, false, true, true, true]));
+ /// ```
+ pub fn from_unary<T: ArrayAccessor, F>(left: T, mut op: F) -> Self
+ where
+ F: FnMut(T::Item) -> bool,
+ {
+ let null_bit_buffer = left
+ .data()
+ .null_buffer()
+ .map(|b| b.bit_slice(left.offset(), left.len())); // take the `left.len()` validity bits starting at `left.offset()`
+
+ let buffer = MutableBuffer::collect_bool(left.len(), |i| unsafe {
+ // SAFETY: i in range 0..len
+ op(left.value_unchecked(i)) // no validity check here: `op` is applied without consulting the null buffer
+ });
+
+ let data = unsafe { // NOTE(review): no SAFETY note here; the length and both buffers derive from `left.len()` above — confirm that meets `new_unchecked`'s contract
+ ArrayData::new_unchecked(
+ DataType::Boolean,
+ left.len(),
+ None, // presumably the null count, left unspecified — confirm against `ArrayData::new_unchecked`
+ null_bit_buffer,
+ 0, // presumably the array offset; the null buffer was sliced above rather than offset here — confirm
+ vec![Buffer::from(buffer)],
+ vec![],
+ )
+ };
+ Self::from(data)
+ }
+
+ /// Create a [`BooleanArray`] by evaluating the binary operation for
+ /// each element of the provided arrays; the result's null buffer comes from `combine_option_bitmap` over both inputs
+ ///
+ /// ```
+ /// # use arrow_array::{BooleanArray, Int32Array};
+ ///
+ /// let a = Int32Array::from(vec![1, 2, 3, 4, 5]);
+ /// let b = Int32Array::from(vec![1, 2, 0, 2, 5]);
+ /// let r = BooleanArray::from_binary(&a, &b, |a, b| a == b);
+ /// assert_eq!(&r, &BooleanArray::from(vec![true, true, false, false, true]));
+ /// ```
+ ///
+ /// # Panics
+ ///
+ /// This function panics if left and right are not the same length
+ ///
+ pub fn from_binary<T: ArrayAccessor, S: ArrayAccessor, F>(
+ left: T,
+ right: S,
+ mut op: F,
+ ) -> Self
+ where
+ F: FnMut(T::Item, S::Item) -> bool,
+ {
+ assert_eq!(left.len(), right.len()); // the documented panic; also what makes indexing `right` with `left`'s range valid below
+
+ let null_bit_buffer =
+ combine_option_bitmap(&[left.data_ref(), right.data_ref()], left.len()); // bitwise `and` of the two null bitmaps, `None` if neither input has one
+
+ let buffer = MutableBuffer::collect_bool(left.len(), |i| unsafe {
+ // SAFETY: i in range 0..len
+ op(left.value_unchecked(i), right.value_unchecked(i)) // no validity check here: `op` is applied without consulting either null buffer
+ });
+
+ let data = unsafe { // NOTE(review): no SAFETY note here; the length and both buffers derive from `left.len()` above — confirm that meets `new_unchecked`'s contract
+ ArrayData::new_unchecked(
+ DataType::Boolean,
+ left.len(),
+ None, // presumably the null count, left unspecified — confirm against `ArrayData::new_unchecked`
+ null_bit_buffer,
+ 0, // presumably the array offset; `combine_option_bitmap` already returns a buffer sliced to the requested window — confirm
+ vec![Buffer::from(buffer)],
+ vec![],
+ )
+ };
+ Self::from(data)
+ }
}
impl Array for BooleanArray {
diff --git a/arrow-data/src/bit_mask.rs b/arrow-data/src/bit_mask.rs
index 6a0a46038..ed8e65257 100644
--- a/arrow-data/src/bit_mask.rs
+++ b/arrow-data/src/bit_mask.rs
@@ -17,8 +17,11 @@
//! Utils for working with packed bit masks
+use crate::ArrayData;
use arrow_buffer::bit_chunk_iterator::BitChunks;
use arrow_buffer::bit_util::{ceil, get_bit, set_bit};
+use arrow_buffer::buffer::buffer_bin_and;
+use arrow_buffer::Buffer;
/// Sets all bits on `write_data` in the range `[offset_write..offset_write+len]` to be equal to the
/// bits in `data` in the range `[offset_read..offset_read+len]`
@@ -62,9 +65,41 @@ pub fn set_bits(
null_count as usize
}
+/// Combines the null bitmaps of multiple arrays using a bitwise `and` operation.
+///
+/// This function is useful when implementing operations on higher level arrays. Returns `None` when `arrays` is empty or when none of the arrays has a null buffer.
+pub fn combine_option_bitmap(
+ arrays: &[&ArrayData],
+ len_in_bits: usize,
+) -> Option<Buffer> {
+ let (buffer, offset) = arrays
+ .iter()
+ .map(|array| (array.null_buffer().cloned(), array.offset())) // pair each optional null buffer with its array's offset
+ .reduce(|acc, buffer_and_offset| match (acc, buffer_and_offset) {
+ ((None, _), (None, _)) => (None, 0), // neither side has a null buffer
+ ((Some(buffer), offset), (None, _)) | ((None, _), (Some(buffer), offset)) => {
+ (Some(buffer), offset) // only one side has a null buffer: carry it forward with its own offset
+ }
+ ((Some(buffer_left), offset_left), (Some(buffer_right), offset_right)) => (
+ Some(buffer_bin_and(
+ &buffer_left,
+ offset_left,
+ &buffer_right,
+ offset_right,
+ len_in_bits,
+ )),
+ 0, // the accumulator's offset is reset to 0 for the freshly combined buffer
+ ),
+ })?; // `reduce` yields `None` for an empty slice, which is returned to the caller as-is
+
+ Some(buffer?.bit_slice(offset, len_in_bits)) // `None` if no array had a null buffer; otherwise slice to `len_in_bits` bits from the remaining offset
+}
+
#[cfg(test)]
mod tests {
use super::*;
+ use arrow_schema::DataType;
+ use std::sync::Arc;
#[test]
fn test_set_bits_aligned() {
@@ -187,4 +222,110 @@ mod tests {
assert_eq!(destination, expected_data);
assert_eq!(result, expected_null_count);
}
+
+ fn make_data_with_null_bit_buffer( // test helper: builds a UInt8 `ArrayData` with the given length, offset and optional null buffer
+ len: usize,
+ offset: usize,
+ null_bit_buffer: Option<Buffer>,
+ ) -> Arc<ArrayData> {
+ let buffer = Buffer::from(&vec![11; len + offset]); // `len + offset` values so the offset window is covered; the value 11 is never asserted on by the tests below
+
+ Arc::new(
+ ArrayData::try_new(
+ DataType::UInt8,
+ len,
+ null_bit_buffer,
+ offset,
+ vec![buffer],
+ vec![],
+ )
+ .unwrap(), // a construction failure is a bug in the test setup itself
+ )
+ }
+
+ #[test]
+ fn test_combine_option_bitmap() { // covers empty input, a single input, None/Some mixes, and sliced inputs
+ let none_bitmap = make_data_with_null_bit_buffer(8, 0, None);
+ let some_bitmap =
+ make_data_with_null_bit_buffer(8, 0, Some(Buffer::from([0b01001010])));
+ let inverse_bitmap =
+ make_data_with_null_bit_buffer(8, 0, Some(Buffer::from([0b10110101]))); // bitwise complement of `some_bitmap`
+ let some_other_bitmap =
+ make_data_with_null_bit_buffer(8, 0, Some(Buffer::from([0b11010111])));
+ assert_eq!(None, combine_option_bitmap(&[], 8)); // empty input yields `None`
+ assert_eq!(
+ Some(Buffer::from([0b01001010])),
+ combine_option_bitmap(&[&some_bitmap], 8)
+ );
+ assert_eq!(
+ None,
+ combine_option_bitmap(&[&none_bitmap, &none_bitmap], 8)
+ );
+ assert_eq!(
+ Some(Buffer::from([0b01001010])),
+ combine_option_bitmap(&[&some_bitmap, &none_bitmap], 8)
+ );
+ assert_eq!(
+ Some(Buffer::from([0b11010111])),
+ combine_option_bitmap(&[&none_bitmap, &some_other_bitmap], 8)
+ );
+ assert_eq!(
+ Some(Buffer::from([0b01001010])),
+ combine_option_bitmap(&[&some_bitmap, &some_bitmap], 8,)
+ );
+ assert_eq!(
+ Some(Buffer::from([0b0])), // a bitmap and-ed with its complement has no bits set
+ combine_option_bitmap(&[&some_bitmap, &inverse_bitmap], 8,)
+ );
+ assert_eq!(
+ Some(Buffer::from([0b01000010])),
+ combine_option_bitmap(&[&some_bitmap, &some_other_bitmap, &none_bitmap], 8,)
+ );
+ assert_eq!(
+ Some(Buffer::from([0b00001001])),
+ combine_option_bitmap(
+ &[
+ &some_bitmap.slice(3, 5), // three 5-element slices, each starting at a different offset
+ &inverse_bitmap.slice(2, 5),
+ &some_other_bitmap.slice(1, 5)
+ ],
+ 5,
+ )
+ );
+ }
+
+ #[test]
+ fn test_combine_option_bitmap_with_offsets() { // every input encodes the same 0b10101010 pattern, stored behind a different offset
+ let none_bitmap = make_data_with_null_bit_buffer(8, 0, None);
+ let bitmap0 =
+ make_data_with_null_bit_buffer(8, 0, Some(Buffer::from([0b10101010])));
+ let bitmap1 =
+ make_data_with_null_bit_buffer(8, 1, Some(Buffer::from([0b01010100, 0b1]))); // 0b10101010 shifted up by 1 bit, read back via offset 1
+ let bitmap2 =
+ make_data_with_null_bit_buffer(8, 2, Some(Buffer::from([0b10101000, 0b10]))); // 0b10101010 shifted up by 2 bits, read back via offset 2
+ assert_eq!(
+ Some(Buffer::from([0b10101010])),
+ combine_option_bitmap(&[&bitmap1], 8)
+ );
+ assert_eq!(
+ Some(Buffer::from([0b10101010])),
+ combine_option_bitmap(&[&bitmap2], 8)
+ );
+ assert_eq!(
+ Some(Buffer::from([0b10101010])),
+ combine_option_bitmap(&[&bitmap1, &none_bitmap], 8)
+ );
+ assert_eq!(
+ Some(Buffer::from([0b10101010])),
+ combine_option_bitmap(&[&none_bitmap, &bitmap2], 8)
+ );
+ assert_eq!(
+ Some(Buffer::from([0b10101010])),
+ combine_option_bitmap(&[&bitmap0, &bitmap1], 8)
+ );
+ assert_eq!(
+ Some(Buffer::from([0b10101010])),
+ combine_option_bitmap(&[&bitmap1, &bitmap2], 8)
+ );
+ }
}
diff --git a/arrow/src/compute/kernels/arithmetic.rs b/arrow/src/compute/kernels/arithmetic.rs
index cafd63620..23cefe48e 100644
--- a/arrow/src/compute/kernels/arithmetic.rs
+++ b/arrow/src/compute/kernels/arithmetic.rs
@@ -310,10 +310,10 @@ where
}
// Create the combined `Bitmap`
- let null_bit_buffer = crate::compute::util::combine_option_bitmap(
+ let null_bit_buffer = arrow_data::bit_mask::combine_option_bitmap(
&[left.data_ref(), right.data_ref()],
left.len(),
- )?;
+ );
let lanes = T::lanes();
let buffer_size = left.len() * std::mem::size_of::<T::Native>();
@@ -660,10 +660,10 @@ where
)));
}
- let null_bit_buffer = crate::compute::util::combine_option_bitmap(
+ let null_bit_buffer = arrow_data::bit_mask::combine_option_bitmap(
&[left.data_ref(), right.data_ref()],
left.len(),
- )?;
+ );
// Safety justification: Since the inputs are valid Arrow arrays, all values are
// valid indexes into the dictionary (which is verified during construction)
diff --git a/arrow/src/compute/kernels/arity.rs b/arrow/src/compute/kernels/arity.rs
index d0f18cf58..6207ab639 100644
--- a/arrow/src/compute/kernels/arity.rs
+++ b/arrow/src/compute/kernels/arity.rs
@@ -22,12 +22,12 @@ use crate::array::{
PrimitiveArray,
};
use crate::buffer::Buffer;
-use crate::compute::util::combine_option_bitmap;
use crate::datatypes::{ArrowNumericType, ArrowPrimitiveType};
use crate::downcast_dictionary_array;
use crate::error::{ArrowError, Result};
use crate::util::bit_iterator::try_for_each_valid_idx;
use arrow_buffer::MutableBuffer;
+use arrow_data::bit_mask::combine_option_bitmap;
use std::sync::Arc;
#[inline]
@@ -215,7 +215,7 @@ where
return Ok(PrimitiveArray::from(ArrayData::new_empty(&O::DATA_TYPE)));
}
- let null_buffer = combine_option_bitmap(&[a.data(), b.data()], len).unwrap();
+ let null_buffer = combine_option_bitmap(&[a.data(), b.data()], len);
let null_count = null_buffer
.as_ref()
.map(|x| len - x.count_set_bits_offset(0, len))
@@ -275,7 +275,7 @@ where
let len = a.len();
- let null_buffer = combine_option_bitmap(&[a.data(), b.data()], len).unwrap();
+ let null_buffer = combine_option_bitmap(&[a.data(), b.data()], len);
let null_count = null_buffer
.as_ref()
.map(|x| len - x.count_set_bits_offset(0, len))
@@ -333,7 +333,7 @@ where
if a.null_count() == 0 && b.null_count() == 0 {
try_binary_no_nulls(len, a, b, op)
} else {
- let null_buffer = combine_option_bitmap(&[a.data(), b.data()], len).unwrap();
+ let null_buffer = combine_option_bitmap(&[a.data(), b.data()], len);
let null_count = null_buffer
.as_ref()
@@ -401,7 +401,7 @@ where
if a.null_count() == 0 && b.null_count() == 0 {
try_binary_no_nulls_mut(len, a, b, op)
} else {
- let null_buffer = combine_option_bitmap(&[a.data(), b.data()], len).unwrap();
+ let null_buffer = combine_option_bitmap(&[a.data(), b.data()], len);
let null_count = null_buffer
.as_ref()
.map(|x| len - x.count_set_bits_offset(0, len))
diff --git a/arrow/src/compute/kernels/boolean.rs b/arrow/src/compute/kernels/boolean.rs
index 1b33fa19e..aa42f3d20 100644
--- a/arrow/src/compute/kernels/boolean.rs
+++ b/arrow/src/compute/kernels/boolean.rs
@@ -29,10 +29,10 @@ use crate::buffer::{
bitwise_bin_op_helper, bitwise_quaternary_op_helper, buffer_bin_and, buffer_bin_or,
buffer_unary_not, Buffer, MutableBuffer,
};
-use crate::compute::util::combine_option_bitmap;
use crate::datatypes::DataType;
use crate::error::{ArrowError, Result};
use crate::util::bit_util::ceil;
+use arrow_data::bit_mask::combine_option_bitmap;
/// Updates null buffer based on data buffer and null buffer of the operand at other side
/// in boolean AND kernel with Kleene logic. In short, because for AND kernel, null AND false
@@ -108,7 +108,7 @@ pub(crate) fn build_null_buffer_for_and_or(
len_in_bits: usize,
) -> Option<Buffer> {
// `arrays` are not empty, so safely do `unwrap` directly.
- combine_option_bitmap(&[left_data, right_data], len_in_bits).unwrap()
+ combine_option_bitmap(&[left_data, right_data], len_in_bits)
}
/// Updates null buffer based on data buffer and null buffer of the operand at other side
diff --git a/arrow/src/compute/kernels/comparison.rs b/arrow/src/compute/kernels/comparison.rs
index 33a24500a..b672410fe 100644
--- a/arrow/src/compute/kernels/comparison.rs
+++ b/arrow/src/compute/kernels/comparison.rs
@@ -25,12 +25,12 @@
use crate::array::*;
use crate::buffer::{buffer_unary_not, Buffer, MutableBuffer};
-use crate::compute::util::combine_option_bitmap;
use crate::datatypes::*;
#[allow(unused_imports)]
use crate::downcast_dictionary_array;
use crate::error::{ArrowError, Result};
use crate::util::bit_util;
+use arrow_data::bit_mask::combine_option_bitmap;
use arrow_select::take::take;
use num::ToPrimitive;
use regex::Regex;
@@ -53,26 +53,7 @@ where
));
}
- let null_bit_buffer =
- combine_option_bitmap(&[left.data_ref(), right.data_ref()], left.len())?;
-
- let buffer = MutableBuffer::collect_bool(left.len(), |i| unsafe {
- // SAFETY: i in range 0..len
- op(left.value_unchecked(i), right.value_unchecked(i))
- });
-
- let data = unsafe {
- ArrayData::new_unchecked(
- DataType::Boolean,
- left.len(),
- None,
- null_bit_buffer,
- 0,
- vec![Buffer::from(buffer)],
- vec![],
- )
- };
- Ok(BooleanArray::from(data))
+ Ok(BooleanArray::from_binary(left, right, op))
}
/// Helper function to perform boolean lambda function on values from array accessor, this
@@ -81,28 +62,7 @@ fn compare_op_scalar<T: ArrayAccessor, F>(left: T, op: F) -> Result<BooleanArray
where
F: Fn(T::Item) -> bool,
{
- let null_bit_buffer = left
- .data()
- .null_buffer()
- .map(|b| b.bit_slice(left.offset(), left.len()));
-
- let buffer = MutableBuffer::collect_bool(left.len(), |i| unsafe {
- // SAFETY: i in range 0..len
- op(left.value_unchecked(i))
- });
-
- let data = unsafe {
- ArrayData::new_unchecked(
- DataType::Boolean,
- left.len(),
- None,
- null_bit_buffer,
- 0,
- vec![Buffer::from(buffer)],
- vec![],
- )
- };
- Ok(BooleanArray::from(data))
+ Ok(BooleanArray::from_unary(left, op))
}
/// Evaluate `op(left, right)` for [`PrimitiveArray`]s using a specified
@@ -158,7 +118,7 @@ where
}
let null_bit_buffer =
- combine_option_bitmap(&[left.data_ref(), right.data_ref()], left.len())?;
+ combine_option_bitmap(&[left.data_ref(), right.data_ref()], left.len());
let mut result = BooleanBufferBuilder::new(left.len());
for i in 0..left.len() {
@@ -1172,7 +1132,7 @@ pub fn regexp_is_match_utf8<OffsetSize: OffsetSizeTrait>(
));
}
let null_bit_buffer =
- combine_option_bitmap(&[array.data_ref(), regex_array.data_ref()], array.len())?;
+ combine_option_bitmap(&[array.data_ref(), regex_array.data_ref()], array.len());
let mut patterns: HashMap<String, Regex> = HashMap::new();
let mut result = BooleanBufferBuilder::new(array.len());
@@ -2294,7 +2254,7 @@ where
}
let null_bit_buffer =
- combine_option_bitmap(&[left.data_ref(), right.data_ref()], len)?;
+ combine_option_bitmap(&[left.data_ref(), right.data_ref()], len);
// we process the data in chunks so that each iteration results in one u64 of comparison result bits
const CHUNK_SIZE: usize = 64;
@@ -3701,7 +3661,7 @@ where
let num_bytes = bit_util::ceil(left_len, 8);
let not_both_null_bit_buffer =
- match combine_option_bitmap(&[left.data_ref(), right.data_ref()], left_len)? {
+ match combine_option_bitmap(&[left.data_ref(), right.data_ref()], left_len) {
Some(buff) => buff,
None => new_all_set_buffer(num_bytes),
};
@@ -3758,7 +3718,7 @@ where
let num_bytes = bit_util::ceil(left_len, 8);
let not_both_null_bit_buffer =
- match combine_option_bitmap(&[left.data_ref(), right.data_ref()], left_len)? {
+ match combine_option_bitmap(&[left.data_ref(), right.data_ref()], left_len) {
Some(buff) => buff,
None => new_all_set_buffer(num_bytes),
};
diff --git a/arrow/src/compute/kernels/concat_elements.rs b/arrow/src/compute/kernels/concat_elements.rs
index a908ba9ab..25c8f60de 100644
--- a/arrow/src/compute/kernels/concat_elements.rs
+++ b/arrow/src/compute/kernels/concat_elements.rs
@@ -16,8 +16,8 @@
// under the License.
use crate::array::*;
-use crate::compute::util::combine_option_bitmap;
use crate::error::{ArrowError, Result};
+use arrow_data::bit_mask::combine_option_bitmap;
/// Returns the elementwise concatenation of a [`StringArray`].
///
@@ -45,7 +45,7 @@ pub fn concat_elements_utf8<Offset: OffsetSizeTrait>(
)));
}
- let output_bitmap = combine_option_bitmap(&[left.data(), right.data()], left.len())?;
+ let output_bitmap = combine_option_bitmap(&[left.data(), right.data()], left.len());
let left_offsets = left.value_offsets();
let right_offsets = right.value_offsets();
@@ -111,7 +111,7 @@ pub fn concat_elements_utf8_many<Offset: OffsetSizeTrait>(
.collect::<Vec<_>>()
.as_slice(),
size,
- )?;
+ );
let data_values = arrays
.iter()
diff --git a/arrow/src/compute/mod.rs b/arrow/src/compute/mod.rs
index 28e5e6b52..c0b10afe4 100644
--- a/arrow/src/compute/mod.rs
+++ b/arrow/src/compute/mod.rs
@@ -19,8 +19,6 @@
pub mod kernels;
-mod util;
-
pub use self::kernels::aggregate::*;
pub use self::kernels::arithmetic::*;
pub use self::kernels::arity::*;
diff --git a/arrow/src/compute/util.rs b/arrow/src/compute/util.rs
deleted file mode 100644
index 9ddc53501..000000000
--- a/arrow/src/compute/util.rs
+++ /dev/null
@@ -1,243 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-//! Common utilities for computation kernels.
-
-use crate::array::*;
-use crate::buffer::{buffer_bin_and, Buffer};
-use crate::error::{ArrowError, Result};
-
-/// Combines the null bitmaps of multiple arrays using a bitwise `and` operation.
-///
-/// This function is useful when implementing operations on higher level arrays.
-#[allow(clippy::unnecessary_wraps)]
-pub(super) fn combine_option_bitmap(
- arrays: &[&ArrayData],
- len_in_bits: usize,
-) -> Result<Option<Buffer>> {
- arrays
- .iter()
- .map(|array| (array.null_buffer().cloned(), array.offset()))
- .reduce(|acc, buffer_and_offset| match (acc, buffer_and_offset) {
- ((None, _), (None, _)) => (None, 0),
- ((Some(buffer), offset), (None, _)) | ((None, _), (Some(buffer), offset)) => {
- (Some(buffer), offset)
- }
- ((Some(buffer_left), offset_left), (Some(buffer_right), offset_right)) => (
- Some(buffer_bin_and(
- &buffer_left,
- offset_left,
- &buffer_right,
- offset_right,
- len_in_bits,
- )),
- 0,
- ),
- })
- .map_or(
- Err(ArrowError::ComputeError(
- "Arrays must not be empty".to_string(),
- )),
- |(buffer, offset)| {
- Ok(buffer.map(|buffer| buffer.bit_slice(offset, len_in_bits)))
- },
- )
-}
-
-#[cfg(test)]
-pub(super) mod tests {
- use super::*;
-
- use std::sync::Arc;
-
- use crate::array::ArrayData;
- use crate::buffer::buffer_bin_or;
- use crate::datatypes::DataType;
-
- /// Compares the null bitmaps of two arrays using a bitwise `or` operation.
- ///
- /// This function is useful when implementing operations on higher level arrays.
- pub(super) fn compare_option_bitmap(
- left_data: &ArrayData,
- right_data: &ArrayData,
- len_in_bits: usize,
- ) -> Result<Option<Buffer>> {
- let left_offset_in_bits = left_data.offset();
- let right_offset_in_bits = right_data.offset();
-
- let left = left_data.null_buffer();
- let right = right_data.null_buffer();
-
- match left {
- None => match right {
- None => Ok(None),
- Some(r) => Ok(Some(r.bit_slice(right_offset_in_bits, len_in_bits))),
- },
- Some(l) => match right {
- None => Ok(Some(l.bit_slice(left_offset_in_bits, len_in_bits))),
-
- Some(r) => Ok(Some(buffer_bin_or(
- l,
- left_offset_in_bits,
- r,
- right_offset_in_bits,
- len_in_bits,
- ))),
- },
- }
- }
-
- fn make_data_with_null_bit_buffer(
- len: usize,
- offset: usize,
- null_bit_buffer: Option<Buffer>,
- ) -> Arc<ArrayData> {
- let buffer = Buffer::from(&vec![11; len + offset]);
-
- Arc::new(
- ArrayData::try_new(
- DataType::UInt8,
- len,
- null_bit_buffer,
- offset,
- vec![buffer],
- vec![],
- )
- .unwrap(),
- )
- }
-
- #[test]
- fn test_combine_option_bitmap() {
- let none_bitmap = make_data_with_null_bit_buffer(8, 0, None);
- let some_bitmap =
- make_data_with_null_bit_buffer(8, 0, Some(Buffer::from([0b01001010])));
- let inverse_bitmap =
- make_data_with_null_bit_buffer(8, 0, Some(Buffer::from([0b10110101])));
- let some_other_bitmap =
- make_data_with_null_bit_buffer(8, 0, Some(Buffer::from([0b11010111])));
- assert_eq!(
- combine_option_bitmap(&[], 8).unwrap_err().to_string(),
- "Compute error: Arrays must not be empty",
- );
- assert_eq!(
- Some(Buffer::from([0b01001010])),
- combine_option_bitmap(&[&some_bitmap], 8).unwrap()
- );
- assert_eq!(
- None,
- combine_option_bitmap(&[&none_bitmap, &none_bitmap], 8).unwrap()
- );
- assert_eq!(
- Some(Buffer::from([0b01001010])),
- combine_option_bitmap(&[&some_bitmap, &none_bitmap], 8).unwrap()
- );
- assert_eq!(
- Some(Buffer::from([0b11010111])),
- combine_option_bitmap(&[&none_bitmap, &some_other_bitmap], 8).unwrap()
- );
- assert_eq!(
- Some(Buffer::from([0b01001010])),
- combine_option_bitmap(&[&some_bitmap, &some_bitmap], 8,).unwrap()
- );
- assert_eq!(
- Some(Buffer::from([0b0])),
- combine_option_bitmap(&[&some_bitmap, &inverse_bitmap], 8,).unwrap()
- );
- assert_eq!(
- Some(Buffer::from([0b01000010])),
- combine_option_bitmap(&[&some_bitmap, &some_other_bitmap, &none_bitmap], 8,)
- .unwrap()
- );
- assert_eq!(
- Some(Buffer::from([0b00001001])),
- combine_option_bitmap(
- &[
- &some_bitmap.slice(3, 5),
- &inverse_bitmap.slice(2, 5),
- &some_other_bitmap.slice(1, 5)
- ],
- 5,
- )
- .unwrap()
- );
- }
-
- #[test]
- fn test_combine_option_bitmap_with_offsets() {
- let none_bitmap = make_data_with_null_bit_buffer(8, 0, None);
- let bitmap0 =
- make_data_with_null_bit_buffer(8, 0, Some(Buffer::from([0b10101010])));
- let bitmap1 =
- make_data_with_null_bit_buffer(8, 1, Some(Buffer::from([0b01010100, 0b1])));
- let bitmap2 =
- make_data_with_null_bit_buffer(8, 2, Some(Buffer::from([0b10101000, 0b10])));
- assert_eq!(
- Some(Buffer::from([0b10101010])),
- combine_option_bitmap(&[&bitmap1], 8).unwrap()
- );
- assert_eq!(
- Some(Buffer::from([0b10101010])),
- combine_option_bitmap(&[&bitmap2], 8).unwrap()
- );
- assert_eq!(
- Some(Buffer::from([0b10101010])),
- combine_option_bitmap(&[&bitmap1, &none_bitmap], 8).unwrap()
- );
- assert_eq!(
- Some(Buffer::from([0b10101010])),
- combine_option_bitmap(&[&none_bitmap, &bitmap2], 8).unwrap()
- );
- assert_eq!(
- Some(Buffer::from([0b10101010])),
- combine_option_bitmap(&[&bitmap0, &bitmap1], 8).unwrap()
- );
- assert_eq!(
- Some(Buffer::from([0b10101010])),
- combine_option_bitmap(&[&bitmap1, &bitmap2], 8).unwrap()
- );
- }
-
- #[test]
- fn test_compare_option_bitmap() {
- let none_bitmap = make_data_with_null_bit_buffer(8, 0, None);
- let some_bitmap =
- make_data_with_null_bit_buffer(8, 0, Some(Buffer::from([0b01001010])));
- let inverse_bitmap =
- make_data_with_null_bit_buffer(8, 0, Some(Buffer::from([0b10110101])));
- assert_eq!(
- None,
- compare_option_bitmap(&none_bitmap, &none_bitmap, 8).unwrap()
- );
- assert_eq!(
- Some(Buffer::from([0b01001010])),
- compare_option_bitmap(&some_bitmap, &none_bitmap, 8).unwrap()
- );
- assert_eq!(
- Some(Buffer::from([0b01001010])),
- compare_option_bitmap(&none_bitmap, &some_bitmap, 8,).unwrap()
- );
- assert_eq!(
- Some(Buffer::from([0b01001010])),
- compare_option_bitmap(&some_bitmap, &some_bitmap, 8,).unwrap()
- );
- assert_eq!(
- Some(Buffer::from([0b11111111])),
- compare_option_bitmap(&some_bitmap, &inverse_bitmap, 8,).unwrap()
- );
- }
-}