You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by tu...@apache.org on 2023/06/12 13:26:56 UTC
[arrow-rs] branch master updated: Further buffer constructors (#4402)
This is an automated email from the ASF dual-hosted git repository.
tustvold pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git
The following commit(s) were added to refs/heads/master by this push:
new 2c71135a6 Further buffer constructors (#4402)
2c71135a6 is described below
commit 2c71135a683844d51779f94cfd8e5fc35a2624e4
Author: Raphael Taylor-Davies <17...@users.noreply.github.com>
AuthorDate: Mon Jun 12 14:26:48 2023 +0100
Further buffer constructors (#4402)
---
arrow-arith/src/boolean.rs | 36 +++++++---------------------------
arrow-array/src/array/boolean_array.rs | 8 ++++++++
arrow-buffer/src/buffer/boolean.rs | 17 ++++++++++++++++
arrow-buffer/src/buffer/null.rs | 14 ++++++++++---
4 files changed, 43 insertions(+), 32 deletions(-)
diff --git a/arrow-arith/src/boolean.rs b/arrow-arith/src/boolean.rs
index 258d683ad..04c9fb229 100644
--- a/arrow-arith/src/boolean.rs
+++ b/arrow-arith/src/boolean.rs
@@ -23,11 +23,9 @@
//! [here](https://doc.rust-lang.org/stable/core/arch/) for more information.
use arrow_array::*;
-use arrow_buffer::bit_util::ceil;
use arrow_buffer::buffer::{bitwise_bin_op_helper, bitwise_quaternary_op_helper};
-use arrow_buffer::{BooleanBuffer, MutableBuffer, NullBuffer};
-use arrow_data::ArrayData;
-use arrow_schema::{ArrowError, DataType};
+use arrow_buffer::{BooleanBuffer, NullBuffer};
+use arrow_schema::ArrowError;
/// Logical 'and' boolean values with Kleene logic
///
@@ -314,7 +312,7 @@ pub fn not(left: &BooleanArray) -> Result<BooleanArray, ArrowError> {
/// ```
pub fn is_null(input: &dyn Array) -> Result<BooleanArray, ArrowError> {
let values = match input.nulls() {
- None => NullBuffer::new_null(input.len()).into_inner(),
+ None => BooleanBuffer::new_unset(input.len()),
Some(nulls) => !nulls.inner(),
};
@@ -333,31 +331,11 @@ pub fn is_null(input: &dyn Array) -> Result<BooleanArray, ArrowError> {
/// assert_eq!(a_is_not_null, BooleanArray::from(vec![true, true, false]));
/// ```
pub fn is_not_null(input: &dyn Array) -> Result<BooleanArray, ArrowError> {
- let len = input.len();
-
- let output = match input.nulls() {
- None => {
- let len_bytes = ceil(len, 8);
- MutableBuffer::new(len_bytes)
- .with_bitset(len_bytes, true)
- .into()
- }
- Some(nulls) => nulls.inner().sliced(),
- };
-
- let data = unsafe {
- ArrayData::new_unchecked(
- DataType::Boolean,
- len,
- None,
- None,
- 0,
- vec![output],
- vec![],
- )
+ let values = match input.nulls() {
+ None => BooleanBuffer::new_set(input.len()),
+ Some(n) => n.inner().clone(),
};
-
- Ok(BooleanArray::from(data))
+ Ok(BooleanArray::new(values, None))
}
#[cfg(test)]
diff --git a/arrow-array/src/array/boolean_array.rs b/arrow-array/src/array/boolean_array.rs
index e99b71b18..14fa87e13 100644
--- a/arrow-array/src/array/boolean_array.rs
+++ b/arrow-array/src/array/boolean_array.rs
@@ -93,6 +93,14 @@ impl BooleanArray {
Self { values, nulls }
}
+ /// Create a new [`BooleanArray`] with length `len` consisting only of nulls
+ pub fn new_null(len: usize) -> Self {
+ Self {
+ values: BooleanBuffer::new_unset(len),
+ nulls: Some(NullBuffer::new_null(len)),
+ }
+ }
+
/// Returns the length of this array.
pub fn len(&self) -> usize {
self.values.len()
diff --git a/arrow-buffer/src/buffer/boolean.rs b/arrow-buffer/src/buffer/boolean.rs
index 9098926c5..9cc2bc262 100644
--- a/arrow-buffer/src/buffer/boolean.rs
+++ b/arrow-buffer/src/buffer/boolean.rs
@@ -60,6 +60,23 @@ impl BooleanBuffer {
}
}
+ /// Create a new [`BooleanBuffer`] of `length` where all values are `true`
+ pub fn new_set(length: usize) -> Self {
+ let mut builder = BooleanBufferBuilder::new(length);
+ builder.append_n(length, true);
+ builder.finish()
+ }
+
+ /// Create a new [`BooleanBuffer`] of `length` where all values are `false`
+ pub fn new_unset(length: usize) -> Self {
+ let buffer = MutableBuffer::new_null(length).into_buffer();
+ Self {
+ buffer,
+ offset: 0,
+ len: length,
+ }
+ }
+
/// Invokes `f` with indexes `0..len` collecting the boolean results into a new `BooleanBuffer`
pub fn collect_bool<F: FnMut(usize) -> bool>(len: usize, f: F) -> Self {
let buffer = MutableBuffer::collect_bool(len, f);
diff --git a/arrow-buffer/src/buffer/null.rs b/arrow-buffer/src/buffer/null.rs
index 260f5d78d..e0c7d9ef8 100644
--- a/arrow-buffer/src/buffer/null.rs
+++ b/arrow-buffer/src/buffer/null.rs
@@ -41,14 +41,22 @@ impl NullBuffer {
/// Create a new [`NullBuffer`] of length `len` where all values are null
pub fn new_null(len: usize) -> Self {
- let buffer = MutableBuffer::new_null(len).into_buffer();
- let buffer = BooleanBuffer::new(buffer, 0, len);
Self {
- buffer,
+ buffer: BooleanBuffer::new_unset(len),
null_count: len,
}
}
+ /// Create a new [`NullBuffer`] of length `len` where all values are valid
+ ///
+ /// Note: it is more efficient to not set the null buffer if it is known to be all valid
+ pub fn new_valid(len: usize) -> Self {
+ Self {
+ buffer: BooleanBuffer::new_set(len),
+ null_count: 0,
+ }
+ }
+
/// Create a new [`NullBuffer`] with the provided `buffer` and `null_count`
///
/// # Safety