You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by tu...@apache.org on 2023/06/12 13:26:56 UTC

[arrow-rs] branch master updated: Further buffer constructors (#4402)

This is an automated email from the ASF dual-hosted git repository.

tustvold pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git


The following commit(s) were added to refs/heads/master by this push:
     new 2c71135a6 Further buffer constructors (#4402)
2c71135a6 is described below

commit 2c71135a683844d51779f94cfd8e5fc35a2624e4
Author: Raphael Taylor-Davies <17...@users.noreply.github.com>
AuthorDate: Mon Jun 12 14:26:48 2023 +0100

    Further buffer constructors (#4402)
---
 arrow-arith/src/boolean.rs             | 36 +++++++---------------------------
 arrow-array/src/array/boolean_array.rs |  8 ++++++++
 arrow-buffer/src/buffer/boolean.rs     | 17 ++++++++++++++++
 arrow-buffer/src/buffer/null.rs        | 14 ++++++++++---
 4 files changed, 43 insertions(+), 32 deletions(-)

diff --git a/arrow-arith/src/boolean.rs b/arrow-arith/src/boolean.rs
index 258d683ad..04c9fb229 100644
--- a/arrow-arith/src/boolean.rs
+++ b/arrow-arith/src/boolean.rs
@@ -23,11 +23,9 @@
 //! [here](https://doc.rust-lang.org/stable/core/arch/) for more information.
 
 use arrow_array::*;
-use arrow_buffer::bit_util::ceil;
 use arrow_buffer::buffer::{bitwise_bin_op_helper, bitwise_quaternary_op_helper};
-use arrow_buffer::{BooleanBuffer, MutableBuffer, NullBuffer};
-use arrow_data::ArrayData;
-use arrow_schema::{ArrowError, DataType};
+use arrow_buffer::{BooleanBuffer, NullBuffer};
+use arrow_schema::ArrowError;
 
 /// Logical 'and' boolean values with Kleene logic
 ///
@@ -314,7 +312,7 @@ pub fn not(left: &BooleanArray) -> Result<BooleanArray, ArrowError> {
 /// ```
 pub fn is_null(input: &dyn Array) -> Result<BooleanArray, ArrowError> {
     let values = match input.nulls() {
-        None => NullBuffer::new_null(input.len()).into_inner(),
+        None => BooleanBuffer::new_unset(input.len()),
         Some(nulls) => !nulls.inner(),
     };
 
@@ -333,31 +331,11 @@ pub fn is_null(input: &dyn Array) -> Result<BooleanArray, ArrowError> {
 /// assert_eq!(a_is_not_null, BooleanArray::from(vec![true, true, false]));
 /// ```
 pub fn is_not_null(input: &dyn Array) -> Result<BooleanArray, ArrowError> {
-    let len = input.len();
-
-    let output = match input.nulls() {
-        None => {
-            let len_bytes = ceil(len, 8);
-            MutableBuffer::new(len_bytes)
-                .with_bitset(len_bytes, true)
-                .into()
-        }
-        Some(nulls) => nulls.inner().sliced(),
-    };
-
-    let data = unsafe {
-        ArrayData::new_unchecked(
-            DataType::Boolean,
-            len,
-            None,
-            None,
-            0,
-            vec![output],
-            vec![],
-        )
+    let values = match input.nulls() {
+        None => BooleanBuffer::new_set(input.len()),
+        Some(n) => n.inner().clone(),
     };
-
-    Ok(BooleanArray::from(data))
+    Ok(BooleanArray::new(values, None))
 }
 
 #[cfg(test)]
diff --git a/arrow-array/src/array/boolean_array.rs b/arrow-array/src/array/boolean_array.rs
index e99b71b18..14fa87e13 100644
--- a/arrow-array/src/array/boolean_array.rs
+++ b/arrow-array/src/array/boolean_array.rs
@@ -93,6 +93,14 @@ impl BooleanArray {
         Self { values, nulls }
     }
 
+    /// Create a new [`BooleanArray`] with length `len` consisting only of nulls
+    pub fn new_null(len: usize) -> Self {
+        Self {
+            values: BooleanBuffer::new_unset(len),
+            nulls: Some(NullBuffer::new_null(len)),
+        }
+    }
+
     /// Returns the length of this array.
     pub fn len(&self) -> usize {
         self.values.len()
diff --git a/arrow-buffer/src/buffer/boolean.rs b/arrow-buffer/src/buffer/boolean.rs
index 9098926c5..9cc2bc262 100644
--- a/arrow-buffer/src/buffer/boolean.rs
+++ b/arrow-buffer/src/buffer/boolean.rs
@@ -60,6 +60,23 @@ impl BooleanBuffer {
         }
     }
 
+    /// Create a new [`BooleanBuffer`] of `length` where all values are `true`
+    pub fn new_set(length: usize) -> Self {
+        let mut builder = BooleanBufferBuilder::new(length);
+        builder.append_n(length, true);
+        builder.finish()
+    }
+
+    /// Create a new [`BooleanBuffer`] of `length` where all values are `false`
+    pub fn new_unset(length: usize) -> Self {
+        let buffer = MutableBuffer::new_null(length).into_buffer();
+        Self {
+            buffer,
+            offset: 0,
+            len: length,
+        }
+    }
+
     /// Invokes `f` with indexes `0..len` collecting the boolean results into a new `BooleanBuffer`
     pub fn collect_bool<F: FnMut(usize) -> bool>(len: usize, f: F) -> Self {
         let buffer = MutableBuffer::collect_bool(len, f);
diff --git a/arrow-buffer/src/buffer/null.rs b/arrow-buffer/src/buffer/null.rs
index 260f5d78d..e0c7d9ef8 100644
--- a/arrow-buffer/src/buffer/null.rs
+++ b/arrow-buffer/src/buffer/null.rs
@@ -41,14 +41,22 @@ impl NullBuffer {
 
     /// Create a new [`NullBuffer`] of length `len` where all values are null
     pub fn new_null(len: usize) -> Self {
-        let buffer = MutableBuffer::new_null(len).into_buffer();
-        let buffer = BooleanBuffer::new(buffer, 0, len);
         Self {
-            buffer,
+            buffer: BooleanBuffer::new_unset(len),
             null_count: len,
         }
     }
 
+    /// Create a new [`NullBuffer`] of length `len` where all values are valid
+    ///
+    /// Note: it is more efficient to not set the null buffer if it is known to be all valid
+    pub fn new_valid(len: usize) -> Self {
+        Self {
+            buffer: BooleanBuffer::new_set(len),
+            null_count: 0,
+        }
+    }
+
     /// Create a new [`NullBuffer`] with the provided `buffer` and `null_count`
     ///
     /// # Safety