You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by al...@apache.org on 2023/06/23 09:57:41 UTC

[arrow-rs] branch master updated: feat: support `NullBuilder` (#4430)

This is an automated email from the ASF dual-hosted git repository.

alamb pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git


The following commit(s) were added to refs/heads/master by this push:
     new b163b19d2 feat: support `NullBuilder` (#4430)
b163b19d2 is described below

commit b163b19d213c57170789f32a2011cbadf9ab4120
Author: Igor Izvekov <iz...@gmail.com>
AuthorDate: Fri Jun 23 12:57:36 2023 +0300

    feat: support `NullBuilder` (#4430)
    
    * feat: NullBuilder
    
    * fix: docs
    
    * refactor: use method ArrayData::new_null
---
 arrow-array/src/array/null_array.rs       |   6 +
 arrow-array/src/builder/mod.rs            |   2 +
 arrow-array/src/builder/null_builder.rs   | 184 ++++++++++++++++++++++++++++++
 arrow-array/src/builder/struct_builder.rs |   2 +-
 4 files changed, 193 insertions(+), 1 deletion(-)

diff --git a/arrow-array/src/array/null_array.rs b/arrow-array/src/array/null_array.rs
index 7fdd99a39..c054c8904 100644
--- a/arrow-array/src/array/null_array.rs
+++ b/arrow-array/src/array/null_array.rs
@@ -17,6 +17,7 @@
 
 //! Contains the `NullArray` type.
 
+use crate::builder::NullBuilder;
 use crate::{Array, ArrayRef};
 use arrow_buffer::buffer::NullBuffer;
 use arrow_data::{ArrayData, ArrayDataBuilder};
@@ -62,6 +63,11 @@ impl NullArray {
 
         Self { len }
     }
+
+    /// Returns a [`NullBuilder`] created by `NullBuilder::with_capacity(capacity)`
+    pub fn builder(capacity: usize) -> NullBuilder {
+        NullBuilder::with_capacity(capacity)
+    }
 }
 
 impl Array for NullArray {
diff --git a/arrow-array/src/builder/mod.rs b/arrow-array/src/builder/mod.rs
index 91df8c27c..1e5e6426b 100644
--- a/arrow-array/src/builder/mod.rs
+++ b/arrow-array/src/builder/mod.rs
@@ -164,6 +164,8 @@ mod generic_list_builder;
 pub use generic_list_builder::*;
 mod map_builder;
 pub use map_builder::*;
+mod null_builder;
+pub use null_builder::*;
 mod primitive_builder;
 pub use primitive_builder::*;
 mod primitive_dictionary_builder;
diff --git a/arrow-array/src/builder/null_builder.rs b/arrow-array/src/builder/null_builder.rs
new file mode 100644
index 000000000..0b4345006
--- /dev/null
+++ b/arrow-array/src/builder/null_builder.rs
@@ -0,0 +1,184 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+use crate::builder::ArrayBuilder;
+use crate::{ArrayRef, NullArray};
+use arrow_data::ArrayData;
+use arrow_schema::DataType;
+use std::any::Any;
+use std::sync::Arc;
+
+/// Builder for [`NullArray`]
+///
+/// The builder's only state is a slot count (`len`); every appended slot is null.
+///
+/// # Example
+///
+/// Create a `NullArray` from a `NullBuilder`
+///
+/// ```
+/// # use arrow_array::{Array, NullArray, builder::NullBuilder};
+/// let mut b = NullBuilder::new();
+/// b.append_empty_value();
+/// b.append_null();
+/// b.append_nulls(3);
+/// b.append_empty_values(3);
+/// let arr = b.finish();
+///
+/// assert_eq!(8, arr.len());
+/// assert_eq!(8, arr.null_count());
+/// ```
+#[derive(Debug)]
+pub struct NullBuilder {
+    len: usize,
+}
+
+impl Default for NullBuilder {
+    fn default() -> Self {
+        Self::new()
+    }
+}
+
+impl NullBuilder {
+    /// Creates a new, empty null builder
+    pub fn new() -> Self {
+        Self { len: 0 }
+    }
+
+    /// Creates a new, empty null builder.
+    ///
+    /// The builder only tracks a slot count, so there is nothing to pre-allocate:
+    /// `capacity` is accepted for parity with other builders and ignored.
+    pub fn with_capacity(_capacity: usize) -> Self {
+        Self::new()
+    }
+
+    /// Returns the number of slots held by this builder; nothing is ever
+    /// pre-allocated, so this is always equal to `len()`.
+    pub fn capacity(&self) -> usize {
+        self.len
+    }
+
+    /// Appends a null slot into the builder
+    #[inline]
+    pub fn append_null(&mut self) {
+        self.len += 1;
+    }
+
+    /// Appends `n` `null`s into the builder.
+    #[inline]
+    pub fn append_nulls(&mut self, n: usize) {
+        self.len += n;
+    }
+
+    /// Appends a null slot into the builder; same as [`Self::append_null`].
+    #[inline]
+    pub fn append_empty_value(&mut self) {
+        self.append_null();
+    }
+
+    /// Appends `n` `null`s into the builder; same as [`Self::append_nulls`].
+    #[inline]
+    pub fn append_empty_values(&mut self, n: usize) {
+        self.append_nulls(n);
+    }
+
+    /// Builds the [NullArray] and resets this builder to empty.
+    pub fn finish(&mut self) -> NullArray {
+        // `mem::take` zeroes `self.len`, leaving the builder ready for reuse.
+        let len = std::mem::take(&mut self.len);
+        NullArray::from(ArrayData::new_null(&DataType::Null, len))
+    }
+
+    /// Builds the [NullArray] without resetting the builder.
+    pub fn finish_cloned(&self) -> NullArray {
+        NullArray::from(ArrayData::new_null(&DataType::Null, self.len))
+    }
+}
+
+impl ArrayBuilder for NullBuilder {
+    /// Returns the builder as a non-mutable `Any` reference.
+    fn as_any(&self) -> &dyn Any {
+        self
+    }
+
+    /// Returns the builder as a mutable `Any` reference.
+    fn as_any_mut(&mut self) -> &mut dyn Any {
+        self
+    }
+
+    /// Returns the boxed builder as a box of `Any`.
+    fn into_box_any(self: Box<Self>) -> Box<dyn Any> {
+        self
+    }
+
+    /// Returns the number of array slots in the builder
+    fn len(&self) -> usize {
+        self.len
+    }
+
+    /// Returns whether the number of array slots is zero
+    fn is_empty(&self) -> bool {
+        self.len() == 0
+    }
+
+    /// Builds the array and reset this builder.
+    fn finish(&mut self) -> ArrayRef {
+        Arc::new(self.finish())
+    }
+
+    /// Builds the array without resetting the builder.
+    fn finish_cloned(&self) -> ArrayRef {
+        Arc::new(self.finish_cloned())
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use crate::Array;
+
+    #[test]
+    fn test_null_array_builder() {
+        let mut builder = NullArray::builder(10);
+        assert!(builder.is_empty());
+        builder.append_null();
+        builder.append_nulls(4);
+        builder.append_empty_value();
+        builder.append_empty_values(4);
+
+        let arr = builder.finish();
+        assert_eq!(10, arr.len());
+        assert_eq!(0, arr.offset());
+        assert_eq!(10, arr.null_count());
+        assert!(builder.is_empty());
+    }
+
+    #[test]
+    fn test_null_array_builder_finish_cloned() {
+        let mut builder = NullArray::builder(16);
+        builder.append_null();
+        builder.append_empty_value();
+        builder.append_empty_values(3);
+        let mut array = builder.finish_cloned();
+        assert_eq!(5, array.null_count());
+
+        builder.append_empty_values(5);
+        array = builder.finish();
+        assert_eq!(10, array.null_count());
+    }
+}
diff --git a/arrow-array/src/builder/struct_builder.rs b/arrow-array/src/builder/struct_builder.rs
index 04dc5ba73..88a23db6d 100644
--- a/arrow-array/src/builder/struct_builder.rs
+++ b/arrow-array/src/builder/struct_builder.rs
@@ -97,7 +97,7 @@ impl ArrayBuilder for StructBuilder {
 pub fn make_builder(datatype: &DataType, capacity: usize) -> Box<dyn ArrayBuilder> {
     use crate::builder::*;
     match datatype {
-        DataType::Null => unimplemented!(),
+        DataType::Null => Box::new(NullBuilder::with_capacity(capacity)),
         DataType::Boolean => Box::new(BooleanBuilder::with_capacity(capacity)),
         DataType::Int8 => Box::new(Int8Builder::with_capacity(capacity)),
         DataType::Int16 => Box::new(Int16Builder::with_capacity(capacity)),