You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by tu...@apache.org on 2022/06/23 18:39:02 UTC

[arrow-rs] branch master updated: Complete and fixup split of `arrow::array::builder` module (#1843) (#1928)

This is an automated email from the ASF dual-hosted git repository.

tustvold pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git


The following commit(s) were added to refs/heads/master by this push:
     new 041e5101f Complete and fixup  split of `arrow::array::builder` module (#1843) (#1928)
041e5101f is described below

commit 041e5101fa64d638ee6a7fb3878c52503838c349
Author: Raphael Taylor-Davies <17...@users.noreply.github.com>
AuthorDate: Thu Jun 23 19:38:56 2022 +0100

    Complete and fixup  split of `arrow::array::builder` module (#1843) (#1928)
    
    * Fix merge conflicts from (#1879)
    
    * Split out of decimal_builder (#1843)
    
    * Fix RAT
    
    * Format
    
    * Restore (#1842)
---
 arrow/src/array/builder/buffer_builder.rs          | 116 ++++++--
 arrow/src/array/builder/decimal_builder.rs         | 318 +++------------------
 .../src/array/builder/fixed_size_binary_builder.rs |  99 +++++++
 arrow/src/array/builder/generic_binary_builder.rs  | 111 +++++++
 arrow/src/array/builder/generic_string_builder.rs  | 123 ++++++++
 arrow/src/array/builder/mod.rs                     |  31 +-
 arrow/src/array/builder/union_builder.rs           | 142 ++++-----
 7 files changed, 512 insertions(+), 428 deletions(-)

diff --git a/arrow/src/array/builder/buffer_builder.rs b/arrow/src/array/builder/buffer_builder.rs
index 83b2afb44..9dd138398 100644
--- a/arrow/src/array/builder/buffer_builder.rs
+++ b/arrow/src/array/builder/buffer_builder.rs
@@ -22,29 +22,6 @@ use crate::datatypes::ArrowNativeType;
 
 use super::PhantomData;
 
-///  Converts a `MutableBuffer` to a `BufferBuilder<T>`.
-///
-/// `slots` is the number of array slots currently represented in the `MutableBuffer`.
-pub(crate) fn mutable_buffer_to_builder<T: ArrowNativeType>(
-    mutable_buffer: MutableBuffer,
-    slots: usize,
-) -> BufferBuilder<T> {
-    BufferBuilder::<T> {
-        buffer: mutable_buffer,
-        len: slots,
-        _marker: PhantomData,
-    }
-}
-
-///  Converts a `BufferBuilder<T>` into its underlying `MutableBuffer`.
-///
-/// `From` is not implemented because associated type bounds are unstable.
-pub(crate) fn builder_to_mutable_buffer<T: ArrowNativeType>(
-    builder: BufferBuilder<T>,
-) -> MutableBuffer {
-    builder.buffer
-}
-
 /// Builder for creating a [`Buffer`](crate::buffer::Buffer) object.
 ///
 /// A [`Buffer`](crate::buffer::Buffer) is the underlying data
@@ -168,8 +145,7 @@ impl<T: ArrowNativeType> BufferBuilder<T> {
     /// ```
     #[inline]
     pub fn advance(&mut self, i: usize) {
-        let new_buffer_len = (self.len + i) * mem::size_of::<T>();
-        self.buffer.resize(new_buffer_len, 0);
+        self.buffer.extend_zeros(i * mem::size_of::<T>());
         self.len += i;
     }
 
@@ -232,6 +208,24 @@ impl<T: ArrowNativeType> BufferBuilder<T> {
         self.len += n;
     }
 
+    /// Appends `n` zero-initialized values
+    ///
+    /// # Example:
+    ///
+    /// ```
+    /// use arrow::array::UInt32BufferBuilder;
+    ///
+    /// let mut builder = UInt32BufferBuilder::new(10);
+    /// builder.append_n_zeroed(3);
+    ///
+    /// assert_eq!(builder.len(), 3);
+    /// assert_eq!(builder.as_slice(), &[0, 0, 0])
+    #[inline]
+    pub fn append_n_zeroed(&mut self, n: usize) {
+        self.buffer.extend_zeros(n * mem::size_of::<T>());
+        self.len += n;
+    }
+
     /// Appends a slice of type `T`, growing the internal buffer as needed.
     ///
     /// # Example:
@@ -250,6 +244,78 @@ impl<T: ArrowNativeType> BufferBuilder<T> {
         self.len += slice.len();
     }
 
+    /// View the contents of this buffer as a slice
+    ///
+    /// ```
+    /// use arrow::array::Float64BufferBuilder;
+    ///
+    /// let mut builder = Float64BufferBuilder::new(10);
+    /// builder.append(1.3);
+    /// builder.append_n(2, 2.3);
+    ///
+    /// assert_eq!(builder.as_slice(), &[1.3, 2.3, 2.3]);
+    /// ```
+    #[inline]
+    pub fn as_slice(&self) -> &[T] {
+        // SAFETY
+        //
+        // - MutableBuffer is aligned and initialized for len elements of T
+        // - MutableBuffer corresponds to a single allocation
+        // - MutableBuffer does not support modification whilst active immutable borrows
+        unsafe { std::slice::from_raw_parts(self.buffer.as_ptr() as _, self.len) }
+    }
+
+    /// View the contents of this buffer as a mutable slice
+    ///
+    /// # Example:
+    ///
+    /// ```
+    /// use arrow::array::Float32BufferBuilder;
+    ///
+    /// let mut builder = Float32BufferBuilder::new(10);
+    ///
+    /// builder.append_slice(&[1., 2., 3.4]);
+    /// assert_eq!(builder.as_slice(), &[1., 2., 3.4]);
+    ///
+    /// builder.as_slice_mut()[1] = 4.2;
+    /// assert_eq!(builder.as_slice(), &[1., 4.2, 3.4]);
+    /// ```
+    #[inline]
+    pub fn as_slice_mut(&mut self) -> &mut [T] {
+        // SAFETY
+        //
+        // - MutableBuffer is aligned and initialized for len elements of T
+        // - MutableBuffer corresponds to a single allocation
+        // - `&mut self` guarantees exclusive access to the buffer while the returned slice is live
+        unsafe { std::slice::from_raw_parts_mut(self.buffer.as_mut_ptr() as _, self.len) }
+    }
+
+    /// Shorten this BufferBuilder to `len` items
+    ///
+    /// If `len` is greater than the builder's current length, this has no effect
+    ///
+    /// # Example:
+    ///
+    /// ```
+    /// use arrow::array::UInt16BufferBuilder;
+    ///
+    /// let mut builder = UInt16BufferBuilder::new(10);
+    ///
+    /// builder.append_slice(&[42, 44, 46]);
+    /// assert_eq!(builder.as_slice(), &[42, 44, 46]);
+    ///
+    /// builder.truncate(2);
+    /// assert_eq!(builder.as_slice(), &[42, 44]);
+    ///
+    /// builder.append(12);
+    /// assert_eq!(builder.as_slice(), &[42, 44, 12]);
+    /// ```
+    #[inline]
+    pub fn truncate(&mut self, len: usize) {
+        self.buffer.truncate(len * mem::size_of::<T>());
+        self.len = len.min(self.len); // never grow: a larger `len` must be a no-op, as documented
+    }
+
     /// # Safety
     /// This requires the iterator be a trusted length. This could instead require
     /// the iterator implement `TrustedLen` once that is stabilized.
diff --git a/arrow/src/array/builder/decimal_builder.rs b/arrow/src/array/builder/decimal_builder.rs
index a7925358b..e7e9ec6a5 100644
--- a/arrow/src/array/builder/decimal_builder.rs
+++ b/arrow/src/array/builder/decimal_builder.rs
@@ -18,19 +18,13 @@
 use std::any::Any;
 use std::sync::Arc;
 
-use crate::array::ArrayBuilder;
 use crate::array::ArrayRef;
 use crate::array::DecimalArray;
-use crate::array::FixedSizeBinaryArray;
-use crate::array::OffsetSizeTrait;
 use crate::array::UInt8Builder;
-use crate::array::{GenericBinaryArray, GenericStringArray};
+use crate::array::{ArrayBuilder, FixedSizeListBuilder};
 
 use crate::error::{ArrowError, Result};
 
-use super::{FixedSizeBinaryBuilder, FixedSizeListBuilder};
-use super::{GenericBinaryBuilder, GenericListBuilder, GenericStringBuilder};
-
 use crate::datatypes::validate_decimal_precision;
 
 /// Array Builder for [`DecimalArray`]
@@ -48,284 +42,6 @@ pub struct DecimalBuilder {
     value_validation: bool,
 }
 
-impl<OffsetSize: OffsetSizeTrait> ArrayBuilder for GenericBinaryBuilder<OffsetSize> {
-    /// Returns the builder as a non-mutable `Any` reference.
-    fn as_any(&self) -> &dyn Any {
-        self
-    }
-
-    /// Returns the builder as a mutable `Any` reference.
-    fn as_any_mut(&mut self) -> &mut dyn Any {
-        self
-    }
-
-    /// Returns the boxed builder as a box of `Any`.
-    fn into_box_any(self: Box<Self>) -> Box<dyn Any> {
-        self
-    }
-
-    /// Returns the number of array slots in the builder
-    fn len(&self) -> usize {
-        self.builder.len()
-    }
-
-    /// Returns whether the number of array slots is zero
-    fn is_empty(&self) -> bool {
-        self.builder.is_empty()
-    }
-
-    /// Builds the array and reset this builder.
-    fn finish(&mut self) -> ArrayRef {
-        Arc::new(self.finish())
-    }
-}
-
-impl<OffsetSize: OffsetSizeTrait> ArrayBuilder for GenericStringBuilder<OffsetSize> {
-    /// Returns the builder as a non-mutable `Any` reference.
-    fn as_any(&self) -> &dyn Any {
-        self
-    }
-
-    /// Returns the builder as a mutable `Any` reference.
-    fn as_any_mut(&mut self) -> &mut dyn Any {
-        self
-    }
-
-    /// Returns the boxed builder as a box of `Any`.
-    fn into_box_any(self: Box<Self>) -> Box<dyn Any> {
-        self
-    }
-
-    /// Returns the number of array slots in the builder
-    fn len(&self) -> usize {
-        self.builder.len()
-    }
-
-    /// Returns whether the number of array slots is zero
-    fn is_empty(&self) -> bool {
-        self.builder.is_empty()
-    }
-
-    /// Builds the array and reset this builder.
-    fn finish(&mut self) -> ArrayRef {
-        let a = GenericStringBuilder::<OffsetSize>::finish(self);
-        Arc::new(a)
-    }
-}
-
-impl ArrayBuilder for FixedSizeBinaryBuilder {
-    /// Returns the builder as a non-mutable `Any` reference.
-    fn as_any(&self) -> &dyn Any {
-        self
-    }
-
-    /// Returns the builder as a mutable `Any` reference.
-    fn as_any_mut(&mut self) -> &mut dyn Any {
-        self
-    }
-
-    /// Returns the boxed builder as a box of `Any`.
-    fn into_box_any(self: Box<Self>) -> Box<dyn Any> {
-        self
-    }
-
-    /// Returns the number of array slots in the builder
-    fn len(&self) -> usize {
-        self.builder.len()
-    }
-
-    /// Returns whether the number of array slots is zero
-    fn is_empty(&self) -> bool {
-        self.builder.is_empty()
-    }
-
-    /// Builds the array and reset this builder.
-    fn finish(&mut self) -> ArrayRef {
-        Arc::new(self.finish())
-    }
-}
-
-impl ArrayBuilder for DecimalBuilder {
-    /// Returns the builder as a non-mutable `Any` reference.
-    fn as_any(&self) -> &dyn Any {
-        self
-    }
-
-    /// Returns the builder as a mutable `Any` reference.
-    fn as_any_mut(&mut self) -> &mut dyn Any {
-        self
-    }
-
-    /// Returns the boxed builder as a box of `Any`.
-    fn into_box_any(self: Box<Self>) -> Box<dyn Any> {
-        self
-    }
-
-    /// Returns the number of array slots in the builder
-    fn len(&self) -> usize {
-        self.builder.len()
-    }
-
-    /// Returns whether the number of array slots is zero
-    fn is_empty(&self) -> bool {
-        self.builder.is_empty()
-    }
-
-    /// Builds the array and reset this builder.
-    fn finish(&mut self) -> ArrayRef {
-        Arc::new(self.finish())
-    }
-}
-
-impl<OffsetSize: OffsetSizeTrait> GenericBinaryBuilder<OffsetSize> {
-    /// Creates a new `GenericBinaryBuilder`, `capacity` is the number of bytes in the values
-    /// array
-    pub fn new(capacity: usize) -> Self {
-        let values_builder = UInt8Builder::new(capacity);
-        Self {
-            builder: GenericListBuilder::new(values_builder),
-        }
-    }
-
-    /// Appends a single byte value into the builder's values array.
-    ///
-    /// Note, when appending individual byte values you must call `append` to delimit each
-    /// distinct list value.
-    #[inline]
-    pub fn append_byte(&mut self, value: u8) -> Result<()> {
-        self.builder.values().append_value(value)?;
-        Ok(())
-    }
-
-    /// Appends a byte slice into the builder.
-    ///
-    /// Automatically calls the `append` method to delimit the slice appended in as a
-    /// distinct array element.
-    #[inline]
-    pub fn append_value(&mut self, value: impl AsRef<[u8]>) -> Result<()> {
-        self.builder.values().append_slice(value.as_ref())?;
-        self.builder.append(true)?;
-        Ok(())
-    }
-
-    /// Finish the current variable-length list array slot.
-    #[inline]
-    pub fn append(&mut self, is_valid: bool) -> Result<()> {
-        self.builder.append(is_valid)
-    }
-
-    /// Append a null value to the array.
-    #[inline]
-    pub fn append_null(&mut self) -> Result<()> {
-        self.append(false)
-    }
-
-    /// Builds the `BinaryArray` and reset this builder.
-    pub fn finish(&mut self) -> GenericBinaryArray<OffsetSize> {
-        GenericBinaryArray::<OffsetSize>::from(self.builder.finish())
-    }
-}
-
-impl<OffsetSize: OffsetSizeTrait> GenericStringBuilder<OffsetSize> {
-    /// Creates a new `StringBuilder`,
-    /// `capacity` is the number of bytes of string data to pre-allocate space for in this builder
-    pub fn new(capacity: usize) -> Self {
-        let values_builder = UInt8Builder::new(capacity);
-        Self {
-            builder: GenericListBuilder::new(values_builder),
-        }
-    }
-
-    /// Creates a new `StringBuilder`,
-    /// `data_capacity` is the number of bytes of string data to pre-allocate space for in this builder
-    /// `item_capacity` is the number of items to pre-allocate space for in this builder
-    pub fn with_capacity(item_capacity: usize, data_capacity: usize) -> Self {
-        let values_builder = UInt8Builder::new(data_capacity);
-        Self {
-            builder: GenericListBuilder::with_capacity(values_builder, item_capacity),
-        }
-    }
-
-    /// Appends a string into the builder.
-    ///
-    /// Automatically calls the `append` method to delimit the string appended in as a
-    /// distinct array element.
-    #[inline]
-    pub fn append_value(&mut self, value: impl AsRef<str>) -> Result<()> {
-        self.builder
-            .values()
-            .append_slice(value.as_ref().as_bytes())?;
-        self.builder.append(true)?;
-        Ok(())
-    }
-
-    /// Finish the current variable-length list array slot.
-    #[inline]
-    pub fn append(&mut self, is_valid: bool) -> Result<()> {
-        self.builder.append(is_valid)
-    }
-
-    /// Append a null value to the array.
-    #[inline]
-    pub fn append_null(&mut self) -> Result<()> {
-        self.append(false)
-    }
-
-    /// Append an `Option` value to the array.
-    #[inline]
-    pub fn append_option(&mut self, value: Option<impl AsRef<str>>) -> Result<()> {
-        match value {
-            None => self.append_null()?,
-            Some(v) => self.append_value(v)?,
-        };
-        Ok(())
-    }
-
-    /// Builds the `StringArray` and reset this builder.
-    pub fn finish(&mut self) -> GenericStringArray<OffsetSize> {
-        GenericStringArray::<OffsetSize>::from(self.builder.finish())
-    }
-}
-
-impl FixedSizeBinaryBuilder {
-    /// Creates a new `BinaryBuilder`, `capacity` is the number of bytes in the values
-    /// array
-    pub fn new(capacity: usize, byte_width: i32) -> Self {
-        let values_builder = UInt8Builder::new(capacity);
-        Self {
-            builder: FixedSizeListBuilder::new(values_builder, byte_width),
-        }
-    }
-
-    /// Appends a byte slice into the builder.
-    ///
-    /// Automatically calls the `append` method to delimit the slice appended in as a
-    /// distinct array element.
-    #[inline]
-    pub fn append_value(&mut self, value: impl AsRef<[u8]>) -> Result<()> {
-        if self.builder.value_length() != value.as_ref().len() as i32 {
-            return Err(ArrowError::InvalidArgumentError(
-                "Byte slice does not have the same length as FixedSizeBinaryBuilder value lengths".to_string()
-            ));
-        }
-        self.builder.values().append_slice(value.as_ref())?;
-        self.builder.append(true)
-    }
-
-    /// Append a null value to the array.
-    #[inline]
-    pub fn append_null(&mut self) -> Result<()> {
-        let length: usize = self.builder.value_length() as usize;
-        self.builder.values().append_slice(&vec![0u8; length][..])?;
-        self.builder.append(false)
-    }
-
-    /// Builds the `FixedSizeBinaryArray` and reset this builder.
-    pub fn finish(&mut self) -> FixedSizeBinaryArray {
-        FixedSizeBinaryArray::from(self.builder.finish())
-    }
-}
-
 impl DecimalBuilder {
     /// Creates a new `BinaryBuilder`, `capacity` is the number of bytes in the values
     /// array
@@ -406,6 +122,38 @@ impl DecimalBuilder {
     }
 }
 
+impl ArrayBuilder for DecimalBuilder {
+    /// Returns the builder as a non-mutable `Any` reference.
+    fn as_any(&self) -> &dyn Any {
+        self
+    }
+
+    /// Returns the builder as a mutable `Any` reference.
+    fn as_any_mut(&mut self) -> &mut dyn Any {
+        self
+    }
+
+    /// Returns the boxed builder as a box of `Any`.
+    fn into_box_any(self: Box<Self>) -> Box<dyn Any> {
+        self
+    }
+
+    /// Returns the number of array slots in the builder
+    fn len(&self) -> usize {
+        self.builder.len()
+    }
+
+    /// Returns whether the number of array slots is zero
+    fn is_empty(&self) -> bool {
+        self.builder.is_empty()
+    }
+
+    /// Builds the array and reset this builder.
+    fn finish(&mut self) -> ArrayRef {
+        Arc::new(self.finish())
+    }
+}
+
 #[cfg(test)]
 mod tests {
     use super::*;
diff --git a/arrow/src/array/builder/fixed_size_binary_builder.rs b/arrow/src/array/builder/fixed_size_binary_builder.rs
new file mode 100644
index 000000000..1d40b4c5b
--- /dev/null
+++ b/arrow/src/array/builder/fixed_size_binary_builder.rs
@@ -0,0 +1,99 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+use crate::array::{
+    ArrayBuilder, ArrayRef, FixedSizeBinaryArray, FixedSizeListBuilder, UInt8Builder,
+};
+use crate::error::{ArrowError, Result};
+use std::any::Any;
+use std::sync::Arc;
+
+#[derive(Debug)]
+pub struct FixedSizeBinaryBuilder {
+    builder: FixedSizeListBuilder<UInt8Builder>,
+}
+
+impl FixedSizeBinaryBuilder {
+    /// Creates a new `FixedSizeBinaryBuilder`, `capacity` is the number of bytes in the values
+    /// array
+    pub fn new(capacity: usize, byte_width: i32) -> Self {
+        let values_builder = UInt8Builder::new(capacity);
+        Self {
+            builder: FixedSizeListBuilder::new(values_builder, byte_width),
+        }
+    }
+
+    /// Appends a byte slice into the builder.
+    ///
+    /// Automatically calls the `append` method to delimit the slice appended in as a
+    /// distinct array element.
+    #[inline]
+    pub fn append_value(&mut self, value: impl AsRef<[u8]>) -> Result<()> {
+        if self.builder.value_length() != value.as_ref().len() as i32 {
+            return Err(ArrowError::InvalidArgumentError(
+                "Byte slice does not have the same length as FixedSizeBinaryBuilder value lengths".to_string()
+            ));
+        }
+        self.builder.values().append_slice(value.as_ref())?;
+        self.builder.append(true)
+    }
+
+    /// Append a null value to the array.
+    #[inline]
+    pub fn append_null(&mut self) -> Result<()> {
+        let length: usize = self.builder.value_length() as usize;
+        self.builder.values().append_slice(&vec![0u8; length][..])?;
+        self.builder.append(false)
+    }
+
+    /// Builds the `FixedSizeBinaryArray` and reset this builder.
+    pub fn finish(&mut self) -> FixedSizeBinaryArray {
+        FixedSizeBinaryArray::from(self.builder.finish())
+    }
+}
+
+impl ArrayBuilder for FixedSizeBinaryBuilder {
+    /// Returns the builder as a non-mutable `Any` reference.
+    fn as_any(&self) -> &dyn Any {
+        self
+    }
+
+    /// Returns the builder as a mutable `Any` reference.
+    fn as_any_mut(&mut self) -> &mut dyn Any {
+        self
+    }
+
+    /// Returns the boxed builder as a box of `Any`.
+    fn into_box_any(self: Box<Self>) -> Box<dyn Any> {
+        self
+    }
+
+    /// Returns the number of array slots in the builder
+    fn len(&self) -> usize {
+        self.builder.len()
+    }
+
+    /// Returns whether the number of array slots is zero
+    fn is_empty(&self) -> bool {
+        self.builder.is_empty()
+    }
+
+    /// Builds the array and reset this builder.
+    fn finish(&mut self) -> ArrayRef {
+        Arc::new(self.finish())
+    }
+}
diff --git a/arrow/src/array/builder/generic_binary_builder.rs b/arrow/src/array/builder/generic_binary_builder.rs
new file mode 100644
index 000000000..fc64eb0a2
--- /dev/null
+++ b/arrow/src/array/builder/generic_binary_builder.rs
@@ -0,0 +1,111 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+use crate::array::{
+    ArrayBuilder, ArrayRef, GenericBinaryArray, GenericListBuilder, OffsetSizeTrait,
+    UInt8Builder,
+};
+use crate::error::Result;
+use std::any::Any;
+use std::sync::Arc;
+
+///  Array builder for `BinaryArray`
+#[derive(Debug)]
+pub struct GenericBinaryBuilder<OffsetSize: OffsetSizeTrait> {
+    builder: GenericListBuilder<OffsetSize, UInt8Builder>,
+}
+
+impl<OffsetSize: OffsetSizeTrait> GenericBinaryBuilder<OffsetSize> {
+    /// Creates a new `GenericBinaryBuilder`, `capacity` is the number of bytes in the values
+    /// array
+    pub fn new(capacity: usize) -> Self {
+        let values_builder = UInt8Builder::new(capacity);
+        Self {
+            builder: GenericListBuilder::new(values_builder),
+        }
+    }
+
+    /// Appends a single byte value into the builder's values array.
+    ///
+    /// Note, when appending individual byte values you must call `append` to delimit each
+    /// distinct list value.
+    #[inline]
+    pub fn append_byte(&mut self, value: u8) -> Result<()> {
+        self.builder.values().append_value(value)?;
+        Ok(())
+    }
+
+    /// Appends a byte slice into the builder.
+    ///
+    /// Automatically calls the `append` method to delimit the slice appended in as a
+    /// distinct array element.
+    #[inline]
+    pub fn append_value(&mut self, value: impl AsRef<[u8]>) -> Result<()> {
+        self.builder.values().append_slice(value.as_ref())?;
+        self.builder.append(true)?;
+        Ok(())
+    }
+
+    /// Finish the current variable-length list array slot.
+    #[inline]
+    pub fn append(&mut self, is_valid: bool) -> Result<()> {
+        self.builder.append(is_valid)
+    }
+
+    /// Append a null value to the array.
+    #[inline]
+    pub fn append_null(&mut self) -> Result<()> {
+        self.append(false)
+    }
+
+    /// Builds the `BinaryArray` and reset this builder.
+    pub fn finish(&mut self) -> GenericBinaryArray<OffsetSize> {
+        GenericBinaryArray::<OffsetSize>::from(self.builder.finish())
+    }
+}
+
+impl<OffsetSize: OffsetSizeTrait> ArrayBuilder for GenericBinaryBuilder<OffsetSize> {
+    /// Returns the builder as a non-mutable `Any` reference.
+    fn as_any(&self) -> &dyn Any {
+        self
+    }
+
+    /// Returns the builder as a mutable `Any` reference.
+    fn as_any_mut(&mut self) -> &mut dyn Any {
+        self
+    }
+
+    /// Returns the boxed builder as a box of `Any`.
+    fn into_box_any(self: Box<Self>) -> Box<dyn Any> {
+        self
+    }
+
+    /// Returns the number of array slots in the builder
+    fn len(&self) -> usize {
+        self.builder.len()
+    }
+
+    /// Returns whether the number of array slots is zero
+    fn is_empty(&self) -> bool {
+        self.builder.is_empty()
+    }
+
+    /// Builds the array and reset this builder.
+    fn finish(&mut self) -> ArrayRef {
+        Arc::new(self.finish())
+    }
+}
diff --git a/arrow/src/array/builder/generic_string_builder.rs b/arrow/src/array/builder/generic_string_builder.rs
new file mode 100644
index 000000000..ee391c4d4
--- /dev/null
+++ b/arrow/src/array/builder/generic_string_builder.rs
@@ -0,0 +1,123 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+use crate::array::{
+    ArrayBuilder, ArrayRef, GenericListBuilder, GenericStringArray, OffsetSizeTrait,
+    UInt8Builder,
+};
+use crate::error::Result;
+use std::any::Any;
+use std::sync::Arc;
+
+#[derive(Debug)]
+pub struct GenericStringBuilder<OffsetSize: OffsetSizeTrait> {
+    builder: GenericListBuilder<OffsetSize, UInt8Builder>,
+}
+
+impl<OffsetSize: OffsetSizeTrait> GenericStringBuilder<OffsetSize> {
+    /// Creates a new `StringBuilder`,
+    /// `capacity` is the number of bytes of string data to pre-allocate space for in this builder
+    pub fn new(capacity: usize) -> Self {
+        let values_builder = UInt8Builder::new(capacity);
+        Self {
+            builder: GenericListBuilder::new(values_builder),
+        }
+    }
+
+    /// Creates a new `StringBuilder`,
+    /// `data_capacity` is the number of bytes of string data to pre-allocate space for in this builder
+    /// `item_capacity` is the number of items to pre-allocate space for in this builder
+    pub fn with_capacity(item_capacity: usize, data_capacity: usize) -> Self {
+        let values_builder = UInt8Builder::new(data_capacity);
+        Self {
+            builder: GenericListBuilder::with_capacity(values_builder, item_capacity),
+        }
+    }
+
+    /// Appends a string into the builder.
+    ///
+    /// Automatically calls the `append` method to delimit the string appended in as a
+    /// distinct array element.
+    #[inline]
+    pub fn append_value(&mut self, value: impl AsRef<str>) -> Result<()> {
+        self.builder
+            .values()
+            .append_slice(value.as_ref().as_bytes())?;
+        self.builder.append(true)?;
+        Ok(())
+    }
+
+    /// Finish the current variable-length list array slot.
+    #[inline]
+    pub fn append(&mut self, is_valid: bool) -> Result<()> {
+        self.builder.append(is_valid)
+    }
+
+    /// Append a null value to the array.
+    #[inline]
+    pub fn append_null(&mut self) -> Result<()> {
+        self.append(false)
+    }
+
+    /// Append an `Option` value to the array.
+    #[inline]
+    pub fn append_option(&mut self, value: Option<impl AsRef<str>>) -> Result<()> {
+        match value {
+            None => self.append_null()?,
+            Some(v) => self.append_value(v)?,
+        };
+        Ok(())
+    }
+
+    /// Builds the `StringArray` and reset this builder.
+    pub fn finish(&mut self) -> GenericStringArray<OffsetSize> {
+        GenericStringArray::<OffsetSize>::from(self.builder.finish())
+    }
+}
+
+impl<OffsetSize: OffsetSizeTrait> ArrayBuilder for GenericStringBuilder<OffsetSize> {
+    /// Returns the builder as a non-mutable `Any` reference.
+    fn as_any(&self) -> &dyn Any {
+        self
+    }
+
+    /// Returns the builder as a mutable `Any` reference.
+    fn as_any_mut(&mut self) -> &mut dyn Any {
+        self
+    }
+
+    /// Returns the boxed builder as a box of `Any`.
+    fn into_box_any(self: Box<Self>) -> Box<dyn Any> {
+        self
+    }
+
+    /// Returns the number of array slots in the builder
+    fn len(&self) -> usize {
+        self.builder.len()
+    }
+
+    /// Returns whether the number of array slots is zero
+    fn is_empty(&self) -> bool {
+        self.builder.is_empty()
+    }
+
+    /// Builds the array and reset this builder.
+    fn finish(&mut self) -> ArrayRef {
+        let a = GenericStringBuilder::<OffsetSize>::finish(self);
+        Arc::new(a)
+    }
+}
diff --git a/arrow/src/array/builder/mod.rs b/arrow/src/array/builder/mod.rs
index 4cd82d9bf..634ef772f 100644
--- a/arrow/src/array/builder/mod.rs
+++ b/arrow/src/array/builder/mod.rs
@@ -24,8 +24,11 @@ mod boolean_buffer_builder;
 mod boolean_builder;
 mod buffer_builder;
 mod decimal_builder;
+mod fixed_size_binary_builder;
 mod fixed_size_list_builder;
+mod generic_binary_builder;
 mod generic_list_builder;
+mod generic_string_builder;
 mod map_builder;
 mod primitive_builder;
 mod primitive_dictionary_builder;
@@ -38,24 +41,23 @@ use std::marker::PhantomData;
 use std::ops::Range;
 
 use super::ArrayRef;
-use super::OffsetSizeTrait;
-use super::UInt8Builder;
 
 pub use boolean_buffer_builder::BooleanBufferBuilder;
 pub use boolean_builder::BooleanBuilder;
 pub use buffer_builder::BufferBuilder;
 pub use decimal_builder::DecimalBuilder;
+pub use fixed_size_binary_builder::FixedSizeBinaryBuilder;
 pub use fixed_size_list_builder::FixedSizeListBuilder;
+pub use generic_binary_builder::GenericBinaryBuilder;
 pub use generic_list_builder::GenericListBuilder;
+pub use generic_string_builder::GenericStringBuilder;
 pub use map_builder::MapBuilder;
 pub use primitive_builder::PrimitiveBuilder;
 pub use primitive_dictionary_builder::PrimitiveDictionaryBuilder;
 pub use string_dictionary_builder::StringDictionaryBuilder;
-pub use struct_builder::StructBuilder;
+pub use struct_builder::{make_builder, StructBuilder};
 pub use union_builder::UnionBuilder;
 
-pub use struct_builder::make_builder;
-
 /// Trait for dealing with different array builders at runtime
 ///
 /// # Example
@@ -139,27 +141,8 @@ pub trait ArrayBuilder: Any + Send {
 pub type ListBuilder<T> = GenericListBuilder<i32, T>;
 pub type LargeListBuilder<T> = GenericListBuilder<i64, T>;
 
-///  Array builder for `BinaryArray`
-#[derive(Debug)]
-pub struct GenericBinaryBuilder<OffsetSize: OffsetSizeTrait> {
-    builder: GenericListBuilder<OffsetSize, UInt8Builder>,
-}
-
 pub type BinaryBuilder = GenericBinaryBuilder<i32>;
 pub type LargeBinaryBuilder = GenericBinaryBuilder<i64>;
 
-#[derive(Debug)]
-pub struct GenericStringBuilder<OffsetSize: OffsetSizeTrait> {
-    builder: GenericListBuilder<OffsetSize, UInt8Builder>,
-}
-
 pub type StringBuilder = GenericStringBuilder<i32>;
 pub type LargeStringBuilder = GenericStringBuilder<i64>;
-
-#[derive(Debug)]
-pub struct FixedSizeBinaryBuilder {
-    builder: FixedSizeListBuilder<UInt8Builder>,
-}
-
-#[cfg(test)]
-mod tests {}
diff --git a/arrow/src/array/builder/union_builder.rs b/arrow/src/array/builder/union_builder.rs
index 78f9a3f4b..95d9ea40a 100644
--- a/arrow/src/array/builder/union_builder.rs
+++ b/arrow/src/array/builder/union_builder.rs
@@ -15,28 +15,22 @@
 // specific language governing permissions and limitations
 // under the License.
 
+use std::any::Any;
 use std::collections::HashMap;
 
 use crate::array::ArrayDataBuilder;
 use crate::array::Int32BufferBuilder;
 use crate::array::Int8BufferBuilder;
 use crate::array::UnionArray;
-use crate::buffer::MutableBuffer;
+use crate::buffer::Buffer;
 
-use crate::datatypes::ArrowPrimitiveType;
 use crate::datatypes::DataType;
 use crate::datatypes::Field;
-use crate::datatypes::IntervalMonthDayNanoType;
-use crate::datatypes::IntervalUnit;
-use crate::datatypes::{Float32Type, Float64Type};
-use crate::datatypes::{Int16Type, Int32Type, Int64Type, Int8Type};
-use crate::datatypes::{UInt16Type, UInt32Type, UInt64Type, UInt8Type};
+use crate::datatypes::{ArrowNativeType, ArrowPrimitiveType};
 use crate::error::{ArrowError, Result};
 
 use super::{BooleanBufferBuilder, BufferBuilder};
 
-use super::buffer_builder::builder_to_mutable_buffer;
-use super::buffer_builder::mutable_buffer_to_builder;
 use crate::array::make_array;
 
 /// `FieldData` is a helper struct to track the state of the fields in the `UnionBuilder`.
@@ -47,101 +41,65 @@ struct FieldData {
     /// The Arrow data type represented in the `values_buffer`, which is untyped
     data_type: DataType,
     /// A buffer containing the values for this field in raw bytes
-    values_buffer: Option<MutableBuffer>,
+    values_buffer: Box<dyn FieldDataValues>,
     ///  The number of array slots represented by the buffer
     slots: usize,
     /// A builder for the null bitmap
     bitmap_builder: BooleanBufferBuilder,
 }
 
+/// A type-erased [`BufferBuilder`] used by [`FieldData`]
+trait FieldDataValues: std::fmt::Debug {
+    fn as_mut_any(&mut self) -> &mut dyn Any;
+
+    fn append_null(&mut self);
+
+    fn finish(&mut self) -> Buffer;
+}
+
+impl<T: ArrowNativeType> FieldDataValues for BufferBuilder<T> {
+    fn as_mut_any(&mut self) -> &mut dyn Any {
+        self
+    }
+
+    fn append_null(&mut self) {
+        self.advance(1)
+    }
+
+    fn finish(&mut self) -> Buffer {
+        self.finish()
+    }
+}
+
 impl FieldData {
     /// Creates a new `FieldData`.
-    fn new(type_id: i8, data_type: DataType) -> Self {
+    fn new<T: ArrowPrimitiveType>(type_id: i8, data_type: DataType) -> Self {
         Self {
             type_id,
             data_type,
-            values_buffer: Some(MutableBuffer::new(1)),
             slots: 0,
+            values_buffer: Box::new(BufferBuilder::<T::Native>::new(1)),
             bitmap_builder: BooleanBufferBuilder::new(1),
         }
     }
 
     /// Appends a single value to this `FieldData`'s `values_buffer`.
-    #[allow(clippy::unnecessary_wraps)]
-    fn append_to_values_buffer<T: ArrowPrimitiveType>(
-        &mut self,
-        v: T::Native,
-    ) -> Result<()> {
-        let values_buffer = self
-            .values_buffer
-            .take()
-            .expect("Values buffer was never created");
-        let mut builder: BufferBuilder<T::Native> =
-            mutable_buffer_to_builder(values_buffer, self.slots);
-        builder.append(v);
-        let mutable_buffer = builder_to_mutable_buffer(builder);
-        self.values_buffer = Some(mutable_buffer);
+    fn append_value<T: ArrowPrimitiveType>(&mut self, v: T::Native) {
+        self.values_buffer
+            .as_mut_any()
+            .downcast_mut::<BufferBuilder<T::Native>>()
+            .expect("Tried to append unexpected type")
+            .append(v);
 
-        self.slots += 1;
         self.bitmap_builder.append(true);
-        Ok(())
+        self.slots += 1;
     }
 
     /// Appends a null to this `FieldData`.
-    #[allow(clippy::unnecessary_wraps)]
-    fn append_null<T: ArrowPrimitiveType>(&mut self) -> Result<()> {
-        let values_buffer = self
-            .values_buffer
-            .take()
-            .expect("Values buffer was never created");
-
-        let mut builder: BufferBuilder<T::Native> =
-            mutable_buffer_to_builder(values_buffer, self.slots);
-
-        builder.advance(1);
-        let mutable_buffer = builder_to_mutable_buffer(builder);
-        self.values_buffer = Some(mutable_buffer);
-        self.slots += 1;
+    fn append_null(&mut self) {
+        self.values_buffer.append_null();
         self.bitmap_builder.append(false);
-        Ok(())
-    }
-
-    /// Appends a null to this `FieldData` when the type is not known at compile time.
-    ///
-    /// As the main `append` method of `UnionBuilder` is generic, we need a way to append null
-    /// slots to the fields that are not being appended to in the case of sparse unions.  This
-    /// method solves this problem by appending dynamically based on `DataType`.
-    ///
-    /// Note, this method does **not** update the length of the `UnionArray` (this is done by the
-    /// main append operation) and assumes that it is called from a method that is generic over `T`
-    /// where `T` satisfies the bound `ArrowPrimitiveType`.
-    fn append_null_dynamic(&mut self) -> Result<()> {
-        match self.data_type {
-            DataType::Null => unimplemented!(),
-            DataType::Int8 => self.append_null::<Int8Type>()?,
-            DataType::Int16 => self.append_null::<Int16Type>()?,
-            DataType::Int32
-            | DataType::Date32
-            | DataType::Time32(_)
-            | DataType::Interval(IntervalUnit::YearMonth) => {
-                self.append_null::<Int32Type>()?
-            }
-            DataType::Int64
-            | DataType::Timestamp(_, _)
-            | DataType::Date64
-            | DataType::Time64(_)
-            | DataType::Interval(IntervalUnit::DayTime)
-            | DataType::Duration(_) => self.append_null::<Int64Type>()?,
-            DataType::Interval(IntervalUnit::MonthDayNano) => self.append_null::<IntervalMonthDayNanoType>()?,
-            DataType::UInt8 => self.append_null::<UInt8Type>()?,
-            DataType::UInt16 => self.append_null::<UInt16Type>()?,
-            DataType::UInt32 => self.append_null::<UInt32Type>()?,
-            DataType::UInt64 => self.append_null::<UInt64Type>()?,
-            DataType::Float32 => self.append_null::<Float32Type>()?,
-            DataType::Float64 => self.append_null::<Float64Type>()?,
-            _ => unreachable!("All cases of types that satisfy the trait bounds over T are covered above."),
-        };
-        Ok(())
+        self.slots += 1;
     }
 }
 
@@ -257,11 +215,12 @@ impl UnionBuilder {
                 data
             }
             None => match self.value_offset_builder {
-                Some(_) => FieldData::new(self.fields.len() as i8, T::DATA_TYPE),
+                Some(_) => FieldData::new::<T>(self.fields.len() as i8, T::DATA_TYPE),
                 None => {
-                    let mut fd = FieldData::new(self.fields.len() as i8, T::DATA_TYPE);
+                    let mut fd =
+                        FieldData::new::<T>(self.fields.len() as i8, T::DATA_TYPE);
                     for _ in 0..self.len {
-                        fd.append_null::<T>()?;
+                        fd.append_null();
                     }
                     fd
                 }
@@ -278,14 +237,14 @@ impl UnionBuilder {
             None => {
                 for (_, fd) in self.fields.iter_mut() {
                     // Append to all bar the FieldData currently being appended to
-                    fd.append_null_dynamic()?;
+                    fd.append_null();
                 }
             }
         }
 
         match v {
-            Some(v) => field_data.append_to_values_buffer::<T>(v)?,
-            None => field_data.append_null::<T>()?,
+            Some(v) => field_data.append_value::<T>(v),
+            None => field_data.append_null(),
         }
 
         self.fields.insert(type_name, field_data);
@@ -303,15 +262,13 @@ impl UnionBuilder {
             FieldData {
                 type_id,
                 data_type,
-                values_buffer,
+                mut values_buffer,
                 slots,
                 mut bitmap_builder,
             },
         ) in self.fields.into_iter()
         {
-            let buffer = values_buffer
-                .expect("The `values_buffer` should only ever be None inside the `append` method.")
-                .into();
+            let buffer = values_buffer.finish();
             let arr_data_builder = ArrayDataBuilder::new(data_type.clone())
                 .add_buffer(buffer)
                 .len(slots)
@@ -333,6 +290,3 @@ impl UnionBuilder {
         UnionArray::try_new(&type_ids, type_id_buffer, value_offsets_buffer, children)
     }
 }
-
-#[cfg(test)]
-mod tests {}