You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by tu...@apache.org on 2022/11/23 16:13:04 UTC

[arrow-rs] branch master updated: Add finish_cloned to ArrayBuilder (#3158)

This is an automated email from the ASF dual-hosted git repository.

tustvold pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git


The following commit(s) were added to refs/heads/master by this push:
     new 6c466afe3 Add finish_cloned to ArrayBuilder (#3158)
6c466afe3 is described below

commit 6c466afe3b0b3a4c7b90c99c27eefade62011c31
Author: askoa <11...@users.noreply.github.com>
AuthorDate: Wed Nov 23 11:12:58 2022 -0500

    Add finish_cloned to ArrayBuilder (#3158)
    
    * add finish_cloned to PrimitiveBuilder
    
    * Add finish_cloned on array builders
    
    * incorporate PR comments and other PR merges
    
    * remove build_clone from union builder
    
    Co-authored-by: askoa <as...@local>
---
 arrow-array/src/builder/boolean_builder.rs         | 45 +++++++++++
 .../src/builder/fixed_size_binary_builder.rs       | 53 +++++++++++++
 arrow-array/src/builder/fixed_size_list_builder.rs | 79 +++++++++++++++++++
 arrow-array/src/builder/generic_bytes_builder.rs   | 56 +++++++++++++-
 arrow-array/src/builder/generic_list_builder.rs    | 55 ++++++++++++++
 arrow-array/src/builder/map_builder.rs             | 48 ++++++++++++
 arrow-array/src/builder/mod.rs                     |  3 +
 arrow-array/src/builder/null_buffer_builder.rs     |  6 +-
 arrow-array/src/builder/primitive_builder.rs       | 46 ++++++++++-
 .../src/builder/primitive_dictionary_builder.rs    | 22 ++++++
 .../src/builder/string_dictionary_builder.rs       | 73 ++++++++++++++++++
 arrow-array/src/builder/struct_builder.rs          | 88 ++++++++++++++++++++++
 12 files changed, 571 insertions(+), 3 deletions(-)

diff --git a/arrow-array/src/builder/boolean_builder.rs b/arrow-array/src/builder/boolean_builder.rs
index 96711dd1f..96f436253 100644
--- a/arrow-array/src/builder/boolean_builder.rs
+++ b/arrow-array/src/builder/boolean_builder.rs
@@ -18,6 +18,7 @@
 use crate::builder::null_buffer_builder::NullBufferBuilder;
 use crate::builder::{ArrayBuilder, BooleanBufferBuilder};
 use crate::{ArrayRef, BooleanArray};
+use arrow_buffer::Buffer;
 use arrow_data::ArrayData;
 use arrow_schema::{ArrowError, DataType};
 use std::any::Any;
@@ -154,6 +155,23 @@ impl BooleanBuilder {
         let array_data = unsafe { builder.build_unchecked() };
         BooleanArray::from(array_data)
     }
+
+    /// Builds the [BooleanArray] without resetting the builder.
+    pub fn finish_cloned(&self) -> BooleanArray {
+        let len = self.len();
+        let null_bit_buffer = self
+            .null_buffer_builder
+            .as_slice()
+            .map(Buffer::from_slice_ref);
+        let value_buffer = Buffer::from_slice_ref(self.values_builder.as_slice());
+        let builder = ArrayData::builder(DataType::Boolean)
+            .len(len)
+            .add_buffer(value_buffer)
+            .null_bit_buffer(null_bit_buffer);
+
+        let array_data = unsafe { builder.build_unchecked() };
+        BooleanArray::from(array_data)
+    }
 }
 
 impl ArrayBuilder for BooleanBuilder {
@@ -186,6 +204,11 @@ impl ArrayBuilder for BooleanBuilder {
     fn finish(&mut self) -> ArrayRef {
         Arc::new(self.finish())
     }
+
+    /// Builds the array without resetting the builder.
+    fn finish_cloned(&self) -> ArrayRef {
+        Arc::new(self.finish_cloned())
+    }
 }
 
 #[cfg(test)]
@@ -259,4 +282,26 @@ mod tests {
         assert_eq!(0, array.null_count());
         assert!(array.data().null_buffer().is_none());
     }
+
+    #[test]
+    fn test_boolean_array_builder_finish_cloned() {
+        let mut builder = BooleanArray::builder(16);
+        builder.append_option(Some(true));
+        builder.append_value(false);
+        builder.append_slice(&[true, false, true]);
+        let mut array = builder.finish_cloned();
+        assert_eq!(3, array.true_count());
+        assert_eq!(2, array.false_count());
+
+        builder
+            .append_values(&[false, false, true], &[true, true, true])
+            .unwrap();
+
+        array = builder.finish();
+        assert_eq!(4, array.true_count());
+        assert_eq!(4, array.false_count());
+
+        assert_eq!(0, array.null_count());
+        assert!(array.data().null_buffer().is_none());
+    }
 }
diff --git a/arrow-array/src/builder/fixed_size_binary_builder.rs b/arrow-array/src/builder/fixed_size_binary_builder.rs
index 15b840d0a..e9581922c 100644
--- a/arrow-array/src/builder/fixed_size_binary_builder.rs
+++ b/arrow-array/src/builder/fixed_size_binary_builder.rs
@@ -18,6 +18,7 @@
 use crate::builder::null_buffer_builder::NullBufferBuilder;
 use crate::builder::{ArrayBuilder, UInt8BufferBuilder};
 use crate::{ArrayRef, FixedSizeBinaryArray};
+use arrow_buffer::Buffer;
 use arrow_data::ArrayData;
 use arrow_schema::{ArrowError, DataType};
 use std::any::Any;
@@ -87,6 +88,23 @@ impl FixedSizeBinaryBuilder {
         let array_data = unsafe { array_data_builder.build_unchecked() };
         FixedSizeBinaryArray::from(array_data)
     }
+
+    /// Builds the [`FixedSizeBinaryArray`] without resetting the builder.
+    pub fn finish_cloned(&self) -> FixedSizeBinaryArray {
+        let array_length = self.len();
+        let values_buffer = Buffer::from_slice_ref(self.values_builder.as_slice());
+        let array_data_builder =
+            ArrayData::builder(DataType::FixedSizeBinary(self.value_length))
+                .add_buffer(values_buffer)
+                .null_bit_buffer(
+                    self.null_buffer_builder
+                        .as_slice()
+                        .map(Buffer::from_slice_ref),
+                )
+                .len(array_length);
+        let array_data = unsafe { array_data_builder.build_unchecked() };
+        FixedSizeBinaryArray::from(array_data)
+    }
 }
 
 impl ArrayBuilder for FixedSizeBinaryBuilder {
@@ -119,6 +137,11 @@ impl ArrayBuilder for FixedSizeBinaryBuilder {
     fn finish(&mut self) -> ArrayRef {
         Arc::new(self.finish())
     }
+
+    /// Builds the array without resetting the builder.
+    fn finish_cloned(&self) -> ArrayRef {
+        Arc::new(self.finish_cloned())
+    }
 }
 
 #[cfg(test)]
@@ -146,6 +169,36 @@ mod tests {
         assert_eq!(5, array.value_length());
     }
 
+    #[test]
+    fn test_fixed_size_binary_builder_finish_cloned() {
+        let mut builder = FixedSizeBinaryBuilder::with_capacity(3, 5);
+
+        //  [b"hello", null, "arrow"]
+        builder.append_value(b"hello").unwrap();
+        builder.append_null();
+        builder.append_value(b"arrow").unwrap();
+        let mut array: FixedSizeBinaryArray = builder.finish_cloned();
+
+        assert_eq!(&DataType::FixedSizeBinary(5), array.data_type());
+        assert_eq!(3, array.len());
+        assert_eq!(1, array.null_count());
+        assert_eq!(10, array.value_offset(2));
+        assert_eq!(5, array.value_length());
+
+        //  [b"finis", null, "clone"]
+        builder.append_value(b"finis").unwrap();
+        builder.append_null();
+        builder.append_value(b"clone").unwrap();
+
+        array = builder.finish();
+
+        assert_eq!(&DataType::FixedSizeBinary(5), array.data_type());
+        assert_eq!(6, array.len());
+        assert_eq!(2, array.null_count());
+        assert_eq!(25, array.value_offset(5));
+        assert_eq!(5, array.value_length());
+    }
+
     #[test]
     fn test_fixed_size_binary_builder_with_zero_value_length() {
         let mut builder = FixedSizeBinaryBuilder::new(0);
diff --git a/arrow-array/src/builder/fixed_size_list_builder.rs b/arrow-array/src/builder/fixed_size_list_builder.rs
index f6388d789..516c22925 100644
--- a/arrow-array/src/builder/fixed_size_list_builder.rs
+++ b/arrow-array/src/builder/fixed_size_list_builder.rs
@@ -18,6 +18,7 @@
 use crate::builder::null_buffer_builder::NullBufferBuilder;
 use crate::builder::ArrayBuilder;
 use crate::{ArrayRef, FixedSizeListArray};
+use arrow_buffer::Buffer;
 use arrow_data::ArrayData;
 use arrow_schema::{DataType, Field};
 use std::any::Any;
@@ -84,6 +85,11 @@ where
     fn finish(&mut self) -> ArrayRef {
         Arc::new(self.finish())
     }
+
+    /// Builds the array without resetting the builder.
+    fn finish_cloned(&self) -> ArrayRef {
+        Arc::new(self.finish_cloned())
+    }
 }
 
 impl<T: ArrayBuilder> FixedSizeListBuilder<T>
@@ -135,6 +141,37 @@ where
 
         FixedSizeListArray::from(array_data)
     }
+
+    /// Builds the [`FixedSizeListArray`] without resetting the builder.
+    pub fn finish_cloned(&self) -> FixedSizeListArray {
+        let len = self.len();
+        let values_arr = self.values_builder.finish_cloned();
+        let values_data = values_arr.data();
+
+        assert_eq!(
+            values_data.len(), len * self.list_len as usize,
+            "Length of the child array ({}) must be the multiple of the value length ({}) and the array length ({}).",
+            values_data.len(),
+            self.list_len,
+            len,
+        );
+
+        let null_bit_buffer = self
+            .null_buffer_builder
+            .as_slice()
+            .map(Buffer::from_slice_ref);
+        let array_data = ArrayData::builder(DataType::FixedSizeList(
+            Box::new(Field::new("item", values_data.data_type().clone(), true)),
+            self.list_len,
+        ))
+        .len(len)
+        .add_child_data(values_data.clone())
+        .null_bit_buffer(null_bit_buffer);
+
+        let array_data = unsafe { array_data.build_unchecked() };
+
+        FixedSizeListArray::from(array_data)
+    }
 }
 
 #[cfg(test)]
@@ -176,6 +213,48 @@ mod tests {
         assert_eq!(3, list_array.value_length());
     }
 
+    #[test]
+    fn test_fixed_size_list_array_builder_finish_cloned() {
+        let values_builder = Int32Builder::new();
+        let mut builder = FixedSizeListBuilder::new(values_builder, 3);
+
+        //  [[0, 1, 2], null, [3, null, 5]] -- more entries are appended after the clone
+        builder.values().append_value(0);
+        builder.values().append_value(1);
+        builder.values().append_value(2);
+        builder.append(true);
+        builder.values().append_null();
+        builder.values().append_null();
+        builder.values().append_null();
+        builder.append(false);
+        builder.values().append_value(3);
+        builder.values().append_null();
+        builder.values().append_value(5);
+        builder.append(true);
+        let mut list_array = builder.finish_cloned();
+
+        assert_eq!(DataType::Int32, list_array.value_type());
+        assert_eq!(3, list_array.len());
+        assert_eq!(1, list_array.null_count());
+        assert_eq!(3, list_array.value_length());
+
+        builder.values().append_value(6);
+        builder.values().append_value(7);
+        builder.values().append_null();
+        builder.append(true);
+        builder.values().append_null();
+        builder.values().append_null();
+        builder.values().append_null();
+        builder.append(false);
+        list_array = builder.finish();
+
+        assert_eq!(DataType::Int32, list_array.value_type());
+        assert_eq!(5, list_array.len());
+        assert_eq!(2, list_array.null_count());
+        assert_eq!(6, list_array.value_offset(2));
+        assert_eq!(3, list_array.value_length());
+    }
+
     #[test]
     fn test_fixed_size_list_array_builder_empty() {
         let values_builder = Int32Array::builder(5);
diff --git a/arrow-array/src/builder/generic_bytes_builder.rs b/arrow-array/src/builder/generic_bytes_builder.rs
index fa0a31ad7..9f9078c70 100644
--- a/arrow-array/src/builder/generic_bytes_builder.rs
+++ b/arrow-array/src/builder/generic_bytes_builder.rs
@@ -19,7 +19,7 @@ use crate::builder::null_buffer_builder::NullBufferBuilder;
 use crate::builder::{ArrayBuilder, BufferBuilder, UInt8BufferBuilder};
 use crate::types::{ByteArrayType, GenericBinaryType, GenericStringType};
 use crate::{ArrayRef, GenericByteArray, OffsetSizeTrait};
-use arrow_buffer::ArrowNativeType;
+use arrow_buffer::{ArrowNativeType, Buffer};
 use arrow_data::ArrayDataBuilder;
 use std::any::Any;
 use std::sync::Arc;
@@ -94,6 +94,25 @@ impl<T: ByteArrayType> GenericByteBuilder<T> {
         GenericByteArray::from(array_data)
     }
 
+    /// Builds the [`GenericByteArray`] without resetting the builder.
+    pub fn finish_cloned(&self) -> GenericByteArray<T> {
+        let array_type = T::DATA_TYPE;
+        let offset_buffer = Buffer::from_slice_ref(self.offsets_builder.as_slice());
+        let value_buffer = Buffer::from_slice_ref(self.value_builder.as_slice());
+        let array_builder = ArrayDataBuilder::new(array_type)
+            .len(self.len())
+            .add_buffer(offset_buffer)
+            .add_buffer(value_buffer)
+            .null_bit_buffer(
+                self.null_buffer_builder
+                    .as_slice()
+                    .map(Buffer::from_slice_ref),
+            );
+
+        let array_data = unsafe { array_builder.build_unchecked() };
+        GenericByteArray::from(array_data)
+    }
+
     /// Returns the current values buffer as a slice
     pub fn values_slice(&self) -> &[u8] {
         self.value_builder.as_slice()
@@ -138,6 +157,11 @@ impl<T: ByteArrayType> ArrayBuilder for GenericByteBuilder<T> {
         Arc::new(self.finish())
     }
 
+    /// Builds the array without resetting the builder.
+    fn finish_cloned(&self) -> ArrayRef {
+        Arc::new(self.finish_cloned())
+    }
+
     /// Returns the builder as a non-mutable `Any` reference.
     fn as_any(&self) -> &dyn Any {
         self
@@ -325,4 +349,34 @@ mod tests {
     fn test_large_string_array_builder_finish() {
         _test_generic_string_array_builder_finish::<i64>()
     }
+
+    fn _test_generic_string_array_builder_finish_cloned<O: OffsetSizeTrait>() {
+        let mut builder = GenericStringBuilder::<O>::with_capacity(3, 11);
+
+        builder.append_value("hello");
+        builder.append_value("rust");
+        builder.append_null();
+
+        let mut arr = builder.finish_cloned();
+        assert!(!builder.is_empty());
+        assert_eq!(3, arr.len());
+
+        builder.append_value("arrow");
+        builder.append_value("parquet");
+        arr = builder.finish();
+
+        assert!(arr.data().null_buffer().is_some());
+        assert_eq!(&[O::zero()], builder.offsets_slice());
+        assert_eq!(5, arr.len());
+    }
+
+    #[test]
+    fn test_string_array_builder_finish_cloned() {
+        _test_generic_string_array_builder_finish_cloned::<i32>()
+    }
+
+    #[test]
+    fn test_large_string_array_builder_finish_cloned() {
+        _test_generic_string_array_builder_finish_cloned::<i64>()
+    }
 }
diff --git a/arrow-array/src/builder/generic_list_builder.rs b/arrow-array/src/builder/generic_list_builder.rs
index 116567864..8f3f881c4 100644
--- a/arrow-array/src/builder/generic_list_builder.rs
+++ b/arrow-array/src/builder/generic_list_builder.rs
@@ -18,6 +18,7 @@
 use crate::builder::null_buffer_builder::NullBufferBuilder;
 use crate::builder::{ArrayBuilder, BufferBuilder};
 use crate::{ArrayRef, GenericListArray, OffsetSizeTrait};
+use arrow_buffer::Buffer;
 use arrow_data::ArrayData;
 use arrow_schema::Field;
 use std::any::Any;
@@ -85,6 +86,11 @@ where
     fn finish(&mut self) -> ArrayRef {
         Arc::new(self.finish())
     }
+
+    /// Builds the array without resetting the builder.
+    fn finish_cloned(&self) -> ArrayRef {
+        Arc::new(self.finish_cloned())
+    }
 }
 
 impl<OffsetSize: OffsetSizeTrait, T: ArrayBuilder> GenericListBuilder<OffsetSize, T>
@@ -138,6 +144,34 @@ where
         GenericListArray::<OffsetSize>::from(array_data)
     }
 
+    /// Builds the [`GenericListArray`] without resetting the builder.
+    pub fn finish_cloned(&self) -> GenericListArray<OffsetSize> {
+        let len = self.len();
+        let values_arr = self.values_builder.finish_cloned();
+        let values_data = values_arr.data();
+
+        let offset_buffer = Buffer::from_slice_ref(self.offsets_builder.as_slice());
+        let null_bit_buffer = self
+            .null_buffer_builder
+            .as_slice()
+            .map(Buffer::from_slice_ref);
+        let field = Box::new(Field::new(
+            "item",
+            values_data.data_type().clone(),
+            true, // TODO: find a consistent way of getting this
+        ));
+        let data_type = GenericListArray::<OffsetSize>::DATA_TYPE_CONSTRUCTOR(field);
+        let array_data_builder = ArrayData::builder(data_type)
+            .len(len)
+            .add_buffer(offset_buffer)
+            .add_child_data(values_data.clone())
+            .null_bit_buffer(null_bit_buffer);
+
+        let array_data = unsafe { array_data_builder.build_unchecked() };
+
+        GenericListArray::<OffsetSize>::from(array_data)
+    }
+
     /// Returns the current offsets buffer as a slice
     pub fn offsets_slice(&self) -> &[OffsetSize] {
         self.offsets_builder.as_slice()
@@ -255,6 +289,27 @@ mod tests {
         assert!(builder.is_empty());
     }
 
+    #[test]
+    fn test_list_array_builder_finish_cloned() {
+        let values_builder = Int32Array::builder(5);
+        let mut builder = ListBuilder::new(values_builder);
+
+        builder.values().append_slice(&[1, 2, 3]);
+        builder.append(true);
+        builder.values().append_slice(&[4, 5, 6]);
+        builder.append(true);
+
+        let mut arr = builder.finish_cloned();
+        assert_eq!(2, arr.len());
+        assert!(!builder.is_empty());
+
+        builder.values().append_slice(&[7, 8, 9]);
+        builder.append(true);
+        arr = builder.finish();
+        assert_eq!(3, arr.len());
+        assert!(builder.is_empty());
+    }
+
     #[test]
     fn test_list_list_array_builder() {
         let primitive_builder = Int32Builder::with_capacity(10);
diff --git a/arrow-array/src/builder/map_builder.rs b/arrow-array/src/builder/map_builder.rs
index 4b7597248..5602f8863 100644
--- a/arrow-array/src/builder/map_builder.rs
+++ b/arrow-array/src/builder/map_builder.rs
@@ -18,6 +18,7 @@
 use crate::builder::null_buffer_builder::NullBufferBuilder;
 use crate::builder::{ArrayBuilder, BufferBuilder};
 use crate::{Array, ArrayRef, MapArray, StructArray};
+use arrow_buffer::Buffer;
 use arrow_data::ArrayData;
 use arrow_schema::{ArrowError, DataType, Field};
 use std::any::Any;
@@ -142,6 +143,48 @@ impl<K: ArrayBuilder, V: ArrayBuilder> MapBuilder<K, V> {
 
         MapArray::from(array_data)
     }
+
+    pub fn finish_cloned(&self) -> MapArray {
+        let len = self.len();
+
+        // Build the keys and values
+        let keys_arr = self.key_builder.finish_cloned();
+        let values_arr = self.value_builder.finish_cloned();
+
+        let keys_field = Field::new(
+            self.field_names.key.as_str(),
+            keys_arr.data_type().clone(),
+            false, // always non-nullable
+        );
+        let values_field = Field::new(
+            self.field_names.value.as_str(),
+            values_arr.data_type().clone(),
+            true,
+        );
+
+        let struct_array =
+            StructArray::from(vec![(keys_field, keys_arr), (values_field, values_arr)]);
+
+        let offset_buffer = Buffer::from_slice_ref(self.offsets_builder.as_slice());
+        let null_bit_buffer = self
+            .null_buffer_builder
+            .as_slice()
+            .map(Buffer::from_slice_ref);
+        let map_field = Box::new(Field::new(
+            self.field_names.entry.as_str(),
+            struct_array.data_type().clone(),
+            false, // always non-nullable
+        ));
+        let array_data = ArrayData::builder(DataType::Map(map_field, false)) // TODO: support sorted keys
+            .len(len)
+            .add_buffer(offset_buffer)
+            .add_child_data(struct_array.into_data())
+            .null_bit_buffer(null_bit_buffer);
+
+        let array_data = unsafe { array_data.build_unchecked() };
+
+        MapArray::from(array_data)
+    }
 }
 
 impl<K: ArrayBuilder, V: ArrayBuilder> ArrayBuilder for MapBuilder<K, V> {
@@ -157,6 +200,11 @@ impl<K: ArrayBuilder, V: ArrayBuilder> ArrayBuilder for MapBuilder<K, V> {
         Arc::new(self.finish())
     }
 
+    /// Builds the array without resetting the builder.
+    fn finish_cloned(&self) -> ArrayRef {
+        Arc::new(self.finish_cloned())
+    }
+
     fn as_any(&self) -> &dyn Any {
         self
     }
diff --git a/arrow-array/src/builder/mod.rs b/arrow-array/src/builder/mod.rs
index a5c1e3d4b..eaf824397 100644
--- a/arrow-array/src/builder/mod.rs
+++ b/arrow-array/src/builder/mod.rs
@@ -107,6 +107,9 @@ pub trait ArrayBuilder: Any + Send {
     /// Builds the array
     fn finish(&mut self) -> ArrayRef;
 
+    /// Builds the array without resetting the underlying builder.
+    fn finish_cloned(&self) -> ArrayRef;
+
     /// Returns the builder as a non-mutable `Any` reference.
     ///
     /// This is most useful when one wants to call non-mutable APIs on a specific builder
diff --git a/arrow-array/src/builder/null_buffer_builder.rs b/arrow-array/src/builder/null_buffer_builder.rs
index fef7214d5..b3c788fe5 100644
--- a/arrow-array/src/builder/null_buffer_builder.rs
+++ b/arrow-array/src/builder/null_buffer_builder.rs
@@ -135,7 +135,11 @@ impl NullBufferBuilder {
         buf
     }
 
-    #[inline]
+    /// Returns the inner bitmap builder as a slice, or `None` if no bitmap has been materialized
+    pub fn as_slice(&self) -> Option<&[u8]> {
+        Some(self.bitmap_builder.as_ref()?.as_slice())
+    }
+
     fn materialize_if_needed(&mut self) {
         if self.bitmap_builder.is_none() {
             self.materialize()
diff --git a/arrow-array/src/builder/primitive_builder.rs b/arrow-array/src/builder/primitive_builder.rs
index 55d8bac01..7a1fbafc7 100644
--- a/arrow-array/src/builder/primitive_builder.rs
+++ b/arrow-array/src/builder/primitive_builder.rs
@@ -19,7 +19,7 @@ use crate::builder::null_buffer_builder::NullBufferBuilder;
 use crate::builder::{ArrayBuilder, BufferBuilder};
 use crate::types::*;
 use crate::{ArrayRef, ArrowPrimitiveType, PrimitiveArray};
-use arrow_buffer::MutableBuffer;
+use arrow_buffer::{Buffer, MutableBuffer};
 use arrow_data::ArrayData;
 use std::any::Any;
 use std::sync::Arc;
@@ -93,6 +93,11 @@ impl<T: ArrowPrimitiveType> ArrayBuilder for PrimitiveBuilder<T> {
     fn finish(&mut self) -> ArrayRef {
         Arc::new(self.finish())
     }
+
+    /// Builds the array without resetting the builder.
+    fn finish_cloned(&self) -> ArrayRef {
+        Arc::new(self.finish_cloned())
+    }
 }
 
 impl<T: ArrowPrimitiveType> Default for PrimitiveBuilder<T> {
@@ -219,6 +224,23 @@ impl<T: ArrowPrimitiveType> PrimitiveBuilder<T> {
         PrimitiveArray::<T>::from(array_data)
     }
 
+    /// Builds the [`PrimitiveArray`] without resetting the builder.
+    pub fn finish_cloned(&self) -> PrimitiveArray<T> {
+        let len = self.len();
+        let null_bit_buffer = self
+            .null_buffer_builder
+            .as_slice()
+            .map(Buffer::from_slice_ref);
+        let values_buffer = Buffer::from_slice_ref(self.values_builder.as_slice());
+        let builder = ArrayData::builder(T::DATA_TYPE)
+            .len(len)
+            .add_buffer(values_buffer)
+            .null_bit_buffer(null_bit_buffer);
+
+        let array_data = unsafe { builder.build_unchecked() };
+        PrimitiveArray::<T>::from(array_data)
+    }
+
     /// Returns the current values buffer as a slice
     pub fn values_slice(&self) -> &[T::Native] {
         self.values_builder.as_slice()
@@ -431,4 +453,26 @@ mod tests {
         assert_eq!(5, arr.len());
         assert_eq!(0, builder.len());
     }
+
+    #[test]
+    fn test_primitive_array_builder_finish_cloned() {
+        let mut builder = Int32Builder::new();
+        builder.append_value(23);
+        builder.append_value(45);
+        let result = builder.finish_cloned();
+        assert_eq!(result, Int32Array::from(vec![23, 45]));
+        builder.append_value(56);
+        assert_eq!(builder.finish_cloned(), Int32Array::from(vec![23, 45, 56]));
+
+        builder.append_slice(&[2, 4, 6, 8]);
+        let mut arr = builder.finish();
+        assert_eq!(7, arr.len());
+        assert_eq!(arr, Int32Array::from(vec![23, 45, 56, 2, 4, 6, 8]));
+        assert_eq!(0, builder.len());
+
+        builder.append_slice(&[1, 3, 5, 7, 9]);
+        arr = builder.finish();
+        assert_eq!(5, arr.len());
+        assert_eq!(0, builder.len());
+    }
 }
diff --git a/arrow-array/src/builder/primitive_dictionary_builder.rs b/arrow-array/src/builder/primitive_dictionary_builder.rs
index c43416e5a..5b8a72835 100644
--- a/arrow-array/src/builder/primitive_dictionary_builder.rs
+++ b/arrow-array/src/builder/primitive_dictionary_builder.rs
@@ -160,6 +160,11 @@ where
     fn finish(&mut self) -> ArrayRef {
         Arc::new(self.finish())
     }
+
+    /// Builds the array without resetting the builder.
+    fn finish_cloned(&self) -> ArrayRef {
+        Arc::new(self.finish_cloned())
+    }
 }
 
 impl<K, V> PrimitiveDictionaryBuilder<K, V>
@@ -210,6 +215,23 @@ where
 
         DictionaryArray::from(unsafe { builder.build_unchecked() })
     }
+
+    /// Builds the `DictionaryArray` without resetting the builder.
+    pub fn finish_cloned(&self) -> DictionaryArray<K> {
+        let values = self.values_builder.finish_cloned();
+        let keys = self.keys_builder.finish_cloned();
+
+        let data_type =
+            DataType::Dictionary(Box::new(K::DATA_TYPE), Box::new(V::DATA_TYPE));
+
+        let builder = keys
+            .into_data()
+            .into_builder()
+            .data_type(data_type)
+            .child_data(vec![values.into_data()]);
+
+        DictionaryArray::from(unsafe { builder.build_unchecked() })
+    }
 }
 
 #[cfg(test)]
diff --git a/arrow-array/src/builder/string_dictionary_builder.rs b/arrow-array/src/builder/string_dictionary_builder.rs
index e41086c87..f44756b6b 100644
--- a/arrow-array/src/builder/string_dictionary_builder.rs
+++ b/arrow-array/src/builder/string_dictionary_builder.rs
@@ -222,6 +222,11 @@ where
     fn finish(&mut self) -> ArrayRef {
         Arc::new(self.finish())
     }
+
+    /// Builds the array without resetting the builder.
+    fn finish_cloned(&self) -> ArrayRef {
+        Arc::new(self.finish_cloned())
+    }
 }
 
 impl<K> StringDictionaryBuilder<K>
@@ -287,6 +292,23 @@ where
 
         DictionaryArray::from(unsafe { builder.build_unchecked() })
     }
+
+    /// Builds the `DictionaryArray` without resetting the builder.
+    pub fn finish_cloned(&self) -> DictionaryArray<K> {
+        let values = self.values_builder.finish_cloned();
+        let keys = self.keys_builder.finish_cloned();
+
+        let data_type =
+            DataType::Dictionary(Box::new(K::DATA_TYPE), Box::new(DataType::Utf8));
+
+        let builder = keys
+            .into_data()
+            .into_builder()
+            .data_type(data_type)
+            .child_data(vec![values.into_data()]);
+
+        DictionaryArray::from(unsafe { builder.build_unchecked() })
+    }
 }
 
 fn get_bytes<'a, K: ArrowNativeType>(values: &'a StringBuilder, key: &K) -> &'a [u8] {
@@ -331,6 +353,57 @@ mod tests {
         assert_eq!(ava.value(1), "def");
     }
 
+    #[test]
+    fn test_string_dictionary_builder_finish_cloned() {
+        let mut builder = StringDictionaryBuilder::<Int8Type>::new();
+        builder.append("abc").unwrap();
+        builder.append_null();
+        builder.append("def").unwrap();
+        builder.append("def").unwrap();
+        builder.append("abc").unwrap();
+        let mut array = builder.finish_cloned();
+
+        assert_eq!(
+            array.keys(),
+            &Int8Array::from(vec![Some(0), None, Some(1), Some(1), Some(0)])
+        );
+
+        // Values are polymorphic and so require a downcast.
+        let av = array.values();
+        let ava: &StringArray = av.as_any().downcast_ref::<StringArray>().unwrap();
+
+        assert_eq!(ava.value(0), "abc");
+        assert_eq!(ava.value(1), "def");
+
+        builder.append("abc").unwrap();
+        builder.append("ghi").unwrap();
+        builder.append("def").unwrap();
+
+        array = builder.finish();
+
+        assert_eq!(
+            array.keys(),
+            &Int8Array::from(vec![
+                Some(0),
+                None,
+                Some(1),
+                Some(1),
+                Some(0),
+                Some(0),
+                Some(2),
+                Some(1)
+            ])
+        );
+
+        // Values are polymorphic and so require a downcast.
+        let av2 = array.values();
+        let ava2: &StringArray = av2.as_any().downcast_ref::<StringArray>().unwrap();
+
+        assert_eq!(ava2.value(0), "abc");
+        assert_eq!(ava2.value(1), "def");
+        assert_eq!(ava2.value(2), "ghi");
+    }
+
     #[test]
     fn test_string_dictionary_builder_with_existing_dictionary() {
         let dictionary = StringArray::from(vec![None, Some("def"), Some("abc")]);
diff --git a/arrow-array/src/builder/struct_builder.rs b/arrow-array/src/builder/struct_builder.rs
index f00f81d1a..98d0e1a1d 100644
--- a/arrow-array/src/builder/struct_builder.rs
+++ b/arrow-array/src/builder/struct_builder.rs
@@ -18,6 +18,7 @@
 use crate::builder::null_buffer_builder::NullBufferBuilder;
 use crate::builder::*;
 use crate::{Array, ArrayRef, StructArray};
+use arrow_buffer::Buffer;
 use arrow_data::ArrayData;
 use arrow_schema::{DataType, Field, IntervalUnit, TimeUnit};
 use std::any::Any;
@@ -63,6 +64,11 @@ impl ArrayBuilder for StructBuilder {
         Arc::new(self.finish())
     }
 
+    /// Builds the array without resetting the builder.
+    fn finish_cloned(&self) -> ArrayRef {
+        Arc::new(self.finish_cloned())
+    }
+
     /// Returns the builder as a non-mutable `Any` reference.
     ///
     /// This is most useful when one wants to call non-mutable APIs on a specific builder
@@ -230,6 +236,30 @@ impl StructBuilder {
         StructArray::from(array_data)
     }
 
+    /// Builds the `StructArray` without resetting the builder.
+    pub fn finish_cloned(&self) -> StructArray {
+        self.validate_content();
+
+        let mut child_data = Vec::with_capacity(self.field_builders.len());
+        for f in &self.field_builders {
+            let arr = f.finish_cloned();
+            child_data.push(arr.data().clone());
+        }
+        let length = self.len();
+        let null_bit_buffer = self
+            .null_buffer_builder
+            .as_slice()
+            .map(Buffer::from_slice_ref);
+
+        let builder = ArrayData::builder(DataType::Struct(self.fields.clone()))
+            .len(length)
+            .child_data(child_data)
+            .null_bit_buffer(null_bit_buffer);
+
+        let array_data = unsafe { builder.build_unchecked() };
+        StructArray::from(array_data)
+    }
+
     /// Constructs and validates contents in the builder to ensure that
     /// - fields and field_builders are of equal length
     /// - the number of items in individual field_builders are equal to self.len()
@@ -374,6 +404,64 @@ mod tests {
         assert_eq!(0, builder.len());
     }
 
+    #[test]
+    fn test_struct_array_builder_finish_cloned() {
+        let int_builder = Int32Builder::new();
+        let bool_builder = BooleanBuilder::new();
+
+        let mut fields = Vec::new();
+        let mut field_builders = Vec::new();
+        fields.push(Field::new("f1", DataType::Int32, false));
+        field_builders.push(Box::new(int_builder) as Box<dyn ArrayBuilder>);
+        fields.push(Field::new("f2", DataType::Boolean, false));
+        field_builders.push(Box::new(bool_builder) as Box<dyn ArrayBuilder>);
+
+        let mut builder = StructBuilder::new(fields, field_builders);
+        builder
+            .field_builder::<Int32Builder>(0)
+            .unwrap()
+            .append_slice(&[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]);
+        builder
+            .field_builder::<BooleanBuilder>(1)
+            .unwrap()
+            .append_slice(&[
+                false, true, false, true, false, true, false, true, false, true,
+            ]);
+
+        // Append slot values - all are valid.
+        for _ in 0..10 {
+            builder.append(true);
+        }
+
+        assert_eq!(10, builder.len());
+
+        let mut arr = builder.finish_cloned();
+
+        assert_eq!(10, arr.len());
+        assert_eq!(10, builder.len());
+
+        builder
+            .field_builder::<Int32Builder>(0)
+            .unwrap()
+            .append_slice(&[1, 3, 5, 7, 9]);
+        builder
+            .field_builder::<BooleanBuilder>(1)
+            .unwrap()
+            .append_slice(&[false, true, false, true, false]);
+
+        // Append slot values - all are valid.
+        for _ in 0..5 {
+            builder.append(true);
+        }
+
+        assert_eq!(15, builder.len());
+
+        arr = builder.finish();
+
+        assert_eq!(15, arr.len());
+        assert_eq!(0, builder.len());
+    }
+
     #[test]
     fn test_struct_array_builder_from_schema() {
         let mut fields = vec![