You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by tu...@apache.org on 2022/11/23 16:13:04 UTC
[arrow-rs] branch master updated: Add finish_cloned to ArrayBuilder (#3158)
This is an automated email from the ASF dual-hosted git repository.
tustvold pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git
The following commit(s) were added to refs/heads/master by this push:
new 6c466afe3 Add finish_cloned to ArrayBuilder (#3158)
6c466afe3 is described below
commit 6c466afe3b0b3a4c7b90c99c27eefade62011c31
Author: askoa <11...@users.noreply.github.com>
AuthorDate: Wed Nov 23 11:12:58 2022 -0500
Add finish_cloned to ArrayBuilder (#3158)
* add finish_cloned to PrimitiveBuilder
* Add finish_cloned on array builders
* incorporate PR comments and other PR merges
* remove build_clone from union builder
Co-authored-by: askoa <as...@local>
---
arrow-array/src/builder/boolean_builder.rs | 45 +++++++++++
.../src/builder/fixed_size_binary_builder.rs | 53 +++++++++++++
arrow-array/src/builder/fixed_size_list_builder.rs | 79 +++++++++++++++++++
arrow-array/src/builder/generic_bytes_builder.rs | 56 +++++++++++++-
arrow-array/src/builder/generic_list_builder.rs | 55 ++++++++++++++
arrow-array/src/builder/map_builder.rs | 48 ++++++++++++
arrow-array/src/builder/mod.rs | 3 +
arrow-array/src/builder/null_buffer_builder.rs | 6 +-
arrow-array/src/builder/primitive_builder.rs | 46 ++++++++++-
.../src/builder/primitive_dictionary_builder.rs | 22 ++++++
.../src/builder/string_dictionary_builder.rs | 73 ++++++++++++++++++
arrow-array/src/builder/struct_builder.rs | 88 ++++++++++++++++++++++
12 files changed, 571 insertions(+), 3 deletions(-)
diff --git a/arrow-array/src/builder/boolean_builder.rs b/arrow-array/src/builder/boolean_builder.rs
index 96711dd1f..96f436253 100644
--- a/arrow-array/src/builder/boolean_builder.rs
+++ b/arrow-array/src/builder/boolean_builder.rs
@@ -18,6 +18,7 @@
use crate::builder::null_buffer_builder::NullBufferBuilder;
use crate::builder::{ArrayBuilder, BooleanBufferBuilder};
use crate::{ArrayRef, BooleanArray};
+use arrow_buffer::Buffer;
use arrow_data::ArrayData;
use arrow_schema::{ArrowError, DataType};
use std::any::Any;
@@ -154,6 +155,23 @@ impl BooleanBuilder {
let array_data = unsafe { builder.build_unchecked() };
BooleanArray::from(array_data)
}
+
+ /// Builds the [`BooleanArray`] without resetting the builder.
+ pub fn finish_cloned(&self) -> BooleanArray {
+ let len = self.len();
+ let null_bit_buffer = self
+ .null_buffer_builder
+ .as_slice()
+ .map(Buffer::from_slice_ref);
+ let value_buffer = Buffer::from_slice_ref(self.values_builder.as_slice());
+ let builder = ArrayData::builder(DataType::Boolean)
+ .len(len)
+ .add_buffer(value_buffer)
+ .null_bit_buffer(null_bit_buffer);
+
+ let array_data = unsafe { builder.build_unchecked() };
+ BooleanArray::from(array_data)
+ }
}
impl ArrayBuilder for BooleanBuilder {
@@ -186,6 +204,11 @@ impl ArrayBuilder for BooleanBuilder {
fn finish(&mut self) -> ArrayRef {
Arc::new(self.finish())
}
+
+ /// Builds the array without resetting the builder.
+ fn finish_cloned(&self) -> ArrayRef {
+ Arc::new(self.finish_cloned())
+ }
}
#[cfg(test)]
@@ -259,4 +282,26 @@ mod tests {
assert_eq!(0, array.null_count());
assert!(array.data().null_buffer().is_none());
}
+
+ #[test]
+ fn test_boolean_array_builder_finish_cloned() {
+ let mut builder = BooleanArray::builder(16);
+ builder.append_option(Some(true));
+ builder.append_value(false);
+ builder.append_slice(&[true, false, true]);
+ let mut array = builder.finish_cloned();
+ assert_eq!(3, array.true_count());
+ assert_eq!(2, array.false_count());
+
+ builder
+ .append_values(&[false, false, true], &[true, true, true])
+ .unwrap();
+
+ array = builder.finish();
+ assert_eq!(4, array.true_count());
+ assert_eq!(4, array.false_count());
+
+ assert_eq!(0, array.null_count());
+ assert!(array.data().null_buffer().is_none());
+ }
}
diff --git a/arrow-array/src/builder/fixed_size_binary_builder.rs b/arrow-array/src/builder/fixed_size_binary_builder.rs
index 15b840d0a..e9581922c 100644
--- a/arrow-array/src/builder/fixed_size_binary_builder.rs
+++ b/arrow-array/src/builder/fixed_size_binary_builder.rs
@@ -18,6 +18,7 @@
use crate::builder::null_buffer_builder::NullBufferBuilder;
use crate::builder::{ArrayBuilder, UInt8BufferBuilder};
use crate::{ArrayRef, FixedSizeBinaryArray};
+use arrow_buffer::Buffer;
use arrow_data::ArrayData;
use arrow_schema::{ArrowError, DataType};
use std::any::Any;
@@ -87,6 +88,23 @@ impl FixedSizeBinaryBuilder {
let array_data = unsafe { array_data_builder.build_unchecked() };
FixedSizeBinaryArray::from(array_data)
}
+
+ /// Builds the [`FixedSizeBinaryArray`] without resetting the builder.
+ pub fn finish_cloned(&self) -> FixedSizeBinaryArray {
+ let array_length = self.len();
+ let values_buffer = Buffer::from_slice_ref(self.values_builder.as_slice());
+ let array_data_builder =
+ ArrayData::builder(DataType::FixedSizeBinary(self.value_length))
+ .add_buffer(values_buffer)
+ .null_bit_buffer(
+ self.null_buffer_builder
+ .as_slice()
+ .map(Buffer::from_slice_ref),
+ )
+ .len(array_length);
+ let array_data = unsafe { array_data_builder.build_unchecked() };
+ FixedSizeBinaryArray::from(array_data)
+ }
}
impl ArrayBuilder for FixedSizeBinaryBuilder {
@@ -119,6 +137,11 @@ impl ArrayBuilder for FixedSizeBinaryBuilder {
fn finish(&mut self) -> ArrayRef {
Arc::new(self.finish())
}
+
+ /// Builds the array without resetting the builder.
+ fn finish_cloned(&self) -> ArrayRef {
+ Arc::new(self.finish_cloned())
+ }
}
#[cfg(test)]
@@ -146,6 +169,36 @@ mod tests {
assert_eq!(5, array.value_length());
}
+ #[test]
+ fn test_fixed_size_binary_builder_finish_cloned() {
+ let mut builder = FixedSizeBinaryBuilder::with_capacity(3, 5);
+
+ // [b"hello", null, "arrow"]
+ builder.append_value(b"hello").unwrap();
+ builder.append_null();
+ builder.append_value(b"arrow").unwrap();
+ let mut array: FixedSizeBinaryArray = builder.finish_cloned();
+
+ assert_eq!(&DataType::FixedSizeBinary(5), array.data_type());
+ assert_eq!(3, array.len());
+ assert_eq!(1, array.null_count());
+ assert_eq!(10, array.value_offset(2));
+ assert_eq!(5, array.value_length());
+
+ // [b"finis", null, "clone"]
+ builder.append_value(b"finis").unwrap();
+ builder.append_null();
+ builder.append_value(b"clone").unwrap();
+
+ array = builder.finish();
+
+ assert_eq!(&DataType::FixedSizeBinary(5), array.data_type());
+ assert_eq!(6, array.len());
+ assert_eq!(2, array.null_count());
+ assert_eq!(25, array.value_offset(5));
+ assert_eq!(5, array.value_length());
+ }
+
#[test]
fn test_fixed_size_binary_builder_with_zero_value_length() {
let mut builder = FixedSizeBinaryBuilder::new(0);
diff --git a/arrow-array/src/builder/fixed_size_list_builder.rs b/arrow-array/src/builder/fixed_size_list_builder.rs
index f6388d789..516c22925 100644
--- a/arrow-array/src/builder/fixed_size_list_builder.rs
+++ b/arrow-array/src/builder/fixed_size_list_builder.rs
@@ -18,6 +18,7 @@
use crate::builder::null_buffer_builder::NullBufferBuilder;
use crate::builder::ArrayBuilder;
use crate::{ArrayRef, FixedSizeListArray};
+use arrow_buffer::Buffer;
use arrow_data::ArrayData;
use arrow_schema::{DataType, Field};
use std::any::Any;
@@ -84,6 +85,11 @@ where
fn finish(&mut self) -> ArrayRef {
Arc::new(self.finish())
}
+
+ /// Builds the array without resetting the builder.
+ fn finish_cloned(&self) -> ArrayRef {
+ Arc::new(self.finish_cloned())
+ }
}
impl<T: ArrayBuilder> FixedSizeListBuilder<T>
@@ -135,6 +141,37 @@ where
FixedSizeListArray::from(array_data)
}
+
+ /// Builds the [`FixedSizeListArray`] without resetting the builder.
+ pub fn finish_cloned(&self) -> FixedSizeListArray {
+ let len = self.len();
+ let values_arr = self.values_builder.finish_cloned();
+ let values_data = values_arr.data();
+
+ assert_eq!(
+ values_data.len(), len * self.list_len as usize,
+ "Length of the child array ({}) must be the multiple of the value length ({}) and the array length ({}).",
+ values_data.len(),
+ self.list_len,
+ len,
+ );
+
+ let null_bit_buffer = self
+ .null_buffer_builder
+ .as_slice()
+ .map(Buffer::from_slice_ref);
+ let array_data = ArrayData::builder(DataType::FixedSizeList(
+ Box::new(Field::new("item", values_data.data_type().clone(), true)),
+ self.list_len,
+ ))
+ .len(len)
+ .add_child_data(values_data.clone())
+ .null_bit_buffer(null_bit_buffer);
+
+ let array_data = unsafe { array_data.build_unchecked() };
+
+ FixedSizeListArray::from(array_data)
+ }
}
#[cfg(test)]
@@ -176,6 +213,48 @@ mod tests {
assert_eq!(3, list_array.value_length());
}
+ #[test]
+ fn test_fixed_size_list_array_builder_finish_cloned() {
+ let values_builder = Int32Builder::new();
+ let mut builder = FixedSizeListBuilder::new(values_builder, 3);
+
+ // [[0, 1, 2], null, [3, null, 5]]
+ builder.values().append_value(0);
+ builder.values().append_value(1);
+ builder.values().append_value(2);
+ builder.append(true);
+ builder.values().append_null();
+ builder.values().append_null();
+ builder.values().append_null();
+ builder.append(false);
+ builder.values().append_value(3);
+ builder.values().append_null();
+ builder.values().append_value(5);
+ builder.append(true);
+ let mut list_array = builder.finish_cloned();
+
+ assert_eq!(DataType::Int32, list_array.value_type());
+ assert_eq!(3, list_array.len());
+ assert_eq!(1, list_array.null_count());
+ assert_eq!(3, list_array.value_length());
+
+ builder.values().append_value(6);
+ builder.values().append_value(7);
+ builder.values().append_null();
+ builder.append(true);
+ builder.values().append_null();
+ builder.values().append_null();
+ builder.values().append_null();
+ builder.append(false);
+ list_array = builder.finish();
+
+ assert_eq!(DataType::Int32, list_array.value_type());
+ assert_eq!(5, list_array.len());
+ assert_eq!(2, list_array.null_count());
+ assert_eq!(6, list_array.value_offset(2));
+ assert_eq!(3, list_array.value_length());
+ }
+
#[test]
fn test_fixed_size_list_array_builder_empty() {
let values_builder = Int32Array::builder(5);
diff --git a/arrow-array/src/builder/generic_bytes_builder.rs b/arrow-array/src/builder/generic_bytes_builder.rs
index fa0a31ad7..9f9078c70 100644
--- a/arrow-array/src/builder/generic_bytes_builder.rs
+++ b/arrow-array/src/builder/generic_bytes_builder.rs
@@ -19,7 +19,7 @@ use crate::builder::null_buffer_builder::NullBufferBuilder;
use crate::builder::{ArrayBuilder, BufferBuilder, UInt8BufferBuilder};
use crate::types::{ByteArrayType, GenericBinaryType, GenericStringType};
use crate::{ArrayRef, GenericByteArray, OffsetSizeTrait};
-use arrow_buffer::ArrowNativeType;
+use arrow_buffer::{ArrowNativeType, Buffer};
use arrow_data::ArrayDataBuilder;
use std::any::Any;
use std::sync::Arc;
@@ -94,6 +94,25 @@ impl<T: ByteArrayType> GenericByteBuilder<T> {
GenericByteArray::from(array_data)
}
+ /// Builds the [`GenericByteArray`] without resetting the builder.
+ pub fn finish_cloned(&self) -> GenericByteArray<T> {
+ let array_type = T::DATA_TYPE;
+ let offset_buffer = Buffer::from_slice_ref(self.offsets_builder.as_slice());
+ let value_buffer = Buffer::from_slice_ref(self.value_builder.as_slice());
+ let array_builder = ArrayDataBuilder::new(array_type)
+ .len(self.len())
+ .add_buffer(offset_buffer)
+ .add_buffer(value_buffer)
+ .null_bit_buffer(
+ self.null_buffer_builder
+ .as_slice()
+ .map(Buffer::from_slice_ref),
+ );
+
+ let array_data = unsafe { array_builder.build_unchecked() };
+ GenericByteArray::from(array_data)
+ }
+
/// Returns the current values buffer as a slice
pub fn values_slice(&self) -> &[u8] {
self.value_builder.as_slice()
@@ -138,6 +157,11 @@ impl<T: ByteArrayType> ArrayBuilder for GenericByteBuilder<T> {
Arc::new(self.finish())
}
+ /// Builds the array without resetting the builder.
+ fn finish_cloned(&self) -> ArrayRef {
+ Arc::new(self.finish_cloned())
+ }
+
/// Returns the builder as a non-mutable `Any` reference.
fn as_any(&self) -> &dyn Any {
self
@@ -325,4 +349,34 @@ mod tests {
fn test_large_string_array_builder_finish() {
_test_generic_string_array_builder_finish::<i64>()
}
+
+ fn _test_generic_string_array_builder_finish_cloned<O: OffsetSizeTrait>() {
+ let mut builder = GenericStringBuilder::<O>::with_capacity(3, 11);
+
+ builder.append_value("hello");
+ builder.append_value("rust");
+ builder.append_null();
+
+ let mut arr = builder.finish_cloned();
+ assert!(!builder.is_empty());
+ assert_eq!(3, arr.len());
+
+ builder.append_value("arrow");
+ builder.append_value("parquet");
+ arr = builder.finish();
+
+ assert!(arr.data().null_buffer().is_some());
+ assert_eq!(&[O::zero()], builder.offsets_slice());
+ assert_eq!(5, arr.len());
+ }
+
+ #[test]
+ fn test_string_array_builder_finish_cloned() {
+ _test_generic_string_array_builder_finish_cloned::<i32>()
+ }
+
+ #[test]
+ fn test_large_string_array_builder_finish_cloned() {
+ _test_generic_string_array_builder_finish_cloned::<i64>()
+ }
}
diff --git a/arrow-array/src/builder/generic_list_builder.rs b/arrow-array/src/builder/generic_list_builder.rs
index 116567864..8f3f881c4 100644
--- a/arrow-array/src/builder/generic_list_builder.rs
+++ b/arrow-array/src/builder/generic_list_builder.rs
@@ -18,6 +18,7 @@
use crate::builder::null_buffer_builder::NullBufferBuilder;
use crate::builder::{ArrayBuilder, BufferBuilder};
use crate::{ArrayRef, GenericListArray, OffsetSizeTrait};
+use arrow_buffer::Buffer;
use arrow_data::ArrayData;
use arrow_schema::Field;
use std::any::Any;
@@ -85,6 +86,11 @@ where
fn finish(&mut self) -> ArrayRef {
Arc::new(self.finish())
}
+
+ /// Builds the array without resetting the builder.
+ fn finish_cloned(&self) -> ArrayRef {
+ Arc::new(self.finish_cloned())
+ }
}
impl<OffsetSize: OffsetSizeTrait, T: ArrayBuilder> GenericListBuilder<OffsetSize, T>
@@ -138,6 +144,34 @@ where
GenericListArray::<OffsetSize>::from(array_data)
}
+ /// Builds the [`GenericListArray`] without resetting the builder.
+ pub fn finish_cloned(&self) -> GenericListArray<OffsetSize> {
+ let len = self.len();
+ let values_arr = self.values_builder.finish_cloned();
+ let values_data = values_arr.data();
+
+ let offset_buffer = Buffer::from_slice_ref(self.offsets_builder.as_slice());
+ let null_bit_buffer = self
+ .null_buffer_builder
+ .as_slice()
+ .map(Buffer::from_slice_ref);
+ let field = Box::new(Field::new(
+ "item",
+ values_data.data_type().clone(),
+ true, // TODO: find a consistent way of getting this
+ ));
+ let data_type = GenericListArray::<OffsetSize>::DATA_TYPE_CONSTRUCTOR(field);
+ let array_data_builder = ArrayData::builder(data_type)
+ .len(len)
+ .add_buffer(offset_buffer)
+ .add_child_data(values_data.clone())
+ .null_bit_buffer(null_bit_buffer);
+
+ let array_data = unsafe { array_data_builder.build_unchecked() };
+
+ GenericListArray::<OffsetSize>::from(array_data)
+ }
+
/// Returns the current offsets buffer as a slice
pub fn offsets_slice(&self) -> &[OffsetSize] {
self.offsets_builder.as_slice()
@@ -255,6 +289,27 @@ mod tests {
assert!(builder.is_empty());
}
+ #[test]
+ fn test_list_array_builder_finish_cloned() {
+ let values_builder = Int32Array::builder(5);
+ let mut builder = ListBuilder::new(values_builder);
+
+ builder.values().append_slice(&[1, 2, 3]);
+ builder.append(true);
+ builder.values().append_slice(&[4, 5, 6]);
+ builder.append(true);
+
+ let mut arr = builder.finish_cloned();
+ assert_eq!(2, arr.len());
+ assert!(!builder.is_empty());
+
+ builder.values().append_slice(&[7, 8, 9]);
+ builder.append(true);
+ arr = builder.finish();
+ assert_eq!(3, arr.len());
+ assert!(builder.is_empty());
+ }
+
#[test]
fn test_list_list_array_builder() {
let primitive_builder = Int32Builder::with_capacity(10);
diff --git a/arrow-array/src/builder/map_builder.rs b/arrow-array/src/builder/map_builder.rs
index 4b7597248..5602f8863 100644
--- a/arrow-array/src/builder/map_builder.rs
+++ b/arrow-array/src/builder/map_builder.rs
@@ -18,6 +18,7 @@
use crate::builder::null_buffer_builder::NullBufferBuilder;
use crate::builder::{ArrayBuilder, BufferBuilder};
use crate::{Array, ArrayRef, MapArray, StructArray};
+use arrow_buffer::Buffer;
use arrow_data::ArrayData;
use arrow_schema::{ArrowError, DataType, Field};
use std::any::Any;
@@ -142,6 +143,48 @@ impl<K: ArrayBuilder, V: ArrayBuilder> MapBuilder<K, V> {
MapArray::from(array_data)
}
+
+ pub fn finish_cloned(&self) -> MapArray {
+ let len = self.len();
+
+ // Build the keys
+ let keys_arr = self.key_builder.finish_cloned();
+ let values_arr = self.value_builder.finish_cloned();
+
+ let keys_field = Field::new(
+ self.field_names.key.as_str(),
+ keys_arr.data_type().clone(),
+ false, // always non-nullable
+ );
+ let values_field = Field::new(
+ self.field_names.value.as_str(),
+ values_arr.data_type().clone(),
+ true,
+ );
+
+ let struct_array =
+ StructArray::from(vec![(keys_field, keys_arr), (values_field, values_arr)]);
+
+ let offset_buffer = Buffer::from_slice_ref(self.offsets_builder.as_slice());
+ let null_bit_buffer = self
+ .null_buffer_builder
+ .as_slice()
+ .map(Buffer::from_slice_ref);
+ let map_field = Box::new(Field::new(
+ self.field_names.entry.as_str(),
+ struct_array.data_type().clone(),
+ false, // always non-nullable
+ ));
+ let array_data = ArrayData::builder(DataType::Map(map_field, false)) // TODO: support sorted keys
+ .len(len)
+ .add_buffer(offset_buffer)
+ .add_child_data(struct_array.into_data())
+ .null_bit_buffer(null_bit_buffer);
+
+ let array_data = unsafe { array_data.build_unchecked() };
+
+ MapArray::from(array_data)
+ }
}
impl<K: ArrayBuilder, V: ArrayBuilder> ArrayBuilder for MapBuilder<K, V> {
@@ -157,6 +200,11 @@ impl<K: ArrayBuilder, V: ArrayBuilder> ArrayBuilder for MapBuilder<K, V> {
Arc::new(self.finish())
}
+ /// Builds the array without resetting the builder.
+ fn finish_cloned(&self) -> ArrayRef {
+ Arc::new(self.finish_cloned())
+ }
+
fn as_any(&self) -> &dyn Any {
self
}
diff --git a/arrow-array/src/builder/mod.rs b/arrow-array/src/builder/mod.rs
index a5c1e3d4b..eaf824397 100644
--- a/arrow-array/src/builder/mod.rs
+++ b/arrow-array/src/builder/mod.rs
@@ -107,6 +107,9 @@ pub trait ArrayBuilder: Any + Send {
/// Builds the array
fn finish(&mut self) -> ArrayRef;
+ /// Builds the array without resetting the underlying builder.
+ fn finish_cloned(&self) -> ArrayRef;
+
/// Returns the builder as a non-mutable `Any` reference.
///
/// This is most useful when one wants to call non-mutable APIs on a specific builder
diff --git a/arrow-array/src/builder/null_buffer_builder.rs b/arrow-array/src/builder/null_buffer_builder.rs
index fef7214d5..b3c788fe5 100644
--- a/arrow-array/src/builder/null_buffer_builder.rs
+++ b/arrow-array/src/builder/null_buffer_builder.rs
@@ -135,7 +135,11 @@ impl NullBufferBuilder {
buf
}
- #[inline]
+ /// Returns the inner bitmap builder as a slice, or `None` if no bitmap builder is present
+ pub fn as_slice(&self) -> Option<&[u8]> {
+ Some(self.bitmap_builder.as_ref()?.as_slice())
+ }
+
fn materialize_if_needed(&mut self) {
if self.bitmap_builder.is_none() {
self.materialize()
diff --git a/arrow-array/src/builder/primitive_builder.rs b/arrow-array/src/builder/primitive_builder.rs
index 55d8bac01..7a1fbafc7 100644
--- a/arrow-array/src/builder/primitive_builder.rs
+++ b/arrow-array/src/builder/primitive_builder.rs
@@ -19,7 +19,7 @@ use crate::builder::null_buffer_builder::NullBufferBuilder;
use crate::builder::{ArrayBuilder, BufferBuilder};
use crate::types::*;
use crate::{ArrayRef, ArrowPrimitiveType, PrimitiveArray};
-use arrow_buffer::MutableBuffer;
+use arrow_buffer::{Buffer, MutableBuffer};
use arrow_data::ArrayData;
use std::any::Any;
use std::sync::Arc;
@@ -93,6 +93,11 @@ impl<T: ArrowPrimitiveType> ArrayBuilder for PrimitiveBuilder<T> {
fn finish(&mut self) -> ArrayRef {
Arc::new(self.finish())
}
+
+ /// Builds the array without resetting the builder.
+ fn finish_cloned(&self) -> ArrayRef {
+ Arc::new(self.finish_cloned())
+ }
}
impl<T: ArrowPrimitiveType> Default for PrimitiveBuilder<T> {
@@ -219,6 +224,23 @@ impl<T: ArrowPrimitiveType> PrimitiveBuilder<T> {
PrimitiveArray::<T>::from(array_data)
}
+ /// Builds the [`PrimitiveArray`] without resetting the builder.
+ pub fn finish_cloned(&self) -> PrimitiveArray<T> {
+ let len = self.len();
+ let null_bit_buffer = self
+ .null_buffer_builder
+ .as_slice()
+ .map(Buffer::from_slice_ref);
+ let values_buffer = Buffer::from_slice_ref(self.values_builder.as_slice());
+ let builder = ArrayData::builder(T::DATA_TYPE)
+ .len(len)
+ .add_buffer(values_buffer)
+ .null_bit_buffer(null_bit_buffer);
+
+ let array_data = unsafe { builder.build_unchecked() };
+ PrimitiveArray::<T>::from(array_data)
+ }
+
/// Returns the current values buffer as a slice
pub fn values_slice(&self) -> &[T::Native] {
self.values_builder.as_slice()
@@ -431,4 +453,26 @@ mod tests {
assert_eq!(5, arr.len());
assert_eq!(0, builder.len());
}
+
+ #[test]
+ fn test_primitive_array_builder_finish_cloned() {
+ let mut builder = Int32Builder::new();
+ builder.append_value(23);
+ builder.append_value(45);
+ let result = builder.finish_cloned();
+ assert_eq!(result, Int32Array::from(vec![23, 45]));
+ builder.append_value(56);
+ assert_eq!(builder.finish_cloned(), Int32Array::from(vec![23, 45, 56]));
+
+ builder.append_slice(&[2, 4, 6, 8]);
+ let mut arr = builder.finish();
+ assert_eq!(7, arr.len());
+ assert_eq!(arr, Int32Array::from(vec![23, 45, 56, 2, 4, 6, 8]));
+ assert_eq!(0, builder.len());
+
+ builder.append_slice(&[1, 3, 5, 7, 9]);
+ arr = builder.finish();
+ assert_eq!(5, arr.len());
+ assert_eq!(0, builder.len());
+ }
}
diff --git a/arrow-array/src/builder/primitive_dictionary_builder.rs b/arrow-array/src/builder/primitive_dictionary_builder.rs
index c43416e5a..5b8a72835 100644
--- a/arrow-array/src/builder/primitive_dictionary_builder.rs
+++ b/arrow-array/src/builder/primitive_dictionary_builder.rs
@@ -160,6 +160,11 @@ where
fn finish(&mut self) -> ArrayRef {
Arc::new(self.finish())
}
+
+ /// Builds the array without resetting the builder.
+ fn finish_cloned(&self) -> ArrayRef {
+ Arc::new(self.finish_cloned())
+ }
}
impl<K, V> PrimitiveDictionaryBuilder<K, V>
@@ -210,6 +215,23 @@ where
DictionaryArray::from(unsafe { builder.build_unchecked() })
}
+
+ /// Builds the `DictionaryArray` without resetting the builder.
+ pub fn finish_cloned(&self) -> DictionaryArray<K> {
+ let values = self.values_builder.finish_cloned();
+ let keys = self.keys_builder.finish_cloned();
+
+ let data_type =
+ DataType::Dictionary(Box::new(K::DATA_TYPE), Box::new(V::DATA_TYPE));
+
+ let builder = keys
+ .into_data()
+ .into_builder()
+ .data_type(data_type)
+ .child_data(vec![values.into_data()]);
+
+ DictionaryArray::from(unsafe { builder.build_unchecked() })
+ }
}
#[cfg(test)]
diff --git a/arrow-array/src/builder/string_dictionary_builder.rs b/arrow-array/src/builder/string_dictionary_builder.rs
index e41086c87..f44756b6b 100644
--- a/arrow-array/src/builder/string_dictionary_builder.rs
+++ b/arrow-array/src/builder/string_dictionary_builder.rs
@@ -222,6 +222,11 @@ where
fn finish(&mut self) -> ArrayRef {
Arc::new(self.finish())
}
+
+ /// Builds the array without resetting the builder.
+ fn finish_cloned(&self) -> ArrayRef {
+ Arc::new(self.finish_cloned())
+ }
}
impl<K> StringDictionaryBuilder<K>
@@ -287,6 +292,23 @@ where
DictionaryArray::from(unsafe { builder.build_unchecked() })
}
+
+ /// Builds the `DictionaryArray` without resetting the builder.
+ pub fn finish_cloned(&self) -> DictionaryArray<K> {
+ let values = self.values_builder.finish_cloned();
+ let keys = self.keys_builder.finish_cloned();
+
+ let data_type =
+ DataType::Dictionary(Box::new(K::DATA_TYPE), Box::new(DataType::Utf8));
+
+ let builder = keys
+ .into_data()
+ .into_builder()
+ .data_type(data_type)
+ .child_data(vec![values.into_data()]);
+
+ DictionaryArray::from(unsafe { builder.build_unchecked() })
+ }
}
fn get_bytes<'a, K: ArrowNativeType>(values: &'a StringBuilder, key: &K) -> &'a [u8] {
@@ -331,6 +353,57 @@ mod tests {
assert_eq!(ava.value(1), "def");
}
+ #[test]
+ fn test_string_dictionary_builder_finish_cloned() {
+ let mut builder = StringDictionaryBuilder::<Int8Type>::new();
+ builder.append("abc").unwrap();
+ builder.append_null();
+ builder.append("def").unwrap();
+ builder.append("def").unwrap();
+ builder.append("abc").unwrap();
+ let mut array = builder.finish_cloned();
+
+ assert_eq!(
+ array.keys(),
+ &Int8Array::from(vec![Some(0), None, Some(1), Some(1), Some(0)])
+ );
+
+ // Values are polymorphic and so require a downcast.
+ let av = array.values();
+ let ava: &StringArray = av.as_any().downcast_ref::<StringArray>().unwrap();
+
+ assert_eq!(ava.value(0), "abc");
+ assert_eq!(ava.value(1), "def");
+
+ builder.append("abc").unwrap();
+ builder.append("ghi").unwrap();
+ builder.append("def").unwrap();
+
+ array = builder.finish();
+
+ assert_eq!(
+ array.keys(),
+ &Int8Array::from(vec![
+ Some(0),
+ None,
+ Some(1),
+ Some(1),
+ Some(0),
+ Some(0),
+ Some(2),
+ Some(1)
+ ])
+ );
+
+ // Values are polymorphic and so require a downcast.
+ let av2 = array.values();
+ let ava2: &StringArray = av2.as_any().downcast_ref::<StringArray>().unwrap();
+
+ assert_eq!(ava2.value(0), "abc");
+ assert_eq!(ava2.value(1), "def");
+ assert_eq!(ava2.value(2), "ghi");
+ }
+
#[test]
fn test_string_dictionary_builder_with_existing_dictionary() {
let dictionary = StringArray::from(vec![None, Some("def"), Some("abc")]);
diff --git a/arrow-array/src/builder/struct_builder.rs b/arrow-array/src/builder/struct_builder.rs
index f00f81d1a..98d0e1a1d 100644
--- a/arrow-array/src/builder/struct_builder.rs
+++ b/arrow-array/src/builder/struct_builder.rs
@@ -18,6 +18,7 @@
use crate::builder::null_buffer_builder::NullBufferBuilder;
use crate::builder::*;
use crate::{Array, ArrayRef, StructArray};
+use arrow_buffer::Buffer;
use arrow_data::ArrayData;
use arrow_schema::{DataType, Field, IntervalUnit, TimeUnit};
use std::any::Any;
@@ -63,6 +64,11 @@ impl ArrayBuilder for StructBuilder {
Arc::new(self.finish())
}
+ /// Builds the array without resetting the builder.
+ fn finish_cloned(&self) -> ArrayRef {
+ Arc::new(self.finish_cloned())
+ }
+
/// Returns the builder as a non-mutable `Any` reference.
///
/// This is most useful when one wants to call non-mutable APIs on a specific builder
@@ -230,6 +236,30 @@ impl StructBuilder {
StructArray::from(array_data)
}
+ /// Builds the `StructArray` without resetting the builder.
+ pub fn finish_cloned(&self) -> StructArray {
+ self.validate_content();
+
+ let mut child_data = Vec::with_capacity(self.field_builders.len());
+ for f in &self.field_builders {
+ let arr = f.finish_cloned();
+ child_data.push(arr.data().clone());
+ }
+ let length = self.len();
+ let null_bit_buffer = self
+ .null_buffer_builder
+ .as_slice()
+ .map(Buffer::from_slice_ref);
+
+ let builder = ArrayData::builder(DataType::Struct(self.fields.clone()))
+ .len(length)
+ .child_data(child_data)
+ .null_bit_buffer(null_bit_buffer);
+
+ let array_data = unsafe { builder.build_unchecked() };
+ StructArray::from(array_data)
+ }
+
/// Constructs and validates contents in the builder to ensure that
/// - fields and field_builders are of equal length
/// - the number of items in individual field_builders are equal to self.len()
@@ -374,6 +404,64 @@ mod tests {
assert_eq!(0, builder.len());
}
+ #[test]
+ fn test_struct_array_builder_finish_cloned() {
+ let int_builder = Int32Builder::new();
+ let bool_builder = BooleanBuilder::new();
+
+ let mut fields = Vec::new();
+ let mut field_builders = Vec::new();
+ fields.push(Field::new("f1", DataType::Int32, false));
+ field_builders.push(Box::new(int_builder) as Box<dyn ArrayBuilder>);
+ fields.push(Field::new("f2", DataType::Boolean, false));
+ field_builders.push(Box::new(bool_builder) as Box<dyn ArrayBuilder>);
+
+ let mut builder = StructBuilder::new(fields, field_builders);
+ builder
+ .field_builder::<Int32Builder>(0)
+ .unwrap()
+ .append_slice(&[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]);
+ builder
+ .field_builder::<BooleanBuilder>(1)
+ .unwrap()
+ .append_slice(&[
+ false, true, false, true, false, true, false, true, false, true,
+ ]);
+
+ // Append slot values - all are valid.
+ for _ in 0..10 {
+ builder.append(true);
+ }
+
+ assert_eq!(10, builder.len());
+
+ let mut arr = builder.finish_cloned();
+
+ assert_eq!(10, arr.len());
+ assert_eq!(10, builder.len());
+
+ builder
+ .field_builder::<Int32Builder>(0)
+ .unwrap()
+ .append_slice(&[1, 3, 5, 7, 9]);
+ builder
+ .field_builder::<BooleanBuilder>(1)
+ .unwrap()
+ .append_slice(&[false, true, false, true, false]);
+
+ // Append slot values - all are valid.
+ for _ in 0..5 {
+ builder.append(true);
+ }
+
+ assert_eq!(15, builder.len());
+
+ arr = builder.finish();
+
+ assert_eq!(15, arr.len());
+ assert_eq!(0, builder.len());
+ }
+
#[test]
fn test_struct_array_builder_from_schema() {
let mut fields = vec![