You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by tu...@apache.org on 2022/07/06 15:38:09 UTC
[arrow-rs] branch master updated: Refine the `FixedSizeBinaryBuilder` (#2013)
This is an automated email from the ASF dual-hosted git repository.
tustvold pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git
The following commit(s) were added to refs/heads/master by this push:
new b156cce9c Refine the `FixedSizeBinaryBuilder` (#2013)
b156cce9c is described below
commit b156cce9ccab71bc278d9b05e4eb5e33070906bc
Author: Remzi Yang <59...@users.noreply.github.com>
AuthorDate: Wed Jul 6 23:38:04 2022 +0800
Refine the `FixedSizeBinaryBuilder` (#2013)
* refine the code
Signed-off-by: remzi <13...@gmail.com>
* refine the docs
Signed-off-by: remzi <13...@gmail.com>
* nit
Signed-off-by: remzi <13...@gmail.com>
---
.../src/array/builder/fixed_size_binary_builder.rs | 118 ++++++++++++++++-----
1 file changed, 93 insertions(+), 25 deletions(-)
diff --git a/arrow/src/array/builder/fixed_size_binary_builder.rs b/arrow/src/array/builder/fixed_size_binary_builder.rs
index 2bbe4bb05..e62aa8fa6 100644
--- a/arrow/src/array/builder/fixed_size_binary_builder.rs
+++ b/arrow/src/array/builder/fixed_size_binary_builder.rs
@@ -16,53 +16,78 @@
// under the License.
use crate::array::{
- ArrayBuilder, ArrayRef, FixedSizeBinaryArray, FixedSizeListBuilder, UInt8Builder,
+ ArrayBuilder, ArrayData, ArrayRef, FixedSizeBinaryArray, UInt8BufferBuilder,
};
+use crate::datatypes::DataType;
use crate::error::{ArrowError, Result};
use std::any::Any;
use std::sync::Arc;
+use super::BooleanBufferBuilder;
+
#[derive(Debug)]
pub struct FixedSizeBinaryBuilder {
- builder: FixedSizeListBuilder<UInt8Builder>,
+ values_builder: UInt8BufferBuilder,
+ bitmap_builder: BooleanBufferBuilder,
+ value_length: i32,
}
impl FixedSizeBinaryBuilder {
- /// Creates a new `BinaryBuilder`, `capacity` is the number of bytes in the values
- /// array
+ /// Creates a new [`FixedSizeBinaryBuilder`], `capacity` is the number of bytes in the values
+ /// buffer
pub fn new(capacity: usize, byte_width: i32) -> Self {
- let values_builder = UInt8Builder::new(capacity);
+ assert!(
+ byte_width >= 0,
+ "value length ({}) of the array must >= 0",
+ byte_width
+ );
Self {
- builder: FixedSizeListBuilder::new(values_builder, byte_width),
+ values_builder: UInt8BufferBuilder::new(capacity),
+ bitmap_builder: BooleanBufferBuilder::new(if byte_width > 0 {
+ capacity / byte_width as usize
+ } else {
+ 0
+ }),
+ value_length: byte_width,
}
}
/// Appends a byte slice into the builder.
///
- /// Automatically calls the `append` method to delimit the slice appended in as a
- /// distinct array element.
+ /// Automatically update the null buffer to delimit the slice appended in as a
+ /// distinct value element.
#[inline]
pub fn append_value(&mut self, value: impl AsRef<[u8]>) -> Result<()> {
- if self.builder.value_length() != value.as_ref().len() as i32 {
- return Err(ArrowError::InvalidArgumentError(
+ if self.value_length != value.as_ref().len() as i32 {
+ Err(ArrowError::InvalidArgumentError(
"Byte slice does not have the same length as FixedSizeBinaryBuilder value lengths".to_string()
- ));
+ ))
+ } else {
+ self.values_builder.append_slice(value.as_ref());
+ self.bitmap_builder.append(true);
+ Ok(())
}
- self.builder.values().append_slice(value.as_ref())?;
- self.builder.append(true)
}
/// Append a null value to the array.
#[inline]
pub fn append_null(&mut self) -> Result<()> {
- let length: usize = self.builder.value_length() as usize;
- self.builder.values().append_slice(&vec![0u8; length][..])?;
- self.builder.append(false)
+ self.values_builder
+ .append_slice(&vec![0u8; self.value_length as usize][..]);
+ self.bitmap_builder.append(false);
+ Ok(())
}
- /// Builds the `FixedSizeBinaryArray` and reset this builder.
+ /// Builds the [`FixedSizeBinaryArray`] and reset this builder.
pub fn finish(&mut self) -> FixedSizeBinaryArray {
- FixedSizeBinaryArray::from(self.builder.finish())
+ let array_length = self.len();
+ let array_data_builder =
+ ArrayData::builder(DataType::FixedSizeBinary(self.value_length))
+ .add_buffer(self.values_builder.finish())
+ .null_bit_buffer(Some(self.bitmap_builder.finish()))
+ .len(array_length);
+ let array_data = unsafe { array_data_builder.build_unchecked() };
+ FixedSizeBinaryArray::from(array_data)
}
}
@@ -84,12 +109,12 @@ impl ArrayBuilder for FixedSizeBinaryBuilder {
/// Returns the number of array slots in the builder
fn len(&self) -> usize {
- self.builder.len()
+ self.bitmap_builder.len()
}
/// Returns whether the number of array slots is zero
fn is_empty(&self) -> bool {
- self.builder.is_empty()
+ self.bitmap_builder.is_empty()
}
/// Builds the array and reset this builder.
@@ -114,15 +139,58 @@ mod tests {
builder.append_value(b"hello").unwrap();
builder.append_null().unwrap();
builder.append_value(b"arrow").unwrap();
- let fixed_size_binary_array: FixedSizeBinaryArray = builder.finish();
+ let array: FixedSizeBinaryArray = builder.finish();
+
+ assert_eq!(&DataType::FixedSizeBinary(5), array.data_type());
+ assert_eq!(3, array.len());
+ assert_eq!(1, array.null_count());
+ assert_eq!(10, array.value_offset(2));
+ assert_eq!(5, array.value_length());
+ }
+
+ #[test]
+ fn test_fixed_size_binary_builder_with_zero_value_length() {
+ let mut builder = FixedSizeBinaryBuilder::new(0, 0);
+
+ builder.append_value(b"").unwrap();
+ builder.append_null().unwrap();
+ builder.append_value(b"").unwrap();
+ assert!(!builder.is_empty());
+
+ let array: FixedSizeBinaryArray = builder.finish();
+ assert_eq!(&DataType::FixedSizeBinary(0), array.data_type());
+ assert_eq!(3, array.len());
+ assert_eq!(1, array.null_count());
+ assert_eq!(0, array.value_offset(2));
+ assert_eq!(0, array.value_length());
+ assert_eq!(b"", array.value(0));
+ assert_eq!(b"", array.value(2));
+ }
+ #[test]
+ #[should_panic(
+ expected = "Byte slice does not have the same length as FixedSizeBinaryBuilder value lengths"
+ )]
+ fn test_fixed_size_binary_builder_with_inconsistent_value_length() {
+ let mut builder = FixedSizeBinaryBuilder::new(15, 4);
+ builder.append_value(b"hello").unwrap();
+ }
+ #[test]
+ fn test_fixed_size_binary_builder_empty() {
+ let mut builder = FixedSizeBinaryBuilder::new(15, 5);
+ assert!(builder.is_empty());
+
+ let fixed_size_binary_array = builder.finish();
assert_eq!(
&DataType::FixedSizeBinary(5),
fixed_size_binary_array.data_type()
);
- assert_eq!(3, fixed_size_binary_array.len());
- assert_eq!(1, fixed_size_binary_array.null_count());
- assert_eq!(10, fixed_size_binary_array.value_offset(2));
- assert_eq!(5, fixed_size_binary_array.value_length());
+ assert_eq!(0, fixed_size_binary_array.len());
+ }
+
+ #[test]
+ #[should_panic(expected = "value length (-1) of the array must >= 0")]
+ fn test_fixed_size_binary_builder_invalid_value_length() {
+ let _ = FixedSizeBinaryBuilder::new(15, -1);
}
}