You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by tu...@apache.org on 2022/07/15 15:15:45 UTC
[arrow-rs] branch master updated: Lazily materialize the null buffer builder of boolean builder (#2073)
This is an automated email from the ASF dual-hosted git repository.
tustvold pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git
The following commit(s) were added to refs/heads/master by this push:
new 7fff23ff2 Lazily materialize the null buffer builder of boolean builder (#2073)
7fff23ff2 is described below
commit 7fff23ff214e7237032c1815832ee959cb3a7cc2
Author: Remzi Yang <59...@users.noreply.github.com>
AuthorDate: Fri Jul 15 23:15:38 2022 +0800
Lazily materialize the null buffer builder of boolean builder (#2073)
* lazily materialize the null buffer builder
Signed-off-by: remzi <13...@gmail.com>
* add docs
Signed-off-by: remzi <13...@gmail.com>
---
arrow/src/array/builder/boolean_builder.rs | 89 ++++++++++++++++++++++++----
arrow/src/array/builder/primitive_builder.rs | 27 ---------
2 files changed, 76 insertions(+), 40 deletions(-)
diff --git a/arrow/src/array/builder/boolean_builder.rs b/arrow/src/array/builder/boolean_builder.rs
index 98acb641b..d0063e566 100644
--- a/arrow/src/array/builder/boolean_builder.rs
+++ b/arrow/src/array/builder/boolean_builder.rs
@@ -60,7 +60,9 @@ use super::BooleanBufferBuilder;
#[derive(Debug)]
pub struct BooleanBuilder {
values_builder: BooleanBufferBuilder,
- bitmap_builder: BooleanBufferBuilder,
+ /// We only materialize the builder when we add `false`.
+ /// This optimization is **very** important for the performance.
+ bitmap_builder: Option<BooleanBufferBuilder>,
}
impl BooleanBuilder {
@@ -68,7 +70,7 @@ impl BooleanBuilder {
pub fn new(capacity: usize) -> Self {
Self {
values_builder: BooleanBufferBuilder::new(capacity),
- bitmap_builder: BooleanBufferBuilder::new(capacity),
+ bitmap_builder: None,
}
}
@@ -80,15 +82,18 @@ impl BooleanBuilder {
/// Appends a value of type `T` into the builder
#[inline]
pub fn append_value(&mut self, v: bool) -> Result<()> {
- self.bitmap_builder.append(true);
self.values_builder.append(v);
+ if let Some(b) = self.bitmap_builder.as_mut() {
+ b.append(true)
+ }
Ok(())
}
/// Appends a null slot into the builder
#[inline]
pub fn append_null(&mut self) -> Result<()> {
- self.bitmap_builder.append(false);
+ self.materialize_bitmap_builder();
+ self.bitmap_builder.as_mut().unwrap().append(false);
self.values_builder.advance(1);
Ok(())
}
@@ -106,7 +111,9 @@ impl BooleanBuilder {
/// Appends a slice of type `T` into the builder
#[inline]
pub fn append_slice(&mut self, v: &[bool]) -> Result<()> {
- self.bitmap_builder.append_n(v.len(), true);
+ if let Some(b) = self.bitmap_builder.as_mut() {
+ b.append_n(v.len(), true)
+ }
self.values_builder.append_slice(v);
Ok(())
}
@@ -115,28 +122,43 @@ impl BooleanBuilder {
#[inline]
pub fn append_values(&mut self, values: &[bool], is_valid: &[bool]) -> Result<()> {
if values.len() != is_valid.len() {
- return Err(ArrowError::InvalidArgumentError(
+ Err(ArrowError::InvalidArgumentError(
"Value and validity lengths must be equal".to_string(),
- ));
+ ))
+ } else {
+ is_valid
+ .iter()
+ .any(|v| !*v)
+ .then(|| self.materialize_bitmap_builder());
+ if let Some(b) = self.bitmap_builder.as_mut() {
+ b.append_slice(is_valid)
+ }
+ self.values_builder.append_slice(values);
+ Ok(())
}
- self.bitmap_builder.append_slice(is_valid);
- self.values_builder.append_slice(values);
- Ok(())
}
/// Builds the [BooleanArray] and reset this builder.
pub fn finish(&mut self) -> BooleanArray {
let len = self.len();
- let null_bit_buffer = self.bitmap_builder.finish();
- let null_count = len - null_bit_buffer.count_set_bits();
+ let null_bit_buffer = self.bitmap_builder.as_mut().map(|b| b.finish());
let builder = ArrayData::builder(DataType::Boolean)
.len(len)
.add_buffer(self.values_builder.finish())
- .null_bit_buffer((null_count > 0).then(|| null_bit_buffer));
+ .null_bit_buffer(null_bit_buffer);
let array_data = unsafe { builder.build_unchecked() };
BooleanArray::from(array_data)
}
+
+ fn materialize_bitmap_builder(&mut self) {
+ if self.bitmap_builder.is_none() {
+ let mut b = BooleanBufferBuilder::new(0);
+ b.reserve(self.values_builder.capacity());
+ b.append_n(self.values_builder.len(), true);
+ self.bitmap_builder = Some(b);
+ }
+ }
}
impl ArrayBuilder for BooleanBuilder {
@@ -174,6 +196,32 @@ impl ArrayBuilder for BooleanBuilder {
#[cfg(test)]
mod tests {
use super::*;
+ use crate::{array::Array, buffer::Buffer};
+
+ #[test]
+ fn test_boolean_array_builder() {
+ // 00000010 01001000
+ let buf = Buffer::from([72_u8, 2_u8]);
+ let mut builder = BooleanArray::builder(10);
+ for i in 0..10 {
+ if i == 3 || i == 6 || i == 9 {
+ builder.append_value(true).unwrap();
+ } else {
+ builder.append_value(false).unwrap();
+ }
+ }
+
+ let arr = builder.finish();
+ assert_eq!(&buf, arr.values());
+ assert_eq!(10, arr.len());
+ assert_eq!(0, arr.offset());
+ assert_eq!(0, arr.null_count());
+ for i in 0..10 {
+ assert!(!arr.is_null(i));
+ assert!(arr.is_valid(i));
+ assert_eq!(i == 3 || i == 6 || i == 9, arr.value(i), "failed at {}", i)
+ }
+ }
#[test]
fn test_boolean_array_builder_append_slice() {
@@ -200,4 +248,19 @@ mod tests {
assert_eq!(arr1, arr2);
}
+
+ #[test]
+ fn test_boolean_array_builder_no_null() {
+ let mut builder = BooleanArray::builder(0);
+ builder.append_option(Some(true)).unwrap();
+ builder.append_value(false).unwrap();
+ builder.append_slice(&[true, false, true]).unwrap();
+ builder
+ .append_values(&[false, false, true], &[true, true, true])
+ .unwrap();
+
+ let array = builder.finish();
+ assert_eq!(0, array.null_count());
+ assert!(array.data().null_buffer().is_none());
+ }
}
diff --git a/arrow/src/array/builder/primitive_builder.rs b/arrow/src/array/builder/primitive_builder.rs
index 2f5eeac73..ec1b408ed 100644
--- a/arrow/src/array/builder/primitive_builder.rs
+++ b/arrow/src/array/builder/primitive_builder.rs
@@ -216,12 +216,10 @@ mod tests {
use super::*;
use crate::array::Array;
- use crate::array::BooleanArray;
use crate::array::Date32Array;
use crate::array::Int32Array;
use crate::array::Int32Builder;
use crate::array::TimestampSecondArray;
- use crate::buffer::Buffer;
#[test]
fn test_primitive_array_builder_i32() {
@@ -303,31 +301,6 @@ mod tests {
}
}
- #[test]
- fn test_primitive_array_builder_bool() {
- // 00000010 01001000
- let buf = Buffer::from([72_u8, 2_u8]);
- let mut builder = BooleanArray::builder(10);
- for i in 0..10 {
- if i == 3 || i == 6 || i == 9 {
- builder.append_value(true).unwrap();
- } else {
- builder.append_value(false).unwrap();
- }
- }
-
- let arr = builder.finish();
- assert_eq!(&buf, arr.values());
- assert_eq!(10, arr.len());
- assert_eq!(0, arr.offset());
- assert_eq!(0, arr.null_count());
- for i in 0..10 {
- assert!(!arr.is_null(i));
- assert!(arr.is_valid(i));
- assert_eq!(i == 3 || i == 6 || i == 9, arr.value(i), "failed at {}", i)
- }
- }
-
#[test]
fn test_primitive_array_builder_append_option() {
let arr1 = Int32Array::from(vec![Some(0), None, Some(2), None, Some(4)]);