You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by tu...@apache.org on 2023/04/26 18:07:19 UTC
[arrow-rs] branch master updated: Return BooleanBuffer from BooleanBufferBuilder (#4140)
This is an automated email from the ASF dual-hosted git repository.
tustvold pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git
The following commit(s) were added to refs/heads/master by this push:
new 341f26459 Return BooleanBuffer from BooleanBufferBuilder (#4140)
341f26459 is described below
commit 341f2645962b32015b33e20738d9f803f26bae2b
Author: Raphael Taylor-Davies <17...@users.noreply.github.com>
AuthorDate: Wed Apr 26 14:07:13 2023 -0400
Return BooleanBuffer from BooleanBufferBuilder (#4140)
* Return BooleanBuffer from BooleanBufferBuilder
* Clippy
---
arrow-arith/src/arithmetic.rs | 9 +----
arrow-array/src/array/primitive_array.rs | 6 +--
arrow-array/src/builder/boolean_buffer_builder.rs | 43 +++++++++++++---------
arrow-array/src/builder/boolean_builder.rs | 2 +-
arrow-array/src/builder/null_buffer_builder.rs | 3 +-
arrow-integration-test/Cargo.toml | 2 +-
arrow-json/src/reader/list_array.rs | 6 +--
arrow-json/src/reader/map_array.rs | 6 +--
arrow-json/src/reader/struct_array.rs | 6 +--
arrow-row/src/dictionary.rs | 4 +-
arrow-select/src/filter.rs | 4 +-
arrow-select/src/interleave.rs | 2 +-
arrow-select/src/nullif.rs | 2 +-
arrow-string/src/like.rs | 2 +-
arrow-string/src/regexp.rs | 4 +-
parquet/src/arrow/array_reader/list_array.rs | 4 +-
parquet/src/arrow/array_reader/primitive_array.rs | 2 +-
parquet/src/arrow/array_reader/struct_array.rs | 4 +-
.../src/arrow/record_reader/definition_levels.rs | 2 +-
19 files changed, 53 insertions(+), 60 deletions(-)
diff --git a/arrow-arith/src/arithmetic.rs b/arrow-arith/src/arithmetic.rs
index 2b8a2f3b7..7f5a08190 100644
--- a/arrow-arith/src/arithmetic.rs
+++ b/arrow-arith/src/arithmetic.rs
@@ -1965,7 +1965,7 @@ mod tests {
BooleanBufferBuilder, BufferBuilder, PrimitiveDictionaryBuilder,
};
use arrow_array::temporal_conversions::SECONDS_IN_DAY;
- use arrow_buffer::buffer::{BooleanBuffer, NullBuffer};
+ use arrow_buffer::buffer::NullBuffer;
use arrow_buffer::i256;
use arrow_data::ArrayDataBuilder;
use chrono::NaiveDate;
@@ -3575,12 +3575,7 @@ mod tests {
null_buffer_builder.resize(13);
assert_eq!(null_buffer_builder.len(), 13);
- let null_buffer = null_buffer_builder.finish();
-
- // `count_set_bits_offset` takes len in bits as parameter.
- assert_eq!(null_buffer.count_set_bits_offset(0, 13), 0);
-
- let nulls = BooleanBuffer::new(null_buffer, 0, 13);
+ let nulls = null_buffer_builder.finish();
assert_eq!(nulls.count_set_bits(), 0);
let nulls = NullBuffer::new(nulls);
assert_eq!(nulls.null_count(), 13);
diff --git a/arrow-array/src/array/primitive_array.rs b/arrow-array/src/array/primitive_array.rs
index 9fb78eb14..8c8562b5b 100644
--- a/arrow-array/src/array/primitive_array.rs
+++ b/arrow-array/src/array/primitive_array.rs
@@ -25,9 +25,7 @@ use crate::timezone::Tz;
use crate::trusted_len::trusted_len_unzip;
use crate::types::*;
use crate::{Array, ArrayAccessor, ArrayRef};
-use arrow_buffer::{
- i256, ArrowNativeType, BooleanBuffer, Buffer, NullBuffer, ScalarBuffer,
-};
+use arrow_buffer::{i256, ArrowNativeType, Buffer, NullBuffer, ScalarBuffer};
use arrow_data::bit_iterator::try_for_each_valid_idx;
use arrow_data::{ArrayData, ArrayDataBuilder};
use arrow_schema::{ArrowError, DataType};
@@ -642,7 +640,7 @@ impl<T: ArrowPrimitiveType> PrimitiveArray<T> {
Ok::<_, ()>(())
});
- let nulls = BooleanBuffer::new(null_builder.finish(), 0, len);
+ let nulls = null_builder.finish();
let values = buffer.finish().into();
let nulls = unsafe { NullBuffer::new_unchecked(nulls, out_null_count) };
PrimitiveArray::new(values, Some(nulls))
diff --git a/arrow-array/src/builder/boolean_buffer_builder.rs b/arrow-array/src/builder/boolean_buffer_builder.rs
index ac2a96fea..f721504d0 100644
--- a/arrow-array/src/builder/boolean_buffer_builder.rs
+++ b/arrow-array/src/builder/boolean_buffer_builder.rs
@@ -15,7 +15,7 @@
// specific language governing permissions and limitations
// under the License.
-use arrow_buffer::{bit_util, Buffer, MutableBuffer};
+use arrow_buffer::{bit_util, BooleanBuffer, Buffer, MutableBuffer};
use arrow_data::bit_mask;
use std::ops::Range;
@@ -214,12 +214,12 @@ impl BooleanBufferBuilder {
self.buffer.as_slice_mut()
}
- /// Creates a [`Buffer`]
+ /// Creates a [`BooleanBuffer`]
#[inline]
- pub fn finish(&mut self) -> Buffer {
+ pub fn finish(&mut self) -> BooleanBuffer {
let buf = std::mem::replace(&mut self.buffer, MutableBuffer::new(0));
- self.len = 0;
- buf.into()
+ let len = std::mem::replace(&mut self.len, 0);
+ BooleanBuffer::new(buf.into(), 0, len)
}
}
@@ -230,6 +230,13 @@ impl From<BooleanBufferBuilder> for Buffer {
}
}
+impl From<BooleanBufferBuilder> for BooleanBuffer {
+ #[inline]
+ fn from(builder: BooleanBufferBuilder) -> Self {
+ BooleanBuffer::new(builder.buffer.into(), 0, builder.len)
+ }
+}
+
#[cfg(test)]
mod tests {
use super::*;
@@ -244,7 +251,7 @@ mod tests {
assert_eq!(4, b.len());
assert_eq!(512, b.capacity());
let buffer = b.finish();
- assert_eq!(1, buffer.len());
+ assert_eq!(4, buffer.len());
// Overallocate capacity
let mut b = BooleanBufferBuilder::new(8);
@@ -252,7 +259,7 @@ mod tests {
assert_eq!(4, b.len());
assert_eq!(512, b.capacity());
let buffer = b.finish();
- assert_eq!(1, buffer.len());
+ assert_eq!(4, buffer.len());
}
#[test]
@@ -264,7 +271,7 @@ mod tests {
buffer.append(true);
buffer.set_bit(0, false);
assert_eq!(buffer.len(), 4);
- assert_eq!(buffer.finish().as_slice(), &[0b1010_u8]);
+ assert_eq!(buffer.finish().values(), &[0b1010_u8]);
}
#[test]
@@ -276,7 +283,7 @@ mod tests {
buffer.append(true);
buffer.set_bit(3, false);
assert_eq!(buffer.len(), 4);
- assert_eq!(buffer.finish().as_slice(), &[0b0011_u8]);
+ assert_eq!(buffer.finish().values(), &[0b0011_u8]);
}
#[test]
@@ -288,7 +295,7 @@ mod tests {
buffer.append(true);
buffer.set_bit(1, false);
assert_eq!(buffer.len(), 4);
- assert_eq!(buffer.finish().as_slice(), &[0b1001_u8]);
+ assert_eq!(buffer.finish().values(), &[0b1001_u8]);
}
#[test]
@@ -302,7 +309,7 @@ mod tests {
buffer.set_bit(1, false);
buffer.set_bit(2, false);
assert_eq!(buffer.len(), 5);
- assert_eq!(buffer.finish().as_slice(), &[0b10001_u8]);
+ assert_eq!(buffer.finish().values(), &[0b10001_u8]);
}
#[test]
@@ -313,7 +320,7 @@ mod tests {
buffer.set_bit(3, false);
buffer.set_bit(9, false);
assert_eq!(buffer.len(), 10);
- assert_eq!(buffer.finish().as_slice(), &[0b11110110_u8, 0b01_u8]);
+ assert_eq!(buffer.finish().values(), &[0b11110110_u8, 0b01_u8]);
}
#[test]
@@ -329,7 +336,7 @@ mod tests {
buffer.set_bit(14, true);
buffer.set_bit(13, false);
assert_eq!(buffer.len(), 15);
- assert_eq!(buffer.finish().as_slice(), &[0b01010110_u8, 0b1011100_u8]);
+ assert_eq!(buffer.finish().values(), &[0b01010110_u8, 0b1011100_u8]);
}
#[test]
@@ -394,7 +401,7 @@ mod tests {
let start = a.min(b);
let end = a.max(b);
- buffer.append_packed_range(start..end, compacted_src.as_slice());
+ buffer.append_packed_range(start..end, compacted_src.values());
all_bools.extend_from_slice(&src[start..end]);
}
@@ -430,14 +437,14 @@ mod tests {
let mut builder = BooleanBufferBuilder::new_from_buffer(b, 2);
builder.advance(2);
let finished = builder.finish();
- assert_eq!(finished.as_slice(), &[0b00000011]);
+ assert_eq!(finished.values(), &[0b00000011]);
let mut builder = BooleanBufferBuilder::new(10);
builder.append_n(5, true);
builder.resize(3);
builder.advance(2);
let finished = builder.finish();
- assert_eq!(finished.as_slice(), &[0b00000111]);
+ assert_eq!(finished.values(), &[0b00000111]);
let mut builder = BooleanBufferBuilder::new(10);
builder.append_n(16, true);
@@ -478,7 +485,7 @@ mod tests {
}
let buf2 = builder.finish();
- assert_eq!(buf.len(), buf2.len());
- assert_eq!(buf.as_slice(), buf2.as_slice());
+ assert_eq!(buf.len(), buf2.inner().len());
+ assert_eq!(buf.as_slice(), buf2.values());
}
}
diff --git a/arrow-array/src/builder/boolean_builder.rs b/arrow-array/src/builder/boolean_builder.rs
index bc3b62f99..c7974967a 100644
--- a/arrow-array/src/builder/boolean_builder.rs
+++ b/arrow-array/src/builder/boolean_builder.rs
@@ -149,7 +149,7 @@ impl BooleanBuilder {
let null_bit_buffer = self.null_buffer_builder.finish();
let builder = ArrayData::builder(DataType::Boolean)
.len(len)
- .add_buffer(self.values_builder.finish())
+ .add_buffer(self.values_builder.finish().into_inner())
.null_bit_buffer(null_bit_buffer);
let array_data = unsafe { builder.build_unchecked() };
diff --git a/arrow-array/src/builder/null_buffer_builder.rs b/arrow-array/src/builder/null_buffer_builder.rs
index 0061f70c7..f37ce3a74 100644
--- a/arrow-array/src/builder/null_buffer_builder.rs
+++ b/arrow-array/src/builder/null_buffer_builder.rs
@@ -129,8 +129,7 @@ impl NullBufferBuilder {
/// Builds the null buffer and resets the builder.
/// Returns `None` if the builder only contains `true`s.
pub fn finish(&mut self) -> Option<Buffer> {
- let buf = self.bitmap_builder.as_mut().map(|b| b.finish());
- self.bitmap_builder = None;
+ let buf = self.bitmap_builder.take().map(Into::into);
self.len = 0;
buf
}
diff --git a/arrow-integration-test/Cargo.toml b/arrow-integration-test/Cargo.toml
index 6ede476eb..8afbfacff 100644
--- a/arrow-integration-test/Cargo.toml
+++ b/arrow-integration-test/Cargo.toml
@@ -35,7 +35,7 @@ bench = false
[dependencies]
arrow = { workspace = true }
-arrow-buffer = { workspace = true, path = "../arrow-buffer" }
+arrow-buffer = { workspace = true }
hex = { version = "0.4", default-features = false, features = ["std"] }
serde = { version = "1.0", default-features = false, features = ["rc", "derive"] }
serde_json = { version = "1.0", default-features = false, features = ["std"] }
diff --git a/arrow-json/src/reader/list_array.rs b/arrow-json/src/reader/list_array.rs
index aa3538bd5..ad27eb516 100644
--- a/arrow-json/src/reader/list_array.rs
+++ b/arrow-json/src/reader/list_array.rs
@@ -19,7 +19,7 @@ use crate::reader::tape::{Tape, TapeElement};
use crate::reader::{make_decoder, ArrayDecoder};
use arrow_array::builder::{BooleanBufferBuilder, BufferBuilder};
use arrow_array::OffsetSizeTrait;
-use arrow_buffer::buffer::{BooleanBuffer, NullBuffer};
+use arrow_buffer::buffer::NullBuffer;
use arrow_data::{ArrayData, ArrayDataBuilder};
use arrow_schema::{ArrowError, DataType};
use std::marker::PhantomData;
@@ -99,9 +99,7 @@ impl<O: OffsetSizeTrait> ArrayDecoder for ListArrayDecoder<O> {
}
let child_data = self.decoder.decode(tape, &child_pos)?;
- let nulls = nulls
- .as_mut()
- .map(|x| NullBuffer::new(BooleanBuffer::new(x.finish(), 0, pos.len())));
+ let nulls = nulls.as_mut().map(|x| NullBuffer::new(x.finish()));
let data = ArrayDataBuilder::new(self.data_type.clone())
.len(pos.len())
diff --git a/arrow-json/src/reader/map_array.rs b/arrow-json/src/reader/map_array.rs
index 5e800a0d6..2d6fde34d 100644
--- a/arrow-json/src/reader/map_array.rs
+++ b/arrow-json/src/reader/map_array.rs
@@ -18,7 +18,7 @@
use crate::reader::tape::{Tape, TapeElement};
use crate::reader::{make_decoder, ArrayDecoder};
use arrow_array::builder::{BooleanBufferBuilder, BufferBuilder};
-use arrow_buffer::buffer::{BooleanBuffer, NullBuffer};
+use arrow_buffer::buffer::NullBuffer;
use arrow_buffer::ArrowNativeType;
use arrow_data::{ArrayData, ArrayDataBuilder};
use arrow_schema::{ArrowError, DataType};
@@ -139,9 +139,7 @@ impl ArrayDecoder for MapArrayDecoder {
// Valid by construction
let struct_data = unsafe { struct_data.build_unchecked() };
- let nulls = nulls
- .as_mut()
- .map(|x| NullBuffer::new(BooleanBuffer::new(x.finish(), 0, pos.len())));
+ let nulls = nulls.as_mut().map(|x| NullBuffer::new(x.finish()));
let builder = ArrayDataBuilder::new(self.data_type.clone())
.len(pos.len())
diff --git a/arrow-json/src/reader/struct_array.rs b/arrow-json/src/reader/struct_array.rs
index 707b56d50..3d24a927d 100644
--- a/arrow-json/src/reader/struct_array.rs
+++ b/arrow-json/src/reader/struct_array.rs
@@ -18,7 +18,7 @@
use crate::reader::tape::{Tape, TapeElement};
use crate::reader::{make_decoder, ArrayDecoder};
use arrow_array::builder::BooleanBufferBuilder;
-use arrow_buffer::buffer::{BooleanBuffer, NullBuffer};
+use arrow_buffer::buffer::NullBuffer;
use arrow_data::{ArrayData, ArrayDataBuilder};
use arrow_schema::{ArrowError, DataType, Fields};
@@ -113,9 +113,7 @@ impl ArrayDecoder for StructArrayDecoder {
})
.collect::<Result<Vec<_>, ArrowError>>()?;
- let nulls = nulls
- .as_mut()
- .map(|x| NullBuffer::new(BooleanBuffer::new(x.finish(), 0, pos.len())));
+ let nulls = nulls.as_mut().map(|x| NullBuffer::new(x.finish()));
for (c, f) in child_data.iter().zip(fields) {
// Sanity check
diff --git a/arrow-row/src/dictionary.rs b/arrow-row/src/dictionary.rs
index 273b7439d..d790d951e 100644
--- a/arrow-row/src/dictionary.rs
+++ b/arrow-row/src/dictionary.rs
@@ -202,7 +202,7 @@ pub unsafe fn decode_dictionary<K: ArrowDictionaryKeyType>(
let builder = ArrayDataBuilder::new(data_type)
.len(len)
- .null_bit_buffer(Some(null_builder.finish()))
+ .null_bit_buffer(Some(null_builder.into()))
.null_count(null_count)
.add_buffer(keys.finish())
.add_child_data(child);
@@ -250,7 +250,7 @@ fn decode_bool(values: &[&[u8]]) -> ArrayData {
let builder = ArrayDataBuilder::new(DataType::Boolean)
.len(values.len())
- .add_buffer(builder.finish());
+ .add_buffer(builder.into());
// SAFETY: Buffers correct length
unsafe { builder.build_unchecked() }
diff --git a/arrow-select/src/filter.rs b/arrow-select/src/filter.rs
index 06f083356..c89491944 100644
--- a/arrow-select/src/filter.rs
+++ b/arrow-select/src/filter.rs
@@ -433,7 +433,7 @@ fn filter_bits(buffer: &BooleanBuffer, predicate: &FilterPredicate) -> Buffer {
for (start, end) in SlicesIterator::new(&predicate.filter) {
builder.append_packed_range(start + offset..end + offset, src)
}
- builder.finish()
+ builder.into()
}
IterationStrategy::Slices(slices) => {
let mut builder =
@@ -441,7 +441,7 @@ fn filter_bits(buffer: &BooleanBuffer, predicate: &FilterPredicate) -> Buffer {
for (start, end) in slices {
builder.append_packed_range(*start + offset..*end + offset, src)
}
- builder.finish()
+ builder.into()
}
IterationStrategy::All | IterationStrategy::None => unreachable!(),
}
diff --git a/arrow-select/src/interleave.rs b/arrow-select/src/interleave.rs
index 491395d1c..c0d202680 100644
--- a/arrow-select/src/interleave.rs
+++ b/arrow-select/src/interleave.rs
@@ -122,7 +122,7 @@ impl<'a, T: Array + 'static> Interleave<'a, T> {
null_count += !v as usize;
builder.append(v)
}
- builder.finish()
+ builder.into()
});
Self {
diff --git a/arrow-select/src/nullif.rs b/arrow-select/src/nullif.rs
index aaa3423d6..3d9148016 100644
--- a/arrow-select/src/nullif.rs
+++ b/arrow-select/src/nullif.rs
@@ -105,7 +105,7 @@ pub fn nullif(left: &dyn Array, right: &BooleanArray) -> Result<ArrayRef, ArrowE
// Pad with 0s up to offset
builder.resize(l_offset);
builder.append_packed_range(0..len, &combined);
- builder.finish()
+ builder.into()
}
};
diff --git a/arrow-string/src/like.rs b/arrow-string/src/like.rs
index f896f0c6c..6b4aea7e8 100644
--- a/arrow-string/src/like.rs
+++ b/arrow-string/src/like.rs
@@ -607,7 +607,7 @@ where
ArrayDataBuilder::new(DataType::Boolean)
.len(left.len())
.nulls(nulls)
- .buffers(vec![result.finish()])
+ .buffers(vec![result.into()])
.build_unchecked()
};
Ok(BooleanArray::from(data))
diff --git a/arrow-string/src/regexp.rs b/arrow-string/src/regexp.rs
index 7ccd450de..e28564bda 100644
--- a/arrow-string/src/regexp.rs
+++ b/arrow-string/src/regexp.rs
@@ -101,7 +101,7 @@ pub fn regexp_is_match_utf8<OffsetSize: OffsetSizeTrait>(
let data = unsafe {
ArrayDataBuilder::new(DataType::Boolean)
.len(array.len())
- .buffers(vec![result.finish()])
+ .buffers(vec![result.into()])
.nulls(nulls)
.build_unchecked()
};
@@ -136,7 +136,7 @@ pub fn regexp_is_match_utf8_scalar<OffsetSize: OffsetSizeTrait>(
}
}
- let buffer = result.finish();
+ let buffer = result.into();
let data = unsafe {
ArrayData::new_unchecked(
DataType::Boolean,
diff --git a/parquet/src/arrow/array_reader/list_array.rs b/parquet/src/arrow/array_reader/list_array.rs
index a6b354f90..932034417 100644
--- a/parquet/src/arrow/array_reader/list_array.rs
+++ b/parquet/src/arrow/array_reader/list_array.rs
@@ -220,9 +220,9 @@ impl<OffsetSize: OffsetSizeTrait> ArrayReader for ListArrayReader<OffsetSize> {
.add_buffer(value_offsets)
.add_child_data(child_data);
- if let Some(mut builder) = validity {
+ if let Some(builder) = validity {
assert_eq!(builder.len(), list_offsets.len() - 1);
- data_builder = data_builder.null_bit_buffer(Some(builder.finish()))
+ data_builder = data_builder.null_bit_buffer(Some(builder.into()))
}
let list_data = unsafe { data_builder.build_unchecked() };
diff --git a/parquet/src/arrow/array_reader/primitive_array.rs b/parquet/src/arrow/array_reader/primitive_array.rs
index 012cad5c4..772026960 100644
--- a/parquet/src/arrow/array_reader/primitive_array.rs
+++ b/parquet/src/arrow/array_reader/primitive_array.rs
@@ -147,7 +147,7 @@ where
for e in record_data.as_slice() {
boolean_buffer.append(*e > 0);
}
- boolean_buffer.finish()
+ boolean_buffer.into()
}
PhysicalType::INT96 => {
// SAFETY - record_data is an aligned buffer of Int96
diff --git a/parquet/src/arrow/array_reader/struct_array.rs b/parquet/src/arrow/array_reader/struct_array.rs
index 600fda4fb..a147c4e95 100644
--- a/parquet/src/arrow/array_reader/struct_array.rs
+++ b/parquet/src/arrow/array_reader/struct_array.rs
@@ -17,7 +17,7 @@
use crate::arrow::array_reader::ArrayReader;
use crate::errors::{ParquetError, Result};
-use arrow_array::{builder::BooleanBufferBuilder, ArrayRef, StructArray, Array};
+use arrow_array::{builder::BooleanBufferBuilder, Array, ArrayRef, StructArray};
use arrow_data::{ArrayData, ArrayDataBuilder};
use arrow_schema::DataType as ArrowType;
use std::any::Any;
@@ -170,7 +170,7 @@ impl ArrayReader for StructArrayReader {
}
array_data_builder =
- array_data_builder.null_bit_buffer(Some(bitmap_builder.finish()));
+ array_data_builder.null_bit_buffer(Some(bitmap_builder.into()));
}
let array_data = unsafe { array_data_builder.build_unchecked() };
diff --git a/parquet/src/arrow/record_reader/definition_levels.rs b/parquet/src/arrow/record_reader/definition_levels.rs
index 7c27a365f..272716caf 100644
--- a/parquet/src/arrow/record_reader/definition_levels.rs
+++ b/parquet/src/arrow/record_reader/definition_levels.rs
@@ -123,7 +123,7 @@ impl DefinitionLevelBuffer {
// Swap into self
self.len = new_builder.len();
- std::mem::replace(old_builder, new_builder).finish()
+ std::mem::replace(old_builder, new_builder).into()
}
pub fn nulls(&self) -> &BooleanBufferBuilder {