You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by tu...@apache.org on 2023/04/26 18:07:19 UTC

[arrow-rs] branch master updated: Return BooleanBuffer from BooleanBufferBuilder (#4140)

This is an automated email from the ASF dual-hosted git repository.

tustvold pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git


The following commit(s) were added to refs/heads/master by this push:
     new 341f26459 Return BooleanBuffer from BooleanBufferBuilder (#4140)
341f26459 is described below

commit 341f2645962b32015b33e20738d9f803f26bae2b
Author: Raphael Taylor-Davies <17...@users.noreply.github.com>
AuthorDate: Wed Apr 26 14:07:13 2023 -0400

    Return BooleanBuffer from BooleanBufferBuilder (#4140)
    
    * Return BooleanBuffer from BooleanBufferBuilder
    
    * Clippy
---
 arrow-arith/src/arithmetic.rs                      |  9 +----
 arrow-array/src/array/primitive_array.rs           |  6 +--
 arrow-array/src/builder/boolean_buffer_builder.rs  | 43 +++++++++++++---------
 arrow-array/src/builder/boolean_builder.rs         |  2 +-
 arrow-array/src/builder/null_buffer_builder.rs     |  3 +-
 arrow-integration-test/Cargo.toml                  |  2 +-
 arrow-json/src/reader/list_array.rs                |  6 +--
 arrow-json/src/reader/map_array.rs                 |  6 +--
 arrow-json/src/reader/struct_array.rs              |  6 +--
 arrow-row/src/dictionary.rs                        |  4 +-
 arrow-select/src/filter.rs                         |  4 +-
 arrow-select/src/interleave.rs                     |  2 +-
 arrow-select/src/nullif.rs                         |  2 +-
 arrow-string/src/like.rs                           |  2 +-
 arrow-string/src/regexp.rs                         |  4 +-
 parquet/src/arrow/array_reader/list_array.rs       |  4 +-
 parquet/src/arrow/array_reader/primitive_array.rs  |  2 +-
 parquet/src/arrow/array_reader/struct_array.rs     |  4 +-
 .../src/arrow/record_reader/definition_levels.rs   |  2 +-
 19 files changed, 53 insertions(+), 60 deletions(-)

diff --git a/arrow-arith/src/arithmetic.rs b/arrow-arith/src/arithmetic.rs
index 2b8a2f3b7..7f5a08190 100644
--- a/arrow-arith/src/arithmetic.rs
+++ b/arrow-arith/src/arithmetic.rs
@@ -1965,7 +1965,7 @@ mod tests {
         BooleanBufferBuilder, BufferBuilder, PrimitiveDictionaryBuilder,
     };
     use arrow_array::temporal_conversions::SECONDS_IN_DAY;
-    use arrow_buffer::buffer::{BooleanBuffer, NullBuffer};
+    use arrow_buffer::buffer::NullBuffer;
     use arrow_buffer::i256;
     use arrow_data::ArrayDataBuilder;
     use chrono::NaiveDate;
@@ -3575,12 +3575,7 @@ mod tests {
         null_buffer_builder.resize(13);
         assert_eq!(null_buffer_builder.len(), 13);
 
-        let null_buffer = null_buffer_builder.finish();
-
-        // `count_set_bits_offset` takes len in bits as parameter.
-        assert_eq!(null_buffer.count_set_bits_offset(0, 13), 0);
-
-        let nulls = BooleanBuffer::new(null_buffer, 0, 13);
+        let nulls = null_buffer_builder.finish();
         assert_eq!(nulls.count_set_bits(), 0);
         let nulls = NullBuffer::new(nulls);
         assert_eq!(nulls.null_count(), 13);
diff --git a/arrow-array/src/array/primitive_array.rs b/arrow-array/src/array/primitive_array.rs
index 9fb78eb14..8c8562b5b 100644
--- a/arrow-array/src/array/primitive_array.rs
+++ b/arrow-array/src/array/primitive_array.rs
@@ -25,9 +25,7 @@ use crate::timezone::Tz;
 use crate::trusted_len::trusted_len_unzip;
 use crate::types::*;
 use crate::{Array, ArrayAccessor, ArrayRef};
-use arrow_buffer::{
-    i256, ArrowNativeType, BooleanBuffer, Buffer, NullBuffer, ScalarBuffer,
-};
+use arrow_buffer::{i256, ArrowNativeType, Buffer, NullBuffer, ScalarBuffer};
 use arrow_data::bit_iterator::try_for_each_valid_idx;
 use arrow_data::{ArrayData, ArrayDataBuilder};
 use arrow_schema::{ArrowError, DataType};
@@ -642,7 +640,7 @@ impl<T: ArrowPrimitiveType> PrimitiveArray<T> {
             Ok::<_, ()>(())
         });
 
-        let nulls = BooleanBuffer::new(null_builder.finish(), 0, len);
+        let nulls = null_builder.finish();
         let values = buffer.finish().into();
         let nulls = unsafe { NullBuffer::new_unchecked(nulls, out_null_count) };
         PrimitiveArray::new(values, Some(nulls))
diff --git a/arrow-array/src/builder/boolean_buffer_builder.rs b/arrow-array/src/builder/boolean_buffer_builder.rs
index ac2a96fea..f721504d0 100644
--- a/arrow-array/src/builder/boolean_buffer_builder.rs
+++ b/arrow-array/src/builder/boolean_buffer_builder.rs
@@ -15,7 +15,7 @@
 // specific language governing permissions and limitations
 // under the License.
 
-use arrow_buffer::{bit_util, Buffer, MutableBuffer};
+use arrow_buffer::{bit_util, BooleanBuffer, Buffer, MutableBuffer};
 use arrow_data::bit_mask;
 use std::ops::Range;
 
@@ -214,12 +214,12 @@ impl BooleanBufferBuilder {
         self.buffer.as_slice_mut()
     }
 
-    /// Creates a [`Buffer`]
+    /// Creates a [`BooleanBuffer`]
     #[inline]
-    pub fn finish(&mut self) -> Buffer {
+    pub fn finish(&mut self) -> BooleanBuffer {
         let buf = std::mem::replace(&mut self.buffer, MutableBuffer::new(0));
-        self.len = 0;
-        buf.into()
+        let len = std::mem::replace(&mut self.len, 0);
+        BooleanBuffer::new(buf.into(), 0, len)
     }
 }
 
@@ -230,6 +230,13 @@ impl From<BooleanBufferBuilder> for Buffer {
     }
 }
 
+impl From<BooleanBufferBuilder> for BooleanBuffer {
+    #[inline]
+    fn from(builder: BooleanBufferBuilder) -> Self {
+        BooleanBuffer::new(builder.buffer.into(), 0, builder.len)
+    }
+}
+
 #[cfg(test)]
 mod tests {
     use super::*;
@@ -244,7 +251,7 @@ mod tests {
         assert_eq!(4, b.len());
         assert_eq!(512, b.capacity());
         let buffer = b.finish();
-        assert_eq!(1, buffer.len());
+        assert_eq!(4, buffer.len());
 
         // Overallocate capacity
         let mut b = BooleanBufferBuilder::new(8);
@@ -252,7 +259,7 @@ mod tests {
         assert_eq!(4, b.len());
         assert_eq!(512, b.capacity());
         let buffer = b.finish();
-        assert_eq!(1, buffer.len());
+        assert_eq!(4, buffer.len());
     }
 
     #[test]
@@ -264,7 +271,7 @@ mod tests {
         buffer.append(true);
         buffer.set_bit(0, false);
         assert_eq!(buffer.len(), 4);
-        assert_eq!(buffer.finish().as_slice(), &[0b1010_u8]);
+        assert_eq!(buffer.finish().values(), &[0b1010_u8]);
     }
 
     #[test]
@@ -276,7 +283,7 @@ mod tests {
         buffer.append(true);
         buffer.set_bit(3, false);
         assert_eq!(buffer.len(), 4);
-        assert_eq!(buffer.finish().as_slice(), &[0b0011_u8]);
+        assert_eq!(buffer.finish().values(), &[0b0011_u8]);
     }
 
     #[test]
@@ -288,7 +295,7 @@ mod tests {
         buffer.append(true);
         buffer.set_bit(1, false);
         assert_eq!(buffer.len(), 4);
-        assert_eq!(buffer.finish().as_slice(), &[0b1001_u8]);
+        assert_eq!(buffer.finish().values(), &[0b1001_u8]);
     }
 
     #[test]
@@ -302,7 +309,7 @@ mod tests {
         buffer.set_bit(1, false);
         buffer.set_bit(2, false);
         assert_eq!(buffer.len(), 5);
-        assert_eq!(buffer.finish().as_slice(), &[0b10001_u8]);
+        assert_eq!(buffer.finish().values(), &[0b10001_u8]);
     }
 
     #[test]
@@ -313,7 +320,7 @@ mod tests {
         buffer.set_bit(3, false);
         buffer.set_bit(9, false);
         assert_eq!(buffer.len(), 10);
-        assert_eq!(buffer.finish().as_slice(), &[0b11110110_u8, 0b01_u8]);
+        assert_eq!(buffer.finish().values(), &[0b11110110_u8, 0b01_u8]);
     }
 
     #[test]
@@ -329,7 +336,7 @@ mod tests {
         buffer.set_bit(14, true);
         buffer.set_bit(13, false);
         assert_eq!(buffer.len(), 15);
-        assert_eq!(buffer.finish().as_slice(), &[0b01010110_u8, 0b1011100_u8]);
+        assert_eq!(buffer.finish().values(), &[0b01010110_u8, 0b1011100_u8]);
     }
 
     #[test]
@@ -394,7 +401,7 @@ mod tests {
             let start = a.min(b);
             let end = a.max(b);
 
-            buffer.append_packed_range(start..end, compacted_src.as_slice());
+            buffer.append_packed_range(start..end, compacted_src.values());
             all_bools.extend_from_slice(&src[start..end]);
         }
 
@@ -430,14 +437,14 @@ mod tests {
         let mut builder = BooleanBufferBuilder::new_from_buffer(b, 2);
         builder.advance(2);
         let finished = builder.finish();
-        assert_eq!(finished.as_slice(), &[0b00000011]);
+        assert_eq!(finished.values(), &[0b00000011]);
 
         let mut builder = BooleanBufferBuilder::new(10);
         builder.append_n(5, true);
         builder.resize(3);
         builder.advance(2);
         let finished = builder.finish();
-        assert_eq!(finished.as_slice(), &[0b00000111]);
+        assert_eq!(finished.values(), &[0b00000111]);
 
         let mut builder = BooleanBufferBuilder::new(10);
         builder.append_n(16, true);
@@ -478,7 +485,7 @@ mod tests {
         }
         let buf2 = builder.finish();
 
-        assert_eq!(buf.len(), buf2.len());
-        assert_eq!(buf.as_slice(), buf2.as_slice());
+        assert_eq!(buf.len(), buf2.inner().len());
+        assert_eq!(buf.as_slice(), buf2.values());
     }
 }
diff --git a/arrow-array/src/builder/boolean_builder.rs b/arrow-array/src/builder/boolean_builder.rs
index bc3b62f99..c7974967a 100644
--- a/arrow-array/src/builder/boolean_builder.rs
+++ b/arrow-array/src/builder/boolean_builder.rs
@@ -149,7 +149,7 @@ impl BooleanBuilder {
         let null_bit_buffer = self.null_buffer_builder.finish();
         let builder = ArrayData::builder(DataType::Boolean)
             .len(len)
-            .add_buffer(self.values_builder.finish())
+            .add_buffer(self.values_builder.finish().into_inner())
             .null_bit_buffer(null_bit_buffer);
 
         let array_data = unsafe { builder.build_unchecked() };
diff --git a/arrow-array/src/builder/null_buffer_builder.rs b/arrow-array/src/builder/null_buffer_builder.rs
index 0061f70c7..f37ce3a74 100644
--- a/arrow-array/src/builder/null_buffer_builder.rs
+++ b/arrow-array/src/builder/null_buffer_builder.rs
@@ -129,8 +129,7 @@ impl NullBufferBuilder {
     /// Builds the null buffer and resets the builder.
     /// Returns `None` if the builder only contains `true`s.
     pub fn finish(&mut self) -> Option<Buffer> {
-        let buf = self.bitmap_builder.as_mut().map(|b| b.finish());
-        self.bitmap_builder = None;
+        let buf = self.bitmap_builder.take().map(Into::into);
         self.len = 0;
         buf
     }
diff --git a/arrow-integration-test/Cargo.toml b/arrow-integration-test/Cargo.toml
index 6ede476eb..8afbfacff 100644
--- a/arrow-integration-test/Cargo.toml
+++ b/arrow-integration-test/Cargo.toml
@@ -35,7 +35,7 @@ bench = false
 
 [dependencies]
 arrow = { workspace = true }
-arrow-buffer = { workspace = true, path = "../arrow-buffer" }
+arrow-buffer = { workspace = true }
 hex = { version = "0.4", default-features = false, features = ["std"] }
 serde = { version = "1.0", default-features = false, features = ["rc", "derive"] }
 serde_json = { version = "1.0", default-features = false, features = ["std"] }
diff --git a/arrow-json/src/reader/list_array.rs b/arrow-json/src/reader/list_array.rs
index aa3538bd5..ad27eb516 100644
--- a/arrow-json/src/reader/list_array.rs
+++ b/arrow-json/src/reader/list_array.rs
@@ -19,7 +19,7 @@ use crate::reader::tape::{Tape, TapeElement};
 use crate::reader::{make_decoder, ArrayDecoder};
 use arrow_array::builder::{BooleanBufferBuilder, BufferBuilder};
 use arrow_array::OffsetSizeTrait;
-use arrow_buffer::buffer::{BooleanBuffer, NullBuffer};
+use arrow_buffer::buffer::NullBuffer;
 use arrow_data::{ArrayData, ArrayDataBuilder};
 use arrow_schema::{ArrowError, DataType};
 use std::marker::PhantomData;
@@ -99,9 +99,7 @@ impl<O: OffsetSizeTrait> ArrayDecoder for ListArrayDecoder<O> {
         }
 
         let child_data = self.decoder.decode(tape, &child_pos)?;
-        let nulls = nulls
-            .as_mut()
-            .map(|x| NullBuffer::new(BooleanBuffer::new(x.finish(), 0, pos.len())));
+        let nulls = nulls.as_mut().map(|x| NullBuffer::new(x.finish()));
 
         let data = ArrayDataBuilder::new(self.data_type.clone())
             .len(pos.len())
diff --git a/arrow-json/src/reader/map_array.rs b/arrow-json/src/reader/map_array.rs
index 5e800a0d6..2d6fde34d 100644
--- a/arrow-json/src/reader/map_array.rs
+++ b/arrow-json/src/reader/map_array.rs
@@ -18,7 +18,7 @@
 use crate::reader::tape::{Tape, TapeElement};
 use crate::reader::{make_decoder, ArrayDecoder};
 use arrow_array::builder::{BooleanBufferBuilder, BufferBuilder};
-use arrow_buffer::buffer::{BooleanBuffer, NullBuffer};
+use arrow_buffer::buffer::NullBuffer;
 use arrow_buffer::ArrowNativeType;
 use arrow_data::{ArrayData, ArrayDataBuilder};
 use arrow_schema::{ArrowError, DataType};
@@ -139,9 +139,7 @@ impl ArrayDecoder for MapArrayDecoder {
         // Valid by construction
         let struct_data = unsafe { struct_data.build_unchecked() };
 
-        let nulls = nulls
-            .as_mut()
-            .map(|x| NullBuffer::new(BooleanBuffer::new(x.finish(), 0, pos.len())));
+        let nulls = nulls.as_mut().map(|x| NullBuffer::new(x.finish()));
 
         let builder = ArrayDataBuilder::new(self.data_type.clone())
             .len(pos.len())
diff --git a/arrow-json/src/reader/struct_array.rs b/arrow-json/src/reader/struct_array.rs
index 707b56d50..3d24a927d 100644
--- a/arrow-json/src/reader/struct_array.rs
+++ b/arrow-json/src/reader/struct_array.rs
@@ -18,7 +18,7 @@
 use crate::reader::tape::{Tape, TapeElement};
 use crate::reader::{make_decoder, ArrayDecoder};
 use arrow_array::builder::BooleanBufferBuilder;
-use arrow_buffer::buffer::{BooleanBuffer, NullBuffer};
+use arrow_buffer::buffer::NullBuffer;
 use arrow_data::{ArrayData, ArrayDataBuilder};
 use arrow_schema::{ArrowError, DataType, Fields};
 
@@ -113,9 +113,7 @@ impl ArrayDecoder for StructArrayDecoder {
             })
             .collect::<Result<Vec<_>, ArrowError>>()?;
 
-        let nulls = nulls
-            .as_mut()
-            .map(|x| NullBuffer::new(BooleanBuffer::new(x.finish(), 0, pos.len())));
+        let nulls = nulls.as_mut().map(|x| NullBuffer::new(x.finish()));
 
         for (c, f) in child_data.iter().zip(fields) {
             // Sanity check
diff --git a/arrow-row/src/dictionary.rs b/arrow-row/src/dictionary.rs
index 273b7439d..d790d951e 100644
--- a/arrow-row/src/dictionary.rs
+++ b/arrow-row/src/dictionary.rs
@@ -202,7 +202,7 @@ pub unsafe fn decode_dictionary<K: ArrowDictionaryKeyType>(
 
     let builder = ArrayDataBuilder::new(data_type)
         .len(len)
-        .null_bit_buffer(Some(null_builder.finish()))
+        .null_bit_buffer(Some(null_builder.into()))
         .null_count(null_count)
         .add_buffer(keys.finish())
         .add_child_data(child);
@@ -250,7 +250,7 @@ fn decode_bool(values: &[&[u8]]) -> ArrayData {
 
     let builder = ArrayDataBuilder::new(DataType::Boolean)
         .len(values.len())
-        .add_buffer(builder.finish());
+        .add_buffer(builder.into());
 
     // SAFETY: Buffers correct length
     unsafe { builder.build_unchecked() }
diff --git a/arrow-select/src/filter.rs b/arrow-select/src/filter.rs
index 06f083356..c89491944 100644
--- a/arrow-select/src/filter.rs
+++ b/arrow-select/src/filter.rs
@@ -433,7 +433,7 @@ fn filter_bits(buffer: &BooleanBuffer, predicate: &FilterPredicate) -> Buffer {
             for (start, end) in SlicesIterator::new(&predicate.filter) {
                 builder.append_packed_range(start + offset..end + offset, src)
             }
-            builder.finish()
+            builder.into()
         }
         IterationStrategy::Slices(slices) => {
             let mut builder =
@@ -441,7 +441,7 @@ fn filter_bits(buffer: &BooleanBuffer, predicate: &FilterPredicate) -> Buffer {
             for (start, end) in slices {
                 builder.append_packed_range(*start + offset..*end + offset, src)
             }
-            builder.finish()
+            builder.into()
         }
         IterationStrategy::All | IterationStrategy::None => unreachable!(),
     }
diff --git a/arrow-select/src/interleave.rs b/arrow-select/src/interleave.rs
index 491395d1c..c0d202680 100644
--- a/arrow-select/src/interleave.rs
+++ b/arrow-select/src/interleave.rs
@@ -122,7 +122,7 @@ impl<'a, T: Array + 'static> Interleave<'a, T> {
                 null_count += !v as usize;
                 builder.append(v)
             }
-            builder.finish()
+            builder.into()
         });
 
         Self {
diff --git a/arrow-select/src/nullif.rs b/arrow-select/src/nullif.rs
index aaa3423d6..3d9148016 100644
--- a/arrow-select/src/nullif.rs
+++ b/arrow-select/src/nullif.rs
@@ -105,7 +105,7 @@ pub fn nullif(left: &dyn Array, right: &BooleanArray) -> Result<ArrayRef, ArrowE
             // Pad with 0s up to offset
             builder.resize(l_offset);
             builder.append_packed_range(0..len, &combined);
-            builder.finish()
+            builder.into()
         }
     };
 
diff --git a/arrow-string/src/like.rs b/arrow-string/src/like.rs
index f896f0c6c..6b4aea7e8 100644
--- a/arrow-string/src/like.rs
+++ b/arrow-string/src/like.rs
@@ -607,7 +607,7 @@ where
         ArrayDataBuilder::new(DataType::Boolean)
             .len(left.len())
             .nulls(nulls)
-            .buffers(vec![result.finish()])
+            .buffers(vec![result.into()])
             .build_unchecked()
     };
     Ok(BooleanArray::from(data))
diff --git a/arrow-string/src/regexp.rs b/arrow-string/src/regexp.rs
index 7ccd450de..e28564bda 100644
--- a/arrow-string/src/regexp.rs
+++ b/arrow-string/src/regexp.rs
@@ -101,7 +101,7 @@ pub fn regexp_is_match_utf8<OffsetSize: OffsetSizeTrait>(
     let data = unsafe {
         ArrayDataBuilder::new(DataType::Boolean)
             .len(array.len())
-            .buffers(vec![result.finish()])
+            .buffers(vec![result.into()])
             .nulls(nulls)
             .build_unchecked()
     };
@@ -136,7 +136,7 @@ pub fn regexp_is_match_utf8_scalar<OffsetSize: OffsetSizeTrait>(
         }
     }
 
-    let buffer = result.finish();
+    let buffer = result.into();
     let data = unsafe {
         ArrayData::new_unchecked(
             DataType::Boolean,
diff --git a/parquet/src/arrow/array_reader/list_array.rs b/parquet/src/arrow/array_reader/list_array.rs
index a6b354f90..932034417 100644
--- a/parquet/src/arrow/array_reader/list_array.rs
+++ b/parquet/src/arrow/array_reader/list_array.rs
@@ -220,9 +220,9 @@ impl<OffsetSize: OffsetSizeTrait> ArrayReader for ListArrayReader<OffsetSize> {
             .add_buffer(value_offsets)
             .add_child_data(child_data);
 
-        if let Some(mut builder) = validity {
+        if let Some(builder) = validity {
             assert_eq!(builder.len(), list_offsets.len() - 1);
-            data_builder = data_builder.null_bit_buffer(Some(builder.finish()))
+            data_builder = data_builder.null_bit_buffer(Some(builder.into()))
         }
 
         let list_data = unsafe { data_builder.build_unchecked() };
diff --git a/parquet/src/arrow/array_reader/primitive_array.rs b/parquet/src/arrow/array_reader/primitive_array.rs
index 012cad5c4..772026960 100644
--- a/parquet/src/arrow/array_reader/primitive_array.rs
+++ b/parquet/src/arrow/array_reader/primitive_array.rs
@@ -147,7 +147,7 @@ where
                 for e in record_data.as_slice() {
                     boolean_buffer.append(*e > 0);
                 }
-                boolean_buffer.finish()
+                boolean_buffer.into()
             }
             PhysicalType::INT96 => {
                 // SAFETY - record_data is an aligned buffer of Int96
diff --git a/parquet/src/arrow/array_reader/struct_array.rs b/parquet/src/arrow/array_reader/struct_array.rs
index 600fda4fb..a147c4e95 100644
--- a/parquet/src/arrow/array_reader/struct_array.rs
+++ b/parquet/src/arrow/array_reader/struct_array.rs
@@ -17,7 +17,7 @@
 
 use crate::arrow::array_reader::ArrayReader;
 use crate::errors::{ParquetError, Result};
-use arrow_array::{builder::BooleanBufferBuilder, ArrayRef, StructArray, Array};
+use arrow_array::{builder::BooleanBufferBuilder, Array, ArrayRef, StructArray};
 use arrow_data::{ArrayData, ArrayDataBuilder};
 use arrow_schema::DataType as ArrowType;
 use std::any::Any;
@@ -170,7 +170,7 @@ impl ArrayReader for StructArrayReader {
             }
 
             array_data_builder =
-                array_data_builder.null_bit_buffer(Some(bitmap_builder.finish()));
+                array_data_builder.null_bit_buffer(Some(bitmap_builder.into()));
         }
 
         let array_data = unsafe { array_data_builder.build_unchecked() };
diff --git a/parquet/src/arrow/record_reader/definition_levels.rs b/parquet/src/arrow/record_reader/definition_levels.rs
index 7c27a365f..272716caf 100644
--- a/parquet/src/arrow/record_reader/definition_levels.rs
+++ b/parquet/src/arrow/record_reader/definition_levels.rs
@@ -123,7 +123,7 @@ impl DefinitionLevelBuffer {
 
         // Swap into self
         self.len = new_builder.len();
-        std::mem::replace(old_builder, new_builder).finish()
+        std::mem::replace(old_builder, new_builder).into()
     }
 
     pub fn nulls(&self) -> &BooleanBufferBuilder {