You are viewing a plain text version of this content. The canonical link for it is not included in this plain-text copy.
Posted to commits@arrow.apache.org by tu...@apache.org on 2022/05/27 08:11:32 UTC
[arrow-rs] branch master updated: Rewrite `ArrayDataBuilder::null_bit_buffer` (#1739)
This is an automated email from the ASF dual-hosted git repository.
tustvold pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git
The following commit(s) were added to refs/heads/master by this push:
new 9722f062c Rewrite `ArrayDataBuilder::null_bit_buffer` (#1739)
9722f062c is described below
commit 9722f062c2ebedeeaeccd789e9cb4808a5ad9e58
Author: Remzi Yang <59...@users.noreply.github.com>
AuthorDate: Fri May 27 16:11:27 2022 +0800
Rewrite `ArrayDataBuilder::null_bit_buffer` (#1739)
* clean up
Signed-off-by: remzi <13...@gmail.com>
* fix a nit
Signed-off-by: remzi <13...@gmail.com>
* use `bool::then`
Signed-off-by: remzi <13...@gmail.com>
* fix merge conflict
Signed-off-by: remzi <13...@gmail.com>
---
arrow/examples/builders.rs | 2 +-
arrow/src/array/array.rs | 2 +-
arrow/src/array/array_binary.rs | 26 ++---
arrow/src/array/array_dictionary.rs | 2 +-
arrow/src/array/array_list.rs | 12 +--
arrow/src/array/array_map.rs | 2 +-
arrow/src/array/array_primitive.rs | 2 +-
arrow/src/array/array_string.rs | 10 +-
arrow/src/array/array_struct.rs | 18 ++--
arrow/src/array/builder.rs | 42 ++++----
arrow/src/array/data.rs | 16 +--
arrow/src/array/equal/mod.rs | 22 ++---
arrow/src/array/ffi.rs | 4 +-
arrow/src/array/transform/mod.rs | 2 +-
arrow/src/compute/kernels/cast.rs | 16 ++-
arrow/src/compute/kernels/comparison.rs | 2 +-
arrow/src/compute/kernels/concat_elements.rs | 14 +--
arrow/src/compute/kernels/filter.rs | 12 +--
arrow/src/compute/kernels/limit.rs | 8 +-
arrow/src/compute/kernels/substring.rs | 6 +-
arrow/src/compute/kernels/take.rs | 27 +++--
arrow/src/compute/util.rs | 4 +-
arrow/src/ffi.rs | 4 +-
arrow/src/ipc/reader.rs | 110 +++++++++------------
arrow/src/json/reader.rs | 20 ++--
arrow/src/json/writer.rs | 8 +-
integration-testing/src/lib.rs | 12 +--
parquet/src/arrow/array_reader.rs | 11 +--
.../src/arrow/array_reader/dictionary_buffer.rs | 9 +-
parquet/src/arrow/array_reader/list_array.rs | 6 +-
parquet/src/arrow/array_reader/map_array.rs | 2 +-
parquet/src/arrow/array_reader/offset_buffer.rs | 9 +-
parquet/src/arrow/arrow_writer.rs | 20 ++--
parquet/src/arrow/levels.rs | 2 +-
34 files changed, 212 insertions(+), 252 deletions(-)
diff --git a/arrow/examples/builders.rs b/arrow/examples/builders.rs
index 0dc1d76f3..d35cb5ab7 100644
--- a/arrow/examples/builders.rs
+++ b/arrow/examples/builders.rs
@@ -81,7 +81,7 @@ fn main() {
.len(3)
.add_buffer(Buffer::from(offsets.to_byte_slice()))
.add_buffer(Buffer::from(&values[..]))
- .null_bit_buffer(Buffer::from([0b00000101]))
+ .null_bit_buffer(Some(Buffer::from([0b00000101])))
.build()
.unwrap();
let binary_array = StringArray::from(array_data);
diff --git a/arrow/src/array/array.rs b/arrow/src/array/array.rs
index ed99a6b9f..f28aba59d 100644
--- a/arrow/src/array/array.rs
+++ b/arrow/src/array/array.rs
@@ -877,7 +877,7 @@ mod tests {
let empty_with_bitmap = PrimitiveArray::<Int64Type>::from(
ArrayData::builder(arr.data_type().clone())
.add_buffer(MutableBuffer::new(0).into())
- .null_bit_buffer(MutableBuffer::new_null(0).into())
+ .null_bit_buffer(Some(MutableBuffer::new_null(0).into()))
.build()
.unwrap(),
);
diff --git a/arrow/src/array/array_binary.rs b/arrow/src/array/array_binary.rs
index b8bf76dc7..a3ab4aeaa 100644
--- a/arrow/src/array/array_binary.rs
+++ b/arrow/src/array/array_binary.rs
@@ -152,13 +152,11 @@ impl<OffsetSize: OffsetSizeTrait> GenericBinaryArray<OffsetSize> {
"BinaryArray can only be created from List<u8> arrays, mismatched data types."
);
- let mut builder = ArrayData::builder(Self::get_data_type())
+ let builder = ArrayData::builder(Self::get_data_type())
.len(v.len())
.add_buffer(v.data_ref().buffers()[0].clone())
- .add_buffer(v.data_ref().child_data()[0].buffers()[0].clone());
- if let Some(bitmap) = v.data_ref().null_bitmap() {
- builder = builder.null_bit_buffer(bitmap.bits.clone())
- }
+ .add_buffer(v.data_ref().child_data()[0].buffers()[0].clone())
+ .null_bit_buffer(v.data_ref().null_buffer().cloned());
let data = unsafe { builder.build_unchecked() };
Self::from(data)
@@ -308,7 +306,7 @@ where
.len(data_len)
.add_buffer(Buffer::from_slice_ref(&offsets))
.add_buffer(Buffer::from_slice_ref(&values))
- .null_bit_buffer(null_buf.into());
+ .null_bit_buffer(Some(null_buf.into()));
let array_data = unsafe { array_data.build_unchecked() };
Self::from(array_data)
}
@@ -692,12 +690,10 @@ impl From<FixedSizeListArray> for FixedSizeBinaryArray {
"FixedSizeBinaryArray can only be created from FixedSizeList<u8> arrays, mismatched data types."
);
- let mut builder = ArrayData::builder(DataType::FixedSizeBinary(v.value_length()))
+ let builder = ArrayData::builder(DataType::FixedSizeBinary(v.value_length()))
.len(v.len())
- .add_buffer(v.data_ref().child_data()[0].buffers()[0].clone());
- if let Some(bitmap) = v.data_ref().null_bitmap() {
- builder = builder.null_bit_buffer(bitmap.bits.clone())
- }
+ .add_buffer(v.data_ref().child_data()[0].buffers()[0].clone())
+ .null_bit_buffer(v.data_ref().null_buffer().cloned());
let data = unsafe { builder.build_unchecked() };
Self::from(data)
@@ -849,12 +845,10 @@ impl DecimalArray {
"DecimalArray can only be created from FixedSizeList<u8> arrays, mismatched data types."
);
- let mut builder = ArrayData::builder(DataType::Decimal(precision, scale))
+ let builder = ArrayData::builder(DataType::Decimal(precision, scale))
.len(v.len())
- .add_buffer(v.data_ref().child_data()[0].buffers()[0].clone());
- if let Some(bitmap) = v.data_ref().null_bitmap() {
- builder = builder.null_bit_buffer(bitmap.bits.clone())
- }
+ .add_buffer(v.data_ref().child_data()[0].buffers()[0].clone())
+ .null_bit_buffer(v.data_ref().null_buffer().cloned());
let array_data = unsafe { builder.build_unchecked() };
Self::from(array_data)
diff --git a/arrow/src/array/array_dictionary.rs b/arrow/src/array/array_dictionary.rs
index a9feff903..b967b3abb 100644
--- a/arrow/src/array/array_dictionary.rs
+++ b/arrow/src/array/array_dictionary.rs
@@ -107,7 +107,7 @@ impl<'a, K: ArrowPrimitiveType> DictionaryArray<K> {
match keys.data().null_buffer() {
Some(buffer) if keys.data().null_count() > 0 => {
data = data
- .null_bit_buffer(buffer.clone())
+ .null_bit_buffer(Some(buffer.clone()))
.null_count(keys.data().null_count());
}
_ => data = data.null_count(0),
diff --git a/arrow/src/array/array_list.rs b/arrow/src/array/array_list.rs
index acd0cffba..709e4e7ba 100644
--- a/arrow/src/array/array_list.rs
+++ b/arrow/src/array/array_list.rs
@@ -178,7 +178,7 @@ impl<OffsetSize: OffsetSizeTrait> GenericListArray<OffsetSize> {
.len(null_buf.len())
.add_buffer(offsets.into())
.add_child_data(values.data().clone())
- .null_bit_buffer(null_buf.into());
+ .null_bit_buffer(Some(null_buf.into()));
let array_data = unsafe { array_data.build_unchecked() };
Self::from(array_data)
@@ -836,7 +836,7 @@ mod tests {
.len(9)
.add_buffer(value_offsets)
.add_child_data(value_data.clone())
- .null_bit_buffer(Buffer::from(null_bits))
+ .null_bit_buffer(Some(Buffer::from(null_bits)))
.build()
.unwrap();
let list_array = ListArray::from(list_data);
@@ -900,7 +900,7 @@ mod tests {
.len(9)
.add_buffer(value_offsets)
.add_child_data(value_data.clone())
- .null_bit_buffer(Buffer::from(null_bits))
+ .null_bit_buffer(Some(Buffer::from(null_bits)))
.build()
.unwrap();
let list_array = LargeListArray::from(list_data);
@@ -967,7 +967,7 @@ mod tests {
.len(9)
.add_buffer(value_offsets)
.add_child_data(value_data)
- .null_bit_buffer(Buffer::from(null_bits))
+ .null_bit_buffer(Some(Buffer::from(null_bits)))
.build()
.unwrap();
let list_array = LargeListArray::from(list_data);
@@ -1001,7 +1001,7 @@ mod tests {
let list_data = ArrayData::builder(list_data_type)
.len(5)
.add_child_data(value_data.clone())
- .null_bit_buffer(Buffer::from(null_bits))
+ .null_bit_buffer(Some(Buffer::from(null_bits)))
.build()
.unwrap();
let list_array = FixedSizeListArray::from(list_data);
@@ -1063,7 +1063,7 @@ mod tests {
let list_data = ArrayData::builder(list_data_type)
.len(5)
.add_child_data(value_data)
- .null_bit_buffer(Buffer::from(null_bits))
+ .null_bit_buffer(Some(Buffer::from(null_bits)))
.build()
.unwrap();
let list_array = FixedSizeListArray::from(list_data);
diff --git a/arrow/src/array/array_map.rs b/arrow/src/array/array_map.rs
index 045d647ad..081362021 100644
--- a/arrow/src/array/array_map.rs
+++ b/arrow/src/array/array_map.rs
@@ -296,7 +296,7 @@ mod tests {
.add_buffer(Buffer::from(
&[0u32, 10, 20, 0, 40, 0, 60, 70].to_byte_slice(),
))
- .null_bit_buffer(Buffer::from(&[0b11010110]))
+ .null_bit_buffer(Some(Buffer::from(&[0b11010110])))
.build()
.unwrap();
diff --git a/arrow/src/array/array_primitive.rs b/arrow/src/array/array_primitive.rs
index e4146e768..8893703aa 100644
--- a/arrow/src/array/array_primitive.rs
+++ b/arrow/src/array/array_primitive.rs
@@ -550,7 +550,7 @@ impl<T: ArrowTimestampType> PrimitiveArray<T> {
ArrayData::builder(DataType::Timestamp(T::get_time_unit(), timezone))
.len(data_len)
.add_buffer(val_buf.into())
- .null_bit_buffer(null_buf.into());
+ .null_bit_buffer(Some(null_buf.into()));
let array_data = unsafe { array_data.build_unchecked() };
PrimitiveArray::from(array_data)
}
diff --git a/arrow/src/array/array_string.rs b/arrow/src/array/array_string.rs
index e2fed3f20..9e09350f7 100644
--- a/arrow/src/array/array_string.rs
+++ b/arrow/src/array/array_string.rs
@@ -131,13 +131,11 @@ impl<OffsetSize: OffsetSizeTrait> GenericStringArray<OffsetSize> {
"StringArray can only be created from List<u8> arrays, mismatched data types."
);
- let mut builder = ArrayData::builder(Self::get_data_type())
+ let builder = ArrayData::builder(Self::get_data_type())
.len(v.len())
.add_buffer(v.data().buffers()[0].clone())
- .add_buffer(v.data().child_data()[0].buffers()[0].clone());
- if let Some(bitmap) = v.data().null_bitmap() {
- builder = builder.null_bit_buffer(bitmap.bits.clone())
- }
+ .add_buffer(v.data().child_data()[0].buffers()[0].clone())
+ .null_bit_buffer(v.data().null_buffer().cloned());
let array_data = unsafe { builder.build_unchecked() };
Self::from(array_data)
@@ -252,7 +250,7 @@ where
.len(data_len)
.add_buffer(offsets.into())
.add_buffer(values.into())
- .null_bit_buffer(null_buf.into());
+ .null_bit_buffer(Some(null_buf.into()));
let array_data = unsafe { array_data.build_unchecked() };
Self::from(array_data)
}
diff --git a/arrow/src/array/array_struct.rs b/arrow/src/array/array_struct.rs
index 936176f49..91c77c72b 100644
--- a/arrow/src/array/array_struct.rs
+++ b/arrow/src/array/array_struct.rs
@@ -176,12 +176,10 @@ impl TryFrom<Vec<(&str, ArrayRef)>> for StructArray {
}
let len = len.unwrap();
- let mut builder = ArrayData::builder(DataType::Struct(fields))
+ let builder = ArrayData::builder(DataType::Struct(fields))
.len(len)
+ .null_bit_buffer(null)
.child_data(child_data);
- if let Some(null_buffer) = null {
- builder = builder.null_bit_buffer(null_buffer);
- }
let array_data = unsafe { builder.build_unchecked() };
@@ -270,7 +268,7 @@ impl From<(Vec<(Field, ArrayRef)>, Buffer)> for StructArray {
}
let array_data = ArrayData::builder(DataType::Struct(field_types))
- .null_bit_buffer(pair.1)
+ .null_bit_buffer(Some(pair.1))
.child_data(field_values.into_iter().map(|a| a.data().clone()).collect())
.len(length);
let array_data = unsafe { array_data.build_unchecked() };
@@ -366,7 +364,7 @@ mod tests {
let expected_string_data = ArrayData::builder(DataType::Utf8)
.len(4)
- .null_bit_buffer(Buffer::from(&[9_u8]))
+ .null_bit_buffer(Some(Buffer::from(&[9_u8])))
.add_buffer(Buffer::from(&[0, 3, 3, 3, 7].to_byte_slice()))
.add_buffer(Buffer::from(b"joemark"))
.build()
@@ -374,7 +372,7 @@ mod tests {
let expected_int_data = ArrayData::builder(DataType::Int32)
.len(4)
- .null_bit_buffer(Buffer::from(&[11_u8]))
+ .null_bit_buffer(Some(Buffer::from(&[11_u8])))
.add_buffer(Buffer::from(&[1, 2, 0, 4].to_byte_slice()))
.build()
.unwrap();
@@ -428,13 +426,13 @@ mod tests {
let boolean_data = ArrayData::builder(DataType::Boolean)
.len(5)
.add_buffer(Buffer::from([0b00010000]))
- .null_bit_buffer(Buffer::from([0b00010001]))
+ .null_bit_buffer(Some(Buffer::from([0b00010001])))
.build()
.unwrap();
let int_data = ArrayData::builder(DataType::Int32)
.len(5)
.add_buffer(Buffer::from([0, 28, 42, 0, 0].to_byte_slice()))
- .null_bit_buffer(Buffer::from([0b00000110]))
+ .null_bit_buffer(Some(Buffer::from([0b00000110])))
.build()
.unwrap();
@@ -446,7 +444,7 @@ mod tests {
.len(5)
.add_child_data(boolean_data.clone())
.add_child_data(int_data.clone())
- .null_bit_buffer(Buffer::from([0b00010111]))
+ .null_bit_buffer(Some(Buffer::from([0b00010111])))
.build()
.unwrap();
let struct_array = StructArray::from(struct_array_data);
diff --git a/arrow/src/array/builder.rs b/arrow/src/array/builder.rs
index 091a51b15..e22a6f81e 100644
--- a/arrow/src/array/builder.rs
+++ b/arrow/src/array/builder.rs
@@ -630,12 +630,11 @@ impl BooleanBuilder {
let len = self.len();
let null_bit_buffer = self.bitmap_builder.finish();
let null_count = len - null_bit_buffer.count_set_bits();
- let mut builder = ArrayData::builder(DataType::Boolean)
+ let builder = ArrayData::builder(DataType::Boolean)
.len(len)
- .add_buffer(self.values_builder.finish());
- if null_count > 0 {
- builder = builder.null_bit_buffer(null_bit_buffer);
- }
+ .add_buffer(self.values_builder.finish())
+ .null_bit_buffer((null_count > 0).then(|| null_bit_buffer));
+
let array_data = unsafe { builder.build_unchecked() };
BooleanArray::from(array_data)
}
@@ -829,12 +828,15 @@ impl<T: ArrowPrimitiveType> PrimitiveBuilder<T> {
.as_ref()
.map(|b| b.count_set_bits())
.unwrap_or(len);
- let mut builder = ArrayData::builder(T::DATA_TYPE)
+ let builder = ArrayData::builder(T::DATA_TYPE)
.len(len)
- .add_buffer(self.values_builder.finish());
- if null_count > 0 {
- builder = builder.null_bit_buffer(null_bit_buffer.unwrap());
- }
+ .add_buffer(self.values_builder.finish())
+ .null_bit_buffer(if null_count > 0 {
+ null_bit_buffer
+ } else {
+ None
+ });
+
let array_data = unsafe { builder.build_unchecked() };
PrimitiveArray::<T>::from(array_data)
}
@@ -856,7 +858,7 @@ impl<T: ArrowPrimitiveType> PrimitiveBuilder<T> {
.len(len)
.add_buffer(self.values_builder.finish());
if null_count > 0 {
- builder = builder.null_bit_buffer(null_bit_buffer.unwrap());
+ builder = builder.null_bit_buffer(null_bit_buffer);
}
builder = builder.add_child_data(values.data().clone());
let array_data = unsafe { builder.build_unchecked() };
@@ -992,7 +994,7 @@ where
.len(len)
.add_buffer(offset_buffer)
.add_child_data(values_data.clone())
- .null_bit_buffer(null_bit_buffer);
+ .null_bit_buffer(Some(null_bit_buffer));
let array_data = unsafe { array_data.build_unchecked() };
@@ -1123,7 +1125,7 @@ where
))
.len(len)
.add_child_data(values_data.clone())
- .null_bit_buffer(null_bit_buffer);
+ .null_bit_buffer(Some(null_bit_buffer));
let array_data = unsafe { array_data.build_unchecked() };
@@ -1710,7 +1712,7 @@ impl StructBuilder {
.len(self.len)
.child_data(child_data);
if null_count > 0 {
- builder = builder.null_bit_buffer(null_bit_buffer);
+ builder = builder.null_bit_buffer(Some(null_bit_buffer));
}
self.len = 0;
@@ -1845,7 +1847,7 @@ impl<K: ArrayBuilder, V: ArrayBuilder> MapBuilder<K, V> {
.len(len)
.add_buffer(offset_buffer)
.add_child_data(struct_array.data().clone())
- .null_bit_buffer(null_bit_buffer);
+ .null_bit_buffer(Some(null_bit_buffer));
let array_data = unsafe { array_data.build_unchecked() };
@@ -2155,7 +2157,7 @@ impl UnionBuilder {
let arr_data_builder = ArrayDataBuilder::new(data_type.clone())
.add_buffer(buffer)
.len(slots)
- .null_bit_buffer(bitmap_builder.finish());
+ .null_bit_buffer(Some(bitmap_builder.finish()));
let arr_data_ref = unsafe { arr_data_builder.build_unchecked() };
let array_ref = make_array(arr_data_ref);
@@ -3534,7 +3536,7 @@ mod tests {
let expected_string_data = ArrayData::builder(DataType::Utf8)
.len(4)
- .null_bit_buffer(Buffer::from(&[9_u8]))
+ .null_bit_buffer(Some(Buffer::from(&[9_u8])))
.add_buffer(Buffer::from_slice_ref(&[0, 3, 3, 3, 7]))
.add_buffer(Buffer::from_slice_ref(b"joemark"))
.build()
@@ -3542,7 +3544,7 @@ mod tests {
let expected_int_data = ArrayData::builder(DataType::Int32)
.len(4)
- .null_bit_buffer(Buffer::from_slice_ref(&[11_u8]))
+ .null_bit_buffer(Some(Buffer::from_slice_ref(&[11_u8])))
.add_buffer(Buffer::from_slice_ref(&[1, 2, 0, 4]))
.build()
.unwrap();
@@ -3648,7 +3650,7 @@ mod tests {
let expected_string_data = ArrayData::builder(DataType::Utf8)
.len(4)
- .null_bit_buffer(Buffer::from(&[9_u8]))
+ .null_bit_buffer(Some(Buffer::from(&[9_u8])))
.add_buffer(Buffer::from_slice_ref(&[0, 3, 3, 3, 7]))
.add_buffer(Buffer::from_slice_ref(b"joemark"))
.build()
@@ -3656,7 +3658,7 @@ mod tests {
let expected_int_data = ArrayData::builder(DataType::Int32)
.len(4)
- .null_bit_buffer(Buffer::from_slice_ref(&[11_u8]))
+ .null_bit_buffer(Some(Buffer::from_slice_ref(&[11_u8])))
.add_buffer(Buffer::from_slice_ref(&[1, 2, 0, 4]))
.build()
.unwrap();
diff --git a/arrow/src/array/data.rs b/arrow/src/array/data.rs
index dcf382ae0..cb6b894a0 100644
--- a/arrow/src/array/data.rs
+++ b/arrow/src/array/data.rs
@@ -1424,8 +1424,8 @@ impl ArrayDataBuilder {
self
}
- pub fn null_bit_buffer(mut self, buf: Buffer) -> Self {
- self.null_bit_buffer = Some(buf);
+ pub fn null_bit_buffer(mut self, buf: Option<Buffer>) -> Self {
+ self.null_bit_buffer = buf;
self
}
@@ -1508,9 +1508,9 @@ mod tests {
.len(20)
.offset(5)
.add_buffer(b1)
- .null_bit_buffer(Buffer::from(vec![
+ .null_bit_buffer(Some(Buffer::from(vec![
0b01011111, 0b10110101, 0b01100011, 0b00011110,
- ]))
+ ])))
.build()
.unwrap();
@@ -1559,7 +1559,7 @@ mod tests {
let arr_data = ArrayData::builder(DataType::Int32)
.len(16)
.add_buffer(make_i32_buffer(16))
- .null_bit_buffer(Buffer::from(bit_v))
+ .null_bit_buffer(Some(Buffer::from(bit_v)))
.build()
.unwrap();
assert_eq!(13, arr_data.null_count());
@@ -1573,7 +1573,7 @@ mod tests {
.len(12)
.offset(2)
.add_buffer(make_i32_buffer(14)) // requires at least 14 bytes of space,
- .null_bit_buffer(Buffer::from(bit_v))
+ .null_bit_buffer(Some(Buffer::from(bit_v)))
.build()
.unwrap();
assert_eq!(10, arr_data.null_count());
@@ -1588,7 +1588,7 @@ mod tests {
let arr_data = ArrayData::builder(DataType::Int32)
.len(16)
.add_buffer(make_i32_buffer(16))
- .null_bit_buffer(Buffer::from(bit_v))
+ .null_bit_buffer(Some(Buffer::from(bit_v)))
.build()
.unwrap();
assert!(arr_data.null_buffer().is_some());
@@ -1604,7 +1604,7 @@ mod tests {
let data = ArrayData::builder(DataType::Int32)
.len(16)
.add_buffer(make_i32_buffer(16))
- .null_bit_buffer(Buffer::from(bit_v))
+ .null_bit_buffer(Some(Buffer::from(bit_v)))
.build()
.unwrap();
let new_data = data.slice(1, 15);
diff --git a/arrow/src/array/equal/mod.rs b/arrow/src/array/equal/mod.rs
index c45b30ccc..b89a8fa53 100644
--- a/arrow/src/array/equal/mod.rs
+++ b/arrow/src/array/equal/mod.rs
@@ -662,7 +662,7 @@ mod tests {
.len(6)
.add_buffer(Buffer::from(vec![0i32, 2, 3, 4, 6, 7, 8].to_byte_slice()))
.add_child_data(c_values.data().clone())
- .null_bit_buffer(Buffer::from(vec![0b00001001]))
+ .null_bit_buffer(Some(Buffer::from(vec![0b00001001])))
.build()
.unwrap();
@@ -684,7 +684,7 @@ mod tests {
.len(6)
.add_buffer(Buffer::from(vec![0i32, 2, 3, 4, 6, 7, 8].to_byte_slice()))
.add_child_data(d_values.data().clone())
- .null_bit_buffer(Buffer::from(vec![0b00001001]))
+ .null_bit_buffer(Some(Buffer::from(vec![0b00001001])))
.build()
.unwrap();
test_equal(&c, &d, true);
@@ -1033,7 +1033,7 @@ mod tests {
Field::new("f1", DataType::Utf8, true),
Field::new("f2", DataType::Int32, true),
]))
- .null_bit_buffer(Buffer::from(vec![0b00001011]))
+ .null_bit_buffer(Some(Buffer::from(vec![0b00001011])))
.len(5)
.add_child_data(strings.data_ref().clone())
.add_child_data(ints.data_ref().clone())
@@ -1045,7 +1045,7 @@ mod tests {
Field::new("f1", DataType::Utf8, true),
Field::new("f2", DataType::Int32, true),
]))
- .null_bit_buffer(Buffer::from(vec![0b00001011]))
+ .null_bit_buffer(Some(Buffer::from(vec![0b00001011])))
.len(5)
.add_child_data(strings.data_ref().clone())
.add_child_data(ints_non_null.data_ref().clone())
@@ -1061,7 +1061,7 @@ mod tests {
Field::new("f1", DataType::Utf8, true),
Field::new("f2", DataType::Int32, true),
]))
- .null_bit_buffer(Buffer::from(vec![0b00001011]))
+ .null_bit_buffer(Some(Buffer::from(vec![0b00001011])))
.len(5)
.add_child_data(strings.data_ref().clone())
.add_child_data(c_ints_non_null.data_ref().clone())
@@ -1077,7 +1077,7 @@ mod tests {
a.data_type().clone(),
true,
)]))
- .null_bit_buffer(Buffer::from(vec![0b00011110]))
+ .null_bit_buffer(Some(Buffer::from(vec![0b00011110])))
.len(5)
.add_child_data(a.data_ref().clone())
.build()
@@ -1096,7 +1096,7 @@ mod tests {
Field::new("f1", DataType::Utf8, true),
Field::new("f2", DataType::Int32, true),
]))
- .null_bit_buffer(Buffer::from(vec![0b00001011]))
+ .null_bit_buffer(Some(Buffer::from(vec![0b00001011])))
.len(5)
.add_child_data(strings.data_ref().clone())
.add_child_data(ints_non_null.data_ref().clone())
@@ -1108,7 +1108,7 @@ mod tests {
b.data_type().clone(),
true,
)]))
- .null_bit_buffer(Buffer::from(vec![0b00011110]))
+ .null_bit_buffer(Some(Buffer::from(vec![0b00011110])))
.len(5)
.add_child_data(b)
.build()
@@ -1141,7 +1141,7 @@ mod tests {
DataType::Utf8,
true,
)]))
- .null_bit_buffer(Buffer::from(vec![0b00001010]))
+ .null_bit_buffer(Some(Buffer::from(vec![0b00001010])))
.len(5)
.add_child_data(strings1.data_ref().clone())
.build()
@@ -1153,7 +1153,7 @@ mod tests {
DataType::Utf8,
true,
)]))
- .null_bit_buffer(Buffer::from(vec![0b00001010]))
+ .null_bit_buffer(Some(Buffer::from(vec![0b00001010])))
.len(5)
.add_child_data(strings2.data_ref().clone())
.build()
@@ -1175,7 +1175,7 @@ mod tests {
DataType::Utf8,
true,
)]))
- .null_bit_buffer(Buffer::from(vec![0b00001011]))
+ .null_bit_buffer(Some(Buffer::from(vec![0b00001011])))
.len(5)
.add_child_data(strings3.data_ref().clone())
.build()
diff --git a/arrow/src/array/ffi.rs b/arrow/src/array/ffi.rs
index 51da67c59..57329037b 100644
--- a/arrow/src/array/ffi.rs
+++ b/arrow/src/array/ffi.rs
@@ -213,7 +213,7 @@ mod tests {
DataType::FixedSizeList(Box::new(Field::new("f", DataType::Int16, false)), 2);
let list_data = ArrayData::builder(list_data_type)
.len(8)
- .null_bit_buffer(Buffer::from(validity_bits))
+ .null_bit_buffer(Some(Buffer::from(validity_bits)))
.add_child_data(value_data)
.build()?;
let array = FixedSizeListArray::from(list_data);
@@ -250,7 +250,7 @@ mod tests {
);
let list_data = ArrayData::builder(list_data_type)
.len(4)
- .null_bit_buffer(Buffer::from(validity_bits))
+ .null_bit_buffer(Some(Buffer::from(validity_bits)))
.add_child_data(inner_list_data)
.build()?;
diff --git a/arrow/src/array/transform/mod.rs b/arrow/src/array/transform/mod.rs
index 4e47dbc29..4671d8267 100644
--- a/arrow/src/array/transform/mod.rs
+++ b/arrow/src/array/transform/mod.rs
@@ -86,7 +86,7 @@ impl<'a> _MutableArrayData<'a> {
.child_data(child_data);
if self.null_count > 0 {
array_data_builder =
- array_data_builder.null_bit_buffer(self.null_buffer.into());
+ array_data_builder.null_bit_buffer(Some(self.null_buffer.into()));
}
array_data_builder
diff --git a/arrow/src/compute/kernels/cast.rs b/arrow/src/compute/kernels/cast.rs
index c989cd2fe..26aacff0b 100644
--- a/arrow/src/compute/kernels/cast.rs
+++ b/arrow/src/compute/kernels/cast.rs
@@ -2078,15 +2078,13 @@ where
DataType::Utf8
};
- let mut builder = ArrayData::builder(dtype)
+ let builder = ArrayData::builder(dtype)
.offset(array.offset())
.len(array.len())
.add_buffer(offset_buffer)
- .add_buffer(str_values_buf);
+ .add_buffer(str_values_buf)
+ .null_bit_buffer(list_data.null_buffer().cloned());
- if let Some(buf) = list_data.null_buffer() {
- builder = builder.null_bit_buffer(buf.clone())
- }
let array_data = unsafe { builder.build_unchecked() };
Ok(Arc::new(GenericStringArray::<OffsetSizeTo>::from(
@@ -2157,15 +2155,13 @@ where
let offset_buffer = unsafe { Buffer::from_trusted_len_iter(iter) };
// wrap up
- let mut builder = ArrayData::builder(out_dtype)
+ let builder = ArrayData::builder(out_dtype)
.offset(array.offset())
.len(array.len())
.add_buffer(offset_buffer)
- .add_child_data(value_data);
+ .add_child_data(value_data)
+ .null_bit_buffer(data.null_buffer().cloned());
- if let Some(buf) = data.null_buffer() {
- builder = builder.null_bit_buffer(buf.clone())
- }
let array_data = unsafe { builder.build_unchecked() };
Ok(make_array(array_data))
}
diff --git a/arrow/src/compute/kernels/comparison.rs b/arrow/src/compute/kernels/comparison.rs
index a3ef387f3..590ed5b0f 100644
--- a/arrow/src/compute/kernels/comparison.rs
+++ b/arrow/src/compute/kernels/comparison.rs
@@ -3327,7 +3327,7 @@ mod tests {
.len(4)
.add_buffer(value_offsets)
.add_child_data(value_data)
- .null_bit_buffer(Buffer::from([0b00001011]))
+ .null_bit_buffer(Some(Buffer::from([0b00001011])))
.build()
.unwrap();
diff --git a/arrow/src/compute/kernels/concat_elements.rs b/arrow/src/compute/kernels/concat_elements.rs
index 66cf569bd..47cbdfab1 100644
--- a/arrow/src/compute/kernels/concat_elements.rs
+++ b/arrow/src/compute/kernels/concat_elements.rs
@@ -75,15 +75,11 @@ pub fn concat_elements_utf8<Offset: OffsetSizeTrait>(
output_offsets.append(Offset::from_usize(output_values.len()).unwrap());
}
- let mut builder =
- ArrayDataBuilder::new(GenericStringArray::<Offset>::get_data_type())
- .len(left.len())
- .add_buffer(output_offsets.finish())
- .add_buffer(output_values.finish());
-
- if let Some(null_bitmap) = output_bitmap {
- builder = builder.null_bit_buffer(null_bitmap);
- }
+ let builder = ArrayDataBuilder::new(GenericStringArray::<Offset>::get_data_type())
+ .len(left.len())
+ .add_buffer(output_offsets.finish())
+ .add_buffer(output_values.finish())
+ .null_bit_buffer(output_bitmap);
// SAFETY - offsets valid by construction
Ok(unsafe { builder.build_unchecked() }.into())
diff --git a/arrow/src/compute/kernels/filter.rs b/arrow/src/compute/kernels/filter.rs
index ddfd0ee02..b59625115 100644
--- a/arrow/src/compute/kernels/filter.rs
+++ b/arrow/src/compute/kernels/filter.rs
@@ -662,7 +662,7 @@ fn filter_boolean(values: &BooleanArray, predicate: &FilterPredicate) -> Boolean
.add_buffer(values);
if let Some((null_count, nulls)) = filter_null_mask(data, predicate) {
- builder = builder.null_count(null_count).null_bit_buffer(nulls);
+ builder = builder.null_count(null_count).null_bit_buffer(Some(nulls));
}
let data = unsafe { builder.build_unchecked() };
@@ -722,7 +722,7 @@ where
.add_buffer(buffer.into());
if let Some((null_count, nulls)) = filter_null_mask(data, predicate) {
- builder = builder.null_count(null_count).null_bit_buffer(nulls);
+ builder = builder.null_count(null_count).null_bit_buffer(Some(nulls));
}
let data = unsafe { builder.build_unchecked() };
@@ -840,7 +840,7 @@ where
.add_buffer(filter.dst_values.into());
if let Some((null_count, nulls)) = filter_null_mask(data, predicate) {
- builder = builder.null_count(null_count).null_bit_buffer(nulls);
+ builder = builder.null_count(null_count).null_bit_buffer(Some(nulls));
}
let data = unsafe { builder.build_unchecked() };
@@ -1149,7 +1149,7 @@ mod tests {
.len(4)
.add_buffer(value_offsets)
.add_child_data(value_data)
- .null_bit_buffer(Buffer::from([0b00000111]))
+ .null_bit_buffer(Some(Buffer::from([0b00000111])))
.build()
.unwrap();
@@ -1173,7 +1173,7 @@ mod tests {
.len(2)
.add_buffer(value_offsets)
.add_child_data(value_data)
- .null_bit_buffer(Buffer::from([0b00000001]))
+ .null_bit_buffer(Some(Buffer::from([0b00000001])))
.build()
.unwrap();
@@ -1600,7 +1600,7 @@ mod tests {
let list_data = ArrayData::builder(list_data_type)
.len(5)
.add_child_data(value_data)
- .null_bit_buffer(Buffer::from(null_bits))
+ .null_bit_buffer(Some(Buffer::from(null_bits)))
.build()
.unwrap();
let array = FixedSizeListArray::from(list_data);
diff --git a/arrow/src/compute/kernels/limit.rs b/arrow/src/compute/kernels/limit.rs
index 8aec1e35a..07cf727b0 100644
--- a/arrow/src/compute/kernels/limit.rs
+++ b/arrow/src/compute/kernels/limit.rs
@@ -113,7 +113,7 @@ mod tests {
.len(9)
.add_buffer(value_offsets)
.add_child_data(value_data)
- .null_bit_buffer(Buffer::from(null_bits))
+ .null_bit_buffer(Some(Buffer::from(null_bits)))
.build()
.unwrap();
let list_array: ArrayRef = Arc::new(ListArray::from(list_data));
@@ -145,13 +145,13 @@ mod tests {
let boolean_data = ArrayData::builder(DataType::Boolean)
.len(5)
.add_buffer(Buffer::from([0b00010000]))
- .null_bit_buffer(Buffer::from([0b00010001]))
+ .null_bit_buffer(Some(Buffer::from([0b00010001])))
.build()
.unwrap();
let int_data = ArrayData::builder(DataType::Int32)
.len(5)
.add_buffer(Buffer::from_slice_ref(&[0, 28, 42, 0, 0]))
- .null_bit_buffer(Buffer::from([0b00000110]))
+ .null_bit_buffer(Some(Buffer::from([0b00000110])))
.build()
.unwrap();
@@ -163,7 +163,7 @@ mod tests {
.len(5)
.add_child_data(boolean_data.clone())
.add_child_data(int_data.clone())
- .null_bit_buffer(Buffer::from([0b00010111]))
+ .null_bit_buffer(Some(Buffer::from([0b00010111])))
.build()
.unwrap();
let struct_array = StructArray::from(struct_array_data);
diff --git a/arrow/src/compute/kernels/substring.rs b/arrow/src/compute/kernels/substring.rs
index 866e42707..f1b6e8d4a 100644
--- a/arrow/src/compute/kernels/substring.rs
+++ b/arrow/src/compute/kernels/substring.rs
@@ -574,7 +574,7 @@ mod tests {
.len(2)
.add_buffer(Buffer::from_slice_ref(offsets))
.add_buffer(Buffer::from_iter(values))
- .null_bit_buffer(Buffer::from(bitmap))
+ .null_bit_buffer(Some(Buffer::from(bitmap)))
.offset(1)
.build()?;
// array is `[null, [10, 11, 12, 13, 14]]`
@@ -867,7 +867,7 @@ mod tests {
.len(2)
.add_buffer(Buffer::from(&values[..]))
.offset(1)
- .null_bit_buffer(Buffer::from(bits_v))
+ .null_bit_buffer(Some(Buffer::from(bits_v)))
.build()
.unwrap();
// array is `[null, "arrow"]`
@@ -1056,7 +1056,7 @@ mod tests {
.len(2)
.add_buffer(Buffer::from_slice_ref(offsets))
.add_buffer(Buffer::from(values))
- .null_bit_buffer(Buffer::from(bitmap))
+ .null_bit_buffer(Some(Buffer::from(bitmap)))
.offset(1)
.build()?;
// array is `[null, "arrow"]`
diff --git a/arrow/src/compute/kernels/take.rs b/arrow/src/compute/kernels/take.rs
index 086cd47f3..567bf5c8b 100644
--- a/arrow/src/compute/kernels/take.rs
+++ b/arrow/src/compute/kernels/take.rs
@@ -777,14 +777,13 @@ where
};
}
- let mut array_data =
+ let array_data =
ArrayData::builder(GenericStringArray::<OffsetSize>::get_data_type())
.len(data_len)
.add_buffer(offsets_buffer.into())
- .add_buffer(values.into());
- if let Some(null_buffer) = nulls {
- array_data = array_data.null_bit_buffer(null_buffer);
- }
+ .add_buffer(values.into())
+ .null_bit_buffer(nulls);
+
let array_data = unsafe { array_data.build_unchecked() };
Ok(GenericStringArray::<OffsetSize>::from(array_data))
@@ -832,7 +831,7 @@ where
// create a new list with taken data and computed null information
let list_data = ArrayDataBuilder::new(values.data_type().clone())
.len(indices.len())
- .null_bit_buffer(null_buf.into())
+ .null_bit_buffer(Some(null_buf.into()))
.offset(0)
.add_child_data(taken.data().clone())
.add_buffer(value_offsets);
@@ -875,7 +874,7 @@ where
let list_data = ArrayDataBuilder::new(values.data_type().clone())
.len(indices.len())
- .null_bit_buffer(null_buf.into())
+ .null_bit_buffer(Some(null_buf.into()))
.offset(0)
.add_child_data(taken.data().clone());
@@ -1573,9 +1572,7 @@ mod tests {
let expected_list_data = ArrayData::builder(list_data_type)
.len(5)
// null buffer remains the same as only the indices have nulls
- .null_bit_buffer(
- index.data().null_bitmap().as_ref().unwrap().bits.clone(),
- )
+ .null_bit_buffer(index.data().null_buffer().cloned())
.add_buffer(expected_offsets)
.add_child_data(expected_data)
.build()
@@ -1614,7 +1611,7 @@ mod tests {
let list_data = ArrayData::builder(list_data_type.clone())
.len(4)
.add_buffer(value_offsets)
- .null_bit_buffer(Buffer::from([0b10111101, 0b00000000]))
+ .null_bit_buffer(Some(Buffer::from([0b10111101, 0b00000000])))
.add_child_data(value_data)
.build()
.unwrap();
@@ -1649,9 +1646,7 @@ mod tests {
let expected_list_data = ArrayData::builder(list_data_type)
.len(5)
// null buffer remains the same as only the indices have nulls
- .null_bit_buffer(
- index.data().null_bitmap().as_ref().unwrap().bits.clone(),
- )
+ .null_bit_buffer(index.data().null_buffer().cloned())
.add_buffer(expected_offsets)
.add_child_data(expected_data)
.build()
@@ -1689,7 +1684,7 @@ mod tests {
let list_data = ArrayData::builder(list_data_type.clone())
.len(4)
.add_buffer(value_offsets)
- .null_bit_buffer(Buffer::from([0b01111101]))
+ .null_bit_buffer(Some(Buffer::from([0b01111101])))
.add_child_data(value_data)
.build()
.unwrap();
@@ -1727,7 +1722,7 @@ mod tests {
let expected_list_data = ArrayData::builder(list_data_type)
.len(5)
// null buffer must be recalculated as both values and indices have nulls
- .null_bit_buffer(Buffer::from(null_bits))
+ .null_bit_buffer(Some(Buffer::from(null_bits)))
.add_buffer(expected_offsets)
.add_child_data(expected_data)
.build()
diff --git a/arrow/src/compute/util.rs b/arrow/src/compute/util.rs
index 23f33d228..4b5029d68 100644
--- a/arrow/src/compute/util.rs
+++ b/arrow/src/compute/util.rs
@@ -333,7 +333,7 @@ pub(super) mod tests {
let list_data = ArrayData::builder(list_data_type)
.len(list_len)
- .null_bit_buffer(list_bitmap.into())
+ .null_bit_buffer(Some(list_bitmap.into()))
.add_buffer(value_offsets)
.add_child_data(value_data)
.build()
@@ -378,7 +378,7 @@ pub(super) mod tests {
let list_data = ArrayData::builder(list_data_type)
.len(list_len)
- .null_bit_buffer(list_bitmap.into())
+ .null_bit_buffer(Some(list_bitmap.into()))
.add_child_data(child_data)
.build()
.unwrap();
diff --git a/arrow/src/ffi.rs b/arrow/src/ffi.rs
index b1fa9f7bb..4ab929829 100644
--- a/arrow/src/ffi.rs
+++ b/arrow/src/ffi.rs
@@ -1256,7 +1256,7 @@ mod tests {
DataType::FixedSizeList(Box::new(Field::new("f", DataType::Int32, false)), 3);
let list_data = ArrayData::builder(list_data_type.clone())
.len(3)
- .null_bit_buffer(Buffer::from(validity_bits))
+ .null_bit_buffer(Some(Buffer::from(validity_bits)))
.add_child_data(value_data)
.build()?;
@@ -1287,7 +1287,7 @@ mod tests {
let expected_list_data = ArrayData::builder(list_data_type)
.len(6)
- .null_bit_buffer(Buffer::from(expected_validity_bits))
+ .null_bit_buffer(Some(Buffer::from(expected_validity_bits)))
.add_child_data(expected_value_data)
.build()?;
let expected_array = FixedSizeListArray::from(expected_list_data);
diff --git a/arrow/src/ipc/reader.rs b/arrow/src/ipc/reader.rs
index 99c2dccb4..41c0c3293 100644
--- a/arrow/src/ipc/reader.rs
+++ b/arrow/src/ipc/reader.rs
@@ -401,24 +401,22 @@ fn create_primitive_array(
let array_data = match data_type {
Utf8 | Binary | LargeBinary | LargeUtf8 => {
// read 3 buffers
- let mut builder = ArrayData::builder(data_type.clone())
+ ArrayData::builder(data_type.clone())
.len(length)
.buffers(buffers[1..3].to_vec())
- .offset(0);
- if null_count > 0 {
- builder = builder.null_bit_buffer(buffers[0].clone())
- }
- builder.build().unwrap()
+ .offset(0)
+ .null_bit_buffer((null_count > 0).then(|| buffers[0].clone()))
+ .build()
+ .unwrap()
}
FixedSizeBinary(_) => {
// read 3 buffers
- let mut builder = ArrayData::builder(data_type.clone())
+ let builder = ArrayData::builder(data_type.clone())
.len(length)
.buffers(buffers[1..2].to_vec())
- .offset(0);
- if null_count > 0 {
- builder = builder.null_bit_buffer(buffers[0].clone())
- }
+ .offset(0)
+ .null_bit_buffer((null_count > 0).then(|| buffers[0].clone()));
+
unsafe { builder.build_unchecked() }
}
Int8
@@ -432,52 +430,48 @@ fn create_primitive_array(
| Interval(IntervalUnit::YearMonth) => {
if buffers[1].len() / 8 == length && length != 1 {
// interpret as a signed i64, and cast appropriately
- let mut builder = ArrayData::builder(DataType::Int64)
+ let builder = ArrayData::builder(DataType::Int64)
.len(length)
.buffers(buffers[1..].to_vec())
- .offset(0);
- if null_count > 0 {
- builder = builder.null_bit_buffer(buffers[0].clone())
- }
+ .offset(0)
+ .null_bit_buffer((null_count > 0).then(|| buffers[0].clone()));
+
let data = unsafe { builder.build_unchecked() };
let values = Arc::new(Int64Array::from(data)) as ArrayRef;
// this cast is infallible, the unwrap is safe
let casted = cast(&values, data_type).unwrap();
casted.data().clone()
} else {
- let mut builder = ArrayData::builder(data_type.clone())
+ let builder = ArrayData::builder(data_type.clone())
.len(length)
.buffers(buffers[1..].to_vec())
- .offset(0);
- if null_count > 0 {
- builder = builder.null_bit_buffer(buffers[0].clone())
- }
+ .offset(0)
+ .null_bit_buffer((null_count > 0).then(|| buffers[0].clone()));
+
unsafe { builder.build_unchecked() }
}
}
Float32 => {
if buffers[1].len() / 8 == length && length != 1 {
// interpret as a f64, and cast appropriately
- let mut builder = ArrayData::builder(DataType::Float64)
+ let builder = ArrayData::builder(DataType::Float64)
.len(length)
.buffers(buffers[1..].to_vec())
- .offset(0);
- if null_count > 0 {
- builder = builder.null_bit_buffer(buffers[0].clone())
- }
+ .offset(0)
+ .null_bit_buffer((null_count > 0).then(|| buffers[0].clone()));
+
let data = unsafe { builder.build_unchecked() };
let values = Arc::new(Float64Array::from(data)) as ArrayRef;
// this cast is infallible, the unwrap is safe
let casted = cast(&values, data_type).unwrap();
casted.data().clone()
} else {
- let mut builder = ArrayData::builder(data_type.clone())
+ let builder = ArrayData::builder(data_type.clone())
.len(length)
.buffers(buffers[1..].to_vec())
- .offset(0);
- if null_count > 0 {
- builder = builder.null_bit_buffer(buffers[0].clone())
- }
+ .offset(0)
+ .null_bit_buffer((null_count > 0).then(|| buffers[0].clone()));
+
unsafe { builder.build_unchecked() }
}
}
@@ -491,24 +485,22 @@ fn create_primitive_array(
| Duration(_)
| Interval(IntervalUnit::DayTime)
| Interval(IntervalUnit::MonthDayNano) => {
- let mut builder = ArrayData::builder(data_type.clone())
+ let builder = ArrayData::builder(data_type.clone())
.len(length)
.buffers(buffers[1..].to_vec())
- .offset(0);
- if null_count > 0 {
- builder = builder.null_bit_buffer(buffers[0].clone())
- }
+ .offset(0)
+ .null_bit_buffer((null_count > 0).then(|| buffers[0].clone()));
+
unsafe { builder.build_unchecked() }
}
Decimal(_, _) => {
// read 3 buffers
- let mut builder = ArrayData::builder(data_type.clone())
+ let builder = ArrayData::builder(data_type.clone())
.len(length)
.buffers(buffers[1..2].to_vec())
- .offset(0);
- if null_count > 0 {
- builder = builder.null_bit_buffer(buffers[0].clone())
- }
+ .offset(0)
+ .null_bit_buffer((null_count > 0).then(|| buffers[0].clone()));
+
unsafe { builder.build_unchecked() }
}
t => panic!("Data type {:?} either unsupported or not primitive", t),
@@ -527,36 +519,33 @@ fn create_list_array(
) -> ArrayRef {
if let DataType::List(_) | DataType::LargeList(_) = *data_type {
let null_count = field_node.null_count() as usize;
- let mut builder = ArrayData::builder(data_type.clone())
+ let builder = ArrayData::builder(data_type.clone())
.len(field_node.length() as usize)
.buffers(buffers[1..2].to_vec())
.offset(0)
- .child_data(vec![child_array.data().clone()]);
- if null_count > 0 {
- builder = builder.null_bit_buffer(buffers[0].clone())
- }
+ .child_data(vec![child_array.data().clone()])
+ .null_bit_buffer((null_count > 0).then(|| buffers[0].clone()));
+
make_array(unsafe { builder.build_unchecked() })
} else if let DataType::FixedSizeList(_, _) = *data_type {
let null_count = field_node.null_count() as usize;
- let mut builder = ArrayData::builder(data_type.clone())
+ let builder = ArrayData::builder(data_type.clone())
.len(field_node.length() as usize)
.buffers(buffers[1..1].to_vec())
.offset(0)
- .child_data(vec![child_array.data().clone()]);
- if null_count > 0 {
- builder = builder.null_bit_buffer(buffers[0].clone())
- }
+ .child_data(vec![child_array.data().clone()])
+ .null_bit_buffer((null_count > 0).then(|| buffers[0].clone()));
+
make_array(unsafe { builder.build_unchecked() })
} else if let DataType::Map(_, _) = *data_type {
let null_count = field_node.null_count() as usize;
- let mut builder = ArrayData::builder(data_type.clone())
+ let builder = ArrayData::builder(data_type.clone())
.len(field_node.length() as usize)
.buffers(buffers[1..2].to_vec())
.offset(0)
- .child_data(vec![child_array.data().clone()]);
- if null_count > 0 {
- builder = builder.null_bit_buffer(buffers[0].clone())
- }
+ .child_data(vec![child_array.data().clone()])
+ .null_bit_buffer((null_count > 0).then(|| buffers[0].clone()));
+
make_array(unsafe { builder.build_unchecked() })
} else {
panic!("Cannot create list or map array from {:?}", data_type)
@@ -573,14 +562,13 @@ fn create_dictionary_array(
) -> ArrayRef {
if let DataType::Dictionary(_, _) = *data_type {
let null_count = field_node.null_count() as usize;
- let mut builder = ArrayData::builder(data_type.clone())
+ let builder = ArrayData::builder(data_type.clone())
.len(field_node.length() as usize)
.buffers(buffers[1..2].to_vec())
.offset(0)
- .child_data(vec![value_array.data().clone()]);
- if null_count > 0 {
- builder = builder.null_bit_buffer(buffers[0].clone())
- }
+ .child_data(vec![value_array.data().clone()])
+ .null_bit_buffer((null_count > 0).then(|| buffers[0].clone()));
+
make_array(unsafe { builder.build_unchecked() })
} else {
unreachable!("Cannot create dictionary array from {:?}", data_type)
diff --git a/arrow/src/json/reader.rs b/arrow/src/json/reader.rs
index f224b3b5b..e1fa54f8a 100644
--- a/arrow/src/json/reader.rs
+++ b/arrow/src/json/reader.rs
@@ -1064,7 +1064,7 @@ impl Decoder {
ArrayData::builder(list_field.data_type().clone())
.len(valid_len)
.add_buffer(bool_values.into())
- .null_bit_buffer(bool_nulls.into())
+ .null_bit_buffer(Some(bool_nulls.into()))
.build_unchecked()
}
}
@@ -1143,7 +1143,7 @@ impl Decoder {
unsafe {
ArrayDataBuilder::new(data_type)
.len(rows.len())
- .null_bit_buffer(buf)
+ .null_bit_buffer(Some(buf))
.child_data(
arrays.into_iter().map(|a| a.data().clone()).collect(),
)
@@ -1162,7 +1162,7 @@ impl Decoder {
.len(list_len)
.add_buffer(Buffer::from_slice_ref(&offsets))
.add_child_data(array_data)
- .null_bit_buffer(list_nulls.into());
+ .null_bit_buffer(Some(list_nulls.into()));
let list_data = unsafe { list_data.build_unchecked() };
Ok(Arc::new(GenericListArray::<OffsetSize>::from(list_data)))
}
@@ -1351,7 +1351,7 @@ impl Decoder {
let data_type = DataType::Struct(fields.clone());
let data = ArrayDataBuilder::new(data_type)
.len(len)
- .null_bit_buffer(null_buffer.into())
+ .null_bit_buffer(Some(null_buffer.into()))
.child_data(
arrays.into_iter().map(|a| a.data().clone()).collect(),
);
@@ -2235,7 +2235,7 @@ mod tests {
let c = ArrayDataBuilder::new(c_field.data_type().clone())
.len(4)
.add_child_data(d.data().clone())
- .null_bit_buffer(Buffer::from(vec![0b00000101]))
+ .null_bit_buffer(Some(Buffer::from(vec![0b00000101])))
.build()
.unwrap();
let b = BooleanArray::from(vec![Some(true), Some(false), Some(true), None]);
@@ -2243,7 +2243,7 @@ mod tests {
.len(4)
.add_child_data(b.data().clone())
.add_child_data(c)
- .null_bit_buffer(Buffer::from(vec![0b00000111]))
+ .null_bit_buffer(Some(Buffer::from(vec![0b00000111])))
.build()
.unwrap();
let expected = make_array(a);
@@ -2301,7 +2301,7 @@ mod tests {
let c = ArrayDataBuilder::new(c_field.data_type().clone())
.len(7)
.add_child_data(d.data().clone())
- .null_bit_buffer(Buffer::from(vec![0b00111011]))
+ .null_bit_buffer(Some(Buffer::from(vec![0b00111011])))
.build()
.unwrap();
let b = BooleanArray::from(vec![
@@ -2317,14 +2317,14 @@ mod tests {
.len(7)
.add_child_data(b.data().clone())
.add_child_data(c.clone())
- .null_bit_buffer(Buffer::from(vec![0b00111111]))
+ .null_bit_buffer(Some(Buffer::from(vec![0b00111111])))
.build()
.unwrap();
let a_list = ArrayDataBuilder::new(a_field.data_type().clone())
.len(6)
.add_buffer(Buffer::from_slice_ref(&[0i32, 2, 3, 6, 6, 6, 7]))
.add_child_data(a)
- .null_bit_buffer(Buffer::from(vec![0b00110111]))
+ .null_bit_buffer(Some(Buffer::from(vec![0b00110111])))
.build()
.unwrap();
let expected = make_array(a_list);
@@ -2423,7 +2423,7 @@ mod tests {
vec![0i32, 2, 4, 7, 7, 8, 8, 9].to_byte_slice(),
))
.add_child_data(expected_value_array_data)
- .null_bit_buffer(Buffer::from(vec![0b01010111]))
+ .null_bit_buffer(Some(Buffer::from(vec![0b01010111])))
.build()
.unwrap();
let expected_stocks_entries_data = ArrayDataBuilder::new(entries_struct_type)
diff --git a/arrow/src/json/writer.rs b/arrow/src/json/writer.rs
index a9d485ecf..078382f57 100644
--- a/arrow/src/json/writer.rs
+++ b/arrow/src/json/writer.rs
@@ -1119,7 +1119,7 @@ mod tests {
.len(5)
.add_buffer(a_value_offsets)
.add_child_data(a_values.data().clone())
- .null_bit_buffer(Buffer::from(vec![0b00011111]))
+ .null_bit_buffer(Some(Buffer::from(vec![0b00011111])))
.build()
.unwrap();
let a = ListArray::from(a_list_data);
@@ -1170,7 +1170,7 @@ mod tests {
let a_list_data = ArrayData::builder(list_inner_type.data_type().clone())
.len(3)
.add_buffer(a_value_offsets)
- .null_bit_buffer(Buffer::from(vec![0b00000111]))
+ .null_bit_buffer(Some(Buffer::from(vec![0b00000111])))
.add_child_data(a_values.data().clone())
.build()
.unwrap();
@@ -1254,7 +1254,7 @@ mod tests {
.len(3)
.add_buffer(c1_value_offsets)
.add_child_data(struct_values.data().clone())
- .null_bit_buffer(Buffer::from(vec![0b00000101]))
+ .null_bit_buffer(Some(Buffer::from(vec![0b00000101])))
.build()
.unwrap();
let c1 = ListArray::from(c1_list_data);
@@ -1436,7 +1436,7 @@ mod tests {
let map_data = ArrayData::builder(map_data_type.clone())
.len(6)
- .null_bit_buffer(valid_buffer)
+ .null_bit_buffer(Some(valid_buffer))
.add_buffer(entry_offsets)
.add_child_data(entry_struct.data().clone())
.build()
diff --git a/integration-testing/src/lib.rs b/integration-testing/src/lib.rs
index c57ef32bc..90537242a 100644
--- a/integration-testing/src/lib.rs
+++ b/integration-testing/src/lib.rs
@@ -496,7 +496,7 @@ fn array_from_json(
.offset(0)
.add_buffer(Buffer::from(&offsets.to_byte_slice()))
.add_child_data(child_array.data().clone())
- .null_bit_buffer(null_buf)
+ .null_bit_buffer(Some(null_buf))
.build()
.unwrap();
Ok(Arc::new(ListArray::from(list_data)))
@@ -524,7 +524,7 @@ fn array_from_json(
.offset(0)
.add_buffer(Buffer::from(&offsets.to_byte_slice()))
.add_child_data(child_array.data().clone())
- .null_bit_buffer(null_buf)
+ .null_bit_buffer(Some(null_buf))
.build()
.unwrap();
Ok(Arc::new(LargeListArray::from(list_data)))
@@ -540,7 +540,7 @@ fn array_from_json(
let list_data = ArrayData::builder(field.data_type().clone())
.len(json_col.count)
.add_child_data(child_array.data().clone())
- .null_bit_buffer(null_buf)
+ .null_bit_buffer(Some(null_buf))
.build()
.unwrap();
Ok(Arc::new(FixedSizeListArray::from(list_data)))
@@ -550,7 +550,7 @@ fn array_from_json(
let null_buf = create_null_buf(&json_col);
let mut array_data = ArrayData::builder(field.data_type().clone())
.len(json_col.count)
- .null_bit_buffer(null_buf);
+ .null_bit_buffer(Some(null_buf));
for (field, col) in fields.iter().zip(json_col.children.unwrap()) {
let array = array_from_json(field, col, dictionaries)?;
@@ -625,7 +625,7 @@ fn array_from_json(
.len(json_col.count)
.add_buffer(Buffer::from(&offsets.to_byte_slice()))
.add_child_data(child_array.data().clone())
- .null_bit_buffer(null_buf)
+ .null_bit_buffer(Some(null_buf))
.build()
.unwrap();
@@ -713,7 +713,7 @@ fn dictionary_array_from_json(
let dict_data = ArrayData::builder(field.data_type().clone())
.len(keys.len())
.add_buffer(keys.data().buffers()[0].clone())
- .null_bit_buffer(null_buf)
+ .null_bit_buffer(Some(null_buf))
.add_child_data(values.data().clone())
.build()
.unwrap();
diff --git a/parquet/src/arrow/array_reader.rs b/parquet/src/arrow/array_reader.rs
index 12c9ca522..c70071dac 100644
--- a/parquet/src/arrow/array_reader.rs
+++ b/parquet/src/arrow/array_reader.rs
@@ -362,13 +362,10 @@ where
record_data = boolean_buffer.finish();
}
- let mut array_data = ArrayDataBuilder::new(arrow_data_type)
+ let array_data = ArrayDataBuilder::new(arrow_data_type)
.len(self.record_reader.num_values())
- .add_buffer(record_data);
-
- if let Some(b) = self.record_reader.consume_bitmap_buffer()? {
- array_data = array_data.null_bit_buffer(b);
- }
+ .add_buffer(record_data)
+ .null_bit_buffer(self.record_reader.consume_bitmap_buffer()?);
let array_data = unsafe { array_data.build_unchecked() };
let array = match T::get_physical_type() {
@@ -773,7 +770,7 @@ impl ArrayReader for StructArrayReader {
}
array_data_builder =
- array_data_builder.null_bit_buffer(bitmap_builder.finish());
+ array_data_builder.null_bit_buffer(Some(bitmap_builder.finish()));
}
let array_data = unsafe { array_data_builder.build_unchecked() };
diff --git a/parquet/src/arrow/array_reader/dictionary_buffer.rs b/parquet/src/arrow/array_reader/dictionary_buffer.rs
index 6bb96030e..6dc9cc80f 100644
--- a/parquet/src/arrow/array_reader/dictionary_buffer.rs
+++ b/parquet/src/arrow/array_reader/dictionary_buffer.rs
@@ -158,14 +158,11 @@ impl<K: ScalarValue + ArrowNativeType + Ord, V: ScalarValue + OffsetSizeTrait>
}
}
- let mut builder = ArrayDataBuilder::new(data_type.clone())
+ let builder = ArrayDataBuilder::new(data_type.clone())
.len(keys.len())
.add_buffer(keys.into())
- .add_child_data(values.data().clone());
-
- if let Some(buffer) = null_buffer {
- builder = builder.null_bit_buffer(buffer);
- }
+ .add_child_data(values.data().clone())
+ .null_bit_buffer(null_buffer);
let data = match cfg!(debug_assertions) {
true => builder.build().unwrap(),
diff --git a/parquet/src/arrow/array_reader/list_array.rs b/parquet/src/arrow/array_reader/list_array.rs
index 808f815e6..ab51cd87d 100644
--- a/parquet/src/arrow/array_reader/list_array.rs
+++ b/parquet/src/arrow/array_reader/list_array.rs
@@ -222,7 +222,7 @@ impl<OffsetSize: OffsetSizeTrait> ArrayReader for ListArrayReader<OffsetSize> {
if let Some(mut builder) = validity {
assert_eq!(builder.len(), list_offsets.len() - 1);
- data_builder = data_builder.null_bit_buffer(builder.finish())
+ data_builder = data_builder.null_bit_buffer(Some(builder.finish()))
}
let list_data = unsafe { data_builder.build_unchecked() };
@@ -327,7 +327,7 @@ mod tests {
.len(10)
.add_buffer(offsets)
.add_child_data(leaf.data().clone())
- .null_bit_buffer(Buffer::from([0b11111101, 0b00000010]))
+ .null_bit_buffer(Some(Buffer::from([0b11111101, 0b00000010])))
.build()
.unwrap();
@@ -345,7 +345,7 @@ mod tests {
.len(4)
.add_buffer(offsets)
.add_child_data(l2)
- .null_bit_buffer(Buffer::from([0b00001101]))
+ .null_bit_buffer(Some(Buffer::from([0b00001101])))
.build()
.unwrap();
diff --git a/parquet/src/arrow/array_reader/map_array.rs b/parquet/src/arrow/array_reader/map_array.rs
index 2c9f037ab..efeafe201 100644
--- a/parquet/src/arrow/array_reader/map_array.rs
+++ b/parquet/src/arrow/array_reader/map_array.rs
@@ -141,7 +141,7 @@ impl ArrayReader for MapArrayReader {
let array_data = ArrayDataBuilder::new(self.data_type.clone())
.len(entry_len)
.add_buffer(value_offsets)
- .null_bit_buffer(null_buf.into())
+ .null_bit_buffer(Some(null_buf.into()))
.add_child_data(entry_data);
let array_data = unsafe { array_data.build_unchecked() };
diff --git a/parquet/src/arrow/array_reader/offset_buffer.rs b/parquet/src/arrow/array_reader/offset_buffer.rs
index dc35f9558..23e7af759 100644
--- a/parquet/src/arrow/array_reader/offset_buffer.rs
+++ b/parquet/src/arrow/array_reader/offset_buffer.rs
@@ -131,14 +131,11 @@ impl<I: OffsetSizeTrait + ScalarValue> OffsetBuffer<I> {
null_buffer: Option<Buffer>,
data_type: ArrowType,
) -> ArrayRef {
- let mut array_data_builder = ArrayDataBuilder::new(data_type)
+ let array_data_builder = ArrayDataBuilder::new(data_type)
.len(self.len())
.add_buffer(self.offsets.into())
- .add_buffer(self.values.into());
-
- if let Some(buffer) = null_buffer {
- array_data_builder = array_data_builder.null_bit_buffer(buffer);
- }
+ .add_buffer(self.values.into())
+ .null_bit_buffer(null_buffer);
let data = match cfg!(debug_assertions) {
true => array_data_builder.build().unwrap(),
diff --git a/parquet/src/arrow/arrow_writer.rs b/parquet/src/arrow/arrow_writer.rs
index e1fd93a9b..a5162045b 100644
--- a/parquet/src/arrow/arrow_writer.rs
+++ b/parquet/src/arrow/arrow_writer.rs
@@ -813,7 +813,7 @@ mod tests {
.len(5)
.add_buffer(a_value_offsets)
.add_child_data(a_values.data().clone())
- .null_bit_buffer(Buffer::from(vec![0b00011011]))
+ .null_bit_buffer(Some(Buffer::from(vec![0b00011011])))
.build()
.unwrap();
let a = ListArray::from(a_list_data);
@@ -976,7 +976,7 @@ mod tests {
.len(5)
.add_buffer(g_value_offsets)
.add_child_data(g_value.data().clone())
- .null_bit_buffer(Buffer::from(vec![0b00011011]))
+ .null_bit_buffer(Some(Buffer::from(vec![0b00011011])))
.build()
.unwrap();
let h = ListArray::from(h_list_data);
@@ -1083,14 +1083,14 @@ mod tests {
let c = Int32Array::from(vec![Some(1), None, Some(3), None, None, Some(6)]);
let b_data = ArrayDataBuilder::new(field_b.data_type().clone())
.len(6)
- .null_bit_buffer(Buffer::from(vec![0b00100111]))
+ .null_bit_buffer(Some(Buffer::from(vec![0b00100111])))
.add_child_data(c.data().clone())
.build()
.unwrap();
let b = StructArray::from(b_data);
let a_data = ArrayDataBuilder::new(field_a.data_type().clone())
.len(6)
- .null_bit_buffer(Buffer::from(vec![0b00101111]))
+ .null_bit_buffer(Some(Buffer::from(vec![0b00101111])))
.add_child_data(b.data().clone())
.build()
.unwrap();
@@ -1149,7 +1149,7 @@ mod tests {
let c = Int32Array::from(vec![1, 2, 3, 4, 5, 6]);
let b_data = ArrayDataBuilder::new(field_b.data_type().clone())
.len(6)
- .null_bit_buffer(Buffer::from(vec![0b00100111]))
+ .null_bit_buffer(Some(Buffer::from(vec![0b00100111])))
.add_child_data(c.data().clone())
.build()
.unwrap();
@@ -1532,7 +1532,7 @@ mod tests {
))))
.len(3)
.add_buffer(a_value_offsets)
- .null_bit_buffer(Buffer::from(vec![0b00000101]))
+ .null_bit_buffer(Some(Buffer::from(vec![0b00000101])))
.add_child_data(a_values.data().clone())
.build()
.unwrap();
@@ -1563,7 +1563,7 @@ mod tests {
))))
.len(5)
.add_buffer(a_value_offsets)
- .null_bit_buffer(Buffer::from(vec![0b00011011]))
+ .null_bit_buffer(Some(Buffer::from(vec![0b00011011])))
.add_child_data(a_values.data().clone())
.build()
.unwrap();
@@ -1589,7 +1589,7 @@ mod tests {
.len(5)
.add_buffer(a_value_offsets)
.add_child_data(a_values.data().clone())
- .null_bit_buffer(Buffer::from(vec![0b00011011]))
+ .null_bit_buffer(Some(Buffer::from(vec![0b00011011])))
.build()
.unwrap();
@@ -1986,7 +1986,9 @@ mod tests {
.add_buffer(Buffer::from_iter(vec![
0_i32, 1_i32, 1_i32, 3_i32, 3_i32, 5_i32,
]))
- .null_bit_buffer(Buffer::from_iter(vec![true, false, true, false, true]))
+ .null_bit_buffer(Some(Buffer::from_iter(vec![
+ true, false, true, false, true,
+ ])))
.child_data(vec![struct_a_array.data().clone()])
.build()
.unwrap();
diff --git a/parquet/src/arrow/levels.rs b/parquet/src/arrow/levels.rs
index be9a5e993..9dcb00830 100644
--- a/parquet/src/arrow/levels.rs
+++ b/parquet/src/arrow/levels.rs
@@ -1317,7 +1317,7 @@ mod tests {
let a_list_data = ArrayData::builder(a_list_type.clone())
.len(5)
.add_buffer(a_value_offsets)
- .null_bit_buffer(Buffer::from(vec![0b00011011]))
+ .null_bit_buffer(Some(Buffer::from(vec![0b00011011])))
.add_child_data(a_values.data().clone())
.build()
.unwrap();