You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by tu...@apache.org on 2023/06/01 14:30:25 UTC
[arrow-rs] branch master updated: Skip unnecessary null checks in MutableArrayData (#4333)
This is an automated email from the ASF dual-hosted git repository.
tustvold pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git
The following commit(s) were added to refs/heads/master by this push:
new f2610515e Skip unnecessary null checks in MutableArrayData (#4333)
f2610515e is described below
commit f2610515e03a72afbe8c017683867ee9f921fffa
Author: Raphael Taylor-Davies <17...@users.noreply.github.com>
AuthorDate: Thu Jun 1 15:30:19 2023 +0100
Skip unnecessary null checks in MutableArrayData (#4333)
---
arrow-data/src/transform/fixed_binary.rs | 32 +++---------
arrow-data/src/transform/fixed_size_list.rs | 40 +++-----------
arrow-data/src/transform/list.rs | 81 ++++++++---------------------
arrow-data/src/transform/structure.rs | 44 ++++------------
arrow-data/src/transform/variable_size.rs | 61 ++++++----------------
5 files changed, 59 insertions(+), 199 deletions(-)
diff --git a/arrow-data/src/transform/fixed_binary.rs b/arrow-data/src/transform/fixed_binary.rs
index a20901014..44c6f46eb 100644
--- a/arrow-data/src/transform/fixed_binary.rs
+++ b/arrow-data/src/transform/fixed_binary.rs
@@ -26,32 +26,12 @@ pub(super) fn build_extend(array: &ArrayData) -> Extend {
};
let values = &array.buffers()[0].as_slice()[array.offset() * size..];
- if array.null_count() == 0 {
- // fast case where we can copy regions without null issues
- Box::new(
- move |mutable: &mut _MutableArrayData, _, start: usize, len: usize| {
- let buffer = &mut mutable.buffer1;
- buffer.extend_from_slice(&values[start * size..(start + len) * size]);
- },
- )
- } else {
- Box::new(
- move |mutable: &mut _MutableArrayData, _, start: usize, len: usize| {
- // nulls present: append item by item, ignoring null entries
- let values_buffer = &mut mutable.buffer1;
-
- (start..start + len).for_each(|i| {
- if array.is_valid(i) {
- // append value
- let bytes = &values[i * size..(i + 1) * size];
- values_buffer.extend_from_slice(bytes);
- } else {
- values_buffer.extend_zeros(size);
- }
- })
- },
- )
- }
+ Box::new(
+ move |mutable: &mut _MutableArrayData, _, start: usize, len: usize| {
+ let buffer = &mut mutable.buffer1;
+ buffer.extend_from_slice(&values[start * size..(start + len) * size]);
+ },
+ )
}
pub(super) fn extend_nulls(mutable: &mut _MutableArrayData, len: usize) {
diff --git a/arrow-data/src/transform/fixed_size_list.rs b/arrow-data/src/transform/fixed_size_list.rs
index ad369c2be..8eef7bce9 100644
--- a/arrow-data/src/transform/fixed_size_list.rs
+++ b/arrow-data/src/transform/fixed_size_list.rs
@@ -26,38 +26,14 @@ pub(super) fn build_extend(array: &ArrayData) -> Extend {
_ => unreachable!(),
};
- if array.null_count() == 0 {
- Box::new(
- move |mutable: &mut _MutableArrayData,
- index: usize,
- start: usize,
- len: usize| {
- mutable.child_data.iter_mut().for_each(|child| {
- child.extend(index, start * size, (start + len) * size)
- })
- },
- )
- } else {
- Box::new(
- move |mutable: &mut _MutableArrayData,
- index: usize,
- start: usize,
- len: usize| {
- (start..start + len).for_each(|i| {
- if array.is_valid(i) {
- mutable.child_data.iter_mut().for_each(|child| {
- child.extend(index, i * size, (i + 1) * size)
- })
- } else {
- mutable
- .child_data
- .iter_mut()
- .for_each(|child| child.extend_nulls(size))
- }
- })
- },
- )
- }
+ Box::new(
+ move |mutable: &mut _MutableArrayData, index: usize, start: usize, len: usize| {
+ mutable
+ .child_data
+ .iter_mut()
+ .for_each(|child| child.extend(index, start * size, (start + len) * size))
+ },
+ )
}
pub(super) fn extend_nulls(mutable: &mut _MutableArrayData, len: usize) {
diff --git a/arrow-data/src/transform/list.rs b/arrow-data/src/transform/list.rs
index 76a845958..9d5d8330c 100644
--- a/arrow-data/src/transform/list.rs
+++ b/arrow-data/src/transform/list.rs
@@ -27,66 +27,27 @@ pub(super) fn build_extend<T: ArrowNativeType + Integer + CheckedAdd>(
array: &ArrayData,
) -> Extend {
let offsets = array.buffer::<T>(0);
- if array.null_count() == 0 {
- // fast case where we can copy regions without nullability checks
- Box::new(
- move |mutable: &mut _MutableArrayData,
- index: usize,
- start: usize,
- len: usize| {
- let offset_buffer = &mut mutable.buffer1;
-
- // this is safe due to how offset is built. See details on `get_last_offset`
- let last_offset: T = unsafe { get_last_offset(offset_buffer) };
-
- // offsets
- extend_offsets::<T>(
- offset_buffer,
- last_offset,
- &offsets[start..start + len + 1],
- );
-
- mutable.child_data[0].extend(
- index,
- offsets[start].as_usize(),
- offsets[start + len].as_usize(),
- )
- },
- )
- } else {
- // nulls present: append item by item, ignoring null entries
- Box::new(
- move |mutable: &mut _MutableArrayData,
- index: usize,
- start: usize,
- len: usize| {
- let offset_buffer = &mut mutable.buffer1;
-
- // this is safe due to how offset is built. See details on `get_last_offset`
- let mut last_offset: T = unsafe { get_last_offset(offset_buffer) };
-
- let delta_len = array.len() - array.null_count();
- offset_buffer.reserve(delta_len * std::mem::size_of::<T>());
-
- let child = &mut mutable.child_data[0];
- (start..start + len).for_each(|i| {
- if array.is_valid(i) {
- // compute the new offset
- last_offset = last_offset + offsets[i + 1] - offsets[i];
-
- // append value
- child.extend(
- index,
- offsets[i].as_usize(),
- offsets[i + 1].as_usize(),
- );
- }
- // append offset
- offset_buffer.push(last_offset);
- })
- },
- )
- }
+ Box::new(
+ move |mutable: &mut _MutableArrayData, index: usize, start: usize, len: usize| {
+ let offset_buffer = &mut mutable.buffer1;
+
+ // this is safe due to how offset is built. See details on `get_last_offset`
+ let last_offset: T = unsafe { get_last_offset(offset_buffer) };
+
+ // offsets
+ extend_offsets::<T>(
+ offset_buffer,
+ last_offset,
+ &offsets[start..start + len + 1],
+ );
+
+ mutable.child_data[0].extend(
+ index,
+ offsets[start].as_usize(),
+ offsets[start + len].as_usize(),
+ )
+ },
+ )
}
pub(super) fn extend_nulls<T: ArrowNativeType>(
diff --git a/arrow-data/src/transform/structure.rs b/arrow-data/src/transform/structure.rs
index c6841da4d..7330dcaa3 100644
--- a/arrow-data/src/transform/structure.rs
+++ b/arrow-data/src/transform/structure.rs
@@ -18,41 +18,15 @@
use super::{Extend, _MutableArrayData};
use crate::ArrayData;
-pub(super) fn build_extend(array: &ArrayData) -> Extend {
- if array.null_count() == 0 {
- Box::new(
- move |mutable: &mut _MutableArrayData,
- index: usize,
- start: usize,
- len: usize| {
- mutable
- .child_data
- .iter_mut()
- .for_each(|child| child.extend(index, start, start + len))
- },
- )
- } else {
- Box::new(
- move |mutable: &mut _MutableArrayData,
- index: usize,
- start: usize,
- len: usize| {
- (start..start + len).for_each(|i| {
- if array.is_valid(i) {
- mutable
- .child_data
- .iter_mut()
- .for_each(|child| child.extend(index, i, i + 1))
- } else {
- mutable
- .child_data
- .iter_mut()
- .for_each(|child| child.extend_nulls(1))
- }
- })
- },
- )
- }
+pub(super) fn build_extend(_: &ArrayData) -> Extend {
+ Box::new(
+ move |mutable: &mut _MutableArrayData, index: usize, start: usize, len: usize| {
+ mutable
+ .child_data
+ .iter_mut()
+ .for_each(|child| child.extend(index, start, start + len))
+ },
+ )
}
pub(super) fn extend_nulls(mutable: &mut _MutableArrayData, len: usize) {
diff --git a/arrow-data/src/transform/variable_size.rs b/arrow-data/src/transform/variable_size.rs
index ce62459ae..597a8b2b6 100644
--- a/arrow-data/src/transform/variable_size.rs
+++ b/arrow-data/src/transform/variable_size.rs
@@ -46,54 +46,23 @@ pub(super) fn build_extend<
) -> Extend {
let offsets = array.buffer::<T>(0);
let values = array.buffers()[1].as_slice();
- if array.null_count() == 0 {
- // fast case where we can copy regions without null issues
- Box::new(
- move |mutable: &mut _MutableArrayData, _, start: usize, len: usize| {
- let offset_buffer = &mut mutable.buffer1;
- let values_buffer = &mut mutable.buffer2;
+ Box::new(
+ move |mutable: &mut _MutableArrayData, _, start: usize, len: usize| {
+ let offset_buffer = &mut mutable.buffer1;
+ let values_buffer = &mut mutable.buffer2;
- // this is safe due to how offset is built. See details on `get_last_offset`
- let last_offset = unsafe { get_last_offset(offset_buffer) };
+ // this is safe due to how offset is built. See details on `get_last_offset`
+ let last_offset = unsafe { get_last_offset(offset_buffer) };
- extend_offsets::<T>(
- offset_buffer,
- last_offset,
- &offsets[start..start + len + 1],
- );
- // values
- extend_offset_values::<T>(values_buffer, offsets, values, start, len);
- },
- )
- } else {
- Box::new(
- move |mutable: &mut _MutableArrayData, _, start: usize, len: usize| {
- let offset_buffer = &mut mutable.buffer1;
- let values_buffer = &mut mutable.buffer2;
-
- // this is safe due to how offset is built. See details on `get_last_offset`
- let mut last_offset: T = unsafe { get_last_offset(offset_buffer) };
-
- // nulls present: append item by item, ignoring null entries
- offset_buffer.reserve(len * std::mem::size_of::<T>());
-
- (start..start + len).for_each(|i| {
- if array.is_valid(i) {
- // compute the new offset
- let length = offsets[i + 1] - offsets[i];
- last_offset = last_offset + length;
-
- // append value
- let bytes = &values[offsets[i].to_usize().unwrap()
- ..offsets[i + 1].to_usize().unwrap()];
- values_buffer.extend_from_slice(bytes);
- }
- // offsets are always present
- offset_buffer.push(last_offset);
- })
- },
- )
- }
+ extend_offsets::<T>(
+ offset_buffer,
+ last_offset,
+ &offsets[start..start + len + 1],
+ );
+ // values
+ extend_offset_values::<T>(values_buffer, offsets, values, start, len);
+ },
+ )
}
pub(super) fn extend_nulls<T: ArrowNativeType>(