You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by tu...@apache.org on 2023/06/01 14:30:25 UTC

[arrow-rs] branch master updated: Skip unnecessary null checks in MutableArrayData (#4333)

This is an automated email from the ASF dual-hosted git repository.

tustvold pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git


The following commit(s) were added to refs/heads/master by this push:
     new f2610515e Skip unnecessary null checks in MutableArrayData (#4333)
f2610515e is described below

commit f2610515e03a72afbe8c017683867ee9f921fffa
Author: Raphael Taylor-Davies <17...@users.noreply.github.com>
AuthorDate: Thu Jun 1 15:30:19 2023 +0100

    Skip unnecessary null checks in MutableArrayData (#4333)
---
 arrow-data/src/transform/fixed_binary.rs    | 32 +++---------
 arrow-data/src/transform/fixed_size_list.rs | 40 +++-----------
 arrow-data/src/transform/list.rs            | 81 ++++++++---------------------
 arrow-data/src/transform/structure.rs       | 44 ++++------------
 arrow-data/src/transform/variable_size.rs   | 61 ++++++----------------
 5 files changed, 59 insertions(+), 199 deletions(-)

diff --git a/arrow-data/src/transform/fixed_binary.rs b/arrow-data/src/transform/fixed_binary.rs
index a20901014..44c6f46eb 100644
--- a/arrow-data/src/transform/fixed_binary.rs
+++ b/arrow-data/src/transform/fixed_binary.rs
@@ -26,32 +26,12 @@ pub(super) fn build_extend(array: &ArrayData) -> Extend {
     };
 
     let values = &array.buffers()[0].as_slice()[array.offset() * size..];
-    if array.null_count() == 0 {
-        // fast case where we can copy regions without null issues
-        Box::new(
-            move |mutable: &mut _MutableArrayData, _, start: usize, len: usize| {
-                let buffer = &mut mutable.buffer1;
-                buffer.extend_from_slice(&values[start * size..(start + len) * size]);
-            },
-        )
-    } else {
-        Box::new(
-            move |mutable: &mut _MutableArrayData, _, start: usize, len: usize| {
-                // nulls present: append item by item, ignoring null entries
-                let values_buffer = &mut mutable.buffer1;
-
-                (start..start + len).for_each(|i| {
-                    if array.is_valid(i) {
-                        // append value
-                        let bytes = &values[i * size..(i + 1) * size];
-                        values_buffer.extend_from_slice(bytes);
-                    } else {
-                        values_buffer.extend_zeros(size);
-                    }
-                })
-            },
-        )
-    }
+    Box::new(
+        move |mutable: &mut _MutableArrayData, _, start: usize, len: usize| {
+            let buffer = &mut mutable.buffer1;
+            buffer.extend_from_slice(&values[start * size..(start + len) * size]);
+        },
+    )
 }
 
 pub(super) fn extend_nulls(mutable: &mut _MutableArrayData, len: usize) {
diff --git a/arrow-data/src/transform/fixed_size_list.rs b/arrow-data/src/transform/fixed_size_list.rs
index ad369c2be..8eef7bce9 100644
--- a/arrow-data/src/transform/fixed_size_list.rs
+++ b/arrow-data/src/transform/fixed_size_list.rs
@@ -26,38 +26,14 @@ pub(super) fn build_extend(array: &ArrayData) -> Extend {
         _ => unreachable!(),
     };
 
-    if array.null_count() == 0 {
-        Box::new(
-            move |mutable: &mut _MutableArrayData,
-                  index: usize,
-                  start: usize,
-                  len: usize| {
-                mutable.child_data.iter_mut().for_each(|child| {
-                    child.extend(index, start * size, (start + len) * size)
-                })
-            },
-        )
-    } else {
-        Box::new(
-            move |mutable: &mut _MutableArrayData,
-                  index: usize,
-                  start: usize,
-                  len: usize| {
-                (start..start + len).for_each(|i| {
-                    if array.is_valid(i) {
-                        mutable.child_data.iter_mut().for_each(|child| {
-                            child.extend(index, i * size, (i + 1) * size)
-                        })
-                    } else {
-                        mutable
-                            .child_data
-                            .iter_mut()
-                            .for_each(|child| child.extend_nulls(size))
-                    }
-                })
-            },
-        )
-    }
+    Box::new(
+        move |mutable: &mut _MutableArrayData, index: usize, start: usize, len: usize| {
+            mutable
+                .child_data
+                .iter_mut()
+                .for_each(|child| child.extend(index, start * size, (start + len) * size))
+        },
+    )
 }
 
 pub(super) fn extend_nulls(mutable: &mut _MutableArrayData, len: usize) {
diff --git a/arrow-data/src/transform/list.rs b/arrow-data/src/transform/list.rs
index 76a845958..9d5d8330c 100644
--- a/arrow-data/src/transform/list.rs
+++ b/arrow-data/src/transform/list.rs
@@ -27,66 +27,27 @@ pub(super) fn build_extend<T: ArrowNativeType + Integer + CheckedAdd>(
     array: &ArrayData,
 ) -> Extend {
     let offsets = array.buffer::<T>(0);
-    if array.null_count() == 0 {
-        // fast case where we can copy regions without nullability checks
-        Box::new(
-            move |mutable: &mut _MutableArrayData,
-                  index: usize,
-                  start: usize,
-                  len: usize| {
-                let offset_buffer = &mut mutable.buffer1;
-
-                // this is safe due to how offset is built. See details on `get_last_offset`
-                let last_offset: T = unsafe { get_last_offset(offset_buffer) };
-
-                // offsets
-                extend_offsets::<T>(
-                    offset_buffer,
-                    last_offset,
-                    &offsets[start..start + len + 1],
-                );
-
-                mutable.child_data[0].extend(
-                    index,
-                    offsets[start].as_usize(),
-                    offsets[start + len].as_usize(),
-                )
-            },
-        )
-    } else {
-        // nulls present: append item by item, ignoring null entries
-        Box::new(
-            move |mutable: &mut _MutableArrayData,
-                  index: usize,
-                  start: usize,
-                  len: usize| {
-                let offset_buffer = &mut mutable.buffer1;
-
-                // this is safe due to how offset is built. See details on `get_last_offset`
-                let mut last_offset: T = unsafe { get_last_offset(offset_buffer) };
-
-                let delta_len = array.len() - array.null_count();
-                offset_buffer.reserve(delta_len * std::mem::size_of::<T>());
-
-                let child = &mut mutable.child_data[0];
-                (start..start + len).for_each(|i| {
-                    if array.is_valid(i) {
-                        // compute the new offset
-                        last_offset = last_offset + offsets[i + 1] - offsets[i];
-
-                        // append value
-                        child.extend(
-                            index,
-                            offsets[i].as_usize(),
-                            offsets[i + 1].as_usize(),
-                        );
-                    }
-                    // append offset
-                    offset_buffer.push(last_offset);
-                })
-            },
-        )
-    }
+    Box::new(
+        move |mutable: &mut _MutableArrayData, index: usize, start: usize, len: usize| {
+            let offset_buffer = &mut mutable.buffer1;
+
+            // this is safe due to how offset is built. See details on `get_last_offset`
+            let last_offset: T = unsafe { get_last_offset(offset_buffer) };
+
+            // offsets
+            extend_offsets::<T>(
+                offset_buffer,
+                last_offset,
+                &offsets[start..start + len + 1],
+            );
+
+            mutable.child_data[0].extend(
+                index,
+                offsets[start].as_usize(),
+                offsets[start + len].as_usize(),
+            )
+        },
+    )
 }
 
 pub(super) fn extend_nulls<T: ArrowNativeType>(
diff --git a/arrow-data/src/transform/structure.rs b/arrow-data/src/transform/structure.rs
index c6841da4d..7330dcaa3 100644
--- a/arrow-data/src/transform/structure.rs
+++ b/arrow-data/src/transform/structure.rs
@@ -18,41 +18,15 @@
 use super::{Extend, _MutableArrayData};
 use crate::ArrayData;
 
-pub(super) fn build_extend(array: &ArrayData) -> Extend {
-    if array.null_count() == 0 {
-        Box::new(
-            move |mutable: &mut _MutableArrayData,
-                  index: usize,
-                  start: usize,
-                  len: usize| {
-                mutable
-                    .child_data
-                    .iter_mut()
-                    .for_each(|child| child.extend(index, start, start + len))
-            },
-        )
-    } else {
-        Box::new(
-            move |mutable: &mut _MutableArrayData,
-                  index: usize,
-                  start: usize,
-                  len: usize| {
-                (start..start + len).for_each(|i| {
-                    if array.is_valid(i) {
-                        mutable
-                            .child_data
-                            .iter_mut()
-                            .for_each(|child| child.extend(index, i, i + 1))
-                    } else {
-                        mutable
-                            .child_data
-                            .iter_mut()
-                            .for_each(|child| child.extend_nulls(1))
-                    }
-                })
-            },
-        )
-    }
+pub(super) fn build_extend(_: &ArrayData) -> Extend {
+    Box::new(
+        move |mutable: &mut _MutableArrayData, index: usize, start: usize, len: usize| {
+            mutable
+                .child_data
+                .iter_mut()
+                .for_each(|child| child.extend(index, start, start + len))
+        },
+    )
 }
 
 pub(super) fn extend_nulls(mutable: &mut _MutableArrayData, len: usize) {
diff --git a/arrow-data/src/transform/variable_size.rs b/arrow-data/src/transform/variable_size.rs
index ce62459ae..597a8b2b6 100644
--- a/arrow-data/src/transform/variable_size.rs
+++ b/arrow-data/src/transform/variable_size.rs
@@ -46,54 +46,23 @@ pub(super) fn build_extend<
 ) -> Extend {
     let offsets = array.buffer::<T>(0);
     let values = array.buffers()[1].as_slice();
-    if array.null_count() == 0 {
-        // fast case where we can copy regions without null issues
-        Box::new(
-            move |mutable: &mut _MutableArrayData, _, start: usize, len: usize| {
-                let offset_buffer = &mut mutable.buffer1;
-                let values_buffer = &mut mutable.buffer2;
+    Box::new(
+        move |mutable: &mut _MutableArrayData, _, start: usize, len: usize| {
+            let offset_buffer = &mut mutable.buffer1;
+            let values_buffer = &mut mutable.buffer2;
 
-                // this is safe due to how offset is built. See details on `get_last_offset`
-                let last_offset = unsafe { get_last_offset(offset_buffer) };
+            // this is safe due to how offset is built. See details on `get_last_offset`
+            let last_offset = unsafe { get_last_offset(offset_buffer) };
 
-                extend_offsets::<T>(
-                    offset_buffer,
-                    last_offset,
-                    &offsets[start..start + len + 1],
-                );
-                // values
-                extend_offset_values::<T>(values_buffer, offsets, values, start, len);
-            },
-        )
-    } else {
-        Box::new(
-            move |mutable: &mut _MutableArrayData, _, start: usize, len: usize| {
-                let offset_buffer = &mut mutable.buffer1;
-                let values_buffer = &mut mutable.buffer2;
-
-                // this is safe due to how offset is built. See details on `get_last_offset`
-                let mut last_offset: T = unsafe { get_last_offset(offset_buffer) };
-
-                // nulls present: append item by item, ignoring null entries
-                offset_buffer.reserve(len * std::mem::size_of::<T>());
-
-                (start..start + len).for_each(|i| {
-                    if array.is_valid(i) {
-                        // compute the new offset
-                        let length = offsets[i + 1] - offsets[i];
-                        last_offset = last_offset + length;
-
-                        // append value
-                        let bytes = &values[offsets[i].to_usize().unwrap()
-                            ..offsets[i + 1].to_usize().unwrap()];
-                        values_buffer.extend_from_slice(bytes);
-                    }
-                    // offsets are always present
-                    offset_buffer.push(last_offset);
-                })
-            },
-        )
-    }
+            extend_offsets::<T>(
+                offset_buffer,
+                last_offset,
+                &offsets[start..start + len + 1],
+            );
+            // values
+            extend_offset_values::<T>(values_buffer, offsets, values, start, len);
+        },
+    )
 }
 
 pub(super) fn extend_nulls<T: ArrowNativeType>(