You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by al...@apache.org on 2021/08/08 10:35:43 UTC

[arrow-rs] branch active_release updated: Remove undefined behavior in `value` method of boolean and primitive arrays (#644) (#668)

This is an automated email from the ASF dual-hosted git repository.

alamb pushed a commit to branch active_release
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git


The following commit(s) were added to refs/heads/active_release by this push:
     new ead64b7  Remove undefined behavior in `value` method of boolean and primitive arrays (#644) (#668)
ead64b7 is described below

commit ead64b7a2fb6c4e0e3c05c7e27aa2c043882f7c3
Author: Andrew Lamb <an...@nerdnetworks.org>
AuthorDate: Sun Aug 8 06:35:38 2021 -0400

    Remove undefined behavior in `value` method of boolean and primitive arrays (#644) (#668)
    
    * Remove UB in `value`
    
    * Add safety note
    
    Co-authored-by: Daniƫl Heres <da...@gmail.com>
---
 arrow/src/array/array_boolean.rs        |  6 ++++--
 arrow/src/array/array_primitive.rs      |  6 ++----
 arrow/src/array/array_string.rs         | 25 +++++--------------------
 arrow/src/compute/kernels/comparison.rs | 11 ++++++++---
 4 files changed, 19 insertions(+), 29 deletions(-)

diff --git a/arrow/src/array/array_boolean.rs b/arrow/src/array/array_boolean.rs
index 5357614..9274e65 100644
--- a/arrow/src/array/array_boolean.rs
+++ b/arrow/src/array/array_boolean.rs
@@ -115,9 +115,11 @@ impl BooleanArray {
 
     /// Returns the boolean value at index `i`.
     ///
-    /// Note this doesn't do any bound checking, for performance reason.
+    /// Panics of offset `i` is out of bounds
     pub fn value(&self, i: usize) -> bool {
-        debug_assert!(i < self.len());
+        assert!(i < self.len());
+        // Safety:
+        // `i < self.len()
         unsafe { self.value_unchecked(i) }
     }
 }
diff --git a/arrow/src/array/array_primitive.rs b/arrow/src/array/array_primitive.rs
index 0765629..9c14f88 100644
--- a/arrow/src/array/array_primitive.rs
+++ b/arrow/src/array/array_primitive.rs
@@ -101,12 +101,10 @@ impl<T: ArrowPrimitiveType> PrimitiveArray<T> {
 
     /// Returns the primitive value at index `i`.
     ///
-    /// Note this doesn't do any bound checking, for performance reason.
-    /// # Safety
-    /// caller must ensure that the passed in offset is less than the array len()
+    /// Panics of offset `i` is out of bounds
     #[inline]
     pub fn value(&self, i: usize) -> T::Native {
-        debug_assert!(i < self.len());
+        assert!(i < self.len());
         unsafe { self.value_unchecked(i) }
     }
 
diff --git a/arrow/src/array/array_string.rs b/arrow/src/array/array_string.rs
index 0b48e57..2fa4c48 100644
--- a/arrow/src/array/array_string.rs
+++ b/arrow/src/array/array_string.rs
@@ -81,6 +81,7 @@ impl<OffsetSize: StringOffsetSizeTrait> GenericStringArray<OffsetSize> {
     /// Returns the element at index
     /// # Safety
     /// caller is responsible for ensuring that index is within the array bounds
+    #[inline]
     pub unsafe fn value_unchecked(&self, i: usize) -> &str {
         let end = self.value_offsets().get_unchecked(i + 1);
         let start = self.value_offsets().get_unchecked(i);
@@ -103,28 +104,12 @@ impl<OffsetSize: StringOffsetSizeTrait> GenericStringArray<OffsetSize> {
     }
 
     /// Returns the element at index `i` as &str
+    #[inline]
     pub fn value(&self, i: usize) -> &str {
         assert!(i < self.data.len(), "StringArray out of bounds access");
-        //Soundness: length checked above, offset buffer length is 1 larger than logical array length
-        let end = unsafe { self.value_offsets().get_unchecked(i + 1) };
-        let start = unsafe { self.value_offsets().get_unchecked(i) };
-
-        // Soundness
-        // pointer alignment & location is ensured by RawPtrBox
-        // buffer bounds/offset is ensured by the value_offset invariants
-        // ISSUE: utf-8 well formedness is not checked
-        unsafe {
-            // Safety of `to_isize().unwrap()`
-            // `start` and `end` are &OffsetSize, which is a generic type that implements the
-            // OffsetSizeTrait. Currently, only i32 and i64 implement OffsetSizeTrait,
-            // both of which should cleanly cast to isize on an architecture that supports
-            // 32/64-bit offsets
-            let slice = std::slice::from_raw_parts(
-                self.value_data.as_ptr().offset(start.to_isize().unwrap()),
-                (*end - *start).to_usize().unwrap(),
-            );
-            std::str::from_utf8_unchecked(slice)
-        }
+        // Safety:
+        // `i < self.data.len()
+        unsafe { self.value_unchecked(i) }
     }
 
     fn from_list(v: GenericListArray<OffsetSize>) -> Self {
diff --git a/arrow/src/compute/kernels/comparison.rs b/arrow/src/compute/kernels/comparison.rs
index f54d305..a899d5b 100644
--- a/arrow/src/compute/kernels/comparison.rs
+++ b/arrow/src/compute/kernels/comparison.rs
@@ -46,7 +46,10 @@ macro_rules! compare_op {
         let null_bit_buffer =
             combine_option_bitmap($left.data_ref(), $right.data_ref(), $left.len())?;
 
-        let comparison = (0..$left.len()).map(|i| $op($left.value(i), $right.value(i)));
+        // Safety:
+        // `i < $left.len()` and $left.len() == $right.len()
+        let comparison = (0..$left.len())
+            .map(|i| unsafe { $op($left.value_unchecked(i), $right.value_unchecked(i)) });
         // same size as $left.len() and $right.len()
         let buffer = unsafe { MutableBuffer::from_trusted_len_iter_bool(comparison) };
 
@@ -121,8 +124,10 @@ macro_rules! compare_op_primitive {
 macro_rules! compare_op_scalar {
     ($left: expr, $right:expr, $op:expr) => {{
         let null_bit_buffer = $left.data().null_buffer().cloned();
-
-        let comparison = (0..$left.len()).map(|i| $op($left.value(i), $right));
+        // Safety:
+        // `i < $left.len()`
+        let comparison =
+            (0..$left.len()).map(|i| unsafe { $op($left.value_unchecked(i), $right) });
         // same as $left.len()
         let buffer = unsafe { MutableBuffer::from_trusted_len_iter_bool(comparison) };