You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by al...@apache.org on 2021/08/08 10:35:43 UTC
[arrow-rs] branch active_release updated: Remove undefined behavior
in `value` method of boolean and primitive arrays (#644) (#668)
This is an automated email from the ASF dual-hosted git repository.
alamb pushed a commit to branch active_release
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git
The following commit(s) were added to refs/heads/active_release by this push:
new ead64b7 Remove undefined behavior in `value` method of boolean and primitive arrays (#644) (#668)
ead64b7 is described below
commit ead64b7a2fb6c4e0e3c05c7e27aa2c043882f7c3
Author: Andrew Lamb <an...@nerdnetworks.org>
AuthorDate: Sun Aug 8 06:35:38 2021 -0400
Remove undefined behavior in `value` method of boolean and primitive arrays (#644) (#668)
* Remove UB in `value`
* Add safety note
Co-authored-by: Daniƫl Heres <da...@gmail.com>
---
arrow/src/array/array_boolean.rs | 6 ++++--
arrow/src/array/array_primitive.rs | 6 ++----
arrow/src/array/array_string.rs | 25 +++++--------------------
arrow/src/compute/kernels/comparison.rs | 11 ++++++++---
4 files changed, 19 insertions(+), 29 deletions(-)
diff --git a/arrow/src/array/array_boolean.rs b/arrow/src/array/array_boolean.rs
index 5357614..9274e65 100644
--- a/arrow/src/array/array_boolean.rs
+++ b/arrow/src/array/array_boolean.rs
@@ -115,9 +115,11 @@ impl BooleanArray {
/// Returns the boolean value at index `i`.
///
- /// Note this doesn't do any bound checking, for performance reason.
+ /// Panics of offset `i` is out of bounds
pub fn value(&self, i: usize) -> bool {
- debug_assert!(i < self.len());
+ assert!(i < self.len());
+ // Safety:
+ // `i < self.len()
unsafe { self.value_unchecked(i) }
}
}
diff --git a/arrow/src/array/array_primitive.rs b/arrow/src/array/array_primitive.rs
index 0765629..9c14f88 100644
--- a/arrow/src/array/array_primitive.rs
+++ b/arrow/src/array/array_primitive.rs
@@ -101,12 +101,10 @@ impl<T: ArrowPrimitiveType> PrimitiveArray<T> {
/// Returns the primitive value at index `i`.
///
- /// Note this doesn't do any bound checking, for performance reason.
- /// # Safety
- /// caller must ensure that the passed in offset is less than the array len()
+ /// Panics of offset `i` is out of bounds
#[inline]
pub fn value(&self, i: usize) -> T::Native {
- debug_assert!(i < self.len());
+ assert!(i < self.len());
unsafe { self.value_unchecked(i) }
}
diff --git a/arrow/src/array/array_string.rs b/arrow/src/array/array_string.rs
index 0b48e57..2fa4c48 100644
--- a/arrow/src/array/array_string.rs
+++ b/arrow/src/array/array_string.rs
@@ -81,6 +81,7 @@ impl<OffsetSize: StringOffsetSizeTrait> GenericStringArray<OffsetSize> {
/// Returns the element at index
/// # Safety
/// caller is responsible for ensuring that index is within the array bounds
+ #[inline]
pub unsafe fn value_unchecked(&self, i: usize) -> &str {
let end = self.value_offsets().get_unchecked(i + 1);
let start = self.value_offsets().get_unchecked(i);
@@ -103,28 +104,12 @@ impl<OffsetSize: StringOffsetSizeTrait> GenericStringArray<OffsetSize> {
}
/// Returns the element at index `i` as &str
+ #[inline]
pub fn value(&self, i: usize) -> &str {
assert!(i < self.data.len(), "StringArray out of bounds access");
- //Soundness: length checked above, offset buffer length is 1 larger than logical array length
- let end = unsafe { self.value_offsets().get_unchecked(i + 1) };
- let start = unsafe { self.value_offsets().get_unchecked(i) };
-
- // Soundness
- // pointer alignment & location is ensured by RawPtrBox
- // buffer bounds/offset is ensured by the value_offset invariants
- // ISSUE: utf-8 well formedness is not checked
- unsafe {
- // Safety of `to_isize().unwrap()`
- // `start` and `end` are &OffsetSize, which is a generic type that implements the
- // OffsetSizeTrait. Currently, only i32 and i64 implement OffsetSizeTrait,
- // both of which should cleanly cast to isize on an architecture that supports
- // 32/64-bit offsets
- let slice = std::slice::from_raw_parts(
- self.value_data.as_ptr().offset(start.to_isize().unwrap()),
- (*end - *start).to_usize().unwrap(),
- );
- std::str::from_utf8_unchecked(slice)
- }
+ // Safety:
+ // `i < self.data.len()
+ unsafe { self.value_unchecked(i) }
}
fn from_list(v: GenericListArray<OffsetSize>) -> Self {
diff --git a/arrow/src/compute/kernels/comparison.rs b/arrow/src/compute/kernels/comparison.rs
index f54d305..a899d5b 100644
--- a/arrow/src/compute/kernels/comparison.rs
+++ b/arrow/src/compute/kernels/comparison.rs
@@ -46,7 +46,10 @@ macro_rules! compare_op {
let null_bit_buffer =
combine_option_bitmap($left.data_ref(), $right.data_ref(), $left.len())?;
- let comparison = (0..$left.len()).map(|i| $op($left.value(i), $right.value(i)));
+ // Safety:
+ // `i < $left.len()` and $left.len() == $right.len()
+ let comparison = (0..$left.len())
+ .map(|i| unsafe { $op($left.value_unchecked(i), $right.value_unchecked(i)) });
// same size as $left.len() and $right.len()
let buffer = unsafe { MutableBuffer::from_trusted_len_iter_bool(comparison) };
@@ -121,8 +124,10 @@ macro_rules! compare_op_primitive {
macro_rules! compare_op_scalar {
($left: expr, $right:expr, $op:expr) => {{
let null_bit_buffer = $left.data().null_buffer().cloned();
-
- let comparison = (0..$left.len()).map(|i| $op($left.value(i), $right));
+ // Safety:
+ // `i < $left.len()`
+ let comparison =
+ (0..$left.len()).map(|i| unsafe { $op($left.value_unchecked(i), $right) });
// same as $left.len()
let buffer = unsafe { MutableBuffer::from_trusted_len_iter_bool(comparison) };