You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by tu...@apache.org on 2022/11/23 18:48:59 UTC

[arrow-rs] branch master updated: Return slice from GenericByteArray::value_data (#3171)

This is an automated email from the ASF dual-hosted git repository.

tustvold pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git


The following commit(s) were added to refs/heads/master by this push:
     new f749e1d9f Return slice from GenericByteArray::value_data (#3171)
f749e1d9f is described below

commit f749e1d9f19a5da9249b8e1d2429b10acde97805
Author: Raphael Taylor-Davies <17...@users.noreply.github.com>
AuthorDate: Wed Nov 23 18:48:53 2022 +0000

    Return slice from GenericByteArray::value_data (#3171)
---
 arrow-array/src/array/byte_array.rs          |  8 ++++----
 arrow-cast/src/cast.rs                       | 23 +++++++++--------------
 arrow/src/compute/kernels/concat_elements.rs | 13 +++----------
 arrow/src/compute/kernels/substring.rs       |  6 ++----
 4 files changed, 18 insertions(+), 32 deletions(-)

diff --git a/arrow-array/src/array/byte_array.rs b/arrow-array/src/array/byte_array.rs
index 8dd206bd2..8c2616624 100644
--- a/arrow-array/src/array/byte_array.rs
+++ b/arrow-array/src/array/byte_array.rs
@@ -21,7 +21,7 @@ use crate::raw_pointer::RawPtrBox;
 use crate::types::bytes::ByteArrayNativeType;
 use crate::types::ByteArrayType;
 use crate::{Array, ArrayAccessor, OffsetSizeTrait};
-use arrow_buffer::{ArrowNativeType, Buffer};
+use arrow_buffer::ArrowNativeType;
 use arrow_data::ArrayData;
 use arrow_schema::DataType;
 use std::any::Any;
@@ -55,9 +55,9 @@ impl<T: ByteArrayType> GenericByteArray<T> {
         offsets[i + 1] - offsets[i]
     }
 
-    /// Returns a clone of the value data buffer
-    pub fn value_data(&self) -> Buffer {
-        self.data.buffers()[1].clone()
+    /// Returns the raw value data
+    pub fn value_data(&self) -> &[u8] {
+        self.data.buffers()[1].as_slice()
     }
 
     /// Returns the offset values in the offsets buffer
diff --git a/arrow-cast/src/cast.rs b/arrow-cast/src/cast.rs
index 61be2171b..3f1775825 100644
--- a/arrow-cast/src/cast.rs
+++ b/arrow-cast/src/cast.rs
@@ -3402,14 +3402,13 @@ where
     OffsetSizeFrom: OffsetSizeTrait + ToPrimitive,
     OffsetSizeTo: OffsetSizeTrait + NumCast + ArrowNativeType,
 {
-    let str_array = array
-        .as_any()
-        .downcast_ref::<GenericStringArray<OffsetSizeFrom>>()
-        .unwrap();
-    let list_data = array.data();
-    let str_values_buf = str_array.value_data();
-
-    let offsets = list_data.buffers()[0].typed_data::<OffsetSizeFrom>();
+    let data = array.data();
+    assert_eq!(
+        data.data_type(),
+        &GenericStringArray::<OffsetSizeFrom>::DATA_TYPE
+    );
+    let str_values_buf = data.buffers()[1].clone();
+    let offsets = data.buffers()[0].typed_data::<OffsetSizeFrom>();
 
     let mut offset_builder = BufferBuilder::<OffsetSizeTo>::new(offsets.len());
     offsets
@@ -3426,18 +3425,14 @@ where
 
     let offset_buffer = offset_builder.finish();
 
-    let dtype = if matches!(std::mem::size_of::<OffsetSizeTo>(), 8) {
-        DataType::LargeUtf8
-    } else {
-        DataType::Utf8
-    };
+    let dtype = GenericStringArray::<OffsetSizeTo>::DATA_TYPE;
 
     let builder = ArrayData::builder(dtype)
         .offset(array.offset())
         .len(array.len())
         .add_buffer(offset_buffer)
         .add_buffer(str_values_buf)
-        .null_bit_buffer(list_data.null_buffer().cloned());
+        .null_bit_buffer(data.null_buffer().cloned());
 
     let array_data = unsafe { builder.build_unchecked() };
 
diff --git a/arrow/src/compute/kernels/concat_elements.rs b/arrow/src/compute/kernels/concat_elements.rs
index 1c0a0925d..a908ba9ab 100644
--- a/arrow/src/compute/kernels/concat_elements.rs
+++ b/arrow/src/compute/kernels/concat_elements.rs
@@ -50,10 +50,8 @@ pub fn concat_elements_utf8<Offset: OffsetSizeTrait>(
     let left_offsets = left.value_offsets();
     let right_offsets = right.value_offsets();
 
-    let left_buffer = left.value_data();
-    let right_buffer = right.value_data();
-    let left_values = left_buffer.as_slice();
-    let right_values = right_buffer.as_slice();
+    let left_values = left.value_data();
+    let right_values = right.value_data();
 
     let mut output_values = BufferBuilder::<u8>::new(
         left_values.len() + right_values.len()
@@ -115,16 +113,11 @@ pub fn concat_elements_utf8_many<Offset: OffsetSizeTrait>(
         size,
     )?;
 
-    let data_buffers = arrays
+    let data_values = arrays
         .iter()
         .map(|array| array.value_data())
         .collect::<Vec<_>>();
 
-    let data_values = data_buffers
-        .iter()
-        .map(|buffer| buffer.as_slice())
-        .collect::<Vec<_>>();
-
     let mut offsets = arrays
         .iter()
         .map(|a| a.value_offsets().iter().peekable())
diff --git a/arrow/src/compute/kernels/substring.rs b/arrow/src/compute/kernels/substring.rs
index 76568ae0d..23cb2c19f 100644
--- a/arrow/src/compute/kernels/substring.rs
+++ b/arrow/src/compute/kernels/substring.rs
@@ -253,8 +253,7 @@ fn binary_substring<OffsetSize: OffsetSizeTrait>(
     length: Option<OffsetSize>,
 ) -> Result<ArrayRef> {
     let offsets = array.value_offsets();
-    let values = array.value_data();
-    let data = values.as_slice();
+    let data = array.value_data();
     let zero = OffsetSize::zero();
 
     // start and end offsets of all substrings
@@ -364,8 +363,7 @@ fn utf8_substring<OffsetSize: OffsetSizeTrait>(
     length: Option<OffsetSize>,
 ) -> Result<ArrayRef> {
     let offsets = array.value_offsets();
-    let values = array.value_data();
-    let data = values.as_slice();
+    let data = array.value_data();
     let zero = OffsetSize::zero();
 
     // Check if `offset` is at a valid char boundary.