You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by tu...@apache.org on 2022/11/23 18:48:59 UTC
[arrow-rs] branch master updated: Return slice from GenericByteArray::value_data (#3171)
This is an automated email from the ASF dual-hosted git repository.
tustvold pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git
The following commit(s) were added to refs/heads/master by this push:
new f749e1d9f Return slice from GenericByteArray::value_data (#3171)
f749e1d9f is described below
commit f749e1d9f19a5da9249b8e1d2429b10acde97805
Author: Raphael Taylor-Davies <17...@users.noreply.github.com>
AuthorDate: Wed Nov 23 18:48:53 2022 +0000
Return slice from GenericByteArray::value_data (#3171)
---
arrow-array/src/array/byte_array.rs | 8 ++++----
arrow-cast/src/cast.rs | 23 +++++++++--------------
arrow/src/compute/kernels/concat_elements.rs | 13 +++----------
arrow/src/compute/kernels/substring.rs | 6 ++----
4 files changed, 18 insertions(+), 32 deletions(-)
diff --git a/arrow-array/src/array/byte_array.rs b/arrow-array/src/array/byte_array.rs
index 8dd206bd2..8c2616624 100644
--- a/arrow-array/src/array/byte_array.rs
+++ b/arrow-array/src/array/byte_array.rs
@@ -21,7 +21,7 @@ use crate::raw_pointer::RawPtrBox;
use crate::types::bytes::ByteArrayNativeType;
use crate::types::ByteArrayType;
use crate::{Array, ArrayAccessor, OffsetSizeTrait};
-use arrow_buffer::{ArrowNativeType, Buffer};
+use arrow_buffer::ArrowNativeType;
use arrow_data::ArrayData;
use arrow_schema::DataType;
use std::any::Any;
@@ -55,9 +55,9 @@ impl<T: ByteArrayType> GenericByteArray<T> {
offsets[i + 1] - offsets[i]
}
- /// Returns a clone of the value data buffer
- pub fn value_data(&self) -> Buffer {
- self.data.buffers()[1].clone()
+ /// Returns the raw value data
+ pub fn value_data(&self) -> &[u8] {
+ self.data.buffers()[1].as_slice()
}
/// Returns the offset values in the offsets buffer
diff --git a/arrow-cast/src/cast.rs b/arrow-cast/src/cast.rs
index 61be2171b..3f1775825 100644
--- a/arrow-cast/src/cast.rs
+++ b/arrow-cast/src/cast.rs
@@ -3402,14 +3402,13 @@ where
OffsetSizeFrom: OffsetSizeTrait + ToPrimitive,
OffsetSizeTo: OffsetSizeTrait + NumCast + ArrowNativeType,
{
- let str_array = array
- .as_any()
- .downcast_ref::<GenericStringArray<OffsetSizeFrom>>()
- .unwrap();
- let list_data = array.data();
- let str_values_buf = str_array.value_data();
-
- let offsets = list_data.buffers()[0].typed_data::<OffsetSizeFrom>();
+ let data = array.data();
+ assert_eq!(
+ data.data_type(),
+ &GenericStringArray::<OffsetSizeFrom>::DATA_TYPE
+ );
+ let str_values_buf = data.buffers()[1].clone();
+ let offsets = data.buffers()[0].typed_data::<OffsetSizeFrom>();
let mut offset_builder = BufferBuilder::<OffsetSizeTo>::new(offsets.len());
offsets
@@ -3426,18 +3425,14 @@ where
let offset_buffer = offset_builder.finish();
- let dtype = if matches!(std::mem::size_of::<OffsetSizeTo>(), 8) {
- DataType::LargeUtf8
- } else {
- DataType::Utf8
- };
+ let dtype = GenericStringArray::<OffsetSizeTo>::DATA_TYPE;
let builder = ArrayData::builder(dtype)
.offset(array.offset())
.len(array.len())
.add_buffer(offset_buffer)
.add_buffer(str_values_buf)
- .null_bit_buffer(list_data.null_buffer().cloned());
+ .null_bit_buffer(data.null_buffer().cloned());
let array_data = unsafe { builder.build_unchecked() };
diff --git a/arrow/src/compute/kernels/concat_elements.rs b/arrow/src/compute/kernels/concat_elements.rs
index 1c0a0925d..a908ba9ab 100644
--- a/arrow/src/compute/kernels/concat_elements.rs
+++ b/arrow/src/compute/kernels/concat_elements.rs
@@ -50,10 +50,8 @@ pub fn concat_elements_utf8<Offset: OffsetSizeTrait>(
let left_offsets = left.value_offsets();
let right_offsets = right.value_offsets();
- let left_buffer = left.value_data();
- let right_buffer = right.value_data();
- let left_values = left_buffer.as_slice();
- let right_values = right_buffer.as_slice();
+ let left_values = left.value_data();
+ let right_values = right.value_data();
let mut output_values = BufferBuilder::<u8>::new(
left_values.len() + right_values.len()
@@ -115,16 +113,11 @@ pub fn concat_elements_utf8_many<Offset: OffsetSizeTrait>(
size,
)?;
- let data_buffers = arrays
+ let data_values = arrays
.iter()
.map(|array| array.value_data())
.collect::<Vec<_>>();
- let data_values = data_buffers
- .iter()
- .map(|buffer| buffer.as_slice())
- .collect::<Vec<_>>();
-
let mut offsets = arrays
.iter()
.map(|a| a.value_offsets().iter().peekable())
diff --git a/arrow/src/compute/kernels/substring.rs b/arrow/src/compute/kernels/substring.rs
index 76568ae0d..23cb2c19f 100644
--- a/arrow/src/compute/kernels/substring.rs
+++ b/arrow/src/compute/kernels/substring.rs
@@ -253,8 +253,7 @@ fn binary_substring<OffsetSize: OffsetSizeTrait>(
length: Option<OffsetSize>,
) -> Result<ArrayRef> {
let offsets = array.value_offsets();
- let values = array.value_data();
- let data = values.as_slice();
+ let data = array.value_data();
let zero = OffsetSize::zero();
// start and end offsets of all substrings
@@ -364,8 +363,7 @@ fn utf8_substring<OffsetSize: OffsetSizeTrait>(
length: Option<OffsetSize>,
) -> Result<ArrayRef> {
let offsets = array.value_offsets();
- let values = array.value_data();
- let data = values.as_slice();
+ let data = array.value_data();
let zero = OffsetSize::zero();
// Check if `offset` is at a valid char boundary.