You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by tu...@apache.org on 2023/04/05 11:03:39 UTC
[arrow-rs] branch master updated: Deprecate Array::data (#3880) (#4019)
This is an automated email from the ASF dual-hosted git repository.
tustvold pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git
The following commit(s) were added to refs/heads/master by this push:
new 39a48e18d Deprecate Array::data (#3880) (#4019)
39a48e18d is described below
commit 39a48e18d0b880be9932252dc0755f45cde50188
Author: Raphael Taylor-Davies <17...@users.noreply.github.com>
AuthorDate: Wed Apr 5 12:03:32 2023 +0100
Deprecate Array::data (#3880) (#4019)
* Deprecate Array::data (#3880)
* Review feedback
---
arrow-array/src/array/binary_array.rs | 3 --
arrow-array/src/array/mod.rs | 25 ++++++++++----
arrow-array/src/array/primitive_array.rs | 3 +-
arrow-array/src/array/struct_array.rs | 18 +++++-----
arrow-cast/src/cast.rs | 38 ++++++++--------------
arrow-integration-test/src/lib.rs | 2 +-
.../src/bin/arrow-json-integration-test.rs | 4 +--
.../flight_client_scenarios/integration_test.rs | 4 +--
arrow-ipc/src/writer.rs | 25 +++++++-------
arrow-row/src/lib.rs | 2 +-
arrow-select/src/concat.rs | 32 ++++++++++--------
arrow-select/src/filter.rs | 4 +--
arrow-select/src/interleave.rs | 3 +-
arrow-select/src/take.rs | 24 +++++---------
arrow-select/src/zip.rs | 6 ++--
arrow-string/src/length.rs | 4 +--
arrow/src/ffi_stream.rs | 8 ++---
arrow/src/pyarrow.rs | 2 +-
arrow/tests/array_transform.rs | 4 +--
19 files changed, 102 insertions(+), 109 deletions(-)
diff --git a/arrow-array/src/array/binary_array.rs b/arrow-array/src/array/binary_array.rs
index be861474f..3b13a513f 100644
--- a/arrow-array/src/array/binary_array.rs
+++ b/arrow-array/src/array/binary_array.rs
@@ -467,9 +467,6 @@ mod tests {
let list_array = GenericListArray::<O>::from(array_data2);
let binary_array2 = GenericBinaryArray::<O>::from(list_array);
- assert_eq!(2, binary_array2.data().buffers().len());
- assert_eq!(0, binary_array2.data().child_data().len());
-
assert_eq!(binary_array1.len(), binary_array2.len());
assert_eq!(binary_array1.null_count(), binary_array2.null_count());
assert_eq!(binary_array1.value_offsets(), binary_array2.value_offsets());
diff --git a/arrow-array/src/array/mod.rs b/arrow-array/src/array/mod.rs
index 41d5c8beb..fa6e970b4 100644
--- a/arrow-array/src/array/mod.rs
+++ b/arrow-array/src/array/mod.rs
@@ -95,8 +95,7 @@ pub trait Array: std::fmt::Debug + Send + Sync {
fn as_any(&self) -> &dyn Any;
/// Returns a reference to the underlying data of this array
- ///
- /// This will be deprecated in a future release [(#3880)](https://github.com/apache/arrow-rs/issues/3880)
+ #[deprecated(note = "Use Array::to_data or Array::into_data")]
fn data(&self) -> &ArrayData;
/// Returns the underlying data of this array
@@ -108,9 +107,8 @@ pub trait Array: std::fmt::Debug + Send + Sync {
fn into_data(self) -> ArrayData;
/// Returns a reference-counted pointer to the underlying data of this array.
- ///
- /// This will be deprecated in a future release [(#3880)](https://github.com/apache/arrow-rs/issues/3880)
#[deprecated(note = "Use Array::to_data or Array::into_data")]
+ #[allow(deprecated)]
fn data_ref(&self) -> &ArrayData {
self.data()
}
@@ -281,6 +279,7 @@ impl Array for ArrayRef {
self.as_ref().as_any()
}
+ #[allow(deprecated)]
fn data(&self) -> &ArrayData {
self.as_ref().data()
}
@@ -348,6 +347,7 @@ impl<'a, T: Array> Array for &'a T {
T::as_any(self)
}
+ #[allow(deprecated)]
fn data(&self) -> &ArrayData {
T::data(self)
}
@@ -435,78 +435,91 @@ pub trait ArrayAccessor: Array {
}
impl PartialEq for dyn Array + '_ {
+ #[allow(deprecated)]
fn eq(&self, other: &Self) -> bool {
self.data().eq(other.data())
}
}
impl<T: Array> PartialEq<T> for dyn Array + '_ {
+ #[allow(deprecated)]
fn eq(&self, other: &T) -> bool {
self.data().eq(other.data())
}
}
impl PartialEq for NullArray {
+ #[allow(deprecated)]
fn eq(&self, other: &NullArray) -> bool {
self.data().eq(other.data())
}
}
impl<T: ArrowPrimitiveType> PartialEq for PrimitiveArray<T> {
+ #[allow(deprecated)]
fn eq(&self, other: &PrimitiveArray<T>) -> bool {
self.data().eq(other.data())
}
}
impl<K: ArrowDictionaryKeyType> PartialEq for DictionaryArray<K> {
+ #[allow(deprecated)]
fn eq(&self, other: &Self) -> bool {
self.data().eq(other.data())
}
}
impl PartialEq for BooleanArray {
+ #[allow(deprecated)]
fn eq(&self, other: &BooleanArray) -> bool {
self.data().eq(other.data())
}
}
impl<OffsetSize: OffsetSizeTrait> PartialEq for GenericStringArray<OffsetSize> {
+ #[allow(deprecated)]
fn eq(&self, other: &Self) -> bool {
self.data().eq(other.data())
}
}
impl<OffsetSize: OffsetSizeTrait> PartialEq for GenericBinaryArray<OffsetSize> {
+ #[allow(deprecated)]
fn eq(&self, other: &Self) -> bool {
self.data().eq(other.data())
}
}
impl PartialEq for FixedSizeBinaryArray {
+ #[allow(deprecated)]
fn eq(&self, other: &Self) -> bool {
self.data().eq(other.data())
}
}
impl<OffsetSize: OffsetSizeTrait> PartialEq for GenericListArray<OffsetSize> {
+ #[allow(deprecated)]
fn eq(&self, other: &Self) -> bool {
self.data().eq(other.data())
}
}
impl PartialEq for MapArray {
+ #[allow(deprecated)]
fn eq(&self, other: &Self) -> bool {
self.data().eq(other.data())
}
}
impl PartialEq for FixedSizeListArray {
+ #[allow(deprecated)]
fn eq(&self, other: &Self) -> bool {
self.data().eq(other.data())
}
}
impl PartialEq for StructArray {
+ #[allow(deprecated)]
fn eq(&self, other: &Self) -> bool {
self.data().eq(other.data())
}
@@ -865,8 +878,8 @@ mod tests {
let null_array = new_null_array(array.data_type(), 9);
assert_eq!(&array, &null_array);
assert_eq!(
- array.data().buffers()[0].len(),
- null_array.data().buffers()[0].len()
+ array.to_data().buffers()[0].len(),
+ null_array.to_data().buffers()[0].len()
);
}
diff --git a/arrow-array/src/array/primitive_array.rs b/arrow-array/src/array/primitive_array.rs
index 5dfcb4da4..75bf85b3f 100644
--- a/arrow-array/src/array/primitive_array.rs
+++ b/arrow-array/src/array/primitive_array.rs
@@ -519,10 +519,9 @@ impl<T: ArrowPrimitiveType> PrimitiveArray<T> {
O: ArrowPrimitiveType,
F: Fn(T::Native) -> Result<O::Native, E>,
{
- let data = self.data();
let len = self.len();
- let nulls = data.nulls().cloned();
+ let nulls = self.nulls().cloned();
let mut buffer = BufferBuilder::<O::Native>::new(len);
buffer.append_n_zeroed(len);
let slice = buffer.as_slice_mut();
diff --git a/arrow-array/src/array/struct_array.rs b/arrow-array/src/array/struct_array.rs
index 27e10a31f..1dccfc7d4 100644
--- a/arrow-array/src/array/struct_array.rs
+++ b/arrow-array/src/array/struct_array.rs
@@ -174,7 +174,7 @@ impl TryFrom<Vec<(&str, ArrayRef)>> for StructArray {
// null: the null mask of the arrays.
let mut null: Option<Buffer> = None;
for (field_name, array) in values {
- let child_datum = array.data();
+ let child_datum = array.to_data();
let child_datum_len = child_datum.len();
if let Some(len) = len {
if len != child_datum_len {
@@ -186,7 +186,6 @@ impl TryFrom<Vec<(&str, ArrayRef)>> for StructArray {
} else {
len = Some(child_datum_len)
}
- child_data.push(child_datum.clone());
fields.push(Arc::new(Field::new(
field_name,
array.data_type().clone(),
@@ -209,6 +208,7 @@ impl TryFrom<Vec<(&str, ArrayRef)>> for StructArray {
// when one of the fields has no nulls, then there is no null in the array
null = None;
}
+ child_data.push(child_datum);
}
let len = len.unwrap();
@@ -385,10 +385,8 @@ mod tests {
#[test]
fn test_struct_array_builder() {
- let array = BooleanArray::from(vec![false, false, true, true]);
- let boolean_data = array.data();
- let array = Int64Array::from(vec![42, 28, 19, 31]);
- let int_data = array.data();
+ let boolean_array = BooleanArray::from(vec![false, false, true, true]);
+ let int_array = Int64Array::from(vec![42, 28, 19, 31]);
let fields = vec![
Field::new("a", DataType::Boolean, false),
@@ -396,14 +394,14 @@ mod tests {
];
let struct_array_data = ArrayData::builder(DataType::Struct(fields.into()))
.len(4)
- .add_child_data(boolean_data.clone())
- .add_child_data(int_data.clone())
+ .add_child_data(boolean_array.to_data())
+ .add_child_data(int_array.to_data())
.build()
.unwrap();
let struct_array = StructArray::from(struct_array_data);
- assert_eq!(boolean_data, struct_array.column(0).data());
- assert_eq!(int_data, struct_array.column(1).data());
+ assert_eq!(struct_array.column(0).as_ref(), &boolean_array);
+ assert_eq!(struct_array.column(1).as_ref(), &int_array);
}
#[test]
diff --git a/arrow-cast/src/cast.rs b/arrow-cast/src/cast.rs
index 0ea6332a7..372fcc1a3 100644
--- a/arrow-cast/src/cast.rs
+++ b/arrow-cast/src/cast.rs
@@ -2217,9 +2217,9 @@ fn value_to_string<O: OffsetSizeTrait>(
let mut builder = GenericStringBuilder::<O>::new();
let options = FormatOptions::default();
let formatter = ArrayFormatter::try_new(array, &options)?;
- let data = array.data();
- for i in 0..data.len() {
- match data.is_null(i) {
+ let nulls = array.nulls();
+ for i in 0..array.len() {
+ match nulls.map(|x| x.is_null(i)).unwrap_or_default() {
true => builder.append_null(),
false => {
formatter.value(i).write(&mut builder)?;
@@ -3500,7 +3500,7 @@ where
FROM::Offset: OffsetSizeTrait + ToPrimitive,
TO::Offset: OffsetSizeTrait + NumCast,
{
- let data = array.data();
+ let data = array.to_data();
assert_eq!(data.data_type(), &FROM::DATA_TYPE);
let str_values_buf = data.buffers()[1].clone();
let offsets = data.buffers()[0].typed_data::<FROM::Offset>();
@@ -4844,9 +4844,8 @@ mod tests {
#[test]
fn test_cast_list_i32_to_list_u16() {
- let value_data = Int32Array::from(vec![0, 0, 0, -1, -2, -1, 2, 100000000])
- .data()
- .clone();
+ let value_data =
+ Int32Array::from(vec![0, 0, 0, -1, -2, -1, 2, 100000000]).into_data();
let value_offsets = Buffer::from_slice_ref([0, 3, 6, 8]);
@@ -4875,15 +4874,9 @@ mod tests {
assert_eq!(0, cast_array.null_count());
// offsets should be the same
- assert_eq!(
- list_array.data().buffers().to_vec(),
- cast_array.data().buffers().to_vec()
- );
- let array = cast_array
- .as_ref()
- .as_any()
- .downcast_ref::<ListArray>()
- .unwrap();
+ let array = cast_array.as_list::<i32>();
+ assert_eq!(list_array.value_offsets(), array.value_offsets());
+
assert_eq!(DataType::UInt16, array.value_type());
assert_eq!(3, array.value_length(0));
assert_eq!(3, array.value_length(1));
@@ -4908,9 +4901,8 @@ mod tests {
)]
fn test_cast_list_i32_to_list_timestamp() {
// Construct a value array
- let value_data = Int32Array::from(vec![0, 0, 0, -1, -2, -1, 2, 8, 100000000])
- .data()
- .clone();
+ let value_data =
+ Int32Array::from(vec![0, 0, 0, -1, -2, -1, 2, 8, 100000000]).into_data();
let value_offsets = Buffer::from_slice_ref([0, 3, 6, 9]);
@@ -7355,11 +7347,7 @@ mod tests {
fn test_list_to_string() {
let str_array = StringArray::from(vec!["a", "b", "c", "d", "e", "f", "g", "h"]);
let value_offsets = Buffer::from_slice_ref([0, 3, 6, 8]);
- let value_data = ArrayData::builder(DataType::Utf8)
- .len(str_array.len())
- .buffers(str_array.data().buffers().to_vec())
- .build()
- .unwrap();
+ let value_data = str_array.into_data();
let list_data_type =
DataType::List(Arc::new(Field::new("item", DataType::Utf8, true)));
@@ -8123,7 +8111,7 @@ mod tests {
let options = CastOptions { safe: true };
let array = cast_with_options(&s, &DataType::Utf8, &options).unwrap();
let a = array.as_string::<i32>();
- a.data().validate_full().unwrap();
+ a.to_data().validate_full().unwrap();
assert_eq!(a.null_count(), 1);
assert_eq!(a.len(), 2);
diff --git a/arrow-integration-test/src/lib.rs b/arrow-integration-test/src/lib.rs
index 8ee7bc600..04bbcf3f6 100644
--- a/arrow-integration-test/src/lib.rs
+++ b/arrow-integration-test/src/lib.rs
@@ -938,7 +938,7 @@ pub fn dictionary_array_from_json(
// convert key and value to dictionary data
let dict_data = ArrayData::builder(field.data_type().clone())
.len(keys.len())
- .add_buffer(keys.data().buffers()[0].clone())
+ .add_buffer(keys.to_data().buffers()[0].clone())
.null_bit_buffer(Some(null_buf))
.add_child_data(values.into_data())
.build()
diff --git a/arrow-integration-testing/src/bin/arrow-json-integration-test.rs b/arrow-integration-testing/src/bin/arrow-json-integration-test.rs
index 90a2d171d..2c36e8d9b 100644
--- a/arrow-integration-testing/src/bin/arrow-json-integration-test.rs
+++ b/arrow-integration-testing/src/bin/arrow-json-integration-test.rs
@@ -200,8 +200,8 @@ fn validate(arrow_name: &str, json_name: &str, verbose: bool) -> Result<()> {
for i in 0..num_columns {
assert_eq!(
- arrow_batch.column(i).data(),
- json_batch.column(i).data(),
+ arrow_batch.column(i).as_ref(),
+ json_batch.column(i).as_ref(),
"Arrow and JSON batch columns not the same"
);
}
diff --git a/arrow-integration-testing/src/flight_client_scenarios/integration_test.rs b/arrow-integration-testing/src/flight_client_scenarios/integration_test.rs
index 3c537c5f6..a55c2dec0 100644
--- a/arrow-integration-testing/src/flight_client_scenarios/integration_test.rs
+++ b/arrow-integration-testing/src/flight_client_scenarios/integration_test.rs
@@ -232,8 +232,8 @@ async fn consume_flight_location(
let field = schema.field(i);
let field_name = field.name();
- let expected_data = expected_batch.column(i).data();
- let actual_data = actual_batch.column(i).data();
+ let expected_data = expected_batch.column(i).as_ref();
+ let actual_data = actual_batch.column(i).as_ref();
assert_eq!(expected_data, actual_data, "Data for field {field_name}");
}
diff --git a/arrow-ipc/src/writer.rs b/arrow-ipc/src/writer.rs
index 7d29f048a..7d44d8f24 100644
--- a/arrow-ipc/src/writer.rs
+++ b/arrow-ipc/src/writer.rs
@@ -220,15 +220,16 @@ impl IpcDataGenerator {
}
}
DataType::RunEndEncoded(_, values) => {
- if column.data().child_data().len() != 2 {
+ let data = column.to_data();
+ if data.child_data().len() != 2 {
return Err(ArrowError::InvalidArgumentError(format!(
"The run encoded array should have exactly two child arrays. Found {}",
- column.data().child_data().len()
+ data.child_data().len()
)));
}
- // The run_ends array is not expected to be dictionoary encoded. Hence encode dictionaries
+ // The run_ends array is not expected to be dictionary encoded. Hence encode dictionaries
// only for values array.
- let values_array = make_array(column.data().child_data()[1].clone());
+ let values_array = make_array(data.child_data()[1].clone());
self.encode_dictionaries(
values,
&values_array,
@@ -330,7 +331,7 @@ impl IpcDataGenerator {
let dict_id = field
.dict_id()
.expect("All Dictionary types have `dict_id`");
- let dict_data = column.data();
+ let dict_data = column.to_data();
let dict_values = &dict_data.child_data()[0];
let values = make_array(dict_data.child_data()[0].clone());
@@ -418,9 +419,9 @@ impl IpcDataGenerator {
batch_compression_type.map(TryInto::try_into).transpose()?;
for array in batch.columns() {
- let array_data = array.data();
+ let array_data = array.to_data();
offset = write_array_data(
- array_data,
+ &array_data,
&mut buffers,
&mut arrow_data,
&mut nodes,
@@ -631,7 +632,7 @@ fn into_zero_offset_run_array<R: RunEndIndexType>(
/// multiple times. Can optionally error if an update to an existing dictionary is attempted, which
/// isn't allowed in the `FileWriter`.
pub struct DictionaryTracker {
- written: HashMap<i64, ArrayRef>,
+ written: HashMap<i64, ArrayData>,
error_on_replacement: bool,
}
@@ -660,18 +661,18 @@ impl DictionaryTracker {
dict_id: i64,
column: &ArrayRef,
) -> Result<bool, ArrowError> {
- let dict_data = column.data();
+ let dict_data = column.to_data();
let dict_values = &dict_data.child_data()[0];
// If a dictionary with this id was already emitted, check if it was the same.
if let Some(last) = self.written.get(&dict_id) {
- if ArrayData::ptr_eq(&last.data().child_data()[0], dict_values) {
+ if ArrayData::ptr_eq(&last.child_data()[0], dict_values) {
// Same dictionary values => no need to emit it again
return Ok(false);
}
if self.error_on_replacement {
// If error on replacement perform a logical comparison
- if last.data().child_data()[0] == *dict_values {
+ if last.child_data()[0] == *dict_values {
// Same dictionary values => no need to emit it again
return Ok(false);
}
@@ -684,7 +685,7 @@ impl DictionaryTracker {
}
}
- self.written.insert(dict_id, column.clone());
+ self.written.insert(dict_id, dict_data);
Ok(true)
}
}
diff --git a/arrow-row/src/lib.rs b/arrow-row/src/lib.rs
index 9cc7b4f30..71e1de416 100644
--- a/arrow-row/src/lib.rs
+++ b/arrow-row/src/lib.rs
@@ -2308,7 +2308,7 @@ mod tests {
let back = converter.convert_rows(&rows).unwrap();
for ((actual, expected), preserve) in back.iter().zip(&arrays).zip(preserve) {
- actual.data().validate_full().unwrap();
+ actual.to_data().validate_full().unwrap();
dictionary_eq(preserve, actual, expected)
}
}
diff --git a/arrow-select/src/concat.rs b/arrow-select/src/concat.rs
index e34cc9edb..ed27520cc 100644
--- a/arrow-select/src/concat.rs
+++ b/arrow-select/src/concat.rs
@@ -81,7 +81,8 @@ pub fn concat(arrays: &[&dyn Array]) -> Result<ArrayRef, ArrowError> {
_ => Capacities::Array(arrays.iter().map(|a| a.len()).sum()),
};
- let array_data = arrays.iter().map(|a| a.data()).collect::<Vec<_>>();
+ let array_data: Vec<_> = arrays.iter().map(|a| a.to_data()).collect::<Vec<_>>();
+ let array_data = array_data.iter().collect();
let mut mutable = MutableArrayData::with_capacities(array_data, false, capacity);
for (i, a) in arrays.iter().enumerate() {
@@ -131,6 +132,7 @@ pub fn concat_batches<'a>(
#[cfg(test)]
mod tests {
use super::*;
+ use arrow_array::cast::AsArray;
use arrow_schema::{Field, Schema};
use std::sync::Arc;
@@ -527,7 +529,7 @@ mod tests {
let arr = concat(&[&a, &b, &c]).unwrap();
// this would have been 1280 if we did not precompute the value lengths.
- assert_eq!(arr.data().buffers()[1].capacity(), 960);
+ assert_eq!(arr.to_data().buffers()[1].capacity(), 960);
}
#[test]
@@ -563,16 +565,20 @@ mod tests {
);
// Should have reused the dictionary
- assert!(array.data().child_data()[0].ptr_eq(&combined.data().child_data()[0]));
- assert!(copy.data().child_data()[0].ptr_eq(&combined.data().child_data()[0]));
+ assert!(array
+ .values()
+ .to_data()
+ .ptr_eq(&combined.values().to_data()));
+ assert!(copy.values().to_data().ptr_eq(&combined.values().to_data()));
let new: DictionaryArray<Int8Type> = vec!["d"].into_iter().collect();
let combined = concat(&[© as _, &array as _, &new as _]).unwrap();
+ let com = combined.as_dictionary::<Int8Type>();
// Should not have reused the dictionary
- assert!(!array.data().child_data()[0].ptr_eq(&combined.data().child_data()[0]));
- assert!(!copy.data().child_data()[0].ptr_eq(&combined.data().child_data()[0]));
- assert!(!new.data().child_data()[0].ptr_eq(&combined.data().child_data()[0]));
+ assert!(!array.values().to_data().ptr_eq(&com.values().to_data()));
+ assert!(!copy.values().to_data().ptr_eq(&com.values().to_data()));
+ assert!(!new.values().to_data().ptr_eq(&com.values().to_data()));
}
#[test]
@@ -656,12 +662,12 @@ mod tests {
let a = Int32Array::from_iter_values(0..100);
let b = Int32Array::from_iter_values(10..20);
let a = concat(&[&a, &b]).unwrap();
- let data = a.data();
+ let data = a.to_data();
assert_eq!(data.buffers()[0].len(), 440);
assert_eq!(data.buffers()[0].capacity(), 448); // Nearest multiple of 64
let a = concat(&[&a.slice(10, 20), &b]).unwrap();
- let data = a.data();
+ let data = a.to_data();
assert_eq!(data.buffers()[0].len(), 120);
assert_eq!(data.buffers()[0].capacity(), 128); // Nearest multiple of 64
@@ -669,7 +675,7 @@ mod tests {
let b = StringArray::from(vec!["bingo", "bongo", "lorem", ""]);
let a = concat(&[&a, &b]).unwrap();
- let data = a.data();
+ let data = a.to_data();
// (100 + 4 + 1) * size_of<i32>()
assert_eq!(data.buffers()[0].len(), 420);
assert_eq!(data.buffers()[0].capacity(), 448); // Nearest multiple of 64
@@ -679,7 +685,7 @@ mod tests {
assert_eq!(data.buffers()[1].capacity(), 320); // Nearest multiple of 64
let a = concat(&[&a.slice(10, 40), &b]).unwrap();
- let data = a.data();
+ let data = a.to_data();
// (40 + 4 + 5) * size_of<i32>()
assert_eq!(data.buffers()[0].len(), 180);
assert_eq!(data.buffers()[0].capacity(), 192); // Nearest multiple of 64
@@ -693,7 +699,7 @@ mod tests {
LargeBinaryArray::from_iter_values(std::iter::repeat(b"cupcakes").take(10));
let a = concat(&[&a, &b]).unwrap();
- let data = a.data();
+ let data = a.to_data();
// (100 + 10 + 1) * size_of<i64>()
assert_eq!(data.buffers()[0].len(), 888);
assert_eq!(data.buffers()[0].capacity(), 896); // Nearest multiple of 64
@@ -703,7 +709,7 @@ mod tests {
assert_eq!(data.buffers()[1].capacity(), 384); // Nearest multiple of 64
let a = concat(&[&a.slice(10, 40), &b]).unwrap();
- let data = a.data();
+ let data = a.to_data();
// (40 + 10 + 1) * size_of<i64>()
assert_eq!(data.buffers()[0].len(), 408);
assert_eq!(data.buffers()[0].capacity(), 448); // Nearest multiple of 64
diff --git a/arrow-select/src/filter.rs b/arrow-select/src/filter.rs
index ba8fc4a2c..06f083356 100644
--- a/arrow-select/src/filter.rs
+++ b/arrow-select/src/filter.rs
@@ -650,7 +650,7 @@ where
.into_data()
.into_builder()
.data_type(array.data_type().clone())
- .child_data(array.data().child_data().to_vec());
+ .child_data(vec![array.values().to_data()]);
// SAFETY:
// Keys were valid before, filtered subset is therefore still valid
@@ -1433,7 +1433,7 @@ mod tests {
builder.append::<Int32Type>("A", 3).unwrap();
let expected = builder.build().unwrap();
- assert_eq!(filtered.data(), expected.data());
+ assert_eq!(filtered.to_data(), expected.to_data());
}
#[test]
diff --git a/arrow-select/src/interleave.rs b/arrow-select/src/interleave.rs
index f274a3ebc..491395d1c 100644
--- a/arrow-select/src/interleave.rs
+++ b/arrow-select/src/interleave.rs
@@ -193,7 +193,8 @@ fn interleave_fallback(
values: &[&dyn Array],
indices: &[(usize, usize)],
) -> Result<ArrayRef, ArrowError> {
- let arrays: Vec<_> = values.iter().map(|x| x.data()).collect();
+ let arrays: Vec<_> = values.iter().map(|x| x.to_data()).collect();
+ let arrays: Vec<_> = arrays.iter().collect();
let mut array_data = MutableArrayData::new(arrays, false, indices.len());
let mut cur_array = indices[0].0;
diff --git a/arrow-select/src/take.rs b/arrow-select/src/take.rs
index 01d614813..3e7432530 100644
--- a/arrow-select/src/take.rs
+++ b/arrow-select/src/take.rs
@@ -1531,9 +1531,8 @@ mod tests {
macro_rules! test_take_list {
($offset_type:ty, $list_data_type:ident, $list_array_type:ident) => {{
// Construct a value array, [[0,0,0], [-1,-2,-1], [], [2,3]]
- let value_data = Int32Array::from(vec![0, 0, 0, -1, -2, -1, 2, 3])
- .data()
- .clone();
+ let value_data =
+ Int32Array::from(vec![0, 0, 0, -1, -2, -1, 2, 3]).into_data();
// Construct offsets
let value_offsets: [$offset_type; 5] = [0, 3, 6, 6, 8];
let value_offsets = Buffer::from_slice_ref(&value_offsets);
@@ -1570,8 +1569,7 @@ mod tests {
Some(0),
Some(0),
])
- .data()
- .clone();
+ .into_data();
// construct offsets
let expected_offsets: [$offset_type; 6] = [0, 2, 2, 5, 5, 8];
let expected_offsets = Buffer::from_slice_ref(&expected_offsets);
@@ -1604,8 +1602,7 @@ mod tests {
Some(5),
None,
])
- .data()
- .clone();
+ .into_data();
// Construct offsets
let value_offsets: [$offset_type; 5] = [0, 3, 6, 7, 9];
let value_offsets = Buffer::from_slice_ref(&value_offsets);
@@ -1644,8 +1641,7 @@ mod tests {
None,
Some(0),
])
- .data()
- .clone();
+ .into_data();
// construct offsets
let expected_offsets: [$offset_type; 6] = [0, 1, 1, 4, 6, 9];
let expected_offsets = Buffer::from_slice_ref(&expected_offsets);
@@ -1677,8 +1673,7 @@ mod tests {
Some(5),
None,
])
- .data()
- .clone();
+ .into_data();
// Construct offsets
let value_offsets: [$offset_type; 5] = [0, 3, 6, 6, 8];
let value_offsets = Buffer::from_slice_ref(&value_offsets);
@@ -1716,8 +1711,7 @@ mod tests {
None,
Some(0),
])
- .data()
- .clone();
+ .into_data();
// construct offsets
let expected_offsets: [$offset_type; 6] = [0, 0, 0, 3, 5, 8];
let expected_offsets = Buffer::from_slice_ref(&expected_offsets);
@@ -1852,9 +1846,7 @@ mod tests {
#[should_panic(expected = "index out of bounds: the len is 4 but the index is 1000")]
fn test_take_list_out_of_bounds() {
// Construct a value array, [[0,0,0], [-1,-2,-1], [2,3]]
- let value_data = Int32Array::from(vec![0, 0, 0, -1, -2, -1, 2, 3])
- .data()
- .clone();
+ let value_data = Int32Array::from(vec![0, 0, 0, -1, -2, -1, 2, 3]).into_data();
// Construct offsets
let value_offsets = Buffer::from_slice_ref([0, 3, 6, 8]);
// Construct a list array from the above two
diff --git a/arrow-select/src/zip.rs b/arrow-select/src/zip.rs
index e5d0f25e8..b5df89154 100644
--- a/arrow-select/src/zip.rs
+++ b/arrow-select/src/zip.rs
@@ -42,10 +42,10 @@ pub fn zip(
"all arrays should have the same length".into(),
));
}
- let falsy = falsy.data();
- let truthy = truthy.data();
+ let falsy = falsy.to_data();
+ let truthy = truthy.to_data();
- let mut mutable = MutableArrayData::new(vec![truthy, falsy], false, truthy.len());
+ let mut mutable = MutableArrayData::new(vec![&truthy, &falsy], false, truthy.len());
// the SlicesIterator slices only the true values. So the gaps left by this iterator we need to
// fill with falsy values
diff --git a/arrow-string/src/length.rs b/arrow-string/src/length.rs
index c206fffb9..bd022532d 100644
--- a/arrow-string/src/length.rs
+++ b/arrow-string/src/length.rs
@@ -243,7 +243,7 @@ mod tests {
let result = $kernel(&array).unwrap();
let result = result.as_any().downcast_ref::<$result_ty>().unwrap();
let expected: $result_ty = $expected.into();
- assert_eq!(expected.data(), result.data());
+ assert_eq!(&expected, result);
}};
}
@@ -256,7 +256,7 @@ mod tests {
let result = length(&array).unwrap();
let result = result.as_any().downcast_ref::<$result_ty>().unwrap();
let expected: $result_ty = $expected.into();
- assert_eq!(expected.data(), result.data());
+ assert_eq!(&expected, result);
}};
}
diff --git a/arrow/src/ffi_stream.rs b/arrow/src/ffi_stream.rs
index 6b3067ab7..0e358c36a 100644
--- a/arrow/src/ffi_stream.rs
+++ b/arrow/src/ffi_stream.rs
@@ -220,7 +220,7 @@ impl ExportedArrayStream {
let mut private_data = self.get_private_data();
let reader = &mut private_data.batch_reader;
- let ret_code = match reader.next() {
+ match reader.next() {
None => {
// Marks ArrowArray released to indicate reaching the end of stream.
unsafe { std::ptr::write(out, FFI_ArrowArray::empty()) }
@@ -229,7 +229,7 @@ impl ExportedArrayStream {
Some(next_batch) => {
if let Ok(batch) = next_batch {
let struct_array = StructArray::from(batch);
- let array = FFI_ArrowArray::new(struct_array.data());
+ let array = FFI_ArrowArray::new(&struct_array.to_data());
unsafe { std::ptr::copy(addr_of!(array), out, 1) };
std::mem::forget(array);
@@ -240,9 +240,7 @@ impl ExportedArrayStream {
get_error_code(err)
}
}
- };
-
- ret_code
+ }
}
pub fn get_last_error(&mut self) -> &String {
diff --git a/arrow/src/pyarrow.rs b/arrow/src/pyarrow.rs
index 8cc08988c..081cc8063 100644
--- a/arrow/src/pyarrow.rs
+++ b/arrow/src/pyarrow.rs
@@ -187,7 +187,7 @@ impl PyArrowConvert for RecordBatch {
let columns = self.columns().iter();
for array in columns {
- py_arrays.push(array.data().to_pyarrow(py)?);
+ py_arrays.push(array.to_data().to_pyarrow(py)?);
}
let py_schema = schema.to_pyarrow(py)?;
diff --git a/arrow/tests/array_transform.rs b/arrow/tests/array_transform.rs
index 30a8bad60..7cd0007cc 100644
--- a/arrow/tests/array_transform.rs
+++ b/arrow/tests/array_transform.rs
@@ -44,8 +44,8 @@ fn create_decimal_array(
#[cfg(not(feature = "force_validate"))]
fn test_decimal() {
let decimal_array =
- create_decimal_array(vec![Some(1), Some(2), None, Some(3)], 10, 3);
- let arrays = vec![Array::data(&decimal_array)];
+ create_decimal_array(vec![Some(1), Some(2), None, Some(3)], 10, 3).into_data();
+ let arrays = vec![&decimal_array];
let mut a = MutableArrayData::new(arrays, true, 3);
a.extend(0, 0, 3);
a.extend(0, 2, 3);