You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by tu...@apache.org on 2023/06/27 07:49:53 UTC

[arrow-rs] branch master updated: Simplify ffi import/export (#4447)

This is an automated email from the ASF dual-hosted git repository.

tustvold pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git


The following commit(s) were added to refs/heads/master by this push:
     new 45cc770a9 Simplify ffi import/export (#4447)
45cc770a9 is described below

commit 45cc770a96c6501b8f0c233f5cf301507c0f28c3
Author: Virgiel <35...@users.noreply.github.com>
AuthorDate: Tue Jun 27 09:49:47 2023 +0200

    Simplify ffi import/export (#4447)
    
    Co-authored-by: Virgiel <>
---
 arrow/src/array/ffi.rs  |  30 +----
 arrow/src/ffi.rs        | 295 ++++++++++++++++++------------------------------
 arrow/src/ffi_stream.rs |  16 +--
 arrow/src/pyarrow.rs    |   5 +-
 4 files changed, 117 insertions(+), 229 deletions(-)

diff --git a/arrow/src/array/ffi.rs b/arrow/src/array/ffi.rs
index 56b9b6ecf..639ff980e 100644
--- a/arrow/src/array/ffi.rs
+++ b/arrow/src/array/ffi.rs
@@ -19,29 +19,9 @@
 
 use std::convert::TryFrom;
 
-use crate::{
-    error::{ArrowError, Result},
-    ffi,
-    ffi::ArrowArrayRef,
-};
+use crate::{error::Result, ffi};
 
-use super::{ArrayData, ArrayRef};
-
-impl TryFrom<ffi::ArrowArray> for ArrayData {
-    type Error = ArrowError;
-
-    fn try_from(value: ffi::ArrowArray) -> Result<Self> {
-        value.to_data()
-    }
-}
-
-impl TryFrom<ArrayData> for ffi::ArrowArray {
-    type Error = ArrowError;
-
-    fn try_from(value: ArrayData) -> Result<Self> {
-        ffi::ArrowArray::try_new(value)
-    }
-}
+use super::ArrayRef;
 
 /// Exports an array to raw pointers of the C Data Interface provided by the consumer.
 /// # Safety
@@ -79,7 +59,7 @@ mod tests {
             StructArray, UInt32Array, UInt64Array,
         },
         datatypes::{DataType, Field},
-        ffi::{ArrowArray, FFI_ArrowArray, FFI_ArrowSchema},
+        ffi::{from_ffi, FFI_ArrowArray, FFI_ArrowSchema},
     };
     use std::convert::TryFrom;
     use std::sync::Arc;
@@ -90,9 +70,7 @@ mod tests {
         let schema = FFI_ArrowSchema::try_from(expected.data_type())?;
 
         // simulate an external consumer by being the consumer
-        let d1 = ArrowArray::new(array, schema);
-
-        let result = &ArrayData::try_from(d1)?;
+        let result = &from_ffi(array, &schema)?;
 
         assert_eq!(result, expected);
         Ok(())
diff --git a/arrow/src/ffi.rs b/arrow/src/ffi.rs
index d8b5be69a..12aa1309c 100644
--- a/arrow/src/ffi.rs
+++ b/arrow/src/ffi.rs
@@ -22,7 +22,7 @@
 //! This is handled by [FFI_ArrowSchema] and [FFI_ArrowArray].
 //!
 //! The second interface maps native Rust types to the Rust-specific implementation of Arrow such as `format` to `Datatype`,
-//! `Buffer`, etc. This is handled by `ArrowArray`.
+//! `Buffer`, etc. This is handled by `from_ffi` and `to_ffi`.
 //!
 //!
 //! Export to FFI
@@ -32,19 +32,18 @@
 //! # use arrow::array::{Int32Array, Array, ArrayData, make_array};
 //! # use arrow::error::Result;
 //! # use arrow::compute::kernels::arithmetic;
-//! # use arrow::ffi::{ArrowArray, FFI_ArrowArray, FFI_ArrowSchema};
+//! # use arrow::ffi::{to_ffi, from_ffi};
 //! # fn main() -> Result<()> {
 //! // create an array natively
 //! let array = Int32Array::from(vec![Some(1), None, Some(3)]);
 //! let data = array.into_data();
 //!
 //! // Export it
-//! let out_array = FFI_ArrowArray::new(&data);
-//! let out_schema = FFI_ArrowSchema::try_from(data.data_type())?;
+//! let (out_array, out_schema) = to_ffi(&data)?;
 //!
 //! // import it
-//! let array = ArrowArray::new(out_array, out_schema);
-//! let array = Int32Array::from(ArrayData::try_from(array)?);
+//! let data = from_ffi(out_array, &out_schema)?;
+//! let array = Int32Array::from(data);
 //!
 //! // perform some operation
 //! let array = arithmetic::add(&array, &array)?;
@@ -60,7 +59,7 @@
 //!
 //! ```
 //! # use std::ptr::addr_of_mut;
-//! # use arrow::ffi::{ArrowArray, FFI_ArrowArray, FFI_ArrowSchema};
+//! # use arrow::ffi::{from_ffi, FFI_ArrowArray, FFI_ArrowSchema};
 //! # use arrow_array::{ArrayRef, make_array};
 //! # use arrow_schema::ArrowError;
 //! #
@@ -80,7 +79,7 @@
 //!     let mut schema = FFI_ArrowSchema::empty();
 //!     let mut array = FFI_ArrowArray::empty();
 //!     foreign.export_to_c(addr_of_mut!(array), addr_of_mut!(schema));
-//!     Ok(make_array(ArrowArray::new(array, schema).try_into()?))
+//!     Ok(make_array(from_ffi(array, &schema)?))
 //! }
 //! ```
 
@@ -222,15 +221,44 @@ unsafe fn create_buffer(
         .map(|ptr| Buffer::from_custom_allocation(ptr, len, owner))
 }
 
-pub trait ArrowArrayRef {
-    fn to_data(&self) -> Result<ArrayData> {
-        let data_type = self.data_type()?;
-        let len = self.array().len();
-        let offset = self.array().offset();
-        let null_count = self.array().null_count();
+/// Export to the C Data Interface
+pub fn to_ffi(data: &ArrayData) -> Result<(FFI_ArrowArray, FFI_ArrowSchema)> {
+    let array = FFI_ArrowArray::new(data);
+    let schema = FFI_ArrowSchema::try_from(data.data_type())?;
+    Ok((array, schema))
+}
+
+/// Import [ArrayData] from the C Data Interface
+///
+/// # Safety
+///
+/// This function assumes that the incoming data agrees with the C data interface.
+pub fn from_ffi(array: FFI_ArrowArray, schema: &FFI_ArrowSchema) -> Result<ArrayData> {
+    let array = Arc::new(array);
+    let tmp = ArrowArray {
+        array: &array,
+        schema,
+        owner: &array,
+    };
+    tmp.consume()
+}
+
+#[derive(Debug)]
+struct ArrowArray<'a> {
+    array: &'a FFI_ArrowArray,
+    schema: &'a FFI_ArrowSchema,
+    owner: &'a Arc<FFI_ArrowArray>,
+}
 
-        let data_layout = layout(&data_type);
-        let buffers = self.buffers(data_layout.can_contain_null_mask)?;
+impl<'a> ArrowArray<'a> {
+    fn consume(self) -> Result<ArrayData> {
+        let dt = DataType::try_from(self.schema)?;
+        let len = self.array.len();
+        let offset = self.array.offset();
+        let null_count = self.array.null_count();
+
+        let data_layout = layout(&dt);
+        let buffers = self.buffers(data_layout.can_contain_null_mask, &dt)?;
 
         let null_bit_buffer = if data_layout.can_contain_null_mask {
             self.null_bit_buffer()
@@ -238,25 +266,24 @@ pub trait ArrowArrayRef {
             None
         };
 
-        let mut child_data: Vec<ArrayData> = (0..self.array().num_children())
+        let mut child_data = (0..self.array.num_children())
             .map(|i| {
                 let child = self.child(i);
-                child.to_data()
+                child.consume()
             })
-            .map(|d| d.unwrap())
-            .collect();
+            .collect::<Result<Vec<_>>>()?;
 
         if let Some(d) = self.dictionary() {
             // For dictionary type there should only be a single child, so we don't need to worry if
             // there are other children added above.
             assert!(child_data.is_empty());
-            child_data.push(d.to_data()?);
+            child_data.push(d.consume()?);
         }
 
         // Should FFI be checking validity?
         Ok(unsafe {
             ArrayData::new_unchecked(
-                data_type,
+                dt,
                 len,
                 Some(null_count),
                 null_bit_buffer,
@@ -269,16 +296,15 @@ pub trait ArrowArrayRef {
 
     /// returns all buffers, as organized by Rust (i.e. null buffer is skipped if it's present
     /// in the spec of the type)
-    fn buffers(&self, can_contain_null_mask: bool) -> Result<Vec<Buffer>> {
+    fn buffers(&self, can_contain_null_mask: bool, dt: &DataType) -> Result<Vec<Buffer>> {
         // + 1: skip null buffer
         let buffer_begin = can_contain_null_mask as usize;
-        (buffer_begin..self.array().num_buffers())
+        (buffer_begin..self.array.num_buffers())
             .map(|index| {
-                let len = self.buffer_len(index)?;
+                let len = self.buffer_len(index, dt)?;
 
-                match unsafe {
-                    create_buffer(self.owner().clone(), self.array(), index, len)
-                } {
+                match unsafe { create_buffer(self.owner.clone(), self.array, index, len) }
+                {
                     Some(buf) => Ok(buf),
                     None if len == 0 => {
                         // Null data buffer, which Rust doesn't allow. So create
@@ -297,17 +323,16 @@ pub trait ArrowArrayRef {
     /// Rust implementation uses fixed-sized buffers, which require knowledge of their `len`.
     /// for variable-sized buffers, such as the second buffer of a stringArray, we need
     /// to fetch offset buffer's len to build the second buffer.
-    fn buffer_len(&self, i: usize) -> Result<usize> {
+    fn buffer_len(&self, i: usize, dt: &DataType) -> Result<usize> {
         // Special handling for dictionary type as we only care about the key type in the case.
-        let t = self.data_type()?;
-        let data_type = match &t {
+        let data_type = match dt {
             DataType::Dictionary(key_data_type, _) => key_data_type.as_ref(),
             dt => dt,
         };
 
         // `ffi::ArrowArray` records array offset, we need to add it back to the
         // buffer length to get the actual buffer length.
-        let length = self.array().len() + self.array().offset();
+        let length = self.array.len() + self.array.offset();
 
         // Inner type is not important for buffer length.
         Ok(match (&data_type, i) {
@@ -325,21 +350,21 @@ pub trait ArrowArrayRef {
             }
             (DataType::Utf8, 2) | (DataType::Binary, 2) => {
                 // the len of the data buffer (buffer 2) equals the last value of the offset buffer (buffer 1)
-                let len = self.buffer_len(1)?;
+                let len = self.buffer_len(1, dt)?;
                 // first buffer is the null buffer => add(1)
                 // we assume that pointer is aligned for `i32`, as Utf8 uses `i32` offsets.
                 #[allow(clippy::cast_ptr_alignment)]
-                let offset_buffer = self.array().buffer(1) as *const i32;
+                let offset_buffer = self.array.buffer(1) as *const i32;
                 // get last offset
                 (unsafe { *offset_buffer.add(len / size_of::<i32>() - 1) }) as usize
             }
             (DataType::LargeUtf8, 2) | (DataType::LargeBinary, 2) => {
                 // the len of the data buffer (buffer 2) equals the last value of the offset buffer (buffer 1)
-                let len = self.buffer_len(1)?;
+                let len = self.buffer_len(1, dt)?;
                 // first buffer is the null buffer => add(1)
                 // we assume that pointer is aligned for `i64`, as Large uses `i64` offsets.
                 #[allow(clippy::cast_ptr_alignment)]
-                let offset_buffer = self.array().buffer(1) as *const i64;
+                let offset_buffer = self.array.buffer(1) as *const i64;
                 // get last offset
                 (unsafe { *offset_buffer.add(len / size_of::<i64>() - 1) }) as usize
             }
@@ -358,30 +383,26 @@ pub trait ArrowArrayRef {
         // similar to `self.buffer_len(0)`, but without `Result`.
         // `ffi::ArrowArray` records array offset, we need to add it back to the
         // buffer length to get the actual buffer length.
-        let length = self.array().len() + self.array().offset();
+        let length = self.array.len() + self.array.offset();
         let buffer_len = bit_util::ceil(length, 8);
 
-        unsafe { create_buffer(self.owner().clone(), self.array(), 0, buffer_len) }
+        unsafe { create_buffer(self.owner.clone(), self.array, 0, buffer_len) }
     }
 
-    fn child(&self, index: usize) -> ArrowArrayChild {
-        ArrowArrayChild {
-            array: self.array().child(index),
-            schema: self.schema().child(index),
-            owner: self.owner(),
+    fn child(&self, index: usize) -> ArrowArray {
+        ArrowArray {
+            array: self.array.child(index),
+            schema: self.schema.child(index),
+            owner: self.owner,
         }
     }
 
-    fn owner(&self) -> &Arc<FFI_ArrowArray>;
-    fn array(&self) -> &FFI_ArrowArray;
-    fn schema(&self) -> &FFI_ArrowSchema;
-    fn data_type(&self) -> Result<DataType>;
-    fn dictionary(&self) -> Option<ArrowArrayChild> {
-        match (self.array().dictionary(), self.schema().dictionary()) {
-            (Some(array), Some(schema)) => Some(ArrowArrayChild {
+    fn dictionary(&self) -> Option<ArrowArray> {
+        match (self.array.dictionary(), self.schema.dictionary()) {
+            (Some(array), Some(schema)) => Some(ArrowArray {
                 array,
                 schema,
-                owner: self.owner(),
+                owner: self.owner,
             }),
             (None, None) => None,
             _ => panic!("Dictionary should both be set or not set in FFI_ArrowArray and FFI_ArrowSchema")
@@ -389,101 +410,6 @@ pub trait ArrowArrayRef {
     }
 }
 
-#[allow(rustdoc::private_intra_doc_links)]
-/// Struct used to move an Array from and to the C Data Interface.
-/// Its main responsibility is to expose functionality that requires
-/// both [FFI_ArrowArray] and [FFI_ArrowSchema].
-///
-/// ## Import from the C Data Interface
-/// * [ArrowArray::new] to create an array from [`FFI_ArrowArray`] and [`FFI_ArrowSchema`]
-///
-/// ## Export to the C Data Interface
-/// * Use [`FFI_ArrowArray`] and [`FFI_ArrowSchema`] directly
-///
-/// # Safety
-///
-/// This struct assumes that the incoming data agrees with the C data interface.
-#[derive(Debug)]
-pub struct ArrowArray {
-    pub(crate) array: Arc<FFI_ArrowArray>,
-    pub(crate) schema: Arc<FFI_ArrowSchema>,
-}
-
-#[derive(Debug)]
-pub struct ArrowArrayChild<'a> {
-    array: &'a FFI_ArrowArray,
-    schema: &'a FFI_ArrowSchema,
-    owner: &'a Arc<FFI_ArrowArray>,
-}
-
-impl ArrowArrayRef for ArrowArray {
-    /// the data_type as declared in the schema
-    fn data_type(&self) -> Result<DataType> {
-        DataType::try_from(self.schema.as_ref())
-    }
-
-    fn array(&self) -> &FFI_ArrowArray {
-        self.array.as_ref()
-    }
-
-    fn schema(&self) -> &FFI_ArrowSchema {
-        self.schema.as_ref()
-    }
-
-    fn owner(&self) -> &Arc<FFI_ArrowArray> {
-        &self.array
-    }
-}
-
-impl<'a> ArrowArrayRef for ArrowArrayChild<'a> {
-    /// the data_type as declared in the schema
-    fn data_type(&self) -> Result<DataType> {
-        DataType::try_from(self.schema)
-    }
-
-    fn array(&self) -> &FFI_ArrowArray {
-        self.array
-    }
-
-    fn schema(&self) -> &FFI_ArrowSchema {
-        self.schema
-    }
-
-    fn owner(&self) -> &Arc<FFI_ArrowArray> {
-        self.owner
-    }
-}
-
-impl ArrowArray {
-    /// Creates a new [`ArrowArray`] from the provided array and schema
-    pub fn new(array: FFI_ArrowArray, schema: FFI_ArrowSchema) -> Self {
-        Self {
-            array: Arc::new(array),
-            schema: Arc::new(schema),
-        }
-    }
-
-    /// creates a new `ArrowArray`. This is used to export to the C Data Interface.
-    ///
-    /// # Memory Leaks
-    /// This method releases `buffers`. Consumers of this struct *must* call `release` before
-    /// releasing this struct, or contents in `buffers` leak.
-    pub fn try_new(data: ArrayData) -> Result<Self> {
-        let array = Arc::new(FFI_ArrowArray::new(&data));
-        let schema = Arc::new(FFI_ArrowSchema::try_from(data.data_type())?);
-        Ok(ArrowArray { array, schema })
-    }
-
-    /// creates a new empty [ArrowArray]. Used to import from the C Data Interface.
-    /// # Safety
-    /// See safety of [ArrowArray]
-    pub unsafe fn empty() -> Self {
-        let schema = Arc::new(FFI_ArrowSchema::empty());
-        let array = Arc::new(FFI_ArrowArray::empty());
-        ArrowArray { array, schema }
-    }
-}
-
 #[cfg(test)]
 mod tests {
     use super::*;
@@ -510,10 +436,10 @@ mod tests {
         let array = Int32Array::from(vec![1, 2, 3]);
 
         // export it
-        let array = ArrowArray::try_from(array.into_data()).unwrap();
+        let (array, schema) = to_ffi(&array.into_data()).unwrap();
 
         // (simulate consumer) import it
-        let array = Int32Array::from(ArrayData::try_from(array).unwrap());
+        let array = Int32Array::from(from_ffi(array, &schema).unwrap());
         let array = kernels::arithmetic::add(&array, &array).unwrap();
 
         // verify
@@ -539,11 +465,11 @@ mod tests {
         // We can read them back to memory
         // SAFETY:
         // Pointers are aligned and valid
-        let array = unsafe {
-            ArrowArray::new(std::ptr::read(array_ptr), std::ptr::read(schema_ptr))
+        let data = unsafe {
+            from_ffi(std::ptr::read(array_ptr), &std::ptr::read(schema_ptr)).unwrap()
         };
 
-        let array = Int32Array::from(ArrayData::try_from(array).unwrap());
+        let array = Int32Array::from(data);
         assert_eq!(array, Int32Array::from(vec![1, 2, 3]));
     }
 
@@ -555,10 +481,10 @@ mod tests {
         let array = array.slice(1, 2);
 
         // export it
-        let array = ArrowArray::try_from(array.into_data())?;
+        let (array, schema) = to_ffi(&array.to_data())?;
 
         // (simulate consumer) import it
-        let data = ArrayData::try_from(array)?;
+        let data = from_ffi(array, &schema)?;
         let array = make_array(data);
 
         // perform some operation
@@ -585,10 +511,10 @@ mod tests {
             .unwrap();
 
         // export it
-        let array = ArrowArray::try_from(Array::to_data(&original_array))?;
+        let (array, schema) = to_ffi(&original_array.to_data())?;
 
         // (simulate consumer) import it
-        let data = ArrayData::try_from(array)?;
+        let data = from_ffi(array, &schema)?;
         let array = make_array(data);
 
         // perform some operation
@@ -608,10 +534,10 @@ mod tests {
             GenericStringArray::<Offset>::from(vec![Some("a"), None, Some("aaa")]);
 
         // export it
-        let array = ArrowArray::try_from(array.into_data())?;
+        let (array, schema) = to_ffi(&array.to_data())?;
 
         // (simulate consumer) import it
-        let data = ArrayData::try_from(array)?;
+        let data = from_ffi(array, &schema)?;
         let array = make_array(data);
 
         // perform some operation
@@ -677,10 +603,10 @@ mod tests {
         let array = GenericListArray::<Offset>::from(list_data.clone());
 
         // export it
-        let array = ArrowArray::try_from(array.into_data())?;
+        let (array, schema) = to_ffi(&array.to_data())?;
 
         // (simulate consumer) import it
-        let data = ArrayData::try_from(array)?;
+        let data = from_ffi(array, &schema)?;
         let array = make_array(data);
 
         // downcast
@@ -717,10 +643,10 @@ mod tests {
         let array = GenericBinaryArray::<Offset>::from(array);
 
         // export it
-        let array = ArrowArray::try_from(array.into_data())?;
+        let (array, schema) = to_ffi(&array.to_data())?;
 
         // (simulate consumer) import it
-        let data = ArrayData::try_from(array)?;
+        let data = from_ffi(array, &schema)?;
         let array = make_array(data);
 
         // perform some operation
@@ -762,10 +688,10 @@ mod tests {
         let array = BooleanArray::from(vec![None, Some(true), Some(false)]);
 
         // export it
-        let array = ArrowArray::try_from(array.into_data())?;
+        let (array, schema) = to_ffi(&array.to_data())?;
 
         // (simulate consumer) import it
-        let data = ArrayData::try_from(array)?;
+        let data = from_ffi(array, &schema)?;
         let array = make_array(data);
 
         // perform some operation
@@ -788,10 +714,10 @@ mod tests {
         let array = Time32MillisecondArray::from(vec![None, Some(1), Some(2)]);
 
         // export it
-        let array = ArrowArray::try_from(array.into_data())?;
+        let (array, schema) = to_ffi(&array.to_data())?;
 
         // (simulate consumer) import it
-        let data = ArrayData::try_from(array)?;
+        let data = from_ffi(array, &schema)?;
         let array = make_array(data);
 
         // perform some operation
@@ -824,10 +750,10 @@ mod tests {
         let array = TimestampMillisecondArray::from(vec![None, Some(1), Some(2)]);
 
         // export it
-        let array = ArrowArray::try_from(array.into_data())?;
+        let (array, schema) = to_ffi(&array.to_data())?;
 
         // (simulate consumer) import it
-        let data = ArrayData::try_from(array)?;
+        let data = from_ffi(array, &schema)?;
         let array = make_array(data);
 
         // perform some operation
@@ -868,10 +794,10 @@ mod tests {
             FixedSizeBinaryArray::try_from_sparse_iter_with_size(values.into_iter(), 3)?;
 
         // export it
-        let array = ArrowArray::try_from(array.into_data())?;
+        let (array, schema) = to_ffi(&array.to_data())?;
 
         // (simulate consumer) import it
-        let data = ArrayData::try_from(array)?;
+        let data = from_ffi(array, &schema)?;
         let array = make_array(data);
 
         // perform some operation
@@ -929,10 +855,10 @@ mod tests {
             .build()?;
 
         // export it
-        let array = ArrowArray::try_from(list_data)?;
+        let (array, schema) = to_ffi(&list_data)?;
 
         // (simulate consumer) import it
-        let data = ArrayData::try_from(array)?;
+        let data = from_ffi(array, &schema)?;
         let array = make_array(data);
 
         // perform some operation
@@ -974,10 +900,10 @@ mod tests {
         let dict_array: DictionaryArray<Int8Type> = values.into_iter().collect();
 
         // export it
-        let array = ArrowArray::try_from(dict_array.into_data())?;
+        let (array, schema) = to_ffi(&dict_array.to_data())?;
 
         // (simulate consumer) import it
-        let data = ArrayData::try_from(array)?;
+        let data = from_ffi(array, &schema)?;
         let array = make_array(data);
 
         // perform some operation
@@ -1015,8 +941,7 @@ mod tests {
         }
 
         // (simulate consumer) import it
-        let array = ArrowArray::new(out_array, out_schema);
-        let data = ArrayData::try_from(array)?;
+        let data = from_ffi(out_array, &out_schema)?;
         let array = make_array(data);
 
         // perform some operation
@@ -1034,10 +959,10 @@ mod tests {
         let array = DurationSecondArray::from(vec![None, Some(1), Some(2)]);
 
         // export it
-        let array = ArrowArray::try_from(array.into_data())?;
+        let (array, schema) = to_ffi(&array.to_data())?;
 
         // (simulate consumer) import it
-        let data = ArrayData::try_from(array)?;
+        let data = from_ffi(array, &schema)?;
         let array = make_array(data);
 
         // perform some operation
@@ -1081,10 +1006,10 @@ mod tests {
         .unwrap();
 
         // export it
-        let array = ArrowArray::try_from(map_array.to_data())?;
+        let (array, schema) = to_ffi(&map_array.to_data())?;
 
         // (simulate consumer) import it
-        let data = ArrayData::try_from(array)?;
+        let data = from_ffi(array, &schema)?;
         let array = make_array(data);
 
         // perform some operation
@@ -1104,10 +1029,10 @@ mod tests {
         )]);
 
         // export it
-        let array = ArrowArray::try_from(struct_array.to_data())?;
+        let (array, schema) = to_ffi(&struct_array.to_data())?;
 
         // (simulate consumer) import it
-        let data = ArrayData::try_from(array)?;
+        let data = from_ffi(array, &schema)?;
         let array = make_array(data);
 
         // perform some operation
@@ -1128,10 +1053,10 @@ mod tests {
         let union = builder.build().unwrap();
 
         // export it
-        let array = ArrowArray::try_from(union.to_data())?;
+        let (array, schema) = to_ffi(&union.to_data())?;
 
         // (simulate consumer) import it
-        let data = ArrayData::try_from(array)?;
+        let data = from_ffi(array, &schema)?;
         let array = make_array(data);
 
         let array = array.as_any().downcast_ref::<UnionArray>().unwrap();
@@ -1189,10 +1114,10 @@ mod tests {
         let union = builder.build().unwrap();
 
         // export it
-        let array = ArrowArray::try_from(union.to_data())?;
+        let (array, schema) = to_ffi(&union.to_data())?;
 
         // (simulate consumer) import it
-        let data = ArrayData::try_from(array)?;
+        let data = from_ffi(array, &schema)?;
         let array = UnionArray::from(data);
 
         let expected_type_ids = vec![0_i8, 0, 1, 0];
diff --git a/arrow/src/ffi_stream.rs b/arrow/src/ffi_stream.rs
index cfda4c88b..5fb1c1073 100644
--- a/arrow/src/ffi_stream.rs
+++ b/arrow/src/ffi_stream.rs
@@ -346,12 +346,7 @@ impl Iterator for ArrowArrayStreamReader {
             let schema_ref = self.schema();
             let schema = FFI_ArrowSchema::try_from(schema_ref.as_ref()).ok()?;
 
-            let data = ArrowArray {
-                array: Arc::new(array),
-                schema: Arc::new(schema),
-            }
-            .to_data()
-            .ok()?;
+            let data = from_ffi(array, &schema).ok()?;
 
             let record_batch = RecordBatch::from(StructArray::from(data));
 
@@ -442,8 +437,6 @@ mod tests {
         let exported_schema = Schema::try_from(&ffi_schema).unwrap();
         assert_eq!(&exported_schema, schema.as_ref());
 
-        let ffi_schema = Arc::new(ffi_schema);
-
         // Get array from `FFI_ArrowArrayStream`
         let mut produced_batches = vec![];
         loop {
@@ -456,12 +449,7 @@ mod tests {
                 break;
             }
 
-            let array = ArrowArray {
-                array: Arc::new(ffi_array),
-                schema: ffi_schema.clone(),
-            }
-            .to_data()
-            .unwrap();
+            let array = from_ffi(ffi_array, &ffi_schema).unwrap();
 
             let record_batch = RecordBatch::from(StructArray::from(array));
             produced_batches.push(record_batch);
diff --git a/arrow/src/pyarrow.rs b/arrow/src/pyarrow.rs
index 98e27ab30..54a247d53 100644
--- a/arrow/src/pyarrow.rs
+++ b/arrow/src/pyarrow.rs
@@ -175,10 +175,7 @@ impl FromPyArrow for ArrayData {
             ),
         )?;
 
-        let ffi_array = ffi::ArrowArray::new(array, schema);
-        let data = ArrayData::try_from(ffi_array).map_err(to_py_err)?;
-
-        Ok(data)
+        ffi::from_ffi(array, &schema).map_err(to_py_err)
     }
 }