You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by vi...@apache.org on 2022/06/29 19:54:49 UTC

[arrow-rs] branch master updated: Calculate n_buffers in FFI_ArrowArray by data layout (#1960)

This is an automated email from the ASF dual-hosted git repository.

viirya pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git


The following commit(s) were added to refs/heads/master by this push:
     new 1a5dcb745 Calculate n_buffers in FFI_ArrowArray by data layout (#1960)
1a5dcb745 is described below

commit 1a5dcb745e32156f808a2dd324c233f191b119fd
Author: Liang-Chi Hsieh <vi...@gmail.com>
AuthorDate: Wed Jun 29 12:54:42 2022 -0700

    Calculate n_buffers in FFI_ArrowArray by data layout (#1960)
    
    * Fix n_buffers
    
    * Add test
    
    * Add code comment
    
    * Don't put null pointer if no null buffer by spec
    
    * Trigger Build
---
 arrow/src/array/data.rs |  6 +++---
 arrow/src/array/mod.rs  |  1 +
 arrow/src/ffi.rs        | 46 ++++++++++++++++++++++++++++++++++++++++------
 3 files changed, 44 insertions(+), 9 deletions(-)

diff --git a/arrow/src/array/data.rs b/arrow/src/array/data.rs
index 95c980bab..52920827d 100644
--- a/arrow/src/array/data.rs
+++ b/arrow/src/array/data.rs
@@ -1221,7 +1221,7 @@ impl ArrayData {
 
 /// Return the expected [`DataTypeLayout`] Arrays of this data
 /// type are expected to have
-fn layout(data_type: &DataType) -> DataTypeLayout {
+pub(crate) fn layout(data_type: &DataType) -> DataTypeLayout {
     // based on C/C++ implementation in
     // https://github.com/apache/arrow/blob/661c7d749150905a63dd3b52e0a04dac39030d95/cpp/src/arrow/type.h (and .cc)
     use std::mem::size_of;
@@ -1312,7 +1312,7 @@ fn layout(data_type: &DataType) -> DataTypeLayout {
 /// Layout specification for a data type
 #[derive(Debug, PartialEq)]
 // Note: Follows structure from C++: https://github.com/apache/arrow/blob/master/cpp/src/arrow/type.h#L91
-struct DataTypeLayout {
+pub(crate) struct DataTypeLayout {
     /// A vector of buffer layout specifications, one for each expected buffer
     pub buffers: Vec<BufferSpec>,
 
@@ -1359,7 +1359,7 @@ impl DataTypeLayout {
 
 /// Layout specification for a single data type buffer
 #[derive(Debug, PartialEq)]
-enum BufferSpec {
+pub(crate) enum BufferSpec {
     /// each element has a fixed width
     FixedWidth { byte_width: usize },
     /// Variable width, such as string data for utf8 data
diff --git a/arrow/src/array/mod.rs b/arrow/src/array/mod.rs
index bbe62cf6a..b8009cf9b 100644
--- a/arrow/src/array/mod.rs
+++ b/arrow/src/array/mod.rs
@@ -171,6 +171,7 @@ use crate::datatypes::*;
 
 pub use self::array::Array;
 pub use self::array::ArrayRef;
+pub(crate) use self::data::layout;
 pub use self::data::ArrayData;
 pub use self::data::ArrayDataBuilder;
 pub use self::data::ArrayDataRef;
diff --git a/arrow/src/ffi.rs b/arrow/src/ffi.rs
index 9be20ccd6..ad2062b4c 100644
--- a/arrow/src/ffi.rs
+++ b/arrow/src/ffi.rs
@@ -120,7 +120,7 @@ use std::{
 
 use bitflags::bitflags;
 
-use crate::array::ArrayData;
+use crate::array::{layout, ArrayData};
 use crate::buffer::Buffer;
 use crate::datatypes::DataType;
 use crate::error::{ArrowError, Result};
@@ -450,14 +450,30 @@ impl FFI_ArrowArray {
         let buffers = iter::once(data.null_buffer().cloned())
             .chain(data.buffers().iter().map(|b| Some(b.clone())))
             .collect::<Vec<_>>();
-        let n_buffers = buffers.len() as i64;
+        let data_layout = layout(data.data_type());
+        // `n_buffers` is the number of buffers by the spec.
+        let n_buffers = {
+            data_layout.buffers.len() + {
+                // If the layout has a null buffer by Arrow spec.
+                // Note that even the array doesn't have a null buffer because it has
+                // no null value, we still need to count 1 here to follow the spec.
+                if data_layout.can_contain_null_mask {
+                    1
+                } else {
+                    0
+                }
+            }
+        } as i64;
 
         let buffers_ptr = buffers
             .iter()
-            .map(|maybe_buffer| match maybe_buffer {
+            .flat_map(|maybe_buffer| match maybe_buffer {
                 // note that `raw_data` takes into account the buffer's offset
-                Some(b) => b.as_ptr() as *const c_void,
-                None => std::ptr::null(),
+                Some(b) => Some(b.as_ptr() as *const c_void),
+                // This is for null buffer. We only put a null pointer for
+                // null buffer if by spec it can contain null mask.
+                None if data_layout.can_contain_null_mask => Some(std::ptr::null()),
+                None => None,
             })
             .collect::<Box<[_]>>();
 
@@ -881,7 +897,7 @@ mod tests {
     use crate::array::{
         export_array_into_raw, make_array, Array, ArrayData, BooleanArray, DecimalArray,
         DictionaryArray, DurationSecondArray, FixedSizeBinaryArray, FixedSizeListArray,
-        GenericBinaryArray, GenericListArray, GenericStringArray, Int32Array,
+        GenericBinaryArray, GenericListArray, GenericStringArray, Int32Array, NullArray,
         OffsetSizeTrait, Time32MillisecondArray, TimestampMillisecondArray,
     };
     use crate::compute::kernels;
@@ -1402,4 +1418,22 @@ mod tests {
         // (drop/release)
         Ok(())
     }
+
+    #[test]
+    fn null_array_n_buffers() -> Result<()> {
+        let array = NullArray::new(10);
+        let data = array.data();
+
+        let ffi_array = FFI_ArrowArray::new(data);
+        assert_eq!(0, ffi_array.n_buffers);
+
+        let private_data =
+            unsafe { Box::from_raw(ffi_array.private_data as *mut ArrayPrivateData) };
+
+        assert_eq!(0, private_data.buffers_ptr.len());
+
+        Box::into_raw(private_data);
+
+        Ok(())
+    }
 }