You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by vi...@apache.org on 2022/06/29 19:54:49 UTC
[arrow-rs] branch master updated: Calculate n_buffers in FFI_ArrowArray by data layout (#1960)
This is an automated email from the ASF dual-hosted git repository.
viirya pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git
The following commit(s) were added to refs/heads/master by this push:
new 1a5dcb745 Calculate n_buffers in FFI_ArrowArray by data layout (#1960)
1a5dcb745 is described below
commit 1a5dcb745e32156f808a2dd324c233f191b119fd
Author: Liang-Chi Hsieh <vi...@gmail.com>
AuthorDate: Wed Jun 29 12:54:42 2022 -0700
Calculate n_buffers in FFI_ArrowArray by data layout (#1960)
* Fix n_buffers
* Add test
* Add code comment
* Don't put null pointer if no null buffer by spec
* Trigger Build
---
arrow/src/array/data.rs | 6 +++---
arrow/src/array/mod.rs | 1 +
arrow/src/ffi.rs | 46 ++++++++++++++++++++++++++++++++++++++++------
3 files changed, 44 insertions(+), 9 deletions(-)
diff --git a/arrow/src/array/data.rs b/arrow/src/array/data.rs
index 95c980bab..52920827d 100644
--- a/arrow/src/array/data.rs
+++ b/arrow/src/array/data.rs
@@ -1221,7 +1221,7 @@ impl ArrayData {
/// Return the expected [`DataTypeLayout`] Arrays of this data
/// type are expected to have
-fn layout(data_type: &DataType) -> DataTypeLayout {
+pub(crate) fn layout(data_type: &DataType) -> DataTypeLayout {
// based on C/C++ implementation in
// https://github.com/apache/arrow/blob/661c7d749150905a63dd3b52e0a04dac39030d95/cpp/src/arrow/type.h (and .cc)
use std::mem::size_of;
@@ -1312,7 +1312,7 @@ fn layout(data_type: &DataType) -> DataTypeLayout {
/// Layout specification for a data type
#[derive(Debug, PartialEq)]
// Note: Follows structure from C++: https://github.com/apache/arrow/blob/master/cpp/src/arrow/type.h#L91
-struct DataTypeLayout {
+pub(crate) struct DataTypeLayout {
/// A vector of buffer layout specifications, one for each expected buffer
pub buffers: Vec<BufferSpec>,
@@ -1359,7 +1359,7 @@ impl DataTypeLayout {
/// Layout specification for a single data type buffer
#[derive(Debug, PartialEq)]
-enum BufferSpec {
+pub(crate) enum BufferSpec {
/// each element has a fixed width
FixedWidth { byte_width: usize },
/// Variable width, such as string data for utf8 data
diff --git a/arrow/src/array/mod.rs b/arrow/src/array/mod.rs
index bbe62cf6a..b8009cf9b 100644
--- a/arrow/src/array/mod.rs
+++ b/arrow/src/array/mod.rs
@@ -171,6 +171,7 @@ use crate::datatypes::*;
pub use self::array::Array;
pub use self::array::ArrayRef;
+pub(crate) use self::data::layout;
pub use self::data::ArrayData;
pub use self::data::ArrayDataBuilder;
pub use self::data::ArrayDataRef;
diff --git a/arrow/src/ffi.rs b/arrow/src/ffi.rs
index 9be20ccd6..ad2062b4c 100644
--- a/arrow/src/ffi.rs
+++ b/arrow/src/ffi.rs
@@ -120,7 +120,7 @@ use std::{
use bitflags::bitflags;
-use crate::array::ArrayData;
+use crate::array::{layout, ArrayData};
use crate::buffer::Buffer;
use crate::datatypes::DataType;
use crate::error::{ArrowError, Result};
@@ -450,14 +450,30 @@ impl FFI_ArrowArray {
let buffers = iter::once(data.null_buffer().cloned())
.chain(data.buffers().iter().map(|b| Some(b.clone())))
.collect::<Vec<_>>();
- let n_buffers = buffers.len() as i64;
+ let data_layout = layout(data.data_type());
+ // `n_buffers` is the number of buffers by the spec.
+ let n_buffers = {
+ data_layout.buffers.len() + {
+ // If the layout has a null buffer by Arrow spec.
+ // Note that even the array doesn't have a null buffer because it has
+ // no null value, we still need to count 1 here to follow the spec.
+ if data_layout.can_contain_null_mask {
+ 1
+ } else {
+ 0
+ }
+ }
+ } as i64;
let buffers_ptr = buffers
.iter()
- .map(|maybe_buffer| match maybe_buffer {
+ .flat_map(|maybe_buffer| match maybe_buffer {
// note that `raw_data` takes into account the buffer's offset
- Some(b) => b.as_ptr() as *const c_void,
- None => std::ptr::null(),
+ Some(b) => Some(b.as_ptr() as *const c_void),
+ // This is for null buffer. We only put a null pointer for
+ // null buffer if by spec it can contain null mask.
+ None if data_layout.can_contain_null_mask => Some(std::ptr::null()),
+ None => None,
})
.collect::<Box<[_]>>();
@@ -881,7 +897,7 @@ mod tests {
use crate::array::{
export_array_into_raw, make_array, Array, ArrayData, BooleanArray, DecimalArray,
DictionaryArray, DurationSecondArray, FixedSizeBinaryArray, FixedSizeListArray,
- GenericBinaryArray, GenericListArray, GenericStringArray, Int32Array,
+ GenericBinaryArray, GenericListArray, GenericStringArray, Int32Array, NullArray,
OffsetSizeTrait, Time32MillisecondArray, TimestampMillisecondArray,
};
use crate::compute::kernels;
@@ -1402,4 +1418,22 @@ mod tests {
// (drop/release)
Ok(())
}
+
+ #[test]
+ fn null_array_n_buffers() -> Result<()> {
+ let array = NullArray::new(10);
+ let data = array.data();
+
+ let ffi_array = FFI_ArrowArray::new(data);
+ assert_eq!(0, ffi_array.n_buffers);
+
+ let private_data =
+ unsafe { Box::from_raw(ffi_array.private_data as *mut ArrayPrivateData) };
+
+ assert_eq!(0, private_data.buffers_ptr.len());
+
+ Box::into_raw(private_data);
+
+ Ok(())
+ }
}