You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by tu...@apache.org on 2023/06/08 15:23:46 UTC
[arrow-rs] branch master updated: Add MapArray constructors and doc example (#4382)
This is an automated email from the ASF dual-hosted git repository.
tustvold pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git
The following commit(s) were added to refs/heads/master by this push:
new fac00bf5b Add MapArray constructors and doc example (#4382)
fac00bf5b is described below
commit fac00bf5b03448db224fadb3965fb422a4394a4e
Author: Raphael Taylor-Davies <17...@users.noreply.github.com>
AuthorDate: Thu Jun 8 16:23:38 2023 +0100
Add MapArray constructors and doc example (#4382)
* Add MapArray constructors
* Clippy
* Review feedback
* Link to builder (#4385)
* Clippy
* Further docs tweaks
---
arrow-array/src/array/list_array.rs | 3 +-
arrow-array/src/array/map_array.rs | 192 +++++++++++++++++++++++++++++++++++-
2 files changed, 191 insertions(+), 4 deletions(-)
diff --git a/arrow-array/src/array/list_array.rs b/arrow-array/src/array/list_array.rs
index d016afccb..abb5ba5e3 100644
--- a/arrow-array/src/array/list_array.rs
+++ b/arrow-array/src/array/list_array.rs
@@ -94,6 +94,7 @@ impl<OffsetSize: OffsetSizeTrait> GenericListArray<OffsetSize> {
/// * `offsets.len() - 1 != nulls.len()`
/// * `offsets.last() > values.len()`
/// * `!field.is_nullable() && values.null_count() != 0`
+ /// * `field.data_type() != values.data_type()`
pub fn try_new(
field: FieldRef,
offsets: OffsetBuffer<OffsetSize>,
@@ -103,7 +104,7 @@ impl<OffsetSize: OffsetSizeTrait> GenericListArray<OffsetSize> {
let len = offsets.len() - 1; // Offsets guaranteed to not be empty
let end_offset = offsets.last().unwrap().as_usize();
// don't need to check other values of `offsets` because they are checked
- // during construction of `OffsetsbBuffer`
+ // during construction of `OffsetBuffer`
if end_offset > values.len() {
return Err(ArrowError::InvalidArgumentError(format!(
"Max offset of {end_offset} exceeds length of values {}",
diff --git a/arrow-array/src/array/map_array.rs b/arrow-array/src/array/map_array.rs
index c98bca950..fca49cd78 100644
--- a/arrow-array/src/array/map_array.rs
+++ b/arrow-array/src/array/map_array.rs
@@ -22,7 +22,7 @@ use crate::{
};
use arrow_buffer::{ArrowNativeType, Buffer, NullBuffer, OffsetBuffer, ToByteSlice};
use arrow_data::{ArrayData, ArrayDataBuilder};
-use arrow_schema::{ArrowError, DataType, Field};
+use arrow_schema::{ArrowError, DataType, Field, FieldRef};
use std::any::Any;
use std::sync::Arc;
@@ -30,8 +30,10 @@ use std::sync::Arc;
///
/// Keys should always be non-null, but values can be null.
///
-/// [MapArray] is physically a [crate::array::ListArray] that has a
-/// [StructArray] with 2 child fields.
+/// [`MapArray`] is physically a [`ListArray`] of key-value pairs stored as an `entries`
+/// [`StructArray`] with 2 child fields.
+///
+/// See [`MapBuilder`](crate::builder::MapBuilder) for how to construct a [`MapArray`]
#[derive(Clone)]
pub struct MapArray {
data_type: DataType,
@@ -43,6 +45,112 @@ pub struct MapArray {
}
impl MapArray {
+ /// Create a new [`MapArray`] from the provided parts
+ ///
+ /// See [`MapBuilder`](crate::builder::MapBuilder) for a higher-level interface
+ /// to construct a [`MapArray`]
+ ///
+ /// # Errors
+ ///
+ /// Errors if
+ ///
+ /// * `offsets.len() - 1 != nulls.len()`
+ /// * `offsets.last() > entries.len()`
+ /// * `field.is_nullable()`
+ /// * `entries.null_count() != 0`
+ /// * `entries.columns().len() != 2`
+ /// * `field.data_type() != entries.data_type()`
+ pub fn try_new(
+ field: FieldRef,
+ offsets: OffsetBuffer<i32>,
+ entries: StructArray,
+ nulls: Option<NullBuffer>,
+ ordered: bool,
+ ) -> Result<Self, ArrowError> {
+ let len = offsets.len() - 1; // Offsets guaranteed to not be empty
+ let end_offset = offsets.last().unwrap().as_usize();
+ // don't need to check other values of `offsets` because they are checked
+ // during construction of `OffsetBuffer`
+ if end_offset > entries.len() {
+ return Err(ArrowError::InvalidArgumentError(format!(
+ "Max offset of {end_offset} exceeds length of entries {}",
+ entries.len()
+ )));
+ }
+
+ if let Some(n) = nulls.as_ref() {
+ if n.len() != len {
+ return Err(ArrowError::InvalidArgumentError(format!(
+ "Incorrect length of null buffer for MapArray, expected {len} got {}",
+ n.len(),
+ )));
+ }
+ }
+ if field.is_nullable() || entries.null_count() != 0 {
+ return Err(ArrowError::InvalidArgumentError(
+ "MapArray entries cannot contain nulls".to_string(),
+ ));
+ }
+
+ if field.data_type() != entries.data_type() {
+ return Err(ArrowError::InvalidArgumentError(format!(
+ "MapArray expected data type {} got {} for {:?}",
+ field.data_type(),
+ entries.data_type(),
+ field.name()
+ )));
+ }
+
+ if entries.columns().len() != 2 {
+ return Err(ArrowError::InvalidArgumentError(format!(
+ "MapArray entries must contain two children, got {}",
+ entries.columns().len()
+ )));
+ }
+
+ Ok(Self {
+ data_type: DataType::Map(field, ordered),
+ nulls,
+ entries,
+ value_offsets: offsets,
+ })
+ }
+
+ /// Create a new [`MapArray`] from the provided parts
+ ///
+ /// See [`MapBuilder`](crate::builder::MapBuilder) for a higher-level interface
+ /// to construct a [`MapArray`]
+ ///
+ /// # Panics
+ ///
+ /// Panics if [`Self::try_new`] returns an error
+ pub fn new(
+ field: FieldRef,
+ offsets: OffsetBuffer<i32>,
+ entries: StructArray,
+ nulls: Option<NullBuffer>,
+ ordered: bool,
+ ) -> Self {
+ Self::try_new(field, offsets, entries, nulls, ordered).unwrap()
+ }
+
+ /// Deconstruct this array into its constituent parts
+ pub fn into_parts(
+ self,
+ ) -> (
+ FieldRef,
+ OffsetBuffer<i32>,
+ StructArray,
+ Option<NullBuffer>,
+ bool,
+ ) {
+ let (f, ordered) = match self.data_type {
+ DataType::Map(f, ordered) => (f, ordered),
+ _ => unreachable!(),
+ };
+ (f, self.value_offsets, self.entries, self.nulls, ordered)
+ }
+
/// Returns a reference to the offsets of this map
///
/// Unlike [`Self::value_offsets`] this returns the [`OffsetBuffer`]
@@ -623,4 +731,82 @@ mod tests {
assert!(!map_array.is_null(i));
}
}
+
+ #[test]
+ fn test_try_new() {
+ let offsets = OffsetBuffer::new(vec![0, 1, 4, 5].into());
+ let fields = Fields::from(vec![
+ Field::new("key", DataType::Int32, false),
+ Field::new("values", DataType::Int32, false),
+ ]);
+ let columns = vec![
+ Arc::new(Int32Array::from(vec![1, 2, 3, 4, 5])) as _,
+ Arc::new(Int32Array::from(vec![1, 2, 3, 4, 5])) as _,
+ ];
+
+ let entries = StructArray::new(fields.clone(), columns, None);
+ let field = Arc::new(Field::new("entries", DataType::Struct(fields), false));
+
+ MapArray::new(field.clone(), offsets.clone(), entries.clone(), None, false);
+
+ let nulls = NullBuffer::new_null(3);
+ MapArray::new(field.clone(), offsets, entries.clone(), Some(nulls), false);
+
+ let nulls = NullBuffer::new_null(3);
+ let offsets = OffsetBuffer::new(vec![0, 1, 2, 4, 5].into());
+ let err = MapArray::try_new(
+ field.clone(),
+ offsets.clone(),
+ entries.clone(),
+ Some(nulls),
+ false,
+ )
+ .unwrap_err();
+
+ assert_eq!(
+ err.to_string(),
+ "Invalid argument error: Incorrect length of null buffer for MapArray, expected 4 got 3"
+ );
+
+ let err =
+ MapArray::try_new(field, offsets.clone(), entries.slice(0, 2), None, false)
+ .unwrap_err();
+
+ assert_eq!(
+ err.to_string(),
+ "Invalid argument error: Max offset of 5 exceeds length of entries 2"
+ );
+
+ let field = Arc::new(Field::new("element", DataType::Int64, false));
+ let err = MapArray::try_new(field, offsets.clone(), entries, None, false)
+ .unwrap_err()
+ .to_string();
+
+ assert!(
+ err.starts_with(
+ "Invalid argument error: MapArray expected data type Int64 got Struct"
+ ),
+ "{err}"
+ );
+
+ let fields = Fields::from(vec![
+ Field::new("a", DataType::Int32, false),
+ Field::new("b", DataType::Int32, false),
+ Field::new("c", DataType::Int32, false),
+ ]);
+ let columns = vec![
+ Arc::new(Int32Array::from(vec![1, 2, 3, 4, 5])) as _,
+ Arc::new(Int32Array::from(vec![1, 2, 3, 4, 5])) as _,
+ Arc::new(Int32Array::from(vec![1, 2, 3, 4, 5])) as _,
+ ];
+
+ let s = StructArray::new(fields.clone(), columns, None);
+ let field = Arc::new(Field::new("entries", DataType::Struct(fields), false));
+ let err = MapArray::try_new(field, offsets, s, None, false).unwrap_err();
+
+ assert_eq!(
+ err.to_string(),
+ "Invalid argument error: MapArray entries must contain two children, got 3"
+ );
+ }
}