You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by tu...@apache.org on 2023/04/14 06:59:47 UTC
[arrow-rs] branch master updated: Store StructArray entries in MapArray (#4085)
This is an automated email from the ASF dual-hosted git repository.
tustvold pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git
The following commit(s) were added to refs/heads/master by this push:
new 637d3832e Store StructArray entries in MapArray (#4085)
637d3832e is described below
commit 637d3832e714d0bcd9166761e99b05456e158687
Author: Raphael Taylor-Davies <17...@users.noreply.github.com>
AuthorDate: Fri Apr 14 07:59:40 2023 +0100
Store StructArray entries in MapArray (#4085)
---
arrow-array/src/array/map_array.rs | 30 +++++++++---------------------
parquet/src/arrow/arrow_writer/levels.rs | 9 ++++-----
2 files changed, 13 insertions(+), 26 deletions(-)
diff --git a/arrow-array/src/array/map_array.rs b/arrow-array/src/array/map_array.rs
index 18b3eb3ce..62e12c30e 100644
--- a/arrow-array/src/array/map_array.rs
+++ b/arrow-array/src/array/map_array.rs
@@ -33,11 +33,7 @@ pub struct MapArray {
data_type: DataType,
nulls: Option<NullBuffer>,
/// The [`StructArray`] that is the direct child of this array
- entries: ArrayRef,
- /// The first child of `entries`, the "keys" of this MapArray
- keys: ArrayRef,
- /// The second child of `entries`, the "values" of this MapArray
- values: ArrayRef,
+ entries: StructArray,
/// The start and end offsets of each entry
value_offsets: OffsetBuffer<i32>,
}
@@ -54,35 +50,34 @@ impl MapArray {
/// Returns a reference to the keys of this map
pub fn keys(&self) -> &ArrayRef {
- &self.keys
+ self.entries.column(0)
}
/// Returns a reference to the values of this map
pub fn values(&self) -> &ArrayRef {
- &self.values
+ self.entries.column(1)
}
/// Returns a reference to the [`StructArray`] entries of this map
- pub fn entries(&self) -> &ArrayRef {
+ pub fn entries(&self) -> &StructArray {
&self.entries
}
/// Returns the data type of the map's keys.
pub fn key_type(&self) -> &DataType {
- self.keys.data_type()
+ self.keys().data_type()
}
/// Returns the data type of the map's values.
pub fn value_type(&self) -> &DataType {
- self.values.data_type()
+ self.values().data_type()
}
/// Returns ith value of this map array.
///
- /// This is a [`StructArray`] containing two fields
/// # Safety
/// Caller must ensure that the index is within the array bounds
- pub unsafe fn value_unchecked(&self, i: usize) -> ArrayRef {
+ pub unsafe fn value_unchecked(&self, i: usize) -> StructArray {
let end = *self.value_offsets().get_unchecked(i + 1);
let start = *self.value_offsets().get_unchecked(i);
self.entries
@@ -92,7 +87,7 @@ impl MapArray {
/// Returns ith value of this map array.
///
/// This is a [`StructArray`] containing two fields
- pub fn value(&self, i: usize) -> ArrayRef {
+ pub fn value(&self, i: usize) -> StructArray {
let end = self.value_offsets()[i + 1] as usize;
let start = self.value_offsets()[i] as usize;
self.entries.slice(start, end - start)
@@ -117,8 +112,6 @@ impl MapArray {
data_type: self.data_type.clone(),
nulls: self.nulls.as_ref().map(|n| n.slice(offset, length)),
entries: self.entries.clone(),
- keys: self.keys.clone(),
- values: self.values.clone(),
value_offsets: self.value_offsets.slice(offset, length),
}
}
@@ -181,10 +174,7 @@ impl MapArray {
entries.data_type()
)));
}
-
- let keys = make_array(entries.child_data()[0].clone());
- let values = make_array(entries.child_data()[1].clone());
- let entries = make_array(entries);
+ let entries = entries.into();
// SAFETY:
// ArrayData is valid, and verified type above
@@ -194,8 +184,6 @@ impl MapArray {
data_type: data.data_type().clone(),
nulls: data.nulls().cloned(),
entries,
- keys,
- values,
value_offsets,
})
}
diff --git a/parquet/src/arrow/arrow_writer/levels.rs b/parquet/src/arrow/arrow_writer/levels.rs
index d662a16ea..fe6126ba4 100644
--- a/parquet/src/arrow/arrow_writer/levels.rs
+++ b/parquet/src/arrow/arrow_writer/levels.rs
@@ -175,7 +175,7 @@ impl LevelInfoBuilder {
}
/// Given an `array`, write the level data for the elements in `range`
- fn write(&mut self, array: &ArrayRef, range: Range<usize>) {
+ fn write(&mut self, array: &dyn Array, range: Range<usize>) {
match array.data_type() {
d if is_leaf(d) => self.write_leaf(array, range),
DataType::Dictionary(_, v) if is_leaf(v.as_ref()) => {
@@ -225,7 +225,7 @@ impl LevelInfoBuilder {
&mut self,
offsets: &[O],
nulls: Option<&NullBuffer>,
- values: &ArrayRef,
+ values: &dyn Array,
range: Range<usize>,
) {
let (child, ctx) = match self {
@@ -372,7 +372,7 @@ impl LevelInfoBuilder {
}
/// Write a primitive array, as defined by [`is_leaf`]
- fn write_leaf(&mut self, array: &ArrayRef, range: Range<usize>) {
+ fn write_leaf(&mut self, array: &dyn Array, range: Range<usize>) {
let info = match self {
Self::Primitive(info) => info,
_ => unreachable!(),
@@ -918,12 +918,11 @@ mod tests {
assert_eq!(a_list_data.null_count(), 1);
let a = ListArray::from(a_list_data);
- let values = Arc::new(a) as _;
let item_field = Field::new("item", a_list_type, true);
let mut builder =
LevelInfoBuilder::try_new(&item_field, Default::default()).unwrap();
- builder.write(&values, 2..4);
+ builder.write(&a, 2..4);
let levels = builder.finish();
assert_eq!(levels.len(), 1);