You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by tu...@apache.org on 2023/04/14 06:59:47 UTC

[arrow-rs] branch master updated: Store StructArray entries in MapArray (#4085)

This is an automated email from the ASF dual-hosted git repository.

tustvold pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git


The following commit(s) were added to refs/heads/master by this push:
     new 637d3832e Store StructArray entries in MapArray (#4085)
637d3832e is described below

commit 637d3832e714d0bcd9166761e99b05456e158687
Author: Raphael Taylor-Davies <17...@users.noreply.github.com>
AuthorDate: Fri Apr 14 07:59:40 2023 +0100

    Store StructArray entries in MapArray (#4085)
---
 arrow-array/src/array/map_array.rs       | 30 +++++++++---------------------
 parquet/src/arrow/arrow_writer/levels.rs |  9 ++++-----
 2 files changed, 13 insertions(+), 26 deletions(-)

diff --git a/arrow-array/src/array/map_array.rs b/arrow-array/src/array/map_array.rs
index 18b3eb3ce..62e12c30e 100644
--- a/arrow-array/src/array/map_array.rs
+++ b/arrow-array/src/array/map_array.rs
@@ -33,11 +33,7 @@ pub struct MapArray {
     data_type: DataType,
     nulls: Option<NullBuffer>,
     /// The [`StructArray`] that is the direct child of this array
-    entries: ArrayRef,
-    /// The first child of `entries`, the "keys" of this MapArray
-    keys: ArrayRef,
-    /// The second child of `entries`, the "values" of this MapArray
-    values: ArrayRef,
+    entries: StructArray,
     /// The start and end offsets of each entry
     value_offsets: OffsetBuffer<i32>,
 }
@@ -54,35 +50,34 @@ impl MapArray {
 
     /// Returns a reference to the keys of this map
     pub fn keys(&self) -> &ArrayRef {
-        &self.keys
+        self.entries.column(0)
     }
 
     /// Returns a reference to the values of this map
     pub fn values(&self) -> &ArrayRef {
-        &self.values
+        self.entries.column(1)
     }
 
     /// Returns a reference to the [`StructArray`] entries of this map
-    pub fn entries(&self) -> &ArrayRef {
+    pub fn entries(&self) -> &StructArray {
         &self.entries
     }
 
     /// Returns the data type of the map's keys.
     pub fn key_type(&self) -> &DataType {
-        self.keys.data_type()
+        self.keys().data_type()
     }
 
     /// Returns the data type of the map's values.
     pub fn value_type(&self) -> &DataType {
-        self.values.data_type()
+        self.values().data_type()
     }
 
     /// Returns ith value of this map array.
     ///
-    /// This is a [`StructArray`] containing two fields
     /// # Safety
     /// Caller must ensure that the index is within the array bounds
-    pub unsafe fn value_unchecked(&self, i: usize) -> ArrayRef {
+    pub unsafe fn value_unchecked(&self, i: usize) -> StructArray {
         let end = *self.value_offsets().get_unchecked(i + 1);
         let start = *self.value_offsets().get_unchecked(i);
         self.entries
@@ -92,7 +87,7 @@ impl MapArray {
     /// Returns ith value of this map array.
     ///
     /// This is a [`StructArray`] containing two fields
-    pub fn value(&self, i: usize) -> ArrayRef {
+    pub fn value(&self, i: usize) -> StructArray {
         let end = self.value_offsets()[i + 1] as usize;
         let start = self.value_offsets()[i] as usize;
         self.entries.slice(start, end - start)
@@ -117,8 +112,6 @@ impl MapArray {
             data_type: self.data_type.clone(),
             nulls: self.nulls.as_ref().map(|n| n.slice(offset, length)),
             entries: self.entries.clone(),
-            keys: self.keys.clone(),
-            values: self.values.clone(),
             value_offsets: self.value_offsets.slice(offset, length),
         }
     }
@@ -181,10 +174,7 @@ impl MapArray {
                 entries.data_type()
             )));
         }
-
-        let keys = make_array(entries.child_data()[0].clone());
-        let values = make_array(entries.child_data()[1].clone());
-        let entries = make_array(entries);
+        let entries = entries.into();
 
         // SAFETY:
         // ArrayData is valid, and verified type above
@@ -194,8 +184,6 @@ impl MapArray {
             data_type: data.data_type().clone(),
             nulls: data.nulls().cloned(),
             entries,
-            keys,
-            values,
             value_offsets,
         })
     }
diff --git a/parquet/src/arrow/arrow_writer/levels.rs b/parquet/src/arrow/arrow_writer/levels.rs
index d662a16ea..fe6126ba4 100644
--- a/parquet/src/arrow/arrow_writer/levels.rs
+++ b/parquet/src/arrow/arrow_writer/levels.rs
@@ -175,7 +175,7 @@ impl LevelInfoBuilder {
     }
 
     /// Given an `array`, write the level data for the elements in `range`
-    fn write(&mut self, array: &ArrayRef, range: Range<usize>) {
+    fn write(&mut self, array: &dyn Array, range: Range<usize>) {
         match array.data_type() {
             d if is_leaf(d) => self.write_leaf(array, range),
             DataType::Dictionary(_, v) if is_leaf(v.as_ref()) => {
@@ -225,7 +225,7 @@ impl LevelInfoBuilder {
         &mut self,
         offsets: &[O],
         nulls: Option<&NullBuffer>,
-        values: &ArrayRef,
+        values: &dyn Array,
         range: Range<usize>,
     ) {
         let (child, ctx) = match self {
@@ -372,7 +372,7 @@ impl LevelInfoBuilder {
     }
 
     /// Write a primitive array, as defined by [`is_leaf`]
-    fn write_leaf(&mut self, array: &ArrayRef, range: Range<usize>) {
+    fn write_leaf(&mut self, array: &dyn Array, range: Range<usize>) {
         let info = match self {
             Self::Primitive(info) => info,
             _ => unreachable!(),
@@ -918,12 +918,11 @@ mod tests {
         assert_eq!(a_list_data.null_count(), 1);
 
         let a = ListArray::from(a_list_data);
-        let values = Arc::new(a) as _;
 
         let item_field = Field::new("item", a_list_type, true);
         let mut builder =
             LevelInfoBuilder::try_new(&item_field, Default::default()).unwrap();
-        builder.write(&values, 2..4);
+        builder.write(&a, 2..4);
         let levels = builder.finish();
 
         assert_eq!(levels.len(), 1);