You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by ne...@apache.org on 2022/04/25 06:16:03 UTC

[arrow-rs] branch master updated: Read/Write nested dictionaries under FixedSizeList in IPC (#1610)

This is an automated email from the ASF dual-hosted git repository.

nevime pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git


The following commit(s) were added to refs/heads/master by this push:
     new cbd0303c6 Read/Write nested dictionaries under FixedSizeList in IPC (#1610)
cbd0303c6 is described below

commit cbd0303c69d66d4c683fea29787c8d03c8942568
Author: Liang-Chi Hsieh <vi...@gmail.com>
AuthorDate: Sun Apr 24 23:15:57 2022 -0700

    Read/Write nested dictionaries under FixedSizeList in IPC (#1610)
    
    * Read/Write nested dictionaries under FixedSizeList in IPC
    
    * Fix clippy
---
 arrow/src/ipc/reader.rs | 39 +++++++++++++++++++++++++++++++++++++++
 arrow/src/ipc/writer.rs | 16 ++++++++++++++--
 2 files changed, 53 insertions(+), 2 deletions(-)

diff --git a/arrow/src/ipc/reader.rs b/arrow/src/ipc/reader.rs
index 33d608576..8a26167db 100644
--- a/arrow/src/ipc/reader.rs
+++ b/arrow/src/ipc/reader.rs
@@ -1573,4 +1573,43 @@ mod tests {
             offsets,
         );
     }
+
+    #[test] // IPC stream round trip: dictionary of FixedSizeList of dictionary
+    fn test_roundtrip_stream_dict_of_fixed_size_list_of_dict() {
+        let values = StringArray::from(vec![Some("a"), None, Some("c"), None]);
+        let keys = Int8Array::from_iter_values([0, 0, 1, 2, 0, 1, 3, 1, 2]); // 9 keys
+        let dict_array = DictionaryArray::<Int8Type>::try_new(&keys, &values).unwrap();
+        let dict_data = dict_array.data(); // becomes the child data of the list below
+
+        let list_data_type = DataType::FixedSizeList(
+            Box::new(Field::new_dict(
+                "item",
+                DataType::Dictionary(Box::new(DataType::Int8), Box::new(DataType::Utf8)),
+                true,
+                1, // presumably the dict_id; verify against Field::new_dict
+                false,
+            )),
+            3, // presumably items per list: 3 lists x 3 items = the 9 keys above
+        );
+        let list_data = ArrayData::builder(list_data_type)
+            .len(3) // three lists
+            .add_child_data(dict_data.clone())
+            .build()
+            .unwrap();
+        let list_array = FixedSizeListArray::from(list_data);
+
+        let keys_for_dict = Int8Array::from_iter_values([0, 1, 0, 1, 1, 2, 0, 1, 2]);
+        let dict_dict_array = // outer dictionary: keys_for_dict over list_array as values
+            DictionaryArray::<Int8Type>::try_new(&keys_for_dict, &list_array).unwrap();
+
+        let schema = Arc::new(Schema::new(vec![Field::new(
+            "f1",
+            dict_dict_array.data_type().clone(),
+            false,
+        )]));
+        let input_batch = // single-column batch holding the nested dictionary
+            RecordBatch::try_new(schema, vec![Arc::new(dict_dict_array)]).unwrap();
+        let output_batch = roundtrip_ipc_stream(&input_batch); // helper not shown in this hunk
+        assert_eq!(input_batch, output_batch); // batch must come back equal to the input
+    }
 }
diff --git a/arrow/src/ipc/writer.rs b/arrow/src/ipc/writer.rs
index 1f73d16d2..efc878a12 100644
--- a/arrow/src/ipc/writer.rs
+++ b/arrow/src/ipc/writer.rs
@@ -27,7 +27,7 @@ use flatbuffers::FlatBufferBuilder;
 
 use crate::array::{
     as_large_list_array, as_list_array, as_map_array, as_struct_array, as_union_array,
-    make_array, Array, ArrayData, ArrayRef,
+    make_array, Array, ArrayData, ArrayRef, FixedSizeListArray,
 };
 use crate::buffer::{Buffer, MutableBuffer};
 use crate::datatypes::*;
@@ -147,7 +147,6 @@ impl IpcDataGenerator {
         dictionary_tracker: &mut DictionaryTracker,
         write_options: &IpcWriteOptions,
     ) -> Result<()> {
-        // TODO: Handle other nested types (FixedSizeList)
         match column.data_type() {
             DataType::Struct(fields) => {
                 let s = as_struct_array(column);
@@ -181,6 +180,19 @@ impl IpcDataGenerator {
                     write_options,
                 )?;
             }
+            DataType::FixedSizeList(field, _) => {
+                let list = column
+                    .as_any()
+                    .downcast_ref::<FixedSizeListArray>()
+                    .expect("Unable to downcast to fixed size list array");
+                self.encode_dictionaries(
+                    field,
+                    &list.values(),
+                    encoded_dictionaries,
+                    dictionary_tracker,
+                    write_options,
+                )?;
+            }
             DataType::Map(field, _) => {
                 let map_array = as_map_array(column);