You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by ne...@apache.org on 2022/04/25 06:16:03 UTC
[arrow-rs] branch master updated: Read/Write nested dictionaries under FixedSizeList in IPC (#1610)
This is an automated email from the ASF dual-hosted git repository.
nevime pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git
The following commit(s) were added to refs/heads/master by this push:
new cbd0303c6 Read/Write nested dictionaries under FixedSizeList in IPC (#1610)
cbd0303c6 is described below
commit cbd0303c69d66d4c683fea29787c8d03c8942568
Author: Liang-Chi Hsieh <vi...@gmail.com>
AuthorDate: Sun Apr 24 23:15:57 2022 -0700
Read/Write nested dictionaries under FixedSizeList in IPC (#1610)
* Read/Write nested dictionaries under FixedSizeList in IPC
* Fix clippy
---
arrow/src/ipc/reader.rs | 39 +++++++++++++++++++++++++++++++++++++++
arrow/src/ipc/writer.rs | 16 ++++++++++++++--
2 files changed, 53 insertions(+), 2 deletions(-)
diff --git a/arrow/src/ipc/reader.rs b/arrow/src/ipc/reader.rs
index 33d608576..8a26167db 100644
--- a/arrow/src/ipc/reader.rs
+++ b/arrow/src/ipc/reader.rs
@@ -1573,4 +1573,43 @@ mod tests {
offsets,
);
}
+
+ #[test]
+ fn test_roundtrip_stream_dict_of_fixed_size_list_of_dict() {
+ let values = StringArray::from(vec![Some("a"), None, Some("c"), None]);
+ let keys = Int8Array::from_iter_values([0, 0, 1, 2, 0, 1, 3, 1, 2]);
+ let dict_array = DictionaryArray::<Int8Type>::try_new(&keys, &values).unwrap();
+ let dict_data = dict_array.data();
+
+ let list_data_type = DataType::FixedSizeList(
+ Box::new(Field::new_dict(
+ "item",
+ DataType::Dictionary(Box::new(DataType::Int8), Box::new(DataType::Utf8)),
+ true,
+ 1,
+ false,
+ )),
+ 3,
+ );
+ let list_data = ArrayData::builder(list_data_type)
+ .len(3)
+ .add_child_data(dict_data.clone())
+ .build()
+ .unwrap();
+ let list_array = FixedSizeListArray::from(list_data);
+
+ let keys_for_dict = Int8Array::from_iter_values([0, 1, 0, 1, 1, 2, 0, 1, 2]);
+ let dict_dict_array =
+ DictionaryArray::<Int8Type>::try_new(&keys_for_dict, &list_array).unwrap();
+
+ let schema = Arc::new(Schema::new(vec![Field::new(
+ "f1",
+ dict_dict_array.data_type().clone(),
+ false,
+ )]));
+ let input_batch =
+ RecordBatch::try_new(schema, vec![Arc::new(dict_dict_array)]).unwrap();
+ let output_batch = roundtrip_ipc_stream(&input_batch);
+ assert_eq!(input_batch, output_batch);
+ }
}
diff --git a/arrow/src/ipc/writer.rs b/arrow/src/ipc/writer.rs
index 1f73d16d2..efc878a12 100644
--- a/arrow/src/ipc/writer.rs
+++ b/arrow/src/ipc/writer.rs
@@ -27,7 +27,7 @@ use flatbuffers::FlatBufferBuilder;
use crate::array::{
as_large_list_array, as_list_array, as_map_array, as_struct_array, as_union_array,
- make_array, Array, ArrayData, ArrayRef,
+ make_array, Array, ArrayData, ArrayRef, FixedSizeListArray,
};
use crate::buffer::{Buffer, MutableBuffer};
use crate::datatypes::*;
@@ -147,7 +147,6 @@ impl IpcDataGenerator {
dictionary_tracker: &mut DictionaryTracker,
write_options: &IpcWriteOptions,
) -> Result<()> {
- // TODO: Handle other nested types (FixedSizeList)
match column.data_type() {
DataType::Struct(fields) => {
let s = as_struct_array(column);
@@ -181,6 +180,19 @@ impl IpcDataGenerator {
write_options,
)?;
}
+ DataType::FixedSizeList(field, _) => {
+ let list = column
+ .as_any()
+ .downcast_ref::<FixedSizeListArray>()
+ .expect("Unable to downcast to fixed size list array");
+ self.encode_dictionaries(
+ field,
+ &list.values(),
+ encoded_dictionaries,
+ dictionary_tracker,
+ write_options,
+ )?;
+ }
DataType::Map(field, _) => {
let map_array = as_map_array(column);