You are viewing a plain text version of this content. The canonical link for it is here.
Posted to github@arrow.apache.org by GitBox <gi...@apache.org> on 2022/04/19 08:45:25 UTC

[GitHub] [arrow-rs] tustvold commented on a diff in pull request #1585: Read/write nested dictionary under large list in ipc stream reader/writer

tustvold commented on code in PR #1585:
URL: https://github.com/apache/arrow-rs/pull/1585#discussion_r852765193


##########
arrow/src/ipc/reader.rs:
##########
@@ -1481,4 +1479,35 @@ mod tests {
         let output_batch = roundtrip_ipc_stream(&input_batch);
         assert_eq!(input_batch, output_batch);
     }
+
+    #[test]
+    fn test_roundtrip_stream_nested_dict_dict_in_list() {

Review Comment:
   ```suggestion
       fn test_roundtrip_stream_dict_of_list_of_dict() {
   ```



##########
arrow/src/datatypes/field.rs:
##########
@@ -131,7 +131,7 @@ impl Field {
             DataType::List(field)
             | DataType::LargeList(field)
             | DataType::FixedSizeList(field, _)
-            | DataType::Map(field, _) => collected_fields.push(field),
+            | DataType::Map(field, _) => collected_fields.extend(field.fields()),

Review Comment:
   What does this change relate to?



##########
arrow/src/ipc/reader.rs:
##########
@@ -1481,4 +1479,35 @@ mod tests {
         let output_batch = roundtrip_ipc_stream(&input_batch);
         assert_eq!(input_batch, output_batch);
     }
+
+    #[test]
+    fn test_roundtrip_stream_nested_dict_dict_in_list() {
+        // list
+        let list_data_type = DataType::List(Box::new(Field::new_dict(
+            "item",
+            DataType::Dictionary(Box::new(DataType::Int8), Box::new(DataType::Utf8)),
+            false,
+            1,
+            false,
+        )));
+        let offsets: &[i32; 4] = &[0, 2, 4, 6];

Review Comment:
   Maybe try a zero-length slice, instead of uniform 2 each time?



##########
arrow/src/ipc/reader.rs:
##########
@@ -1444,29 +1444,27 @@ mod tests {
         assert_eq!(input_batch, output_batch);
     }
 
-    #[test]
-    fn test_roundtrip_stream_nested_dict_dict() {
+    fn test_test_roundtrip_stream_nested_dict_dict_for_list<

Review Comment:
   ```suggestion
       fn test_roundtrip_stream_dict_of_list_of_dict_impl<
   ```
   Maybe?



##########
arrow/src/ipc/reader.rs:
##########
@@ -1444,29 +1444,27 @@ mod tests {
         assert_eq!(input_batch, output_batch);
     }
 
-    #[test]
-    fn test_roundtrip_stream_nested_dict_dict() {
+    fn test_test_roundtrip_stream_nested_dict_dict_for_list<
+        OffsetSize: OffsetSizeTrait,
+        U: ArrowNativeType,
+    >(
+        list_data_type: DataType,
+        offsets: &[U; 4],
+    ) {
         let values = StringArray::from_iter_values(["a", "b", "c"]);
         let keys = Int8Array::from_iter_values([0, 0, 1, 2, 0, 1]);
         let dict_array = DictionaryArray::<Int8Type>::try_new(&keys, &values).unwrap();
         let dict_data = dict_array.data();
 
-        let value_offsets = Buffer::from_slice_ref(&[0, 2, 4, 6]);
+        let value_offsets = Buffer::from_slice_ref(offsets);
 
-        let list_data_type = DataType::List(Box::new(Field::new_dict(
-            "item",
-            DataType::Dictionary(Box::new(DataType::Int8), Box::new(DataType::Utf8)),
-            false,
-            1,
-            false,
-        )));
         let list_data = ArrayData::builder(list_data_type)
             .len(3)
             .add_buffer(value_offsets)
             .add_child_data(dict_data.clone())
             .build()
             .unwrap();
-        let list_array = ListArray::from(list_data);
+        let list_array = GenericListArray::<OffsetSize>::from(list_data);
 
         let dict_dict_array =
             DictionaryArray::<Int8Type>::try_new(&keys, &list_array).unwrap();

Review Comment:
   Could consider using a different keys array for this dictionary for added :hot_pepper: 



##########
arrow/src/ipc/reader.rs:
##########
@@ -1481,4 +1479,35 @@ mod tests {
         let output_batch = roundtrip_ipc_stream(&input_batch);
         assert_eq!(input_batch, output_batch);
     }
+
+    #[test]
+    fn test_roundtrip_stream_nested_dict_dict_in_list() {
+        // list
+        let list_data_type = DataType::List(Box::new(Field::new_dict(
+            "item",
+            DataType::Dictionary(Box::new(DataType::Int8), Box::new(DataType::Utf8)),
+            false,

Review Comment:
   Could also test some nulls, just because they always seem to break things :sweat_smile: 



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscribe@arrow.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org