You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by tu...@apache.org on 2024/01/22 11:30:58 UTC

(arrow-rs) branch master updated: fix panic when decode a group with no child (#5322)

This is an automated email from the ASF dual-hosted git repository.

tustvold pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git


The following commit(s) were added to refs/heads/master by this push:
     new e47e2f19d1 fix panic when decode a group with no child (#5322)
e47e2f19d1 is described below

commit e47e2f19d189637d82fcb2650225f7684280de1f
Author: liyixin <60...@qq.com>
AuthorDate: Mon Jan 22 19:30:52 2024 +0800

    fix panic when decode a group with no child (#5322)
    
    * fix panic when decode group with no child
    
    * remove copied comment in test
---
 parquet/src/schema/types.rs | 58 ++++++++++++++++++++++++++++++++++-----------
 1 file changed, 44 insertions(+), 14 deletions(-)

diff --git a/parquet/src/schema/types.rs b/parquet/src/schema/types.rs
index 2f36deffba..c913f13c17 100644
--- a/parquet/src/schema/types.rs
+++ b/parquet/src/schema/types.rs
@@ -1085,20 +1085,38 @@ fn from_thrift_helper(elements: &[SchemaElement], index: usize) -> Result<(usize
                 ));
             }
             let repetition = Repetition::try_from(elements[index].repetition_type.unwrap())?;
-            let physical_type = PhysicalType::try_from(elements[index].type_.unwrap())?;
-            let length = elements[index].type_length.unwrap_or(-1);
-            let scale = elements[index].scale.unwrap_or(-1);
-            let precision = elements[index].precision.unwrap_or(-1);
-            let name = &elements[index].name;
-            let builder = Type::primitive_type_builder(name, physical_type)
-                .with_repetition(repetition)
-                .with_converted_type(converted_type)
-                .with_logical_type(logical_type)
-                .with_length(length)
-                .with_precision(precision)
-                .with_scale(scale)
-                .with_id(field_id);
-            Ok((index + 1, Arc::new(builder.build()?)))
+            if let Some(type_) = elements[index].type_ {
+                let physical_type = PhysicalType::try_from(type_)?;
+                let length = elements[index].type_length.unwrap_or(-1);
+                let scale = elements[index].scale.unwrap_or(-1);
+                let precision = elements[index].precision.unwrap_or(-1);
+                let name = &elements[index].name;
+                let builder = Type::primitive_type_builder(name, physical_type)
+                    .with_repetition(repetition)
+                    .with_converted_type(converted_type)
+                    .with_logical_type(logical_type)
+                    .with_length(length)
+                    .with_precision(precision)
+                    .with_scale(scale)
+                    .with_id(field_id);
+                Ok((index + 1, Arc::new(builder.build()?)))
+            } else {
+                let mut builder = Type::group_type_builder(&elements[index].name)
+                    .with_converted_type(converted_type)
+                    .with_logical_type(logical_type)
+                    .with_id(field_id);
+                if !is_root_node {
+                    // Sometimes parquet-cpp and parquet-mr set repetition level REQUIRED or
+                    // REPEATED for root node.
+                    //
+                    // We only set repetition for group types that are not top-level message
+                    // type. According to parquet-format:
+                    //   Root of the schema does not have a repetition_type.
+                    //   All other types must have one.
+                    builder = builder.with_repetition(repetition);
+                }
+                Ok((index + 1, Arc::new(builder.build().unwrap())))
+            }
         }
         Some(n) => {
             let repetition = elements[index]
@@ -2138,4 +2156,16 @@ mod tests {
         let result_schema = from_thrift(&thrift_schema).unwrap();
         assert_eq!(result_schema, Arc::new(expected_schema));
     }
+
+    #[test]
+    fn test_schema_from_thrift_group_has_no_child() {
+        let message_type = "message schema {}";
+
+        let expected_schema = parse_message_type(message_type).unwrap();
+        let mut thrift_schema = to_thrift(&expected_schema).unwrap();
+        thrift_schema[0].repetition_type = Some(Repetition::REQUIRED.into());
+
+        let result_schema = from_thrift(&thrift_schema).unwrap();
+        assert_eq!(result_schema, Arc::new(expected_schema));
+    }
 }