You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by tu...@apache.org on 2024/01/22 11:30:58 UTC
(arrow-rs) branch master updated: fix panic when decode a group with no child (#5322)
This is an automated email from the ASF dual-hosted git repository.
tustvold pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git
The following commit(s) were added to refs/heads/master by this push:
new e47e2f19d1 fix panic when decode a group with no child (#5322)
e47e2f19d1 is described below
commit e47e2f19d189637d82fcb2650225f7684280de1f
Author: liyixin <60...@qq.com>
AuthorDate: Mon Jan 22 19:30:52 2024 +0800
fix panic when decode a group with no child (#5322)
* fix panic when decode group with no child
* remove copied comment in test
---
parquet/src/schema/types.rs | 58 ++++++++++++++++++++++++++++++++++-----------
1 file changed, 44 insertions(+), 14 deletions(-)
diff --git a/parquet/src/schema/types.rs b/parquet/src/schema/types.rs
index 2f36deffba..c913f13c17 100644
--- a/parquet/src/schema/types.rs
+++ b/parquet/src/schema/types.rs
@@ -1085,20 +1085,38 @@ fn from_thrift_helper(elements: &[SchemaElement], index: usize) -> Result<(usize
));
}
let repetition = Repetition::try_from(elements[index].repetition_type.unwrap())?;
- let physical_type = PhysicalType::try_from(elements[index].type_.unwrap())?;
- let length = elements[index].type_length.unwrap_or(-1);
- let scale = elements[index].scale.unwrap_or(-1);
- let precision = elements[index].precision.unwrap_or(-1);
- let name = &elements[index].name;
- let builder = Type::primitive_type_builder(name, physical_type)
- .with_repetition(repetition)
- .with_converted_type(converted_type)
- .with_logical_type(logical_type)
- .with_length(length)
- .with_precision(precision)
- .with_scale(scale)
- .with_id(field_id);
- Ok((index + 1, Arc::new(builder.build()?)))
+ if let Some(type_) = elements[index].type_ {
+ let physical_type = PhysicalType::try_from(type_)?;
+ let length = elements[index].type_length.unwrap_or(-1);
+ let scale = elements[index].scale.unwrap_or(-1);
+ let precision = elements[index].precision.unwrap_or(-1);
+ let name = &elements[index].name;
+ let builder = Type::primitive_type_builder(name, physical_type)
+ .with_repetition(repetition)
+ .with_converted_type(converted_type)
+ .with_logical_type(logical_type)
+ .with_length(length)
+ .with_precision(precision)
+ .with_scale(scale)
+ .with_id(field_id);
+ Ok((index + 1, Arc::new(builder.build()?)))
+ } else {
+ let mut builder = Type::group_type_builder(&elements[index].name)
+ .with_converted_type(converted_type)
+ .with_logical_type(logical_type)
+ .with_id(field_id);
+ if !is_root_node {
+ // Sometimes parquet-cpp and parquet-mr set repetition level REQUIRED or
+ // REPEATED for root node.
+ //
+ // We only set repetition for group types that are not top-level message
+ // type. According to parquet-format:
+ // Root of the schema does not have a repetition_type.
+ // All other types must have one.
+ builder = builder.with_repetition(repetition);
+ }
+ Ok((index + 1, Arc::new(builder.build().unwrap())))
+ }
}
Some(n) => {
let repetition = elements[index]
@@ -2138,4 +2156,16 @@ mod tests {
let result_schema = from_thrift(&thrift_schema).unwrap();
assert_eq!(result_schema, Arc::new(expected_schema));
}
+
+ #[test]
+ fn test_schema_from_thrift_group_has_no_child() {
+ let message_type = "message schema {}";
+
+ let expected_schema = parse_message_type(message_type).unwrap();
+ let mut thrift_schema = to_thrift(&expected_schema).unwrap();
+ thrift_schema[0].repetition_type = Some(Repetition::REQUIRED.into());
+
+ let result_schema = from_thrift(&thrift_schema).unwrap();
+ assert_eq!(result_schema, Arc::new(expected_schema));
+ }
}