You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by tu...@apache.org on 2023/04/12 22:24:48 UTC
[arrow-rs] branch master updated: feat: Prevent UnionArray with Repeated Type IDs (#4070)
This is an automated email from the ASF dual-hosted git repository.
tustvold pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git
The following commit(s) were added to refs/heads/master by this push:
new f14a6787c feat: Prevent UnionArray with Repeated Type IDs (#4070)
f14a6787c is described below
commit f14a6787cae77d9eb7d44a8cbb71d9fcc9f8365a
Author: Alex Huang <hu...@gmail.com>
AuthorDate: Thu Apr 13 00:24:43 2023 +0200
feat: Prevent UnionArray with Repeated Type IDs (#4070)
* feat: Prevent UnionArray with Repeated Type IDs
* fix format
* add tests
* use should_panic
* fix clippy
---
arrow-schema/src/datatype.rs | 16 ++++++++++++++++
arrow-schema/src/fields.rs | 15 ++++++++++++++-
2 files changed, 30 insertions(+), 1 deletion(-)
diff --git a/arrow-schema/src/datatype.rs b/arrow-schema/src/datatype.rs
index 64e8d0e77..0bbd64f30 100644
--- a/arrow-schema/src/datatype.rs
+++ b/arrow-schema/src/datatype.rs
@@ -742,4 +742,20 @@ mod tests {
fn size_should_not_regress() {
assert_eq!(std::mem::size_of::<DataType>(), 24);
}
+
+ #[test]
+ #[should_panic(expected = "duplicate type id: 1")]
+ fn test_union_with_duplicated_type_id() {
+ let type_ids = vec![1, 1];
+ let _union = DataType::Union(
+ UnionFields::new(
+ type_ids,
+ vec![
+ Field::new("f1", DataType::Int32, false),
+ Field::new("f2", DataType::Utf8, false),
+ ],
+ ),
+ UnionMode::Dense,
+ );
+ }
}
diff --git a/arrow-schema/src/fields.rs b/arrow-schema/src/fields.rs
index b93735328..07e9abeee 100644
--- a/arrow-schema/src/fields.rs
+++ b/arrow-schema/src/fields.rs
@@ -186,7 +186,20 @@ impl UnionFields {
T: IntoIterator<Item = i8>,
{
let fields = fields.into_iter().map(Into::into);
- type_ids.into_iter().zip(fields).collect()
+ let mut set = 0_u128;
+ type_ids
+ .into_iter()
+ .map(|idx| {
+ let mask = 1_u128 << idx;
+ if (set & mask) != 0 {
+ panic!("duplicate type id: {}", idx);
+ } else {
+ set |= mask;
+ }
+ idx
+ })
+ .zip(fields)
+ .collect()
}
/// Return size of this instance in bytes.