You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by tu...@apache.org on 2023/04/12 22:24:48 UTC

[arrow-rs] branch master updated: feat: Prevent UnionArray with Repeated Type IDs (#4070)

This is an automated email from the ASF dual-hosted git repository.

tustvold pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git


The following commit(s) were added to refs/heads/master by this push:
     new f14a6787c feat: Prevent UnionArray with Repeated Type IDs (#4070)
f14a6787c is described below

commit f14a6787cae77d9eb7d44a8cbb71d9fcc9f8365a
Author: Alex Huang <hu...@gmail.com>
AuthorDate: Thu Apr 13 00:24:43 2023 +0200

    feat: Prevent UnionArray with Repeated Type IDs (#4070)
    
    * feat: Prevent UnionArray with Repeated Type IDs
    
    * fix format
    
    * add tests
    
    * use should_panic
    
    * fix clippy
---
 arrow-schema/src/datatype.rs | 16 ++++++++++++++++
 arrow-schema/src/fields.rs   | 15 ++++++++++++++-
 2 files changed, 30 insertions(+), 1 deletion(-)

diff --git a/arrow-schema/src/datatype.rs b/arrow-schema/src/datatype.rs
index 64e8d0e77..0bbd64f30 100644
--- a/arrow-schema/src/datatype.rs
+++ b/arrow-schema/src/datatype.rs
@@ -742,4 +742,20 @@ mod tests {
     fn size_should_not_regress() {
         assert_eq!(std::mem::size_of::<DataType>(), 24);
     }
+
+    #[test]
+    #[should_panic(expected = "duplicate type id: 1")]
+    fn test_union_with_duplicated_type_id() {
+        let type_ids = vec![1, 1];
+        let _union = DataType::Union(
+            UnionFields::new(
+                type_ids,
+                vec![
+                    Field::new("f1", DataType::Int32, false),
+                    Field::new("f2", DataType::Utf8, false),
+                ],
+            ),
+            UnionMode::Dense,
+        );
+    }
 }
diff --git a/arrow-schema/src/fields.rs b/arrow-schema/src/fields.rs
index b93735328..07e9abeee 100644
--- a/arrow-schema/src/fields.rs
+++ b/arrow-schema/src/fields.rs
@@ -186,7 +186,20 @@ impl UnionFields {
         T: IntoIterator<Item = i8>,
     {
         let fields = fields.into_iter().map(Into::into);
-        type_ids.into_iter().zip(fields).collect()
+        let mut set = 0_u128;
+        type_ids
+            .into_iter()
+            .map(|idx| {
+                let mask = 1_u128 << idx;
+                if (set & mask) != 0 {
+                    panic!("duplicate type id: {}", idx);
+                } else {
+                    set |= mask;
+                }
+                idx
+            })
+            .zip(fields)
+            .collect()
     }
 
     /// Return size of this instance in bytes.