You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by tu...@apache.org on 2022/04/14 18:42:35 UTC

[arrow-rs] branch master updated: Fix incorrect `into_buffers` for UnionArray (#1567)

This is an automated email from the ASF dual-hosted git repository.

tustvold pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git


The following commit(s) were added to refs/heads/master by this push:
     new 0192872d8 Fix incorrect `into_buffers` for UnionArray (#1567)
0192872d8 is described below

commit 0192872d86e04d17df576186673c858eed5c2e94
Author: Liang-Chi Hsieh <vi...@gmail.com>
AuthorDate: Thu Apr 14 11:42:29 2022 -0700

    Fix incorrect `into_buffers` for UnionArray (#1567)
    
    * Fix incorrect buffers for UnionArray
    
    * Add test
    
    * Re-enable test_filter_union_array_sparse
---
 arrow/src/array/data.rs             | 27 +++++++++++++++++++++++++--
 arrow/src/compute/kernels/filter.rs |  6 ------
 2 files changed, 25 insertions(+), 8 deletions(-)

diff --git a/arrow/src/array/data.rs b/arrow/src/array/data.rs
index f2dadcf78..989e29fec 100644
--- a/arrow/src/array/data.rs
+++ b/arrow/src/array/data.rs
@@ -219,8 +219,14 @@ pub(crate) fn into_buffers(
         DataType::Utf8
         | DataType::Binary
         | DataType::LargeUtf8
-        | DataType::LargeBinary
-        | DataType::Union(_, _) => vec![buffer1.into(), buffer2.into()],
+        | DataType::LargeBinary => vec![buffer1.into(), buffer2.into()],
+        DataType::Union(_, mode) => {
+            match mode {
+                // Based on Union's DataTypeLayout
+                UnionMode::Sparse => vec![buffer1.into()],
+                UnionMode::Dense => vec![buffer1.into(), buffer2.into()],
+            }
+        }
         _ => vec![buffer1.into()],
     }
 }
@@ -2602,6 +2608,23 @@ mod tests {
         assert_eq!(&struct_array_slice, &cloned);
     }
 
+    #[test]
+    fn test_into_buffers() {
+        let data_types = vec![
+            DataType::Union(vec![], UnionMode::Dense),
+            DataType::Union(vec![], UnionMode::Sparse),
+        ];
+
+        for data_type in data_types {
+            let buffers = new_buffers(&data_type, 0);
+            let [buffer1, buffer2] = buffers;
+            let buffers = into_buffers(&data_type, buffer1, buffer2);
+
+            let layout = layout(&data_type);
+            assert_eq!(buffers.len(), layout.buffers.len());
+        }
+    }
+
     #[test]
     fn test_string_data_from_foreign() {
         let mut strings = "foobarfoobar".to_owned();
diff --git a/arrow/src/compute/kernels/filter.rs b/arrow/src/compute/kernels/filter.rs
index 4ab5b180b..df59ba63c 100644
--- a/arrow/src/compute/kernels/filter.rs
+++ b/arrow/src/compute/kernels/filter.rs
@@ -1692,9 +1692,6 @@ mod tests {
     }
 
     #[test]
-    // Fails when validation enabled
-    // https://github.com/apache/arrow-rs/issues/1547
-    #[cfg(not(feature = "force_validate"))]
     fn test_filter_union_array_sparse() {
         let mut builder = UnionBuilder::new_sparse(3);
         builder.append::<Int32Type>("A", 1).unwrap();
@@ -1706,9 +1703,6 @@ mod tests {
     }
 
     #[test]
-    // Fails when validation enabled
-    // https://github.com/apache/arrow-rs/issues/1547
-    #[cfg(not(feature = "force_validate"))]
     fn test_filter_union_array_sparse_with_nulls() {
         let mut builder = UnionBuilder::new_sparse(4);
         builder.append::<Int32Type>("A", 1).unwrap();