You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by al...@apache.org on 2021/11/12 11:49:11 UTC

[arrow-rs] branch active_release updated: Fix validation for offsets of StructArrays (#942) (#946)

This is an automated email from the ASF dual-hosted git repository.

alamb pushed a commit to branch active_release
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git


The following commit(s) were added to refs/heads/active_release by this push:
     new 4037933  Fix validation for offsets of StructArrays (#942) (#946)
4037933 is described below

commit 4037933e43cad9e4de027039ce14caa65f78300a
Author: Andrew Lamb <an...@nerdnetworks.org>
AuthorDate: Fri Nov 12 06:49:08 2021 -0500

    Fix validation for offsets of StructArrays (#942) (#946)
    
    * reproduce validation error
    
    * Fix validation bug
    
    Co-authored-by: Ben Chambers <bj...@gmail.com>
    
    Co-authored-by: Ben Chambers <bj...@gmail.com>
---
 arrow/src/array/data.rs | 108 +++++++++++++++++++++++++++++++++++++++++++++---
 1 file changed, 103 insertions(+), 5 deletions(-)

diff --git a/arrow/src/array/data.rs b/arrow/src/array/data.rs
index 3ca14d8..40a8bee 100644
--- a/arrow/src/array/data.rs
+++ b/arrow/src/array/data.rs
@@ -790,12 +790,11 @@ impl ArrayData {
                 for (i, field) in fields.iter().enumerate() {
                     let field_data = self.get_valid_child_data(i, field.data_type())?;
 
-                    // C++ does this check, but it is not clear why
-                    // field_data checks only len, but self checks len+offset
-                    if field_data.len < (self.len + self.offset) {
+                    // Ensure child field has sufficient size
+                    if field_data.len < self.len {
                         return Err(ArrowError::InvalidArgumentError(format!(
                             "{} child array #{} for field {} has length smaller than expected for struct array ({} < {})",
-                            self.data_type, i, field.name(), field_data.len, self.len + self.offset
+                            self.data_type, i, field.name(), field_data.len, self.len
                         )));
                     }
                 }
@@ -1114,7 +1113,9 @@ impl ArrayDataBuilder {
 mod tests {
     use super::*;
 
-    use crate::array::{Array, Int32Array, StringArray};
+    use crate::array::{
+        Array, BooleanBuilder, Int32Array, Int32Builder, StringArray, StructBuilder,
+    };
     use crate::buffer::Buffer;
     use crate::datatypes::Field;
     use crate::util::bit_util;
@@ -1640,4 +1641,101 @@ mod tests {
     fn make_f32_buffer(n: usize) -> Buffer {
         Buffer::from_slice_ref(&vec![42f32; n])
     }
+
+    #[test]
+    fn test_try_new_sliced_struct() {
+        let mut builder = StructBuilder::new(
+            vec![
+                Field::new("a", DataType::Int32, true),
+                Field::new("b", DataType::Boolean, true),
+            ],
+            vec![
+                Box::new(Int32Builder::new(5)),
+                Box::new(BooleanBuilder::new(5)),
+            ],
+        );
+
+        // struct[0] = { a: 10, b: true }
+        builder
+            .field_builder::<Int32Builder>(0)
+            .unwrap()
+            .append_option(Some(10))
+            .unwrap();
+        builder
+            .field_builder::<BooleanBuilder>(1)
+            .unwrap()
+            .append_option(Some(true))
+            .unwrap();
+        builder.append(true).unwrap();
+
+        // struct[1] = null
+        builder
+            .field_builder::<Int32Builder>(0)
+            .unwrap()
+            .append_option(None)
+            .unwrap();
+        builder
+            .field_builder::<BooleanBuilder>(1)
+            .unwrap()
+            .append_option(None)
+            .unwrap();
+        builder.append(false).unwrap();
+
+        // struct[2] = { a: null, b: false }
+        builder
+            .field_builder::<Int32Builder>(0)
+            .unwrap()
+            .append_option(None)
+            .unwrap();
+        builder
+            .field_builder::<BooleanBuilder>(1)
+            .unwrap()
+            .append_option(Some(false))
+            .unwrap();
+        builder.append(true).unwrap();
+
+        // struct[3] = { a: 21, b: null }
+        builder
+            .field_builder::<Int32Builder>(0)
+            .unwrap()
+            .append_option(Some(21))
+            .unwrap();
+        builder
+            .field_builder::<BooleanBuilder>(1)
+            .unwrap()
+            .append_option(None)
+            .unwrap();
+        builder.append(true).unwrap();
+
+        // struct[4] = { a: 18, b: false }
+        builder
+            .field_builder::<Int32Builder>(0)
+            .unwrap()
+            .append_option(Some(18))
+            .unwrap();
+        builder
+            .field_builder::<BooleanBuilder>(1)
+            .unwrap()
+            .append_option(Some(false))
+            .unwrap();
+        builder.append(true).unwrap();
+
+        let struct_array = builder.finish();
+        let struct_array_slice = struct_array.slice(1, 3);
+        let struct_array_data = struct_array_slice.data();
+
+        let cloned_data = ArrayData::try_new(
+            struct_array_slice.data_type().clone(),
+            struct_array_slice.len(),
+            None, // force new to compute the number of null bits
+            struct_array_data.null_buffer().cloned(),
+            struct_array_slice.offset(),
+            struct_array_data.buffers().to_vec(),
+            struct_array_data.child_data().to_vec(),
+        )
+        .unwrap();
+        let cloned = crate::array::make_array(cloned_data);
+
+        assert_eq!(&struct_array_slice, &cloned);
+    }
 }