You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by al...@apache.org on 2021/11/12 11:49:11 UTC
[arrow-rs] branch active_release updated: Fix validation for
offsets of StructArrays (#942) (#946)
This is an automated email from the ASF dual-hosted git repository.
alamb pushed a commit to branch active_release
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git
The following commit(s) were added to refs/heads/active_release by this push:
new 4037933 Fix validation for offsets of StructArrays (#942) (#946)
4037933 is described below
commit 4037933e43cad9e4de027039ce14caa65f78300a
Author: Andrew Lamb <an...@nerdnetworks.org>
AuthorDate: Fri Nov 12 06:49:08 2021 -0500
Fix validation for offsets of StructArrays (#942) (#946)
* reproduce validation error
* Fix validation bug
Co-authored-by: Ben Chambers <bj...@gmail.com>
Co-authored-by: Ben Chambers <bj...@gmail.com>
---
arrow/src/array/data.rs | 108 +++++++++++++++++++++++++++++++++++++++++++++---
1 file changed, 103 insertions(+), 5 deletions(-)
diff --git a/arrow/src/array/data.rs b/arrow/src/array/data.rs
index 3ca14d8..40a8bee 100644
--- a/arrow/src/array/data.rs
+++ b/arrow/src/array/data.rs
@@ -790,12 +790,11 @@ impl ArrayData {
for (i, field) in fields.iter().enumerate() {
let field_data = self.get_valid_child_data(i, field.data_type())?;
- // C++ does this check, but it is not clear why
- // field_data checks only len, but self checks len+offset
- if field_data.len < (self.len + self.offset) {
+ // Ensure child field has sufficient size
+ if field_data.len < self.len {
return Err(ArrowError::InvalidArgumentError(format!(
"{} child array #{} for field {} has length smaller than expected for struct array ({} < {})",
- self.data_type, i, field.name(), field_data.len, self.len + self.offset
+ self.data_type, i, field.name(), field_data.len, self.len
)));
}
}
@@ -1114,7 +1113,9 @@ impl ArrayDataBuilder {
mod tests {
use super::*;
- use crate::array::{Array, Int32Array, StringArray};
+ use crate::array::{
+ Array, BooleanBuilder, Int32Array, Int32Builder, StringArray, StructBuilder,
+ };
use crate::buffer::Buffer;
use crate::datatypes::Field;
use crate::util::bit_util;
@@ -1640,4 +1641,101 @@ mod tests {
fn make_f32_buffer(n: usize) -> Buffer {
Buffer::from_slice_ref(&vec![42f32; n])
}
+
+ #[test]
+ fn test_try_new_sliced_struct() {
+ let mut builder = StructBuilder::new(
+ vec![
+ Field::new("a", DataType::Int32, true),
+ Field::new("b", DataType::Boolean, true),
+ ],
+ vec![
+ Box::new(Int32Builder::new(5)),
+ Box::new(BooleanBuilder::new(5)),
+ ],
+ );
+
+ // struct[0] = { a: 10, b: true }
+ builder
+ .field_builder::<Int32Builder>(0)
+ .unwrap()
+ .append_option(Some(10))
+ .unwrap();
+ builder
+ .field_builder::<BooleanBuilder>(1)
+ .unwrap()
+ .append_option(Some(true))
+ .unwrap();
+ builder.append(true).unwrap();
+
+ // struct[1] = null
+ builder
+ .field_builder::<Int32Builder>(0)
+ .unwrap()
+ .append_option(None)
+ .unwrap();
+ builder
+ .field_builder::<BooleanBuilder>(1)
+ .unwrap()
+ .append_option(None)
+ .unwrap();
+ builder.append(false).unwrap();
+
+ // struct[2] = { a: null, b: false }
+ builder
+ .field_builder::<Int32Builder>(0)
+ .unwrap()
+ .append_option(None)
+ .unwrap();
+ builder
+ .field_builder::<BooleanBuilder>(1)
+ .unwrap()
+ .append_option(Some(false))
+ .unwrap();
+ builder.append(true).unwrap();
+
+ // struct[3] = { a: 21, b: null }
+ builder
+ .field_builder::<Int32Builder>(0)
+ .unwrap()
+ .append_option(Some(21))
+ .unwrap();
+ builder
+ .field_builder::<BooleanBuilder>(1)
+ .unwrap()
+ .append_option(None)
+ .unwrap();
+ builder.append(true).unwrap();
+
+ // struct[4] = { a: 18, b: false }
+ builder
+ .field_builder::<Int32Builder>(0)
+ .unwrap()
+ .append_option(Some(18))
+ .unwrap();
+ builder
+ .field_builder::<BooleanBuilder>(1)
+ .unwrap()
+ .append_option(Some(false))
+ .unwrap();
+ builder.append(true).unwrap();
+
+ let struct_array = builder.finish();
+ let struct_array_slice = struct_array.slice(1, 3);
+ let struct_array_data = struct_array_slice.data();
+
+ let cloned_data = ArrayData::try_new(
+ struct_array_slice.data_type().clone(),
+ struct_array_slice.len(),
+ None, // force new to compute the number of null bits
+ struct_array_data.null_buffer().cloned(),
+ struct_array_slice.offset(),
+ struct_array_data.buffers().to_vec(),
+ struct_array_data.child_data().to_vec(),
+ )
+ .unwrap();
+ let cloned = crate::array::make_array(cloned_data);
+
+ assert_eq!(&struct_array_slice, &cloned);
+ }
}