You are viewing a plain text version of this content. The canonical link for it is here.
Posted to github@arrow.apache.org by GitBox <gi...@apache.org> on 2022/04/21 07:01:36 UTC

[GitHub] [arrow-rs] viirya commented on a diff in pull request #1589: Fix Null Mask Handling in ArrayData And UnionArray

viirya commented on code in PR #1589:
URL: https://github.com/apache/arrow-rs/pull/1589#discussion_r854817015


##########
arrow/src/array/array_union.rs:
##########
@@ -522,29 +516,29 @@ mod tests {
             match i {
                 0 => {
                     let slot = slot.as_any().downcast_ref::<Int32Array>().unwrap();
-                    assert!(!union.is_null(i));
+                    assert!(!slot.is_null(0));

Review Comment:
   I think it is good to check null in the current slot, but does `union.is_null(index)` not work anymore? Can we check both?



##########
arrow/src/array/array_union.rs:
##########
@@ -886,4 +876,12 @@ mod tests {
             }
         }
     }
+
+    #[test]
+    fn test_type_check() {
+        let mut builder = UnionBuilder::new_sparse(2);
+        builder.append::<Float32Type>("a", 1.0).unwrap();
+        let err = builder.append::<Int32Type>("a", 1).unwrap_err().to_string();
+        assert!(err.contains("Attempt to write col \"a\" with type Int32 doesn't match existing type Float32"), "{}", err);

Review Comment:
   👍 



##########
arrow/src/array/builder.rs:
##########
@@ -1895,22 +1895,18 @@ struct FieldData {
     ///  The number of array slots represented by the buffer
     slots: usize,
     /// A builder for the bitmap if required (for Sparse Unions)
-    bitmap_builder: Option<BooleanBufferBuilder>,
+    bitmap_builder: BooleanBufferBuilder,

Review Comment:
   The comment seems out-of-date now?



##########
arrow/src/array/builder.rs:
##########
@@ -2182,16 +2152,10 @@ impl UnionBuilder {
                 .into();
             let arr_data_builder = ArrayDataBuilder::new(data_type.clone())
                 .add_buffer(buffer)
-                .len(slots);
+                .len(slots)
+                .null_bit_buffer(bitmap_builder.finish());
             //                .build();
-            let arr_data_ref = unsafe {
-                match bitmap_builder {
-                    Some(mut bb) => arr_data_builder
-                        .null_bit_buffer(bb.finish())
-                        .build_unchecked(),
-                    None => arr_data_builder.build_unchecked(),
-                }

Review Comment:
   more clear now.



##########
arrow/src/array/equal/boolean.rs:
##########
@@ -16,25 +16,22 @@
 // under the License.
 
 use crate::array::{data::count_nulls, ArrayData};
-use crate::buffer::Buffer;
 use crate::util::bit_util::get_bit;
 
 use super::utils::{equal_bits, equal_len};
 
 pub(super) fn boolean_equal(
     lhs: &ArrayData,
     rhs: &ArrayData,
-    lhs_nulls: Option<&Buffer>,
-    rhs_nulls: Option<&Buffer>,
     mut lhs_start: usize,
     mut rhs_start: usize,
     mut len: usize,
 ) -> bool {
     let lhs_values = lhs.buffers()[0].as_slice();
     let rhs_values = rhs.buffers()[0].as_slice();
 
-    let lhs_null_count = count_nulls(lhs_nulls, lhs_start, len);
-    let rhs_null_count = count_nulls(rhs_nulls, rhs_start, len);
+    let lhs_null_count = count_nulls(lhs.null_buffer(), lhs_start + lhs.offset(), len);
+    let rhs_null_count = count_nulls(rhs.null_buffer(), rhs_start + rhs.offset(), len);

Review Comment:
   Hmm, do we assume `offset` is zero previously?



##########
arrow/src/array/equal/mod.rs:
##########
@@ -282,8 +282,7 @@ fn equal_range(
     rhs_start: usize,
     len: usize,
 ) -> bool {
-    utils::base_equal(lhs, rhs)

Review Comment:
   Actually `equal` already does `utils::base_equal`. I guess it is okay to remove it from here, although I'm not pretty sure.



##########
arrow/src/array/builder.rs:
##########
@@ -2182,16 +2152,10 @@ impl UnionBuilder {
                 .into();
             let arr_data_builder = ArrayDataBuilder::new(data_type.clone())
                 .add_buffer(buffer)
-                .len(slots);
+                .len(slots)
+                .null_bit_buffer(bitmap_builder.finish());
             //                .build();

Review Comment:
   `// .build();` seems redundant and can be removed?



##########
arrow/src/array/data.rs:
##########
@@ -1308,21 +1320,28 @@ fn layout(data_type: &DataType) -> DataTypeLayout {
 struct DataTypeLayout {
     /// A vector of buffer layout specifications, one for each expected buffer
     pub buffers: Vec<BufferSpec>,
+
+    /// Can contain a null bitmask
+    pub can_contain_null_mask: bool,

Review Comment:
   Although the C++ structure definition doesn't have this, I think it is fine to have it here.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscribe@arrow.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org