You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by al...@apache.org on 2020/12/06 11:51:04 UTC

[arrow] branch master updated: ARROW-10823: [Rust] Fixed error in MutableArrayData

This is an automated email from the ASF dual-hosted git repository.

alamb pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/master by this push:
     new 1727b10  ARROW-10823: [Rust] Fixed error in MutableArrayData
1727b10 is described below

commit 1727b102a5b9dcba60feb58a005bb389dfdbe2a9
Author: Jorge C. Leitao <jo...@gmail.com>
AuthorDate: Sun Dec 6 06:49:43 2020 -0500

    ARROW-10823: [Rust] Fixed error in MutableArrayData
    
    This fixes an error in `MutableArrayData` where null bits were not being set when extending from an array that had no nulls while other source arrays did have nulls, causing a semantic error in the final array.
    
    Closes #8848 from jorgecarleitao/fix_error
    
    Authored-by: Jorge C. Leitao <jo...@gmail.com>
    Signed-off-by: Andrew Lamb <an...@nerdnetworks.org>
---
 rust/arrow/src/array/transform/mod.rs | 28 +++++++++++++++++++++++++++-
 1 file changed, 27 insertions(+), 1 deletion(-)

diff --git a/rust/arrow/src/array/transform/mod.rs b/rust/arrow/src/array/transform/mod.rs
index 9c4149e..074d6ac 100644
--- a/rust/arrow/src/array/transform/mod.rs
+++ b/rust/arrow/src/array/transform/mod.rs
@@ -285,10 +285,16 @@ impl<'a> MutableArrayData<'a> {
     /// `use_nulls` is a flag used to optimize insertions. It should be `false` if the only source of nulls
     /// are the arrays themselves and `true` if the user plans to call [MutableArrayData::extend_nulls].
     /// In other words, if `use_nulls` is `false`, calling [MutableArrayData::extend_nulls] should not be used.
-    pub fn new(arrays: Vec<&'a ArrayData>, use_nulls: bool, capacity: usize) -> Self {
+    pub fn new(arrays: Vec<&'a ArrayData>, mut use_nulls: bool, capacity: usize) -> Self {
         let data_type = arrays[0].data_type();
         use crate::datatypes::*;
 
+        // if any of the arrays has nulls, insertions from any array requires setting bits
+        // as there is at least one array with nulls.
+        if arrays.iter().any(|array| array.null_count() > 0) {
+            use_nulls = true;
+        };
+
         let buffers = match &data_type {
             DataType::Boolean => {
                 let bytes = bit_util::ceil(capacity, 8);
@@ -615,6 +621,26 @@ mod tests {
     }
 
     #[test]
+    fn test_multiple_with_nulls() {
+        let array1 = StringArray::from(vec!["hello", "world"]).data();
+        let array2 = StringArray::from(vec![Some("1"), None]).data();
+
+        let arrays = vec![array1.as_ref(), array2.as_ref()];
+
+        let mut mutable = MutableArrayData::new(arrays, false, 5);
+
+        mutable.extend(0, 0, 2);
+        mutable.extend(1, 0, 2);
+
+        let result = mutable.freeze();
+        let result = StringArray::from(Arc::new(result));
+
+        let expected =
+            StringArray::from(vec![Some("hello"), Some("world"), Some("1"), None]);
+        assert_eq!(result, expected);
+    }
+
+    #[test]
     fn test_string_null_offset_nulls() {
         let array =
             StringArray::from(vec![Some("a"), Some("bc"), None, Some("defh")]).data();