You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by tu...@apache.org on 2022/11/09 10:11:07 UTC

[arrow-rs] branch master updated: Fix null_count computation in binary (#3062)

This is an automated email from the ASF dual-hosted git repository.

tustvold pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git


The following commit(s) were added to refs/heads/master by this push:
     new 6057cf7ea Fix null_count computation in binary (#3062)
6057cf7ea is described below

commit 6057cf7ead11d66c7eeb15aed965b632afff07c4
Author: Liang-Chi Hsieh <vi...@gmail.com>
AuthorDate: Wed Nov 9 02:11:00 2022 -0800

    Fix null_count computation in binary (#3062)
---
 arrow/src/compute/kernels/arithmetic.rs | 57 +++++++++++++++++++++++++++++++++
 arrow/src/compute/kernels/arity.rs      |  4 +--
 2 files changed, 59 insertions(+), 2 deletions(-)

diff --git a/arrow/src/compute/kernels/arithmetic.rs b/arrow/src/compute/kernels/arithmetic.rs
index d12a0c196..b310d4fbf 100644
--- a/arrow/src/compute/kernels/arithmetic.rs
+++ b/arrow/src/compute/kernels/arithmetic.rs
@@ -3044,4 +3044,61 @@ mod tests {
         let c = add(&a, &b).unwrap();
         assert_eq!(c, expected);
     }
+
+    #[test]
+    fn test_resize_builder() {
+        let mut null_buffer_builder = BooleanBufferBuilder::new(16);
+        null_buffer_builder.append_slice(&[
+            false, false, false, false, false, false, false, false, false, false, false,
+            false, false, true, true, true,
+        ]);
+        // `resize(13)` sets the byte length to ceil(13 / 8) = 2, which the 16-bit buffer
+        // already occupies, so the underlying bytes (and the 3 trailing set bits) are kept.
+        null_buffer_builder.resize(13);
+        assert_eq!(null_buffer_builder.len(), 13);
+
+        let null_buffer = null_buffer_builder.finish();
+
+        // `count_set_bits` counts the 1-bits in the entire buffer. Because the `resize` above
+        // doesn't actually truncate the buffer, `count_set_bits` still returns 3.
+        assert_eq!(null_buffer.count_set_bits(), 3);
+        // `count_set_bits_offset` takes a length in bits, so only the first 13 bits are counted.
+        assert_eq!(null_buffer.count_set_bits_offset(0, 13), 0);
+
+        let mut data_buffer_builder = BufferBuilder::<i32>::new(13);
+        data_buffer_builder.append_slice(&[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]);
+        let data_buffer = data_buffer_builder.finish();
+
+        let arg1: Int32Array = ArrayDataBuilder::new(DataType::Int32)
+            .len(13)
+            .null_count(13)
+            .buffers(vec![data_buffer])
+            .null_bit_buffer(Some(null_buffer))
+            .build()
+            .unwrap()
+            .into();
+
+        assert_eq!(arg1.null_count(), 13);
+
+        let mut data_buffer_builder = BufferBuilder::<i32>::new(13);
+        data_buffer_builder.append_slice(&[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]);
+        let data_buffer = data_buffer_builder.finish();
+
+        let arg2: Int32Array = ArrayDataBuilder::new(DataType::Int32)
+            .len(13)
+            .null_count(0)
+            .buffers(vec![data_buffer])
+            .null_bit_buffer(None)
+            .build()
+            .unwrap()
+            .into();
+
+        assert_eq!(arg2.null_count(), 0);
+
+        let result_dyn = add_dyn(&arg1, &arg2).unwrap();
+        let result = result_dyn.as_any().downcast_ref::<Int32Array>().unwrap();
+
+        assert_eq!(result.len(), 13);
+        assert_eq!(result.null_count(), 13);
+    }
 }
diff --git a/arrow/src/compute/kernels/arity.rs b/arrow/src/compute/kernels/arity.rs
index 11ae5a204..c99d2b727 100644
--- a/arrow/src/compute/kernels/arity.rs
+++ b/arrow/src/compute/kernels/arity.rs
@@ -191,7 +191,7 @@ where
     let null_buffer = combine_option_bitmap(&[a.data(), b.data()], len).unwrap();
     let null_count = null_buffer
         .as_ref()
-        .map(|x| len - x.count_set_bits())
+        .map(|x| len - x.count_set_bits_offset(0, len))
         .unwrap_or_default();
 
     let values = a.values().iter().zip(b.values()).map(|(l, r)| op(*l, *r));
@@ -241,7 +241,7 @@ where
 
         let null_count = null_buffer
             .as_ref()
-            .map(|x| len - x.count_set_bits())
+            .map(|x| len - x.count_set_bits_offset(0, len))
             .unwrap_or_default();
 
         let mut buffer = BufferBuilder::<O::Native>::new(len);