You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by jo...@apache.org on 2021/01/30 05:22:06 UTC

[arrow] branch master updated: ARROW-11239: [Rust] Fixed equality with offsets and nulls

This is an automated email from the ASF dual-hosted git repository.

jorgecarleitao pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/master by this push:
     new 6357981  ARROW-11239: [Rust] Fixed equality with offsets and nulls
6357981 is described below

commit 6357981c41ad7eb7d6654f3cf9f97df969e21707
Author: Jorge C. Leitao <jo...@gmail.com>
AuthorDate: Sat Jan 30 06:20:42 2021 +0100

    ARROW-11239: [Rust] Fixed equality with offsets and nulls
    
    Fix the equality operator of all types with offsets and nulls.
    
    Big kudos to @mqy  for identifying and reducing its scope for variable-sized, and @alamb for identifying a second error.
    
    Closes #9211 from jorgecarleitao/fix_equality_with_offsets
    
    Authored-by: Jorge C. Leitao <jo...@gmail.com>
    Signed-off-by: Jorge C. Leitao <jo...@gmail.com>
---
 rust/arrow/src/array/equal/boolean.rs       | 11 ++---
 rust/arrow/src/array/equal/decimal.rs       |  2 +-
 rust/arrow/src/array/equal/dictionary.rs    |  4 +-
 rust/arrow/src/array/equal/fixed_binary.rs  |  4 +-
 rust/arrow/src/array/equal/fixed_list.rs    |  4 +-
 rust/arrow/src/array/equal/list.rs          |  6 +--
 rust/arrow/src/array/equal/mod.rs           | 62 +++++++++++++++++++++++------
 rust/arrow/src/array/equal/primitive.rs     |  4 +-
 rust/arrow/src/array/equal/structure.rs     |  4 +-
 rust/arrow/src/array/equal/variable_size.rs | 10 ++---
 10 files changed, 73 insertions(+), 38 deletions(-)

diff --git a/rust/arrow/src/array/equal/boolean.rs b/rust/arrow/src/array/equal/boolean.rs
index e777d4d..35c9786 100644
--- a/rust/arrow/src/array/equal/boolean.rs
+++ b/rust/arrow/src/array/equal/boolean.rs
@@ -76,6 +76,9 @@ pub(super) fn boolean_equal(
         let lhs_null_bytes = lhs_nulls.as_ref().unwrap().as_slice();
         let rhs_null_bytes = rhs_nulls.as_ref().unwrap().as_slice();
 
+        let lhs_start = lhs.offset() + lhs_start;
+        let rhs_start = rhs.offset() + rhs_start;
+
         (0..len).all(|i| {
             let lhs_pos = lhs_start + i;
             let rhs_pos = rhs_start + i;
@@ -84,13 +87,7 @@ pub(super) fn boolean_equal(
 
             lhs_is_null
                 || (lhs_is_null == rhs_is_null)
-                    && equal_bits(
-                        lhs_values,
-                        rhs_values,
-                        lhs_pos + lhs.offset(),
-                        rhs_pos + rhs.offset(),
-                        1,
-                    )
+                    && equal_bits(lhs_values, rhs_values, lhs_pos, rhs_pos, 1)
         })
     }
 }
diff --git a/rust/arrow/src/array/equal/decimal.rs b/rust/arrow/src/array/equal/decimal.rs
index 0924835..1ee6ec9 100644
--- a/rust/arrow/src/array/equal/decimal.rs
+++ b/rust/arrow/src/array/equal/decimal.rs
@@ -60,7 +60,7 @@ pub(super) fn decimal_equal(
             let rhs_pos = rhs_start + i;
 
             let lhs_is_null = !get_bit(lhs_null_bytes, lhs_pos + lhs.offset());
-            let rhs_is_null = !get_bit(rhs_null_bytes, rhs_pos + lhs.offset());
+            let rhs_is_null = !get_bit(rhs_null_bytes, rhs_pos + rhs.offset());
 
             lhs_is_null
                 || (lhs_is_null == rhs_is_null)
diff --git a/rust/arrow/src/array/equal/dictionary.rs b/rust/arrow/src/array/equal/dictionary.rs
index 4f4c148..81eedce 100644
--- a/rust/arrow/src/array/equal/dictionary.rs
+++ b/rust/arrow/src/array/equal/dictionary.rs
@@ -63,8 +63,8 @@ pub(super) fn dictionary_equal<T: ArrowNativeType>(
             let lhs_pos = lhs_start + i;
             let rhs_pos = rhs_start + i;
 
-            let lhs_is_null = !get_bit(lhs_null_bytes, lhs_pos);
-            let rhs_is_null = !get_bit(rhs_null_bytes, rhs_pos);
+            let lhs_is_null = !get_bit(lhs_null_bytes, lhs_pos + lhs.offset());
+            let rhs_is_null = !get_bit(rhs_null_bytes, rhs_pos + rhs.offset());
 
             lhs_is_null
                 || (lhs_is_null == rhs_is_null)
diff --git a/rust/arrow/src/array/equal/fixed_binary.rs b/rust/arrow/src/array/equal/fixed_binary.rs
index 5715831..5f8f932 100644
--- a/rust/arrow/src/array/equal/fixed_binary.rs
+++ b/rust/arrow/src/array/equal/fixed_binary.rs
@@ -59,8 +59,8 @@ pub(super) fn fixed_binary_equal(
             let lhs_pos = lhs_start + i;
             let rhs_pos = rhs_start + i;
 
-            let lhs_is_null = !get_bit(lhs_null_bytes, lhs_pos);
-            let rhs_is_null = !get_bit(rhs_null_bytes, rhs_pos);
+            let lhs_is_null = !get_bit(lhs_null_bytes, lhs_pos + lhs.offset());
+            let rhs_is_null = !get_bit(rhs_null_bytes, rhs_pos + rhs.offset());
 
             lhs_is_null
                 || (lhs_is_null == rhs_is_null)
diff --git a/rust/arrow/src/array/equal/fixed_list.rs b/rust/arrow/src/array/equal/fixed_list.rs
index f5065bb..a107285 100644
--- a/rust/arrow/src/array/equal/fixed_list.rs
+++ b/rust/arrow/src/array/equal/fixed_list.rs
@@ -61,8 +61,8 @@ pub(super) fn fixed_list_equal(
             let lhs_pos = lhs_start + i;
             let rhs_pos = rhs_start + i;
 
-            let lhs_is_null = !get_bit(lhs_null_bytes, lhs_pos);
-            let rhs_is_null = !get_bit(rhs_null_bytes, rhs_pos);
+            let lhs_is_null = !get_bit(lhs_null_bytes, lhs_pos + lhs.offset());
+            let rhs_is_null = !get_bit(rhs_null_bytes, rhs_pos + rhs.offset());
 
             lhs_is_null
                 || (lhs_is_null == rhs_is_null)
diff --git a/rust/arrow/src/array/equal/list.rs b/rust/arrow/src/array/equal/list.rs
index a7a6bd3..7acd477 100644
--- a/rust/arrow/src/array/equal/list.rs
+++ b/rust/arrow/src/array/equal/list.rs
@@ -144,15 +144,15 @@ pub(super) fn list_equal<T: OffsetSizeTrait>(
         )
     } else {
         // get a ref of the parent null buffer bytes, to use in testing for nullness
-        let lhs_null_bytes = rhs_nulls.unwrap().as_slice();
+        let lhs_null_bytes = lhs_nulls.unwrap().as_slice();
         let rhs_null_bytes = rhs_nulls.unwrap().as_slice();
         // with nulls, we need to compare item by item whenever it is not null
         (0..len).all(|i| {
             let lhs_pos = lhs_start + i;
             let rhs_pos = rhs_start + i;
 
-            let lhs_is_null = !get_bit(lhs_null_bytes, lhs_pos);
-            let rhs_is_null = !get_bit(rhs_null_bytes, rhs_pos);
+            let lhs_is_null = !get_bit(lhs_null_bytes, lhs_pos + lhs.offset());
+            let rhs_is_null = !get_bit(rhs_null_bytes, rhs_pos + rhs.offset());
 
             lhs_is_null
                 || (lhs_is_null == rhs_is_null)
diff --git a/rust/arrow/src/array/equal/mod.rs b/rust/arrow/src/array/equal/mod.rs
index e2ee9bc..67dde55 100644
--- a/rust/arrow/src/array/equal/mod.rs
+++ b/rust/arrow/src/array/equal/mod.rs
@@ -329,9 +329,10 @@ mod tests {
 
         let b = BooleanArray::from(vec![false, false, false]).data();
         test_equal(a.as_ref(), b.as_ref(), false);
+    }
 
-        // Test the case where null_count > 0
-
+    #[test]
+    fn test_boolean_equal_null() {
         let a = BooleanArray::from(vec![Some(false), None, None, Some(true)]).data();
         let b = BooleanArray::from(vec![Some(false), None, None, Some(true)]).data();
         test_equal(a.as_ref(), b.as_ref(), true);
@@ -341,23 +342,25 @@ mod tests {
 
         let b = BooleanArray::from(vec![Some(true), None, None, Some(true)]).data();
         test_equal(a.as_ref(), b.as_ref(), false);
+    }
 
-        // Test the case where offset != 0
-
+    #[test]
+    fn test_boolean_equal_offset() {
         let a =
             BooleanArray::from(vec![false, true, false, true, false, false, true]).data();
         let b =
-            BooleanArray::from(vec![false, false, false, true, false, true, true]).data();
+            BooleanArray::from(vec![true, false, false, false, true, false, true, true])
+                .data();
         assert_eq!(equal(a.as_ref(), b.as_ref()), false);
         assert_eq!(equal(b.as_ref(), a.as_ref()), false);
 
         let a_slice = a.slice(2, 3);
-        let b_slice = b.slice(2, 3);
+        let b_slice = b.slice(3, 3);
         assert_eq!(equal(&a_slice, &b_slice), true);
         assert_eq!(equal(&b_slice, &a_slice), true);
 
         let a_slice = a.slice(3, 4);
-        let b_slice = b.slice(3, 4);
+        let b_slice = b.slice(4, 4);
         assert_eq!(equal(&a_slice, &b_slice), false);
         assert_eq!(equal(&b_slice, &a_slice), false);
 
@@ -437,6 +440,20 @@ mod tests {
                 (2, 1),
                 true,
             ),
+            (
+                vec![None, Some(2), None],
+                (1, 1),
+                vec![None, None, Some(2)],
+                (2, 1),
+                true,
+            ),
+            (
+                vec![Some(1), None, Some(2), None, Some(3)],
+                (2, 2),
+                vec![None, Some(2), None, Some(3)],
+                (1, 2),
+                true,
+            ),
         ];
 
         for (lhs, slice_lhs, rhs, slice_rhs, expected) in cases {
@@ -542,6 +559,26 @@ mod tests {
     }
 
     #[test]
+    fn test_string_offset() {
+        let a = StringArray::from(vec![Some("a"), None, Some("b")]).data();
+        let a = a.slice(2, 1);
+        let b = StringArray::from(vec![Some("b")]).data();
+
+        test_equal(&a, b.as_ref(), true);
+    }
+
+    #[test]
+    fn test_string_offset_larger() {
+        let a =
+            StringArray::from(vec![Some("a"), None, Some("b"), None, Some("c")]).data();
+        let b = StringArray::from(vec![None, Some("b"), None, Some("c")]).data();
+
+        test_equal(&a.slice(2, 2), &b.slice(0, 2), false);
+        test_equal(&a.slice(2, 2), &b.slice(1, 2), true);
+        test_equal(&a.slice(2, 2), &b.slice(2, 2), false);
+    }
+
+    #[test]
     fn test_null() {
         let a = NullArray::new(2).data();
         let b = NullArray::new(2).data();
@@ -781,6 +818,7 @@ mod tests {
             None,
         ]);
         let b = create_decimal_array(&[
+            None,
             Some(8_887_000_000),
             None,
             None,
@@ -790,23 +828,23 @@ mod tests {
         ]);
 
         let a_slice = a.slice(0, 3);
-        let b_slice = b.slice(0, 3);
+        let b_slice = b.slice(1, 3);
         test_equal(&a_slice, &b_slice, true);
 
         let a_slice = a.slice(0, 5);
-        let b_slice = b.slice(0, 5);
+        let b_slice = b.slice(1, 5);
         test_equal(&a_slice, &b_slice, false);
 
         let a_slice = a.slice(4, 1);
-        let b_slice = b.slice(4, 1);
+        let b_slice = b.slice(5, 1);
         test_equal(&a_slice, &b_slice, true);
 
         let a_slice = a.slice(3, 3);
-        let b_slice = b.slice(3, 3);
+        let b_slice = b.slice(4, 3);
         test_equal(&a_slice, &b_slice, false);
 
         let a_slice = a.slice(1, 3);
-        let b_slice = b.slice(1, 3);
+        let b_slice = b.slice(2, 3);
         test_equal(&a_slice, &b_slice, false);
 
         let b = create_decimal_array(&[
diff --git a/rust/arrow/src/array/equal/primitive.rs b/rust/arrow/src/array/equal/primitive.rs
index ff061d1..db75879 100644
--- a/rust/arrow/src/array/equal/primitive.rs
+++ b/rust/arrow/src/array/equal/primitive.rs
@@ -56,8 +56,8 @@ pub(super) fn primitive_equal<T>(
         (0..len).all(|i| {
             let lhs_pos = lhs_start + i;
             let rhs_pos = rhs_start + i;
-            let lhs_is_null = !get_bit(lhs_null_bytes, lhs_pos);
-            let rhs_is_null = !get_bit(rhs_null_bytes, rhs_pos);
+            let lhs_is_null = !get_bit(lhs_null_bytes, lhs_pos + lhs.offset());
+            let rhs_is_null = !get_bit(rhs_null_bytes, rhs_pos + rhs.offset());
 
             lhs_is_null
                 || (lhs_is_null == rhs_is_null)
diff --git a/rust/arrow/src/array/equal/structure.rs b/rust/arrow/src/array/equal/structure.rs
index 6ec7183..b3cc402 100644
--- a/rust/arrow/src/array/equal/structure.rs
+++ b/rust/arrow/src/array/equal/structure.rs
@@ -79,8 +79,8 @@ pub(super) fn struct_equal(
             let lhs_pos = lhs_start + i;
             let rhs_pos = rhs_start + i;
             // if both struct and child had no null buffers,
-            let lhs_is_null = !get_bit(lhs_null_bytes, lhs_pos);
-            let rhs_is_null = !get_bit(rhs_null_bytes, rhs_pos);
+            let lhs_is_null = !get_bit(lhs_null_bytes, lhs_pos + lhs.offset());
+            let rhs_is_null = !get_bit(rhs_null_bytes, rhs_pos + rhs.offset());
 
             lhs_is_null
                 || (lhs_is_null == rhs_is_null)
diff --git a/rust/arrow/src/array/equal/variable_size.rs b/rust/arrow/src/array/equal/variable_size.rs
index 94fdf6b..ecb3bc2 100644
--- a/rust/arrow/src/array/equal/variable_size.rs
+++ b/rust/arrow/src/array/equal/variable_size.rs
@@ -60,9 +60,9 @@ pub(super) fn variable_sized_equal<T: OffsetSizeTrait>(
     let lhs_offsets = lhs.buffer::<T>(0);
     let rhs_offsets = rhs.buffer::<T>(0);
 
-    // these are bytes, and thus the offset does not need to be multiplied
-    let lhs_values = &lhs.buffers()[1].as_slice()[lhs.offset()..];
-    let rhs_values = &rhs.buffers()[1].as_slice()[rhs.offset()..];
+    // the offsets of the `ArrayData` are ignored as they are only applied to the offset buffer.
+    let lhs_values = lhs.buffers()[1].as_slice();
+    let rhs_values = rhs.buffers()[1].as_slice();
 
     let lhs_null_count = count_nulls(lhs_nulls, lhs_start, len);
     let rhs_null_count = count_nulls(rhs_nulls, rhs_start, len);
@@ -88,10 +88,10 @@ pub(super) fn variable_sized_equal<T: OffsetSizeTrait>(
 
             // the null bits can still be `None`, so we don't unwrap
             let lhs_is_null = !lhs_nulls
-                .map(|v| get_bit(v.as_slice(), lhs_pos))
+                .map(|v| get_bit(v.as_slice(), lhs.offset() + lhs_pos))
                 .unwrap_or(false);
             let rhs_is_null = !rhs_nulls
-                .map(|v| get_bit(v.as_slice(), rhs_pos))
+                .map(|v| get_bit(v.as_slice(), rhs.offset() + rhs_pos))
                 .unwrap_or(false);
 
             lhs_is_null