You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by jo...@apache.org on 2021/01/30 05:22:06 UTC
[arrow] branch master updated: ARROW-11239: [Rust] Fixed equality
with offsets and nulls
This is an automated email from the ASF dual-hosted git repository.
jorgecarleitao pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/master by this push:
new 6357981 ARROW-11239: [Rust] Fixed equality with offsets and nulls
6357981 is described below
commit 6357981c41ad7eb7d6654f3cf9f97df969e21707
Author: Jorge C. Leitao <jo...@gmail.com>
AuthorDate: Sat Jan 30 06:20:42 2021 +0100
ARROW-11239: [Rust] Fixed equality with offsets and nulls
Fix the equality operator of all types with offsets and nulls.
Big kudos to @mqy for identifying and reducing its scope for variable-sized, and @alamb for identifying a second error.
Closes #9211 from jorgecarleitao/fix_equality_with_offsets
Authored-by: Jorge C. Leitao <jo...@gmail.com>
Signed-off-by: Jorge C. Leitao <jo...@gmail.com>
---
rust/arrow/src/array/equal/boolean.rs | 11 ++---
rust/arrow/src/array/equal/decimal.rs | 2 +-
rust/arrow/src/array/equal/dictionary.rs | 4 +-
rust/arrow/src/array/equal/fixed_binary.rs | 4 +-
rust/arrow/src/array/equal/fixed_list.rs | 4 +-
rust/arrow/src/array/equal/list.rs | 6 +--
rust/arrow/src/array/equal/mod.rs | 62 +++++++++++++++++++++++------
rust/arrow/src/array/equal/primitive.rs | 4 +-
rust/arrow/src/array/equal/structure.rs | 4 +-
rust/arrow/src/array/equal/variable_size.rs | 10 ++---
10 files changed, 73 insertions(+), 38 deletions(-)
diff --git a/rust/arrow/src/array/equal/boolean.rs b/rust/arrow/src/array/equal/boolean.rs
index e777d4d..35c9786 100644
--- a/rust/arrow/src/array/equal/boolean.rs
+++ b/rust/arrow/src/array/equal/boolean.rs
@@ -76,6 +76,9 @@ pub(super) fn boolean_equal(
let lhs_null_bytes = lhs_nulls.as_ref().unwrap().as_slice();
let rhs_null_bytes = rhs_nulls.as_ref().unwrap().as_slice();
+ let lhs_start = lhs.offset() + lhs_start;
+ let rhs_start = rhs.offset() + rhs_start;
+
(0..len).all(|i| {
let lhs_pos = lhs_start + i;
let rhs_pos = rhs_start + i;
@@ -84,13 +87,7 @@ pub(super) fn boolean_equal(
lhs_is_null
|| (lhs_is_null == rhs_is_null)
- && equal_bits(
- lhs_values,
- rhs_values,
- lhs_pos + lhs.offset(),
- rhs_pos + rhs.offset(),
- 1,
- )
+ && equal_bits(lhs_values, rhs_values, lhs_pos, rhs_pos, 1)
})
}
}
diff --git a/rust/arrow/src/array/equal/decimal.rs b/rust/arrow/src/array/equal/decimal.rs
index 0924835..1ee6ec9 100644
--- a/rust/arrow/src/array/equal/decimal.rs
+++ b/rust/arrow/src/array/equal/decimal.rs
@@ -60,7 +60,7 @@ pub(super) fn decimal_equal(
let rhs_pos = rhs_start + i;
let lhs_is_null = !get_bit(lhs_null_bytes, lhs_pos + lhs.offset());
- let rhs_is_null = !get_bit(rhs_null_bytes, rhs_pos + lhs.offset());
+ let rhs_is_null = !get_bit(rhs_null_bytes, rhs_pos + rhs.offset());
lhs_is_null
|| (lhs_is_null == rhs_is_null)
diff --git a/rust/arrow/src/array/equal/dictionary.rs b/rust/arrow/src/array/equal/dictionary.rs
index 4f4c148..81eedce 100644
--- a/rust/arrow/src/array/equal/dictionary.rs
+++ b/rust/arrow/src/array/equal/dictionary.rs
@@ -63,8 +63,8 @@ pub(super) fn dictionary_equal<T: ArrowNativeType>(
let lhs_pos = lhs_start + i;
let rhs_pos = rhs_start + i;
- let lhs_is_null = !get_bit(lhs_null_bytes, lhs_pos);
- let rhs_is_null = !get_bit(rhs_null_bytes, rhs_pos);
+ let lhs_is_null = !get_bit(lhs_null_bytes, lhs_pos + lhs.offset());
+ let rhs_is_null = !get_bit(rhs_null_bytes, rhs_pos + rhs.offset());
lhs_is_null
|| (lhs_is_null == rhs_is_null)
diff --git a/rust/arrow/src/array/equal/fixed_binary.rs b/rust/arrow/src/array/equal/fixed_binary.rs
index 5715831..5f8f932 100644
--- a/rust/arrow/src/array/equal/fixed_binary.rs
+++ b/rust/arrow/src/array/equal/fixed_binary.rs
@@ -59,8 +59,8 @@ pub(super) fn fixed_binary_equal(
let lhs_pos = lhs_start + i;
let rhs_pos = rhs_start + i;
- let lhs_is_null = !get_bit(lhs_null_bytes, lhs_pos);
- let rhs_is_null = !get_bit(rhs_null_bytes, rhs_pos);
+ let lhs_is_null = !get_bit(lhs_null_bytes, lhs_pos + lhs.offset());
+ let rhs_is_null = !get_bit(rhs_null_bytes, rhs_pos + rhs.offset());
lhs_is_null
|| (lhs_is_null == rhs_is_null)
diff --git a/rust/arrow/src/array/equal/fixed_list.rs b/rust/arrow/src/array/equal/fixed_list.rs
index f5065bb..a107285 100644
--- a/rust/arrow/src/array/equal/fixed_list.rs
+++ b/rust/arrow/src/array/equal/fixed_list.rs
@@ -61,8 +61,8 @@ pub(super) fn fixed_list_equal(
let lhs_pos = lhs_start + i;
let rhs_pos = rhs_start + i;
- let lhs_is_null = !get_bit(lhs_null_bytes, lhs_pos);
- let rhs_is_null = !get_bit(rhs_null_bytes, rhs_pos);
+ let lhs_is_null = !get_bit(lhs_null_bytes, lhs_pos + lhs.offset());
+ let rhs_is_null = !get_bit(rhs_null_bytes, rhs_pos + rhs.offset());
lhs_is_null
|| (lhs_is_null == rhs_is_null)
diff --git a/rust/arrow/src/array/equal/list.rs b/rust/arrow/src/array/equal/list.rs
index a7a6bd3..7acd477 100644
--- a/rust/arrow/src/array/equal/list.rs
+++ b/rust/arrow/src/array/equal/list.rs
@@ -144,15 +144,15 @@ pub(super) fn list_equal<T: OffsetSizeTrait>(
)
} else {
// get a ref of the parent null buffer bytes, to use in testing for nullness
- let lhs_null_bytes = rhs_nulls.unwrap().as_slice();
+ let lhs_null_bytes = lhs_nulls.unwrap().as_slice();
let rhs_null_bytes = rhs_nulls.unwrap().as_slice();
// with nulls, we need to compare item by item whenever it is not null
(0..len).all(|i| {
let lhs_pos = lhs_start + i;
let rhs_pos = rhs_start + i;
- let lhs_is_null = !get_bit(lhs_null_bytes, lhs_pos);
- let rhs_is_null = !get_bit(rhs_null_bytes, rhs_pos);
+ let lhs_is_null = !get_bit(lhs_null_bytes, lhs_pos + lhs.offset());
+ let rhs_is_null = !get_bit(rhs_null_bytes, rhs_pos + rhs.offset());
lhs_is_null
|| (lhs_is_null == rhs_is_null)
diff --git a/rust/arrow/src/array/equal/mod.rs b/rust/arrow/src/array/equal/mod.rs
index e2ee9bc..67dde55 100644
--- a/rust/arrow/src/array/equal/mod.rs
+++ b/rust/arrow/src/array/equal/mod.rs
@@ -329,9 +329,10 @@ mod tests {
let b = BooleanArray::from(vec![false, false, false]).data();
test_equal(a.as_ref(), b.as_ref(), false);
+ }
- // Test the case where null_count > 0
-
+ #[test]
+ fn test_boolean_equal_null() {
let a = BooleanArray::from(vec![Some(false), None, None, Some(true)]).data();
let b = BooleanArray::from(vec![Some(false), None, None, Some(true)]).data();
test_equal(a.as_ref(), b.as_ref(), true);
@@ -341,23 +342,25 @@ mod tests {
let b = BooleanArray::from(vec![Some(true), None, None, Some(true)]).data();
test_equal(a.as_ref(), b.as_ref(), false);
+ }
- // Test the case where offset != 0
-
+ #[test]
+ fn test_boolean_equal_offset() {
let a =
BooleanArray::from(vec![false, true, false, true, false, false, true]).data();
let b =
- BooleanArray::from(vec![false, false, false, true, false, true, true]).data();
+ BooleanArray::from(vec![true, false, false, false, true, false, true, true])
+ .data();
assert_eq!(equal(a.as_ref(), b.as_ref()), false);
assert_eq!(equal(b.as_ref(), a.as_ref()), false);
let a_slice = a.slice(2, 3);
- let b_slice = b.slice(2, 3);
+ let b_slice = b.slice(3, 3);
assert_eq!(equal(&a_slice, &b_slice), true);
assert_eq!(equal(&b_slice, &a_slice), true);
let a_slice = a.slice(3, 4);
- let b_slice = b.slice(3, 4);
+ let b_slice = b.slice(4, 4);
assert_eq!(equal(&a_slice, &b_slice), false);
assert_eq!(equal(&b_slice, &a_slice), false);
@@ -437,6 +440,20 @@ mod tests {
(2, 1),
true,
),
+ (
+ vec![None, Some(2), None],
+ (1, 1),
+ vec![None, None, Some(2)],
+ (2, 1),
+ true,
+ ),
+ (
+ vec![Some(1), None, Some(2), None, Some(3)],
+ (2, 2),
+ vec![None, Some(2), None, Some(3)],
+ (1, 2),
+ true,
+ ),
];
for (lhs, slice_lhs, rhs, slice_rhs, expected) in cases {
@@ -542,6 +559,26 @@ mod tests {
}
#[test]
+ fn test_string_offset() {
+ let a = StringArray::from(vec![Some("a"), None, Some("b")]).data();
+ let a = a.slice(2, 1);
+ let b = StringArray::from(vec![Some("b")]).data();
+
+ test_equal(&a, b.as_ref(), true);
+ }
+
+ #[test]
+ fn test_string_offset_larger() {
+ let a =
+ StringArray::from(vec![Some("a"), None, Some("b"), None, Some("c")]).data();
+ let b = StringArray::from(vec![None, Some("b"), None, Some("c")]).data();
+
+ test_equal(&a.slice(2, 2), &b.slice(0, 2), false);
+ test_equal(&a.slice(2, 2), &b.slice(1, 2), true);
+ test_equal(&a.slice(2, 2), &b.slice(2, 2), false);
+ }
+
+ #[test]
fn test_null() {
let a = NullArray::new(2).data();
let b = NullArray::new(2).data();
@@ -781,6 +818,7 @@ mod tests {
None,
]);
let b = create_decimal_array(&[
+ None,
Some(8_887_000_000),
None,
None,
@@ -790,23 +828,23 @@ mod tests {
]);
let a_slice = a.slice(0, 3);
- let b_slice = b.slice(0, 3);
+ let b_slice = b.slice(1, 3);
test_equal(&a_slice, &b_slice, true);
let a_slice = a.slice(0, 5);
- let b_slice = b.slice(0, 5);
+ let b_slice = b.slice(1, 5);
test_equal(&a_slice, &b_slice, false);
let a_slice = a.slice(4, 1);
- let b_slice = b.slice(4, 1);
+ let b_slice = b.slice(5, 1);
test_equal(&a_slice, &b_slice, true);
let a_slice = a.slice(3, 3);
- let b_slice = b.slice(3, 3);
+ let b_slice = b.slice(4, 3);
test_equal(&a_slice, &b_slice, false);
let a_slice = a.slice(1, 3);
- let b_slice = b.slice(1, 3);
+ let b_slice = b.slice(2, 3);
test_equal(&a_slice, &b_slice, false);
let b = create_decimal_array(&[
diff --git a/rust/arrow/src/array/equal/primitive.rs b/rust/arrow/src/array/equal/primitive.rs
index ff061d1..db75879 100644
--- a/rust/arrow/src/array/equal/primitive.rs
+++ b/rust/arrow/src/array/equal/primitive.rs
@@ -56,8 +56,8 @@ pub(super) fn primitive_equal<T>(
(0..len).all(|i| {
let lhs_pos = lhs_start + i;
let rhs_pos = rhs_start + i;
- let lhs_is_null = !get_bit(lhs_null_bytes, lhs_pos);
- let rhs_is_null = !get_bit(rhs_null_bytes, rhs_pos);
+ let lhs_is_null = !get_bit(lhs_null_bytes, lhs_pos + lhs.offset());
+ let rhs_is_null = !get_bit(rhs_null_bytes, rhs_pos + rhs.offset());
lhs_is_null
|| (lhs_is_null == rhs_is_null)
diff --git a/rust/arrow/src/array/equal/structure.rs b/rust/arrow/src/array/equal/structure.rs
index 6ec7183..b3cc402 100644
--- a/rust/arrow/src/array/equal/structure.rs
+++ b/rust/arrow/src/array/equal/structure.rs
@@ -79,8 +79,8 @@ pub(super) fn struct_equal(
let lhs_pos = lhs_start + i;
let rhs_pos = rhs_start + i;
// if both struct and child had no null buffers,
- let lhs_is_null = !get_bit(lhs_null_bytes, lhs_pos);
- let rhs_is_null = !get_bit(rhs_null_bytes, rhs_pos);
+ let lhs_is_null = !get_bit(lhs_null_bytes, lhs_pos + lhs.offset());
+ let rhs_is_null = !get_bit(rhs_null_bytes, rhs_pos + rhs.offset());
lhs_is_null
|| (lhs_is_null == rhs_is_null)
diff --git a/rust/arrow/src/array/equal/variable_size.rs b/rust/arrow/src/array/equal/variable_size.rs
index 94fdf6b..ecb3bc2 100644
--- a/rust/arrow/src/array/equal/variable_size.rs
+++ b/rust/arrow/src/array/equal/variable_size.rs
@@ -60,9 +60,9 @@ pub(super) fn variable_sized_equal<T: OffsetSizeTrait>(
let lhs_offsets = lhs.buffer::<T>(0);
let rhs_offsets = rhs.buffer::<T>(0);
- // these are bytes, and thus the offset does not need to be multiplied
- let lhs_values = &lhs.buffers()[1].as_slice()[lhs.offset()..];
- let rhs_values = &rhs.buffers()[1].as_slice()[rhs.offset()..];
+ // the offsets of the `ArrayData` are ignored as they are only applied to the offset buffer.
+ let lhs_values = lhs.buffers()[1].as_slice();
+ let rhs_values = rhs.buffers()[1].as_slice();
let lhs_null_count = count_nulls(lhs_nulls, lhs_start, len);
let rhs_null_count = count_nulls(rhs_nulls, rhs_start, len);
@@ -88,10 +88,10 @@ pub(super) fn variable_sized_equal<T: OffsetSizeTrait>(
// the null bits can still be `None`, so we don't unwrap
let lhs_is_null = !lhs_nulls
- .map(|v| get_bit(v.as_slice(), lhs_pos))
+ .map(|v| get_bit(v.as_slice(), lhs.offset() + lhs_pos))
.unwrap_or(false);
let rhs_is_null = !rhs_nulls
- .map(|v| get_bit(v.as_slice(), rhs_pos))
+ .map(|v| get_bit(v.as_slice(), rhs.offset() + rhs_pos))
.unwrap_or(false);
lhs_is_null