You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by ko...@apache.org on 2019/07/04 05:51:54 UTC
[arrow] 15/38: ARROW-5358: [Rust] Implement equality check for
ArrayData and Array
This is an automated email from the ASF dual-hosted git repository.
kou pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git
commit 75d4f4257658bcea233dff28ae9bba2506fc7854
Author: Chao Sun <su...@apache.org>
AuthorDate: Mon Jul 1 13:00:54 2019 -0700
ARROW-5358: [Rust] Implement equality check for ArrayData and Array
This implements equality comparison for the `Array` type, which checks whether two arrays are identical in content.
Besides the above, this adds two traits: `PrimitiveArrayOps` and `ListArrayOps`. The former exposes a few operations common to numeric arrays and boolean arrays, while the latter exposes operations common to list and binary arrays.
Author: Chao Sun <su...@apache.org>
Closes #4643 from sunchao/ARROW-5358 and squashes the following commits:
e40241b1 <Chao Sun> Fixes after rebasing
9a11efa4 <Chao Sun> Fix a bug in test
53ac33e6 <Chao Sun> Address comments
8663124c <Chao Sun> Replace expect with unwrap
d3ffb27f <Chao Sun> ARROW-5358: Implement equality check for ArrayData and Array
---
rust/arrow/src/array/array.rs | 95 +++++-
rust/arrow/src/array/equal.rs | 741 ++++++++++++++++++++++++++++++++++++++++++
rust/arrow/src/array/mod.rs | 81 +++--
3 files changed, 879 insertions(+), 38 deletions(-)
diff --git a/rust/arrow/src/array/array.rs b/rust/arrow/src/array/array.rs
index f4af117..2c353d5 100644
--- a/rust/arrow/src/array/array.rs
+++ b/rust/arrow/src/array/array.rs
@@ -16,7 +16,7 @@
// under the License.
use std::any::Any;
-use std::convert::From;
+use std::convert::{From, TryFrom};
use std::fmt;
use std::io::Write;
use std::mem;
@@ -27,6 +27,7 @@ use chrono::prelude::*;
use super::*;
use crate::buffer::{Buffer, MutableBuffer};
use crate::datatypes::*;
+use crate::error::{ArrowError, Result};
use crate::memory;
use crate::util::bit_util;
@@ -41,7 +42,7 @@ const NANOSECONDS: i64 = 1_000_000_000;
/// Trait for dealing with different types of array at runtime when the type of the
/// array is not known in advance
-pub trait Array: Send + Sync {
+pub trait Array: Send + Sync + ArrayEqual {
/// Returns the array as `Any` so that it can be downcast to a specific implementation
fn as_any(&self) -> &Any;
@@ -194,6 +195,45 @@ pub struct PrimitiveArray<T: ArrowPrimitiveType> {
raw_values: RawPtrBox<T::Native>,
}
+/// Common operations for primitive types, including numeric types and boolean type.
+pub trait PrimitiveArrayOps<T: ArrowPrimitiveType> {
+ /// Returns the buffer that holds this array's values.
+ fn values(&self) -> Buffer;
+ /// Returns the value at index `i` as the native type of `T`.
+ fn value(&self, i: usize) -> T::Native;
+}
+
+// Blanket fallback impl. This is necessary when a caller that is only bounded by
+// `ArrowPrimitiveType` wants to access `PrimitiveArrayOps`'s methods. Both methods are
+// marked `default` (specialization) and panic via `unimplemented!()` if reached; the
+// actual implementations are delegated to the impls for `ArrowNumericType` and
+// `BooleanType`.
+impl<T: ArrowPrimitiveType> PrimitiveArrayOps<T> for PrimitiveArray<T> {
+ default fn values(&self) -> Buffer {
+ unimplemented!()
+ }
+
+ default fn value(&self, _: usize) -> T::Native {
+ unimplemented!()
+ }
+}
+
+// `PrimitiveArrayOps` for numeric arrays. The calls in the bodies are expected to resolve
+// to `PrimitiveArray`'s inherent `values`/`value` methods rather than recursing into this
+// trait impl, because inherent methods take precedence during method resolution.
+// NOTE(review): those inherent methods are not visible in this hunk -- confirm they exist.
+impl<T: ArrowNumericType> PrimitiveArrayOps<T> for PrimitiveArray<T> {
+ fn values(&self) -> Buffer {
+ self.values()
+ }
+
+ fn value(&self, i: usize) -> T::Native {
+ self.value(i)
+ }
+}
+
+// `PrimitiveArrayOps` for boolean arrays. As with the numeric impl, the calls in the bodies
+// are expected to resolve to `BooleanArray`'s inherent `values`/`value` methods.
+// NOTE(review): those inherent methods are not visible in this hunk -- confirm they exist.
+impl PrimitiveArrayOps<BooleanType> for BooleanArray {
+ fn values(&self) -> Buffer {
+ self.values()
+ }
+
+ fn value(&self, i: usize) -> bool {
+ self.value(i)
+ }
+}
+
impl<T: ArrowPrimitiveType> Array for PrimitiveArray<T> {
fn as_any(&self) -> &Any {
self
@@ -271,7 +311,6 @@ where
///
/// If a data type cannot be converted to `NaiveDateTime`, a `None` is returned.
/// A valid value is expected, thus the user should first check for validity.
- /// TODO: extract constants into static variables
pub fn value_as_datetime(&self, i: usize) -> Option<NaiveDateTime> {
let v = i64::from(self.value(i));
match self.data_type() {
@@ -651,6 +690,23 @@ impl<T: ArrowPrimitiveType> From<ArrayDataRef> for PrimitiveArray<T> {
}
}
+/// Common operations for List types, currently `ListArray` and `BinaryArray`.
+pub trait ListArrayOps {
+ /// Returns the `i`-th entry of the array's value offsets.
+ /// NOTE(review): for `BinaryArray` this reads the offsets pointer at raw index `i`;
+ /// whether the array's own offset is already applied is not visible here -- confirm.
+ fn value_offset_at(&self, i: usize) -> i32;
+}
+
+// Exposes `ListArray`'s offset lookup through the `ListArrayOps` trait. The call in the body
+// is expected to resolve to an inherent `ListArray::value_offset_at`.
+// NOTE(review): that inherent method is not visible in this hunk -- confirm.
+impl ListArrayOps for ListArray {
+ fn value_offset_at(&self, i: usize) -> i32 {
+ self.value_offset_at(i)
+ }
+}
+
+// Exposes `BinaryArray`'s private, inherent `value_offset_at` through the `ListArrayOps`
+// trait; the inherent method takes precedence, so this does not recurse.
+impl ListArrayOps for BinaryArray {
+ fn value_offset_at(&self, i: usize) -> i32 {
+ self.value_offset_at(i)
+ }
+}
+
/// A list array where each element is a variable-sized sequence of values with the same
/// type.
pub struct ListArray {
@@ -784,6 +840,16 @@ impl BinaryArray {
self.value_offset_at(i + 1) - self.value_offset_at(i)
}
+    /// Returns a clone of the value offset buffer, i.e. buffer 0 of the underlying array data.
+    pub fn value_offsets(&self) -> Buffer {
+        let offsets = &self.data.buffers()[0];
+        offsets.clone()
+    }
+
+    /// Returns a clone of the value data buffer, i.e. buffer 1 of the underlying array data.
+    pub fn value_data(&self) -> Buffer {
+        let values = &self.data.buffers()[1];
+        values.clone()
+    }
+
#[inline]
fn value_offset_at(&self, i: usize) -> i32 {
unsafe { *self.value_offsets.get().offset(i as isize) }
@@ -831,7 +897,7 @@ impl<'a> From<Vec<&'a str>> for BinaryArray {
}
}
-impl<'a> From<Vec<&[u8]>> for BinaryArray {
+impl From<Vec<&[u8]>> for BinaryArray {
fn from(v: Vec<&[u8]>) -> Self {
let mut offsets = Vec::with_capacity(v.len() + 1);
let mut values = Vec::new();
@@ -851,6 +917,22 @@ impl<'a> From<Vec<&[u8]>> for BinaryArray {
}
}
+// Builds a `BinaryArray` from optional string slices. `Some` entries are appended as
+// strings; `None` entries are appended with `append(false)`. Any builder error is
+// propagated to the caller.
+impl<'a> TryFrom<Vec<Option<&'a str>>> for BinaryArray {
+    type Error = ArrowError;
+
+    fn try_from(v: Vec<Option<&'a str>>) -> Result<Self> {
+        // One slot per input element.
+        let mut builder = BinaryBuilder::new(v.len());
+        for item in v {
+            match item {
+                Some(text) => {
+                    builder.append_string(text)?;
+                }
+                None => {
+                    builder.append(false)?;
+                }
+            }
+        }
+        Ok(builder.finish())
+    }
+}
+
/// Creates a `BinaryArray` from `List<u8>` array
impl From<ListArray> for BinaryArray {
fn from(v: ListArray) -> Self {
@@ -907,6 +989,11 @@ impl StructArray {
pub fn column(&self, pos: usize) -> &ArrayRef {
&self.boxed_fields[pos]
}
+
+ /// Returns the number of fields (child columns) in this struct array, i.e. the
+ /// length of `boxed_fields`.
+ pub fn num_columns(&self) -> usize {
+ self.boxed_fields.len()
+ }
}
impl From<ArrayDataRef> for StructArray {
diff --git a/rust/arrow/src/array/equal.rs b/rust/arrow/src/array/equal.rs
new file mode 100644
index 0000000..5f888ab
--- /dev/null
+++ b/rust/arrow/src/array/equal.rs
@@ -0,0 +1,741 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+use super::*;
+use crate::datatypes::*;
+use crate::util::bit_util;
+
+/// Trait for `Array` equality.
+pub trait ArrayEqual {
+ /// Returns true if this array is equal to the `other` array
+ fn equals(&self, other: &dyn Array) -> bool;
+
+ /// Returns true if the range [start_idx, end_idx) is equal to
+ /// [other_start_idx, other_start_idx + end_idx - start_idx) in the `other` array
+ ///
+ /// The implementations in this file assert that the `other` range fits within
+ /// `other.len()` and `unwrap` the downcast of `other` to their own array type, so
+ /// they panic if either condition does not hold.
+ fn range_equals(
+ &self,
+ other: &dyn Array,
+ start_idx: usize,
+ end_idx: usize,
+ other_start_idx: usize,
+ ) -> bool;
+}
+
+// Equality for primitive arrays. `equals` compares the raw bytes of the value buffers;
+// `range_equals` compares element by element through `value()`. Both methods are
+// `default` so that `BooleanArray` can specialize them.
+impl<T: ArrowPrimitiveType> ArrayEqual for PrimitiveArray<T> {
+    /// Returns true if `other` has the same type, length and null layout (checked by
+    /// `base_equal`) and every valid slot holds the same bytes.
+    default fn equals(&self, other: &dyn Array) -> bool {
+        if !base_equal(&self.data(), &other.data()) {
+            return false;
+        }
+
+        let lhs_buf = self.data_ref().buffers()[0].clone();
+        let rhs_buf = other.data_ref().buffers()[0].clone();
+        // Size of one element in bytes.
+        let width = T::get_bit_width() / 8;
+
+        if self.null_count() == 0 {
+            // Without nulls the whole value range can be compared in one go.
+            let lhs_begin = self.offset() * width;
+            let rhs_begin = other.offset() * width;
+            let num_bytes = self.len() * width;
+            let lhs_bytes = &lhs_buf.data()[lhs_begin..(lhs_begin + num_bytes)];
+            let rhs_bytes = &rhs_buf.data()[rhs_begin..(rhs_begin + num_bytes)];
+            return lhs_bytes == rhs_bytes;
+        }
+
+        // With nulls, only valid slots are compared; the bytes behind a null slot are
+        // ignored.
+        let lhs_all = lhs_buf.data();
+        let rhs_all = rhs_buf.data();
+        (0..self.len()).filter(|&i| self.is_valid(i)).all(|i| {
+            let lhs_begin = (i + self.offset()) * width;
+            let lhs_bytes = &lhs_all[lhs_begin..(lhs_begin + width)];
+            let rhs_begin = (i + other.offset()) * width;
+            let rhs_bytes = &rhs_all[rhs_begin..(rhs_begin + width)];
+            lhs_bytes == rhs_bytes
+        })
+    }
+
+    /// Returns true if slots `[start_idx, end_idx)` of `self` match the equally long run
+    /// of `other` starting at `other_start_idx`, both in nullness and in value.
+    ///
+    /// Panics if the `other` range exceeds `other.len()` or if `other` is not a
+    /// `PrimitiveArray<T>`.
+    default fn range_equals(
+        &self,
+        other: &dyn Array,
+        start_idx: usize,
+        end_idx: usize,
+        other_start_idx: usize,
+    ) -> bool {
+        assert!(other_start_idx + (end_idx - start_idx) <= other.len());
+        let other = other.as_any().downcast_ref::<PrimitiveArray<T>>().unwrap();
+
+        // Walk both ranges in lockstep: `i` indexes `self`, `j` indexes `other`.
+        (start_idx..end_idx)
+            .zip(other_start_idx..)
+            .all(|(i, j)| match (self.is_null(i), other.is_null(j)) {
+                (true, true) => true,
+                (false, false) => self.value(i) == other.value(j),
+                _ => false,
+            })
+    }
+}
+
+// Boolean specialization of `equals`: values are bit-packed, so they are compared bit by
+// bit instead of byte by byte.
+impl ArrayEqual for BooleanArray {
+    /// Returns true if `other` passes `base_equal` and every valid slot holds the same bit.
+    fn equals(&self, other: &dyn Array) -> bool {
+        if !base_equal(&self.data(), &other.data()) {
+            return false;
+        }
+
+        let lhs_bits = self.data_ref().buffers()[0].data();
+        let rhs_bits = other.data_ref().buffers()[0].data();
+
+        // TODO: we can do this more efficiently if all values are not-null
+        (0..self.len()).filter(|&i| self.is_valid(i)).all(|i| {
+            let lhs = bit_util::get_bit(lhs_bits, i + self.offset());
+            let rhs = bit_util::get_bit(rhs_bits, i + other.offset());
+            lhs == rhs
+        })
+    }
+}
+
+// `==` for numeric primitive arrays, defined by delegating to `ArrayEqual::equals`.
+impl<T: ArrowNumericType> PartialEq for PrimitiveArray<T> {
+ fn eq(&self, other: &PrimitiveArray<T>) -> bool {
+ self.equals(other)
+ }
+}
+
+// Equality for list arrays: the offsets must describe the same list lengths and the
+// referenced child values must be equal.
+impl ArrayEqual for ListArray {
+    /// Returns true if `other` passes `base_equal`, has equal (relative) value offsets,
+    /// and the child values covered by this array equal those covered by `other`.
+    ///
+    /// Panics if `other` passes `base_equal` but is not a `ListArray`.
+    fn equals(&self, other: &dyn Array) -> bool {
+        if !base_equal(&self.data(), &other.data()) {
+            return false;
+        }
+
+        let other = other.as_any().downcast_ref::<ListArray>().unwrap();
+
+        if !value_offset_equal(self, other) {
+            return false;
+        }
+
+        // Compare the whole run of child values spanned by this array in a single call.
+        self.values().range_equals(
+            &*other.values(),
+            self.value_offset(0) as usize,
+            self.value_offset(self.len()) as usize,
+            other.value_offset(0) as usize,
+        )
+    }
+
+    /// Returns true if lists `[start_idx, end_idx)` of `self` equal the equally long run
+    /// of lists in `other` starting at `other_start_idx`.
+    ///
+    /// Panics if the `other` range exceeds `other.len()` or if `other` is not a
+    /// `ListArray`.
+    fn range_equals(
+        &self,
+        other: &dyn Array,
+        start_idx: usize,
+        end_idx: usize,
+        other_start_idx: usize,
+    ) -> bool {
+        assert!(other_start_idx + (end_idx - start_idx) <= other.len());
+        let other = other.as_any().downcast_ref::<ListArray>().unwrap();
+
+        // The child arrays do not change while iterating, so fetch them once.
+        let values = self.values();
+        let other_values = other.values();
+
+        // `zip` advances `j` together with `i` on every iteration. The previous manual
+        // `j += 1` was skipped by the `continue` taken for null slots, which left `j`
+        // pointing at the wrong element of `other` after the first null.
+        for (i, j) in (start_idx..end_idx).zip(other_start_idx..) {
+            let is_null = self.is_null(i);
+            if is_null != other.is_null(j) {
+                return false;
+            }
+
+            // Two null slots are equal regardless of their offsets.
+            if is_null {
+                continue;
+            }
+
+            let start_offset = self.value_offset(i) as usize;
+            let end_offset = self.value_offset(i + 1) as usize;
+            let other_start_offset = other.value_offset(j) as usize;
+            let other_end_offset = other.value_offset(j + 1) as usize;
+
+            // Lists of different length can never be equal.
+            if end_offset - start_offset != other_end_offset - other_start_offset {
+                return false;
+            }
+
+            if !values.range_equals(
+                &*other_values,
+                start_offset,
+                end_offset,
+                other_start_offset,
+            ) {
+                return false;
+            }
+        }
+
+        true
+    }
+}
+
+// Equality for binary arrays: the offsets must describe the same value lengths and the
+// referenced bytes must be equal.
+impl ArrayEqual for BinaryArray {
+    /// Returns true if `other` passes `base_equal`, has equal (relative) value offsets,
+    /// and every non-null slot holds the same bytes.
+    ///
+    /// Panics if `other` passes `base_equal` but is not a `BinaryArray`.
+    fn equals(&self, other: &dyn Array) -> bool {
+        if !base_equal(&self.data(), &other.data()) {
+            return false;
+        }
+
+        let other = other.as_any().downcast_ref::<BinaryArray>().unwrap();
+
+        if !value_offset_equal(self, other) {
+            return false;
+        }
+
+        // TODO: handle null & length == 0 case?
+
+        let value_buf = self.value_data();
+        let other_value_buf = other.value_data();
+        let value_data = value_buf.data();
+        let other_value_data = other_value_buf.data();
+
+        if self.null_count() == 0 {
+            // Without nulls the referenced bytes form one contiguous run on each side, so
+            // a single slice comparison suffices. Starting at the first offset, rather
+            // than at byte 0, keeps bytes that precede the first value out of the
+            // comparison.
+            let start = self.value_offset(0) as usize;
+            let other_start = other.value_offset(0) as usize;
+            let len = (self.value_offset(self.len()) - self.value_offset(0)) as usize;
+            return value_data[start..(start + len)]
+                == other_value_data[other_start..(other_start + len)];
+        }
+
+        // With nulls, compare slot by slot and skip the null ones; `base_equal` already
+        // verified that both arrays are null in the same positions.
+        (0..self.len()).filter(|&i| !self.is_null(i)).all(|i| {
+            let start = self.value_offset(i) as usize;
+            let other_start = other.value_offset(i) as usize;
+            let len = self.value_length(i) as usize;
+            value_data[start..(start + len)]
+                == other_value_data[other_start..(other_start + len)]
+        })
+    }
+
+    /// Returns true if values `[start_idx, end_idx)` of `self` equal the equally long run
+    /// of values in `other` starting at `other_start_idx`.
+    ///
+    /// Panics if the `other` range exceeds `other.len()` or if `other` is not a
+    /// `BinaryArray`.
+    fn range_equals(
+        &self,
+        other: &dyn Array,
+        start_idx: usize,
+        end_idx: usize,
+        other_start_idx: usize,
+    ) -> bool {
+        assert!(other_start_idx + (end_idx - start_idx) <= other.len());
+        let other = other.as_any().downcast_ref::<BinaryArray>().unwrap();
+
+        // The value buffers do not change while iterating, so fetch them once instead of
+        // once per element.
+        let value_buf = self.value_data();
+        let other_value_buf = other.value_data();
+        let value_data = value_buf.data();
+        let other_value_data = other_value_buf.data();
+
+        // `zip` advances `j` together with `i` on every iteration. The previous manual
+        // `j += 1` was skipped by the `continue` taken for null slots, which left `j`
+        // pointing at the wrong element of `other` after the first null.
+        for (i, j) in (start_idx..end_idx).zip(other_start_idx..) {
+            let is_null = self.is_null(i);
+            if is_null != other.is_null(j) {
+                return false;
+            }
+
+            // Two null slots are equal regardless of their offsets.
+            if is_null {
+                continue;
+            }
+
+            let start_offset = self.value_offset(i) as usize;
+            let end_offset = self.value_offset(i + 1) as usize;
+            let other_start_offset = other.value_offset(j) as usize;
+            let other_end_offset = other.value_offset(j + 1) as usize;
+
+            let len = end_offset - start_offset;
+            if len != other_end_offset - other_start_offset {
+                return false;
+            }
+
+            // Empty values are trivially equal; only slice when there is data to compare.
+            if len > 0
+                && value_data[start_offset..(start_offset + len)]
+                    != other_value_data[other_start_offset..(other_start_offset + len)]
+            {
+                return false;
+            }
+        }
+
+        true
+    }
+}
+
+// Equality for struct arrays: rows must agree in nullness and, for non-null rows, every
+// child column must hold an equal value in that row.
+impl ArrayEqual for StructArray {
+    /// Returns true if `other` passes `base_equal` and every non-null row is equal in
+    /// all child columns.
+    ///
+    /// Panics if `other` passes `base_equal` but is not a `StructArray`.
+    fn equals(&self, other: &dyn Array) -> bool {
+        if !base_equal(&self.data(), &other.data()) {
+            return false;
+        }
+
+        let other = other.as_any().downcast_ref::<StructArray>().unwrap();
+
+        for i in 0..self.len() {
+            let is_null = self.is_null(i);
+            if is_null != other.is_null(i) {
+                return false;
+            }
+
+            // Child values behind a null row are not compared.
+            if is_null {
+                continue;
+            }
+
+            // Compare row `i` of each column as a one-element range.
+            for j in 0..self.num_columns() {
+                if !self.column(j).range_equals(&**other.column(j), i, i + 1, i) {
+                    return false;
+                }
+            }
+        }
+
+        true
+    }
+
+    /// Returns true if rows `[start_idx, end_idx)` of `self` equal the equally long run
+    /// of rows in `other` starting at `other_start_idx`.
+    ///
+    /// Panics if the `other` range exceeds `other.len()` or if `other` is not a
+    /// `StructArray`.
+    fn range_equals(
+        &self,
+        other: &dyn Array,
+        start_idx: usize,
+        end_idx: usize,
+        other_start_idx: usize,
+    ) -> bool {
+        assert!(other_start_idx + (end_idx - start_idx) <= other.len());
+        let other = other.as_any().downcast_ref::<StructArray>().unwrap();
+
+        // `i` indexes `self` and `j` indexes `other`; `zip` keeps them in lockstep. The
+        // previous code checked `other.is_null(i)` (the wrong index whenever the two
+        // ranges start at different positions) and skipped `j += 1` for null rows.
+        for (i, j) in (start_idx..end_idx).zip(other_start_idx..) {
+            let is_null = self.is_null(i);
+            if is_null != other.is_null(j) {
+                return false;
+            }
+
+            // Child values behind a null row are not compared.
+            if is_null {
+                continue;
+            }
+
+            // Compare row `i` of `self` with row `j` of `other`, column by column.
+            for k in 0..self.num_columns() {
+                if !self.column(k).range_equals(&**other.column(k), i, i + 1, j) {
+                    return false;
+                }
+            }
+        }
+
+        true
+    }
+}
+
+// Checks the properties shared by all array kinds: data type, length, null count and,
+// when nulls are present, the validity bit of every slot (taking each array's offset
+// into account).
+fn base_equal(this: &ArrayDataRef, other: &ArrayDataRef) -> bool {
+    let same_shape = this.data_type() == other.data_type()
+        && this.len == other.len
+        && this.null_count == other.null_count;
+    if !same_shape {
+        return false;
+    }
+
+    // No nulls on either side (the counts are equal), so there is no bitmap to compare.
+    if this.null_count == 0 {
+        return true;
+    }
+
+    let this_bits = this.null_bitmap().as_ref().unwrap().bits.data();
+    let other_bits = other.null_bitmap().as_ref().unwrap().bits.data();
+    (0..this.len()).all(|i| {
+        bit_util::get_bit(this_bits, i + this.offset())
+            == bit_util::get_bit(other_bits, i + other.offset())
+    })
+}
+
+// Checks whether two list-like arrays describe the same value lengths. Offsets are
+// compared relative to each array's first offset, so arrays whose values start at
+// different positions in their value buffers can still match.
+fn value_offset_equal<T: Array + ListArrayOps>(this: &T, other: &T) -> bool {
+    // Fast path: neither array is sliced, so the raw offset bytes can be compared
+    // directly. Each offset is an `i32`, i.e. 4 bytes, and there are `len + 1` of them.
+    if this.offset() == 0 && other.offset() == 0 {
+        let this_bytes = this.data_ref().buffers()[0].data();
+        let other_bytes = other.data_ref().buffers()[0].data();
+        let this_end = (this.len() + 1) * 4;
+        let other_end = (other.len() + 1) * 4;
+        return this_bytes[0..this_end] == other_bytes[0..other_end];
+    }
+
+    // The expensive case: compare offsets one by one, relative to the first offset.
+    // NOTE(review): `value_offset_at` is called with indices starting at 0 here; if it
+    // does not apply the array's own offset, sliced arrays compare the wrong entries --
+    // confirm against the `value_offset_at` implementations.
+    let this_base = this.value_offset_at(0);
+    let other_base = other.value_offset_at(0);
+    (0..=this.len()).all(|i| {
+        this.value_offset_at(i) - this_base == other.value_offset_at(i) - other_base
+    })
+}
+
+#[cfg(test)]
+mod tests {
+ use super::*;
+
+ use std::convert::TryFrom;
+
+ use crate::error::Result;
+
+ /// Checks `equals` on `Int32Array` in both directions: arrays without nulls, arrays
+ /// with nulls (differing in nullness and in value), and slices of such arrays.
+ #[test]
+ fn test_primitive_equal() {
+ let a = Int32Array::from(vec![1, 2, 3]);
+ let b = Int32Array::from(vec![1, 2, 3]);
+ assert!(a.equals(&b));
+ assert!(b.equals(&a));
+
+ let b = Int32Array::from(vec![1, 2, 4]);
+ assert!(!a.equals(&b));
+ assert!(!b.equals(&a));
+
+ // Test the case where null_count > 0
+
+ let a = Int32Array::from(vec![Some(1), None, Some(2), Some(3)]);
+ let b = Int32Array::from(vec![Some(1), None, Some(2), Some(3)]);
+ assert!(a.equals(&b));
+ assert!(b.equals(&a));
+
+ let b = Int32Array::from(vec![Some(1), None, None, Some(3)]);
+ assert!(!a.equals(&b));
+ assert!(!b.equals(&a));
+
+ let b = Int32Array::from(vec![Some(1), None, Some(2), Some(4)]);
+ assert!(!a.equals(&b));
+ assert!(!b.equals(&a));
+
+ // Test the case where offset != 0
+
+ // The slices cover elements 1..3, which exclude the differing last element.
+ let a_slice = a.slice(1, 2);
+ let b_slice = b.slice(1, 2);
+ assert!(a_slice.equals(&*b_slice));
+ assert!(b_slice.equals(&*a_slice));
+ }
+
+ /// Checks `equals` on `BooleanArray` in both directions: arrays without nulls, arrays
+ /// with nulls (differing in nullness and in value), and slices of arrays that differ
+ /// only outside or inside the sliced range.
+ #[test]
+ fn test_boolean_equal() {
+ let a = BooleanArray::from(vec![false, false, true]);
+ let b = BooleanArray::from(vec![false, false, true]);
+ assert!(a.equals(&b));
+ assert!(b.equals(&a));
+
+ let b = BooleanArray::from(vec![false, false, false]);
+ assert!(!a.equals(&b));
+ assert!(!b.equals(&a));
+
+ // Test the case where null_count > 0
+
+ let a = BooleanArray::from(vec![Some(false), None, None, Some(true)]);
+ let b = BooleanArray::from(vec![Some(false), None, None, Some(true)]);
+ assert!(a.equals(&b));
+ assert!(b.equals(&a));
+
+ let b = BooleanArray::from(vec![None, None, None, Some(true)]);
+ assert!(!a.equals(&b));
+ assert!(!b.equals(&a));
+
+ let b = BooleanArray::from(vec![Some(true), None, None, Some(true)]);
+ assert!(!a.equals(&b));
+ assert!(!b.equals(&a));
+
+ // Test the case where offset != 0
+
+ let a = BooleanArray::from(vec![false, true, false, true, false, false, true]);
+ let b = BooleanArray::from(vec![false, false, false, true, false, true, true]);
+ assert!(!a.equals(&b));
+ assert!(!b.equals(&a));
+
+ // Elements 2..5 are identical in both arrays.
+ let a_slice = a.slice(2, 3);
+ let b_slice = b.slice(2, 3);
+ assert!(a_slice.equals(&*b_slice));
+ assert!(b_slice.equals(&*a_slice));
+
+ // Elements 3..7 include index 5, where the arrays differ.
+ let a_slice = a.slice(3, 4);
+ let b_slice = b.slice(3, 4);
+ assert!(!a_slice.equals(&*b_slice));
+ assert!(!b_slice.equals(&*a_slice));
+ }
+
+ /// Checks `equals` on `ListArray` of `Int32` in both directions: lists without nulls,
+ /// lists with nulls (differing in nullness and in value), and slices of such arrays.
+ #[test]
+ fn test_list_equal() {
+ let mut a_builder = ListBuilder::new(Int32Builder::new(10));
+ let mut b_builder = ListBuilder::new(Int32Builder::new(10));
+
+ let a = create_list_array(&mut a_builder, &[Some(&[1, 2, 3]), Some(&[4, 5, 6])])
+ .unwrap();
+ let b = create_list_array(&mut b_builder, &[Some(&[1, 2, 3]), Some(&[4, 5, 6])])
+ .unwrap();
+
+ assert!(a.equals(&b));
+ assert!(b.equals(&a));
+
+ // From here on `a_builder` is reused to build every array, including the `b` ones.
+ let b = create_list_array(&mut a_builder, &[Some(&[1, 2, 3]), Some(&[4, 5, 7])])
+ .unwrap();
+ assert!(!a.equals(&b));
+ assert!(!b.equals(&a));
+
+ // Test the case where null_count > 0
+
+ let a = create_list_array(
+ &mut a_builder,
+ &[Some(&[1, 2]), None, None, Some(&[3, 4]), None, None],
+ )
+ .unwrap();
+ let b = create_list_array(
+ &mut a_builder,
+ &[Some(&[1, 2]), None, None, Some(&[3, 4]), None, None],
+ )
+ .unwrap();
+ assert!(a.equals(&b));
+ assert!(b.equals(&a));
+
+ let b = create_list_array(
+ &mut a_builder,
+ &[
+ Some(&[1, 2]),
+ None,
+ Some(&[5, 6]),
+ Some(&[3, 4]),
+ None,
+ None,
+ ],
+ )
+ .unwrap();
+ assert!(!a.equals(&b));
+ assert!(!b.equals(&a));
+
+ let b = create_list_array(
+ &mut a_builder,
+ &[Some(&[1, 2]), None, None, Some(&[3, 5]), None, None],
+ )
+ .unwrap();
+ assert!(!a.equals(&b));
+ assert!(!b.equals(&a));
+
+ // Test the case where offset != 0
+
+ // `a` and `b` differ only in the list at index 3.
+ let a_slice = a.slice(0, 3);
+ let b_slice = b.slice(0, 3);
+ assert!(a_slice.equals(&*b_slice));
+ assert!(b_slice.equals(&*a_slice));
+
+ let a_slice = a.slice(0, 5);
+ let b_slice = b.slice(0, 5);
+ assert!(!a_slice.equals(&*b_slice));
+ assert!(!b_slice.equals(&*a_slice));
+
+ let a_slice = a.slice(4, 1);
+ let b_slice = b.slice(4, 1);
+ assert!(a_slice.equals(&*b_slice));
+ assert!(b_slice.equals(&*a_slice));
+ }
+
+ /// Checks `equals` on `BinaryArray` in both directions: arrays without nulls, arrays
+ /// with nulls (differing in nullness and in value), and slices of such arrays.
+ #[test]
+ fn test_binary_equal() {
+ let a = BinaryArray::from(vec!["hello", "world"]);
+ let b = BinaryArray::from(vec!["hello", "world"]);
+ assert!(a.equals(&b));
+ assert!(b.equals(&a));
+
+ let b = BinaryArray::from(vec!["hello", "arrow"]);
+ assert!(!a.equals(&b));
+ assert!(!b.equals(&a));
+
+ // Test the case where null_count > 0
+
+ let a = BinaryArray::try_from(vec![
+ Some("hello"),
+ None,
+ None,
+ Some("world"),
+ None,
+ None,
+ ])
+ .unwrap();
+
+ let b = BinaryArray::try_from(vec![
+ Some("hello"),
+ None,
+ None,
+ Some("world"),
+ None,
+ None,
+ ])
+ .unwrap();
+ assert!(a.equals(&b));
+ assert!(b.equals(&a));
+
+ let b = BinaryArray::try_from(vec![
+ Some("hello"),
+ Some("foo"),
+ None,
+ Some("world"),
+ None,
+ None,
+ ])
+ .unwrap();
+ assert!(!a.equals(&b));
+ assert!(!b.equals(&a));
+
+ let b = BinaryArray::try_from(vec![
+ Some("hello"),
+ None,
+ None,
+ Some("arrow"),
+ None,
+ None,
+ ])
+ .unwrap();
+ assert!(!a.equals(&b));
+ assert!(!b.equals(&a));
+
+ // Test the case where offset != 0
+
+ // `a` and `b` differ only in the value at index 3.
+ let a_slice = a.slice(0, 3);
+ let b_slice = b.slice(0, 3);
+ assert!(a_slice.equals(&*b_slice));
+ assert!(b_slice.equals(&*a_slice));
+
+ let a_slice = a.slice(0, 5);
+ let b_slice = b.slice(0, 5);
+ assert!(!a_slice.equals(&*b_slice));
+ assert!(!b_slice.equals(&*a_slice));
+
+ let a_slice = a.slice(4, 1);
+ let b_slice = b.slice(4, 1);
+ assert!(a_slice.equals(&*b_slice));
+ assert!(b_slice.equals(&*a_slice));
+ }
+
+ /// Checks that two `StructArray`s built from identical inputs (a string column, an
+ /// int column and one null row) compare equal in both directions. Only the positive
+ /// case is covered here.
+ #[test]
+ fn test_struct_equal() {
+ let string_builder = BinaryBuilder::new(5);
+ let int_builder = Int32Builder::new(5);
+
+ let mut fields = Vec::new();
+ let mut field_builders = Vec::new();
+ fields.push(Field::new("f1", DataType::Utf8, false));
+ field_builders.push(Box::new(string_builder) as Box<ArrayBuilder>);
+ fields.push(Field::new("f2", DataType::Int32, false));
+ field_builders.push(Box::new(int_builder) as Box<ArrayBuilder>);
+
+ let mut builder = StructBuilder::new(fields, field_builders);
+
+ // The same builder is reused for both arrays.
+ let a = create_struct_array(
+ &mut builder,
+ &[Some("joe"), None, None, Some("mark"), Some("doe")],
+ &[Some(1), Some(2), None, Some(4), Some(5)],
+ &[true, true, false, true, true],
+ )
+ .unwrap();
+ let b = create_struct_array(
+ &mut builder,
+ &[Some("joe"), None, None, Some("mark"), Some("doe")],
+ &[Some(1), Some(2), None, Some(4), Some(5)],
+ &[true, true, false, true, true],
+ )
+ .unwrap();
+
+ assert!(a.equals(&b));
+ assert!(b.equals(&a));
+ }
+
+    /// Test helper: appends one list slot per entry of `data` to `builder` (`Some` becomes
+    /// a valid list with those values, `None` becomes an invalid slot) and finishes it.
+    fn create_list_array<'a, U: AsRef<[i32]>, T: AsRef<[Option<U>]>>(
+        builder: &'a mut ListBuilder<Int32Builder>,
+        data: T,
+    ) -> Result<ListArray> {
+        for entry in data.as_ref() {
+            match entry {
+                Some(items) => {
+                    // Push the child values first, then close the list slot as valid.
+                    builder.values().append_slice(items.as_ref())?;
+                    builder.append(true)?;
+                }
+                None => {
+                    builder.append(false)?;
+                }
+            }
+        }
+        Ok(builder.finish())
+    }
+
+    /// Test helper: fills field 0 (strings) from `first`, field 1 (ints) from `second`,
+    /// appends one struct slot per entry of `is_valid`, and finishes the builder.
+    fn create_struct_array<
+        'a,
+        T: AsRef<[Option<&'a str>]>,
+        U: AsRef<[Option<i32>]>,
+        V: AsRef<[bool]>,
+    >(
+        builder: &'a mut StructBuilder,
+        first: T,
+        second: U,
+        is_valid: V,
+    ) -> Result<StructArray> {
+        // Field 0: the string column.
+        let names = builder.field_builder::<BinaryBuilder>(0).unwrap();
+        for entry in first.as_ref() {
+            match entry {
+                Some(text) => {
+                    names.append_string(text)?;
+                }
+                None => {
+                    names.append_null()?;
+                }
+            }
+        }
+
+        // Field 1: the int column.
+        let numbers = builder.field_builder::<Int32Builder>(1).unwrap();
+        for entry in second.as_ref() {
+            match entry {
+                Some(number) => {
+                    numbers.append_value(*number)?;
+                }
+                None => {
+                    numbers.append_null()?;
+                }
+            }
+        }
+
+        // Validity of each struct slot.
+        for &flag in is_valid.as_ref() {
+            builder.append(flag)?;
+        }
+
+        Ok(builder.finish())
+    }
+}
diff --git a/rust/arrow/src/array/mod.rs b/rust/arrow/src/array/mod.rs
index aa14f0f..47e4219 100644
--- a/rust/arrow/src/array/mod.rs
+++ b/rust/arrow/src/array/mod.rs
@@ -57,6 +57,11 @@
mod array;
mod builder;
mod data;
+mod equal;
+
+use crate::datatypes::*;
+
+// --------------------- Array & ArrayData ---------------------
pub use self::array::Array;
pub use self::array::ArrayRef;
@@ -64,7 +69,41 @@ pub use self::data::ArrayData;
pub use self::data::ArrayDataBuilder;
pub use self::data::ArrayDataRef;
-use crate::datatypes::*;
+pub use self::array::BinaryArray;
+pub use self::array::ListArray;
+pub use self::array::PrimitiveArray;
+pub use self::array::StructArray;
+
+pub(crate) use self::array::make_array;
+
+pub type BooleanArray = PrimitiveArray<BooleanType>;
+pub type Int8Array = PrimitiveArray<Int8Type>;
+pub type Int16Array = PrimitiveArray<Int16Type>;
+pub type Int32Array = PrimitiveArray<Int32Type>;
+pub type Int64Array = PrimitiveArray<Int64Type>;
+pub type UInt8Array = PrimitiveArray<UInt8Type>;
+pub type UInt16Array = PrimitiveArray<UInt16Type>;
+pub type UInt32Array = PrimitiveArray<UInt32Type>;
+pub type UInt64Array = PrimitiveArray<UInt64Type>;
+pub type Float32Array = PrimitiveArray<Float32Type>;
+pub type Float64Array = PrimitiveArray<Float64Type>;
+
+pub type TimestampSecondArray = PrimitiveArray<TimestampSecondType>;
+pub type TimestampMillisecondArray = PrimitiveArray<TimestampMillisecondType>;
+pub type TimestampMicrosecondArray = PrimitiveArray<TimestampMicrosecondType>;
+pub type TimestampNanosecondArray = PrimitiveArray<TimestampNanosecondType>;
+pub type Date32Array = PrimitiveArray<Date32Type>;
+pub type Date64Array = PrimitiveArray<Date64Type>;
+pub type Time32SecondArray = PrimitiveArray<Time32SecondType>;
+pub type Time32MillisecondArray = PrimitiveArray<Time32MillisecondType>;
+pub type Time64MicrosecondArray = PrimitiveArray<Time64MicrosecondType>;
+pub type Time64NanosecondArray = PrimitiveArray<Time64NanosecondType>;
+// TODO add interval
+
+pub use self::array::ListArrayOps;
+pub use self::array::PrimitiveArrayOps;
+
+// --------------------- Array Builder ---------------------
pub use self::builder::BufferBuilder;
pub use self::builder::BufferBuilderTrait;
@@ -92,7 +131,12 @@ pub type Time32MillisecondBufferBuilder = BufferBuilder<Time32MillisecondType>;
pub type Time64MicrosecondBufferBuilder = BufferBuilder<Time64MicrosecondType>;
pub type Time64NanosecondBufferBuilder = BufferBuilder<Time64NanosecondType>;
+pub use self::builder::ArrayBuilder;
+pub use self::builder::BinaryBuilder;
+pub use self::builder::ListBuilder;
pub use self::builder::PrimitiveBuilder;
+pub use self::builder::StructBuilder;
+
pub type BooleanBuilder = PrimitiveBuilder<BooleanType>;
pub type Int8Builder = PrimitiveBuilder<Int8Type>;
pub type Int16Builder = PrimitiveBuilder<Int16Type>;
@@ -116,37 +160,6 @@ pub type Time32MillisecondBuilder = PrimitiveBuilder<Time32MillisecondType>;
pub type Time64MicrosecondBuilder = PrimitiveBuilder<Time64MicrosecondType>;
pub type Time64NanosecondBuilder = PrimitiveBuilder<Time64NanosecondType>;
-pub use self::builder::BinaryBuilder;
-pub use self::builder::ListBuilder;
-pub use self::builder::StructBuilder;
-
-pub use self::array::BinaryArray;
-pub use self::array::ListArray;
-pub use self::array::PrimitiveArray;
-pub use self::array::StructArray;
-
-pub(crate) use self::array::make_array;
-
-pub type BooleanArray = PrimitiveArray<BooleanType>;
-pub type Int8Array = PrimitiveArray<Int8Type>;
-pub type Int16Array = PrimitiveArray<Int16Type>;
-pub type Int32Array = PrimitiveArray<Int32Type>;
-pub type Int64Array = PrimitiveArray<Int64Type>;
-pub type UInt8Array = PrimitiveArray<UInt8Type>;
-pub type UInt16Array = PrimitiveArray<UInt16Type>;
-pub type UInt32Array = PrimitiveArray<UInt32Type>;
-pub type UInt64Array = PrimitiveArray<UInt64Type>;
-pub type Float32Array = PrimitiveArray<Float32Type>;
-pub type Float64Array = PrimitiveArray<Float64Type>;
+// --------------------- Array Equality ---------------------
-pub type TimestampSecondArray = PrimitiveArray<TimestampSecondType>;
-pub type TimestampMillisecondArray = PrimitiveArray<TimestampMillisecondType>;
-pub type TimestampMicrosecondArray = PrimitiveArray<TimestampMicrosecondType>;
-pub type TimestampNanosecondArray = PrimitiveArray<TimestampNanosecondType>;
-pub type Date32Array = PrimitiveArray<Date32Type>;
-pub type Date64Array = PrimitiveArray<Date64Type>;
-pub type Time32SecondArray = PrimitiveArray<Time32SecondType>;
-pub type Time32MillisecondArray = PrimitiveArray<Time32MillisecondType>;
-pub type Time64MicrosecondArray = PrimitiveArray<Time64MicrosecondType>;
-pub type Time64NanosecondArray = PrimitiveArray<Time64NanosecondType>;
-// TODO add interval
+pub use self::equal::ArrayEqual;