You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by ko...@apache.org on 2019/07/04 05:51:54 UTC

[arrow] 15/38: ARROW-5358: [Rust] Implement equality check for ArrayData and Array

This is an automated email from the ASF dual-hosted git repository.

kou pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git

commit 75d4f4257658bcea233dff28ae9bba2506fc7854
Author: Chao Sun <su...@apache.org>
AuthorDate: Mon Jul 1 13:00:54 2019 -0700

    ARROW-5358: [Rust] Implement equality check for ArrayData and Array
    
    This implements equality comparison for the `Array` type, which checks whether two arrays are identical in content.
    
    Besides the above, this adds two traits: `PrimitiveArrayOps` and `ListArrayOps`. The former exposes a few operations common to numeric arrays and boolean arrays, while the latter exposes operations common to list and binary arrays.
    
    Author: Chao Sun <su...@apache.org>
    
    Closes #4643 from sunchao/ARROW-5358 and squashes the following commits:
    
    e40241b1 <Chao Sun> Fixes after rebasing
    9a11efa4 <Chao Sun> Fix a bug in test
    53ac33e6 <Chao Sun> Address comments
    8663124c <Chao Sun> Replace expect with unwrap
    d3ffb27f <Chao Sun> ARROW-5358:  Implement equality check for ArrayData and Array
---
 rust/arrow/src/array/array.rs |  95 +++++-
 rust/arrow/src/array/equal.rs | 741 ++++++++++++++++++++++++++++++++++++++++++
 rust/arrow/src/array/mod.rs   |  81 +++--
 3 files changed, 879 insertions(+), 38 deletions(-)

diff --git a/rust/arrow/src/array/array.rs b/rust/arrow/src/array/array.rs
index f4af117..2c353d5 100644
--- a/rust/arrow/src/array/array.rs
+++ b/rust/arrow/src/array/array.rs
@@ -16,7 +16,7 @@
 // under the License.
 
 use std::any::Any;
-use std::convert::From;
+use std::convert::{From, TryFrom};
 use std::fmt;
 use std::io::Write;
 use std::mem;
@@ -27,6 +27,7 @@ use chrono::prelude::*;
 use super::*;
 use crate::buffer::{Buffer, MutableBuffer};
 use crate::datatypes::*;
+use crate::error::{ArrowError, Result};
 use crate::memory;
 use crate::util::bit_util;
 
@@ -41,7 +42,7 @@ const NANOSECONDS: i64 = 1_000_000_000;
 
 /// Trait for dealing with different types of array at runtime when the type of the
 /// array is not known in advance
-pub trait Array: Send + Sync {
+pub trait Array: Send + Sync + ArrayEqual {
     /// Returns the array as `Any` so that it can be downcast to a specific implementation
     fn as_any(&self) -> &Any;
 
@@ -194,6 +195,45 @@ pub struct PrimitiveArray<T: ArrowPrimitiveType> {
     raw_values: RawPtrBox<T::Native>,
 }
 
+/// Common operations for primitive types, including numeric types and boolean type.
+pub trait PrimitiveArrayOps<T: ArrowPrimitiveType> {
+    fn values(&self) -> Buffer;
+    fn value(&self, i: usize) -> T::Native;
+}
+
+// This is necessary when caller wants to access `PrimitiveArrayOps`'s methods with
+// `ArrowPrimitiveType`. It doesn't have any implementation as the actual implementations
+// are delegated to that of `ArrowNumericType` and `BooleanType`.
+impl<T: ArrowPrimitiveType> PrimitiveArrayOps<T> for PrimitiveArray<T> {
+    default fn values(&self) -> Buffer {
+        unimplemented!()
+    }
+
+    default fn value(&self, _: usize) -> T::Native {
+        unimplemented!()
+    }
+}
+
+impl<T: ArrowNumericType> PrimitiveArrayOps<T> for PrimitiveArray<T> {
+    fn values(&self) -> Buffer {
+        self.values()
+    }
+
+    fn value(&self, i: usize) -> T::Native {
+        self.value(i)
+    }
+}
+
+impl PrimitiveArrayOps<BooleanType> for BooleanArray {
+    fn values(&self) -> Buffer {
+        self.values()
+    }
+
+    fn value(&self, i: usize) -> bool {
+        self.value(i)
+    }
+}
+
 impl<T: ArrowPrimitiveType> Array for PrimitiveArray<T> {
     fn as_any(&self) -> &Any {
         self
@@ -271,7 +311,6 @@ where
     ///
     /// If a data type cannot be converted to `NaiveDateTime`, a `None` is returned.
     /// A valid value is expected, thus the user should first check for validity.
-    /// TODO: extract constants into static variables
     pub fn value_as_datetime(&self, i: usize) -> Option<NaiveDateTime> {
         let v = i64::from(self.value(i));
         match self.data_type() {
@@ -651,6 +690,23 @@ impl<T: ArrowPrimitiveType> From<ArrayDataRef> for PrimitiveArray<T> {
     }
 }
 
+/// Common operations for List types, currently `ListArray` and `BinaryArray`.
+pub trait ListArrayOps {
+    fn value_offset_at(&self, i: usize) -> i32;
+}
+
+impl ListArrayOps for ListArray {
+    fn value_offset_at(&self, i: usize) -> i32 {
+        self.value_offset_at(i)
+    }
+}
+
+impl ListArrayOps for BinaryArray {
+    fn value_offset_at(&self, i: usize) -> i32 {
+        self.value_offset_at(i)
+    }
+}
+
 /// A list array where each element is a variable-sized sequence of values with the same
 /// type.
 pub struct ListArray {
@@ -784,6 +840,16 @@ impl BinaryArray {
         self.value_offset_at(i + 1) - self.value_offset_at(i)
     }
 
+    /// Returns a clone of the value offset buffer
+    pub fn value_offsets(&self) -> Buffer {
+        self.data.buffers()[0].clone()
+    }
+
+    /// Returns a clone of the value data buffer
+    pub fn value_data(&self) -> Buffer {
+        self.data.buffers()[1].clone()
+    }
+
     #[inline]
     fn value_offset_at(&self, i: usize) -> i32 {
         unsafe { *self.value_offsets.get().offset(i as isize) }
@@ -831,7 +897,7 @@ impl<'a> From<Vec<&'a str>> for BinaryArray {
     }
 }
 
-impl<'a> From<Vec<&[u8]>> for BinaryArray {
+impl From<Vec<&[u8]>> for BinaryArray {
     fn from(v: Vec<&[u8]>) -> Self {
         let mut offsets = Vec::with_capacity(v.len() + 1);
         let mut values = Vec::new();
@@ -851,6 +917,22 @@ impl<'a> From<Vec<&[u8]>> for BinaryArray {
     }
 }
 
+impl<'a> TryFrom<Vec<Option<&'a str>>> for BinaryArray {
+    type Error = ArrowError;
+
+    fn try_from(v: Vec<Option<&'a str>>) -> Result<Self> {
+        let mut builder = BinaryBuilder::new(v.len());
+        for val in v {
+            if let Some(s) = val {
+                builder.append_string(s)?;
+            } else {
+                builder.append(false)?;
+            }
+        }
+        Ok(builder.finish())
+    }
+}
+
 /// Creates a `BinaryArray` from `List<u8>` array
 impl From<ListArray> for BinaryArray {
     fn from(v: ListArray) -> Self {
@@ -907,6 +989,11 @@ impl StructArray {
     pub fn column(&self, pos: usize) -> &ArrayRef {
         &self.boxed_fields[pos]
     }
+
+    /// Return the number of fields in this struct array
+    pub fn num_columns(&self) -> usize {
+        self.boxed_fields.len()
+    }
 }
 
 impl From<ArrayDataRef> for StructArray {
diff --git a/rust/arrow/src/array/equal.rs b/rust/arrow/src/array/equal.rs
new file mode 100644
index 0000000..5f888ab
--- /dev/null
+++ b/rust/arrow/src/array/equal.rs
@@ -0,0 +1,741 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+use super::*;
+use crate::datatypes::*;
+use crate::util::bit_util;
+
+/// Trait for `Array` equality.
+pub trait ArrayEqual {
+    /// Returns true if this array is equal to the `other` array
+    fn equals(&self, other: &dyn Array) -> bool;
+
+    /// Returns true if the range [start_idx, end_idx) is equal to
+    /// [other_start_idx, other_start_idx + end_idx - start_idx) in the `other` array
+    fn range_equals(
+        &self,
+        other: &dyn Array,
+        start_idx: usize,
+        end_idx: usize,
+        other_start_idx: usize,
+    ) -> bool;
+}
+
+impl<T: ArrowPrimitiveType> ArrayEqual for PrimitiveArray<T> {
+    default fn equals(&self, other: &dyn Array) -> bool {
+        if !base_equal(&self.data(), &other.data()) {
+            return false;
+        }
+
+        let value_buf = self.data_ref().buffers()[0].clone();
+        let other_value_buf = other.data_ref().buffers()[0].clone();
+        let byte_width = T::get_bit_width() / 8;
+
+        if self.null_count() > 0 {
+            let values = value_buf.data();
+            let other_values = other_value_buf.data();
+
+            for i in 0..self.len() {
+                if self.is_valid(i) {
+                    let start = (i + self.offset()) * byte_width;
+                    let data = &values[start..(start + byte_width)];
+                    let other_start = (i + other.offset()) * byte_width;
+                    let other_data =
+                        &other_values[other_start..(other_start + byte_width)];
+                    if data != other_data {
+                        return false;
+                    }
+                }
+            }
+        } else {
+            let start = self.offset() * byte_width;
+            let other_start = other.offset() * byte_width;
+            let len = self.len() * byte_width;
+            let data = &value_buf.data()[start..(start + len)];
+            let other_data = &other_value_buf.data()[other_start..(other_start + len)];
+            if data != other_data {
+                return false;
+            }
+        }
+
+        true
+    }
+
+    default fn range_equals(
+        &self,
+        other: &dyn Array,
+        start_idx: usize,
+        end_idx: usize,
+        other_start_idx: usize,
+    ) -> bool {
+        assert!(other_start_idx + (end_idx - start_idx) <= other.len());
+        let other = other.as_any().downcast_ref::<PrimitiveArray<T>>().unwrap();
+
+        let mut j = other_start_idx;
+        for i in start_idx..end_idx {
+            let is_null = self.is_null(i);
+            let other_is_null = other.is_null(j);
+            if is_null != other_is_null || (!is_null && self.value(i) != other.value(j)) {
+                return false;
+            }
+            j += 1;
+        }
+
+        true
+    }
+}
+
+impl ArrayEqual for BooleanArray {
+    fn equals(&self, other: &dyn Array) -> bool {
+        if !base_equal(&self.data(), &other.data()) {
+            return false;
+        }
+
+        let values = self.data_ref().buffers()[0].data();
+        let other_values = other.data_ref().buffers()[0].data();
+
+        // TODO: we can do this more efficiently if all values are not-null
+        for i in 0..self.len() {
+            if self.is_valid(i) {
+                if bit_util::get_bit(values, i + self.offset())
+                    != bit_util::get_bit(other_values, i + other.offset())
+                {
+                    return false;
+                }
+            }
+        }
+
+        true
+    }
+}
+
+impl<T: ArrowNumericType> PartialEq for PrimitiveArray<T> {
+    fn eq(&self, other: &PrimitiveArray<T>) -> bool {
+        self.equals(other)
+    }
+}
+
+impl ArrayEqual for ListArray {
+    fn equals(&self, other: &dyn Array) -> bool {
+        if !base_equal(&self.data(), &other.data()) {
+            return false;
+        }
+
+        let other = other.as_any().downcast_ref::<ListArray>().unwrap();
+
+        if !value_offset_equal(self, other) {
+            return false;
+        }
+
+        if !self.values().range_equals(
+            &*other.values(),
+            self.value_offset(0) as usize,
+            self.value_offset(self.len()) as usize,
+            other.value_offset(0) as usize,
+        ) {
+            return false;
+        }
+
+        true
+    }
+
+    /// Compares list slots `[start_idx, end_idx)` of `self` with the slots of `other`
+    /// starting at `other_start_idx`; panics if `other` is not a `ListArray`.
+    fn range_equals(
+        &self,
+        other: &dyn Array,
+        start_idx: usize,
+        end_idx: usize,
+        other_start_idx: usize,
+    ) -> bool {
+        assert!(other_start_idx + (end_idx - start_idx) <= other.len());
+        let other = other.as_any().downcast_ref::<ListArray>().unwrap();
+
+        // Pair the indices up front so `j` still advances when a null slot is skipped.
+        for (i, j) in (start_idx..end_idx).zip(other_start_idx..) {
+            let is_null = self.is_null(i);
+            let other_is_null = other.is_null(j);
+
+            if is_null != other_is_null {
+                return false;
+            }
+
+            if is_null {
+                continue;
+            }
+
+            let start_offset = self.value_offset(i) as usize;
+            let end_offset = self.value_offset(i + 1) as usize;
+            let other_start_offset = other.value_offset(j) as usize;
+            let other_end_offset = other.value_offset(j + 1) as usize;
+
+            if end_offset - start_offset != other_end_offset - other_start_offset {
+                return false;
+            }
+
+            if !self.values().range_equals(
+                &*other.values(),
+                start_offset,
+                end_offset,
+                other_start_offset,
+            ) {
+                return false;
+            }
+        }
+
+        true
+    }
+}
+
+impl ArrayEqual for BinaryArray {
+    fn equals(&self, other: &dyn Array) -> bool {
+        if !base_equal(&self.data(), &other.data()) {
+            return false;
+        }
+
+        let other = other.as_any().downcast_ref::<BinaryArray>().unwrap();
+
+        if !value_offset_equal(self, other) {
+            return false;
+        }
+
+        // TODO: handle null & length == 0 case?
+
+        let value_buf = self.value_data();
+        let other_value_buf = other.value_data();
+        let value_data = value_buf.data();
+        let other_value_data = other_value_buf.data();
+
+        if self.null_count() == 0 {
+            // No offset in both - just do memcmp
+            if self.offset() == 0 && other.offset() == 0 {
+                let len = self.value_offset(self.len()) as usize;
+                return value_data[..len] == other_value_data[..len];
+            } else {
+                let start = self.value_offset(0) as usize;
+                let other_start = other.value_offset(0) as usize;
+                let len = (self.value_offset(self.len()) - self.value_offset(0)) as usize;
+                return value_data[start..(start + len)]
+                    == other_value_data[other_start..(other_start + len)];
+            }
+        } else {
+            for i in 0..self.len() {
+                if self.is_null(i) {
+                    continue;
+                }
+
+                let start = self.value_offset(i) as usize;
+                let other_start = other.value_offset(i) as usize;
+                let len = self.value_length(i) as usize;
+                if value_data[start..(start + len)]
+                    != other_value_data[other_start..(other_start + len)]
+                {
+                    return false;
+                }
+            }
+        }
+
+        true
+    }
+
+    /// Compares binary slots `[start_idx, end_idx)` of `self` with the slots of `other`
+    /// starting at `other_start_idx`; panics if `other` is not a `BinaryArray`.
+    fn range_equals(
+        &self,
+        other: &dyn Array,
+        start_idx: usize,
+        end_idx: usize,
+        other_start_idx: usize,
+    ) -> bool {
+        assert!(other_start_idx + (end_idx - start_idx) <= other.len());
+        let other = other.as_any().downcast_ref::<BinaryArray>().unwrap();
+
+        let value_buf = self.value_data();
+        let other_value_buf = other.value_data();
+        let value_data = value_buf.data();
+        let other_value_data = other_value_buf.data();
+
+        // Pair the indices up front so `j` still advances when a null slot is skipped.
+        for (i, j) in (start_idx..end_idx).zip(other_start_idx..) {
+            let is_null = self.is_null(i);
+            let other_is_null = other.is_null(j);
+
+            if is_null != other_is_null {
+                return false;
+            }
+
+            if is_null {
+                continue;
+            }
+
+            let start_offset = self.value_offset(i) as usize;
+            let end_offset = self.value_offset(i + 1) as usize;
+            let other_start_offset = other.value_offset(j) as usize;
+            let other_end_offset = other.value_offset(j + 1) as usize;
+
+            if end_offset - start_offset != other_end_offset - other_start_offset {
+                return false;
+            }
+
+            if end_offset - start_offset > 0 {
+                let len = end_offset - start_offset;
+                if value_data[start_offset..(start_offset + len)]
+                    != other_value_data[other_start_offset..(other_start_offset + len)]
+                {
+                    return false;
+                }
+            }
+        }
+
+        true
+    }
+}
+
+impl ArrayEqual for StructArray {
+    fn equals(&self, other: &dyn Array) -> bool {
+        if !base_equal(&self.data(), &other.data()) {
+            return false;
+        }
+
+        let other = other.as_any().downcast_ref::<StructArray>().unwrap();
+
+        for i in 0..self.len() {
+            let is_null = self.is_null(i);
+            let other_is_null = other.is_null(i);
+
+            if is_null != other_is_null {
+                return false;
+            }
+
+            if is_null {
+                continue;
+            }
+            for j in 0..self.num_columns() {
+                if !self.column(j).range_equals(&**other.column(j), i, i + 1, i) {
+                    return false;
+                }
+            }
+        }
+
+        true
+    }
+
+    /// Compares struct slots `[start_idx, end_idx)` of `self` with the slots of `other`
+    /// starting at `other_start_idx`; panics if `other` is not a `StructArray`.
+    fn range_equals(
+        &self,
+        other: &dyn Array,
+        start_idx: usize,
+        end_idx: usize,
+        other_start_idx: usize,
+    ) -> bool {
+        assert!(other_start_idx + (end_idx - start_idx) <= other.len());
+        let other = other.as_any().downcast_ref::<StructArray>().unwrap();
+
+        // Pair the indices up front so `j` still advances when a null slot is skipped.
+        for (i, j) in (start_idx..end_idx).zip(other_start_idx..) {
+            let is_null = self.is_null(i);
+            let other_is_null = other.is_null(j);
+
+            if is_null != other_is_null {
+                return false;
+            }
+
+            if is_null {
+                continue;
+            }
+            for k in 0..self.num_columns() {
+                if !self.column(k).range_equals(&**other.column(k), i, i + 1, j) {
+                    return false;
+                }
+            }
+        }
+
+        true
+    }
+}
+
+// Compare if the common basic fields between the two arrays are equal
+fn base_equal(this: &ArrayDataRef, other: &ArrayDataRef) -> bool {
+    if this.data_type() != other.data_type() {
+        return false;
+    }
+    if this.len != other.len {
+        return false;
+    }
+    if this.null_count != other.null_count {
+        return false;
+    }
+    if this.null_count > 0 {
+        let null_bitmap = this.null_bitmap().as_ref().unwrap();
+        let other_null_bitmap = other.null_bitmap().as_ref().unwrap();
+        let null_buf = null_bitmap.bits.data();
+        let other_null_buf = other_null_bitmap.bits.data();
+        for i in 0..this.len() {
+            if bit_util::get_bit(null_buf, i + this.offset())
+                != bit_util::get_bit(other_null_buf, i + other.offset())
+            {
+                return false;
+            }
+        }
+    }
+    true
+}
+
+// Compare if the value offsets are equal between the two list arrays
+fn value_offset_equal<T: Array + ListArrayOps>(this: &T, other: &T) -> bool {
+    // Check if offsets differ
+    if this.offset() == 0 && other.offset() == 0 {
+        let offset_data = &this.data_ref().buffers()[0];
+        let other_offset_data = &other.data_ref().buffers()[0];
+        return offset_data.data()[0..((this.len() + 1) * 4)]
+            == other_offset_data.data()[0..((other.len() + 1) * 4)];
+    }
+
+    // The expensive case
+    for i in 0..this.len() + 1 {
+        if this.value_offset_at(i) - this.value_offset_at(0)
+            != other.value_offset_at(i) - other.value_offset_at(0)
+        {
+            return false;
+        }
+    }
+
+    true
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    use std::convert::TryFrom;
+
+    use crate::error::Result;
+
+    #[test]
+    fn test_primitive_equal() {
+        let a = Int32Array::from(vec![1, 2, 3]);
+        let b = Int32Array::from(vec![1, 2, 3]);
+        assert!(a.equals(&b));
+        assert!(b.equals(&a));
+
+        let b = Int32Array::from(vec![1, 2, 4]);
+        assert!(!a.equals(&b));
+        assert!(!b.equals(&a));
+
+        // Test the case where null_count > 0
+
+        let a = Int32Array::from(vec![Some(1), None, Some(2), Some(3)]);
+        let b = Int32Array::from(vec![Some(1), None, Some(2), Some(3)]);
+        assert!(a.equals(&b));
+        assert!(b.equals(&a));
+
+        let b = Int32Array::from(vec![Some(1), None, None, Some(3)]);
+        assert!(!a.equals(&b));
+        assert!(!b.equals(&a));
+
+        let b = Int32Array::from(vec![Some(1), None, Some(2), Some(4)]);
+        assert!(!a.equals(&b));
+        assert!(!b.equals(&a));
+
+        // Test the case where offset != 0
+
+        let a_slice = a.slice(1, 2);
+        let b_slice = b.slice(1, 2);
+        assert!(a_slice.equals(&*b_slice));
+        assert!(b_slice.equals(&*a_slice));
+    }
+
+    #[test]
+    fn test_boolean_equal() {
+        let a = BooleanArray::from(vec![false, false, true]);
+        let b = BooleanArray::from(vec![false, false, true]);
+        assert!(a.equals(&b));
+        assert!(b.equals(&a));
+
+        let b = BooleanArray::from(vec![false, false, false]);
+        assert!(!a.equals(&b));
+        assert!(!b.equals(&a));
+
+        // Test the case where null_count > 0
+
+        let a = BooleanArray::from(vec![Some(false), None, None, Some(true)]);
+        let b = BooleanArray::from(vec![Some(false), None, None, Some(true)]);
+        assert!(a.equals(&b));
+        assert!(b.equals(&a));
+
+        let b = BooleanArray::from(vec![None, None, None, Some(true)]);
+        assert!(!a.equals(&b));
+        assert!(!b.equals(&a));
+
+        let b = BooleanArray::from(vec![Some(true), None, None, Some(true)]);
+        assert!(!a.equals(&b));
+        assert!(!b.equals(&a));
+
+        // Test the case where offset != 0
+
+        let a = BooleanArray::from(vec![false, true, false, true, false, false, true]);
+        let b = BooleanArray::from(vec![false, false, false, true, false, true, true]);
+        assert!(!a.equals(&b));
+        assert!(!b.equals(&a));
+
+        let a_slice = a.slice(2, 3);
+        let b_slice = b.slice(2, 3);
+        assert!(a_slice.equals(&*b_slice));
+        assert!(b_slice.equals(&*a_slice));
+
+        let a_slice = a.slice(3, 4);
+        let b_slice = b.slice(3, 4);
+        assert!(!a_slice.equals(&*b_slice));
+        assert!(!b_slice.equals(&*a_slice));
+    }
+
+    #[test]
+    fn test_list_equal() {
+        let mut a_builder = ListBuilder::new(Int32Builder::new(10));
+        let mut b_builder = ListBuilder::new(Int32Builder::new(10));
+
+        let a = create_list_array(&mut a_builder, &[Some(&[1, 2, 3]), Some(&[4, 5, 6])])
+            .unwrap();
+        let b = create_list_array(&mut b_builder, &[Some(&[1, 2, 3]), Some(&[4, 5, 6])])
+            .unwrap();
+
+        assert!(a.equals(&b));
+        assert!(b.equals(&a));
+
+        let b = create_list_array(&mut a_builder, &[Some(&[1, 2, 3]), Some(&[4, 5, 7])])
+            .unwrap();
+        assert!(!a.equals(&b));
+        assert!(!b.equals(&a));
+
+        // Test the case where null_count > 0
+
+        let a = create_list_array(
+            &mut a_builder,
+            &[Some(&[1, 2]), None, None, Some(&[3, 4]), None, None],
+        )
+        .unwrap();
+        let b = create_list_array(
+            &mut a_builder,
+            &[Some(&[1, 2]), None, None, Some(&[3, 4]), None, None],
+        )
+        .unwrap();
+        assert!(a.equals(&b));
+        assert!(b.equals(&a));
+
+        let b = create_list_array(
+            &mut a_builder,
+            &[
+                Some(&[1, 2]),
+                None,
+                Some(&[5, 6]),
+                Some(&[3, 4]),
+                None,
+                None,
+            ],
+        )
+        .unwrap();
+        assert!(!a.equals(&b));
+        assert!(!b.equals(&a));
+
+        let b = create_list_array(
+            &mut a_builder,
+            &[Some(&[1, 2]), None, None, Some(&[3, 5]), None, None],
+        )
+        .unwrap();
+        assert!(!a.equals(&b));
+        assert!(!b.equals(&a));
+
+        // Test the case where offset != 0
+
+        let a_slice = a.slice(0, 3);
+        let b_slice = b.slice(0, 3);
+        assert!(a_slice.equals(&*b_slice));
+        assert!(b_slice.equals(&*a_slice));
+
+        let a_slice = a.slice(0, 5);
+        let b_slice = b.slice(0, 5);
+        assert!(!a_slice.equals(&*b_slice));
+        assert!(!b_slice.equals(&*a_slice));
+
+        let a_slice = a.slice(4, 1);
+        let b_slice = b.slice(4, 1);
+        assert!(a_slice.equals(&*b_slice));
+        assert!(b_slice.equals(&*a_slice));
+    }
+
+    #[test]
+    fn test_binary_equal() {
+        let a = BinaryArray::from(vec!["hello", "world"]);
+        let b = BinaryArray::from(vec!["hello", "world"]);
+        assert!(a.equals(&b));
+        assert!(b.equals(&a));
+
+        let b = BinaryArray::from(vec!["hello", "arrow"]);
+        assert!(!a.equals(&b));
+        assert!(!b.equals(&a));
+
+        // Test the case where null_count > 0
+
+        let a = BinaryArray::try_from(vec![
+            Some("hello"),
+            None,
+            None,
+            Some("world"),
+            None,
+            None,
+        ])
+        .unwrap();
+
+        let b = BinaryArray::try_from(vec![
+            Some("hello"),
+            None,
+            None,
+            Some("world"),
+            None,
+            None,
+        ])
+        .unwrap();
+        assert!(a.equals(&b));
+        assert!(b.equals(&a));
+
+        let b = BinaryArray::try_from(vec![
+            Some("hello"),
+            Some("foo"),
+            None,
+            Some("world"),
+            None,
+            None,
+        ])
+        .unwrap();
+        assert!(!a.equals(&b));
+        assert!(!b.equals(&a));
+
+        let b = BinaryArray::try_from(vec![
+            Some("hello"),
+            None,
+            None,
+            Some("arrow"),
+            None,
+            None,
+        ])
+        .unwrap();
+        assert!(!a.equals(&b));
+        assert!(!b.equals(&a));
+
+        // Test the case where offset != 0
+
+        let a_slice = a.slice(0, 3);
+        let b_slice = b.slice(0, 3);
+        assert!(a_slice.equals(&*b_slice));
+        assert!(b_slice.equals(&*a_slice));
+
+        let a_slice = a.slice(0, 5);
+        let b_slice = b.slice(0, 5);
+        assert!(!a_slice.equals(&*b_slice));
+        assert!(!b_slice.equals(&*a_slice));
+
+        let a_slice = a.slice(4, 1);
+        let b_slice = b.slice(4, 1);
+        assert!(a_slice.equals(&*b_slice));
+        assert!(b_slice.equals(&*a_slice));
+    }
+
+    #[test]
+    fn test_struct_equal() {
+        let string_builder = BinaryBuilder::new(5);
+        let int_builder = Int32Builder::new(5);
+
+        let mut fields = Vec::new();
+        let mut field_builders = Vec::new();
+        fields.push(Field::new("f1", DataType::Utf8, false));
+        field_builders.push(Box::new(string_builder) as Box<ArrayBuilder>);
+        fields.push(Field::new("f2", DataType::Int32, false));
+        field_builders.push(Box::new(int_builder) as Box<ArrayBuilder>);
+
+        let mut builder = StructBuilder::new(fields, field_builders);
+
+        let a = create_struct_array(
+            &mut builder,
+            &[Some("joe"), None, None, Some("mark"), Some("doe")],
+            &[Some(1), Some(2), None, Some(4), Some(5)],
+            &[true, true, false, true, true],
+        )
+        .unwrap();
+        let b = create_struct_array(
+            &mut builder,
+            &[Some("joe"), None, None, Some("mark"), Some("doe")],
+            &[Some(1), Some(2), None, Some(4), Some(5)],
+            &[true, true, false, true, true],
+        )
+        .unwrap();
+
+        assert!(a.equals(&b));
+        assert!(b.equals(&a));
+    }
+
+    fn create_list_array<'a, U: AsRef<[i32]>, T: AsRef<[Option<U>]>>(
+        builder: &'a mut ListBuilder<Int32Builder>,
+        data: T,
+    ) -> Result<ListArray> {
+        for d in data.as_ref() {
+            if let Some(v) = d {
+                builder.values().append_slice(v.as_ref())?;
+                builder.append(true)?
+            } else {
+                builder.append(false)?
+            }
+        }
+        Ok(builder.finish())
+    }
+
+    fn create_struct_array<
+        'a,
+        T: AsRef<[Option<&'a str>]>,
+        U: AsRef<[Option<i32>]>,
+        V: AsRef<[bool]>,
+    >(
+        builder: &'a mut StructBuilder,
+        first: T,
+        second: U,
+        is_valid: V,
+    ) -> Result<StructArray> {
+        let string_builder = builder.field_builder::<BinaryBuilder>(0).unwrap();
+        for v in first.as_ref() {
+            if let Some(s) = v {
+                string_builder.append_string(s)?;
+            } else {
+                string_builder.append_null()?;
+            }
+        }
+
+        let int_builder = builder.field_builder::<Int32Builder>(1).unwrap();
+        for v in second.as_ref() {
+            if let Some(i) = v {
+                int_builder.append_value(*i)?;
+            } else {
+                int_builder.append_null()?;
+            }
+        }
+
+        for v in is_valid.as_ref() {
+            builder.append(*v)?
+        }
+
+        Ok(builder.finish())
+    }
+}
diff --git a/rust/arrow/src/array/mod.rs b/rust/arrow/src/array/mod.rs
index aa14f0f..47e4219 100644
--- a/rust/arrow/src/array/mod.rs
+++ b/rust/arrow/src/array/mod.rs
@@ -57,6 +57,11 @@
 mod array;
 mod builder;
 mod data;
+mod equal;
+
+use crate::datatypes::*;
+
+// --------------------- Array & ArrayData ---------------------
 
 pub use self::array::Array;
 pub use self::array::ArrayRef;
@@ -64,7 +69,41 @@ pub use self::data::ArrayData;
 pub use self::data::ArrayDataBuilder;
 pub use self::data::ArrayDataRef;
 
-use crate::datatypes::*;
+pub use self::array::BinaryArray;
+pub use self::array::ListArray;
+pub use self::array::PrimitiveArray;
+pub use self::array::StructArray;
+
+pub(crate) use self::array::make_array;
+
+pub type BooleanArray = PrimitiveArray<BooleanType>;
+pub type Int8Array = PrimitiveArray<Int8Type>;
+pub type Int16Array = PrimitiveArray<Int16Type>;
+pub type Int32Array = PrimitiveArray<Int32Type>;
+pub type Int64Array = PrimitiveArray<Int64Type>;
+pub type UInt8Array = PrimitiveArray<UInt8Type>;
+pub type UInt16Array = PrimitiveArray<UInt16Type>;
+pub type UInt32Array = PrimitiveArray<UInt32Type>;
+pub type UInt64Array = PrimitiveArray<UInt64Type>;
+pub type Float32Array = PrimitiveArray<Float32Type>;
+pub type Float64Array = PrimitiveArray<Float64Type>;
+
+pub type TimestampSecondArray = PrimitiveArray<TimestampSecondType>;
+pub type TimestampMillisecondArray = PrimitiveArray<TimestampMillisecondType>;
+pub type TimestampMicrosecondArray = PrimitiveArray<TimestampMicrosecondType>;
+pub type TimestampNanosecondArray = PrimitiveArray<TimestampNanosecondType>;
+pub type Date32Array = PrimitiveArray<Date32Type>;
+pub type Date64Array = PrimitiveArray<Date64Type>;
+pub type Time32SecondArray = PrimitiveArray<Time32SecondType>;
+pub type Time32MillisecondArray = PrimitiveArray<Time32MillisecondType>;
+pub type Time64MicrosecondArray = PrimitiveArray<Time64MicrosecondType>;
+pub type Time64NanosecondArray = PrimitiveArray<Time64NanosecondType>;
+// TODO add interval
+
+pub use self::array::ListArrayOps;
+pub use self::array::PrimitiveArrayOps;
+
+// --------------------- Array Builder ---------------------
 
 pub use self::builder::BufferBuilder;
 pub use self::builder::BufferBuilderTrait;
@@ -92,7 +131,12 @@ pub type Time32MillisecondBufferBuilder = BufferBuilder<Time32MillisecondType>;
 pub type Time64MicrosecondBufferBuilder = BufferBuilder<Time64MicrosecondType>;
 pub type Time64NanosecondBufferBuilder = BufferBuilder<Time64NanosecondType>;
 
+pub use self::builder::ArrayBuilder;
+pub use self::builder::BinaryBuilder;
+pub use self::builder::ListBuilder;
 pub use self::builder::PrimitiveBuilder;
+pub use self::builder::StructBuilder;
+
 pub type BooleanBuilder = PrimitiveBuilder<BooleanType>;
 pub type Int8Builder = PrimitiveBuilder<Int8Type>;
 pub type Int16Builder = PrimitiveBuilder<Int16Type>;
@@ -116,37 +160,6 @@ pub type Time32MillisecondBuilder = PrimitiveBuilder<Time32MillisecondType>;
 pub type Time64MicrosecondBuilder = PrimitiveBuilder<Time64MicrosecondType>;
 pub type Time64NanosecondBuilder = PrimitiveBuilder<Time64NanosecondType>;
 
-pub use self::builder::BinaryBuilder;
-pub use self::builder::ListBuilder;
-pub use self::builder::StructBuilder;
-
-pub use self::array::BinaryArray;
-pub use self::array::ListArray;
-pub use self::array::PrimitiveArray;
-pub use self::array::StructArray;
-
-pub(crate) use self::array::make_array;
-
-pub type BooleanArray = PrimitiveArray<BooleanType>;
-pub type Int8Array = PrimitiveArray<Int8Type>;
-pub type Int16Array = PrimitiveArray<Int16Type>;
-pub type Int32Array = PrimitiveArray<Int32Type>;
-pub type Int64Array = PrimitiveArray<Int64Type>;
-pub type UInt8Array = PrimitiveArray<UInt8Type>;
-pub type UInt16Array = PrimitiveArray<UInt16Type>;
-pub type UInt32Array = PrimitiveArray<UInt32Type>;
-pub type UInt64Array = PrimitiveArray<UInt64Type>;
-pub type Float32Array = PrimitiveArray<Float32Type>;
-pub type Float64Array = PrimitiveArray<Float64Type>;
+// --------------------- Array Equality ---------------------
 
-pub type TimestampSecondArray = PrimitiveArray<TimestampSecondType>;
-pub type TimestampMillisecondArray = PrimitiveArray<TimestampMillisecondType>;
-pub type TimestampMicrosecondArray = PrimitiveArray<TimestampMicrosecondType>;
-pub type TimestampNanosecondArray = PrimitiveArray<TimestampNanosecondType>;
-pub type Date32Array = PrimitiveArray<Date32Type>;
-pub type Date64Array = PrimitiveArray<Date64Type>;
-pub type Time32SecondArray = PrimitiveArray<Time32SecondType>;
-pub type Time32MillisecondArray = PrimitiveArray<Time32MillisecondType>;
-pub type Time64MicrosecondArray = PrimitiveArray<Time64MicrosecondType>;
-pub type Time64NanosecondArray = PrimitiveArray<Time64NanosecondType>;
-// TODO add interval
+pub use self::equal::ArrayEqual;