You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by vi...@apache.org on 2022/05/08 07:00:40 UTC

[arrow-rs] branch master updated: Exclude `dict_id` and `dict_is_ordered` from equality comparison of `Field` (#1647)

This is an automated email from the ASF dual-hosted git repository.

viirya pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git


The following commit(s) were added to refs/heads/master by this push:
     new f74ad3473 Exclude `dict_id` and `dict_is_ordered` from equality comparison of `Field` (#1647)
f74ad3473 is described below

commit f74ad34733a2dbaf6376db79c396595733835542
Author: Liang-Chi Hsieh <vi...@gmail.com>
AuthorDate: Sun May 8 00:00:35 2022 -0700

    Exclude `dict_id` and `dict_is_ordered` from equality comparison of `Field` (#1647)
    
    * Impl Eq for Field
    
    * Impl Hash
    
    * Impl PartialOrd for Field
    
    * Impl Ord instead
    
    * Add comment and test.
---
 arrow/src/datatypes/field.rs | 86 +++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 85 insertions(+), 1 deletion(-)

diff --git a/arrow/src/datatypes/field.rs b/arrow/src/datatypes/field.rs
index 7ad790268..ded7fc67b 100644
--- a/arrow/src/datatypes/field.rs
+++ b/arrow/src/datatypes/field.rs
@@ -15,7 +15,9 @@
 // specific language governing permissions and limitations
 // under the License.
 
+use std::cmp::Ordering;
 use std::collections::BTreeMap;
+use std::hash::{Hash, Hasher};
 
 use serde_derive::{Deserialize, Serialize};
 use serde_json::{json, Value};
@@ -27,7 +29,7 @@ use super::DataType;
 /// Contains the meta-data for a single relative type.
 ///
 /// The `Schema` object is an ordered collection of `Field` objects.
-#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)]
+#[derive(Serialize, Deserialize, Debug, Clone)]
 pub struct Field {
     name: String,
     data_type: DataType,
@@ -39,6 +41,47 @@ pub struct Field {
     metadata: Option<BTreeMap<String, String>>,
 }
 
+// Auto-derive `PartialEq` traits will pull `dict_id` and `dict_is_ordered`
+// into comparison. However, these properties are only used in IPC context
+// for matching dictionary encoded data. They are not necessary to be same
+// to consider schema equality. For example, in C++ `Field` implementation,
+// it doesn't contain these dictionary properties too.
+impl PartialEq for Field {
+    fn eq(&self, other: &Self) -> bool {
+        self.name == other.name
+            && self.data_type == other.data_type
+            && self.nullable == other.nullable
+            && self.metadata == other.metadata
+    }
+}
+
+impl Eq for Field {}
+
+impl PartialOrd for Field {
+    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
+        Some(self.cmp(other))
+    }
+}
+
+impl Ord for Field {
+    fn cmp(&self, other: &Self) -> Ordering {
+        self.name
+            .cmp(other.name())
+            .then(self.data_type.cmp(other.data_type()))
+            .then(self.nullable.cmp(&other.nullable))
+            .then(self.metadata.cmp(&other.metadata))
+    }
+}
+
+impl Hash for Field {
+    fn hash<H: Hasher>(&self, state: &mut H) {
+        self.name.hash(state);
+        self.data_type.hash(state);
+        self.nullable.hash(state);
+        self.metadata.hash(state);
+    }
+}
+
 impl Field {
     /// Creates a new field
     pub fn new(name: &str, data_type: DataType, nullable: bool) -> Self {
@@ -623,6 +666,8 @@ impl std::fmt::Display for Field {
 #[cfg(test)]
 mod test {
     use super::{DataType, Field};
+    use std::collections::hash_map::DefaultHasher;
+    use std::hash::{Hash, Hasher};
 
     #[test]
     fn test_fields_with_dict_id() {
@@ -676,4 +721,43 @@ mod test {
             assert_eq!(dict2, *field);
         }
     }
+
+    fn get_field_hash(field: &Field) -> u64 {
+        let mut s = DefaultHasher::new();
+        field.hash(&mut s);
+        s.finish()
+    }
+
+    #[test]
+    fn test_field_comparison_case() {
+        // dictionary-encoding properties not used for field comparison
+        let dict1 = Field::new_dict(
+            "dict1",
+            DataType::Dictionary(DataType::Utf8.into(), DataType::Int32.into()),
+            false,
+            10,
+            false,
+        );
+        let dict2 = Field::new_dict(
+            "dict1",
+            DataType::Dictionary(DataType::Utf8.into(), DataType::Int32.into()),
+            false,
+            20,
+            false,
+        );
+
+        assert_eq!(dict1, dict2);
+        assert_eq!(get_field_hash(&dict1), get_field_hash(&dict2));
+
+        let dict1 = Field::new_dict(
+            "dict0",
+            DataType::Dictionary(DataType::Utf8.into(), DataType::Int32.into()),
+            false,
+            10,
+            false,
+        );
+
+        assert_ne!(dict1, dict2);
+        assert_ne!(get_field_hash(&dict1), get_field_hash(&dict2));
+    }
 }