You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by vi...@apache.org on 2022/05/08 07:00:40 UTC
[arrow-rs] branch master updated: Exclude `dict_id` and `dict_is_ordered` from equality comparison of `Field` (#1647)
This is an automated email from the ASF dual-hosted git repository.
viirya pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git
The following commit(s) were added to refs/heads/master by this push:
new f74ad3473 Exclude `dict_id` and `dict_is_ordered` from equality comparison of `Field` (#1647)
f74ad3473 is described below
commit f74ad34733a2dbaf6376db79c396595733835542
Author: Liang-Chi Hsieh <vi...@gmail.com>
AuthorDate: Sun May 8 00:00:35 2022 -0700
Exclude `dict_id` and `dict_is_ordered` from equality comparison of `Field` (#1647)
* Impl Eq for Field
* Impl Hash
* Impl PartialOrd for Field
* Impl Ord instead
* Add comment and test.
---
arrow/src/datatypes/field.rs | 86 +++++++++++++++++++++++++++++++++++++++++++-
1 file changed, 85 insertions(+), 1 deletion(-)
diff --git a/arrow/src/datatypes/field.rs b/arrow/src/datatypes/field.rs
index 7ad790268..ded7fc67b 100644
--- a/arrow/src/datatypes/field.rs
+++ b/arrow/src/datatypes/field.rs
@@ -15,7 +15,9 @@
// specific language governing permissions and limitations
// under the License.
+use std::cmp::Ordering;
use std::collections::BTreeMap;
+use std::hash::{Hash, Hasher};
use serde_derive::{Deserialize, Serialize};
use serde_json::{json, Value};
@@ -27,7 +29,7 @@ use super::DataType;
/// Contains the meta-data for a single relative type.
///
/// The `Schema` object is an ordered collection of `Field` objects.
-#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)]
+#[derive(Serialize, Deserialize, Debug, Clone)]
pub struct Field {
name: String,
data_type: DataType,
@@ -39,6 +41,47 @@ pub struct Field {
metadata: Option<BTreeMap<String, String>>,
}
+// Auto-derive `PartialEq` traits will pull `dict_id` and `dict_is_ordered`
+// into comparison. However, these properties are only used in IPC context
+// for matching dictionary encoded data. They are not necessary to be same
+// to consider schema equality. For example, in C++ `Field` implementation,
+// it doesn't contain these dictionary properties too.
+impl PartialEq for Field {
+ fn eq(&self, other: &Self) -> bool {
+ self.name == other.name
+ && self.data_type == other.data_type
+ && self.nullable == other.nullable
+ && self.metadata == other.metadata
+ }
+}
+
+impl Eq for Field {}
+
+impl PartialOrd for Field {
+ fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
+ Some(self.cmp(other))
+ }
+}
+
+impl Ord for Field {
+ fn cmp(&self, other: &Self) -> Ordering {
+ self.name
+ .cmp(other.name())
+ .then(self.data_type.cmp(other.data_type()))
+ .then(self.nullable.cmp(&other.nullable))
+ .then(self.metadata.cmp(&other.metadata))
+ }
+}
+
+impl Hash for Field {
+ fn hash<H: Hasher>(&self, state: &mut H) {
+ self.name.hash(state);
+ self.data_type.hash(state);
+ self.nullable.hash(state);
+ self.metadata.hash(state);
+ }
+}
+
impl Field {
/// Creates a new field
pub fn new(name: &str, data_type: DataType, nullable: bool) -> Self {
@@ -623,6 +666,8 @@ impl std::fmt::Display for Field {
#[cfg(test)]
mod test {
use super::{DataType, Field};
+ use std::collections::hash_map::DefaultHasher;
+ use std::hash::{Hash, Hasher};
#[test]
fn test_fields_with_dict_id() {
@@ -676,4 +721,43 @@ mod test {
assert_eq!(dict2, *field);
}
}
+
+ fn get_field_hash(field: &Field) -> u64 {
+ let mut s = DefaultHasher::new();
+ field.hash(&mut s);
+ s.finish()
+ }
+
+ #[test]
+ fn test_field_comparison_case() {
+ // dictionary-encoding properties not used for field comparison
+ let dict1 = Field::new_dict(
+ "dict1",
+ DataType::Dictionary(DataType::Utf8.into(), DataType::Int32.into()),
+ false,
+ 10,
+ false,
+ );
+ let dict2 = Field::new_dict(
+ "dict1",
+ DataType::Dictionary(DataType::Utf8.into(), DataType::Int32.into()),
+ false,
+ 20,
+ false,
+ );
+
+ assert_eq!(dict1, dict2);
+ assert_eq!(get_field_hash(&dict1), get_field_hash(&dict2));
+
+ let dict1 = Field::new_dict(
+ "dict0",
+ DataType::Dictionary(DataType::Utf8.into(), DataType::Int32.into()),
+ false,
+ 10,
+ false,
+ );
+
+ assert_ne!(dict1, dict2);
+ assert_ne!(get_field_hash(&dict1), get_field_hash(&dict2));
+ }
}