You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by tu...@apache.org on 2022/08/16 16:15:42 UTC
[arrow-rs] branch master updated: clean (#2461)
This is an automated email from the ASF dual-hosted git repository.
tustvold pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git
The following commit(s) were added to refs/heads/master by this push:
new 5c79210b2 clean (#2461)
5c79210b2 is described below
commit 5c79210b20aa8d87d581d1932329f21a68d4d676
Author: Remzi Yang <59...@users.noreply.github.com>
AuthorDate: Wed Aug 17 00:15:38 2022 +0800
clean (#2461)
Signed-off-by: remzi <13...@gmail.com>
Signed-off-by: remzi <13...@gmail.com>
---
arrow/src/datatypes/schema.rs | 108 ++++++++++++++++++++++--------------------
1 file changed, 57 insertions(+), 51 deletions(-)
diff --git a/arrow/src/datatypes/schema.rs b/arrow/src/datatypes/schema.rs
index 3ab627c68..d08042b51 100644
--- a/arrow/src/datatypes/schema.rs
+++ b/arrow/src/datatypes/schema.rs
@@ -49,7 +49,7 @@ impl Schema {
}
}
- /// Creates a new `Schema` from a sequence of `Field` values.
+ /// Creates a new [`Schema`] from a sequence of [`Field`] values.
///
/// # Example
///
@@ -64,7 +64,7 @@ impl Schema {
Self::new_with_metadata(fields, HashMap::new())
}
- /// Creates a new `Schema` from a sequence of `Field` values
+ /// Creates a new [`Schema`] from a sequence of [`Field`] values
/// and adds additional metadata in form of key value pairs.
///
/// # Example
@@ -160,17 +160,12 @@ impl Schema {
}
// merge fields
for field in fields.into_iter() {
- let mut new_field = true;
- for merged_field in &mut merged.fields {
- if field.name() != merged_field.name() {
- continue;
- }
- new_field = false;
- merged_field.try_merge(&field)?
- }
- // found a new field, add to field list
- if new_field {
- merged.fields.push(field);
+ let merged_field =
+ merged.fields.iter_mut().find(|f| f.name() == field.name());
+ match merged_field {
+ Some(merged_field) => merged_field.try_merge(&field)?,
+ // found a new field, add to field list
+ None => merged.fields.push(field),
}
}
Ok(merged)
@@ -189,18 +184,18 @@ impl Schema {
self.fields.iter().flat_map(|f| f.fields()).collect()
}
- /// Returns an immutable reference of a specific `Field` instance selected using an
+ /// Returns an immutable reference of a specific [`Field`] instance selected using an
/// offset within the internal `fields` vector.
pub fn field(&self, i: usize) -> &Field {
&self.fields[i]
}
- /// Returns an immutable reference of a specific `Field` instance selected by name.
+ /// Returns an immutable reference of a specific [`Field`] instance selected by name.
pub fn field_with_name(&self, name: &str) -> Result<&Field> {
Ok(&self.fields[self.index_of(name)?])
}
- /// Returns a vector of immutable references to all `Field` instances selected by
+ /// Returns a vector of immutable references to all [`Field`] instances selected by
/// the dictionary ID they use.
pub fn fields_with_dict_id(&self, dict_id: i64) -> Vec<&Field> {
self.fields
@@ -211,17 +206,16 @@ impl Schema {
/// Find the index of the column with the given name.
pub fn index_of(&self, name: &str) -> Result<usize> {
- for i in 0..self.fields.len() {
- if self.fields[i].name() == name {
- return Ok(i);
- }
- }
- let valid_fields: Vec<String> =
- self.fields.iter().map(|f| f.name().clone()).collect();
- Err(ArrowError::InvalidArgumentError(format!(
- "Unable to get field named \"{}\". Valid fields: {:?}",
- name, valid_fields
- )))
+ (0..self.fields.len())
+ .find(|idx| self.fields[*idx].name() == name)
+ .ok_or_else(|| {
+ let valid_fields: Vec<String> =
+ self.fields.iter().map(|f| f.name().clone()).collect();
+ ArrowError::InvalidArgumentError(format!(
+ "Unable to get field named \"{}\". Valid fields: {:?}",
+ name, valid_fields
+ ))
+ })
}
/// Returns an immutable reference to the Map of custom metadata key-value pairs.
@@ -317,31 +311,13 @@ impl Schema {
///
/// In other words, any record conforms to `other` should also conform to `self`.
pub fn contains(&self, other: &Schema) -> bool {
- if self.fields.len() != other.fields.len() {
- return false;
- }
-
- for (i, field) in other.fields.iter().enumerate() {
- if !self.fields[i].contains(field) {
- return false;
- }
- }
-
+ self.fields.len() == other.fields.len()
+ && self.fields.iter().zip(other.fields.iter()).all(|(f1, f2)| f1.contains(f2))
// make sure self.metadata is a superset of other.metadata
- for (k, v) in &other.metadata {
- match self.metadata.get(k) {
- Some(s) => {
- if s != v {
- return false;
- }
- }
- None => {
- return false;
- }
- }
- }
-
- true
+ && other.metadata.iter().all(|(k, v1)| match self.metadata.get(k) {
+ Some(v2) => v1 == v2,
+ _ => false,
+ })
}
}
@@ -451,4 +427,34 @@ mod tests {
)
}
}
+
+ #[test]
+ fn test_schema_contains() {
+ let mut metadata1 = HashMap::new();
+ metadata1.insert("meta".to_string(), "data".to_string());
+
+ let schema1 = Schema::new(vec![
+ Field::new("name", DataType::Utf8, false),
+ Field::new("address", DataType::Utf8, false),
+ Field::new("priority", DataType::UInt8, false),
+ ])
+ .with_metadata(metadata1.clone());
+
+ let mut metadata2 = HashMap::new();
+ metadata2.insert("meta".to_string(), "data".to_string());
+ metadata2.insert("meta2".to_string(), "data".to_string());
+ let schema2 = Schema::new(vec![
+ Field::new("name", DataType::Utf8, false),
+ Field::new("address", DataType::Utf8, false),
+ Field::new("priority", DataType::UInt8, false),
+ ])
+ .with_metadata(metadata2);
+
+ // reflexivity
+ assert!(schema1.contains(&schema1));
+ assert!(schema2.contains(&schema2));
+
+ assert!(!schema1.contains(&schema2));
+ assert!(schema2.contains(&schema1));
+ }
}