You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@avro.apache.org by mg...@apache.org on 2022/04/19 18:38:10 UTC

[avro] branch branch-1.11 updated: AVRO-3495: Rust: Fields order should not matter (#1650)

This is an automated email from the ASF dual-hosted git repository.

mgrigorov pushed a commit to branch branch-1.11
in repository https://gitbox.apache.org/repos/asf/avro.git


The following commit(s) were added to refs/heads/branch-1.11 by this push:
     new 70b37a70e AVRO-3495: Rust: Fields order should not matter (#1650)
70b37a70e is described below

commit 70b37a70e6efd3591af5921f231446c9b013593a
Author: Martin Grigorov <ma...@users.noreply.github.com>
AuthorDate: Tue Apr 19 21:37:45 2022 +0300

    AVRO-3495: Rust: Fields order should not matter (#1650)
    
    * AVRO-3495: The order of the struct's fields and schema's fields should not matter
    
    Signed-off-by: Martin Tzvetanov Grigorov <mg...@apache.org>
    
    * AVRO-3495: Use the lookup table when comparing values against fields by name
    
    Until now it was expected that both the schema fields and the input
    values are sorted the same way.
    
    Use BTreeMap instead of HashMap for the lookup table because otherwise
    the assertion on the validation error messages is impossible due to
    random printing of the map's entries
    
    Signed-off-by: Martin Tzvetanov Grigorov <mg...@apache.org>
    
    * AVRO-3495: Update the test case
    
    Signed-off-by: Martin Tzvetanov Grigorov <mg...@apache.org>
    (cherry picked from commit 3312b986e43252963d567e3b1137b8de33220696)
---
 lang/rust/avro/src/encode.rs     | 27 +++++++++----
 lang/rust/avro/src/error.rs      |  3 ++
 lang/rust/avro/src/schema.rs     | 22 +++++------
 lang/rust/avro/src/types.rs      | 83 ++++++++++++++++++++++++----------------
 lang/rust/avro/tests/schema.rs   | 32 ++++++++++++++++
 lang/rust/avro_derive/src/lib.rs |  2 +-
 6 files changed, 117 insertions(+), 52 deletions(-)

diff --git a/lang/rust/avro/src/encode.rs b/lang/rust/avro/src/encode.rs
index c4c0bd3fe..04d220276 100644
--- a/lang/rust/avro/src/encode.rs
+++ b/lang/rust/avro/src/encode.rs
@@ -188,18 +188,29 @@ pub(crate) fn encode_internal(
             if let Schema::Record {
                 ref name,
                 fields: ref schema_fields,
+                ref lookup,
                 ..
             } = *schema
             {
                 let record_namespace = name.fully_qualified_name(enclosing_namespace).namespace;
-                for (i, &(_, ref value)) in fields.iter().enumerate() {
-                    encode_internal(
-                        value,
-                        &schema_fields[i].schema,
-                        names,
-                        &record_namespace,
-                        buffer,
-                    )?;
+                for &(ref name, ref value) in fields.iter() {
+                    match lookup.get(name) {
+                        Some(idx) => {
+                            encode_internal(
+                                value,
+                                &schema_fields[*idx].schema,
+                                names,
+                                &record_namespace,
+                                buffer,
+                            )?;
+                        }
+                        None => {
+                            return Err(Error::NoEntryInLookupTable(
+                                name.clone(),
+                                format!("{:?}", lookup),
+                            ));
+                        }
+                    }
                 }
             } else {
                 error!("invalid schema type for Record: {:?}", schema);
diff --git a/lang/rust/avro/src/error.rs b/lang/rust/avro/src/error.rs
index 908e040e9..d7483ea91 100644
--- a/lang/rust/avro/src/error.rs
+++ b/lang/rust/avro/src/error.rs
@@ -403,6 +403,9 @@ pub enum Error {
     #[error("Signed decimal bytes length {0} not equal to fixed schema size {1}.")]
     EncodeDecimalAsFixedError(usize, usize),
 
+    #[error("There is no entry for {0} in the lookup table: {1}.")]
+    NoEntryInLookupTable(String, String),
+
     #[error("Can only encode value type {value_kind:?} as one of {supported_schema:?}")]
     EncodeValueAsSchemaError {
         value_kind: ValueKind,
diff --git a/lang/rust/avro/src/schema.rs b/lang/rust/avro/src/schema.rs
index 6134c8256..e0fcca23b 100644
--- a/lang/rust/avro/src/schema.rs
+++ b/lang/rust/avro/src/schema.rs
@@ -106,7 +106,7 @@ pub enum Schema {
         aliases: Aliases,
         doc: Documentation,
         fields: Vec<RecordField>,
-        lookup: HashMap<String, usize>,
+        lookup: BTreeMap<String, usize>,
     },
     /// An `enum` Avro schema.
     Enum {
@@ -1063,7 +1063,7 @@ impl Parser {
             }
         }
 
-        let mut lookup = HashMap::new();
+        let mut lookup = BTreeMap::new();
         let fully_qualified_name = name.fully_qualified_name(enclosing_namespace);
         self.register_resolving_schema(&fully_qualified_name, &aliases);
 
@@ -1856,7 +1856,7 @@ mod tests {
                 order: RecordFieldOrder::Ignore,
                 position: 0,
             }],
-            lookup: HashMap::from_iter(vec![("field_one".to_string(), 0)]),
+            lookup: BTreeMap::from_iter(vec![("field_one".to_string(), 0)]),
         };
 
         assert_eq!(schema_c, schema_c_expected);
@@ -1910,7 +1910,7 @@ mod tests {
                 order: RecordFieldOrder::Ignore,
                 position: 0,
             }],
-            lookup: HashMap::from_iter(vec![("field_one".to_string(), 0)]),
+            lookup: BTreeMap::from_iter(vec![("field_one".to_string(), 0)]),
         };
 
         assert_eq!(schema_option_a, schema_option_a_expected);
@@ -1932,7 +1932,7 @@ mod tests {
         )
         .unwrap();
 
-        let mut lookup = HashMap::new();
+        let mut lookup = BTreeMap::new();
         lookup.insert("a".to_owned(), 0);
         lookup.insert("b".to_owned(), 1);
 
@@ -1988,10 +1988,10 @@ mod tests {
         )
         .unwrap();
 
-        let mut lookup = HashMap::new();
+        let mut lookup = BTreeMap::new();
         lookup.insert("recordField".to_owned(), 0);
 
-        let mut node_lookup = HashMap::new();
+        let mut node_lookup = BTreeMap::new();
         node_lookup.insert("children".to_owned(), 1);
         node_lookup.insert("label".to_owned(), 0);
 
@@ -2159,7 +2159,7 @@ mod tests {
         )
         .unwrap();
 
-        let mut lookup = HashMap::new();
+        let mut lookup = BTreeMap::new();
         lookup.insert("value".to_owned(), 0);
         lookup.insert("next".to_owned(), 1);
 
@@ -2225,7 +2225,7 @@ mod tests {
         )
         .unwrap();
 
-        let mut lookup = HashMap::new();
+        let mut lookup = BTreeMap::new();
         lookup.insert("value".to_owned(), 0);
         lookup.insert("next".to_owned(), 1);
 
@@ -2289,7 +2289,7 @@ mod tests {
         )
         .unwrap();
 
-        let mut lookup = HashMap::new();
+        let mut lookup = BTreeMap::new();
         lookup.insert("enum".to_owned(), 0);
         lookup.insert("next".to_owned(), 1);
 
@@ -2364,7 +2364,7 @@ mod tests {
         )
         .unwrap();
 
-        let mut lookup = HashMap::new();
+        let mut lookup = BTreeMap::new();
         lookup.insert("fixed".to_owned(), 0);
         lookup.insert("next".to_owned(), 1);
 
diff --git a/lang/rust/avro/src/types.rs b/lang/rust/avro/src/types.rs
index 25d968120..2ecb23e0a 100644
--- a/lang/rust/avro/src/types.rs
+++ b/lang/rust/avro/src/types.rs
@@ -25,7 +25,12 @@ use crate::{
     AvroResult, Error,
 };
 use serde_json::{Number, Value as JsonValue};
-use std::{collections::HashMap, convert::TryFrom, hash::BuildHasher, str::FromStr, u8};
+use std::{
+    collections::{BTreeMap, HashMap},
+    convert::TryFrom,
+    hash::BuildHasher,
+    str::FromStr,
+};
 use uuid::Uuid;
 
 /// Compute the maximum decimal value precision of a byte array of length `len` could hold.
@@ -206,7 +211,7 @@ pub struct Record<'a> {
     /// Ordered according to the fields in the schema given to create this
     /// `Record` object. Any unset field defaults to `Value::Null`.
     pub fields: Vec<(String, Value)>,
-    schema_lookup: &'a HashMap<String, usize>,
+    schema_lookup: &'a BTreeMap<String, usize>,
 }
 
 impl<'a> Record<'a> {
@@ -457,7 +462,14 @@ impl Value {
                     Value::accumulate(acc, value.validate_internal(inner, names))
                 })
             }
-            (&Value::Record(ref record_fields), &Schema::Record { ref fields, .. }) => {
+            (
+                &Value::Record(ref record_fields),
+                &Schema::Record {
+                    ref fields,
+                    ref lookup,
+                    ..
+                },
+            ) => {
                 if fields.len() != record_fields.len() {
                     return Some(format!(
                         "The value's records length ({}) is different than the schema's ({})",
@@ -466,19 +478,26 @@ impl Value {
                     ));
                 }
 
-                fields.iter().zip(record_fields.iter()).fold(
-                    None,
-                    |acc, (field, &(ref name, ref value))| {
-                        if field.name != *name {
-                            return Some(format!(
-                                "Value's name '{}' does not match the expected field's name '{}'",
-                                name, field.name
-                            ));
+                record_fields
+                    .iter()
+                    .fold(None, |acc, (field_name, record_field)| {
+                        match lookup.get(field_name) {
+                            Some(idx) => {
+                                let field = &fields[*idx];
+                                Value::accumulate(
+                                    acc,
+                                    record_field.validate_internal(&field.schema, names),
+                                )
+                            }
+                            None => Value::accumulate(
+                                acc,
+                                Some(format!(
+                                    "There is no schema field for field '{}'",
+                                    field_name
+                                )),
+                            ),
                         }
-                        let res = value.validate_internal(&field.schema, names);
-                        Value::accumulate(acc, res)
-                    },
-                )
+                    })
             }
             (&Value::Map(ref items), &Schema::Record { ref fields, .. }) => {
                 fields.iter().fold(None, |acc, field| {
@@ -1054,7 +1073,7 @@ mod tests {
                     lookup: Default::default(),
                 },
                 false,
-                "Invalid value: Record([(\"unknown_field_name\", Null)]) for schema: Record { name: Name { name: \"record_name\", namespace: None }, aliases: None, doc: None, fields: [RecordField { name: \"field_name\", doc: None, default: None, schema: Int, order: Ignore, position: 0 }], lookup: {} }. Reason: Value's name 'unknown_field_name' does not match the expected field's name 'field_name'",
+                "Invalid value: Record([(\"unknown_field_name\", Null)]) for schema: Record { name: Name { name: \"record_name\", namespace: None }, aliases: None, doc: None, fields: [RecordField { name: \"field_name\", doc: None, default: None, schema: Int, order: Ignore, position: 0 }], lookup: {} }. Reason: There is no schema field for field 'unknown_field_name'",
             ),
             (
                 Value::Record(vec![("field_name".to_string(), Value::Null)]),
@@ -1072,10 +1091,10 @@ mod tests {
                         order: RecordFieldOrder::Ignore,
                         position: 0,
                     }],
-                    lookup: Default::default(),
+                    lookup: [("field_name".to_string(), 0)].iter().cloned().collect(),
                 },
                 false,
-                "Invalid value: Record([(\"field_name\", Null)]) for schema: Record { name: Name { name: \"record_name\", namespace: None }, aliases: None, doc: None, fields: [RecordField { name: \"field_name\", doc: None, default: None, schema: Ref { name: Name { name: \"missing\", namespace: None } }, order: Ignore, position: 0 }], lookup: {} }. Reason: Unresolved schema reference: 'missing'. Parsed names: []",
+                "Invalid value: Record([(\"field_name\", Null)]) for schema: Record { name: Name { name: \"record_name\", namespace: None }, aliases: None, doc: None, fields: [RecordField { name: \"field_name\", doc: None, default: None, schema: Ref { name: Name { name: \"missing\", namespace: None } }, order: Ignore, position: 0 }], lookup: {\"field_name\": 0} }. Reason: Unresolved schema reference: 'missing'. Parsed names: []",
             ),
         ];
 
@@ -1204,7 +1223,6 @@ mod tests {
     fn validate_record() {
         init();
 
-        use std::collections::HashMap;
         // {
         //    "type": "record",
         //    "fields": [
@@ -1234,7 +1252,10 @@ mod tests {
                     position: 1,
                 },
             ],
-            lookup: HashMap::new(),
+            lookup: [("a".to_string(), 0), ("b".to_string(), 1)]
+                .iter()
+                .cloned()
+                .collect(),
         };
 
         assert!(Value::Record(vec![
@@ -1247,21 +1268,14 @@ mod tests {
             ("b".to_string(), Value::String("foo".to_string())),
             ("a".to_string(), Value::Long(42i64)),
         ]);
-        assert!(!value.validate(&schema));
-        assert_log_message(
-            format!(
-                "Invalid value: {:?} for schema: {:?}. Reason: {}",
-                value, schema, "Value's name 'a' does not match the expected field's name 'b'"
-            )
-            .as_str(),
-        );
+        assert!(value.validate(&schema));
 
         let value = Value::Record(vec![
             ("a".to_string(), Value::Boolean(false)),
             ("b".to_string(), Value::String("foo".to_string())),
         ]);
         assert!(!value.validate(&schema));
-        assert_log_message("Invalid value: Record([(\"a\", Boolean(false)), (\"b\", String(\"foo\"))]) for schema: Record { name: Name { name: \"some_record\", namespace: None }, aliases: None, doc: None, fields: [RecordField { name: \"a\", doc: None, default: None, schema: Long, order: Ascending, position: 0 }, RecordField { name: \"b\", doc: None, default: None, schema: String, order: Ascending, position: 1 }], lookup: {} }. Reason: Unsupported value-schema combination");
+        assert_log_message("Invalid value: Record([(\"a\", Boolean(false)), (\"b\", String(\"foo\"))]) for schema: Record { name: Name { name: \"some_record\", namespace: None }, aliases: None, doc: None, fields: [RecordField { name: \"a\", doc: None, default: None, schema: Long, order: Ascending, position: 0 }, RecordField { name: \"b\", doc: None, default: None, schema: String, order: Ascending, position: 1 }], lookup: {\"a\": 0, \"b\": 1} }. Reason: Unsupported value-schema combination");
 
         let value = Value::Record(vec![
             ("a".to_string(), Value::Long(42i64)),
@@ -1269,7 +1283,7 @@ mod tests {
         ]);
         assert!(!value.validate(&schema));
         assert_log_message(
-            "Invalid value: Record([(\"a\", Long(42)), (\"c\", String(\"foo\"))]) for schema: Record { name: Name { name: \"some_record\", namespace: None }, aliases: None, doc: None, fields: [RecordField { name: \"a\", doc: None, default: None, schema: Long, order: Ascending, position: 0 }, RecordField { name: \"b\", doc: None, default: None, schema: String, order: Ascending, position: 1 }], lookup: {} }. Reason: Value's name 'c' does not match the expected field's name 'b'"
+            "Invalid value: Record([(\"a\", Long(42)), (\"c\", String(\"foo\"))]) for schema: Record { name: Name { name: \"some_record\", namespace: None }, aliases: None, doc: None, fields: [RecordField { name: \"a\", doc: None, default: None, schema: Long, order: Ascending, position: 0 }, RecordField { name: \"b\", doc: None, default: None, schema: String, order: Ascending, position: 1 }], lookup: {\"a\": 0, \"b\": 1} }. Reason: There is no schema field for field 'c'"
         );
 
         let value = Value::Record(vec![
@@ -1278,7 +1292,9 @@ mod tests {
             ("c".to_string(), Value::Null),
         ]);
         assert!(!value.validate(&schema));
-        assert_log_message("Invalid value: Record([(\"a\", Long(42)), (\"b\", String(\"foo\")), (\"c\", Null)]) for schema: Record { name: Name { name: \"some_record\", namespace: None }, aliases: None, doc: None, fields: [RecordField { name: \"a\", doc: None, default: None, schema: Long, order: Ascending, position: 0 }, RecordField { name: \"b\", doc: None, default: None, schema: String, order: Ascending, position: 1 }], lookup: {} }. Reason: The value's records length (3) is different  [...]
+        assert_log_message(
+            r#"Invalid value: Record([("a", Long(42)), ("b", String("foo")), ("c", Null)]) for schema: Record { name: Name { name: "some_record", namespace: None }, aliases: None, doc: None, fields: [RecordField { name: "a", doc: None, default: None, schema: Long, order: Ascending, position: 0 }, RecordField { name: "b", doc: None, default: None, schema: String, order: Ascending, position: 1 }], lookup: {"a": 0, "b": 1} }. Reason: The value's records length (3) is different than the sche [...]
+        );
 
         assert!(Value::Map(
             vec![
@@ -1296,7 +1312,10 @@ mod tests {
                 .collect()
         )
         .validate(&schema));
-        assert_log_message("Invalid value: Map({\"c\": Long(123)}) for schema: Record { name: Name { name: \"some_record\", namespace: None }, aliases: None, doc: None, fields: [RecordField { name: \"a\", doc: None, default: None, schema: Long, order: Ascending, position: 0 }, RecordField { name: \"b\", doc: None, default: None, schema: String, order: Ascending, position: 1 }], lookup: {} }. Reason: Field with name '\"a\"' is not a member of the map items\nField with name '\"b\"' is not  [...]
+        assert_log_message(
+            r#"Invalid value: Map({"c": Long(123)}) for schema: Record { name: Name { name: "some_record", namespace: None }, aliases: None, doc: None, fields: [RecordField { name: "a", doc: None, default: None, schema: Long, order: Ascending, position: 0 }, RecordField { name: "b", doc: None, default: None, schema: String, order: Ascending, position: 1 }], lookup: {"a": 0, "b": 1} }. Reason: Field with name '"a"' is not a member of the map items
+Field with name '"b"' is not a member of the map items"#,
+        );
 
         let union_schema = Schema::Union(UnionSchema::new(vec![Schema::Null, schema]).unwrap());
 
diff --git a/lang/rust/avro/tests/schema.rs b/lang/rust/avro/tests/schema.rs
index f58b62f9e..62eeb2ddf 100644
--- a/lang/rust/avro/tests/schema.rs
+++ b/lang/rust/avro/tests/schema.rs
@@ -17,6 +17,7 @@
 
 use apache_avro::{
     schema::{Name, RecordField},
+    to_avro_datum, to_value,
     types::{Record, Value},
     Codec, Error, Reader, Schema, Writer,
 };
@@ -1311,3 +1312,34 @@ fn test_decimal_valid_type_attributes() {
     assert_eq!(0, bytes_decimal.get_attribute("scale"));
 }
 */
+
+// https://github.com/flavray/avro-rs/issues/47
+#[test]
+fn avro_old_issue_47() {
+    init();
+    let schema_str = r#"
+    {
+      "type": "record",
+      "name": "my_record",
+      "fields": [
+        {"name": "a", "type": "long"},
+        {"name": "b", "type": "string"}
+      ]
+    }"#;
+    let schema = Schema::parse_str(schema_str).unwrap();
+
+    use serde::{Deserialize, Serialize};
+
+    #[derive(Deserialize, Serialize)]
+    pub struct MyRecord {
+        b: String,
+        a: i64,
+    }
+
+    let record = MyRecord {
+        b: "hello".to_string(),
+        a: 1,
+    };
+
+    let _ = to_avro_datum(&schema, to_value(record).unwrap()).unwrap();
+}
diff --git a/lang/rust/avro_derive/src/lib.rs b/lang/rust/avro_derive/src/lib.rs
index 0055249cb..a32f2be33 100644
--- a/lang/rust/avro_derive/src/lib.rs
+++ b/lang/rust/avro_derive/src/lib.rs
@@ -152,7 +152,7 @@ fn get_data_struct_schema_def(
     Ok(quote! {
         let schema_fields = vec![#(#record_field_exprs),*];
         let name = apache_avro::schema::Name::new(#full_schema_name).expect(&format!("Unable to parse struct name for schema {}", #full_schema_name)[..]);
-        let lookup: HashMap<String, usize> = schema_fields
+        let lookup: std::collections::BTreeMap<String, usize> = schema_fields
             .iter()
             .map(|field| (field.name.to_owned(), field.position))
             .collect();