You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@avro.apache.org by mg...@apache.org on 2023/08/15 07:10:10 UTC

[avro] 07/09: AVRO-3786: Add test-cases and fix for AVRO-3786

This is an automated email from the ASF dual-hosted git repository.

mgrigorov pushed a commit to branch avro-3814/schema-resolution-union
in repository https://gitbox.apache.org/repos/asf/avro.git

commit bc56014801a8f1986cd7a7779ad641c98c5f5c09
Author: Rik Heijdens <r....@lithic.com>
AuthorDate: Fri Jul 28 13:54:56 2023 +0200

    AVRO-3786: Add test-cases and fix for AVRO-3786
---
 lang/rust/avro/src/schema.rs      |  10 +-
 lang/rust/avro/src/types.rs       |  38 +-
 lang/rust/avro/tests/avro-3786.rs | 727 ++++++++++++++++++++++++++++++++++++++
 3 files changed, 761 insertions(+), 14 deletions(-)

diff --git a/lang/rust/avro/src/schema.rs b/lang/rust/avro/src/schema.rs
index 22307d4e4..8ca6bc996 100644
--- a/lang/rust/avro/src/schema.rs
+++ b/lang/rust/avro/src/schema.rs
@@ -839,9 +839,13 @@ impl UnionSchema {
                 let namespace = &schema.namespace().or_else(|| enclosing_namespace.clone());
 
                 // Attempt to validate the value in order to ensure we've selected the right schema.
-                value
-                    .validate_internal(schema, &collected_names, namespace, true)
-                    .is_none()
+                match value.validate_internal(schema, &collected_names, namespace, true) {
+                    None => true,
+                    Some(err) => {
+                        println!("Validation failed: {:?}", err);
+                        false
+                    }
+                }
             })
         }
     }
diff --git a/lang/rust/avro/src/types.rs b/lang/rust/avro/src/types.rs
index c2fdf7075..4ba6a77fe 100644
--- a/lang/rust/avro/src/types.rs
+++ b/lang/rust/avro/src/types.rs
@@ -477,19 +477,35 @@ impl Value {
                 Schema::Enum(EnumSchema {
                     symbols, default, ..
                 }),
-            ) => symbols
-                .get(i as usize)
-                .map(|ref symbol| {
-                    if symbol != &s {
-                        Some(format!("Symbol '{s}' is not at position '{i}'"))
-                    } else {
+            ) => {
+                if schema_resolution {
+                    // When resolving a schema the following rule applies:
+                    // if both are enums: if the writer’s symbol is not present in the reader’s enum and the reader has a default value,
+                    // then that value is used, otherwise an error is signalled.
+                    if symbols.contains(s) || default.is_some() {
+                        // If `s` is a symbol in the schema, or a default is available then the value is valid.
                         None
+                    } else {
+                        Some(format!(
+                            "Unknown symbol '{s}': no symbol to fallback to is available."
+                        ))
                     }
-                })
-                .unwrap_or_else(|| match default {
-                    Some(_) => None,
-                    None => Some(format!("No symbol at position '{i}'")),
-                }),
+                } else {
+                    symbols
+                        .get(i as usize)
+                        .map(|ref symbol| {
+                            if symbol != &s {
+                                Some(format!("Symbol '{s}' is not at position '{i}'"))
+                            } else {
+                                None
+                            }
+                        })
+                        .unwrap_or_else(|| match default {
+                            Some(_) => None,
+                            None => Some(format!("No symbol at position '{i}'")),
+                        })
+                }
+            }
             // (&Value::Union(None), &Schema::Union(_)) => None,
             (&Value::Union(i, ref value), Schema::Union(inner)) => inner
                 .variants()
diff --git a/lang/rust/avro/tests/avro-3786.rs b/lang/rust/avro/tests/avro-3786.rs
index f36b163c4..437889ed7 100644
--- a/lang/rust/avro/tests/avro-3786.rs
+++ b/lang/rust/avro/tests/avro-3786.rs
@@ -157,3 +157,730 @@ fn avro_3786_deserialize_union_with_different_enum_order() -> TestResult {
     }
     Ok(())
 }
+
+#[test]
+fn avro_3786_deserialize_union_with_different_enum_order_defined_in_record() -> TestResult {
+    #[derive(
+        Debug, PartialEq, Eq, Hash, PartialOrd, Ord, Clone, serde::Deserialize, serde::Serialize,
+    )]
+    pub enum Bar {
+        #[serde(rename = "bar0")]
+        Bar0,
+        #[serde(rename = "bar1")]
+        Bar1,
+        #[serde(rename = "bar2")]
+        Bar2,
+    }
+    #[derive(Debug, PartialEq, Eq, Clone, serde::Deserialize, serde::Serialize)]
+    pub struct BarParent {
+        pub bar: Bar,
+    }
+    #[derive(Debug, PartialEq, Eq, Clone, serde::Deserialize, serde::Serialize)]
+    pub struct Foo {
+        #[serde(rename = "barParent")]
+        pub bar_parent: Option<BarParent>,
+    }
+    let writer_schema = r#"{
+        "type": "record",
+        "name": "Foo",
+        "namespace": "com.rallyhealth.devices.canonical.avro.model.v6_0",
+        "fields":
+        [
+            {
+                "name": "barParent",
+                "type": [
+                    "null",
+                    {
+                        "type": "record",
+                        "name": "BarParent",
+                        "fields": [
+                            {
+                                "name": "bar",
+                                "type": {
+                                    "type": "enum",
+                                    "name": "Bar",
+                                    "symbols":
+                                    [
+                                        "bar0",
+                                        "bar1",
+                                        "bar2"
+                                    ],
+                                    "default": "bar0"
+                                }
+                            }
+                        ]
+                    }
+                ]
+            }
+        ]
+    }"#;
+    let reader_schema = r#"{
+        "type": "record",
+        "name": "Foo",
+        "namespace": "com.rallyhealth.devices.canonical.avro.model.v6_0",
+        "fields":
+        [
+            {
+                "name": "barParent",
+                "type": [
+                    "null",
+                    {
+                        "type": "record",
+                        "name": "BarParent",
+                        "fields": [
+                            {
+                                "name": "bar",
+                                "type": {
+                                    "type": "enum",
+                                    "name": "Bar",
+                                    "symbols":
+                                    [
+                                        "bar0",
+                                        "bar2"
+                                    ],
+                                    "default": "bar0"
+                                }
+                            }
+                        ]
+                    }
+                ]
+            }
+        ]
+    }"#;
+    let writer_schema = Schema::parse_str(writer_schema)?;
+    let foo = Foo {
+        bar_parent: Some(BarParent { bar: Bar::Bar0 }),
+    };
+    let avro_value = crate::to_value(foo)?;
+    assert!(
+        avro_value.validate(&writer_schema),
+        "value is valid for schema",
+    );
+    let datum = crate::to_avro_datum(&writer_schema, avro_value)?;
+    let mut x = &datum[..];
+    let reader_schema = Schema::parse_str(reader_schema)?;
+    let deser_value = crate::from_avro_datum(&writer_schema, &mut x, Some(&reader_schema))?;
+    match deser_value {
+        types::Value::Record(fields) => {
+            assert_eq!(fields.len(), 1);
+            assert_eq!(fields[0].0, "barParent");
+            // TODO: better validation
+        }
+        _ => panic!("Expected Value::Record"),
+    }
+    Ok(())
+}
+
+#[test]
+fn test_avro_3786_deserialize_union_with_different_enum_order_defined_in_record_v1() -> TestResult {
+    #[derive(
+        Debug, PartialEq, Eq, Hash, PartialOrd, Ord, Clone, serde::Deserialize, serde::Serialize,
+    )]
+    pub enum Bar {
+        #[serde(rename = "bar0")]
+        Bar0,
+        #[serde(rename = "bar1")]
+        Bar1,
+        #[serde(rename = "bar2")]
+        Bar2,
+    }
+    #[derive(Debug, PartialEq, Eq, Clone, serde::Deserialize, serde::Serialize)]
+    pub struct BarParent {
+        pub bar: Bar,
+    }
+    #[derive(Debug, PartialEq, Eq, Clone, serde::Deserialize, serde::Serialize)]
+    pub struct Foo {
+        #[serde(rename = "barParent")]
+        pub bar_parent: Option<BarParent>,
+    }
+    let writer_schema = r#"{
+            "type": "record",
+            "name": "Foo",
+            "namespace": "com.rallyhealth.devices.canonical.avro.model.v6_0",
+            "fields":
+            [
+                {
+                    "name": "barParent",
+                    "type": [
+                        "null",
+                        {
+                            "type": "record",
+                            "name": "BarParent",
+                            "fields": [
+                                {
+                                    "name": "bar",
+                                    "type": {
+                                        "type": "enum",
+                                        "name": "Bar",
+                                        "symbols":
+                                        [
+                                            "bar0",
+                                            "bar1",
+                                            "bar2"
+                                        ],
+                                        "default": "bar0"
+                                    }
+                                }
+                            ]
+                        }
+                    ]
+                }
+            ]
+        }"#;
+    let reader_schema = r#"{
+            "type": "record",
+            "name": "Foo",
+            "namespace": "com.rallyhealth.devices.canonical.avro.model.v6_0",
+            "fields":
+            [
+                {
+                    "name": "barParent",
+                    "type": [
+                        "null",
+                        {
+                            "type": "record",
+                            "name": "BarParent",
+                            "fields": [
+                                {
+                                    "name": "bar",
+                                    "type": {
+                                        "type": "enum",
+                                        "name": "Bar",
+                                        "symbols":
+                                        [
+                                            "bar0",
+                                            "bar2"
+                                        ],
+                                        "default": "bar0"
+                                    }
+                                }
+                            ]
+                        }
+                    ]
+                }
+            ]
+        }"#;
+    let writer_schema = Schema::parse_str(writer_schema)?;
+    let foo = Foo {
+        bar_parent: Some(BarParent { bar: Bar::Bar1 }),
+    };
+    let avro_value = crate::to_value(foo)?;
+    assert!(
+        avro_value.validate(&writer_schema),
+        "value is valid for schema",
+    );
+    let datum = crate::to_avro_datum(&writer_schema, avro_value)?;
+    let mut x = &datum[..];
+    let reader_schema = Schema::parse_str(reader_schema)?;
+    let deser_value = crate::from_avro_datum(&writer_schema, &mut x, Some(&reader_schema))?;
+    match deser_value {
+        types::Value::Record(fields) => {
+            assert_eq!(fields.len(), 1);
+            assert_eq!(fields[0].0, "barParent");
+            // TODO: better validation
+        }
+        _ => panic!("Expected Value::Record"),
+    }
+    Ok(())
+}
+
+#[test]
+fn test_avro_3786_deserialize_union_with_different_enum_order_defined_in_record_v2() -> TestResult {
+    #[derive(
+        Debug, PartialEq, Eq, Hash, PartialOrd, Ord, Clone, serde::Deserialize, serde::Serialize,
+    )]
+    pub enum Bar {
+        #[serde(rename = "bar0")]
+        Bar0,
+        #[serde(rename = "bar1")]
+        Bar1,
+        #[serde(rename = "bar2")]
+        Bar2,
+    }
+    #[derive(Debug, PartialEq, Eq, Clone, serde::Deserialize, serde::Serialize)]
+    pub struct BarParent {
+        pub bar: Bar,
+    }
+    #[derive(Debug, PartialEq, Eq, Clone, serde::Deserialize, serde::Serialize)]
+    pub struct Foo {
+        #[serde(rename = "barParent")]
+        pub bar_parent: Option<BarParent>,
+    }
+    let writer_schema = r#"{
+            "type": "record",
+            "name": "Foo",
+            "namespace": "com.rallyhealth.devices.canonical.avro.model.v6_0",
+            "fields":
+            [
+                {
+                    "name": "barParent",
+                    "type": [
+                        "null",
+                        {
+                            "type": "record",
+                            "name": "BarParent",
+                            "fields": [
+                                {
+                                    "name": "bar",
+                                    "type": {
+                                        "type": "enum",
+                                        "name": "Bar",
+                                        "symbols":
+                                        [
+                                            "bar0",
+                                            "bar1",
+                                            "bar2"
+                                        ],
+                                        "default": "bar2"
+                                    }
+                                }
+                            ]
+                        }
+                    ]
+                }
+            ]
+        }"#;
+    let reader_schema = r#"{
+            "type": "record",
+            "name": "Foo",
+            "namespace": "com.rallyhealth.devices.canonical.avro.model.v6_0",
+            "fields":
+            [
+                {
+                    "name": "barParent",
+                    "type": [
+                        "null",
+                        {
+                            "type": "record",
+                            "name": "BarParent",
+                            "fields": [
+                                {
+                                    "name": "bar",
+                                    "type": {
+                                        "type": "enum",
+                                        "name": "Bar",
+                                        "symbols":
+                                        [
+                                            "bar1",
+                                            "bar2"
+                                        ],
+                                        "default": "bar2"
+                                    }
+                                }
+                            ]
+                        }
+                    ]
+                }
+            ]
+        }"#;
+    let writer_schema = Schema::parse_str(writer_schema)?;
+    let foo = Foo {
+        bar_parent: Some(BarParent { bar: Bar::Bar1 }),
+    };
+    let avro_value = crate::to_value(foo)?;
+    assert!(
+        avro_value.validate(&writer_schema),
+        "value is valid for schema",
+    );
+    let datum = crate::to_avro_datum(&writer_schema, avro_value)?;
+    let mut x = &datum[..];
+    let reader_schema = Schema::parse_str(reader_schema)?;
+    let deser_value = crate::from_avro_datum(&writer_schema, &mut x, Some(&reader_schema))?;
+    match deser_value {
+        types::Value::Record(fields) => {
+            assert_eq!(fields.len(), 1);
+            assert_eq!(fields[0].0, "barParent");
+            // TODO: better validation
+        }
+        _ => panic!("Expected Value::Record"),
+    }
+    Ok(())
+}
+
+#[test]
+fn deserialize_union_with_different_enum_order_defined_in_record() -> TestResult {
+    #[derive(
+        Debug, PartialEq, Eq, Hash, PartialOrd, Ord, Clone, serde::Deserialize, serde::Serialize,
+    )]
+    pub enum Bar {
+        #[serde(rename = "bar0")]
+        Bar0,
+        #[serde(rename = "bar1")]
+        Bar1,
+        #[serde(rename = "bar2")]
+        Bar2,
+    }
+    #[derive(Debug, PartialEq, Eq, Clone, serde::Deserialize, serde::Serialize)]
+    pub struct BarParent {
+        pub bar: Bar,
+    }
+    #[derive(Debug, PartialEq, Eq, Clone, serde::Deserialize, serde::Serialize)]
+    pub struct Foo {
+        #[serde(rename = "barParent")]
+        pub bar_parent: Option<BarParent>,
+    }
+    let writer_schema = r#"{
+            "type": "record",
+            "name": "Foo",
+            "namespace": "com.rallyhealth.devices.canonical.avro.model.v6_0",
+            "fields":
+            [
+                {
+                    "name": "barParent",
+                    "type": [
+                        "null",
+                        {
+                            "type": "record",
+                            "name": "BarParent",
+                            "fields": [
+                                {
+                                    "name": "bar",
+                                    "type": {
+                                        "type": "enum",
+                                        "name": "Bar",
+                                        "symbols":
+                                        [
+                                            "bar0",
+                                            "bar1",
+                                            "bar2"
+                                        ],
+                                        "default": "bar0"
+                                    }
+                                }
+                            ]
+                        }
+                    ]
+                }
+            ]
+        }"#;
+    let reader_schema = r#"{
+            "type": "record",
+            "name": "Foo",
+            "namespace": "com.rallyhealth.devices.canonical.avro.model.v6_0",
+            "fields":
+            [
+                {
+                    "name": "barParent",
+                    "type": [
+                        "null",
+                        {
+                            "type": "record",
+                            "name": "BarParent",
+                            "fields": [
+                                {
+                                    "name": "bar",
+                                    "type": {
+                                        "type": "enum",
+                                        "name": "Bar",
+                                        "symbols":
+                                        [
+                                            "bar0",
+                                            "bar2"
+                                        ],
+                                        "default": "bar0"
+                                    }
+                                }
+                            ]
+                        }
+                    ]
+                }
+            ]
+        }"#;
+    let writer_schema = Schema::parse_str(writer_schema)?;
+    let foo = Foo {
+        bar_parent: Some(BarParent { bar: Bar::Bar2 }),
+    };
+    let avro_value = crate::to_value(foo)?;
+    assert!(
+        avro_value.validate(&writer_schema),
+        "value is valid for schema",
+    );
+    let datum = crate::to_avro_datum(&writer_schema, avro_value)?;
+    let mut x = &datum[..];
+    let reader_schema = Schema::parse_str(reader_schema)?;
+    let deser_value = crate::from_avro_datum(&writer_schema, &mut x, Some(&reader_schema))?;
+    match deser_value {
+        types::Value::Record(fields) => {
+            assert_eq!(fields.len(), 1);
+            assert_eq!(fields[0].0, "barParent");
+            // TODO: better validation
+        }
+        _ => panic!("Expected Value::Record"),
+    }
+    Ok(())
+}
+
+#[test]
+fn deserialize_union_with_record_with_enum_defined_inline_reader_has_different_indices(
+) -> TestResult {
+    #[derive(
+        Debug, PartialEq, Eq, Hash, PartialOrd, Ord, Clone, serde::Deserialize, serde::Serialize,
+    )]
+    pub enum DefinedInRecord {
+        #[serde(rename = "val0")]
+        Val0,
+        #[serde(rename = "val1")]
+        Val1,
+        #[serde(rename = "val2")]
+        Val2,
+        #[serde(rename = "UNKNOWN")]
+        Unknown,
+    }
+    #[derive(Debug, PartialEq, Eq, Clone, serde::Deserialize, serde::Serialize)]
+    pub struct Parent {
+        pub date: i64,
+        #[serde(rename = "barUse")]
+        pub bar_use: Bar,
+        #[serde(rename = "bazUse")]
+        pub baz_use: Option<Vec<Baz>>,
+        #[serde(rename = "definedInRecord")]
+        pub defined_in_record: DefinedInRecord,
+        #[serde(rename = "optionalString")]
+        pub optional_string: Option<String>,
+    }
+    #[derive(
+        Debug, PartialEq, Eq, Hash, PartialOrd, Ord, Clone, serde::Deserialize, serde::Serialize,
+    )]
+    pub enum Baz {
+        #[serde(rename = "baz0")]
+        Baz0,
+        #[serde(rename = "baz1")]
+        Baz1,
+        #[serde(rename = "baz2")]
+        Baz2,
+    }
+    #[derive(
+        Debug, PartialEq, Eq, Hash, PartialOrd, Ord, Clone, serde::Deserialize, serde::Serialize,
+    )]
+    pub enum Bar {
+        #[serde(rename = "bar0")]
+        Bar0,
+        #[serde(rename = "bar1")]
+        Bar1,
+        #[serde(rename = "bar2")]
+        Bar2,
+    }
+    #[derive(Debug, PartialEq, Eq, Clone, serde::Deserialize, serde::Serialize)]
+    pub struct Foo {
+        #[serde(rename = "barInit")]
+        pub bar_init: Bar,
+        pub baz: Baz,
+        pub parent: Option<Parent>,
+    }
+    let writer_schema = r#"{
+            "type": "record",
+            "name": "Foo",
+            "namespace": "fake",
+            "fields":
+            [
+                {
+                    "name": "barInit",
+                    "type":
+                    {
+                        "type": "enum",
+                        "name": "Bar",
+                        "symbols":
+                        [
+                            "bar0",
+                            "bar1",
+                            "bar2"
+                        ],
+                        "default": "bar0"
+                    }
+                },
+                {
+                    "name": "baz",
+                    "type":
+                    {
+                        "type": "enum",
+                        "name": "Baz",
+                        "symbols":
+                        [
+                            "baz0",
+                            "baz1",
+                            "baz2"
+                        ],
+                        "default": "baz0"
+                    }
+                },
+                {
+                    "name": "parent",
+                    "type": [
+                        "null",
+                        {
+                            "type": "record",
+                            "name": "Parent",
+                            "fields": [
+                                {
+                                    "name": "date",
+                                    "type": {
+                                      "type": "long",
+                                      "avro.java.long": "Long"
+                                    }
+                                },
+                                {
+                                    "name": "barUse",
+                                    "type": "Bar"
+                                },
+                                {
+                                    "name": "bazUse",
+                                    "type": [
+                                        "null",
+                                        {
+                                            "type": "array",
+                                            "items": {
+                                                "type": "Baz"
+                                            }
+                                        }
+                                    ]
+                                },
+                                {
+                                    "name": "definedInRecord",
+                                    "type": {
+                                      "name": "DefinedInRecord",
+                                      "type": "enum",
+                                      "symbols": [
+                                        "val0",
+                                        "val1",
+                                        "val2",
+                                        "UNKNOWN"
+                                      ],
+                                      "default": "UNKNOWN"
+                                    }
+                                },
+                                {
+                                  "name": "optionalString",
+                                  "type": [
+                                    "null",
+                                    "string"
+                                  ]
+                                }
+                            ]
+                        }
+                    ]
+                }
+            ]
+        }"#;
+    let reader_schema = r#"{
+            "type": "record",
+            "name": "Foo",
+            "namespace": "fake",
+            "fields":
+            [
+                {
+                    "name": "barInit",
+                    "type":
+                    {
+                        "type": "enum",
+                        "name": "Bar",
+                        "symbols":
+                        [
+                            "bar0",
+                            "bar2"
+                        ],
+                        "default": "bar0"
+                    }
+                },
+                {
+                    "name": "baz",
+                    "type":
+                    {
+                        "type": "enum",
+                        "name": "Baz",
+                        "symbols":
+                        [
+                            "baz0",
+                            "baz2"
+                        ],
+                        "default": "baz0"
+                    }
+                },
+                {
+                    "name": "parent",
+                    "type": [
+                        "null",
+                        {
+                            "type": "record",
+                            "name": "Parent",
+                            "fields": [
+                                {
+                                    "name": "date",
+                                    "type": {
+                                      "type": "long",
+                                      "avro.java.long": "Long"
+                                    }
+                                },
+                                {
+                                    "name": "barUse",
+                                    "type": "Bar"
+                                },
+                                {
+                                    "name": "bazUse",
+                                    "type": [
+                                        "null",
+                                        {
+                                            "type": "array",
+                                            "items": {
+                                                "type": "Baz"
+                                            }
+                                        }
+                                    ]
+                                },
+                                {
+                                    "name": "definedInRecord",
+                                    "type": {
+                                      "name": "DefinedInRecord",
+                                      "type": "enum",
+                                      "symbols": [
+                                        "val1",
+                                        "val2",
+                                        "UNKNOWN"
+                                      ],
+                                      "default": "UNKNOWN"
+                                    }
+                                },
+                                {
+                                  "name": "optionalString",
+                                  "type": [
+                                    "null",
+                                    "string"
+                                  ]
+                                }
+                            ]
+                        }
+                    ]
+                }
+            ]
+        }"#;
+    let writer_schema = Schema::parse_str(writer_schema)?;
+    let foo = Foo {
+        bar_init: Bar::Bar0,
+        baz: Baz::Baz0,
+        parent: Some(Parent {
+            bar_use: Bar::Bar0,
+            baz_use: Some(vec![Baz::Baz0]),
+            optional_string: Some("test".to_string()),
+            date: 1689197893,
+            defined_in_record: DefinedInRecord::Val1,
+        }),
+    };
+    let avro_value = crate::to_value(foo)?;
+    assert!(
+        avro_value.validate(&writer_schema),
+        "value is valid for schema",
+    );
+    let datum = crate::to_avro_datum(&writer_schema, avro_value)?;
+    let mut x = &datum[..];
+    let reader_schema = Schema::parse_str(reader_schema)?;
+    let deser_value = crate::from_avro_datum(&writer_schema, &mut x, Some(&reader_schema))?;
+    match deser_value {
+        types::Value::Record(fields) => {
+            assert_eq!(fields.len(), 3);
+            assert_eq!(fields[0].0, "barInit");
+            assert_eq!(fields[0].1, types::Value::Enum(0, "bar0".to_string()));
+            // TODO: better validation
+        }
+        _ => panic!("Expected Value::Record"),
+    }
+    Ok(())
+}