You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@avro.apache.org by mg...@apache.org on 2023/08/15 07:10:10 UTC
[avro] 07/09: AVRO-3786: Add test-cases and fix for AVRO-3786
This is an automated email from the ASF dual-hosted git repository.
mgrigorov pushed a commit to branch avro-3814/schema-resolution-union
in repository https://gitbox.apache.org/repos/asf/avro.git
commit bc56014801a8f1986cd7a7779ad641c98c5f5c09
Author: Rik Heijdens <r....@lithic.com>
AuthorDate: Fri Jul 28 13:54:56 2023 +0200
AVRO-3786: Add test-cases and fix for AVRO-3786
---
lang/rust/avro/src/schema.rs | 10 +-
lang/rust/avro/src/types.rs | 38 +-
lang/rust/avro/tests/avro-3786.rs | 727 ++++++++++++++++++++++++++++++++++++++
3 files changed, 761 insertions(+), 14 deletions(-)
diff --git a/lang/rust/avro/src/schema.rs b/lang/rust/avro/src/schema.rs
index 22307d4e4..8ca6bc996 100644
--- a/lang/rust/avro/src/schema.rs
+++ b/lang/rust/avro/src/schema.rs
@@ -839,9 +839,13 @@ impl UnionSchema {
let namespace = &schema.namespace().or_else(|| enclosing_namespace.clone());
// Attempt to validate the value in order to ensure we've selected the right schema.
- value
- .validate_internal(schema, &collected_names, namespace, true)
- .is_none()
+ match value.validate_internal(schema, &collected_names, namespace, true) {
+ None => true,
+ Some(err) => {
+ println!("Validation failed: {:?}", err);
+ false
+ }
+ }
})
}
}
diff --git a/lang/rust/avro/src/types.rs b/lang/rust/avro/src/types.rs
index c2fdf7075..4ba6a77fe 100644
--- a/lang/rust/avro/src/types.rs
+++ b/lang/rust/avro/src/types.rs
@@ -477,19 +477,35 @@ impl Value {
Schema::Enum(EnumSchema {
symbols, default, ..
}),
- ) => symbols
- .get(i as usize)
- .map(|ref symbol| {
- if symbol != &s {
- Some(format!("Symbol '{s}' is not at position '{i}'"))
- } else {
+ ) => {
+ if schema_resolution {
+ // When resolving a schema the following rule applies:
+ // if both are enums: if the writer’s symbol is not present in the reader’s enum and the reader has a default value,
+ // then that value is used, otherwise an error is signalled.
+ if symbols.contains(s) || default.is_some() {
+ // If `s` is a symbol in the schema, or a default is available then the value is valid.
None
+ } else {
+ Some(format!(
+ "Unknown symbol '{s}': no symbol to fallback to is available."
+ ))
}
- })
- .unwrap_or_else(|| match default {
- Some(_) => None,
- None => Some(format!("No symbol at position '{i}'")),
- }),
+ } else {
+ symbols
+ .get(i as usize)
+ .map(|ref symbol| {
+ if symbol != &s {
+ Some(format!("Symbol '{s}' is not at position '{i}'"))
+ } else {
+ None
+ }
+ })
+ .unwrap_or_else(|| match default {
+ Some(_) => None,
+ None => Some(format!("No symbol at position '{i}'")),
+ })
+ }
+ }
// (&Value::Union(None), &Schema::Union(_)) => None,
(&Value::Union(i, ref value), Schema::Union(inner)) => inner
.variants()
diff --git a/lang/rust/avro/tests/avro-3786.rs b/lang/rust/avro/tests/avro-3786.rs
index f36b163c4..437889ed7 100644
--- a/lang/rust/avro/tests/avro-3786.rs
+++ b/lang/rust/avro/tests/avro-3786.rs
@@ -157,3 +157,730 @@ fn avro_3786_deserialize_union_with_different_enum_order() -> TestResult {
}
Ok(())
}
+
+#[test]
+fn avro_3786_deserialize_union_with_different_enum_order_defined_in_record() -> TestResult {
+ #[derive(
+ Debug, PartialEq, Eq, Hash, PartialOrd, Ord, Clone, serde::Deserialize, serde::Serialize,
+ )]
+ pub enum Bar {
+ #[serde(rename = "bar0")]
+ Bar0,
+ #[serde(rename = "bar1")]
+ Bar1,
+ #[serde(rename = "bar2")]
+ Bar2,
+ }
+ #[derive(Debug, PartialEq, Eq, Clone, serde::Deserialize, serde::Serialize)]
+ pub struct BarParent {
+ pub bar: Bar,
+ }
+ #[derive(Debug, PartialEq, Eq, Clone, serde::Deserialize, serde::Serialize)]
+ pub struct Foo {
+ #[serde(rename = "barParent")]
+ pub bar_parent: Option<BarParent>,
+ }
+ let writer_schema = r#"{
+ "type": "record",
+ "name": "Foo",
+ "namespace": "com.rallyhealth.devices.canonical.avro.model.v6_0",
+ "fields":
+ [
+ {
+ "name": "barParent",
+ "type": [
+ "null",
+ {
+ "type": "record",
+ "name": "BarParent",
+ "fields": [
+ {
+ "name": "bar",
+ "type": {
+ "type": "enum",
+ "name": "Bar",
+ "symbols":
+ [
+ "bar0",
+ "bar1",
+ "bar2"
+ ],
+ "default": "bar0"
+ }
+ }
+ ]
+ }
+ ]
+ }
+ ]
+ }"#;
+ let reader_schema = r#"{
+ "type": "record",
+ "name": "Foo",
+ "namespace": "com.rallyhealth.devices.canonical.avro.model.v6_0",
+ "fields":
+ [
+ {
+ "name": "barParent",
+ "type": [
+ "null",
+ {
+ "type": "record",
+ "name": "BarParent",
+ "fields": [
+ {
+ "name": "bar",
+ "type": {
+ "type": "enum",
+ "name": "Bar",
+ "symbols":
+ [
+ "bar0",
+ "bar2"
+ ],
+ "default": "bar0"
+ }
+ }
+ ]
+ }
+ ]
+ }
+ ]
+ }"#;
+ let writer_schema = Schema::parse_str(writer_schema)?;
+ let foo = Foo {
+ bar_parent: Some(BarParent { bar: Bar::Bar0 }),
+ };
+ let avro_value = crate::to_value(foo)?;
+ assert!(
+ avro_value.validate(&writer_schema),
+ "value is valid for schema",
+ );
+ let datum = crate::to_avro_datum(&writer_schema, avro_value)?;
+ let mut x = &datum[..];
+ let reader_schema = Schema::parse_str(reader_schema)?;
+ let deser_value = crate::from_avro_datum(&writer_schema, &mut x, Some(&reader_schema))?;
+ match deser_value {
+ types::Value::Record(fields) => {
+ assert_eq!(fields.len(), 1);
+ assert_eq!(fields[0].0, "barParent");
+ // TODO: better validation
+ }
+ _ => panic!("Expected Value::Record"),
+ }
+ Ok(())
+}
+
+#[test]
+fn test_avro_3786_deserialize_union_with_different_enum_order_defined_in_record_v1() -> TestResult {
+ #[derive(
+ Debug, PartialEq, Eq, Hash, PartialOrd, Ord, Clone, serde::Deserialize, serde::Serialize,
+ )]
+ pub enum Bar {
+ #[serde(rename = "bar0")]
+ Bar0,
+ #[serde(rename = "bar1")]
+ Bar1,
+ #[serde(rename = "bar2")]
+ Bar2,
+ }
+ #[derive(Debug, PartialEq, Eq, Clone, serde::Deserialize, serde::Serialize)]
+ pub struct BarParent {
+ pub bar: Bar,
+ }
+ #[derive(Debug, PartialEq, Eq, Clone, serde::Deserialize, serde::Serialize)]
+ pub struct Foo {
+ #[serde(rename = "barParent")]
+ pub bar_parent: Option<BarParent>,
+ }
+ let writer_schema = r#"{
+ "type": "record",
+ "name": "Foo",
+ "namespace": "com.rallyhealth.devices.canonical.avro.model.v6_0",
+ "fields":
+ [
+ {
+ "name": "barParent",
+ "type": [
+ "null",
+ {
+ "type": "record",
+ "name": "BarParent",
+ "fields": [
+ {
+ "name": "bar",
+ "type": {
+ "type": "enum",
+ "name": "Bar",
+ "symbols":
+ [
+ "bar0",
+ "bar1",
+ "bar2"
+ ],
+ "default": "bar0"
+ }
+ }
+ ]
+ }
+ ]
+ }
+ ]
+ }"#;
+ let reader_schema = r#"{
+ "type": "record",
+ "name": "Foo",
+ "namespace": "com.rallyhealth.devices.canonical.avro.model.v6_0",
+ "fields":
+ [
+ {
+ "name": "barParent",
+ "type": [
+ "null",
+ {
+ "type": "record",
+ "name": "BarParent",
+ "fields": [
+ {
+ "name": "bar",
+ "type": {
+ "type": "enum",
+ "name": "Bar",
+ "symbols":
+ [
+ "bar0",
+ "bar2"
+ ],
+ "default": "bar0"
+ }
+ }
+ ]
+ }
+ ]
+ }
+ ]
+ }"#;
+ let writer_schema = Schema::parse_str(writer_schema)?;
+ let foo = Foo {
+ bar_parent: Some(BarParent { bar: Bar::Bar1 }),
+ };
+ let avro_value = crate::to_value(foo)?;
+ assert!(
+ avro_value.validate(&writer_schema),
+ "value is valid for schema",
+ );
+ let datum = crate::to_avro_datum(&writer_schema, avro_value)?;
+ let mut x = &datum[..];
+ let reader_schema = Schema::parse_str(reader_schema)?;
+ let deser_value = crate::from_avro_datum(&writer_schema, &mut x, Some(&reader_schema))?;
+ match deser_value {
+ types::Value::Record(fields) => {
+ assert_eq!(fields.len(), 1);
+ assert_eq!(fields[0].0, "barParent");
+ // TODO: better validation
+ }
+ _ => panic!("Expected Value::Record"),
+ }
+ Ok(())
+}
+
+#[test]
+fn test_avro_3786_deserialize_union_with_different_enum_order_defined_in_record_v2() -> TestResult {
+ #[derive(
+ Debug, PartialEq, Eq, Hash, PartialOrd, Ord, Clone, serde::Deserialize, serde::Serialize,
+ )]
+ pub enum Bar {
+ #[serde(rename = "bar0")]
+ Bar0,
+ #[serde(rename = "bar1")]
+ Bar1,
+ #[serde(rename = "bar2")]
+ Bar2,
+ }
+ #[derive(Debug, PartialEq, Eq, Clone, serde::Deserialize, serde::Serialize)]
+ pub struct BarParent {
+ pub bar: Bar,
+ }
+ #[derive(Debug, PartialEq, Eq, Clone, serde::Deserialize, serde::Serialize)]
+ pub struct Foo {
+ #[serde(rename = "barParent")]
+ pub bar_parent: Option<BarParent>,
+ }
+ let writer_schema = r#"{
+ "type": "record",
+ "name": "Foo",
+ "namespace": "com.rallyhealth.devices.canonical.avro.model.v6_0",
+ "fields":
+ [
+ {
+ "name": "barParent",
+ "type": [
+ "null",
+ {
+ "type": "record",
+ "name": "BarParent",
+ "fields": [
+ {
+ "name": "bar",
+ "type": {
+ "type": "enum",
+ "name": "Bar",
+ "symbols":
+ [
+ "bar0",
+ "bar1",
+ "bar2"
+ ],
+ "default": "bar2"
+ }
+ }
+ ]
+ }
+ ]
+ }
+ ]
+ }"#;
+ let reader_schema = r#"{
+ "type": "record",
+ "name": "Foo",
+ "namespace": "com.rallyhealth.devices.canonical.avro.model.v6_0",
+ "fields":
+ [
+ {
+ "name": "barParent",
+ "type": [
+ "null",
+ {
+ "type": "record",
+ "name": "BarParent",
+ "fields": [
+ {
+ "name": "bar",
+ "type": {
+ "type": "enum",
+ "name": "Bar",
+ "symbols":
+ [
+ "bar1",
+ "bar2"
+ ],
+ "default": "bar2"
+ }
+ }
+ ]
+ }
+ ]
+ }
+ ]
+ }"#;
+ let writer_schema = Schema::parse_str(writer_schema)?;
+ let foo = Foo {
+ bar_parent: Some(BarParent { bar: Bar::Bar1 }),
+ };
+ let avro_value = crate::to_value(foo)?;
+ assert!(
+ avro_value.validate(&writer_schema),
+ "value is valid for schema",
+ );
+ let datum = crate::to_avro_datum(&writer_schema, avro_value)?;
+ let mut x = &datum[..];
+ let reader_schema = Schema::parse_str(reader_schema)?;
+ let deser_value = crate::from_avro_datum(&writer_schema, &mut x, Some(&reader_schema))?;
+ match deser_value {
+ types::Value::Record(fields) => {
+ assert_eq!(fields.len(), 1);
+ assert_eq!(fields[0].0, "barParent");
+ // TODO: better validation
+ }
+ _ => panic!("Expected Value::Record"),
+ }
+ Ok(())
+}
+
+#[test]
+fn deserialize_union_with_different_enum_order_defined_in_record() -> TestResult {
+ #[derive(
+ Debug, PartialEq, Eq, Hash, PartialOrd, Ord, Clone, serde::Deserialize, serde::Serialize,
+ )]
+ pub enum Bar {
+ #[serde(rename = "bar0")]
+ Bar0,
+ #[serde(rename = "bar1")]
+ Bar1,
+ #[serde(rename = "bar2")]
+ Bar2,
+ }
+ #[derive(Debug, PartialEq, Eq, Clone, serde::Deserialize, serde::Serialize)]
+ pub struct BarParent {
+ pub bar: Bar,
+ }
+ #[derive(Debug, PartialEq, Eq, Clone, serde::Deserialize, serde::Serialize)]
+ pub struct Foo {
+ #[serde(rename = "barParent")]
+ pub bar_parent: Option<BarParent>,
+ }
+ let writer_schema = r#"{
+ "type": "record",
+ "name": "Foo",
+ "namespace": "com.rallyhealth.devices.canonical.avro.model.v6_0",
+ "fields":
+ [
+ {
+ "name": "barParent",
+ "type": [
+ "null",
+ {
+ "type": "record",
+ "name": "BarParent",
+ "fields": [
+ {
+ "name": "bar",
+ "type": {
+ "type": "enum",
+ "name": "Bar",
+ "symbols":
+ [
+ "bar0",
+ "bar1",
+ "bar2"
+ ],
+ "default": "bar0"
+ }
+ }
+ ]
+ }
+ ]
+ }
+ ]
+ }"#;
+ let reader_schema = r#"{
+ "type": "record",
+ "name": "Foo",
+ "namespace": "com.rallyhealth.devices.canonical.avro.model.v6_0",
+ "fields":
+ [
+ {
+ "name": "barParent",
+ "type": [
+ "null",
+ {
+ "type": "record",
+ "name": "BarParent",
+ "fields": [
+ {
+ "name": "bar",
+ "type": {
+ "type": "enum",
+ "name": "Bar",
+ "symbols":
+ [
+ "bar0",
+ "bar2"
+ ],
+ "default": "bar0"
+ }
+ }
+ ]
+ }
+ ]
+ }
+ ]
+ }"#;
+ let writer_schema = Schema::parse_str(writer_schema)?;
+ let foo = Foo {
+ bar_parent: Some(BarParent { bar: Bar::Bar2 }),
+ };
+ let avro_value = crate::to_value(foo)?;
+ assert!(
+ avro_value.validate(&writer_schema),
+ "value is valid for schema",
+ );
+ let datum = crate::to_avro_datum(&writer_schema, avro_value)?;
+ let mut x = &datum[..];
+ let reader_schema = Schema::parse_str(reader_schema)?;
+ let deser_value = crate::from_avro_datum(&writer_schema, &mut x, Some(&reader_schema))?;
+ match deser_value {
+ types::Value::Record(fields) => {
+ assert_eq!(fields.len(), 1);
+ assert_eq!(fields[0].0, "barParent");
+ // TODO: better validation
+ }
+ _ => panic!("Expected Value::Record"),
+ }
+ Ok(())
+}
+
+#[test]
+fn deserialize_union_with_record_with_enum_defined_inline_reader_has_different_indices(
+) -> TestResult {
+ #[derive(
+ Debug, PartialEq, Eq, Hash, PartialOrd, Ord, Clone, serde::Deserialize, serde::Serialize,
+ )]
+ pub enum DefinedInRecord {
+ #[serde(rename = "val0")]
+ Val0,
+ #[serde(rename = "val1")]
+ Val1,
+ #[serde(rename = "val2")]
+ Val2,
+ #[serde(rename = "UNKNOWN")]
+ Unknown,
+ }
+ #[derive(Debug, PartialEq, Eq, Clone, serde::Deserialize, serde::Serialize)]
+ pub struct Parent {
+ pub date: i64,
+ #[serde(rename = "barUse")]
+ pub bar_use: Bar,
+ #[serde(rename = "bazUse")]
+ pub baz_use: Option<Vec<Baz>>,
+ #[serde(rename = "definedInRecord")]
+ pub defined_in_record: DefinedInRecord,
+ #[serde(rename = "optionalString")]
+ pub optional_string: Option<String>,
+ }
+ #[derive(
+ Debug, PartialEq, Eq, Hash, PartialOrd, Ord, Clone, serde::Deserialize, serde::Serialize,
+ )]
+ pub enum Baz {
+ #[serde(rename = "baz0")]
+ Baz0,
+ #[serde(rename = "baz1")]
+ Baz1,
+ #[serde(rename = "baz2")]
+ Baz2,
+ }
+ #[derive(
+ Debug, PartialEq, Eq, Hash, PartialOrd, Ord, Clone, serde::Deserialize, serde::Serialize,
+ )]
+ pub enum Bar {
+ #[serde(rename = "bar0")]
+ Bar0,
+ #[serde(rename = "bar1")]
+ Bar1,
+ #[serde(rename = "bar2")]
+ Bar2,
+ }
+ #[derive(Debug, PartialEq, Eq, Clone, serde::Deserialize, serde::Serialize)]
+ pub struct Foo {
+ #[serde(rename = "barInit")]
+ pub bar_init: Bar,
+ pub baz: Baz,
+ pub parent: Option<Parent>,
+ }
+ let writer_schema = r#"{
+ "type": "record",
+ "name": "Foo",
+ "namespace": "fake",
+ "fields":
+ [
+ {
+ "name": "barInit",
+ "type":
+ {
+ "type": "enum",
+ "name": "Bar",
+ "symbols":
+ [
+ "bar0",
+ "bar1",
+ "bar2"
+ ],
+ "default": "bar0"
+ }
+ },
+ {
+ "name": "baz",
+ "type":
+ {
+ "type": "enum",
+ "name": "Baz",
+ "symbols":
+ [
+ "baz0",
+ "baz1",
+ "baz2"
+ ],
+ "default": "baz0"
+ }
+ },
+ {
+ "name": "parent",
+ "type": [
+ "null",
+ {
+ "type": "record",
+ "name": "Parent",
+ "fields": [
+ {
+ "name": "date",
+ "type": {
+ "type": "long",
+ "avro.java.long": "Long"
+ }
+ },
+ {
+ "name": "barUse",
+ "type": "Bar"
+ },
+ {
+ "name": "bazUse",
+ "type": [
+ "null",
+ {
+ "type": "array",
+ "items": {
+ "type": "Baz"
+ }
+ }
+ ]
+ },
+ {
+ "name": "definedInRecord",
+ "type": {
+ "name": "DefinedInRecord",
+ "type": "enum",
+ "symbols": [
+ "val0",
+ "val1",
+ "val2",
+ "UNKNOWN"
+ ],
+ "default": "UNKNOWN"
+ }
+ },
+ {
+ "name": "optionalString",
+ "type": [
+ "null",
+ "string"
+ ]
+ }
+ ]
+ }
+ ]
+ }
+ ]
+ }"#;
+ let reader_schema = r#"{
+ "type": "record",
+ "name": "Foo",
+ "namespace": "fake",
+ "fields":
+ [
+ {
+ "name": "barInit",
+ "type":
+ {
+ "type": "enum",
+ "name": "Bar",
+ "symbols":
+ [
+ "bar0",
+ "bar2"
+ ],
+ "default": "bar0"
+ }
+ },
+ {
+ "name": "baz",
+ "type":
+ {
+ "type": "enum",
+ "name": "Baz",
+ "symbols":
+ [
+ "baz0",
+ "baz2"
+ ],
+ "default": "baz0"
+ }
+ },
+ {
+ "name": "parent",
+ "type": [
+ "null",
+ {
+ "type": "record",
+ "name": "Parent",
+ "fields": [
+ {
+ "name": "date",
+ "type": {
+ "type": "long",
+ "avro.java.long": "Long"
+ }
+ },
+ {
+ "name": "barUse",
+ "type": "Bar"
+ },
+ {
+ "name": "bazUse",
+ "type": [
+ "null",
+ {
+ "type": "array",
+ "items": {
+ "type": "Baz"
+ }
+ }
+ ]
+ },
+ {
+ "name": "definedInRecord",
+ "type": {
+ "name": "DefinedInRecord",
+ "type": "enum",
+ "symbols": [
+ "val1",
+ "val2",
+ "UNKNOWN"
+ ],
+ "default": "UNKNOWN"
+ }
+ },
+ {
+ "name": "optionalString",
+ "type": [
+ "null",
+ "string"
+ ]
+ }
+ ]
+ }
+ ]
+ }
+ ]
+ }"#;
+ let writer_schema = Schema::parse_str(writer_schema)?;
+ let foo = Foo {
+ bar_init: Bar::Bar0,
+ baz: Baz::Baz0,
+ parent: Some(Parent {
+ bar_use: Bar::Bar0,
+ baz_use: Some(vec![Baz::Baz0]),
+ optional_string: Some("test".to_string()),
+ date: 1689197893,
+ defined_in_record: DefinedInRecord::Val1,
+ }),
+ };
+ let avro_value = crate::to_value(foo)?;
+ assert!(
+ avro_value.validate(&writer_schema),
+ "value is valid for schema",
+ );
+ let datum = crate::to_avro_datum(&writer_schema, avro_value)?;
+ let mut x = &datum[..];
+ let reader_schema = Schema::parse_str(reader_schema)?;
+ let deser_value = crate::from_avro_datum(&writer_schema, &mut x, Some(&reader_schema))?;
+ match deser_value {
+ types::Value::Record(fields) => {
+ assert_eq!(fields.len(), 3);
+ assert_eq!(fields[0].0, "barInit");
+ assert_eq!(fields[0].1, types::Value::Enum(0, "bar0".to_string()));
+ // TODO: better validation
+ }
+ _ => panic!("Expected Value::Record"),
+ }
+ Ok(())
+}