You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@avro.apache.org by mg...@apache.org on 2022/02/07 15:19:02 UTC

[avro] branch avro-3370-reference-avro-complex-types created (now 3e23f77)

This is an automated email from the ASF dual-hosted git repository.

mgrigorov pushed a change to branch avro-3370-reference-avro-complex-types
in repository https://gitbox.apache.org/repos/asf/avro.git.


      at 3e23f77  AVRO-3370: [Spec] Inconsistent behaviour on types as invalid names.

This branch includes the following new commits:

     new 3e23f77  AVRO-3370: [Spec] Inconsistent behaviour on types as invalid names.

The 1 revisions listed above as "new" are entirely new to this
repository and will be described in separate emails.  The revisions
listed as "add" were already present in the repository and have only
been added to this reference.


[avro] 01/01: AVRO-3370: [Spec] Inconsistent behaviour on types as invalid names.

Posted by mg...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

mgrigorov pushed a commit to branch avro-3370-reference-avro-complex-types
in repository https://gitbox.apache.org/repos/asf/avro.git

commit 3e23f777d0717cd8895e821798a2e5139ea69f11
Author: Martin Tzvetanov Grigorov <mg...@apache.org>
AuthorDate: Mon Feb 7 17:16:46 2022 +0200

    AVRO-3370: [Spec] Inconsistent behaviour on types as invalid names.
    
    Support references to types named after Avro complex types (record, enum
    & fixed)
    
    Signed-off-by: Martin Tzvetanov Grigorov <mg...@apache.org>
---
 lang/rust/src/schema.rs | 254 ++++++++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 248 insertions(+), 6 deletions(-)

diff --git a/lang/rust/src/schema.rs b/lang/rust/src/schema.rs
index ab5fd04..894aa6c 100644
--- a/lang/rust/src/schema.rs
+++ b/lang/rust/src/schema.rs
@@ -838,17 +838,35 @@ impl Parser {
         }
     }
 
+    /// Returns already parsed schema or a schema that is currently being resolved.
+    fn get_already_seen_schema(&self, complex: &Map<String, Value>) -> Option<&Schema> {
+        match complex.get("type") {
+            Some(Value::String(ref typ)) => self
+                .resolving_schemas
+                .get(typ)
+                .or_else(|| self.parsed_schemas.get(typ)),
+            _ => None,
+        }
+    }
+
     /// Parse a `serde_json::Value` representing a Avro record type into a
     /// `Schema`.
     fn parse_record(&mut self, complex: &Map<String, Value>) -> AvroResult<Schema> {
         let name = Name::parse(complex)?;
 
+        let fields_opt = complex.get("fields");
+
+        if fields_opt.is_none() {
+            if let Some(seen) = self.get_already_seen_schema(complex) {
+                return Ok(seen.clone());
+            }
+        }
+
         let mut lookup = HashMap::new();
 
         self.register_resolving_schema(&name);
 
-        let fields: Vec<RecordField> = complex
-            .get("fields")
+        let fields: Vec<RecordField> = fields_opt
             .and_then(|fields| fields.as_array())
             .ok_or(Error::GetRecordFieldsJson)
             .and_then(|fields| {
@@ -880,8 +898,15 @@ impl Parser {
     fn parse_enum(&mut self, complex: &Map<String, Value>) -> AvroResult<Schema> {
         let name = Name::parse(complex)?;
 
-        let symbols: Vec<String> = complex
-            .get("symbols")
+        let symbols_opt = complex.get("symbols");
+
+        if symbols_opt.is_none() {
+            if let Some(seen) = self.get_already_seen_schema(complex) {
+                return Ok(seen.clone());
+            }
+        }
+
+        let symbols: Vec<String> = symbols_opt
             .and_then(|v| v.as_array())
             .ok_or(Error::GetEnumSymbolsField)
             .and_then(|symbols| {
@@ -953,13 +978,19 @@ impl Parser {
     fn parse_fixed(&mut self, complex: &Map<String, Value>) -> AvroResult<Schema> {
         let name = Name::parse(complex)?;
 
+        let size_opt = complex.get("size");
+        if size_opt.is_none() {
+            if let Some(seen) = self.get_already_seen_schema(complex) {
+                return Ok(seen.clone());
+            }
+        }
+
         let doc = complex.get("doc").and_then(|v| match &v {
             Value::String(ref docstr) => Some(docstr.clone()),
             _ => None,
         });
 
-        let size = complex
-            .get("size")
+        let size = size_opt
             .and_then(|v| v.as_i64())
             .ok_or(Error::GetFixedSizeField)?;
 
@@ -1725,6 +1756,217 @@ mod tests {
         assert_eq!(canonical_form, &expected);
     }
 
+    // AVRO-3370
+    #[test]
+    fn test_record_schema_with_currently_parsing_schema_named_record() {
+        let schema = Schema::parse_str(
+            r#"
+            {
+              "type" : "record",
+              "name" : "record",
+              "fields" : [
+                 { "name" : "value", "type" : "long" },
+                 { "name" : "next", "type" : "record" }
+             ]
+            }
+        "#,
+        )
+        .unwrap();
+
+        let mut lookup = HashMap::new();
+        lookup.insert("value".to_owned(), 0);
+        lookup.insert("next".to_owned(), 1);
+
+        let expected = Schema::Record {
+            name: Name {
+                name: "record".to_owned(),
+                namespace: None,
+                aliases: None,
+            },
+            doc: None,
+            fields: vec![
+                RecordField {
+                    name: "value".to_string(),
+                    doc: None,
+                    default: None,
+                    schema: Schema::Long,
+                    order: RecordFieldOrder::Ascending,
+                    position: 0,
+                },
+                RecordField {
+                    name: "next".to_string(),
+                    doc: None,
+                    default: None,
+                    schema: Schema::Ref {
+                        name: Name {
+                            name: "record".to_owned(),
+                            namespace: None,
+                            aliases: None,
+                        },
+                    },
+                    order: RecordFieldOrder::Ascending,
+                    position: 1,
+                },
+            ],
+            lookup,
+        };
+        assert_eq!(schema, expected);
+
+        let canonical_form = &schema.canonical_form();
+        let expected = r#"{"name":"record","type":"record","fields":[{"name":"value","type":"long"},{"name":"next","type":"record"}]}"#;
+        assert_eq!(canonical_form, &expected);
+    }
+
+    // AVRO-3370
+    #[test]
+    fn test_record_schema_with_currently_parsing_schema_named_enum() {
+        let schema = Schema::parse_str(
+            r#"
+            {
+              "type" : "record",
+              "name" : "record",
+              "fields" : [
+                 {
+                    "type" : "enum",
+                    "name" : "enum",
+                    "symbols": ["one", "two", "three"]
+                 },
+                 { "name" : "next", "type" : "enum" }
+             ]
+            }
+        "#,
+        )
+        .unwrap();
+
+        let mut lookup = HashMap::new();
+        lookup.insert("enum".to_owned(), 0);
+        lookup.insert("next".to_owned(), 1);
+
+        let expected = Schema::Record {
+            name: Name {
+                name: "record".to_owned(),
+                namespace: None,
+                aliases: None,
+            },
+            doc: None,
+            fields: vec![
+                RecordField {
+                    name: "enum".to_string(),
+                    doc: None,
+                    default: None,
+                    schema: Schema::Enum {
+                        name: Name {
+                            name: "enum".to_owned(),
+                            namespace: None,
+                            aliases: None,
+                        },
+                        doc: None,
+                        symbols: vec!["one".to_string(), "two".to_string(), "three".to_string()],
+                    },
+                    order: RecordFieldOrder::Ascending,
+                    position: 0,
+                },
+                RecordField {
+                    name: "next".to_string(),
+                    doc: None,
+                    default: None,
+                    schema: Schema::Enum {
+                        name: Name {
+                            name: "enum".to_owned(),
+                            namespace: None,
+                            aliases: None,
+                        },
+                        doc: None,
+                        symbols: vec!["one".to_string(), "two".to_string(), "three".to_string()],
+                    },
+                    order: RecordFieldOrder::Ascending,
+                    position: 1,
+                },
+            ],
+            lookup,
+        };
+        assert_eq!(schema, expected);
+
+        let canonical_form = &schema.canonical_form();
+        let expected = r#"{"name":"record","type":"record","fields":[{"name":"enum","type":{"name":"enum","type":"enum","symbols":["one","two","three"]}},{"name":"next","type":{"name":"enum","type":"enum","symbols":["one","two","three"]}}]}"#;
+        assert_eq!(canonical_form, &expected);
+    }
+
+    // AVRO-3370
+    #[test]
+    fn test_record_schema_with_currently_parsing_schema_named_fixed() {
+        let schema = Schema::parse_str(
+            r#"
+            {
+              "type" : "record",
+              "name" : "record",
+              "fields" : [
+                 {
+                    "type" : "fixed",
+                    "name" : "fixed",
+                    "size": 456
+                 },
+                 { "name" : "next", "type" : "fixed" }
+             ]
+            }
+        "#,
+        )
+        .unwrap();
+
+        let mut lookup = HashMap::new();
+        lookup.insert("fixed".to_owned(), 0);
+        lookup.insert("next".to_owned(), 1);
+
+        let expected = Schema::Record {
+            name: Name {
+                name: "record".to_owned(),
+                namespace: None,
+                aliases: None,
+            },
+            doc: None,
+            fields: vec![
+                RecordField {
+                    name: "fixed".to_string(),
+                    doc: None,
+                    default: None,
+                    schema: Schema::Fixed {
+                        name: Name {
+                            name: "fixed".to_owned(),
+                            namespace: None,
+                            aliases: None,
+                        },
+                        doc: None,
+                        size: 456,
+                    },
+                    order: RecordFieldOrder::Ascending,
+                    position: 0,
+                },
+                RecordField {
+                    name: "next".to_string(),
+                    doc: None,
+                    default: None,
+                    schema: Schema::Fixed {
+                        name: Name {
+                            name: "fixed".to_owned(),
+                            namespace: None,
+                            aliases: None,
+                        },
+                        doc: None,
+                        size: 456,
+                    },
+                    order: RecordFieldOrder::Ascending,
+                    position: 1,
+                },
+            ],
+            lookup,
+        };
+        assert_eq!(schema, expected);
+
+        let canonical_form = &schema.canonical_form();
+        let expected = r#"{"name":"record","type":"record","fields":[{"name":"fixed","type":{"name":"fixed","type":"fixed","size":456}},{"name":"next","type":{"name":"fixed","type":"fixed","size":456}}]}"#;
+        assert_eq!(canonical_form, &expected);
+    }
+
     #[test]
     fn test_enum_schema() {
         let schema = Schema::parse_str(