You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@avro.apache.org by mg...@apache.org on 2022/02/25 22:21:00 UTC

[avro] branch branch-1.11 updated: AVRO-3370: [Spec] Inconsistent behaviour on types as invalid names. (#1525)

This is an automated email from the ASF dual-hosted git repository.

mgrigorov pushed a commit to branch branch-1.11
in repository https://gitbox.apache.org/repos/asf/avro.git


The following commit(s) were added to refs/heads/branch-1.11 by this push:
     new 410231a  AVRO-3370: [Spec] Inconsistent behaviour on types as invalid names. (#1525)
410231a is described below

commit 410231a05597da7cd2dc0591db045eecf9c7a4ab
Author: Martin Grigorov <ma...@users.noreply.github.com>
AuthorDate: Sat Feb 26 00:20:17 2022 +0200

    AVRO-3370: [Spec] Inconsistent behaviour on types as invalid names. (#1525)
    
    Support references to types named after Avro complex types (record, enum
    & fixed)
    
    Signed-off-by: Martin Tzvetanov Grigorov <mg...@apache.org>
    (cherry picked from commit 5d3931b9ed94a6a4558432f4993948329be35357)
---
 lang/rust/src/schema.rs | 254 ++++++++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 248 insertions(+), 6 deletions(-)

diff --git a/lang/rust/src/schema.rs b/lang/rust/src/schema.rs
index 075e8ca..28b34f8 100644
--- a/lang/rust/src/schema.rs
+++ b/lang/rust/src/schema.rs
@@ -838,17 +838,35 @@ impl Parser {
         }
     }
 
+    /// Returns already parsed schema or a schema that is currently being resolved.
+    fn get_already_seen_schema(&self, complex: &Map<String, Value>) -> Option<&Schema> {
+        match complex.get("type") {
+            Some(Value::String(ref typ)) => self
+                .resolving_schemas
+                .get(typ)
+                .or_else(|| self.parsed_schemas.get(typ)),
+            _ => None,
+        }
+    }
+
     /// Parse a `serde_json::Value` representing a Avro record type into a
     /// `Schema`.
     fn parse_record(&mut self, complex: &Map<String, Value>) -> AvroResult<Schema> {
         let name = Name::parse(complex)?;
 
+        let fields_opt = complex.get("fields");
+
+        if fields_opt.is_none() {
+            if let Some(seen) = self.get_already_seen_schema(complex) {
+                return Ok(seen.clone());
+            }
+        }
+
         let mut lookup = HashMap::new();
 
         self.register_resolving_schema(&name);
 
-        let fields: Vec<RecordField> = complex
-            .get("fields")
+        let fields: Vec<RecordField> = fields_opt
             .and_then(|fields| fields.as_array())
             .ok_or(Error::GetRecordFieldsJson)
             .and_then(|fields| {
@@ -880,8 +898,15 @@ impl Parser {
     fn parse_enum(&mut self, complex: &Map<String, Value>) -> AvroResult<Schema> {
         let name = Name::parse(complex)?;
 
-        let symbols: Vec<String> = complex
-            .get("symbols")
+        let symbols_opt = complex.get("symbols");
+
+        if symbols_opt.is_none() {
+            if let Some(seen) = self.get_already_seen_schema(complex) {
+                return Ok(seen.clone());
+            }
+        }
+
+        let symbols: Vec<String> = symbols_opt
             .and_then(|v| v.as_array())
             .ok_or(Error::GetEnumSymbolsField)
             .and_then(|symbols| {
@@ -953,13 +978,19 @@ impl Parser {
     fn parse_fixed(&mut self, complex: &Map<String, Value>) -> AvroResult<Schema> {
         let name = Name::parse(complex)?;
 
+        let size_opt = complex.get("size");
+        if size_opt.is_none() {
+            if let Some(seen) = self.get_already_seen_schema(complex) {
+                return Ok(seen.clone());
+            }
+        }
+
         let doc = complex.get("doc").and_then(|v| match &v {
             Value::String(ref docstr) => Some(docstr.clone()),
             _ => None,
         });
 
-        let size = complex
-            .get("size")
+        let size = size_opt
             .and_then(|v| v.as_i64())
             .ok_or(Error::GetFixedSizeField)?;
 
@@ -1725,6 +1756,217 @@ mod tests {
         assert_eq!(canonical_form, &expected);
     }
 
+    // AVRO-3370
+    #[test]
+    fn test_record_schema_with_currently_parsing_schema_named_record() {
+        let schema = Schema::parse_str(
+            r#"
+            {
+              "type" : "record",
+              "name" : "record",
+              "fields" : [
+                 { "name" : "value", "type" : "long" },
+                 { "name" : "next", "type" : "record" }
+             ]
+            }
+        "#,
+        )
+        .unwrap();
+
+        let mut lookup = HashMap::new();
+        lookup.insert("value".to_owned(), 0);
+        lookup.insert("next".to_owned(), 1);
+
+        let expected = Schema::Record {
+            name: Name {
+                name: "record".to_owned(),
+                namespace: None,
+                aliases: None,
+            },
+            doc: None,
+            fields: vec![
+                RecordField {
+                    name: "value".to_string(),
+                    doc: None,
+                    default: None,
+                    schema: Schema::Long,
+                    order: RecordFieldOrder::Ascending,
+                    position: 0,
+                },
+                RecordField {
+                    name: "next".to_string(),
+                    doc: None,
+                    default: None,
+                    schema: Schema::Ref {
+                        name: Name {
+                            name: "record".to_owned(),
+                            namespace: None,
+                            aliases: None,
+                        },
+                    },
+                    order: RecordFieldOrder::Ascending,
+                    position: 1,
+                },
+            ],
+            lookup,
+        };
+        assert_eq!(schema, expected);
+
+        let canonical_form = &schema.canonical_form();
+        let expected = r#"{"name":"record","type":"record","fields":[{"name":"value","type":"long"},{"name":"next","type":"record"}]}"#;
+        assert_eq!(canonical_form, &expected);
+    }
+
+    // AVRO-3370
+    #[test]
+    fn test_record_schema_with_currently_parsing_schema_named_enum() {
+        let schema = Schema::parse_str(
+            r#"
+            {
+              "type" : "record",
+              "name" : "record",
+              "fields" : [
+                 {
+                    "type" : "enum",
+                    "name" : "enum",
+                    "symbols": ["one", "two", "three"]
+                 },
+                 { "name" : "next", "type" : "enum" }
+             ]
+            }
+        "#,
+        )
+        .unwrap();
+
+        let mut lookup = HashMap::new();
+        lookup.insert("enum".to_owned(), 0);
+        lookup.insert("next".to_owned(), 1);
+
+        let expected = Schema::Record {
+            name: Name {
+                name: "record".to_owned(),
+                namespace: None,
+                aliases: None,
+            },
+            doc: None,
+            fields: vec![
+                RecordField {
+                    name: "enum".to_string(),
+                    doc: None,
+                    default: None,
+                    schema: Schema::Enum {
+                        name: Name {
+                            name: "enum".to_owned(),
+                            namespace: None,
+                            aliases: None,
+                        },
+                        doc: None,
+                        symbols: vec!["one".to_string(), "two".to_string(), "three".to_string()],
+                    },
+                    order: RecordFieldOrder::Ascending,
+                    position: 0,
+                },
+                RecordField {
+                    name: "next".to_string(),
+                    doc: None,
+                    default: None,
+                    schema: Schema::Enum {
+                        name: Name {
+                            name: "enum".to_owned(),
+                            namespace: None,
+                            aliases: None,
+                        },
+                        doc: None,
+                        symbols: vec!["one".to_string(), "two".to_string(), "three".to_string()],
+                    },
+                    order: RecordFieldOrder::Ascending,
+                    position: 1,
+                },
+            ],
+            lookup,
+        };
+        assert_eq!(schema, expected);
+
+        let canonical_form = &schema.canonical_form();
+        let expected = r#"{"name":"record","type":"record","fields":[{"name":"enum","type":{"name":"enum","type":"enum","symbols":["one","two","three"]}},{"name":"next","type":{"name":"enum","type":"enum","symbols":["one","two","three"]}}]}"#;
+        assert_eq!(canonical_form, &expected);
+    }
+
+    // AVRO-3370
+    #[test]
+    fn test_record_schema_with_currently_parsing_schema_named_fixed() {
+        let schema = Schema::parse_str(
+            r#"
+            {
+              "type" : "record",
+              "name" : "record",
+              "fields" : [
+                 {
+                    "type" : "fixed",
+                    "name" : "fixed",
+                    "size": 456
+                 },
+                 { "name" : "next", "type" : "fixed" }
+             ]
+            }
+        "#,
+        )
+        .unwrap();
+
+        let mut lookup = HashMap::new();
+        lookup.insert("fixed".to_owned(), 0);
+        lookup.insert("next".to_owned(), 1);
+
+        let expected = Schema::Record {
+            name: Name {
+                name: "record".to_owned(),
+                namespace: None,
+                aliases: None,
+            },
+            doc: None,
+            fields: vec![
+                RecordField {
+                    name: "fixed".to_string(),
+                    doc: None,
+                    default: None,
+                    schema: Schema::Fixed {
+                        name: Name {
+                            name: "fixed".to_owned(),
+                            namespace: None,
+                            aliases: None,
+                        },
+                        doc: None,
+                        size: 456,
+                    },
+                    order: RecordFieldOrder::Ascending,
+                    position: 0,
+                },
+                RecordField {
+                    name: "next".to_string(),
+                    doc: None,
+                    default: None,
+                    schema: Schema::Fixed {
+                        name: Name {
+                            name: "fixed".to_owned(),
+                            namespace: None,
+                            aliases: None,
+                        },
+                        doc: None,
+                        size: 456,
+                    },
+                    order: RecordFieldOrder::Ascending,
+                    position: 1,
+                },
+            ],
+            lookup,
+        };
+        assert_eq!(schema, expected);
+
+        let canonical_form = &schema.canonical_form();
+        let expected = r#"{"name":"record","type":"record","fields":[{"name":"fixed","type":{"name":"fixed","type":"fixed","size":456}},{"name":"next","type":{"name":"fixed","type":"fixed","size":456}}]}"#;
+        assert_eq!(canonical_form, &expected);
+    }
+
     #[test]
     fn test_enum_schema() {
         let schema = Schema::parse_str(