You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@avro.apache.org by mg...@apache.org on 2022/03/24 07:00:50 UTC

[avro] branch master updated: AVRO-3460: [rust] Value::validate does not validate against Schema Refs (#1620)

This is an automated email from the ASF dual-hosted git repository.

mgrigorov pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/avro.git


The following commit(s) were added to refs/heads/master by this push:
     new fcc4e2d  AVRO-3460: [rust] Value::validate does not validate against Schema Refs (#1620)
fcc4e2d is described below

commit fcc4e2dec64e8cbb011a388f5e612c2b09e4d3ef
Author: Martin Grigorov <ma...@users.noreply.github.com>
AuthorDate: Thu Mar 24 09:00:40 2022 +0200

    AVRO-3460: [rust] Value::validate does not validate against Schema Refs (#1620)
    
    * AVRO-3460: [rust] Value::validate does not validate against Schema Refs
    
    Signed-off-by: Martin Tzvetanov Grigorov <mg...@apache.org>
    
    * AVRO-3460: Mkdirs the data folder for interop data
    
    Signed-off-by: Martin Tzvetanov Grigorov <mg...@apache.org>
    
    * AVRO-3460: Use validate_internal() for Schema::Ref too
    
    Signed-off-by: Martin Tzvetanov Grigorov <mg...@apache.org>
---
 lang/rust/avro/examples/generate_interop_data.rs |   4 +-
 lang/rust/avro/src/types.rs                      | 167 +++++++++++++++++++++--
 2 files changed, 160 insertions(+), 11 deletions(-)

diff --git a/lang/rust/avro/examples/generate_interop_data.rs b/lang/rust/avro/examples/generate_interop_data.rs
index d8e9480..eb830e1 100644
--- a/lang/rust/avro/examples/generate_interop_data.rs
+++ b/lang/rust/avro/examples/generate_interop_data.rs
@@ -78,6 +78,8 @@ fn main() -> anyhow::Result<()> {
     let schema_str = std::fs::read_to_string("../../share/test/schemas/interop.avsc")
         .expect("Unable to read the interop Avro schema");
     let schema = Schema::parse_str(schema_str.as_str())?;
+    let data_folder = "../../build/interop/data";
+    std::fs::create_dir_all(data_folder)?;
 
     for codec in Codec::iter() {
         let codec_name = <&str>::from(codec);
@@ -87,7 +89,7 @@ fn main() -> anyhow::Result<()> {
             format!("_{}", codec_name)
         };
 
-        let file_name = format!("../../build/interop/data/rust{}.avro", suffix);
+        let file_name = format!("{}/rust{}.avro", data_folder, suffix);
         let output_file = std::fs::File::create(&file_name)?;
 
         let mut writer = Writer::with_codec(&schema, BufWriter::new(output_file), codec);
diff --git a/lang/rust/avro/src/types.rs b/lang/rust/avro/src/types.rs
index 685904b..cd00b7c 100644
--- a/lang/rust/avro/src/types.rs
+++ b/lang/rust/avro/src/types.rs
@@ -333,8 +333,15 @@ impl Value {
     /// See the [Avro specification](https://avro.apache.org/docs/current/spec.html)
     /// for the full set of rules of schema validation.
     pub fn validate(&self, schema: &Schema) -> bool {
+        let rs = ResolvedSchema::try_from(schema).expect("Schema didn't successfully parse");
+        self.validate_internal(schema, rs.get_names())
+    }
+
+    fn validate_internal(&self, schema: &Schema, names: &NamesRef) -> bool {
         match (self, schema) {
-            (_, &Schema::Ref { name: _ }) => true,
+            (_, &Schema::Ref { ref name }) => names
+                .get(name)
+                .map_or(false, |s| self.validate_internal(s, names)),
             (&Value::Null, &Schema::Null) => true,
             (&Value::Boolean(_), &Schema::Boolean) => true,
             (&Value::Int(_), &Schema::Int) => true,
@@ -372,26 +379,26 @@ impl Value {
             (&Value::Union(i, ref value), &Schema::Union(ref inner)) => inner
                 .variants()
                 .get(i as usize)
-                .map(|schema| value.validate(schema))
+                .map(|schema| value.validate_internal(schema, names))
                 .unwrap_or(false),
-            (&Value::Array(ref items), &Schema::Array(ref inner)) => {
-                items.iter().all(|item| item.validate(inner))
-            }
-            (&Value::Map(ref items), &Schema::Map(ref inner)) => {
-                items.iter().all(|(_, value)| value.validate(inner))
-            }
+            (&Value::Array(ref items), &Schema::Array(ref inner)) => items
+                .iter()
+                .all(|item| item.validate_internal(inner, names)),
+            (&Value::Map(ref items), &Schema::Map(ref inner)) => items
+                .iter()
+                .all(|(_, value)| value.validate_internal(inner, names)),
             (&Value::Record(ref record_fields), &Schema::Record { ref fields, .. }) => {
                 fields.len() == record_fields.len()
                     && fields.iter().zip(record_fields.iter()).all(
                         |(field, &(ref name, ref value))| {
-                            field.name == *name && value.validate(&field.schema)
+                            field.name == *name && value.validate_internal(&field.schema, names)
                         },
                     )
             }
             (&Value::Map(ref items), &Schema::Record { ref fields, .. }) => {
                 fields.iter().all(|field| {
                     if let Some(item) = items.get(&field.name) {
-                        item.validate(&field.schema)
+                        item.validate_internal(&field.schema, names)
                     } else {
                         false
                     }
@@ -1847,4 +1854,144 @@ mod tests {
             .resolve(&schema)
             .expect("Should be able to resolve value to the schema that is it's definition");
     }
+
+    #[test]
+    fn test_avro_3460_validation_with_refs() {
+        let schema = Schema::parse_str(
+            r#"
+        {
+            "type":"record",
+            "name":"TestStruct",
+            "fields": [
+                {
+                    "name":"a",
+                    "type":{
+                        "type":"record",
+                        "name": "Inner",
+                        "fields": [ {
+                            "name":"z",
+                            "type":"int"
+                        }]
+                    }
+                },
+                {
+                    "name":"b",
+                    "type":"Inner"
+                }
+            ]
+        }"#,
+        )
+        .unwrap();
+
+        let inner_value_right = Value::Record(vec![("z".into(), Value::Int(3))]);
+        let inner_value_wrong1 = Value::Record(vec![("z".into(), Value::Null)]);
+        let inner_value_wrong2 = Value::Record(vec![("a".into(), Value::String("testing".into()))]);
+        let outer1 = Value::Record(vec![
+            ("a".into(), inner_value_right.clone()),
+            ("b".into(), inner_value_wrong1),
+        ]);
+
+        let outer2 = Value::Record(vec![
+            ("a".into(), inner_value_right),
+            ("b".into(), inner_value_wrong2),
+        ]);
+
+        assert!(
+            !outer1.validate(&schema),
+            "field b record is invalid against the schema"
+        ); // this should pass, but doesn't
+        assert!(
+            !outer2.validate(&schema),
+            "field b record is invalid against the schema"
+        ); // this should pass, but doesn't
+    }
+
+    #[test]
+    fn test_avro_3460_validation_with_refs_real_struct() {
+        use crate::ser::Serializer;
+        use serde::Serialize;
+
+        #[derive(Serialize, Clone)]
+        struct TestInner {
+            z: i32,
+        }
+
+        #[derive(Serialize)]
+        struct TestRefSchemaStruct1 {
+            a: TestInner,
+            b: String, // could be literally anything
+        }
+
+        #[derive(Serialize)]
+        struct TestRefSchemaStruct2 {
+            a: TestInner,
+            b: i32, // could be literally anything
+        }
+
+        #[derive(Serialize)]
+        struct TestRefSchemaStruct3 {
+            a: TestInner,
+            b: Option<TestInner>, // could be literally anything
+        }
+
+        let schema = Schema::parse_str(
+            r#"
+        {
+            "type":"record",
+            "name":"TestStruct",
+            "fields": [
+                {
+                    "name":"a",
+                    "type":{
+                        "type":"record",
+                        "name": "Inner",
+                        "fields": [ {
+                            "name":"z",
+                            "type":"int"
+                        }]
+                    }
+                },
+                {
+                    "name":"b",
+                    "type":"Inner"
+                }
+            ]
+        }"#,
+        )
+        .unwrap();
+
+        let test_inner = TestInner { z: 3 };
+        let test_outer1 = TestRefSchemaStruct1 {
+            a: test_inner.clone(),
+            b: "testing".into(),
+        };
+        let test_outer2 = TestRefSchemaStruct2 {
+            a: test_inner.clone(),
+            b: 24,
+        };
+        let test_outer3 = TestRefSchemaStruct3 {
+            a: test_inner,
+            b: None,
+        };
+
+        let mut ser = Serializer::default();
+        let test_outer1: Value = test_outer1.serialize(&mut ser).unwrap();
+        let mut ser = Serializer::default();
+        let test_outer2: Value = test_outer2.serialize(&mut ser).unwrap();
+        let mut ser = Serializer::default();
+        let test_outer3: Value = test_outer3.serialize(&mut ser).unwrap();
+
+        assert!(
+            !test_outer1.validate(&schema),
+            "field b record is invalid against the schema"
+        ); // this should pass, but doesn't
+        assert!(
+            !test_outer2.validate(&schema),
+            "field b record is invalid against the schema"
+        ); // this should pass, but doesn't
+        assert!(
+            !test_outer3.validate(&schema),
+            "field b record is invalid against the schema"
+        ); // this should pass, but doesn't
+    }
 }