You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@avro.apache.org by dk...@apache.org on 2018/11/29 20:21:39 UTC
[avro] branch master updated: [AVRO-2226] Fixes UnionSchema specificity
This is an automated email from the ASF dual-hosted git repository.
dkulp pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/avro.git
The following commit(s) were added to refs/heads/master by this push:
new 3e5cf4a [AVRO-2226] Fixes UnionSchema specificity
3e5cf4a is described below
commit 3e5cf4a94dd1f71e42a45c812f26a53c7b89e945
Author: Andrew Kelleher <an...@venmo.com>
AuthorDate: Fri Sep 14 09:31:56 2018 -0400
[AVRO-2226] Fixes UnionSchema specificity
Trouble arises in the Python library when deducing the appropriate schema from a list of schemas given a particular datum.
When "null" values are allowed for fields in two separate schemas, there is no way to determine which schema should be used, given a list of schemas that are set as the type definition for a record.
This PR checks to ensure all fields defined on a given datum are _also_ defined in the schema being validated to use for that datum.
With this bugfix, datums such as `{"foo": "a"}` will no longer "cast" to schemas such as `{"name": "bar", "type": ["long", "null"]}`, as they currently do.
---
lang/py3/avro/io.py | 14 +++++++++++---
lang/py3/avro/tests/test_io.py | 24 ++++++++++++++++++++++++
2 files changed, 35 insertions(+), 3 deletions(-)
diff --git a/lang/py3/avro/io.py b/lang/py3/avro/io.py
index b944904..7b56dff 100644
--- a/lang/py3/avro/io.py
+++ b/lang/py3/avro/io.py
@@ -135,9 +135,17 @@ def Validate(expected_schema, datum):
return any(Validate(union_branch, datum)
for union_branch in expected_schema.schemas)
elif schema_type in ['record', 'error', 'request']:
- return (isinstance(datum, dict)
- and all(Validate(field.type, datum.get(field.name))
- for field in expected_schema.fields))
+ if not isinstance(datum, dict):
+ return False
+ expected_schema_field_names = set()
+ for field in expected_schema.fields:
+ expected_schema_field_names.add(field.name)
+ if not Validate(field.type, datum.get(field.name)):
+ return False
+ for datum_field in datum.keys():
+ if datum_field not in expected_schema_field_names:
+ return False
+ return True
else:
raise AvroTypeException('Unknown Avro schema type: %r' % schema_type)
diff --git a/lang/py3/avro/tests/test_io.py b/lang/py3/avro/tests/test_io.py
index 8349ce5..3d58683 100644
--- a/lang/py3/avro/tests/test_io.py
+++ b/lang/py3/avro/tests/test_io.py
@@ -346,6 +346,30 @@ class TestIO(unittest.TestCase):
self.assertRaises(
avro_io.AvroTypeException, write_datum, datum_to_write, writer_schema)
+ def testUnionSchemaSpecificity(self):
+ union_schema = schema.Parse("""
+ [{
+ "type" : "record",
+ "name" : "A",
+ "fields" : [{"name" : "foo", "type" : ["string", "null"]}]
+ },
+ {
+ "type" : "record",
+ "name" : "B",
+ "fields" : [{"name" : "bar", "type" : ["string", "null"]}]
+ },
+ {
+ "type" : "record",
+ "name" : "AOrB",
+ "fields" : [{"name" : "entity", "type" : ["A", "B"]}]
+ }]
+ """)
+ sch = {s.name: s for s in union_schema.schemas}.get('AOrB')
+ datum_to_read = {'entity': {'foo': 'this is an instance of schema A'}}
+ writer, encoder, datum_writer = write_datum(datum_to_read, sch)
+ datum_read = read_datum(writer, sch, sch)
+ self.assertEqual(datum_to_read, datum_read)
+
if __name__ == '__main__':
raise Exception('Use run_tests.py')