You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@avro.apache.org by ko...@apache.org on 2019/10/25 20:51:54 UTC
[avro] branch master updated: AVRO-2580: Enforce Logical Type and Literal Type Match (#668)
This is an automated email from the ASF dual-hosted git repository.
kojiromike pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/avro.git
The following commit(s) were added to refs/heads/master by this push:
new e321938 AVRO-2580: Enforce Logical Type and Literal Type Match (#668)
e321938 is described below
commit e3219382f3fbc9c20bc851f56d4e56a0f73c7c52
Author: Michael A. Smith <mi...@smith-li.com>
AuthorDate: Fri Oct 25 16:51:47 2019 -0400
AVRO-2580: Enforce Logical Type and Literal Type Match (#668)
* AVRO-2580: Refactor Schema.parse Test
* AVRO-2580: Rewrite Tests to Highlight Bug
* AVRO-2580: Require Logical Type to Match Literal Type
* AVRO-2580: Refactor Schema Tests
1. Enable showing multiple failures in a single run.
2. Use JSON to format test schema when possible.
---
lang/py/src/avro/schema.py | 61 ++--
lang/py/test/test_schema.py | 754 +++++++++++++++++---------------------------
2 files changed, 317 insertions(+), 498 deletions(-)
diff --git a/lang/py/src/avro/schema.py b/lang/py/src/avro/schema.py
index 9c4599e..d0091c9 100644
--- a/lang/py/src/avro/schema.py
+++ b/lang/py/src/avro/schema.py
@@ -881,15 +881,36 @@ class TimestampMicrosSchema(LogicalSchema, PrimitiveSchema):
#
# Module Methods
#
-def get_other_props(all_props,reserved_props):
+def get_other_props(all_props, reserved_props):
"""
Retrieve the non-reserved properties from a dictionary of properties
@args reserved_props: The set of reserved properties to exclude
"""
if callable(getattr(all_props, 'items', None)):
- return dict([(k,v) for (k,v) in all_props.items() if k not in
- reserved_props ])
-
+ return {k: v for k, v in all_props.items() if k not in reserved_props}
+
+def make_bytes_decimal_schema(other_props):
+ """Make a BytesDecimalSchema from just other_props."""
+ return BytesDecimalSchema(other_props.get('precision'), other_props.get('scale', 0))
+
+def make_logical_schema(logical_type, type_, other_props):
+ """Map the logical types to the appropriate literal type and schema class."""
+ logical_types = {
+ constants.DATE: ('int', DateSchema),
+ # Fixed decimal schema is handled before we get here.
+ constants.DECIMAL: ('bytes', make_bytes_decimal_schema),
+ constants.TIMESTAMP_MICROS: ('long', TimestampMicrosSchema),
+ constants.TIMESTAMP_MILLIS: ('long', TimestampMillisSchema),
+ constants.TIME_MICROS: ('long', TimeMicrosSchema),
+ constants.TIME_MILLIS: ('int', TimeMillisSchema),
+ }
+ try:
+ literal_type, schema_type = logical_types[logical_type]
+ except KeyError:
+ raise SchemaParseException("Currently does not support {} logical type".format(logical_type))
+ if literal_type != type_:
+ raise SchemaParseException("Logical type {} requires literal type {}, not {}".format(logical_type, literal_type, type_))
+ return schema_type(other_props)
def make_avsc_object(json_data, names=None):
"""
@@ -897,35 +918,15 @@ def make_avsc_object(json_data, names=None):
@arg names: A Name object (tracks seen names and default space)
"""
- if names == None:
+ if names is None:
names = Names()
# JSON object (non-union)
if callable(getattr(json_data, 'get', None)):
type = json_data.get('type')
other_props = get_other_props(json_data, SCHEMA_RESERVED_PROPS)
- logical_type = None
- if 'logicalType' in json_data:
- logical_type = json_data.get('logicalType')
- if logical_type not in constants.SUPPORTED_LOGICAL_TYPE:
- raise SchemaParseException("Currently does not support %s logical type" % logical_type)
- if type in PRIMITIVE_TYPES:
- if type == 'int' and logical_type == constants.DATE:
- return DateSchema(other_props)
- if type == 'int' and logical_type == constants.TIME_MILLIS:
- return TimeMillisSchema(other_props=other_props)
- if type == 'long' and logical_type == constants.TIME_MICROS:
- return TimeMicrosSchema(other_props=other_props)
- if type == 'long' and logical_type == constants.TIMESTAMP_MILLIS:
- return TimestampMillisSchema(other_props=other_props)
- if type == 'long' and logical_type == constants.TIMESTAMP_MICROS:
- return TimestampMicrosSchema(other_props=other_props)
- if type == 'bytes' and logical_type == constants.DECIMAL:
- precision = json_data.get('precision')
- scale = 0 if json_data.get('scale') is None else json_data.get('scale')
- return BytesDecimalSchema(precision, scale, other_props)
- return PrimitiveSchema(type, other_props)
- elif type in NAMED_TYPES:
+ logical_type = json_data.get('logicalType')
+ if type in NAMED_TYPES:
name = json_data.get('name')
namespace = json_data.get('namespace', names.default_namespace)
if type == 'fixed':
@@ -945,7 +946,11 @@ def make_avsc_object(json_data, names=None):
return RecordSchema(name, namespace, fields, names, type, doc, other_props)
else:
raise SchemaParseException('Unknown Named Type: %s' % type)
- elif type in VALID_TYPES:
+ if logical_type:
+ return make_logical_schema(logical_type, type, other_props or {})
+ if type in PRIMITIVE_TYPES:
+ return PrimitiveSchema(type, other_props)
+ if type in VALID_TYPES:
if type == 'array':
items = json_data.get('items')
return ArraySchema(items, names, other_props)
diff --git a/lang/py/test/test_schema.py b/lang/py/test/test_schema.py
index a794e73..6b2c0ef 100644
--- a/lang/py/test/test_schema.py
+++ b/lang/py/test/test_schema.py
@@ -21,6 +21,7 @@
from __future__ import absolute_import, division, print_function
+import json
import unittest
import set_avro_test_path
@@ -28,356 +29,208 @@ from avro import schema
from avro.schema import AvroException, SchemaParseException
-def print_test_name(test_name):
- print()
- print(test_name)
- print('=' * len(test_name))
- print()
+class TestSchema(object):
+ """A proxy for a schema string that provides useful test metadata."""
-class ExampleSchema(object):
- def __init__(self, schema_string, valid, name='', comment=''):
- self._schema_string = schema_string
- self._valid = valid
- self._name = name or schema_string # default to schema_string for name
+ def __init__(self, data, name='', comment=''):
+ if not isinstance(data, basestring):
+ data = json.dumps(data)
+ self.data = data
+ self.name = name or data # default to data for name
self.comment = comment
- @property
- def schema_string(self):
- return self._schema_string
+ def parse(self):
+ return schema.parse(str(self))
- @property
- def valid(self):
- return self._valid
+ def __str__(self):
+ return str(self.data)
- @property
- def name(self):
- return self._name
-#
-# Example Schemas
-#
+class ValidTestSchema(TestSchema):
+ """A proxy for a valid schema string that provides useful test metadata."""
+ valid = True
+
-def make_primitive_examples():
- examples = []
- for type in schema.PRIMITIVE_TYPES:
- examples.append(ExampleSchema('"%s"' % type, True))
- examples.append(ExampleSchema('{"type": "%s"}' % type, True))
- return examples
+class InvalidTestSchema(ValidTestSchema):
+ """A proxy for an invalid schema string that provides useful test metadata."""
+ valid = False
-PRIMITIVE_EXAMPLES = [
- ExampleSchema('"True"', False),
- ExampleSchema('True', False),
- ExampleSchema('{"no_type": "test"}', False),
- ExampleSchema('{"type": "panther"}', False),
-] + make_primitive_examples()
+
+PRIMITIVE_EXAMPLES = ([
+ InvalidTestSchema('"True"'),
+ InvalidTestSchema('True'),
+ InvalidTestSchema('{"no_type": "test"}'),
+ InvalidTestSchema('{"type": "panther"}'),
+] + [ValidTestSchema('"{}"'.format(t)) for t in schema.PRIMITIVE_TYPES]
+ + [ValidTestSchema({"type": t}) for t in schema.PRIMITIVE_TYPES])
FIXED_EXAMPLES = [
- ExampleSchema('{"type": "fixed", "name": "Test", "size": 1}', True),
- ExampleSchema("""\
- {"type": "fixed",
- "name": "MyFixed",
- "namespace": "org.apache.hadoop.avro",
- "size": 1}
- """, True),
- ExampleSchema("""\
- {"type": "fixed",
- "name": "Missing size"}
- """, False),
- ExampleSchema("""\
- {"type": "fixed",
- "size": 314}
- """, False),
+ ValidTestSchema({"type": "fixed", "name": "Test", "size": 1}),
+ ValidTestSchema({"type": "fixed", "name": "MyFixed", "size": 1,
+ "namespace": "org.apache.hadoop.avro"}),
+ InvalidTestSchema({"type": "fixed", "name": "Missing size"}),
+ InvalidTestSchema({"type": "fixed", "size": 314}),
]
ENUM_EXAMPLES = [
- ExampleSchema('{"type": "enum", "name": "Test", "symbols": ["A", "B"]}', True),
- ExampleSchema("""\
- {"type": "enum",
- "name": "Status",
- "symbols": "Normal Caution Critical"}
- """, False),
- ExampleSchema("""\
- {"type": "enum",
- "name": [ 0, 1, 1, 2, 3, 5, 8 ],
- "symbols": ["Golden", "Mean"]}
- """, False),
- ExampleSchema("""\
- {"type": "enum",
- "symbols" : ["I", "will", "fail", "no", "name"]}
- """, False),
- ExampleSchema("""\
- {"type": "enum",
- "name": "Test"
- "symbols" : ["AA", "AA"]}
- """, False),
+ ValidTestSchema({"type": "enum", "name": "Test", "symbols": ["A", "B"]}),
+ InvalidTestSchema({"type": "enum", "name": "Status", "symbols": "Normal Caution Critical"}),
+ InvalidTestSchema({"type": "enum", "name": [0, 1, 1, 2, 3, 5, 8],
+ "symbols": ["Golden", "Mean"]}),
+ InvalidTestSchema({"type": "enum", "symbols" : ["I", "will", "fail", "no", "name"]}),
+ InvalidTestSchema({"type": "enum", "name": "Test", "symbols": ["AA", "AA"]}),
]
ARRAY_EXAMPLES = [
- ExampleSchema('{"type": "array", "items": "long"}', True),
- ExampleSchema("""\
- {"type": "array",
- "items": {"type": "enum", "name": "Test", "symbols": ["A", "B"]}}
- """, True),
+ ValidTestSchema({"type": "array", "items": "long"}),
+ ValidTestSchema({"type": "array",
+ "items": {"type": "enum", "name": "Test", "symbols": ["A", "B"]}}),
]
MAP_EXAMPLES = [
- ExampleSchema('{"type": "map", "values": "long"}', True),
- ExampleSchema("""\
- {"type": "map",
- "values": {"type": "enum", "name": "Test", "symbols": ["A", "B"]}}
- """, True),
+ ValidTestSchema({"type": "map", "values": "long"}),
+ ValidTestSchema({"type": "map",
+ "values": {"type": "enum", "name": "Test", "symbols": ["A", "B"]}}),
]
UNION_EXAMPLES = [
- ExampleSchema('["string", "null", "long"]', True),
- ExampleSchema('["null", "null"]', False),
- ExampleSchema('["long", "long"]', False),
- ExampleSchema("""\
- [{"type": "array", "items": "long"}
- {"type": "array", "items": "string"}]
- """, False),
+ ValidTestSchema(["string", "null", "long"]),
+ InvalidTestSchema(["null", "null"]),
+ InvalidTestSchema(["long", "long"]),
+ InvalidTestSchema([{"type": "array", "items": "long"},
+ {"type": "array", "items": "string"}]),
]
RECORD_EXAMPLES = [
- ExampleSchema("""\
- {"type": "record",
- "name": "Test",
- "fields": [{"name": "f",
- "type": "long"}]}
- """, True),
- ExampleSchema("""\
- {"type": "error",
- "name": "Test",
- "fields": [{"name": "f",
- "type": "long"}]}
- """, True),
- ExampleSchema("""\
- {"type": "record",
- "name": "Node",
- "fields": [{"name": "label", "type": "string"},
- {"name": "children",
- "type": {"type": "array", "items": "Node"}}]}
- """, True),
- ExampleSchema("""\
- {"type": "record",
- "name": "Lisp",
- "fields": [{"name": "value",
- "type": ["null", "string",
- {"type": "record",
- "name": "Cons",
- "fields": [{"name": "car", "type": "Lisp"},
- {"name": "cdr", "type": "Lisp"}]}]}]}
- """, True),
- ExampleSchema("""\
- {"type": "record",
- "name": "HandshakeRequest",
- "namespace": "org.apache.avro.ipc",
- "fields": [{"name": "clientHash",
- "type": {"type": "fixed", "name": "MD5", "size": 16}},
- {"name": "clientProtocol", "type": ["null", "string"]},
- {"name": "serverHash", "type": "MD5"},
- {"name": "meta",
- "type": ["null", {"type": "map", "values": "bytes"}]}]}
- """, True),
- ExampleSchema("""\
- {"type": "record",
- "name": "HandshakeResponse",
- "namespace": "org.apache.avro.ipc",
- "fields": [{"name": "match",
- "type": {"type": "enum",
- "name": "HandshakeMatch",
- "symbols": ["BOTH", "CLIENT", "NONE"]}},
- {"name": "serverProtocol", "type": ["null", "string"]},
- {"name": "serverHash",
- "type": ["null",
- {"name": "MD5", "size": 16, "type": "fixed"}]},
- {"name": "meta",
- "type": ["null", {"type": "map", "values": "bytes"}]}]}
- """, True),
- ExampleSchema("""\
- {"type": "record",
- "name": "Interop",
- "namespace": "org.apache.avro",
- "fields": [{"name": "intField", "type": "int"},
- {"name": "longField", "type": "long"},
- {"name": "stringField", "type": "string"},
- {"name": "boolField", "type": "boolean"},
- {"name": "floatField", "type": "float"},
- {"name": "doubleField", "type": "double"},
- {"name": "bytesField", "type": "bytes"},
- {"name": "nullField", "type": "null"},
- {"name": "arrayField",
- "type": {"type": "array", "items": "double"}},
- {"name": "mapField",
- "type": {"type": "map",
- "values": {"name": "Foo",
- "type": "record",
- "fields": [{"name": "label",
- "type": "string"}]}}},
- {"name": "unionField",
- "type": ["boolean",
- "double",
- {"type": "array", "items": "bytes"}]},
- {"name": "enumField",
- "type": {"type": "enum",
- "name": "Kind",
- "symbols": ["A", "B", "C"]}},
- {"name": "fixedField",
- "type": {"type": "fixed", "name": "MD5", "size": 16}},
- {"name": "recordField",
- "type": {"type": "record",
- "name": "Node",
- "fields": [{"name": "label", "type": "string"},
- {"name": "children",
- "type": {"type": "array",
- "items": "Node"}}]}}]}
- """, True),
- ExampleSchema("""\
- {"type": "record",
- "name": "ipAddr",
- "fields": [{"name": "addr",
- "type": [{"name": "IPv6", "type": "fixed", "size": 16},
- {"name": "IPv4", "type": "fixed", "size": 4}]}]}
- """, True),
- ExampleSchema("""\
- {"type": "record",
- "name": "Address",
- "fields": [{"type": "string"},
- {"type": "string", "name": "City"}]}
- """, False),
- ExampleSchema("""\
- {"type": "record",
- "name": "Event",
- "fields": [{"name": "Sponsor"},
- {"name": "City", "type": "string"}]}
- """, False),
- ExampleSchema("""\
- {"type": "record",
- "fields": "His vision, from the constantly passing bars,"
- "name", "Rainer"}
- """, False),
- ExampleSchema("""\
- {"name": ["Tom", "Jerry"],
- "type": "record",
- "fields": [{"name": "name", "type": "string"}]}
- """, False),
+ ValidTestSchema({"type": "record", "name": "Test", "fields": [{"name": "f", "type": "long"}]}),
+ ValidTestSchema({"type": "error", "name": "Test", "fields": [{"name": "f", "type": "long"}]}),
+ ValidTestSchema({"type": "record", "name": "Node",
+ "fields": [
+ {"name": "label", "type": "string"},
+ {"name": "children", "type": {"type": "array", "items": "Node"}}]}),
+ ValidTestSchema({"type": "record", "name": "Lisp",
+ "fields": [{"name": "value",
+ "type": ["null", "string",
+ {"type": "record", "name": "Cons",
+ "fields": [{"name": "car", "type": "Lisp"},
+ {"name": "cdr", "type": "Lisp"}]}]}]}),
+ ValidTestSchema({"type": "record", "name": "HandshakeRequest",
+ "namespace": "org.apache.avro.ipc",
+ "fields": [{"name": "clientHash",
+ "type": {"type": "fixed", "name": "MD5", "size": 16}},
+ {"name": "clientProtocol", "type": ["null", "string"]},
+ {"name": "serverHash", "type": "MD5"},
+ {"name": "meta",
+ "type": ["null", {"type": "map", "values": "bytes"}]}]}),
+ ValidTestSchema({"type": "record", "name": "HandshakeResponse",
+ "namespace": "org.apache.avro.ipc",
+ "fields": [{"name": "match",
+ "type": {"type": "enum", "name": "HandshakeMatch",
+ "symbols": ["BOTH", "CLIENT", "NONE"]}},
+ {"name": "serverProtocol", "type": ["null", "string"]},
+ {"name": "serverHash",
+ "type": ["null", {"name": "MD5", "size": 16, "type": "fixed"}]},
+ {"name": "meta",
+ "type": ["null", {"type": "map", "values": "bytes"}]}]}),
+ ValidTestSchema({"type": "record",
+ "name": "Interop",
+ "namespace": "org.apache.avro",
+ "fields": [{"name": "intField", "type": "int"},
+ {"name": "longField", "type": "long"},
+ {"name": "stringField", "type": "string"},
+ {"name": "boolField", "type": "boolean"},
+ {"name": "floatField", "type": "float"},
+ {"name": "doubleField", "type": "double"},
+ {"name": "bytesField", "type": "bytes"},
+ {"name": "nullField", "type": "null"},
+ {"name": "arrayField", "type": {"type": "array", "items": "double"}},
+ {"name": "mapField",
+ "type": {"type": "map",
+ "values": {"name": "Foo",
+ "type": "record",
+ "fields": [{"name": "label", "type": "string"}]}}},
+ {"name": "unionField",
+ "type": ["boolean", "double", {"type": "array", "items": "bytes"}]},
+ {"name": "enumField",
+ "type": {"type": "enum", "name": "Kind", "symbols": ["A", "B", "C"]}},
+ {"name": "fixedField",
+ "type": {"type": "fixed", "name": "MD5", "size": 16}},
+ {"name": "recordField",
+ "type": {"type": "record", "name": "Node",
+ "fields": [{"name": "label", "type": "string"},
+ {"name": "children",
+ "type": {"type": "array",
+ "items": "Node"}}]}}]}),
+ ValidTestSchema({"type": "record", "name": "ipAddr",
+ "fields": [{"name": "addr", "type": [{"name": "IPv6", "type": "fixed", "size": 16},
+ {"name": "IPv4", "type": "fixed", "size": 4}]}]}),
+ InvalidTestSchema({"type": "record", "name": "Address",
+ "fields": [{"type": "string"}, {"type": "string", "name": "City"}]}),
+ InvalidTestSchema({"type": "record", "name": "Event",
+ "fields": [{"name": "Sponsor"}, {"name": "City", "type": "string"}]}),
+ InvalidTestSchema({"type": "record", "name": "Rainer",
+ "fields": "His vision, from the constantly passing bars"}),
+ InvalidTestSchema({"name": ["Tom", "Jerry"], "type": "record",
+ "fields": [{"name": "name", "type": "string"}]}),
]
DOC_EXAMPLES = [
- ExampleSchema("""\
- {"type": "record",
- "name": "TestDoc",
- "doc": "Doc string",
- "fields": [{"name": "name", "type": "string",
- "doc" : "Doc String"}]}
- """, True),
- ExampleSchema("""\
- {"type": "enum", "name": "Test", "symbols": ["A", "B"],
- "doc": "Doc String"}
- """, True),
+ ValidTestSchema({"type": "record", "name": "TestDoc", "doc": "Doc string",
+ "fields": [{"name": "name", "type": "string", "doc" : "Doc String"}]}),
+ ValidTestSchema({"type": "enum", "name": "Test", "symbols": ["A", "B"], "doc": "Doc String"}),
]
OTHER_PROP_EXAMPLES = [
- ExampleSchema("""\
- {"type": "record",
- "name": "TestRecord",
- "cp_string": "string",
- "cp_int": 1,
- "cp_array": [ 1, 2, 3, 4],
- "fields": [ {"name": "f1", "type": "string", "cp_object": {"a":1,"b":2} },
- {"name": "f2", "type": "long", "cp_null": null} ]}
- """, True),
- ExampleSchema("""\
- {"type": "map", "values": "long", "cp_boolean": true}
- """, True),
- ExampleSchema("""\
- {"type": "enum",
- "name": "TestEnum",
- "symbols": [ "one", "two", "three" ],
- "cp_float" : 1.0 }
- """,True),
- ExampleSchema("""\
- {"type": "long",
- "date": "true"}
- """, True)
+ ValidTestSchema({"type": "record", "name": "TestRecord", "cp_string": "string",
+ "cp_int": 1, "cp_array": [1, 2, 3, 4],
+ "fields": [{"name": "f1", "type": "string", "cp_object": {"a": 1,"b": 2}},
+ {"name": "f2", "type": "long", "cp_null": None}]}),
+ ValidTestSchema({"type": "map", "values": "long", "cp_boolean": True}),
+ ValidTestSchema({"type": "enum", "name": "TestEnum",
+ "symbols": ["one", "two", "three"], "cp_float": 1.0}),
]
DECIMAL_LOGICAL_TYPE = [
- ExampleSchema("""{
- "type": "fixed",
- "logicalType": "decimal",
- "name": "TestDecimal",
- "precision": 4,
- "size": 10,
- "scale": 2}""", True),
- ExampleSchema("""{
- "type": "bytes",
- "logicalType": "decimal",
- "precision": 4,
- "scale": 2}""", True)
+ ValidTestSchema({"type": "fixed", "logicalType": "decimal", "name": "TestDecimal", "precision": 4, "size": 10, "scale": 2}),
+ ValidTestSchema({"type": "bytes", "logicalType": "decimal", "precision": 4, "scale": 2}),
+ InvalidTestSchema({"type": "bytes", "logicalType": "decimal", "precision": 2, "scale": -2}),
+ InvalidTestSchema({"type": "bytes", "logicalType": "decimal", "precision": -2, "scale": 2}),
+ InvalidTestSchema({"type": "bytes", "logicalType": "decimal", "precision": 2, "scale": 3}),
+ InvalidTestSchema({"type": "fixed", "logicalType": "decimal", "name": "TestDecimal", "precision": -10, "scale": 2, "size": 5}),
+ InvalidTestSchema({"type": "fixed", "logicalType": "decimal", "name": "TestDecimal", "precision": 2, "scale": 3, "size": 2}),
+ InvalidTestSchema({"type": "fixed", "logicalType": "decimal", "name": "TestDecimal", "precision": 2, "scale": 2, "size": -2}),
]
DATE_LOGICAL_TYPE = [
- ExampleSchema("""{
- "type": "int",
- "logicalType": "date"} """, True),
- ExampleSchema("""{
- "type": "int",
- "logicalType": "date1"} """, False),
- ExampleSchema("""{
- "type": "long",
- "logicalType": "date"} """, False),
+ ValidTestSchema({"type": "int", "logicalType": "date"}),
+ InvalidTestSchema({"type": "int", "logicalType": "date1"}),
+ InvalidTestSchema({"type": "long", "logicalType": "date"}),
]
TIMEMILLIS_LOGICAL_TYPE = [
- ExampleSchema("""{
- "type": "int",
- "logicalType": "time-millis"} """, True),
- ExampleSchema("""{
- "type": "int",
- "logicalType": "time-milis"} """, False),
- ExampleSchema("""{
- "type": "long",
- "logicalType": "time-millis"} """, False),
+ ValidTestSchema({"type": "int", "logicalType": "time-millis"}),
+ InvalidTestSchema({"type": "int", "logicalType": "time-milis"}),
+ InvalidTestSchema({"type": "long", "logicalType": "time-millis"}),
]
TIMEMICROS_LOGICAL_TYPE = [
- ExampleSchema("""{
- "type": "long",
- "logicalType": "time-micros"} """, True),
- ExampleSchema("""{
- "type": "long",
- "logicalType": "time-micro"} """, False),
- ExampleSchema("""{
- "type": "int",
- "logicalType": "time-micros"} """, False),
+ ValidTestSchema({"type": "long", "logicalType": "time-micros"}),
+ InvalidTestSchema({"type": "long", "logicalType": "time-micro"}),
+ InvalidTestSchema({"type": "int", "logicalType": "time-micros"}),
]
TIMESTAMPMILLIS_LOGICAL_TYPE = [
- ExampleSchema("""{
- "type": "long",
- "logicalType": "timestamp-millis"} """, True),
- ExampleSchema("""{
- "type": "long",
- "logicalType": "timestamp-milis"} """, False),
- ExampleSchema("""{
- "type": "int",
- "logicalType": "timestamp-millis"} """, False),
+ ValidTestSchema({"type": "long", "logicalType": "timestamp-millis"}),
+ InvalidTestSchema({"type": "long", "logicalType": "timestamp-milis"}),
+ InvalidTestSchema({"type": "int", "logicalType": "timestamp-millis"}),
]
TIMESTAMPMICROS_LOGICAL_TYPE = [
- ExampleSchema("""{
- "type": "long",
- "logicalType": "timestamp-micros"} """, True),
- ExampleSchema("""{
- "type": "long",
- "logicalType": "timestamp-micro"} """, False),
- ExampleSchema("""{
- "type": "int",
- "logicalType": "timestamp-micros"} """, False),
+ ValidTestSchema({"type": "long", "logicalType": "timestamp-micros"}),
+ InvalidTestSchema({"type": "long", "logicalType": "timestamp-micro"}),
+ InvalidTestSchema({"type": "int", "logicalType": "timestamp-micros"}),
]
-
EXAMPLES = PRIMITIVE_EXAMPLES
EXAMPLES += FIXED_EXAMPLES
EXAMPLES += ENUM_EXAMPLES
@@ -394,81 +247,22 @@ EXAMPLES += TIMESTAMPMILLIS_LOGICAL_TYPE
EXAMPLES += TIMESTAMPMICROS_LOGICAL_TYPE
VALID_EXAMPLES = [e for e in EXAMPLES if e.valid]
+INVALID_EXAMPLES = [e for e in EXAMPLES if not e.valid]
-# TODO(hammer): refactor into harness for examples
-# TODO(hammer): pretty-print detailed output
-# TODO(hammer): make verbose flag
-# TODO(hammer): show strack trace to user
-# TODO(hammer): use logging module?
class TestSchema(unittest.TestCase):
+ """Miscellaneous tests for schema"""
def test_correct_recursive_extraction(self):
+ """A recursive reference within a schema should be the same type every time."""
s = schema.parse('{"type": "record", "name": "X", "fields": [{"name": "y", "type": {"type": "record", "name": "Y", "fields": [{"name": "Z", "type": "X"}]}}]}')
t = schema.parse(str(s.fields[0].type))
# If we've made it this far, the subschema was reasonably stringified; it ccould be reparsed.
self.assertEqual("X", t.fields[0].type.name)
- def test_parse(self):
- correct = 0
- for example in EXAMPLES:
- try:
- schema.parse(example.schema_string)
- if example.valid:
- correct += 1
- else:
- self.fail("Invalid schema was parsed: " + example.schema_string)
- except:
- if not example.valid:
- correct += 1
- else:
- self.fail("Valid schema failed to parse: " + example.schema_string)
-
- fail_msg = "Parse behavior correct on %d out of %d schemas." % \
- (correct, len(EXAMPLES))
- self.assertEqual(correct, len(EXAMPLES), fail_msg)
-
- def test_valid_cast_to_string_after_parse(self):
- """
- Test that the string generated by an Avro Schema object
- is, in fact, a valid Avro schema.
- """
- print_test_name('TEST CAST TO STRING AFTER PARSE')
- correct = 0
- for example in VALID_EXAMPLES:
- schema_data = schema.parse(example.schema_string)
- schema.parse(str(schema_data))
- correct += 1
-
- fail_msg = "Cast to string success on %d out of %d schemas" % \
- (correct, len(VALID_EXAMPLES))
- self.assertEqual(correct, len(VALID_EXAMPLES), fail_msg)
-
- def test_equivalence_after_round_trip(self):
- """
- 1. Given a string, parse it to get Avro schema "original".
- 2. Serialize "original" to a string and parse that string
- to generate Avro schema "round trip".
- 3. Ensure "original" and "round trip" schemas are equivalent.
- """
- print_test_name('TEST ROUND TRIP')
- correct = 0
- for example in VALID_EXAMPLES:
- original_schema = schema.parse(example.schema_string)
- round_trip_schema = schema.parse(str(original_schema))
- if original_schema == round_trip_schema:
- correct += 1
- debug_msg = "%s: ROUND TRIP SUCCESS" % example.name
- else:
- debug_msg = "%s: ROUND TRIP FAILURE" % example.name
- self.fail("Round trip failure: %s, %s, %s" % (example.name, original_schema, str(original_schema)))
-
- fail_msg = "Round trip success on %d out of %d schemas" % \
- (correct, len(VALID_EXAMPLES))
- self.assertEqual(correct, len(VALID_EXAMPLES), fail_msg)
-
# TODO(hammer): more tests
def test_fullname(self):
- """
+ """Test schema full names
+
The fullname is determined in one of the following ways:
* A name and namespace are both specified. For example,
one might use "name": "X", "namespace": "org.foo"
@@ -495,7 +289,6 @@ class TestSchema(unittest.TestCase):
multiple definitions of a fullname if the definitions are
equivalent.
"""
- print_test_name('TEST FULLNAME')
# name and namespace specified
fullname = schema.Name('a', 'o.a.h', None).fullname
@@ -521,54 +314,8 @@ class TestSchema(unittest.TestCase):
fullname = schema.Name('a', 'o.a.a', 'o.a.h').fullname
self.assertEqual(fullname, 'o.a.a.a')
- def test_doc_attributes(self):
- print_test_name('TEST DOC ATTRIBUTES')
- correct = 0
- for example in DOC_EXAMPLES:
- original_schema = schema.parse(example.schema_string)
- if original_schema.doc is not None:
- correct += 1
- if original_schema.type == 'record':
- for f in original_schema.fields:
- if f.doc is None:
- self.fail("Failed to preserve 'doc' in fields: " + example.schema_string)
- self.assertEqual(correct,len(DOC_EXAMPLES))
-
- def test_other_attributes(self):
- print_test_name('TEST OTHER ATTRIBUTES')
- correct = 0
- props = {}
- for example in OTHER_PROP_EXAMPLES:
- original_schema = schema.parse(example.schema_string)
- round_trip_schema = schema.parse(str(original_schema))
- self.assertEqual(original_schema.other_props,round_trip_schema.other_props)
- if original_schema.type == "record":
- field_props = 0
- for f in original_schema.fields:
- if f.other_props:
- props.update(f.other_props)
- field_props += 1
- self.assertEqual(field_props,len(original_schema.fields))
- if original_schema.other_props:
- props.update(original_schema.other_props)
- correct += 1
- for k in props:
- v = props[k]
- if k == "cp_boolean":
- self.assertEqual(type(v), bool)
- elif k == "cp_int":
- self.assertEqual(type(v), int)
- elif k == "cp_object":
- self.assertEqual(type(v), dict)
- elif k == "cp_float":
- self.assertEqual(type(v), float)
- elif k == "cp_array":
- self.assertEqual(type(v), list)
- self.assertEqual(correct,len(OTHER_PROP_EXAMPLES))
-
def test_exception_is_not_swallowed_on_parse_error(self):
- print_test_name('TEST EXCEPTION NOT SWALLOWED ON PARSE ERROR')
-
+ """A specific exception message should appear on a json parse error."""
try:
schema.parse('/not/a/real/file')
caught_exception = False
@@ -580,78 +327,145 @@ class TestSchema(unittest.TestCase):
self.assertTrue(caught_exception, 'Exception was not caught')
- def test_decimal_invalid_schema(self):
- invalid_schemas = [
- ExampleSchema("""{
- "type": "bytes",
- "logicalType": "decimal",
- "precision": 2,
- "scale": -2}""", True),
-
- ExampleSchema("""{
- "type": "bytes",
- "logicalType": "decimal",
- "precision": -2,
- "scale": 2}""", True),
-
- ExampleSchema("""{
- "type": "bytes",
- "logicalType": "decimal",
- "precision": 2,
- "scale": 3}""", True),
-
- ExampleSchema("""{
+ def test_decimal_valid_type(self):
+ fixed_decimal_schema = ValidTestSchema({
"type": "fixed",
"logicalType": "decimal",
"name": "TestDecimal",
- "precision": -10,
+ "precision": 4,
"scale": 2,
- "size": 5}""", True),
-
+ "size": 2})
- ExampleSchema("""{
- "type": "fixed",
+ bytes_decimal_schema = ValidTestSchema({
+ "type": "bytes",
"logicalType": "decimal",
- "name": "TestDecimal",
- "precision": 2,
- "scale": 3,
- "size": 2}""", True)
- ]
-
- for invalid_schema in invalid_schemas:
- self.assertRaises(SchemaParseException, schema.parse, invalid_schema.schema_string)
-
- fixed_invalid_schema_size = ExampleSchema("""{
- "type": "fixed",
- "logicalType": "decimal",
- "name": "TestDecimal",
- "precision": 2,
- "scale": 2,
- "size": -2}""", True)
- self.assertRaises(AvroException, schema.parse, fixed_invalid_schema_size.schema_string)
+ "precision": 4})
- def test_decimal_valid_type(self):
- fixed_decimal_schema = ExampleSchema("""{
- "type": "fixed",
- "logicalType": "decimal",
- "name": "TestDecimal",
- "precision": 4,
- "scale": 2,
- "size": 2}""", True)
-
- bytes_decimal_schema = ExampleSchema("""{
- "type": "bytes",
- "logicalType": "decimal",
- "precision": 4}""", True)
-
- fixed_decimal = schema.parse(fixed_decimal_schema.schema_string)
+ fixed_decimal = fixed_decimal_schema.parse()
self.assertEqual(4, fixed_decimal.get_prop('precision'))
self.assertEqual(2, fixed_decimal.get_prop('scale'))
self.assertEqual(2, fixed_decimal.get_prop('size'))
- bytes_decimal = schema.parse(bytes_decimal_schema.schema_string)
+ bytes_decimal = bytes_decimal_schema.parse()
self.assertEqual(4, bytes_decimal.get_prop('precision'))
self.assertEqual(0, bytes_decimal.get_prop('scale'))
+class SchemaParseTestCase(unittest.TestCase):
+ """Enable generating parse test cases over all the valid and invalid example schema."""
+
+ def __init__(self, test_schema):
+ """Ignore the normal signature for unittest.TestCase because we are generating
+ many test cases from this one class. This is safe as long as the autoloader
+ ignores this class. The autoloader will ignore this class as long as it has
+ no methods starting with `test_`.
+ """
+ super(SchemaParseTestCase, self).__init__(
+ 'parse_valid' if test_schema.valid else 'parse_invalid')
+ self.test_schema = test_schema
+
+ def parse_valid(self):
+ """Parsing a valid schema should not error."""
+ try:
+ self.test_schema.parse()
+ except (schema.AvroException, schema.SchemaParseException):
+ self.fail("Valid schema failed to parse: {!s}".format(self.test_schema))
+
+ def parse_invalid(self):
+ """Parsing an invalid schema should error."""
+ try:
+ self.test_schema.parse()
+ except (schema.AvroException, schema.SchemaParseException):
+ pass
+ else:
+ self.fail("Invalid schema should not have parsed: {!s}".format(self.test_schema))
+
+class RoundTripParseTestCase(unittest.TestCase):
+ """Enable generating round-trip parse test cases over all the valid test schema."""
+
+ def __init__(self, test_schema):
+ """Ignore the normal signature for unittest.TestCase because we are generating
+ many test cases from this one class. This is safe as long as the autoloader
+ ignores this class. The autoloader will ignore this class as long as it has
+ no methods starting with `test_`.
+ """
+ super(RoundTripParseTestCase, self).__init__('parse_round_trip')
+ self.test_schema = test_schema
+
+ def parse_round_trip(self):
+ """The string of a Schema should be parseable to the same Schema."""
+ parsed = self.test_schema.parse()
+ round_trip = schema.parse(str(parsed))
+ self.assertEqual(parsed, round_trip)
+
+class DocAttributesTestCase(unittest.TestCase):
+ """Enable generating document attribute test cases over all the document test schema."""
+
+ def __init__(self, test_schema):
+ """Ignore the normal signature for unittest.TestCase because we are generating
+ many test cases from this one class. This is safe as long as the autoloader
+ ignores this class. The autoloader will ignore this class as long as it has
+ no methods starting with `test_`.
+ """
+ super(DocAttributesTestCase, self).__init__('check_doc_attributes')
+ self.test_schema = test_schema
+
+ def check_doc_attributes(self):
+ """Documentation attributes should be preserved."""
+ sch = self.test_schema.parse()
+ self.assertIsNotNone(sch.doc, "Failed to preserve 'doc' in schema: {!s}".format(self.test_schema))
+ if sch.type == 'record':
+ for f in sch.fields:
+ self.assertIsNotNone(f.doc, "Failed to preserve 'doc' in fields: {!s}".format(self.test_schema))
+
+
+class OtherAttributesTestCase(unittest.TestCase):
+ """Enable generating attribute test cases over all the other-prop test schema."""
+ _type_map = {
+ "cp_array": list,
+ "cp_boolean": bool,
+ "cp_float": float,
+ "cp_int": int,
+ "cp_null": type(None),
+ "cp_object": dict,
+ "cp_string": basestring,
+ }
+
+ def __init__(self, test_schema):
+ """Ignore the normal signature for unittest.TestCase because we are generating
+ many test cases from this one class. This is safe as long as the autoloader
+ ignores this class. The autoloader will ignore this class as long as it has
+ no methods starting with `test_`.
+ """
+ super(OtherAttributesTestCase, self).__init__('check_attributes')
+ self.test_schema = test_schema
+
+ def _check_props(self, props):
+ for k, v in props.items():
+ self.assertIsInstance(v, self._type_map[k])
+
+ def check_attributes(self):
+ """Other attributes and their types on a schema should be preserved."""
+ sch = self.test_schema.parse()
+ round_trip = schema.parse(str(sch))
+ self.assertEqual(sch.other_props, round_trip.other_props,
+ "Properties were not preserved in a round-trip parse.")
+ self._check_props(sch.other_props)
+ if sch.type == "record":
+ field_props = [f.other_props for f in sch.fields if f.other_props]
+ self.assertEqual(len(field_props), len(sch.fields))
+ for p in field_props:
+ self._check_props(p)
+
+
+def load_tests(loader, default_tests, pattern):
+ """Generate test cases across many test schema."""
+ suite = unittest.TestSuite()
+ suite.addTests(loader.loadTestsFromTestCase(TestSchema))
+ suite.addTests(SchemaParseTestCase(ex) for ex in EXAMPLES)
+ suite.addTests(RoundTripParseTestCase(ex) for ex in VALID_EXAMPLES)
+ suite.addTests(DocAttributesTestCase(ex) for ex in DOC_EXAMPLES)
+ suite.addTests(OtherAttributesTestCase(ex) for ex in OTHER_PROP_EXAMPLES)
+ return suite
+
if __name__ == '__main__':
unittest.main()