You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@avro.apache.org by ko...@apache.org on 2019/10/25 20:51:54 UTC

[avro] branch master updated: AVRO-2580: Enforce Logical Type and Literal Type Match (#668)

This is an automated email from the ASF dual-hosted git repository.

kojiromike pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/avro.git


The following commit(s) were added to refs/heads/master by this push:
     new e321938  AVRO-2580: Enforce Logical Type and Literal Type Match (#668)
e321938 is described below

commit e3219382f3fbc9c20bc851f56d4e56a0f73c7c52
Author: Michael A. Smith <mi...@smith-li.com>
AuthorDate: Fri Oct 25 16:51:47 2019 -0400

    AVRO-2580: Enforce Logical Type and Literal Type Match (#668)
    
    * AVRO-2580: Refactor Schema.parse Test
    
    * AVRO-2580: Rewrite Tests to Highlight Bug
    
    * AVRO-2580: Require Logical Type to Match Literal Type
    
    * AVRO-2580: Refactor Schema Tests
    
    1. Enable showing multiple failures in a single run.
    2. Use JSON to format test schema when possible.
---
 lang/py/src/avro/schema.py  |  61 ++--
 lang/py/test/test_schema.py | 754 +++++++++++++++++---------------------------
 2 files changed, 317 insertions(+), 498 deletions(-)

diff --git a/lang/py/src/avro/schema.py b/lang/py/src/avro/schema.py
index 9c4599e..d0091c9 100644
--- a/lang/py/src/avro/schema.py
+++ b/lang/py/src/avro/schema.py
@@ -881,15 +881,36 @@ class TimestampMicrosSchema(LogicalSchema, PrimitiveSchema):
 #
 # Module Methods
 #
-def get_other_props(all_props,reserved_props):
+def get_other_props(all_props, reserved_props):
   """
   Retrieve the non-reserved properties from a dictionary of properties
   @args reserved_props: The set of reserved properties to exclude
   """
   if callable(getattr(all_props, 'items', None)):
-    return dict([(k,v) for (k,v) in all_props.items() if k not in
-                 reserved_props ])
-
+    return {k: v for k, v in all_props.items() if k not in reserved_props}
+
+def make_bytes_decimal_schema(other_props):
+  """Make a BytesDecimalSchema from just other_props."""
+  return BytesDecimalSchema(other_props.get('precision'), other_props.get('scale', 0))
+
+def make_logical_schema(logical_type, type_, other_props):
+  """Map the logical types to the appropriate literal type and schema class."""
+  logical_types = {
+    constants.DATE: ('int', DateSchema),
+    # Fixed decimal schema is handled before we get here.
+    constants.DECIMAL: ('bytes', make_bytes_decimal_schema),
+    constants.TIMESTAMP_MICROS: ('long', TimestampMicrosSchema),
+    constants.TIMESTAMP_MILLIS: ('long', TimestampMillisSchema),
+    constants.TIME_MICROS: ('long', TimeMicrosSchema),
+    constants.TIME_MILLIS: ('int', TimeMillisSchema),
+  }
+  try:
+    literal_type, schema_type = logical_types[logical_type]
+  except KeyError:
+    raise SchemaParseException("Currently does not support {} logical type".format(logical_type))
+  if literal_type != type_:
+    raise SchemaParseException("Logical type {} requires literal type {}, not {}".format(logical_type, literal_type, type_))
+  return schema_type(other_props)
 
 def make_avsc_object(json_data, names=None):
   """
@@ -897,35 +918,15 @@ def make_avsc_object(json_data, names=None):
 
   @arg names: A Name object (tracks seen names and default space)
   """
-  if names == None:
+  if names is None:
     names = Names()
 
   # JSON object (non-union)
   if callable(getattr(json_data, 'get', None)):
     type = json_data.get('type')
     other_props = get_other_props(json_data, SCHEMA_RESERVED_PROPS)
-    logical_type = None
-    if 'logicalType' in json_data:
-      logical_type = json_data.get('logicalType')
-      if logical_type not in constants.SUPPORTED_LOGICAL_TYPE:
-        raise SchemaParseException("Currently does not support %s logical type" % logical_type)
-    if type in PRIMITIVE_TYPES:
-      if type == 'int' and logical_type == constants.DATE:
-        return DateSchema(other_props)
-      if type == 'int' and logical_type == constants.TIME_MILLIS:
-        return TimeMillisSchema(other_props=other_props)
-      if type == 'long' and logical_type == constants.TIME_MICROS:
-        return TimeMicrosSchema(other_props=other_props)
-      if type == 'long' and logical_type == constants.TIMESTAMP_MILLIS:
-        return TimestampMillisSchema(other_props=other_props)
-      if type == 'long' and logical_type == constants.TIMESTAMP_MICROS:
-        return TimestampMicrosSchema(other_props=other_props)
-      if type == 'bytes' and logical_type == constants.DECIMAL:
-          precision = json_data.get('precision')
-          scale = 0 if json_data.get('scale') is None else json_data.get('scale')
-          return BytesDecimalSchema(precision, scale, other_props)
-      return PrimitiveSchema(type, other_props)
-    elif type in NAMED_TYPES:
+    logical_type = json_data.get('logicalType')
+    if type in NAMED_TYPES:
       name = json_data.get('name')
       namespace = json_data.get('namespace', names.default_namespace)
       if type == 'fixed':
@@ -945,7 +946,11 @@ def make_avsc_object(json_data, names=None):
         return RecordSchema(name, namespace, fields, names, type, doc, other_props)
       else:
         raise SchemaParseException('Unknown Named Type: %s' % type)
-    elif type in VALID_TYPES:
+    if logical_type:
+      return make_logical_schema(logical_type, type, other_props or {})
+    if type in PRIMITIVE_TYPES:
+      return PrimitiveSchema(type, other_props)
+    if type in VALID_TYPES:
       if type == 'array':
         items = json_data.get('items')
         return ArraySchema(items, names, other_props)
diff --git a/lang/py/test/test_schema.py b/lang/py/test/test_schema.py
index a794e73..6b2c0ef 100644
--- a/lang/py/test/test_schema.py
+++ b/lang/py/test/test_schema.py
@@ -21,6 +21,7 @@
 
 from __future__ import absolute_import, division, print_function
 
+import json
 import unittest
 
 import set_avro_test_path
@@ -28,356 +29,208 @@ from avro import schema
 from avro.schema import AvroException, SchemaParseException
 
 
-def print_test_name(test_name):
-  print()
-  print(test_name)
-  print('=' * len(test_name))
-  print()
+class TestSchema(object):
+  """A proxy for a schema string that provides useful test metadata."""
 
-class ExampleSchema(object):
-  def __init__(self, schema_string, valid, name='', comment=''):
-    self._schema_string = schema_string
-    self._valid = valid
-    self._name = name or schema_string # default to schema_string for name
+  def __init__(self, data, name='', comment=''):
+    if not isinstance(data, basestring):
+      data = json.dumps(data)
+    self.data = data
+    self.name = name or data  # default to data for name
     self.comment = comment
 
-  @property
-  def schema_string(self):
-    return self._schema_string
+  def parse(self):
+    return schema.parse(str(self))
 
-  @property
-  def valid(self):
-    return self._valid
+  def __str__(self):
+    return str(self.data)
 
-  @property
-  def name(self):
-    return self._name
 
-#
-# Example Schemas
-#
+class ValidTestSchema(TestSchema):
+  """A proxy for a valid schema string that provides useful test metadata."""
+  valid = True
+
 
-def make_primitive_examples():
-  examples = []
-  for type in schema.PRIMITIVE_TYPES:
-    examples.append(ExampleSchema('"%s"' % type, True))
-    examples.append(ExampleSchema('{"type": "%s"}' % type, True))
-  return examples
+class InvalidTestSchema(ValidTestSchema):
+  """A proxy for an invalid schema string that provides useful test metadata."""
+  valid = False
 
-PRIMITIVE_EXAMPLES = [
-  ExampleSchema('"True"', False),
-  ExampleSchema('True', False),
-  ExampleSchema('{"no_type": "test"}', False),
-  ExampleSchema('{"type": "panther"}', False),
-] + make_primitive_examples()
+
+PRIMITIVE_EXAMPLES = ([
+  InvalidTestSchema('"True"'),
+  InvalidTestSchema('True'),
+  InvalidTestSchema('{"no_type": "test"}'),
+  InvalidTestSchema('{"type": "panther"}'),
+] + [ValidTestSchema('"{}"'.format(t)) for t in schema.PRIMITIVE_TYPES]
+  + [ValidTestSchema({"type": t}) for t in schema.PRIMITIVE_TYPES])
 
 FIXED_EXAMPLES = [
-  ExampleSchema('{"type": "fixed", "name": "Test", "size": 1}', True),
-  ExampleSchema("""\
-    {"type": "fixed",
-     "name": "MyFixed",
-     "namespace": "org.apache.hadoop.avro",
-     "size": 1}
-    """, True),
-  ExampleSchema("""\
-    {"type": "fixed",
-     "name": "Missing size"}
-    """, False),
-  ExampleSchema("""\
-    {"type": "fixed",
-     "size": 314}
-    """, False),
+  ValidTestSchema({"type": "fixed", "name": "Test", "size": 1}),
+  ValidTestSchema({"type": "fixed", "name": "MyFixed", "size": 1,
+                   "namespace": "org.apache.hadoop.avro"}),
+  InvalidTestSchema({"type": "fixed", "name": "Missing size"}),
+  InvalidTestSchema({"type": "fixed", "size": 314}),
 ]
 
 ENUM_EXAMPLES = [
-  ExampleSchema('{"type": "enum", "name": "Test", "symbols": ["A", "B"]}', True),
-  ExampleSchema("""\
-    {"type": "enum",
-     "name": "Status",
-     "symbols": "Normal Caution Critical"}
-    """, False),
-  ExampleSchema("""\
-    {"type": "enum",
-     "name": [ 0, 1, 1, 2, 3, 5, 8 ],
-     "symbols": ["Golden", "Mean"]}
-    """, False),
-  ExampleSchema("""\
-    {"type": "enum",
-     "symbols" : ["I", "will", "fail", "no", "name"]}
-    """, False),
-  ExampleSchema("""\
-    {"type": "enum",
-     "name": "Test"
-     "symbols" : ["AA", "AA"]}
-    """, False),
+  ValidTestSchema({"type": "enum", "name": "Test", "symbols": ["A", "B"]}),
+  InvalidTestSchema({"type": "enum", "name": "Status", "symbols": "Normal Caution Critical"}),
+  InvalidTestSchema({"type": "enum", "name": [0, 1, 1, 2, 3, 5, 8],
+                     "symbols": ["Golden", "Mean"]}),
+  InvalidTestSchema({"type": "enum", "symbols" : ["I", "will", "fail", "no", "name"]}),
+  InvalidTestSchema({"type": "enum", "name": "Test", "symbols": ["AA", "AA"]}),
 ]
 
 ARRAY_EXAMPLES = [
-  ExampleSchema('{"type": "array", "items": "long"}', True),
-  ExampleSchema("""\
-    {"type": "array",
-     "items": {"type": "enum", "name": "Test", "symbols": ["A", "B"]}}
-    """, True),
+  ValidTestSchema({"type": "array", "items": "long"}),
+  ValidTestSchema({"type": "array",
+                   "items": {"type": "enum", "name": "Test", "symbols": ["A", "B"]}}),
 ]
 
 MAP_EXAMPLES = [
-  ExampleSchema('{"type": "map", "values": "long"}', True),
-  ExampleSchema("""\
-    {"type": "map",
-     "values": {"type": "enum", "name": "Test", "symbols": ["A", "B"]}}
-    """, True),
+  ValidTestSchema({"type": "map", "values": "long"}),
+  ValidTestSchema({"type": "map",
+                   "values": {"type": "enum", "name": "Test", "symbols": ["A", "B"]}}),
 ]
 
 UNION_EXAMPLES = [
-  ExampleSchema('["string", "null", "long"]', True),
-  ExampleSchema('["null", "null"]', False),
-  ExampleSchema('["long", "long"]', False),
-  ExampleSchema("""\
-    [{"type": "array", "items": "long"}
-     {"type": "array", "items": "string"}]
-    """, False),
+  ValidTestSchema(["string", "null", "long"]),
+  InvalidTestSchema(["null", "null"]),
+  InvalidTestSchema(["long", "long"]),
+  InvalidTestSchema([{"type": "array", "items": "long"},
+                     {"type": "array", "items": "string"}]),
 ]
 
 RECORD_EXAMPLES = [
-  ExampleSchema("""\
-    {"type": "record",
-     "name": "Test",
-     "fields": [{"name": "f",
-                 "type": "long"}]}
-    """, True),
-  ExampleSchema("""\
-    {"type": "error",
-     "name": "Test",
-     "fields": [{"name": "f",
-                 "type": "long"}]}
-    """, True),
-  ExampleSchema("""\
-    {"type": "record",
-     "name": "Node",
-     "fields": [{"name": "label", "type": "string"},
-                {"name": "children",
-                 "type": {"type": "array", "items": "Node"}}]}
-    """, True),
-  ExampleSchema("""\
-    {"type": "record",
-     "name": "Lisp",
-     "fields": [{"name": "value",
-                 "type": ["null", "string",
-                          {"type": "record",
-                           "name": "Cons",
-                           "fields": [{"name": "car", "type": "Lisp"},
-                                      {"name": "cdr", "type": "Lisp"}]}]}]}
-    """, True),
-  ExampleSchema("""\
-    {"type": "record",
-     "name": "HandshakeRequest",
-     "namespace": "org.apache.avro.ipc",
-     "fields": [{"name": "clientHash",
-                 "type": {"type": "fixed", "name": "MD5", "size": 16}},
-                {"name": "clientProtocol", "type": ["null", "string"]},
-                {"name": "serverHash", "type": "MD5"},
-                {"name": "meta",
-                 "type": ["null", {"type": "map", "values": "bytes"}]}]}
-    """, True),
-  ExampleSchema("""\
-    {"type": "record",
-     "name": "HandshakeResponse",
-     "namespace": "org.apache.avro.ipc",
-     "fields": [{"name": "match",
-                 "type": {"type": "enum",
-                          "name": "HandshakeMatch",
-                          "symbols": ["BOTH", "CLIENT", "NONE"]}},
-                {"name": "serverProtocol", "type": ["null", "string"]},
-                {"name": "serverHash",
-                 "type": ["null",
-                          {"name": "MD5", "size": 16, "type": "fixed"}]},
-                {"name": "meta",
-                 "type": ["null", {"type": "map", "values": "bytes"}]}]}
-    """, True),
-  ExampleSchema("""\
-    {"type": "record",
-     "name": "Interop",
-     "namespace": "org.apache.avro",
-     "fields": [{"name": "intField", "type": "int"},
-                {"name": "longField", "type": "long"},
-                {"name": "stringField", "type": "string"},
-                {"name": "boolField", "type": "boolean"},
-                {"name": "floatField", "type": "float"},
-                {"name": "doubleField", "type": "double"},
-                {"name": "bytesField", "type": "bytes"},
-                {"name": "nullField", "type": "null"},
-                {"name": "arrayField",
-                 "type": {"type": "array", "items": "double"}},
-                {"name": "mapField",
-                 "type": {"type": "map",
-                          "values": {"name": "Foo",
-                                     "type": "record",
-                                     "fields": [{"name": "label",
-                                                 "type": "string"}]}}},
-                {"name": "unionField",
-                 "type": ["boolean",
-                          "double",
-                          {"type": "array", "items": "bytes"}]},
-                {"name": "enumField",
-                 "type": {"type": "enum",
-                          "name": "Kind",
-                          "symbols": ["A", "B", "C"]}},
-                {"name": "fixedField",
-                 "type": {"type": "fixed", "name": "MD5", "size": 16}},
-                {"name": "recordField",
-                 "type": {"type": "record",
-                          "name": "Node",
-                          "fields": [{"name": "label", "type": "string"},
-                                     {"name": "children",
-                                      "type": {"type": "array",
-                                               "items": "Node"}}]}}]}
-    """, True),
-  ExampleSchema("""\
-    {"type": "record",
-     "name": "ipAddr",
-     "fields": [{"name": "addr",
-                 "type": [{"name": "IPv6", "type": "fixed", "size": 16},
-                          {"name": "IPv4", "type": "fixed", "size": 4}]}]}
-    """, True),
-  ExampleSchema("""\
-    {"type": "record",
-     "name": "Address",
-     "fields": [{"type": "string"},
-                {"type": "string", "name": "City"}]}
-    """, False),
-  ExampleSchema("""\
-    {"type": "record",
-     "name": "Event",
-     "fields": [{"name": "Sponsor"},
-                {"name": "City", "type": "string"}]}
-    """, False),
-  ExampleSchema("""\
-    {"type": "record",
-     "fields": "His vision, from the constantly passing bars,"
-     "name", "Rainer"}
-    """, False),
-  ExampleSchema("""\
-    {"name": ["Tom", "Jerry"],
-     "type": "record",
-     "fields": [{"name": "name", "type": "string"}]}
-    """, False),
+  ValidTestSchema({"type": "record", "name": "Test", "fields": [{"name": "f", "type": "long"}]}),
+  ValidTestSchema({"type": "error", "name": "Test", "fields": [{"name": "f", "type": "long"}]}),
+  ValidTestSchema({"type": "record", "name": "Node",
+                   "fields": [
+                     {"name": "label", "type": "string"},
+                     {"name": "children", "type": {"type": "array", "items": "Node"}}]}),
+  ValidTestSchema({"type": "record", "name": "Lisp",
+                   "fields": [{"name": "value",
+                               "type": ["null", "string",
+                                        {"type": "record", "name": "Cons",
+                                         "fields": [{"name": "car", "type": "Lisp"},
+                                                    {"name": "cdr", "type": "Lisp"}]}]}]}),
+  ValidTestSchema({"type": "record", "name": "HandshakeRequest",
+                   "namespace": "org.apache.avro.ipc",
+                   "fields": [{"name": "clientHash",
+                               "type": {"type": "fixed", "name": "MD5", "size": 16}},
+                              {"name": "clientProtocol", "type": ["null", "string"]},
+                              {"name": "serverHash", "type": "MD5"},
+                              {"name": "meta",
+                               "type": ["null", {"type": "map", "values": "bytes"}]}]}),
+  ValidTestSchema({"type": "record", "name": "HandshakeResponse",
+                   "namespace": "org.apache.avro.ipc",
+                   "fields": [{"name": "match",
+                               "type": {"type": "enum", "name": "HandshakeMatch",
+                                        "symbols": ["BOTH", "CLIENT", "NONE"]}},
+                              {"name": "serverProtocol", "type": ["null", "string"]},
+                              {"name": "serverHash",
+                               "type": ["null", {"name": "MD5", "size": 16, "type": "fixed"}]},
+                              {"name": "meta",
+                               "type": ["null", {"type": "map", "values": "bytes"}]}]}),
+  ValidTestSchema({"type": "record",
+                   "name": "Interop",
+                   "namespace": "org.apache.avro",
+                   "fields": [{"name": "intField", "type": "int"},
+                              {"name": "longField", "type": "long"},
+                              {"name": "stringField", "type": "string"},
+                              {"name": "boolField", "type": "boolean"},
+                              {"name": "floatField", "type": "float"},
+                              {"name": "doubleField", "type": "double"},
+                              {"name": "bytesField", "type": "bytes"},
+                              {"name": "nullField", "type": "null"},
+                              {"name": "arrayField", "type": {"type": "array", "items": "double"}},
+                              {"name": "mapField",
+                               "type": {"type": "map",
+                                        "values": {"name": "Foo",
+                                                   "type": "record",
+                                                   "fields": [{"name": "label", "type": "string"}]}}},
+                              {"name": "unionField",
+                               "type": ["boolean", "double", {"type": "array", "items": "bytes"}]},
+                              {"name": "enumField",
+                               "type": {"type": "enum", "name": "Kind", "symbols": ["A", "B", "C"]}},
+                              {"name": "fixedField",
+                               "type": {"type": "fixed", "name": "MD5", "size": 16}},
+                              {"name": "recordField",
+                               "type": {"type": "record", "name": "Node",
+                                        "fields": [{"name": "label", "type": "string"},
+                                                   {"name": "children",
+                                                    "type": {"type": "array",
+                                                             "items": "Node"}}]}}]}),
+  ValidTestSchema({"type": "record", "name": "ipAddr",
+                   "fields": [{"name": "addr", "type": [{"name": "IPv6", "type": "fixed", "size": 16},
+                                                        {"name": "IPv4", "type": "fixed", "size": 4}]}]}),
+  InvalidTestSchema({"type": "record", "name": "Address",
+                     "fields": [{"type": "string"}, {"type": "string", "name": "City"}]}),
+  InvalidTestSchema({"type": "record", "name": "Event",
+                     "fields": [{"name": "Sponsor"}, {"name": "City", "type": "string"}]}),
+  InvalidTestSchema({"type": "record", "name": "Rainer",
+                     "fields": "His vision, from the constantly passing bars"}),
+  InvalidTestSchema({"name": ["Tom", "Jerry"], "type": "record",
+                     "fields": [{"name": "name", "type": "string"}]}),
 ]
 
 DOC_EXAMPLES = [
-  ExampleSchema("""\
-    {"type": "record",
-     "name": "TestDoc",
-     "doc":  "Doc string",
-     "fields": [{"name": "name", "type": "string",
-                 "doc" : "Doc String"}]}
-    """, True),
-  ExampleSchema("""\
-    {"type": "enum", "name": "Test", "symbols": ["A", "B"],
-     "doc": "Doc String"}
-    """, True),
+  ValidTestSchema({"type": "record", "name": "TestDoc", "doc": "Doc string",
+                   "fields": [{"name": "name", "type": "string", "doc" : "Doc String"}]}),
+  ValidTestSchema({"type": "enum", "name": "Test", "symbols": ["A", "B"], "doc": "Doc String"}),
 ]
 
 OTHER_PROP_EXAMPLES = [
-  ExampleSchema("""\
-    {"type": "record",
-     "name": "TestRecord",
-     "cp_string": "string",
-     "cp_int": 1,
-     "cp_array": [ 1, 2, 3, 4],
-     "fields": [ {"name": "f1", "type": "string", "cp_object": {"a":1,"b":2} },
-                 {"name": "f2", "type": "long", "cp_null": null} ]}
-    """, True),
-  ExampleSchema("""\
-     {"type": "map", "values": "long", "cp_boolean": true}
-    """, True),
-  ExampleSchema("""\
-    {"type": "enum",
-     "name": "TestEnum",
-     "symbols": [ "one", "two", "three" ],
-     "cp_float" : 1.0 }
-    """,True),
-  ExampleSchema("""\
-    {"type": "long",
-     "date": "true"}
-    """, True)
+  ValidTestSchema({"type": "record", "name": "TestRecord", "cp_string": "string",
+                   "cp_int": 1, "cp_array": [1, 2, 3, 4],
+                   "fields": [{"name": "f1", "type": "string", "cp_object": {"a": 1,"b": 2}},
+                              {"name": "f2", "type": "long", "cp_null": None}]}),
+  ValidTestSchema({"type": "map", "values": "long", "cp_boolean": True}),
+  ValidTestSchema({"type": "enum", "name": "TestEnum",
+                   "symbols": ["one", "two", "three"], "cp_float": 1.0}),
 ]
 
 DECIMAL_LOGICAL_TYPE = [
-  ExampleSchema("""{
-  "type": "fixed",
-  "logicalType": "decimal",
-  "name": "TestDecimal",
-  "precision": 4,
-  "size": 10,
-  "scale": 2}""", True),
-  ExampleSchema("""{
-  "type": "bytes",
-  "logicalType": "decimal",
-  "precision": 4,
-  "scale": 2}""", True)
+  ValidTestSchema({"type": "fixed", "logicalType": "decimal", "name": "TestDecimal", "precision": 4, "size": 10, "scale": 2}),
+  ValidTestSchema({"type": "bytes", "logicalType": "decimal", "precision": 4, "scale": 2}),
+  InvalidTestSchema({"type": "bytes", "logicalType": "decimal", "precision": 2, "scale": -2}),
+  InvalidTestSchema({"type": "bytes", "logicalType": "decimal", "precision": -2, "scale": 2}),
+  InvalidTestSchema({"type": "bytes", "logicalType": "decimal", "precision": 2, "scale": 3}),
+  InvalidTestSchema({"type": "fixed", "logicalType": "decimal", "name": "TestDecimal", "precision": -10, "scale": 2, "size": 5}),
+  InvalidTestSchema({"type": "fixed", "logicalType": "decimal", "name": "TestDecimal", "precision": 2, "scale": 3, "size": 2}),
+  InvalidTestSchema({"type": "fixed", "logicalType": "decimal", "name": "TestDecimal", "precision": 2, "scale": 2, "size": -2}),
 ]
 
 DATE_LOGICAL_TYPE = [
-  ExampleSchema("""{
-  "type": "int",
-  "logicalType": "date"} """, True),
-  ExampleSchema("""{
-  "type": "int",
-  "logicalType": "date1"} """, False),
-  ExampleSchema("""{
-  "type": "long",
-  "logicalType": "date"} """, False),
+  ValidTestSchema({"type": "int", "logicalType": "date"}),
+  InvalidTestSchema({"type": "int", "logicalType": "date1"}),
+  InvalidTestSchema({"type": "long", "logicalType": "date"}),
 ]
 
 TIMEMILLIS_LOGICAL_TYPE = [
-  ExampleSchema("""{
-  "type": "int",
-  "logicalType": "time-millis"} """, True),
-  ExampleSchema("""{
-  "type": "int",
-  "logicalType": "time-milis"} """, False),
-  ExampleSchema("""{
-  "type": "long",
-  "logicalType": "time-millis"} """, False),
+  ValidTestSchema({"type": "int", "logicalType": "time-millis"}),
+  InvalidTestSchema({"type": "int", "logicalType": "time-milis"}),
+  InvalidTestSchema({"type": "long", "logicalType": "time-millis"}),
 ]
 
 TIMEMICROS_LOGICAL_TYPE = [
-  ExampleSchema("""{
-  "type": "long",
-  "logicalType": "time-micros"} """, True),
-  ExampleSchema("""{
-  "type": "long",
-  "logicalType": "time-micro"} """, False),
-  ExampleSchema("""{
-  "type": "int",
-  "logicalType": "time-micros"} """, False),
+  ValidTestSchema({"type": "long", "logicalType": "time-micros"}),
+  InvalidTestSchema({"type": "long", "logicalType": "time-micro"}),
+  InvalidTestSchema({"type": "int", "logicalType": "time-micros"}),
 ]
 
 TIMESTAMPMILLIS_LOGICAL_TYPE = [
-  ExampleSchema("""{
-  "type": "long",
-  "logicalType": "timestamp-millis"} """, True),
-  ExampleSchema("""{
-  "type": "long",
-  "logicalType": "timestamp-milis"} """, False),
-  ExampleSchema("""{
-  "type": "int",
-  "logicalType": "timestamp-millis"} """, False),
+  ValidTestSchema({"type": "long", "logicalType": "timestamp-millis"}),
+  InvalidTestSchema({"type": "long", "logicalType": "timestamp-milis"}),
+  InvalidTestSchema({"type": "int", "logicalType": "timestamp-millis"}),
 ]
 
 TIMESTAMPMICROS_LOGICAL_TYPE = [
-  ExampleSchema("""{
-  "type": "long",
-  "logicalType": "timestamp-micros"} """, True),
-  ExampleSchema("""{
-  "type": "long",
-  "logicalType": "timestamp-micro"} """, False),
-  ExampleSchema("""{
-  "type": "int",
-  "logicalType": "timestamp-micros"} """, False),
+  ValidTestSchema({"type": "long", "logicalType": "timestamp-micros"}),
+  InvalidTestSchema({"type": "long", "logicalType": "timestamp-micro"}),
+  InvalidTestSchema({"type": "int", "logicalType": "timestamp-micros"}),
 ]
 
-
 EXAMPLES = PRIMITIVE_EXAMPLES
 EXAMPLES += FIXED_EXAMPLES
 EXAMPLES += ENUM_EXAMPLES
@@ -394,81 +247,22 @@ EXAMPLES += TIMESTAMPMILLIS_LOGICAL_TYPE
 EXAMPLES += TIMESTAMPMICROS_LOGICAL_TYPE
 
 VALID_EXAMPLES = [e for e in EXAMPLES if e.valid]
+INVALID_EXAMPLES = [e for e in EXAMPLES if not e.valid]
 
-# TODO(hammer): refactor into harness for examples
-# TODO(hammer): pretty-print detailed output
-# TODO(hammer): make verbose flag
-# TODO(hammer): show strack trace to user
-# TODO(hammer): use logging module?
 class TestSchema(unittest.TestCase):
+  """Miscellaneous tests for schema"""
 
   def test_correct_recursive_extraction(self):
+    """A recursive reference within a schema should be the same type every time."""
     s = schema.parse('{"type": "record", "name": "X", "fields": [{"name": "y", "type": {"type": "record", "name": "Y", "fields": [{"name": "Z", "type": "X"}]}}]}')
     t = schema.parse(str(s.fields[0].type))
     # If we've made it this far, the subschema was reasonably stringified; it ccould be reparsed.
     self.assertEqual("X", t.fields[0].type.name)
 
-  def test_parse(self):
-    correct = 0
-    for example in EXAMPLES:
-      try:
-        schema.parse(example.schema_string)
-        if example.valid:
-          correct += 1
-        else:
-          self.fail("Invalid schema was parsed: " + example.schema_string)
-      except:
-        if not example.valid:
-          correct += 1
-        else:
-          self.fail("Valid schema failed to parse: " + example.schema_string)
-
-    fail_msg = "Parse behavior correct on %d out of %d schemas." % \
-      (correct, len(EXAMPLES))
-    self.assertEqual(correct, len(EXAMPLES), fail_msg)
-
-  def test_valid_cast_to_string_after_parse(self):
-    """
-    Test that the string generated by an Avro Schema object
-    is, in fact, a valid Avro schema.
-    """
-    print_test_name('TEST CAST TO STRING AFTER PARSE')
-    correct = 0
-    for example in VALID_EXAMPLES:
-      schema_data = schema.parse(example.schema_string)
-      schema.parse(str(schema_data))
-      correct += 1
-
-    fail_msg = "Cast to string success on %d out of %d schemas" % \
-      (correct, len(VALID_EXAMPLES))
-    self.assertEqual(correct, len(VALID_EXAMPLES), fail_msg)
-
-  def test_equivalence_after_round_trip(self):
-    """
-    1. Given a string, parse it to get Avro schema "original".
-    2. Serialize "original" to a string and parse that string
-         to generate Avro schema "round trip".
-    3. Ensure "original" and "round trip" schemas are equivalent.
-    """
-    print_test_name('TEST ROUND TRIP')
-    correct = 0
-    for example in VALID_EXAMPLES:
-      original_schema = schema.parse(example.schema_string)
-      round_trip_schema = schema.parse(str(original_schema))
-      if original_schema == round_trip_schema:
-        correct += 1
-        debug_msg = "%s: ROUND TRIP SUCCESS" % example.name
-      else:
-        debug_msg = "%s: ROUND TRIP FAILURE" % example.name
-        self.fail("Round trip failure: %s, %s, %s" % (example.name, original_schema, str(original_schema)))
-
-    fail_msg = "Round trip success on %d out of %d schemas" % \
-      (correct, len(VALID_EXAMPLES))
-    self.assertEqual(correct, len(VALID_EXAMPLES), fail_msg)
-
   # TODO(hammer): more tests
   def test_fullname(self):
-    """
+    """Test schema full names
+
     The fullname is determined in one of the following ways:
      * A name and namespace are both specified.  For example,
        one might use "name": "X", "namespace": "org.foo"
@@ -495,7 +289,6 @@ class TestSchema(unittest.TestCase):
     multiple definitions of a fullname if the definitions are
     equivalent.
     """
-    print_test_name('TEST FULLNAME')
 
     # name and namespace specified
     fullname = schema.Name('a', 'o.a.h', None).fullname
@@ -521,54 +314,8 @@ class TestSchema(unittest.TestCase):
     fullname = schema.Name('a', 'o.a.a', 'o.a.h').fullname
     self.assertEqual(fullname, 'o.a.a.a')
 
-  def test_doc_attributes(self):
-    print_test_name('TEST DOC ATTRIBUTES')
-    correct = 0
-    for example in DOC_EXAMPLES:
-      original_schema = schema.parse(example.schema_string)
-      if original_schema.doc is not None:
-        correct += 1
-      if original_schema.type == 'record':
-        for f in original_schema.fields:
-          if f.doc is None:
-            self.fail("Failed to preserve 'doc' in fields: " + example.schema_string)
-    self.assertEqual(correct,len(DOC_EXAMPLES))
-
-  def test_other_attributes(self):
-    print_test_name('TEST OTHER ATTRIBUTES')
-    correct = 0
-    props = {}
-    for example in OTHER_PROP_EXAMPLES:
-      original_schema = schema.parse(example.schema_string)
-      round_trip_schema = schema.parse(str(original_schema))
-      self.assertEqual(original_schema.other_props,round_trip_schema.other_props)
-      if original_schema.type == "record":
-        field_props = 0
-        for f in original_schema.fields:
-          if f.other_props:
-            props.update(f.other_props)
-            field_props += 1
-        self.assertEqual(field_props,len(original_schema.fields))
-      if original_schema.other_props:
-        props.update(original_schema.other_props)
-        correct += 1
-    for k in props:
-      v = props[k]
-      if k == "cp_boolean":
-        self.assertEqual(type(v), bool)
-      elif k == "cp_int":
-        self.assertEqual(type(v), int)
-      elif k == "cp_object":
-        self.assertEqual(type(v), dict)
-      elif k == "cp_float":
-        self.assertEqual(type(v), float)
-      elif k == "cp_array":
-        self.assertEqual(type(v), list)
-    self.assertEqual(correct,len(OTHER_PROP_EXAMPLES))
-
   def test_exception_is_not_swallowed_on_parse_error(self):
-    print_test_name('TEST EXCEPTION NOT SWALLOWED ON PARSE ERROR')
-
+    """A specific exception message should appear on a json parse error."""
     try:
         schema.parse('/not/a/real/file')
         caught_exception = False
@@ -580,78 +327,145 @@ class TestSchema(unittest.TestCase):
 
     self.assertTrue(caught_exception, 'Exception was not caught')
 
-  def test_decimal_invalid_schema(self):
-    invalid_schemas = [
-      ExampleSchema("""{
-      "type": "bytes",
-      "logicalType": "decimal",
-      "precision": 2,
-      "scale": -2}""", True),
-
-      ExampleSchema("""{
-      "type": "bytes",
-      "logicalType": "decimal",
-      "precision": -2,
-      "scale": 2}""", True),
-
-      ExampleSchema("""{
-      "type": "bytes",
-      "logicalType": "decimal",
-      "precision": 2,
-      "scale": 3}""", True),
-
-      ExampleSchema("""{
+  def test_decimal_valid_type(self):
+    fixed_decimal_schema = ValidTestSchema({
       "type": "fixed",
       "logicalType": "decimal",
       "name": "TestDecimal",
-      "precision": -10,
+      "precision": 4,
       "scale": 2,
-      "size": 5}""", True),
-
+      "size": 2})
 
-      ExampleSchema("""{
-      "type": "fixed",
+    bytes_decimal_schema = ValidTestSchema({
+      "type": "bytes",
       "logicalType": "decimal",
-      "name": "TestDecimal",
-      "precision": 2,
-      "scale": 3,
-      "size": 2}""", True)
-    ]
-
-    for invalid_schema in invalid_schemas:
-      self.assertRaises(SchemaParseException, schema.parse, invalid_schema.schema_string)
-
-    fixed_invalid_schema_size = ExampleSchema("""{
-                                "type": "fixed",
-                                "logicalType": "decimal",
-                                "name": "TestDecimal",
-                                "precision": 2,
-                                "scale": 2,
-                                "size": -2}""", True)
-    self.assertRaises(AvroException, schema.parse, fixed_invalid_schema_size.schema_string)
+      "precision": 4})
 
-  def test_decimal_valid_type(self):
-    fixed_decimal_schema = ExampleSchema("""{
-    "type": "fixed",
-    "logicalType": "decimal",
-    "name": "TestDecimal",
-    "precision": 4,
-    "scale": 2,
-    "size": 2}""", True)
-
-    bytes_decimal_schema = ExampleSchema("""{
-    "type": "bytes",
-    "logicalType": "decimal",
-    "precision": 4}""", True)
-
-    fixed_decimal = schema.parse(fixed_decimal_schema.schema_string)
+    fixed_decimal = fixed_decimal_schema.parse()
     self.assertEqual(4, fixed_decimal.get_prop('precision'))
     self.assertEqual(2, fixed_decimal.get_prop('scale'))
     self.assertEqual(2, fixed_decimal.get_prop('size'))
 
-    bytes_decimal = schema.parse(bytes_decimal_schema.schema_string)
+    bytes_decimal = bytes_decimal_schema.parse()
     self.assertEqual(4, bytes_decimal.get_prop('precision'))
     self.assertEqual(0, bytes_decimal.get_prop('scale'))
 
+class SchemaParseTestCase(unittest.TestCase):
+  """One generated test per example schema: valid ones must parse, invalid ones must not."""
+
+  def __init__(self, test_schema):
+    """Bind this case to test_schema and select the check from test_schema.valid.
+
+    This replaces the usual unittest.TestCase signature, which is only safe while
+    the autoloader skips this class, i.e. while no method starts with `test_`.
+    """
+    super(SchemaParseTestCase, self).__init__(
+        'parse_valid' if test_schema.valid else 'parse_invalid')
+    self.test_schema = test_schema
+
+  def parse_valid(self):
+    """Parsing a valid schema should not error; report the parser's reason if it does."""
+    try:
+      self.test_schema.parse()
+    except (schema.AvroException, schema.SchemaParseException) as e:
+      self.fail("Valid schema failed to parse: {!s} ({!s})".format(self.test_schema, e))
+
+  def parse_invalid(self):
+    """Parsing an invalid schema should raise AvroException or SchemaParseException."""
+    try:
+      self.test_schema.parse()
+    except (schema.AvroException, schema.SchemaParseException):
+      pass  # the expected outcome: the parser rejected the schema
+    else:
+      self.fail("Invalid schema should not have parsed: {!s}".format(self.test_schema))
+
+class RoundTripParseTestCase(unittest.TestCase):
+  """One generated test per valid example schema: str() then parse must give an equal schema."""
+
+  def __init__(self, test_schema):
+    """Bind this case to test_schema; the method run is always parse_round_trip.
+
+    This replaces the usual unittest.TestCase signature, which is only safe while
+    the autoloader skips this class, i.e. while no method starts with `test_`.
+    """
+    super(RoundTripParseTestCase, self).__init__('parse_round_trip')
+    self.test_schema = test_schema
+
+  def parse_round_trip(self):
+    """A parsed schema, rendered with str() and parsed again, should compare equal."""
+    original = self.test_schema.parse()
+    # Serialise the parsed schema to text, re-parse that text, and compare the two.
+    self.assertEqual(original, schema.parse(str(original)))
+
+class DocAttributesTestCase(unittest.TestCase):
+  """One generated test per doc example schema: `doc` must survive parsing."""
+
+  def __init__(self, test_schema):
+    """Bind this case to test_schema; the method run is always check_doc_attributes.
+
+    This replaces the usual unittest.TestCase signature, which is only safe while
+    the autoloader skips this class, i.e. while no method starts with `test_`.
+    """
+    super(DocAttributesTestCase, self).__init__('check_doc_attributes')
+    self.test_schema = test_schema
+
+  def check_doc_attributes(self):
+    """The parsed schema, and every field of a record schema, should have a non-None doc."""
+    parsed = self.test_schema.parse()
+    self.assertIsNotNone(parsed.doc, "Failed to preserve 'doc' in schema: {!s}".format(self.test_schema))
+    record_fields = parsed.fields if parsed.type == 'record' else ()
+    for field in record_fields:
+      self.assertIsNotNone(field.doc, "Failed to preserve 'doc' in fields: {!s}".format(self.test_schema))
+
+
+class OtherAttributesTestCase(unittest.TestCase):
+  """One generated test per other-prop example schema: custom props keep value and type."""
+  _type_map = {
+    "cp_array": list,
+    "cp_boolean": bool,
+    "cp_float": float,
+    "cp_int": int,
+    "cp_null": type(None),
+    "cp_object": dict,
+    "cp_string": basestring,
+  }
+
+  def __init__(self, test_schema):
+    """Bind this case to test_schema; the method run is always check_attributes.
+
+    The usual TestCase signature is replaced; safe only while no method starts with `test_`.
+    """
+    super(OtherAttributesTestCase, self).__init__('check_attributes')
+    self.test_schema = test_schema
+
+  def _check_props(self, props):
+    """Assert each property value is an instance of the type _type_map gives its name."""
+    for name, value in props.items():
+      self.assertIsInstance(value, self._type_map[name])
+
+  def check_attributes(self):
+    """Custom props should survive a str()/parse round trip and keep their expected types."""
+    parsed = self.test_schema.parse()
+    reparsed = schema.parse(str(parsed))
+    self.assertEqual(parsed.other_props, reparsed.other_props,
+                     "Properties were not preserved in a round-trip parse.")
+    self._check_props(parsed.other_props)
+    if parsed.type == "record":
+      with_props = [fld.other_props for fld in parsed.fields if fld.other_props]
+      self.assertEqual(len(with_props), len(parsed.fields))
+      for field_props in with_props:
+        self._check_props(field_props)
+
+
+def load_tests(loader, default_tests, pattern):
+  """Build the module suite: TestSchema plus one generated case per example schema."""
+  # default_tests and pattern are accepted but not used; the suite is built from scratch.
+  suite = unittest.TestSuite(loader.loadTestsFromTestCase(TestSchema))
+  generated = ((SchemaParseTestCase, EXAMPLES), (RoundTripParseTestCase, VALID_EXAMPLES),
+               (DocAttributesTestCase, DOC_EXAMPLES), (OtherAttributesTestCase, OTHER_PROP_EXAMPLES))
+  for case_class, examples in generated:
+    suite.addTests(case_class(example) for example in examples)
+  return suite
+
 if __name__ == '__main__':
   unittest.main()