You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@avro.apache.org by te...@apache.org on 2012/03/16 18:07:39 UTC
svn commit: r1301652 - in /avro/trunk/lang/py: src/avro/schema.py test/test_schema.py
Author: tebeka
Date: Fri Mar 16 17:07:38 2012
New Revision: 1301652
URL: http://svn.apache.org/viewvc?rev=1301652&view=rev
Log:
AVRO-300. Python: Support "doc" field in schemas
AVRO-301. Python: Handle non-reserved properties appropriately
Contributed by Marcio Silva
Modified:
avro/trunk/lang/py/src/avro/schema.py
avro/trunk/lang/py/test/test_schema.py
Modified: avro/trunk/lang/py/src/avro/schema.py
URL: http://svn.apache.org/viewvc/avro/trunk/lang/py/src/avro/schema.py?rev=1301652&r1=1301651&r2=1301652&view=diff
==============================================================================
--- avro/trunk/lang/py/src/avro/schema.py (original)
+++ avro/trunk/lang/py/src/avro/schema.py Fri Mar 16 17:07:38 2012
@@ -68,7 +68,7 @@ VALID_TYPES = PRIMITIVE_TYPES + NAMED_TY
'error_union'
)
-RESERVED_PROPS = (
+SCHEMA_RESERVED_PROPS = (
'type',
'name',
'namespace',
@@ -77,6 +77,15 @@ RESERVED_PROPS = (
'size', # Fixed
'symbols', # Enum
'values', # Map
+ 'doc',
+)
+
+# Property names treated as reserved on a record field. get_other_props()
+# is called with this tuple to filter them out of a field's properties.
+FIELD_RESERVED_PROPS = (
+ 'default',
+ 'name',
+ 'doc',
+ 'order',
+ 'type',
)
VALID_FIELD_SORT_ORDERS = (
@@ -101,7 +110,7 @@ class SchemaParseException(AvroException
class Schema(object):
"""Base class for all Schema classes."""
- def __init__(self, type):
+ def __init__(self, type, other_props=None):
# Ensure valid ctor args
if not isinstance(type, basestring):
fail_msg = 'Schema type must be a string.'
@@ -114,11 +123,16 @@ class Schema(object):
if not hasattr(self, '_props'): self._props = {}
self.set_prop('type', type)
self.type = type
+ self._props.update(other_props or {})
# Read-only properties dict. Printing schemas
# creates JSON properties directly from this dict.
props = property(lambda self: self._props)
+ # Read-only property dict. Non-reserved properties
+ other_props = property(lambda self: get_other_props(self._props, SCHEMA_RESERVED_PROPS),
+ doc="dictionary of non-reserved properties")
+
# utility functions to manipulate properties dict
def get_prop(self, key):
return self._props.get(key)
@@ -245,7 +259,7 @@ class Names(object):
class NamedSchema(Schema):
"""Named Schemas specified in NAMED_TYPES."""
- def __init__(self, type, name, namespace=None, names=None):
+ def __init__(self, type, name, namespace=None, names=None, other_props=None):
# Ensure valid ctor args
if not name:
fail_msg = 'Named Schemas must have a non-empty name.'
@@ -258,7 +272,7 @@ class NamedSchema(Schema):
raise SchemaParseException(fail_msg)
# Call parent ctor
- Schema.__init__(self, type)
+ Schema.__init__(self, type, other_props)
# Add class members
new_name = names.add_name(name, namespace, self)
@@ -283,7 +297,8 @@ class NamedSchema(Schema):
fullname = property(lambda self: self._fullname)
class Field(object):
- def __init__(self, type, name, has_default, default=None, order=None, names=None):
+ def __init__(self, type, name, has_default, default=None,
+ order=None,names=None, doc=None, other_props=None):
# Ensure valid ctor args
if not name:
fail_msg = 'Fields must have a non-empty name.'
@@ -298,6 +313,7 @@ class Field(object):
# add members
self._props = {}
self._has_default = has_default
+ self._props.update(other_props or {})
if (isinstance(type, basestring) and names is not None
and names.has_name(type, None)):
@@ -315,14 +331,20 @@ class Field(object):
# TODO(hammer): check to ensure default is valid
if has_default: self.set_prop('default', default)
if order is not None: self.set_prop('order', order)
+ if doc is not None: self.set_prop('doc', doc)
# read-only properties
default = property(lambda self: self.get_prop('default'))
has_default = property(lambda self: self._has_default)
order = property(lambda self: self.get_prop('order'))
+ doc = property(lambda self: self.get_prop('doc'))
props = property(lambda self: self._props)
- # utility functions to manipulate properties dict
+ # Read-only property dict. Non-reserved properties
+ other_props = property(lambda self: get_other_props(self._props, FIELD_RESERVED_PROPS),
+ doc="dictionary of non-reserved properties")
+
+# utility functions to manipulate properties dict
def get_prop(self, key):
return self._props.get(key)
def set_prop(self, key, value):
@@ -366,14 +388,14 @@ class PrimitiveSchema(Schema):
#
class FixedSchema(NamedSchema):
- def __init__(self, name, namespace, size, names=None):
+ def __init__(self, name, namespace, size, names=None, other_props=None):
# Ensure valid ctor args
if not isinstance(size, int):
fail_msg = 'Fixed Schema requires a valid integer for size property.'
raise AvroException(fail_msg)
# Call parent ctor
- NamedSchema.__init__(self, 'fixed', name, namespace, names)
+ NamedSchema.__init__(self, 'fixed', name, namespace, names, other_props)
# Add class members
self.set_prop('size', size)
@@ -392,7 +414,7 @@ class FixedSchema(NamedSchema):
return self.props == that.props
class EnumSchema(NamedSchema):
- def __init__(self, name, namespace, symbols, names=None):
+ def __init__(self, name, namespace, symbols, names=None, doc=None, other_props=None):
# Ensure valid ctor args
if not isinstance(symbols, list):
fail_msg = 'Enum Schema requires a JSON array for the symbols property.'
@@ -405,13 +427,15 @@ class EnumSchema(NamedSchema):
raise AvroException(fail_msg)
# Call parent ctor
- NamedSchema.__init__(self, 'enum', name, namespace, names)
+ NamedSchema.__init__(self, 'enum', name, namespace, names, other_props)
# Add class members
self.set_prop('symbols', symbols)
+ if doc is not None: self.set_prop('doc', doc)
# read-only properties
symbols = property(lambda self: self.get_prop('symbols'))
+ doc = property(lambda self: self.get_prop('doc'))
def to_json(self, names):
if self.fullname in names.names:
@@ -428,9 +452,9 @@ class EnumSchema(NamedSchema):
#
class ArraySchema(Schema):
- def __init__(self, items, names=None):
+ def __init__(self, items, names=None, other_props=None):
# Call parent ctor
- Schema.__init__(self, 'array')
+ Schema.__init__(self, 'array', other_props)
# Add class members
if isinstance(items, basestring) and names.has_name(items, None):
@@ -458,9 +482,9 @@ class ArraySchema(Schema):
return to_cmp == json.loads(str(that))
class MapSchema(Schema):
- def __init__(self, values, names=None):
+ def __init__(self, values, names=None, other_props=None):
# Call parent ctor
- Schema.__init__(self, 'map')
+ Schema.__init__(self, 'map',other_props)
# Add class members
if isinstance(values, basestring) and names.has_name(values, None):
@@ -564,7 +588,10 @@ class RecordSchema(NamedSchema):
default = field.get('default')
order = field.get('order')
- new_field = Field(type, name, has_default, default, order, names)
+ doc = field.get('doc')
+ other_props = get_other_props(field, FIELD_RESERVED_PROPS)
+ new_field = Field(type, name, has_default, default, order, names, doc,
+ other_props)
# make sure field name has not been used yet
if new_field.name in field_names:
fail_msg = 'Field name %s already in use.' % new_field.name
@@ -575,7 +602,8 @@ class RecordSchema(NamedSchema):
field_objects.append(new_field)
return field_objects
- def __init__(self, name, namespace, fields, names=None, schema_type='record'):
+ def __init__(self, name, namespace, fields, names=None, schema_type='record',
+ doc=None, other_props=None):
# Ensure valid ctor args
if fields is None:
fail_msg = 'Record schema requires a non-empty fields property.'
@@ -586,9 +614,10 @@ class RecordSchema(NamedSchema):
# Call parent ctor (adds own name to namespace, too)
if schema_type == 'request':
- Schema.__init__(self, schema_type)
+ Schema.__init__(self, schema_type, other_props)
else:
- NamedSchema.__init__(self, schema_type, name, namespace, names)
+ NamedSchema.__init__(self, schema_type, name, namespace, names,
+ other_props)
if schema_type == 'record':
old_default = names.default_namespace
@@ -598,12 +627,14 @@ class RecordSchema(NamedSchema):
# Add class members
field_objects = RecordSchema.make_field_objects(fields, names)
self.set_prop('fields', field_objects)
-
+ if doc is not None: self.set_prop('doc', doc)
+
if schema_type == 'record':
names.default_namespace = old_default
# read-only properties
fields = property(lambda self: self.get_prop('fields'))
+ doc = property(lambda self: self.get_prop('doc'))
@property
def fields_dict(self):
@@ -633,8 +664,16 @@ class RecordSchema(NamedSchema):
#
# Module Methods
#
+def get_other_props(all_props,reserved_props):
+ """
+ Retrieve the non-reserved properties from a dictionary of properties
+ @args all_props: The dict-like object (anything exposing a callable
+ items()) holding every property
+ @args reserved_props: The set of reserved properties to exclude
+ @return a new dict with the entries of all_props whose key is not in
+ reserved_props; falls through (implicitly returning None) when
+ all_props has no callable items()
+ """
+ if hasattr(all_props, 'items') and callable(all_props.items):
+ return dict([(k,v) for (k,v) in all_props.items() if k not in
+ reserved_props ])
+
-# TODO(hammer): handle non-reserved properties
def make_avsc_object(json_data, names=None):
"""
Build Avro Schema from data parsed out of JSON string.
@@ -647,6 +686,7 @@ def make_avsc_object(json_data, names=No
# JSON object (non-union)
if hasattr(json_data, 'get') and callable(json_data.get):
type = json_data.get('type')
+ other_props = get_other_props(json_data, SCHEMA_RESERVED_PROPS)
if type in PRIMITIVE_TYPES:
return PrimitiveSchema(type)
elif type in NAMED_TYPES:
@@ -654,22 +694,24 @@ def make_avsc_object(json_data, names=No
namespace = json_data.get('namespace')
if type == 'fixed':
size = json_data.get('size')
- return FixedSchema(name, namespace, size, names)
+ return FixedSchema(name, namespace, size, names, other_props)
elif type == 'enum':
symbols = json_data.get('symbols')
- return EnumSchema(name, namespace, symbols, names)
+ doc = json_data.get('doc')
+ return EnumSchema(name, namespace, symbols, names, doc, other_props)
elif type in ['record', 'error']:
fields = json_data.get('fields')
- return RecordSchema(name, namespace, fields, names, type)
+ doc = json_data.get('doc')
+ return RecordSchema(name, namespace, fields, names, type, doc, other_props)
else:
raise SchemaParseException('Unknown Named Type: %s' % type)
elif type in VALID_TYPES:
if type == 'array':
items = json_data.get('items')
- return ArraySchema(items, names)
+ return ArraySchema(items, names, other_props)
elif type == 'map':
values = json_data.get('values')
- return MapSchema(values, names)
+ return MapSchema(values, names, other_props)
elif type == 'error_union':
declared_errors = json_data.get('declared_errors')
return ErrorUnionSchema(declared_errors, names)
Modified: avro/trunk/lang/py/test/test_schema.py
URL: http://svn.apache.org/viewvc/avro/trunk/lang/py/test/test_schema.py?rev=1301652&r1=1301651&r2=1301652&view=diff
==============================================================================
--- avro/trunk/lang/py/test/test_schema.py (original)
+++ avro/trunk/lang/py/test/test_schema.py Fri Mar 16 17:07:38 2012
@@ -254,6 +254,41 @@ RECORD_EXAMPLES = [
""", False),
]
+# Schemas carrying a "doc" attribute: a record (with a documented field)
+# and an enum. Both are flagged True (presumably "valid" -- see e.valid below).
+DOC_EXAMPLES = [
+ ExampleSchema("""\
+ {"type": "record",
+ "name": "TestDoc",
+ "doc": "Doc string",
+ "fields": [{"name": "name", "type": "string",
+ "doc" : "Doc String"}]}
+ """, True),
+ ExampleSchema("""\
+ {"type": "enum", "name": "Test", "symbols": ["A", "B"],
+ "doc": "Doc String"}
+ """, True),
+]
+
+# Schemas carrying non-reserved custom properties (the cp_* keys) on a
+# record and its fields, on a map, and on an enum.
+OTHER_PROP_EXAMPLES = [
+ ExampleSchema("""\
+ {"type": "record",
+ "name": "TestRecord",
+ "cp_string": "string",
+ "cp_int": 1,
+ "cp_array": [ 1, 2, 3, 4],
+ "fields": [ {"name": "f1", "type": "string", "cp_object": {"a":1,"b":2} },
+ {"name": "f2", "type": "long", "cp_null": null} ]}
+ """, True),
+ ExampleSchema("""\
+ {"type": "map", "values": "long", "cp_boolean": true}
+ """, True),
+ ExampleSchema("""\
+ {"type": "enum",
+ "name": "TestEnum",
+ "symbols": [ "one", "two", "three" ],
+ "cp_float" : 1.0 }
+ """,True),
+]
+
EXAMPLES = PRIMITIVE_EXAMPLES
EXAMPLES += FIXED_EXAMPLES
EXAMPLES += ENUM_EXAMPLES
@@ -261,6 +296,7 @@ EXAMPLES += ARRAY_EXAMPLES
EXAMPLES += MAP_EXAMPLES
EXAMPLES += UNION_EXAMPLES
EXAMPLES += RECORD_EXAMPLES
+EXAMPLES += DOC_EXAMPLES
VALID_EXAMPLES = [e for e in EXAMPLES if e.valid]
@@ -390,5 +426,50 @@ class TestSchema(unittest.TestCase):
fullname = schema.Name('a', 'o.a.a', 'o.a.h').fullname
self.assertEqual(fullname, 'o.a.a.a')
+ def test_doc_attributes(self):
+ # Every schema in DOC_EXAMPLES must expose a non-None 'doc' after parsing,
+ # and every field of a parsed record must expose one as well.
+ print_test_name('TEST DOC ATTRIBUTES')
+ correct = 0
+ for example in DOC_EXAMPLES:
+ original_schema = schema.parse(example.schema_string)
+ if original_schema.doc is not None:
+ correct += 1
+ if original_schema.type == 'record':
+ for f in original_schema.fields:
+ if f.doc is None:
+ self.fail("Failed to preserve 'doc' in fields: " + example.schema_string)
+ # 'correct' counts the schemas whose own doc was available after parsing.
+ self.assertEqual(correct,len(DOC_EXAMPLES))
+
+ def test_other_attributes(self):
+ # Non-reserved ("other") properties must be equal before and after a
+ # parse -> str -> parse round trip, and the collected cp_* values must
+ # have the expected Python types (bool, int, dict, float, list).
+ print_test_name('TEST OTHER ATTRIBUTES')
+ correct = 0
+ props = {}
+ for example in OTHER_PROP_EXAMPLES:
+ original_schema = schema.parse(example.schema_string)
+ round_trip_schema = schema.parse(str(original_schema))
+ self.assertEqual(original_schema.other_props,round_trip_schema.other_props)
+ if original_schema.type == "record":
+ # field_props counts the fields that carry at least one other prop;
+ # the assertion after the loop requires that to be every field.
+ field_props = 0
+ for f in original_schema.fields:
+ if f.other_props:
+ props.update(f.other_props)
+ field_props += 1
+ self.assertEqual(field_props,len(original_schema.fields))
+ if original_schema.other_props:
+ props.update(original_schema.other_props)
+ correct += 1
+ # Type checks on the collected custom properties, selected by key name.
+ for k in props:
+ v = props[k]
+ if k == "cp_boolean":
+ self.assertEqual(type(v), bool)
+ elif k == "cp_int":
+ self.assertEqual(type(v), int)
+ elif k == "cp_object":
+ self.assertEqual(type(v), dict)
+ elif k == "cp_float":
+ self.assertEqual(type(v), float)
+ elif k == "cp_array":
+ self.assertEqual(type(v), list)
+ # 'correct' counts the schemas whose own other_props was non-empty.
+ self.assertEqual(correct,len(OTHER_PROP_EXAMPLES))
+
if __name__ == '__main__':
unittest.main()