You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@avro.apache.org by te...@apache.org on 2012/03/16 18:07:39 UTC

svn commit: r1301652 - in /avro/trunk/lang/py: src/avro/schema.py test/test_schema.py

Author: tebeka
Date: Fri Mar 16 17:07:38 2012
New Revision: 1301652

URL: http://svn.apache.org/viewvc?rev=1301652&view=rev
Log:
AVRO-300. Python: Support "doc" field in schemas
AVRO-301. Python: Handle non-reserved properties appropriately

Contributed by Marcio Silva


Modified:
    avro/trunk/lang/py/src/avro/schema.py
    avro/trunk/lang/py/test/test_schema.py

Modified: avro/trunk/lang/py/src/avro/schema.py
URL: http://svn.apache.org/viewvc/avro/trunk/lang/py/src/avro/schema.py?rev=1301652&r1=1301651&r2=1301652&view=diff
==============================================================================
--- avro/trunk/lang/py/src/avro/schema.py (original)
+++ avro/trunk/lang/py/src/avro/schema.py Fri Mar 16 17:07:38 2012
@@ -68,7 +68,7 @@ VALID_TYPES = PRIMITIVE_TYPES + NAMED_TY
   'error_union'
 )
 
-RESERVED_PROPS = (
+SCHEMA_RESERVED_PROPS = (
   'type',
   'name',
   'namespace',
@@ -77,6 +77,15 @@ RESERVED_PROPS = (
   'size',       # Fixed
   'symbols',    # Enum
   'values',     # Map
+  'doc',
+)
+
+FIELD_RESERVED_PROPS = (
+  'default',
+  'name',
+  'doc',
+  'order',
+  'type',
 )
 
 VALID_FIELD_SORT_ORDERS = (
@@ -101,7 +110,7 @@ class SchemaParseException(AvroException
 
 class Schema(object):
   """Base class for all Schema classes."""
-  def __init__(self, type):
+  def __init__(self, type, other_props=None):
     # Ensure valid ctor args
     if not isinstance(type, basestring):
       fail_msg = 'Schema type must be a string.'
@@ -114,11 +123,16 @@ class Schema(object):
     if not hasattr(self, '_props'): self._props = {}
     self.set_prop('type', type)
     self.type = type
+    self._props.update(other_props or {})
 
   # Read-only properties dict. Printing schemas
   # creates JSON properties directly from this dict. 
   props = property(lambda self: self._props)
 
+  # Read-only property dict. Non-reserved properties
+  other_props = property(lambda self: get_other_props(self._props, SCHEMA_RESERVED_PROPS),
+                         doc="dictionary of non-reserved properties")
+
   # utility functions to manipulate properties dict
   def get_prop(self, key):
     return self._props.get(key)
@@ -245,7 +259,7 @@ class Names(object):
 
 class NamedSchema(Schema):
   """Named Schemas specified in NAMED_TYPES."""
-  def __init__(self, type, name, namespace=None, names=None):
+  def __init__(self, type, name, namespace=None, names=None, other_props=None):
     # Ensure valid ctor args
     if not name:
       fail_msg = 'Named Schemas must have a non-empty name.'
@@ -258,7 +272,7 @@ class NamedSchema(Schema):
       raise SchemaParseException(fail_msg)
 
     # Call parent ctor
-    Schema.__init__(self, type)
+    Schema.__init__(self, type, other_props)
 
     # Add class members
     new_name = names.add_name(name, namespace, self)
@@ -283,7 +297,8 @@ class NamedSchema(Schema):
   fullname = property(lambda self: self._fullname)
 
 class Field(object):
-  def __init__(self, type, name, has_default, default=None, order=None, names=None):
+  def __init__(self, type, name, has_default, default=None,
+               order=None,names=None, doc=None, other_props=None):
     # Ensure valid ctor args
     if not name:
       fail_msg = 'Fields must have a non-empty name.'
@@ -298,6 +313,7 @@ class Field(object):
     # add members
     self._props = {}
     self._has_default = has_default
+    self._props.update(other_props or {})
 
     if (isinstance(type, basestring) and names is not None
         and names.has_name(type, None)):
@@ -315,14 +331,20 @@ class Field(object):
     # TODO(hammer): check to ensure default is valid
     if has_default: self.set_prop('default', default)
     if order is not None: self.set_prop('order', order)
+    if doc is not None: self.set_prop('doc', doc)
 
   # read-only properties
   default = property(lambda self: self.get_prop('default'))
   has_default = property(lambda self: self._has_default)
   order = property(lambda self: self.get_prop('order'))
+  doc = property(lambda self: self.get_prop('doc'))
   props = property(lambda self: self._props)
 
-  # utility functions to manipulate properties dict
+  # Read-only property dict. Non-reserved properties
+  other_props = property(lambda self: get_other_props(self._props, FIELD_RESERVED_PROPS),
+                         doc="dictionary of non-reserved properties")
+
+# utility functions to manipulate properties dict
   def get_prop(self, key):
     return self._props.get(key)
   def set_prop(self, key, value):
@@ -366,14 +388,14 @@ class PrimitiveSchema(Schema):
 #
 
 class FixedSchema(NamedSchema):
-  def __init__(self, name, namespace, size, names=None):
+  def __init__(self, name, namespace, size, names=None, other_props=None):
     # Ensure valid ctor args
     if not isinstance(size, int):
       fail_msg = 'Fixed Schema requires a valid integer for size property.'
       raise AvroException(fail_msg)
 
     # Call parent ctor
-    NamedSchema.__init__(self, 'fixed', name, namespace, names)
+    NamedSchema.__init__(self, 'fixed', name, namespace, names, other_props)
 
     # Add class members
     self.set_prop('size', size)
@@ -392,7 +414,7 @@ class FixedSchema(NamedSchema):
     return self.props == that.props
 
 class EnumSchema(NamedSchema):
-  def __init__(self, name, namespace, symbols, names=None):
+  def __init__(self, name, namespace, symbols, names=None, doc=None, other_props=None):
     # Ensure valid ctor args
     if not isinstance(symbols, list):
       fail_msg = 'Enum Schema requires a JSON array for the symbols property.'
@@ -405,13 +427,15 @@ class EnumSchema(NamedSchema):
       raise AvroException(fail_msg)
 
     # Call parent ctor
-    NamedSchema.__init__(self, 'enum', name, namespace, names)
+    NamedSchema.__init__(self, 'enum', name, namespace, names, other_props)
 
     # Add class members
     self.set_prop('symbols', symbols)
+    if doc is not None: self.set_prop('doc', doc)
 
   # read-only properties
   symbols = property(lambda self: self.get_prop('symbols'))
+  doc = property(lambda self: self.get_prop('doc'))
 
   def to_json(self, names):
     if self.fullname in names.names:
@@ -428,9 +452,9 @@ class EnumSchema(NamedSchema):
 #
 
 class ArraySchema(Schema):
-  def __init__(self, items, names=None):
+  def __init__(self, items, names=None, other_props=None):
     # Call parent ctor
-    Schema.__init__(self, 'array')
+    Schema.__init__(self, 'array', other_props)
     # Add class members
 
     if isinstance(items, basestring) and names.has_name(items, None):
@@ -458,9 +482,9 @@ class ArraySchema(Schema):
     return to_cmp == json.loads(str(that))
 
 class MapSchema(Schema):
-  def __init__(self, values, names=None):
+  def __init__(self, values, names=None, other_props=None):
     # Call parent ctor
-    Schema.__init__(self, 'map')
+    Schema.__init__(self, 'map',other_props)
 
     # Add class members
     if isinstance(values, basestring) and names.has_name(values, None):
@@ -564,7 +588,10 @@ class RecordSchema(NamedSchema):
           default = field.get('default')
 
         order = field.get('order')
-        new_field = Field(type, name, has_default, default, order, names)
+        doc = field.get('doc')
+        other_props = get_other_props(field, FIELD_RESERVED_PROPS)
+        new_field = Field(type, name, has_default, default, order, names, doc,
+                         other_props)
         # make sure field name has not been used yet
         if new_field.name in field_names:
           fail_msg = 'Field name %s already in use.' % new_field.name
@@ -575,7 +602,8 @@ class RecordSchema(NamedSchema):
       field_objects.append(new_field)
     return field_objects
 
-  def __init__(self, name, namespace, fields, names=None, schema_type='record'):
+  def __init__(self, name, namespace, fields, names=None, schema_type='record',
+               doc=None, other_props=None):
     # Ensure valid ctor args
     if fields is None:
       fail_msg = 'Record schema requires a non-empty fields property.'
@@ -586,9 +614,10 @@ class RecordSchema(NamedSchema):
 
     # Call parent ctor (adds own name to namespace, too)
     if schema_type == 'request':
-      Schema.__init__(self, schema_type)
+      Schema.__init__(self, schema_type, other_props)
     else:
-      NamedSchema.__init__(self, schema_type, name, namespace, names)
+      NamedSchema.__init__(self, schema_type, name, namespace, names,
+                           other_props)
 
     if schema_type == 'record': 
       old_default = names.default_namespace
@@ -598,12 +627,14 @@ class RecordSchema(NamedSchema):
     # Add class members
     field_objects = RecordSchema.make_field_objects(fields, names)
     self.set_prop('fields', field_objects)
-    
+    if doc is not None: self.set_prop('doc', doc)
+
     if schema_type == 'record':
       names.default_namespace = old_default
 
   # read-only properties
   fields = property(lambda self: self.get_prop('fields'))
+  doc = property(lambda self: self.get_prop('doc'))
 
   @property
   def fields_dict(self):
@@ -633,8 +664,16 @@ class RecordSchema(NamedSchema):
 #
 # Module Methods
 #
+def get_other_props(all_props,reserved_props):
+  """
+  Retrieve the non-reserved properties from a dictionary of properties
+  @args reserved_props: The set of reserved properties to exclude
+  """
+  if hasattr(all_props, 'items') and callable(all_props.items):
+    return dict([(k,v) for (k,v) in all_props.items() if k not in
+                 reserved_props ])
+
 
-# TODO(hammer): handle non-reserved properties
 def make_avsc_object(json_data, names=None):
   """
   Build Avro Schema from data parsed out of JSON string.
@@ -647,6 +686,7 @@ def make_avsc_object(json_data, names=No
   # JSON object (non-union)
   if hasattr(json_data, 'get') and callable(json_data.get):
     type = json_data.get('type')
+    other_props = get_other_props(json_data, SCHEMA_RESERVED_PROPS)
     if type in PRIMITIVE_TYPES:
       return PrimitiveSchema(type)
     elif type in NAMED_TYPES:
@@ -654,22 +694,24 @@ def make_avsc_object(json_data, names=No
       namespace = json_data.get('namespace')
       if type == 'fixed':
         size = json_data.get('size')
-        return FixedSchema(name, namespace, size, names)
+        return FixedSchema(name, namespace, size, names, other_props)
       elif type == 'enum':
         symbols = json_data.get('symbols')
-        return EnumSchema(name, namespace, symbols, names)
+        doc = json_data.get('doc')
+        return EnumSchema(name, namespace, symbols, names, doc, other_props)
       elif type in ['record', 'error']:
         fields = json_data.get('fields')
-        return RecordSchema(name, namespace, fields, names, type)
+        doc = json_data.get('doc')
+        return RecordSchema(name, namespace, fields, names, type, doc, other_props)
       else:
         raise SchemaParseException('Unknown Named Type: %s' % type)
     elif type in VALID_TYPES:
       if type == 'array':
         items = json_data.get('items')
-        return ArraySchema(items, names)
+        return ArraySchema(items, names, other_props)
       elif type == 'map':
         values = json_data.get('values')
-        return MapSchema(values, names)
+        return MapSchema(values, names, other_props)
       elif type == 'error_union':
         declared_errors = json_data.get('declared_errors')
         return ErrorUnionSchema(declared_errors, names)

Modified: avro/trunk/lang/py/test/test_schema.py
URL: http://svn.apache.org/viewvc/avro/trunk/lang/py/test/test_schema.py?rev=1301652&r1=1301651&r2=1301652&view=diff
==============================================================================
--- avro/trunk/lang/py/test/test_schema.py (original)
+++ avro/trunk/lang/py/test/test_schema.py Fri Mar 16 17:07:38 2012
@@ -254,6 +254,41 @@ RECORD_EXAMPLES = [
     """, False),
 ]
 
+DOC_EXAMPLES = [
+  ExampleSchema("""\
+    {"type": "record",
+     "name": "TestDoc",
+     "doc":  "Doc string",
+     "fields": [{"name": "name", "type": "string", 
+                 "doc" : "Doc String"}]}
+    """, True),
+  ExampleSchema("""\
+    {"type": "enum", "name": "Test", "symbols": ["A", "B"],
+     "doc": "Doc String"}
+    """, True),
+]
+
+OTHER_PROP_EXAMPLES = [
+  ExampleSchema("""\
+    {"type": "record",
+     "name": "TestRecord",
+     "cp_string": "string",
+     "cp_int": 1,
+     "cp_array": [ 1, 2, 3, 4],
+     "fields": [ {"name": "f1", "type": "string", "cp_object": {"a":1,"b":2} },
+                 {"name": "f2", "type": "long", "cp_null": null} ]}
+    """, True),
+  ExampleSchema("""\
+     {"type": "map", "values": "long", "cp_boolean": true}
+    """, True),
+  ExampleSchema("""\
+    {"type": "enum",
+     "name": "TestEnum",
+     "symbols": [ "one", "two", "three" ],
+     "cp_float" : 1.0 }
+    """,True),
+]
+
 EXAMPLES = PRIMITIVE_EXAMPLES
 EXAMPLES += FIXED_EXAMPLES
 EXAMPLES += ENUM_EXAMPLES
@@ -261,6 +296,7 @@ EXAMPLES += ARRAY_EXAMPLES
 EXAMPLES += MAP_EXAMPLES
 EXAMPLES += UNION_EXAMPLES
 EXAMPLES += RECORD_EXAMPLES
+EXAMPLES += DOC_EXAMPLES
 
 VALID_EXAMPLES = [e for e in EXAMPLES if e.valid]
 
@@ -390,5 +426,50 @@ class TestSchema(unittest.TestCase):
     fullname = schema.Name('a', 'o.a.a', 'o.a.h').fullname
     self.assertEqual(fullname, 'o.a.a.a')
 
+  def test_doc_attributes(self):
+    print_test_name('TEST DOC ATTRIBUTES')
+    correct = 0
+    for example in DOC_EXAMPLES:
+      original_schema = schema.parse(example.schema_string)
+      if original_schema.doc is not None:
+        correct += 1
+      if original_schema.type == 'record':
+        for f in original_schema.fields:
+          if f.doc is None:
+            self.fail("Failed to preserve 'doc' in fields: " + example.schema_string)
+    self.assertEqual(correct,len(DOC_EXAMPLES))
+
+  def test_other_attributes(self):
+    print_test_name('TEST OTHER ATTRIBUTES')
+    correct = 0
+    props = {}
+    for example in OTHER_PROP_EXAMPLES:
+      original_schema = schema.parse(example.schema_string)
+      round_trip_schema = schema.parse(str(original_schema))
+      self.assertEqual(original_schema.other_props,round_trip_schema.other_props)
+      if original_schema.type == "record":
+        field_props = 0
+        for f in original_schema.fields:
+          if f.other_props:
+            props.update(f.other_props)
+            field_props += 1
+        self.assertEqual(field_props,len(original_schema.fields))
+      if original_schema.other_props:
+        props.update(original_schema.other_props)
+        correct += 1
+    for k in props:
+      v = props[k]
+      if k == "cp_boolean":
+        self.assertEqual(type(v), bool)
+      elif k == "cp_int":
+        self.assertEqual(type(v), int)
+      elif k == "cp_object":
+        self.assertEqual(type(v), dict)
+      elif k == "cp_float":
+        self.assertEqual(type(v), float)
+      elif k == "cp_array":
+        self.assertEqual(type(v), list)
+    self.assertEqual(correct,len(OTHER_PROP_EXAMPLES))
+
 if __name__ == '__main__':
   unittest.main()