You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@avro.apache.org by ph...@apache.org on 2010/08/24 06:37:31 UTC

svn commit: r988383 - in /avro/trunk: CHANGES.txt lang/py/src/avro/protocol.py lang/py/src/avro/schema.py lang/py/test/test_protocol.py lang/py/test/test_schema.py

Author: philz
Date: Tue Aug 24 04:37:30 2010
New Revision: 988383

URL: http://svn.apache.org/viewvc?rev=988383&view=rev
Log:
AVRO-620. Python implementation doesn't stringify sub-schemas correctly.


Modified:
    avro/trunk/CHANGES.txt
    avro/trunk/lang/py/src/avro/protocol.py
    avro/trunk/lang/py/src/avro/schema.py
    avro/trunk/lang/py/test/test_protocol.py
    avro/trunk/lang/py/test/test_schema.py

Modified: avro/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/avro/trunk/CHANGES.txt?rev=988383&r1=988382&r2=988383&view=diff
==============================================================================
--- avro/trunk/CHANGES.txt (original)
+++ avro/trunk/CHANGES.txt Tue Aug 24 04:37:30 2010
@@ -138,6 +138,9 @@ Avro 1.4.0 (unreleased)
 
   BUG FIXES
 
+    AVRO-620. Python implementation doesn't stringify sub-schemas 
+    correctly. (philz)
+
     AVRO-618. Avro doesn't work with python 2.4 (philz)
 
     AVRO-502. Memory leak from parsing JSON schema.

Modified: avro/trunk/lang/py/src/avro/protocol.py
URL: http://svn.apache.org/viewvc/avro/trunk/lang/py/src/avro/protocol.py?rev=988383&r1=988382&r2=988383&view=diff
==============================================================================
--- avro/trunk/lang/py/src/avro/protocol.py (original)
+++ avro/trunk/lang/py/src/avro/protocol.py Tue Aug 24 04:37:30 2010
@@ -120,20 +120,23 @@ class Protocol(object):
   def set_prop(self, key, value):
     self.props[key] = value  
 
-  def __str__(self):
-    # until we implement a JSON encoder for Schema and Message objects,
-    # we'll have to go through and call str() by hand.
+  def to_json(self):
     to_dump = {}
     to_dump['protocol'] = self.name
-    if self.namespace: to_dump['namespace'] = self.namespace
+    names = schema.Names()
+    if self.namespace: 
+      to_dump['namespace'] = self.namespace
     if self.types:
-      to_dump['types'] = [json.loads(str(t)) for t in self.types]
+      to_dump['types'] = [ t.to_json(names) for t in self.types ]
     if self.messages:
       messages_dict = {}
       for name, body in self.messages.iteritems():
-        messages_dict[name] = json.loads(str(body))
+        messages_dict[name] = body.to_json(names)
       to_dump['messages'] = messages_dict
-    return json.dumps(to_dump)
+    return to_dump
+
+  def __str__(self):
+    return json.dumps(self.to_json())
 
   def __eq__(self, that):
     to_cmp = json.loads(str(self))
@@ -149,7 +152,6 @@ class Message(object):
   
   def _parse_response(self, response, names):
     if isinstance(response, basestring) and names.has_name(response, None):
-      self._response_from_names = True
       return names.get_name(response, None)
     else:
       return schema.make_avsc_object(response, names)
@@ -163,7 +165,6 @@ class Message(object):
 
   def __init__(self,  name, request, response, errors=None, names=None):
     self._name = name
-    self._response_from_names = False
 
     self._props = {}
     self.set_prop('request', self._parse_request(request, names))
@@ -173,7 +174,6 @@ class Message(object):
 
   # read-only properties
   name = property(lambda self: self._name)
-  response_from_names = property(lambda self: self._response_from_names)
   request = property(lambda self: self.get_prop('request'))
   response = property(lambda self: self.get_prop('response'))
   errors = property(lambda self: self.get_prop('errors'))
@@ -185,17 +185,16 @@ class Message(object):
   def set_prop(self, key, value):
     self.props[key] = value  
 
-  # TODO(hammer): allow schemas and fields to be JSON Encoded!
   def __str__(self):
+    return json.dumps(self.to_json(schema.Names()))
+
+  def to_json(self, names):
     to_dump = {}
-    to_dump['request'] = json.loads(str(self.request))
-    if self.response_from_names:
-      to_dump['response'] = self.response.fullname
-    else:
-      to_dump['response'] = json.loads(str(self.response))
+    to_dump['request'] = self.request.to_json(names)
+    to_dump['response'] = self.response.to_json(names)
     if self.errors:
-      to_dump['errors'] = json.loads(str(self.errors))
-    return json.dumps(to_dump)
+      to_dump['errors'] = self.errors.to_json(names)
+    return to_dump
 
   def __eq__(self, that):
     return self.name == that.name and self.props == that.props

Modified: avro/trunk/lang/py/src/avro/schema.py
URL: http://svn.apache.org/viewvc/avro/trunk/lang/py/src/avro/schema.py?rev=988383&r1=988382&r2=988383&view=diff
==============================================================================
--- avro/trunk/lang/py/src/avro/schema.py (original)
+++ avro/trunk/lang/py/src/avro/schema.py Tue Aug 24 04:37:30 2010
@@ -122,9 +122,24 @@ class Schema(object):
   # utility functions to manipulate properties dict
   def get_prop(self, key):
     return self.props.get(key)
+
   def set_prop(self, key, value):
     self.props[key] = value
 
+  def __str__(self):
+    names = Names()
+    return json.dumps(self.to_json(names))
+
+  def to_json(self, names):
+    """
+    Converts the schema object into its AVRO specification representation.
+
+    Schema types that have names (records, enums, and fixed) must
+    be aware of not re-defining schemas that are already listed
+    in the parameter names.
+    """
+    raise Exception("Must be implemented by subclasses.")
+
 class Name(object):
   """Class to describe Avro name."""
   
@@ -277,18 +292,16 @@ class Field(object):
 
     # add members
     self._props = {}
-    self._type_from_names = False
     self._has_default = has_default
 
     if (isinstance(type, basestring) and names is not None
         and names.has_name(type, None)):
       type_schema = names.get_name(type, None)
-      self._type_from_names = True
     else:
       try:
         type_schema = make_avsc_object(type, names)
-      except:
-        fail_msg = 'Type property "%s" not a valid Avro schema.' % type
+      except Exception, e:
+        fail_msg = 'Type property "%s" not a valid Avro schema: %s' % (type, e)
         raise SchemaParseException(fail_msg)
     self.set_prop('type', type_schema)
     self.set_prop('name', name)
@@ -303,7 +316,6 @@ class Field(object):
   has_default = property(lambda self: self._has_default)
   order = property(lambda self: self.get_prop('order'))
   props = property(lambda self: self._props)
-  type_from_names = property(lambda self: self._type_from_names)
 
   # utility functions to manipulate properties dict
   def get_prop(self, key):
@@ -311,13 +323,10 @@ class Field(object):
   def set_prop(self, key, value):
     self.props[key] = value
 
-  def __str__(self):
+  def to_json(self, names):
     to_dump = self.props.copy()
-    if self.type_from_names:
-      to_dump['type'] = self.type.fullname
-    else:
-      to_dump['type'] = json.loads(str(to_dump['type']))
-    return json.dumps(to_dump)
+    to_dump['type'] = self.type.to_json(names)
+    return to_dump
 
   def __eq__(self, that):
     to_cmp = json.loads(str(self))
@@ -336,12 +345,13 @@ class PrimitiveSchema(Schema):
     # Call parent ctor
     Schema.__init__(self, type)
 
-  def __str__(self):
-    # if there are no arbitrary properties, use short form
+    self.fullname = type
+
+  def to_json(self, names):
     if len(self.props) == 1:
-      return '"%s"' % self.type
+      return self.fullname
     else:
-      return json.dumps(self.props)
+      return self.props
 
   def __eq__(self, that):
     return self.props == that.props
@@ -366,8 +376,12 @@ class FixedSchema(NamedSchema):
   # read-only properties
   size = property(lambda self: self.get_prop('size'))
 
-  def __str__(self):
-    return json.dumps(self.props)
+  def to_json(self, names):
+    if self.fullname in names.names:
+      return self.fullname
+    else:
+      names.names[self.fullname] = self
+      return self.props
 
   def __eq__(self, that):
     return self.props == that.props
@@ -394,8 +408,12 @@ class EnumSchema(NamedSchema):
   # read-only properties
   symbols = property(lambda self: self.get_prop('symbols'))
 
-  def __str__(self):
-    return json.dumps(self.props)
+  def to_json(self, names):
+    if self.fullname in names.names:
+      return self.fullname
+    else:
+      names.names[self.fullname] = self
+      return self.props
 
   def __eq__(self, that):
     return self.props == that.props
@@ -406,36 +424,30 @@ class EnumSchema(NamedSchema):
 
 class ArraySchema(Schema):
   def __init__(self, items, names=None):
-    # initialize private class members
-    self._items_schema_from_names = False
-
     # Call parent ctor
     Schema.__init__(self, 'array')
     # Add class members
 
     if isinstance(items, basestring) and names.has_name(items, None):
       items_schema = names.get_name(items, None)
-      self._items_schema_from_names = True
     else:
       try:
         items_schema = make_avsc_object(items, names)
-      except:
-        fail_msg = 'Items schema not a valid Avro schema.'
+      except SchemaParseException, e:
+        fail_msg = 'Items schema (%s) not a valid Avro schema: %s (known names: %s)' % (items, e, names.names.keys())
+        __import__("pdb").set_trace()
         raise SchemaParseException(fail_msg)
 
     self.set_prop('items', items_schema)
 
   # read-only properties
   items = property(lambda self: self.get_prop('items'))
-  items_schema_from_names = property(lambda self: self._items_schema_from_names)
 
-  def __str__(self):
+  def to_json(self, names):
     to_dump = self.props.copy()
-    if self.items_schema_from_names:
-      to_dump['items'] = self.get_prop('items').fullname
-    else:
-      to_dump['items'] = json.loads(str(to_dump['items']))
-    return json.dumps(to_dump)
+    item_schema = self.get_prop('items')
+    to_dump['items'] = item_schema.to_json(names)
+    return to_dump
 
   def __eq__(self, that):
     to_cmp = json.loads(str(self))
@@ -443,16 +455,12 @@ class ArraySchema(Schema):
 
 class MapSchema(Schema):
   def __init__(self, values, names=None):
-    # initialize private class members
-    self._values_schema_from_names = False
-
     # Call parent ctor
     Schema.__init__(self, 'map')
 
     # Add class members
     if isinstance(values, basestring) and names.has_name(values, None):
       values_schema = names.get_name(values)
-      self._values_schema_from_names = True
     else:
       try:
         values_schema = make_avsc_object(values, names)
@@ -464,16 +472,11 @@ class MapSchema(Schema):
 
   # read-only properties
   values = property(lambda self: self.get_prop('values'))
-  values_schema_from_names = property(lambda self:
-                                      self._values_schema_from_names)
 
-  def __str__(self):
+  def to_json(self, names):
     to_dump = self.props.copy()
-    if self.values_schema_from_names:
-      to_dump['values'] = self.get_prop('values').fullname
-    else:
-      to_dump['values'] = json.loads(str(to_dump['values']))
-    return json.dumps(to_dump)
+    to_dump['values'] = self.get_prop('values').to_json(names)
+    return to_dump
 
   def __eq__(self, that):
     to_cmp = json.loads(str(self))
@@ -494,17 +497,14 @@ class UnionSchema(Schema):
 
     # Add class members
     schema_objects = []
-    self._schema_from_names_indices = []
-    for i, schema in enumerate(schemas):
-      from_names = False
+    for schema in schemas:
       if isinstance(schema, basestring) and names.has_name(schema, None):
         new_schema = names.get_name(schema, None)
-        from_names = True
       else:
         try:
           new_schema = make_avsc_object(schema, names)
-        except:
-          raise SchemaParseException('Union item must be a valid Avro schema.')
+        except Exception, e:
+          raise SchemaParseException('Union item must be a valid Avro schema: %s' % str(e))
       # check the new schema
       if (new_schema.type in VALID_TYPES and new_schema.type not in NAMED_TYPES
           and new_schema.type in [schema.type for schema in schema_objects]):
@@ -513,22 +513,16 @@ class UnionSchema(Schema):
         raise SchemaParseException('Unions cannot contain other unions.')
       else:
         schema_objects.append(new_schema)
-        if from_names: self._schema_from_names_indices.append(i)
     self._schemas = schema_objects
 
   # read-only properties
   schemas = property(lambda self: self._schemas)
-  schema_from_names_indices = property(lambda self:
-                                       self._schema_from_names_indices)
 
-  def __str__(self):
+  def to_json(self, names):
     to_dump = []
-    for i, schema in enumerate(self.schemas):
-      if i in self.schema_from_names_indices:
-        to_dump.append(schema.fullname)
-      else:
-        to_dump.append(json.loads(str(schema)))
-    return json.dumps(to_dump)
+    for schema in self.schemas:
+      to_dump.append(schema.to_json(names))
+    return to_dump
 
   def __eq__(self, that):
     to_cmp = json.loads(str(self))
@@ -539,16 +533,13 @@ class ErrorUnionSchema(UnionSchema):
     # Prepend "string" to handle system errors
     UnionSchema.__init__(self, ['string'] + schemas, names)
 
-  def __str__(self):
+  def to_json(self, names):
     to_dump = []
-    for i, schema in enumerate(self.schemas):
+    for schema in self.schemas:
       # Don't print the system error schema
       if schema.type == 'string': continue
-      if i in self.schema_from_names_indices:
-        to_dump.append(schema.fullname)
-      else:
-        to_dump.append(json.loads(str(schema)))
-    return json.dumps(to_dump)
+      to_dump.append(schema.to_json(names))
+    return to_dump
 
 class RecordSchema(NamedSchema):
   @staticmethod
@@ -617,13 +608,19 @@ class RecordSchema(NamedSchema):
       fields_dict[field.name] = field
     return fields_dict
 
-  def __str__(self):
-    to_dump = self.props.copy()
-    to_dump['fields'] = [json.loads(str(f)) for f in self.fields]
+  def to_json(self, names):
+    # Request records don't have names
     if self.type == 'request':
-      return json.dumps(to_dump['fields'])
+      return [ f.to_json(names) for f in self.fields ]
+
+    if self.fullname in names.names:
+      return self.fullname
     else:
-      return json.dumps(to_dump)
+      names.names[self.fullname] = self
+
+    to_dump = self.props.copy()
+    to_dump['fields'] = [ f.to_json(names) for f in self.fields ]
+    return to_dump
 
   def __eq__(self, that):
     to_cmp = json.loads(str(self))

Modified: avro/trunk/lang/py/test/test_protocol.py
URL: http://svn.apache.org/viewvc/avro/trunk/lang/py/test/test_protocol.py?rev=988383&r1=988382&r2=988383&view=diff
==============================================================================
--- avro/trunk/lang/py/test/test_protocol.py (original)
+++ avro/trunk/lang/py/test/test_protocol.py Tue Aug 24 04:37:30 2010
@@ -310,23 +310,19 @@ VALID_EXAMPLES = [e for e in EXAMPLES if
 
 class TestProtocol(unittest.TestCase):
   def test_parse(self):
-    print ''
-    print 'TEST PARSE'
-    print '=========='
-    print ''
-
     num_correct = 0
     for example in EXAMPLES:
       try:
-        try:
-          protocol.parse(example.protocol_string)
-          if example.valid: num_correct += 1
-          debug_msg = "%s: PARSE SUCCESS" % example.name
-        except:
-          if not example.valid: num_correct += 1
-          debug_msg = "%s: PARSE FAILURE" % example.name
-      finally:
-        print debug_msg
+        protocol.parse(example.protocol_string)
+        if example.valid: 
+          num_correct += 1
+        else:
+          self.fail("Parsed invalid protocol: %s" % (example.name,))
+      except Exception, e:
+        if not example.valid: 
+          num_correct += 1
+        else:
+          self.fail("Coudl not parse valid protocol: %s" % (example.name,))
 
     fail_msg = "Parse behavior correct on %d out of %d protocols." % \
       (num_correct, len(EXAMPLES))
@@ -373,20 +369,14 @@ class TestProtocol(unittest.TestCase):
 
     num_correct = 0
     for example in VALID_EXAMPLES:
-      try:
-        try:
-          original_protocol = protocol.parse(example.protocol_string)
-          round_trip_protocol = protocol.parse(str(original_protocol))
+      original_protocol = protocol.parse(example.protocol_string)
+      round_trip_protocol = protocol.parse(str(original_protocol))
 
-          if original_protocol == round_trip_protocol:
-            num_correct += 1
-            debug_msg = "%s: ROUND TRIP SUCCESS" % example.name
-          else:       
-            debug_msg = "%s: ROUND TRIP FAILURE" % example.name
-        except:
-          debug_msg = "%s: ROUND TRIP FAILURE" % example.name
-      finally:
-        print debug_msg
+      if original_protocol == round_trip_protocol:
+        num_correct += 1
+        debug_msg = "%s: ROUND TRIP SUCCESS" % example.name
+      else:       
+        self.fail("Round trip failure: %s %s %s", (example.name, example.protocol_string, str(original_protocol)))
 
     fail_msg = "Round trip success on %d out of %d protocols" % \
       (num_correct, len(VALID_EXAMPLES))

Modified: avro/trunk/lang/py/test/test_schema.py
URL: http://svn.apache.org/viewvc/avro/trunk/lang/py/test/test_schema.py?rev=988383&r1=988382&r2=988383&view=diff
==============================================================================
--- avro/trunk/lang/py/test/test_schema.py (original)
+++ avro/trunk/lang/py/test/test_schema.py Tue Aug 24 04:37:30 2010
@@ -270,26 +270,27 @@ VALID_EXAMPLES = [e for e in EXAMPLES if
 # TODO(hammer): show strack trace to user
 # TODO(hammer): use logging module?
 class TestSchema(unittest.TestCase):
+
+  def test_correct_recursive_extraction(self):
+    s = schema.parse('{"type": "record", "name": "X", "fields": [{"name": "y", "type": {"type": "record", "name": "Y", "fields": [{"name": "Z", "type": "X"}]}}]}')
+    t = schema.parse(str(s.fields[0].type))
+    # If we've made it this far, the subschema was reasonably stringified; it ccould be reparsed.
+    self.assertEqual("X", t.fields[0].type.name)
+
   def test_parse(self):
-    print_test_name('TEST PARSE')
     correct = 0
     for example in EXAMPLES:
       try:
-        try:
-          schema.parse(example.schema_string)
-          if example.valid:
-            correct += 1
-          else:
-            self.fail("Invalid schema was parsed: " + example.schema_string)
-          debug_msg = "%s: PARSE SUCCESS" % example.name
-        except:
-          if not example.valid: 
-            correct += 1
-          else:
-            self.fail("Valid schema failed to parse: " + example.schema_string)
-          debug_msg = "%s: PARSE FAILURE" % example.name
-      finally:
-        print debug_msg
+        schema.parse(example.schema_string)
+        if example.valid:
+          correct += 1
+        else:
+          self.fail("Invalid schema was parsed: " + example.schema_string)
+      except:
+        if not example.valid: 
+          correct += 1
+        else:
+          self.fail("Valid schema failed to parse: " + example.schema_string)
 
     fail_msg = "Parse behavior correct on %d out of %d schemas." % \
       (correct, len(EXAMPLES))
@@ -304,15 +305,8 @@ class TestSchema(unittest.TestCase):
     correct = 0
     for example in VALID_EXAMPLES:
       schema_data = schema.parse(example.schema_string)
-      try:
-        try:
-          schema.parse(str(schema_data))
-          debug_msg = "%s: STRING CAST SUCCESS" % example.name
-          correct += 1
-        except:
-          debug_msg = "%s: STRING CAST FAILURE" % example.name
-      finally:
-        print debug_msg
+      schema.parse(str(schema_data))
+      correct += 1
 
     fail_msg = "Cast to string success on %d out of %d schemas" % \
       (correct, len(VALID_EXAMPLES))
@@ -328,19 +322,14 @@ class TestSchema(unittest.TestCase):
     print_test_name('TEST ROUND TRIP')
     correct = 0
     for example in VALID_EXAMPLES:
-      try:
-        try:
-          original_schema = schema.parse(example.schema_string)
-          round_trip_schema = schema.parse(str(original_schema))
-          if original_schema == round_trip_schema:
-            correct += 1
-            debug_msg = "%s: ROUND TRIP SUCCESS" % example.name
-          else:       
-            debug_msg = "%s: ROUND TRIP FAILURE" % example.name
-        except:
-          debug_msg = "%s: ROUND TRIP FAILURE" % example.name
-      finally:
-        print debug_msg
+      original_schema = schema.parse(example.schema_string)
+      round_trip_schema = schema.parse(str(original_schema))
+      if original_schema == round_trip_schema:
+        correct += 1
+        debug_msg = "%s: ROUND TRIP SUCCESS" % example.name
+      else:       
+        debug_msg = "%s: ROUND TRIP FAILURE" % example.name
+        self.fail("Round trip failure: %s, %s, %s" % (example.name, original_schema, str(original_schema)))
 
     fail_msg = "Round trip success on %d out of %d schemas" % \
       (correct, len(VALID_EXAMPLES))