You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@avro.apache.org by ph...@apache.org on 2010/08/24 06:37:31 UTC
svn commit: r988383 - in /avro/trunk: CHANGES.txt
lang/py/src/avro/protocol.py lang/py/src/avro/schema.py
lang/py/test/test_protocol.py lang/py/test/test_schema.py
Author: philz
Date: Tue Aug 24 04:37:30 2010
New Revision: 988383
URL: http://svn.apache.org/viewvc?rev=988383&view=rev
Log:
AVRO-620. Python implementation doesn't stringify sub-schemas correctly.
Modified:
avro/trunk/CHANGES.txt
avro/trunk/lang/py/src/avro/protocol.py
avro/trunk/lang/py/src/avro/schema.py
avro/trunk/lang/py/test/test_protocol.py
avro/trunk/lang/py/test/test_schema.py
Modified: avro/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/avro/trunk/CHANGES.txt?rev=988383&r1=988382&r2=988383&view=diff
==============================================================================
--- avro/trunk/CHANGES.txt (original)
+++ avro/trunk/CHANGES.txt Tue Aug 24 04:37:30 2010
@@ -138,6 +138,9 @@ Avro 1.4.0 (unreleased)
BUG FIXES
+ AVRO-620. Python implementation doesn't stringify sub-schemas
+ correctly. (philz)
+
AVRO-618. Avro doesn't work with python 2.4 (philz)
AVRO-502. Memory leak from parsing JSON schema.
Modified: avro/trunk/lang/py/src/avro/protocol.py
URL: http://svn.apache.org/viewvc/avro/trunk/lang/py/src/avro/protocol.py?rev=988383&r1=988382&r2=988383&view=diff
==============================================================================
--- avro/trunk/lang/py/src/avro/protocol.py (original)
+++ avro/trunk/lang/py/src/avro/protocol.py Tue Aug 24 04:37:30 2010
@@ -120,20 +120,23 @@ class Protocol(object):
def set_prop(self, key, value):
self.props[key] = value
- def __str__(self):
- # until we implement a JSON encoder for Schema and Message objects,
- # we'll have to go through and call str() by hand.
+ def to_json(self):
to_dump = {}
to_dump['protocol'] = self.name
- if self.namespace: to_dump['namespace'] = self.namespace
+ names = schema.Names()
+ if self.namespace:
+ to_dump['namespace'] = self.namespace
if self.types:
- to_dump['types'] = [json.loads(str(t)) for t in self.types]
+ to_dump['types'] = [ t.to_json(names) for t in self.types ]
if self.messages:
messages_dict = {}
for name, body in self.messages.iteritems():
- messages_dict[name] = json.loads(str(body))
+ messages_dict[name] = body.to_json(names)
to_dump['messages'] = messages_dict
- return json.dumps(to_dump)
+ return to_dump
+
+ def __str__(self):
+ return json.dumps(self.to_json())
def __eq__(self, that):
to_cmp = json.loads(str(self))
@@ -149,7 +152,6 @@ class Message(object):
def _parse_response(self, response, names):
if isinstance(response, basestring) and names.has_name(response, None):
- self._response_from_names = True
return names.get_name(response, None)
else:
return schema.make_avsc_object(response, names)
@@ -163,7 +165,6 @@ class Message(object):
def __init__(self, name, request, response, errors=None, names=None):
self._name = name
- self._response_from_names = False
self._props = {}
self.set_prop('request', self._parse_request(request, names))
@@ -173,7 +174,6 @@ class Message(object):
# read-only properties
name = property(lambda self: self._name)
- response_from_names = property(lambda self: self._response_from_names)
request = property(lambda self: self.get_prop('request'))
response = property(lambda self: self.get_prop('response'))
errors = property(lambda self: self.get_prop('errors'))
@@ -185,17 +185,16 @@ class Message(object):
def set_prop(self, key, value):
self.props[key] = value
- # TODO(hammer): allow schemas and fields to be JSON Encoded!
def __str__(self):
+ return json.dumps(self.to_json(schema.Names()))
+
+ def to_json(self, names):
to_dump = {}
- to_dump['request'] = json.loads(str(self.request))
- if self.response_from_names:
- to_dump['response'] = self.response.fullname
- else:
- to_dump['response'] = json.loads(str(self.response))
+ to_dump['request'] = self.request.to_json(names)
+ to_dump['response'] = self.response.to_json(names)
if self.errors:
- to_dump['errors'] = json.loads(str(self.errors))
- return json.dumps(to_dump)
+ to_dump['errors'] = self.errors.to_json(names)
+ return to_dump
def __eq__(self, that):
return self.name == that.name and self.props == that.props
Modified: avro/trunk/lang/py/src/avro/schema.py
URL: http://svn.apache.org/viewvc/avro/trunk/lang/py/src/avro/schema.py?rev=988383&r1=988382&r2=988383&view=diff
==============================================================================
--- avro/trunk/lang/py/src/avro/schema.py (original)
+++ avro/trunk/lang/py/src/avro/schema.py Tue Aug 24 04:37:30 2010
@@ -122,9 +122,24 @@ class Schema(object):
# utility functions to manipulate properties dict
def get_prop(self, key):
return self.props.get(key)
+
def set_prop(self, key, value):
self.props[key] = value
+ def __str__(self):
+ names = Names()
+ return json.dumps(self.to_json(names))
+
+ def to_json(self, names):
+ """
+ Converts the schema object into its AVRO specification representation.
+
+ Schema types that have names (records, enums, and fixed) must
+ be aware of not re-defining schemas that are already listed
+ in the parameter names.
+ """
+ raise Exception("Must be implemented by subclasses.")
+
class Name(object):
"""Class to describe Avro name."""
@@ -277,18 +292,16 @@ class Field(object):
# add members
self._props = {}
- self._type_from_names = False
self._has_default = has_default
if (isinstance(type, basestring) and names is not None
and names.has_name(type, None)):
type_schema = names.get_name(type, None)
- self._type_from_names = True
else:
try:
type_schema = make_avsc_object(type, names)
- except:
- fail_msg = 'Type property "%s" not a valid Avro schema.' % type
+ except Exception, e:
+ fail_msg = 'Type property "%s" not a valid Avro schema: %s' % (type, e)
raise SchemaParseException(fail_msg)
self.set_prop('type', type_schema)
self.set_prop('name', name)
@@ -303,7 +316,6 @@ class Field(object):
has_default = property(lambda self: self._has_default)
order = property(lambda self: self.get_prop('order'))
props = property(lambda self: self._props)
- type_from_names = property(lambda self: self._type_from_names)
# utility functions to manipulate properties dict
def get_prop(self, key):
@@ -311,13 +323,10 @@ class Field(object):
def set_prop(self, key, value):
self.props[key] = value
- def __str__(self):
+ def to_json(self, names):
to_dump = self.props.copy()
- if self.type_from_names:
- to_dump['type'] = self.type.fullname
- else:
- to_dump['type'] = json.loads(str(to_dump['type']))
- return json.dumps(to_dump)
+ to_dump['type'] = self.type.to_json(names)
+ return to_dump
def __eq__(self, that):
to_cmp = json.loads(str(self))
@@ -336,12 +345,13 @@ class PrimitiveSchema(Schema):
# Call parent ctor
Schema.__init__(self, type)
- def __str__(self):
- # if there are no arbitrary properties, use short form
+ self.fullname = type
+
+ def to_json(self, names):
if len(self.props) == 1:
- return '"%s"' % self.type
+ return self.fullname
else:
- return json.dumps(self.props)
+ return self.props
def __eq__(self, that):
return self.props == that.props
@@ -366,8 +376,12 @@ class FixedSchema(NamedSchema):
# read-only properties
size = property(lambda self: self.get_prop('size'))
- def __str__(self):
- return json.dumps(self.props)
+ def to_json(self, names):
+ if self.fullname in names.names:
+ return self.fullname
+ else:
+ names.names[self.fullname] = self
+ return self.props
def __eq__(self, that):
return self.props == that.props
@@ -394,8 +408,12 @@ class EnumSchema(NamedSchema):
# read-only properties
symbols = property(lambda self: self.get_prop('symbols'))
- def __str__(self):
- return json.dumps(self.props)
+ def to_json(self, names):
+ if self.fullname in names.names:
+ return self.fullname
+ else:
+ names.names[self.fullname] = self
+ return self.props
def __eq__(self, that):
return self.props == that.props
@@ -406,36 +424,30 @@ class EnumSchema(NamedSchema):
class ArraySchema(Schema):
def __init__(self, items, names=None):
- # initialize private class members
- self._items_schema_from_names = False
-
# Call parent ctor
Schema.__init__(self, 'array')
# Add class members
if isinstance(items, basestring) and names.has_name(items, None):
items_schema = names.get_name(items, None)
- self._items_schema_from_names = True
else:
try:
items_schema = make_avsc_object(items, names)
- except:
- fail_msg = 'Items schema not a valid Avro schema.'
+ except SchemaParseException, e:
+ fail_msg = 'Items schema (%s) not a valid Avro schema: %s (known names: %s)' % (items, e, names.names.keys())
+ __import__("pdb").set_trace()
raise SchemaParseException(fail_msg)
self.set_prop('items', items_schema)
# read-only properties
items = property(lambda self: self.get_prop('items'))
- items_schema_from_names = property(lambda self: self._items_schema_from_names)
- def __str__(self):
+ def to_json(self, names):
to_dump = self.props.copy()
- if self.items_schema_from_names:
- to_dump['items'] = self.get_prop('items').fullname
- else:
- to_dump['items'] = json.loads(str(to_dump['items']))
- return json.dumps(to_dump)
+ item_schema = self.get_prop('items')
+ to_dump['items'] = item_schema.to_json(names)
+ return to_dump
def __eq__(self, that):
to_cmp = json.loads(str(self))
@@ -443,16 +455,12 @@ class ArraySchema(Schema):
class MapSchema(Schema):
def __init__(self, values, names=None):
- # initialize private class members
- self._values_schema_from_names = False
-
# Call parent ctor
Schema.__init__(self, 'map')
# Add class members
if isinstance(values, basestring) and names.has_name(values, None):
values_schema = names.get_name(values)
- self._values_schema_from_names = True
else:
try:
values_schema = make_avsc_object(values, names)
@@ -464,16 +472,11 @@ class MapSchema(Schema):
# read-only properties
values = property(lambda self: self.get_prop('values'))
- values_schema_from_names = property(lambda self:
- self._values_schema_from_names)
- def __str__(self):
+ def to_json(self, names):
to_dump = self.props.copy()
- if self.values_schema_from_names:
- to_dump['values'] = self.get_prop('values').fullname
- else:
- to_dump['values'] = json.loads(str(to_dump['values']))
- return json.dumps(to_dump)
+ to_dump['values'] = self.get_prop('values').to_json(names)
+ return to_dump
def __eq__(self, that):
to_cmp = json.loads(str(self))
@@ -494,17 +497,14 @@ class UnionSchema(Schema):
# Add class members
schema_objects = []
- self._schema_from_names_indices = []
- for i, schema in enumerate(schemas):
- from_names = False
+ for schema in schemas:
if isinstance(schema, basestring) and names.has_name(schema, None):
new_schema = names.get_name(schema, None)
- from_names = True
else:
try:
new_schema = make_avsc_object(schema, names)
- except:
- raise SchemaParseException('Union item must be a valid Avro schema.')
+ except Exception, e:
+ raise SchemaParseException('Union item must be a valid Avro schema: %s' % str(e))
# check the new schema
if (new_schema.type in VALID_TYPES and new_schema.type not in NAMED_TYPES
and new_schema.type in [schema.type for schema in schema_objects]):
@@ -513,22 +513,16 @@ class UnionSchema(Schema):
raise SchemaParseException('Unions cannot contain other unions.')
else:
schema_objects.append(new_schema)
- if from_names: self._schema_from_names_indices.append(i)
self._schemas = schema_objects
# read-only properties
schemas = property(lambda self: self._schemas)
- schema_from_names_indices = property(lambda self:
- self._schema_from_names_indices)
- def __str__(self):
+ def to_json(self, names):
to_dump = []
- for i, schema in enumerate(self.schemas):
- if i in self.schema_from_names_indices:
- to_dump.append(schema.fullname)
- else:
- to_dump.append(json.loads(str(schema)))
- return json.dumps(to_dump)
+ for schema in self.schemas:
+ to_dump.append(schema.to_json(names))
+ return to_dump
def __eq__(self, that):
to_cmp = json.loads(str(self))
@@ -539,16 +533,13 @@ class ErrorUnionSchema(UnionSchema):
# Prepend "string" to handle system errors
UnionSchema.__init__(self, ['string'] + schemas, names)
- def __str__(self):
+ def to_json(self, names):
to_dump = []
- for i, schema in enumerate(self.schemas):
+ for schema in self.schemas:
# Don't print the system error schema
if schema.type == 'string': continue
- if i in self.schema_from_names_indices:
- to_dump.append(schema.fullname)
- else:
- to_dump.append(json.loads(str(schema)))
- return json.dumps(to_dump)
+ to_dump.append(schema.to_json(names))
+ return to_dump
class RecordSchema(NamedSchema):
@staticmethod
@@ -617,13 +608,19 @@ class RecordSchema(NamedSchema):
fields_dict[field.name] = field
return fields_dict
- def __str__(self):
- to_dump = self.props.copy()
- to_dump['fields'] = [json.loads(str(f)) for f in self.fields]
+ def to_json(self, names):
+ # Request records don't have names
if self.type == 'request':
- return json.dumps(to_dump['fields'])
+ return [ f.to_json(names) for f in self.fields ]
+
+ if self.fullname in names.names:
+ return self.fullname
else:
- return json.dumps(to_dump)
+ names.names[self.fullname] = self
+
+ to_dump = self.props.copy()
+ to_dump['fields'] = [ f.to_json(names) for f in self.fields ]
+ return to_dump
def __eq__(self, that):
to_cmp = json.loads(str(self))
Modified: avro/trunk/lang/py/test/test_protocol.py
URL: http://svn.apache.org/viewvc/avro/trunk/lang/py/test/test_protocol.py?rev=988383&r1=988382&r2=988383&view=diff
==============================================================================
--- avro/trunk/lang/py/test/test_protocol.py (original)
+++ avro/trunk/lang/py/test/test_protocol.py Tue Aug 24 04:37:30 2010
@@ -310,23 +310,19 @@ VALID_EXAMPLES = [e for e in EXAMPLES if
class TestProtocol(unittest.TestCase):
def test_parse(self):
- print ''
- print 'TEST PARSE'
- print '=========='
- print ''
-
num_correct = 0
for example in EXAMPLES:
try:
- try:
- protocol.parse(example.protocol_string)
- if example.valid: num_correct += 1
- debug_msg = "%s: PARSE SUCCESS" % example.name
- except:
- if not example.valid: num_correct += 1
- debug_msg = "%s: PARSE FAILURE" % example.name
- finally:
- print debug_msg
+ protocol.parse(example.protocol_string)
+ if example.valid:
+ num_correct += 1
+ else:
+ self.fail("Parsed invalid protocol: %s" % (example.name,))
+ except Exception, e:
+ if not example.valid:
+ num_correct += 1
+ else:
+ self.fail("Coudl not parse valid protocol: %s" % (example.name,))
fail_msg = "Parse behavior correct on %d out of %d protocols." % \
(num_correct, len(EXAMPLES))
@@ -373,20 +369,14 @@ class TestProtocol(unittest.TestCase):
num_correct = 0
for example in VALID_EXAMPLES:
- try:
- try:
- original_protocol = protocol.parse(example.protocol_string)
- round_trip_protocol = protocol.parse(str(original_protocol))
+ original_protocol = protocol.parse(example.protocol_string)
+ round_trip_protocol = protocol.parse(str(original_protocol))
- if original_protocol == round_trip_protocol:
- num_correct += 1
- debug_msg = "%s: ROUND TRIP SUCCESS" % example.name
- else:
- debug_msg = "%s: ROUND TRIP FAILURE" % example.name
- except:
- debug_msg = "%s: ROUND TRIP FAILURE" % example.name
- finally:
- print debug_msg
+ if original_protocol == round_trip_protocol:
+ num_correct += 1
+ debug_msg = "%s: ROUND TRIP SUCCESS" % example.name
+ else:
+ self.fail("Round trip failure: %s %s %s", (example.name, example.protocol_string, str(original_protocol)))
fail_msg = "Round trip success on %d out of %d protocols" % \
(num_correct, len(VALID_EXAMPLES))
Modified: avro/trunk/lang/py/test/test_schema.py
URL: http://svn.apache.org/viewvc/avro/trunk/lang/py/test/test_schema.py?rev=988383&r1=988382&r2=988383&view=diff
==============================================================================
--- avro/trunk/lang/py/test/test_schema.py (original)
+++ avro/trunk/lang/py/test/test_schema.py Tue Aug 24 04:37:30 2010
@@ -270,26 +270,27 @@ VALID_EXAMPLES = [e for e in EXAMPLES if
# TODO(hammer): show strack trace to user
# TODO(hammer): use logging module?
class TestSchema(unittest.TestCase):
+
+ def test_correct_recursive_extraction(self):
+ s = schema.parse('{"type": "record", "name": "X", "fields": [{"name": "y", "type": {"type": "record", "name": "Y", "fields": [{"name": "Z", "type": "X"}]}}]}')
+ t = schema.parse(str(s.fields[0].type))
+ # If we've made it this far, the subschema was reasonably stringified; it ccould be reparsed.
+ self.assertEqual("X", t.fields[0].type.name)
+
def test_parse(self):
- print_test_name('TEST PARSE')
correct = 0
for example in EXAMPLES:
try:
- try:
- schema.parse(example.schema_string)
- if example.valid:
- correct += 1
- else:
- self.fail("Invalid schema was parsed: " + example.schema_string)
- debug_msg = "%s: PARSE SUCCESS" % example.name
- except:
- if not example.valid:
- correct += 1
- else:
- self.fail("Valid schema failed to parse: " + example.schema_string)
- debug_msg = "%s: PARSE FAILURE" % example.name
- finally:
- print debug_msg
+ schema.parse(example.schema_string)
+ if example.valid:
+ correct += 1
+ else:
+ self.fail("Invalid schema was parsed: " + example.schema_string)
+ except:
+ if not example.valid:
+ correct += 1
+ else:
+ self.fail("Valid schema failed to parse: " + example.schema_string)
fail_msg = "Parse behavior correct on %d out of %d schemas." % \
(correct, len(EXAMPLES))
@@ -304,15 +305,8 @@ class TestSchema(unittest.TestCase):
correct = 0
for example in VALID_EXAMPLES:
schema_data = schema.parse(example.schema_string)
- try:
- try:
- schema.parse(str(schema_data))
- debug_msg = "%s: STRING CAST SUCCESS" % example.name
- correct += 1
- except:
- debug_msg = "%s: STRING CAST FAILURE" % example.name
- finally:
- print debug_msg
+ schema.parse(str(schema_data))
+ correct += 1
fail_msg = "Cast to string success on %d out of %d schemas" % \
(correct, len(VALID_EXAMPLES))
@@ -328,19 +322,14 @@ class TestSchema(unittest.TestCase):
print_test_name('TEST ROUND TRIP')
correct = 0
for example in VALID_EXAMPLES:
- try:
- try:
- original_schema = schema.parse(example.schema_string)
- round_trip_schema = schema.parse(str(original_schema))
- if original_schema == round_trip_schema:
- correct += 1
- debug_msg = "%s: ROUND TRIP SUCCESS" % example.name
- else:
- debug_msg = "%s: ROUND TRIP FAILURE" % example.name
- except:
- debug_msg = "%s: ROUND TRIP FAILURE" % example.name
- finally:
- print debug_msg
+ original_schema = schema.parse(example.schema_string)
+ round_trip_schema = schema.parse(str(original_schema))
+ if original_schema == round_trip_schema:
+ correct += 1
+ debug_msg = "%s: ROUND TRIP SUCCESS" % example.name
+ else:
+ debug_msg = "%s: ROUND TRIP FAILURE" % example.name
+ self.fail("Round trip failure: %s, %s, %s" % (example.name, original_schema, str(original_schema)))
fail_msg = "Round trip success on %d out of %d schemas" % \
(correct, len(VALID_EXAMPLES))