You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@avro.apache.org by cu...@apache.org on 2010/01/04 19:09:43 UTC
svn commit: r895732 [2/2] - in /hadoop/avro/trunk: ./ lib/py/ src/
src/py/avro/ src/test/py/
Added: hadoop/avro/trunk/src/test/py/test_datafile.py
URL: http://svn.apache.org/viewvc/hadoop/avro/trunk/src/test/py/test_datafile.py?rev=895732&view=auto
==============================================================================
--- hadoop/avro/trunk/src/test/py/test_datafile.py (added)
+++ hadoop/avro/trunk/src/test/py/test_datafile.py Mon Jan 4 18:09:42 2010
@@ -0,0 +1,144 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import os
+import unittest
+from avro import schema
+from avro import io
+from avro import datafile
+
+SCHEMAS_TO_VALIDATE = (  # pairs of (schema JSON text, datum to write with that schema)
+ ('"null"', None),
+ ('"boolean"', True),
+ ('"string"', unicode('adsfasdf09809dsf-=adsf')),
+ ('"bytes"', '12345abcd'),
+ ('"int"', 1234),
+ ('"long"', 1234),
+ ('"float"', 1234.0),
+ ('"double"', 1234.0),
+ ('{"type": "fixed", "name": "Test", "size": 1}', 'B'),
+ ('{"type": "enum", "name": "Test", "symbols": ["A", "B"]}', 'B'),
+ ('{"type": "array", "items": "long"}', [1, 3, 2]),
+ ('{"type": "map", "values": "long"}', {'a': 1, 'b': 3, 'c': 2}),
+ ('["string", "null", "long"]', None),
+ ("""\
+ {"type": "record",
+ "name": "Test",
+ "fields": [{"name": "f", "type": "long"}]}
+ """, {'f': 5}),
+ ("""\
+ {"type": "record",
+ "name": "Lisp",
+ "fields": [{"name": "value",
+ "type": ["null", "string",
+ {"type": "record",
+ "name": "Cons",
+ "fields": [{"name": "car", "type": "Lisp"},
+ {"name": "cdr", "type": "Lisp"}]}]}]}
+ """, {'value': {'car': {'value': 'head'}, 'cdr': {'value': None}}}),  # nested Lisp/Cons datum
+)
+
+FILENAME = 'test_datafile.out'  # scratch file; each test iteration writes it, reads it back and removes it
+
+# TODO(hammer): clean up written files with ant, not os.remove
+class TestDataFile(unittest.TestCase):
+  def test_round_trip(self):
+    """Write each datum 10 times to a data file and read all 10 back."""
+    print ''
+    print 'TEST ROUND TRIP'
+    print '==============='
+    print ''
+    correct = 0
+    for i, (example_schema, datum) in enumerate(SCHEMAS_TO_VALIDATE):
+      print ''
+      print 'SCHEMA NUMBER %d' % (i + 1)
+      print '================'
+      print ''
+      print 'Schema: %s' % example_schema
+      print 'Datum: %s' % datum
+
+      # write data in binary to file 10 times
+      writer = open(FILENAME, 'wb')
+      datum_writer = io.DatumWriter()
+      schema_object = schema.parse(example_schema)
+      dfw = datafile.DataFileWriter(writer, datum_writer, schema_object)
+      for _ in range(10):
+        dfw.append(datum)
+      dfw.close()
+
+      # read data back; `datum` must stay the written value for the check below
+      reader = open(FILENAME, 'rb')
+      datum_reader = io.DatumReader()
+      dfr = datafile.DataFileReader(reader, datum_reader)
+      round_trip_data = list(dfr)
+      reader.close()
+
+      print 'Round Trip Data: %s' % round_trip_data
+      print 'Round Trip Data Length: %d' % len(round_trip_data)
+      is_correct = [datum] * 10 == round_trip_data
+      if is_correct: correct += 1
+      print 'Correct Round Trip: %s' % is_correct
+      print ''
+      os.remove(FILENAME)
+    self.assertEquals(correct, len(SCHEMAS_TO_VALIDATE))
+
+  def test_append(self):
+    """Write one datum, reopen the file 9 times to append, read all 10 back."""
+    print ''
+    print 'TEST APPEND'
+    print '==========='
+    print ''
+    correct = 0
+    for i, (example_schema, datum) in enumerate(SCHEMAS_TO_VALIDATE):
+      print ''
+      print 'SCHEMA NUMBER %d' % (i + 1)
+      print '================'
+      print ''
+      print 'Schema: %s' % example_schema
+      print 'Datum: %s' % datum
+
+      # write data in binary to file once
+      writer = open(FILENAME, 'wb')
+      datum_writer = io.DatumWriter()
+      schema_object = schema.parse(example_schema)
+      dfw = datafile.DataFileWriter(writer, datum_writer, schema_object)
+      dfw.append(datum)
+      dfw.close()
+
+      # open file, write, and close nine times
+      for _ in range(9):
+        writer = open(FILENAME, 'ab+')
+        dfw = datafile.DataFileWriter(writer, io.DatumWriter())
+        dfw.append(datum)
+        dfw.close()
+
+      # read data back; `datum` must stay the written value for the check below
+      reader = open(FILENAME, 'rb')
+      datum_reader = io.DatumReader()
+      dfr = datafile.DataFileReader(reader, datum_reader)
+      appended_data = list(dfr)
+      reader.close()
+
+      print 'Appended Data: %s' % appended_data
+      print 'Appended Data Length: %d' % len(appended_data)
+      is_correct = [datum] * 10 == appended_data
+      if is_correct: correct += 1
+      print 'Correct Appended: %s' % is_correct
+      print ''
+      os.remove(FILENAME)
+    self.assertEquals(correct, len(SCHEMAS_TO_VALIDATE))
+
+if __name__ == '__main__':  # run this module's tests when executed as a script
+ unittest.main()
Added: hadoop/avro/trunk/src/test/py/test_io.py
URL: http://svn.apache.org/viewvc/hadoop/avro/trunk/src/test/py/test_io.py?rev=895732&view=auto
==============================================================================
--- hadoop/avro/trunk/src/test/py/test_io.py (added)
+++ hadoop/avro/trunk/src/test/py/test_io.py Mon Jan 4 18:09:42 2010
@@ -0,0 +1,154 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import unittest
+import cStringIO
+from avro import schema
+from avro import io
+
+SCHEMAS_TO_VALIDATE = (  # pairs of (schema JSON text, datum the tests validate and encode with it)
+ ('"null"', None),
+ ('"boolean"', True),
+ ('"string"', unicode('adsfasdf09809dsf-=adsf')),
+ ('"bytes"', '12345abcd'),
+ ('"int"', 1234),
+ ('"long"', 1234),
+ ('"float"', 1234.0),
+ ('"double"', 1234.0),
+ ('{"type": "fixed", "name": "Test", "size": 1}', 'B'),
+ ('{"type": "enum", "name": "Test", "symbols": ["A", "B"]}', 'B'),
+ ('{"type": "array", "items": "long"}', [1, 3, 2]),
+ ('{"type": "map", "values": "long"}', {'a': 1, 'b': 3, 'c': 2}),
+ ('["string", "null", "long"]', None),
+ ("""\
+ {"type": "record",
+ "name": "Test",
+ "fields": [{"name": "f", "type": "long"}]}
+ """, {'f': 5}),
+ ("""\
+ {"type": "record",
+ "name": "Lisp",
+ "fields": [{"name": "value",
+ "type": ["null", "string",
+ {"type": "record",
+ "name": "Cons",
+ "fields": [{"name": "car", "type": "Lisp"},
+ {"name": "cdr", "type": "Lisp"}]}]}]}
+ """, {'value': {'car': {'value': 'head'}, 'cdr': {'value': None}}}),  # nested Lisp/Cons datum
+)
+
+class TestIO(unittest.TestCase):
+  """Tests for avro.io: validation, binary encoding and datum round trips.
+
+  Every test prints its progress to stdout; test_decode, test_datum_reader
+  and test_datum_writer are empty placeholders.
+  """
+
+  def test_validate(self):
+    """Check that io.validate accepts every sample pair.
+
+    Each schema string is parsed with schema.parse and its datum validated;
+    the test fails unless all pairs validate.
+    """
+    for banner_line in ('', 'Test Validate', '=============', ''):
+      print banner_line
+
+    accepted = 0
+    for schema_json, sample in SCHEMAS_TO_VALIDATE:
+      print schema_json, sample
+      outcome = io.validate(schema.parse(schema_json), sample)
+      print outcome
+      if outcome:
+        accepted += 1
+    # every sample is expected to validate against its own schema
+    self.assertEquals(accepted, len(SCHEMAS_TO_VALIDATE))
+
+  # TODO(hammer): print bytes in python
+  def test_encode(self):
+    """Print the repr of one encoded value per BinaryEncoder write method.
+
+    Covers write_boolean, write_utf8, write_int, write_long, write_float,
+    write_double and write_bytes. Nothing is asserted; the encoded bytes
+    are only written to stdout.
+    """
+    for banner_line in ('', 'Test Encode', '=============', ''):
+      print banner_line
+
+    # (output label, BinaryEncoder method name, value to encode)
+    cases = (
+      ('Boolean', 'write_boolean', True),
+      ('String', 'write_utf8', unicode('adsfasdf09809dsf-=adsf')),
+      ('Int', 'write_int', 1),
+      ('Long', 'write_long', 1),
+      ('Float', 'write_float', 1.0),
+      ('Double', 'write_double', 1.0),
+      ('Bytes', 'write_bytes', '12345abcd'),
+    )
+    for label, method_name, value in cases:
+      # a fresh buffer and encoder per case, so each repr shows one value
+      buf = cStringIO.StringIO()
+      encoder = io.BinaryEncoder(buf)
+      getattr(encoder, method_name)(value)
+      print label + ': ' + repr(buf.getvalue())
+
+  def test_decode(self):
+    """Placeholder; no decoding test is implemented yet."""
+    pass
+
+  def test_datum_reader(self):
+    """Placeholder; no DatumReader test is implemented yet."""
+    pass
+
+  def test_datum_writer(self):
+    """Placeholder; no DatumWriter test is implemented yet."""
+    pass
+
+  def test_round_trip(self):
+    """Encode each sample datum to an in-memory buffer and decode it back.
+
+    The schema text is parsed separately for validation, for the writer
+    and for the reader. Passes only if every decoded datum equals the
+    datum that was written.
+    """
+    for banner_line in ('', 'TEST ROUND TRIP', '===============', ''):
+      print banner_line
+
+    matches = 0
+    for schema_json, sample in SCHEMAS_TO_VALIDATE:
+      print 'Schema: %s' % schema_json
+      print 'Datum: %s' % sample
+      print 'Valid: %s' % io.validate(schema.parse(schema_json), sample)
+
+      # write datum in binary to string buffer
+      out_buf = cStringIO.StringIO()
+      encoder = io.BinaryEncoder(out_buf)
+      io.DatumWriter(schema.parse(schema_json)).write(sample, encoder)
+
+      # read data from string buffer
+      in_buf = cStringIO.StringIO(out_buf.getvalue())
+      decoder = io.BinaryDecoder(in_buf)
+      decoded = io.DatumReader(schema.parse(schema_json)).read(decoder)
+
+      print 'Round Trip Datum: %s' % decoded
+      # compare once; the result feeds both the tally and the report line
+      same = sample == decoded
+      if same:
+        matches += 1
+      print 'Correct Round Trip: %s' % same
+      print ''
+    self.assertEquals(matches, len(SCHEMAS_TO_VALIDATE))
+
+if __name__ == '__main__':  # run this module's tests when executed as a script
+ unittest.main()
Added: hadoop/avro/trunk/src/test/py/test_schema.py
URL: http://svn.apache.org/viewvc/hadoop/avro/trunk/src/test/py/test_schema.py?rev=895732&view=auto
==============================================================================
--- hadoop/avro/trunk/src/test/py/test_schema.py (added)
+++ hadoop/avro/trunk/src/test/py/test_schema.py Mon Jan 4 18:09:42 2010
@@ -0,0 +1,338 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""
+Test the schema parsing logic.
+"""
+import unittest
+from avro import schema
+
+class ExampleSchema(object):
+  """A schema JSON string paired with whether it is expected to be valid.
+
+  schema_string, valid and name are read-only properties; comment is a
+  plain attribute.
+  """
+  def __init__(self, schema_string, valid, name='', comment=''):
+    # an empty name falls back to the schema text itself
+    if not name:
+      name = schema_string
+    self._schema_string = schema_string
+    self._valid = valid
+    self._name = name
+    self.comment = comment
+
+  schema_string = property(lambda self: self._schema_string)
+  valid = property(lambda self: self._valid)
+  name = property(lambda self: self._name)
+
+#
+# Example Schemas
+#
+
+def make_primitive_examples():
+  """Return two valid examples per primitive: '"t"' and '{"type": "t"}'."""
+  forms = ('"%s"', '{"type": "%s"}')
+  return [ExampleSchema(form % primitive, True)
+          for primitive in schema.PRIMITIVE_TYPES
+          for form in forms]
+
+PRIMITIVE_EXAMPLES = [  # four strings expected to fail parsing, then the generated valid ones
+ ExampleSchema('"True"', False),
+ ExampleSchema('True', False),
+ ExampleSchema('{"no_type": "test"}', False),  # has no "type" key
+ ExampleSchema('{"type": "panther"}', False),
+] + make_primitive_examples()
+
+FIXED_EXAMPLES = [  # "fixed" schemas: two expected to parse, two each missing one attribute
+ ExampleSchema('{"type": "fixed", "name": "Test", "size": 1}', True),
+ ExampleSchema("""\
+ {"type": "fixed",
+ "name": "MyFixed",
+ "namespace": "org.apache.hadoop.avro",
+ "size": 1}
+ """, True),
+ ExampleSchema("""\
+ {"type": "fixed",
+ "name": "Missing size"}
+ """, False),  # expected to fail: no "size"
+ ExampleSchema("""\
+ {"type": "fixed",
+ "size": 314}
+ """, False),  # expected to fail: no "name"
+]
+
+ENUM_EXAMPLES = [  # "enum" schemas: one expected to parse, three expected to fail
+ ExampleSchema('{"type": "enum", "name": "Test", "symbols": ["A", "B"]}', True),
+ ExampleSchema("""\
+ {"type": "enum",
+ "name": "Status",
+ "symbols": "Normal Caution Critical"}
+ """, False),  # expected to fail: "symbols" is a string, not a list
+ ExampleSchema("""\
+ {"type": "enum",
+ "name": [ 0, 1, 1, 2, 3, 5, 8 ],
+ "symbols": ["Golden", "Mean"]}
+ """, False),  # expected to fail: "name" is a list, not a string
+ ExampleSchema("""\
+ {"type": "enum",
+ "symbols" : ["I", "will", "fail", "no", "name"]}
+ """, False),  # expected to fail: no "name"
+]
+
+ARRAY_EXAMPLES = [  # "array" schemas, both expected to parse: primitive items and enum items
+ ExampleSchema('{"type": "array", "items": "long"}', True),
+ ExampleSchema("""\
+ {"type": "array",
+ "items": {"type": "enum", "name": "Test", "symbols": ["A", "B"]}}
+ """, True),
+]
+
+MAP_EXAMPLES = [  # "map" schemas, both expected to parse: primitive values and enum values
+ ExampleSchema('{"type": "map", "values": "long"}', True),
+ ExampleSchema("""\
+ {"type": "map",
+ "values": {"type": "enum", "name": "Test", "symbols": ["A", "B"]}}
+ """, True),
+]
+
+UNION_EXAMPLES = [  # union schemas (JSON arrays): one expected to parse, two expected to fail
+ ExampleSchema('["string", "null", "long"]', True),
+ ExampleSchema('["null", "null"]', False),  # "null" is listed twice
+ ExampleSchema("""\
+ [{"type": "array", "items": "long"}
+ {"type": "array", "items": "string"}]
+ """, False),  # two array branches, and no comma between them
+]
+
+RECORD_EXAMPLES = [  # record/error schemas: the first seven expected to parse, the last four to fail
+ ExampleSchema("""\
+ {"type": "record",
+ "name": "Test",
+ "fields": [{"name": "f",
+ "type": "long"}]}
+ """, True),
+ ExampleSchema("""\
+ {"type": "error",
+ "name": "Test",
+ "fields": [{"name": "f",
+ "type": "long"}]}
+ """, True),  # type "error" with record-style fields is expected to parse
+ ExampleSchema("""\
+ {"type": "record",
+ "name": "Node",
+ "fields": [{"name": "label", "type": "string"},
+ {"name": "children",
+ "type": {"type": "array", "items": "Node"}}]}
+ """, True),  # refers to its own name "Node" inside the array items
+ ExampleSchema("""\
+ {"type": "record",
+ "name": "Lisp",
+ "fields": [{"name": "value",
+ "type": ["null", "string",
+ {"type": "record",
+ "name": "Cons",
+ "fields": [{"name": "car", "type": "Lisp"},
+ {"name": "cdr", "type": "Lisp"}]}]}]}
+ """, True),
+ ExampleSchema("""\
+ {"type": "record",
+ "name": "HandshakeRequest",
+ "namespace": "org.apache.avro.ipc",
+ "fields": [{"name": "clientHash",
+ "type": {"type": "fixed", "name": "MD5", "size": 16}},
+ {"name": "clientProtocol", "type": ["null", "string"]},
+ {"name": "serverHash", "type": "MD5"},
+ {"name": "meta",
+ "type": ["null", {"type": "map", "values": "bytes"}]}]}
+ """, True),
+ ExampleSchema("""\
+ {"type": "record",
+ "name": "HandshakeResponse",
+ "namespace": "org.apache.avro.ipc",
+ "fields": [{"name": "match",
+ "type": {"type": "enum",
+ "name": "HandshakeMatch",
+ "symbols": ["BOTH", "CLIENT", "NONE"]}},
+ {"name": "serverProtocol", "type": ["null", "string"]},
+ {"name": "serverHash",
+ "type": ["null",
+ {"name": "MD5", "size": 16, "type": "fixed"}]},
+ {"name": "meta",
+ "type": ["null", {"type": "map", "values": "bytes"}]}]}
+ """, True),
+ ExampleSchema("""\
+ {"type": "record",
+ "name": "Interop",
+ "namespace": "org.apache.avro",
+ "fields": [{"name": "intField", "type": "int"},
+ {"name": "longField", "type": "long"},
+ {"name": "stringField", "type": "string"},
+ {"name": "boolField", "type": "boolean"},
+ {"name": "floatField", "type": "float"},
+ {"name": "doubleField", "type": "double"},
+ {"name": "bytesField", "type": "bytes"},
+ {"name": "nullField", "type": "null"},
+ {"name": "arrayField",
+ "type": {"type": "array", "items": "double"}},
+ {"name": "mapField",
+ "type": {"type": "map",
+ "values": {"name": "Foo",
+ "type": "record",
+ "fields": [{"name": "label",
+ "type": "string"}]}}},
+ {"name": "unionField",
+ "type": ["boolean",
+ "double",
+ {"type": "array", "items": "bytes"}]},
+ {"name": "enumField",
+ "type": {"type": "enum",
+ "name": "Kind",
+ "symbols": ["A", "B", "C"]}},
+ {"name": "fixedField",
+ "type": {"type": "fixed", "name": "MD5", "size": 16}},
+ {"name": "recordField",
+ "type": {"type": "record",
+ "name": "Node",
+ "fields": [{"name": "label", "type": "string"},
+ {"name": "children",
+ "type": {"type": "array",
+ "items": "Node"}}]}}]}
+ """, True),
+ ExampleSchema("""\
+ {"type": "record",
+ "name": "Address",
+ "fields": [{"type": "string"},
+ {"type": "string", "name": "City"}]}
+ """, False),  # expected to fail: the first field has no "name"
+ ExampleSchema("""\
+ {"type": "record",
+ "name": "Event",
+ "fields": [{"name": "Sponsor"},
+ {"name": "City", "type": "string"}]}
+ """, False),  # expected to fail: the first field has no "type"
+ ExampleSchema("""\
+ {"type": "record",
+ "fields": "His vision, from the constantly passing bars,"
+ "name", "Rainer"}
+ """, False),  # expected to fail: "fields" is a string and the text is not well-formed JSON
+ ExampleSchema("""\
+ {"name": ["Tom", "Jerry"],
+ "type": "record",
+ "fields": [{"name": "name", "type": "string"}]}
+ """, False),  # expected to fail: "name" is a list, not a string
+]
+
+# Build EXAMPLES as a new list. Binding it to PRIMITIVE_EXAMPLES and then
+# using += would extend PRIMITIVE_EXAMPLES in place through the alias.
+EXAMPLES = (PRIMITIVE_EXAMPLES + FIXED_EXAMPLES + ENUM_EXAMPLES +
+            ARRAY_EXAMPLES + MAP_EXAMPLES + UNION_EXAMPLES +
+            RECORD_EXAMPLES)
+
+# The subset of EXAMPLES that schema.parse is expected to accept; used by
+# the string-cast and round-trip tests.
+VALID_EXAMPLES = [e for e in EXAMPLES if e.valid]
+
+# TODO(hammer): refactor into harness for examples
+# TODO(hammer): pretty-print detailed output
+# TODO(hammer): make verbose flag
+# TODO(hammer): show stack trace to user
+# TODO(hammer): use logging module?
+class TestSchema(unittest.TestCase):
+  def test_parse(self):
+    """Test that schema.parse accepts exactly the examples marked valid."""
+    print "\nTEST PARSE\n"
+
+    num_correct = 0
+    for example in EXAMPLES:
+      # catch Exception only, so KeyboardInterrupt/SystemExit still propagate
+      try:
+        schema.parse(example.schema_string)
+      except Exception:
+        parsed = False
+      else:
+        parsed = True
+      if parsed == bool(example.valid): num_correct += 1
+      print "%s: PARSE %s" % (example.name, "SUCCESS" if parsed else "FAILURE")
+
+    fail_msg = "Parse behavior correct on %d out of %d schemas." % \
+      (num_correct, len(EXAMPLES))
+    self.assertEqual(num_correct, len(EXAMPLES), fail_msg)
+
+  def test_valid_cast_to_string_after_parse(self):
+    """
+    Test that the string generated by an Avro Schema object
+    is, in fact, a valid Avro schema.
+    """
+    print "\nTEST CAST TO STRING\n"
+
+    num_correct = 0
+    for example in VALID_EXAMPLES:
+      schema_data = schema.parse(example.schema_string)
+      # catch Exception only, so KeyboardInterrupt/SystemExit still propagate
+      try:
+        schema.parse(str(schema_data))
+      except Exception:
+        debug_msg = "%s: STRING CAST FAILURE" % example.name
+      else:
+        debug_msg = "%s: STRING CAST SUCCESS" % example.name
+        num_correct += 1
+      print debug_msg
+
+    fail_msg = "Cast to string success on %d out of %d schemas" % \
+      (num_correct, len(VALID_EXAMPLES))
+    self.assertEqual(num_correct, len(VALID_EXAMPLES), fail_msg)
+
+  def test_equivalence_after_round_trip(self):
+    """
+    1. Given a string, parse it to get Avro schema "original".
+    2. Serialize "original" to a string and parse that string
+       to generate Avro schema "round trip".
+    3. Ensure "original" and "round trip" schemas are equivalent.
+    """
+    print "\nTEST ROUND TRIP\n"
+
+    num_correct = 0
+    for example in VALID_EXAMPLES:
+      # a parse or comparison error counts as a round-trip failure; catch
+      # Exception only, so KeyboardInterrupt/SystemExit still propagate
+      try:
+        original_schema = schema.parse(example.schema_string)
+        round_trip_schema = schema.parse(str(original_schema))
+        is_equal = original_schema == round_trip_schema
+      except Exception:
+        is_equal = False
+      if is_equal:
+        num_correct += 1
+        debug_msg = "%s: ROUND TRIP SUCCESS" % example.name
+      else:
+        debug_msg = "%s: ROUND TRIP FAILURE" % example.name
+      print debug_msg
+
+    fail_msg = "Round trip success on %d out of %d schemas" % \
+      (num_correct, len(VALID_EXAMPLES))
+    self.assertEqual(num_correct, len(VALID_EXAMPLES), fail_msg)
+
+  # TODO(hammer): more tests
+  def test_fullname(self):
+    """Test process for making full names from name, namespace pairs."""
+    print '\nTEST FULL NAME\n'
+
+    # the expected full name is the namespace, a dot, then the name
+    fullname = schema.Name.make_fullname('a', 'o.a.h')
+    self.assertEqual(fullname, 'o.a.h.a')
+
+if __name__ == '__main__':  # run this module's tests when executed as a script
+ unittest.main()