You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@avro.apache.org by cu...@apache.org on 2010/01/20 02:05:07 UTC
svn commit: r901024 [2/2] - in /hadoop/avro/trunk: ./ lang/ruby/
lang/ruby/lib/ lang/ruby/lib/avro/ lang/ruby/test/ share/
Added: hadoop/avro/trunk/lang/ruby/test/test_io.rb
URL: http://svn.apache.org/viewvc/hadoop/avro/trunk/lang/ruby/test/test_io.rb?rev=901024&view=auto
==============================================================================
--- hadoop/avro/trunk/lang/ruby/test/test_io.rb (added)
+++ hadoop/avro/trunk/lang/ruby/test/test_io.rb Wed Jan 20 01:05:06 2010
@@ -0,0 +1,361 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+require 'test_help'
+
+class TestIO < Test::Unit::TestCase
+ DATAFILE = 'tmp/test.rb.avro'
+ Schema = Avro::Schema
+
+ def test_null
+ check_default('"null"', "null", nil)
+ end
+
+ def test_boolean
+ check_default('"boolean"', "true", true)
+ check_default('"boolean"', "false", false)
+ end
+
+ def test_string
+ check_default('"string"', '"foo"', "foo")
+ end
+
+ def test_bytes
+ check_default('"bytes"', '"foo"', "foo")
+ end
+
+ def test_int
+ check_default('"int"', "5", 5)
+ end
+
+ def test_long
+ check_default('"long"', "9", 9)
+ end
+
+ def test_float
+ check_default('"float"', "1.2", 1.2)
+ end
+
+ def test_double
+ check_default('"double"', "1.2", 1.2)
+ end
+
+ def test_array
+ array_schema = '{"type": "array", "items": "long"}'
+ check_default(array_schema, "[1]", [1])
+ end
+
+ def test_map
+ map_schema = '{"type": "map", "values": "long"}'
+ check_default(map_schema, '{"a": 1}', {"a" => 1})
+ end
+
+ def test_record
+ record_schema = <<EOS
+ {"type": "record",
+ "name": "Test",
+ "fields": [{"name": "f",
+ "type": "long"}]}
+EOS
+ check_default(record_schema, '{"f": 11}', {"f" => 11})
+ end
+
+ def test_enum
+ enum_schema = '{"type": "enum", "name": "Test","symbols": ["A", "B"]}'
+ check_default(enum_schema, '"B"', "B")
+ end
+
+ def test_recursive
+ recursive_schema = <<EOS
+ {"type": "record",
+ "name": "Node",
+ "fields": [{"name": "label", "type": "string"},
+ {"name": "children",
+ "type": {"type": "array", "items": "Node"}}]}
+EOS
+ check(recursive_schema)
+ end
+
+ def test_union
+ union_schema = <<EOS
+ ["string",
+ "null",
+ "long",
+ {"type": "record",
+ "name": "Cons",
+ "fields": [{"name": "car", "type": "string"},
+ {"name": "cdr", "type": "string"}]}]
+EOS
+ check(union_schema)
+ check_default('["double", "long"]', "1.1", 1.1)
+ end
+
+ def test_lisp
+ lisp_schema = <<EOS
+ {"type": "record",
+ "name": "Lisp",
+ "fields": [{"name": "value",
+ "type": ["null", "string",
+ {"type": "record",
+ "name": "Cons",
+ "fields": [{"name": "car", "type": "Lisp"},
+ {"name": "cdr", "type": "Lisp"}]}]}]}
+EOS
+ check(lisp_schema)
+ end
+
+ def test_fixed
+ fixed_schema = '{"type": "fixed", "name": "Test", "size": 1}'
+ check_default(fixed_schema, '"a"', "a")
+ end
+
+ def test_enum_with_duplicate
+ str = '{"type": "enum", "name": "Test","symbols" : ["AA", "AA"]}'
+ assert_raises(Avro::SchemaParseError) do
+ schema = Avro::Schema.parse str
+ end
+ end
+
+ BINARY_INT_ENCODINGS = [
+ [0, '00'],
+ [-1, '01'],
+ [1, '02'],
+ [-2, '03'],
+ [2, '04'],
+ [-64, '7f'],
+ [64, '80 01'],
+ [8192, '80 80 01'],
+ [-8193, '81 80 01'],
+ ]
+
+ def avro_hexlify(reader)
+ bytes = []
+ current_byte = reader.read(1)
+ bytes << hexlify(current_byte)
+ while (current_byte[0] & 0x80) != 0
+ current_byte = reader.read(1)
+ bytes << hexlify(current_byte)
+ end
+ bytes.join ' '
+ end
+
+ def hexlify(msg)
+ msg.split("").collect { |c| c[0].to_s(16).rjust(2, '0') }.join
+ end
+
+ def test_binary_int_encoding
+ for value, hex_encoding in BINARY_INT_ENCODINGS
+ # write datum in binary to string buffer
+ buffer = StringIO.new
+ encoder = Avro::IO::BinaryEncoder.new(buffer)
+ datum_writer = Avro::IO::DatumWriter.new(Avro::Schema.parse('"int"'))
+ datum_writer.write(value, encoder)
+
+ buffer.seek(0)
+ hex_val = avro_hexlify(buffer)
+
+ assert_equal hex_encoding, hex_val
+ end
+ end
+
+ def test_binary_long_encoding
+ for value, hex_encoding in BINARY_INT_ENCODINGS
+ buffer = StringIO.new
+ encoder = Avro::IO::BinaryEncoder.new(buffer)
+ datum_writer = Avro::IO::DatumWriter.new(Avro::Schema.parse('"long"'))
+ datum_writer.write(value, encoder)
+
+ # read it out of the buffer and hexlify it
+ buffer.seek(0)
+ hex_val = avro_hexlify(buffer)
+
+ assert_equal hex_encoding, hex_val
+ end
+ end
+
+ def test_skip_long
+ for value_to_skip, hex_encoding in BINARY_INT_ENCODINGS
+ value_to_read = 6253
+
+ # write some data in binary to string buffer
+ writer = StringIO.new
+ encoder = Avro::IO::BinaryEncoder.new(writer)
+ datum_writer = Avro::IO::DatumWriter.new(Avro::Schema.parse('"long"'))
+ datum_writer.write(value_to_skip, encoder)
+ datum_writer.write(value_to_read, encoder)
+
+ # skip the value
+ reader = StringIO.new(writer.string())
+ decoder = Avro::IO::BinaryDecoder.new(reader)
+ decoder.skip_long()
+
+ # read data from string buffer
+ datum_reader = Avro::IO::DatumReader.new(Avro::Schema.parse('"long"'))
+ read_value = datum_reader.read(decoder)
+
+ # check it
+ assert_equal value_to_read, read_value
+ end
+ end
+
+ def test_skip_int
+ for value_to_skip, hex_encoding in BINARY_INT_ENCODINGS
+ value_to_read = 6253
+
+ writer = StringIO.new
+ encoder = Avro::IO::BinaryEncoder.new(writer)
+ datum_writer = Avro::IO::DatumWriter.new(Avro::Schema.parse('"int"'))
+ datum_writer.write(value_to_skip, encoder)
+ datum_writer.write(value_to_read, encoder)
+
+ reader = StringIO.new(writer.string)
+ decoder = Avro::IO::BinaryDecoder.new(reader)
+ decoder.skip_int
+
+ datum_reader = Avro::IO::DatumReader.new(Avro::Schema.parse('"int"'))
+ read_value = datum_reader.read(decoder)
+
+ assert_equal value_to_read, read_value
+ end
+ end
+
+ def test_schema_promotion
+ promotable_schemas = ['"int"', '"long"', '"float"', '"double"']
+ incorrect = 0
+ promotable_schemas.each_with_index do |ws, i|
+ writers_schema = Avro::Schema.parse(ws)
+ datum_to_write = 219
+ for rs in promotable_schemas[(i + 1)..-1]
+ readers_schema = Avro::Schema.parse(rs)
+ writer, enc, dw = write_datum(datum_to_write, writers_schema)
+ datum_read = read_datum(writer, writers_schema, readers_schema)
+ if datum_read != datum_to_write
+ incorrect += 1
+ end
+ end
+ assert_equal(incorrect, 0)
+ end
+ end
+ private
+
+ def check_default(schema_json, default_json, default_value)
+ check(schema_json)
+ actual_schema = '{"type": "record", "name": "Foo", "fields": []}'
+ actual = Avro::Schema.parse(actual_schema)
+
+ expected_schema = <<EOS
+ {"type": "record",
+ "name": "Foo",
+ "fields": [{"name": "f", "type": #{schema_json}, "default": #{default_json}}]}
+EOS
+ expected = Avro::Schema.parse(expected_schema)
+
+ reader = Avro::IO::DatumReader.new(actual, expected)
+ record = reader.read(Avro::IO::BinaryDecoder.new(StringIO.new))
+ assert_equal default_value, record["f"]
+ end
+
+ def check(str)
+ # parse schema, then convert back to string
+ schema = Avro::Schema.parse str
+
+ parsed_string = schema.to_s
+
+ # test that the round-trip didn't mess up anything
+ # NB: I don't think we should do this. Why enforce ordering?
+ assert_equal(Yajl.load(str),
+ Yajl.load(parsed_string))
+
+ # test __eq__
+ assert_equal(schema, Avro::Schema.parse(str))
+
+ # test hashcode doesn't generate infinite recursion
+ schema.hash
+
+ # test serialization of random data
+ randomdata = RandomData.new(schema)
+ 9.times { checkser(schema, randomdata) }
+
+ # test writing of data to file
+ check_datafile(schema)
+ end
+
+ def checkser(schm, randomdata)
+ datum = randomdata.next
+ assert validate(schm, datum)
+ w = Avro::IO::DatumWriter.new(schm)
+ writer = StringIO.new "", "w"
+ w.write(datum, Avro::IO::BinaryEncoder.new(writer))
+ r = datum_reader(schm)
+ reader = StringIO.new(writer.string)
+ ob = r.read(Avro::IO::BinaryDecoder.new(reader))
+ assert_equal(datum, ob) # FIXME check on assertdata conditional
+ end
+
+ def check_datafile(schm)
+ seed = 0
+ count = 10
+ random_data = RandomData.new(schm, seed)
+
+
+ f = File.open(DATAFILE, 'wb')
+ dw = Avro::DataFile::Writer.new(f, datum_writer(schm), schm)
+ count.times{ dw << random_data.next }
+ dw.close
+
+ random_data = RandomData.new(schm, seed)
+
+
+ f = File.open(DATAFILE, 'r+')
+ dr = Avro::DataFile::Reader.new(f, datum_reader(schm))
+
+ last_index = nil
+ dr.each_with_index do |data, c|
+ last_index = c
+ # FIXME assertdata conditional
+ assert_equal(random_data.next, data)
+ end
+ dr.close
+ assert_equal count, last_index+1
+ end
+
+ def validate(schm, datum)
+ Avro::Schema.validate(schm, datum)
+ end
+
+ def datum_writer(schm)
+ Avro::IO::DatumWriter.new(schm)
+ end
+
+ def datum_reader(schm)
+ Avro::IO::DatumReader.new(schm)
+ end
+
+ def write_datum(datum, writers_schema)
+ writer = StringIO.new
+ encoder = Avro::IO::BinaryEncoder.new(writer)
+ datum_writer = Avro::IO::DatumWriter.new(writers_schema)
+ datum_writer.write(datum, encoder)
+ [writer, encoder, datum_writer]
+ end
+
+ def read_datum(buffer, writers_schema, readers_schema=nil)
+ reader = StringIO.new(buffer.string)
+ decoder = Avro::IO::BinaryDecoder.new(reader)
+ datum_reader = Avro::IO::DatumReader.new(writers_schema, readers_schema)
+ datum_reader.read(decoder)
+ end
+end
Added: hadoop/avro/trunk/lang/ruby/test/test_protocol.rb
URL: http://svn.apache.org/viewvc/hadoop/avro/trunk/lang/ruby/test/test_protocol.rb?rev=901024&view=auto
==============================================================================
--- hadoop/avro/trunk/lang/ruby/test/test_protocol.rb (added)
+++ hadoop/avro/trunk/lang/ruby/test/test_protocol.rb Wed Jan 20 01:05:06 2010
@@ -0,0 +1,192 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+require 'test_help'
+
+class TestProtocol < Test::Unit::TestCase
+
+ class ExampleProtocol
+ attr_reader :protocol_string, :valid, :name
+ attr_accessor :comment
+ def initialize(protocol_string, name=nil, comment='')
+ @protocol_string = protocol_string
+ @name = name || protocol_string # default to schema_string for name
+ @comment = comment
+ end
+ end
+#
+# Example Protocols
+#
+
+EXAMPLES = [
+ ExampleProtocol.new(<<-EOS, true),
+{
+ "namespace": "com.acme",
+ "protocol": "HelloWorld",
+
+ "types": [
+ {"name": "Greeting", "type": "record", "fields": [
+ {"name": "message", "type": "string"}]},
+ {"name": "Curse", "type": "error", "fields": [
+ {"name": "message", "type": "string"}]}
+ ],
+
+ "messages": {
+ "hello": {
+ "request": [{"name": "greeting", "type": "Greeting" }],
+ "response": "Greeting",
+ "errors": ["Curse"]
+ }
+ }
+}
+EOS
+
+ ExampleProtocol.new(<<-EOS, true),
+{"namespace": "org.apache.avro.test",
+ "protocol": "Simple",
+
+ "types": [
+ {"name": "Kind", "type": "enum", "symbols": ["FOO","BAR","BAZ"]},
+
+ {"name": "MD5", "type": "fixed", "size": 16},
+
+ {"name": "TestRecord", "type": "record",
+ "fields": [
+ {"name": "name", "type": "string", "order": "ignore"},
+ {"name": "kind", "type": "Kind", "order": "descending"},
+ {"name": "hash", "type": "MD5"}
+ ]
+ },
+
+ {"name": "TestError", "type": "error", "fields": [
+ {"name": "message", "type": "string"}
+ ]
+ }
+
+ ],
+
+ "messages": {
+
+ "hello": {
+ "request": [{"name": "greeting", "type": "string"}],
+ "response": "string"
+ },
+
+ "echo": {
+ "request": [{"name": "record", "type": "TestRecord"}],
+ "response": "TestRecord"
+ },
+
+ "add": {
+ "request": [{"name": "arg1", "type": "int"}, {"name": "arg2", "type": "int"}],
+ "response": "int"
+ },
+
+ "echoBytes": {
+ "request": [{"name": "data", "type": "bytes"}],
+ "response": "bytes"
+ },
+
+ "error": {
+ "request": [],
+ "response": "null",
+ "errors": ["TestError"]
+ }
+ }
+
+}
+EOS
+ ExampleProtocol.new(<<-EOS, true),
+{"namespace": "org.apache.avro.test.namespace",
+ "protocol": "TestNamespace",
+
+ "types": [
+ {"name": "org.apache.avro.test.util.MD5", "type": "fixed", "size": 16},
+ {"name": "TestRecord", "type": "record",
+ "fields": [ {"name": "hash", "type": "org.apache.avro.test.util.MD5"} ]
+ },
+ {"name": "TestError", "namespace": "org.apache.avro.test.errors",
+ "type": "error", "fields": [ {"name": "message", "type": "string"} ]
+ }
+ ],
+
+ "messages": {
+ "echo": {
+ "request": [{"name": "record", "type": "TestRecord"}],
+ "response": "TestRecord"
+ },
+
+ "error": {
+ "request": [],
+ "response": "null",
+ "errors": ["org.apache.avro.test.errors.TestError"]
+ }
+
+ }
+
+}
+EOS
+ ExampleProtocol.new(<<-EOS, true)
+{"namespace": "org.apache.avro.test",
+ "protocol": "BulkData",
+
+ "types": [],
+
+ "messages": {
+
+ "read": {
+ "request": [],
+ "response": "bytes"
+ },
+
+ "write": {
+ "request": [ {"name": "data", "type": "bytes"} ],
+ "response": "null"
+ }
+
+ }
+
+}
+EOS
+]
+
+ Protocol = Avro::Protocol
+ def test_parse
+ EXAMPLES.each do |example|
+ assert_nothing_raised("should be valid: #{example.protocol_string}") {
+ Protocol.parse(example.protocol_string)
+ }
+ end
+ end
+
+ def test_valid_cast_to_string_after_parse
+ EXAMPLES.each do |example|
+ assert_nothing_raised("round tripped okay #{example.protocol_string}") {
+ foo = Protocol.parse(example.protocol_string).to_s
+ Protocol.parse(foo)
+ }
+ end
+ end
+
+ def test_equivalence_after_round_trip
+ EXAMPLES.each do |example|
+ original = Protocol.parse(example.protocol_string)
+ round_trip = Protocol.parse(original.to_s)
+
+ assert_equal original, round_trip
+ end
+ end
+end
Modified: hadoop/avro/trunk/share/rat-excludes.txt
URL: http://svn.apache.org/viewvc/hadoop/avro/trunk/share/rat-excludes.txt?rev=901024&r1=901023&r2=901024&view=diff
==============================================================================
--- hadoop/avro/trunk/share/rat-excludes.txt (original)
+++ hadoop/avro/trunk/share/rat-excludes.txt Wed Jan 20 01:05:06 2010
@@ -22,3 +22,4 @@
lang/c/json/**
lang/c/tests/**
lang/c/version.sh
+lang/ruby/Manifest