You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@avro.apache.org by jm...@apache.org on 2010/04/03 04:13:06 UTC
svn commit: r930458 - in /hadoop/avro/trunk/lang/ruby: lib/avro/io.rb
test/test_datafile.rb
Author: jmhodges
Date: Sat Apr 3 02:13:06 2010
New Revision: 930458
URL: http://svn.apache.org/viewvc?rev=930458&view=rev
Log:
AVRO-461. Skipping primitives on the Ruby side
Added:
hadoop/avro/trunk/lang/ruby/test/test_datafile.rb
Modified:
hadoop/avro/trunk/lang/ruby/lib/avro/io.rb
Modified: hadoop/avro/trunk/lang/ruby/lib/avro/io.rb
URL: http://svn.apache.org/viewvc/hadoop/avro/trunk/lang/ruby/lib/avro/io.rb?rev=930458&r1=930457&r2=930458&view=diff
==============================================================================
--- hadoop/avro/trunk/lang/ruby/lib/avro/io.rb (original)
+++ hadoop/avro/trunk/lang/ruby/lib/avro/io.rb Sat Apr 3 02:13:06 2010
@@ -424,7 +424,6 @@ module Avro
if readers_fields_hash.size > read_record.size
writers_fields_hash = writers_schema.fields_hash
readers_fields_hash.each do |field_name, field|
-
unless writers_fields_hash.has_key? field_name
if !field.default.nil?
field_val = read_default_value(field.type, field.default)
@@ -482,6 +481,41 @@ module Avro
raise AvroError(fail_msg)
end
end
+
# Skips over a single serialized datum described by writers_schema
# without materializing it: primitives dispatch to the decoder's
# type-specific skip method; complex types dispatch to a schema-aware
# skip_* helper on this reader.
#
# writers_schema:: the Avro schema the datum was written with
# decoder:: a decoder positioned at the start of the datum
#
# Raises AvroError when the schema type is not recognized.
def skip_data(writers_schema, decoder)
  case writers_schema.type
  when 'null'
    decoder.skip_null
  when 'boolean'
    decoder.skip_boolean
  when 'string'
    decoder.skip_string
  when 'int'
    decoder.skip_int
  when 'long'
    decoder.skip_long
  when 'float'
    decoder.skip_float
  when 'double'
    decoder.skip_double
  when 'bytes'
    decoder.skip_bytes
  when 'fixed'
    skip_fixed(writers_schema, decoder)
  when 'enum'
    skip_enum(writers_schema, decoder)
  when 'array'
    skip_array(writers_schema, decoder)
  when 'map'
    skip_map(writers_schema, decoder)
  when 'union'
    skip_union(writers_schema, decoder)
  when 'record', 'error', 'request'
    skip_record(writers_schema, decoder)
  else
    # Bug fix: the original interpolated `schm.type`, but no `schm` is in
    # scope here -- an unknown type raised NameError, not AvroError.
    raise AvroError, "Unknown schema type: #{writers_schema.type}"
  end
end
end # DatumReader
# DatumWriter for generic ruby objects
Added: hadoop/avro/trunk/lang/ruby/test/test_datafile.rb
URL: http://svn.apache.org/viewvc/hadoop/avro/trunk/lang/ruby/test/test_datafile.rb?rev=930458&view=auto
==============================================================================
--- hadoop/avro/trunk/lang/ruby/test/test_datafile.rb (added)
+++ hadoop/avro/trunk/lang/ruby/test/test_datafile.rb Sat Apr 3 02:13:06 2010
@@ -0,0 +1,59 @@
+require 'test_help'
+
# Round-trip test: writes records with a full writer schema, then reads
# them back with a narrower reader schema, exercising Avro schema
# resolution (the reader must skip writer-only fields).
class TestDataFile < Test::Unit::TestCase
  HERE = File.expand_path File.dirname(__FILE__)
  # Absolute scratch-file path. The original opened 'data.avr' relative to
  # the process cwd but cleaned up HERE + '/data.avr', so the data file
  # leaked (and stale files survived) when tests ran from another directory.
  DATAFILE = HERE + '/data.avr'

  def setup
    # File.exist? instead of the deprecated File.exists? alias.
    File.unlink(DATAFILE) if File.exist?(DATAFILE)
  end

  def teardown
    File.unlink(DATAFILE) if File.exist?(DATAFILE)
  end

  def test_differing_schemas
    writer_schema = <<-JSON
{ "type": "record",
  "name": "User",
  "fields" : [
    {"name": "username", "type": "string"},
    {"name": "age", "type": "int"},
    {"name": "verified", "type": "boolean", "default": "false"}
  ]}
JSON

    data = [{"username" => "john", "age" => 25, "verified" => true},
            {"username" => "ryan", "age" => 23, "verified" => false}]

    file = File.open(DATAFILE, 'wb')
    schema = Avro::Schema.parse(writer_schema)
    writer = Avro::IO::DatumWriter.new(schema)
    dw = Avro::DataFile::Writer.new(file, writer, schema)
    data.each {|h| dw << h }
    dw.close

    # extract the username only from the avro serialized file
    reader_schema = <<-JSON
{ "type": "record",
  "name": "User",
  "fields" : [
    {"name": "username", "type": "string"}
  ]}
JSON

    # Open fresh for reading. The original also built an unused throwaway
    # Reader on the handle first, which consumed the container header
    # before the real Reader was constructed.
    file = File.open(DATAFILE, 'r')
    reader = Avro::IO::DatumReader.new(nil, Avro::Schema.parse(reader_schema))
    dr = Avro::DataFile::Reader.new(file, reader)
    run = false
    dr.each_with_index do |record, i|
      run = true
      assert_equal data[i]['username'], record['username']
    end
    assert run, "enumerable is run through at least once"
  end
end