Posted to commits@avro.apache.org by cu...@apache.org on 2010/02/09 20:36:43 UTC

svn commit: r908170 - in /hadoop/avro/trunk: CHANGES.txt build.sh lang/ruby/Rakefile lang/ruby/lib/avro/data_file.rb lang/ruby/test/random_data.rb lang/ruby/test/test_help.rb

Author: cutting
Date: Tue Feb  9 19:36:42 2010
New Revision: 908170

URL: http://svn.apache.org/viewvc?rev=908170&view=rev
Log:
AVRO-411. Add Ruby data file interop tests.  Contributed by Jeff Hodges.

Added:
    hadoop/avro/trunk/lang/ruby/test/random_data.rb
Modified:
    hadoop/avro/trunk/CHANGES.txt
    hadoop/avro/trunk/build.sh
    hadoop/avro/trunk/lang/ruby/Rakefile
    hadoop/avro/trunk/lang/ruby/lib/avro/data_file.rb
    hadoop/avro/trunk/lang/ruby/test/test_help.rb

Modified: hadoop/avro/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/avro/trunk/CHANGES.txt?rev=908170&r1=908169&r2=908170&view=diff
==============================================================================
--- hadoop/avro/trunk/CHANGES.txt (original)
+++ hadoop/avro/trunk/CHANGES.txt Tue Feb  9 19:36:42 2010
@@ -344,6 +344,8 @@
 
     AVRO-383. Optimizing ResolvingDecoder for default values (thiru)
 
+    AVRO-411. Add Ruby data file interop tests. (Jeff Hodges via cutting) 
+
   BUG FIXES
  
     AVRO-176. Safeguard against bad istreams before reading. (sbanacho)

Modified: hadoop/avro/trunk/build.sh
URL: http://svn.apache.org/viewvc/hadoop/avro/trunk/build.sh?rev=908170&r1=908169&r2=908170&view=diff
==============================================================================
--- hadoop/avro/trunk/build.sh (original)
+++ hadoop/avro/trunk/build.sh Tue Feb  9 19:36:42 2010
@@ -44,20 +44,21 @@
 	(cd lang/py; ant test)
 	(cd lang/c; ./build.sh test)
 	(cd lang/c++; ./build.sh test)
-	# (cd lang/ruby; rake test)
+	(cd lang/ruby; rake test)
 
 	# create interop test data
 	(cd lang/java; ant interop-data-generate)
 	#(cd lang/py; ant interop-data-generate)
 	(cd lang/c; ./build.sh interop-data-generate)
 	#(cd lang/c++; make interop-data-generate)
+	(cd lang/ruby; rake generate_interop)
 
 	# run interop data tests
 	(cd lang/java; ant interop-data-test)
 	#(cd lang/py; ant interop-data-test)
 	(cd lang/c; ./build.sh interop-data-test)
-	#(cd lang/c; make interop-data-test)
 	#(cd lang/c++; make interop-data-test)
+	(cd lang/ruby; rake interop)
 
 	# run interop rpc tests
 	/bin/bash share/test/interop/bin/test_rpc_interop.sh
@@ -101,6 +102,9 @@
 	(cd lang/c; ./build.sh clean)
 
 	(cd lang/c++; ./build.sh clean)
+
+	(cd lang/ruby; rake clean)
+
 	;;
 
     *)

Modified: hadoop/avro/trunk/lang/ruby/Rakefile
URL: http://svn.apache.org/viewvc/hadoop/avro/trunk/lang/ruby/Rakefile?rev=908170&r1=908169&r2=908170&view=diff
==============================================================================
--- hadoop/avro/trunk/lang/ruby/Rakefile (original)
+++ hadoop/avro/trunk/lang/ruby/Rakefile Tue Feb  9 19:36:42 2010
@@ -14,6 +14,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+require 'rubygems'
 require 'echoe'
 Echoe.new('avro') do |p|
   p.author = "Jeff Hodges"
@@ -22,3 +23,31 @@
   p.url = "http://hadoop.apache.org/avro/"
   p.runtime_dependencies = %w[rubygems yajl]
 end
+
+t = Rake::TestTask.new(:interop)
+t.pattern = 'interop/test*.rb'
+
+task :generate_interop do
+  $:.unshift(HERE + '/lib')
+  $:.unshift(HERE + '/test')
+  require 'avro'
+  require 'random_data'
+
+  schema = Avro::Schema.parse(File.read(SCHEMAS + '/interop.avsc'))
+  r = RandomData.new(schema, ENV['SEED'])
+  f = File.open(BUILD + '/interop/data/ruby.avro', 'w')
+  writer = Avro::DataFile::Writer.new(f, Avro::IO::DatumWriter.new(schema), schema)
+  begin
+    writer << r.next
+    writer << r.next
+  ensure
+    writer.close
+  end
+end
+
+
+HERE = File.expand_path(File.dirname(__FILE__))
+SHARE = HERE + '/../../share'
+SCHEMAS = SHARE + '/test/schemas'
+BUILD = HERE + '/../../build'
+
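
The new :interop task collects test files matching interop/test*.rb, but no such file is part of this commit. As a rough sketch of what one might look like (the file name, class name, and relative paths are assumptions), it could simply try to decode every data file the other implementations dropped into build/interop/data:

    # interop/test_interop.rb -- hypothetical sketch, not part of this commit
    require 'rubygems'
    require 'test/unit'
    require 'avro'

    class TestInterop < Test::Unit::TestCase
      # Assumed layout: each implementation writes its interop data file here.
      DATAFILES = File.dirname(__FILE__) + '/../../../build/interop/data/*.avro'

      def test_interop
        Dir[DATAFILES].each do |fn|
          records = 0
          Avro::DataFile::Reader.new(File.open(fn, 'r'),
                                     Avro::IO::DatumReader.new).each { records += 1 }
          assert records > 0, "no records decoded from #{fn}"
        end
      end
    end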

Modified: hadoop/avro/trunk/lang/ruby/lib/avro/data_file.rb
URL: http://svn.apache.org/viewvc/hadoop/avro/trunk/lang/ruby/lib/avro/data_file.rb?rev=908170&r1=908169&r2=908170&view=diff
==============================================================================
--- hadoop/avro/trunk/lang/ruby/lib/avro/data_file.rb (original)
+++ hadoop/avro/trunk/lang/ruby/lib/avro/data_file.rb Tue Feb  9 19:36:42 2010
@@ -18,7 +18,7 @@
 
 module Avro
   module DataFile
-    VERSION = 0
+    VERSION = 1
     MAGIC = "Obj" + [VERSION].pack('c')
     MAGIC_SIZE = MAGIC.size
     SYNC_SIZE = 16
@@ -50,22 +50,22 @@
 
         if writers_schema
           @sync_marker = Writer.generate_sync_marker
-          meta['codec'] = 'null'
-          meta['schema'] = writers_schema.to_s
+          meta['avro.codec'] = 'null'
+          meta['avro.schema'] = writers_schema.to_s
           datum_writer.writers_schema = writers_schema
           write_header
         else
           # open writer for reading to collect metadata
-          dfr = DataFileReader.new(writer, Avro::IO::DatumReader.new)
+          dfr = Reader.new(writer, Avro::IO::DatumReader.new)
 
           # FIXME(jmhodges): collect arbitrary metadata
           # collect metadata
           @sync_marker = dfr.sync_marker
-          meta['codec'] = dfr.meta['codec']
+          meta['avro.codec'] = dfr.meta['avro.codec']
 
           # get schema used to write existing file
-          schema_from_file = dfr.meta['schema']
-          meta['schema'] = schema_from_file
+          schema_from_file = dfr.meta['avro.schema']
+          meta['avro.schema'] = schema_from_file
           datum_writer.writers_schema = Schema.parse(schema_from_file)
 
           # seek to the end of the file and prepare for writing
@@ -128,10 +128,10 @@
           encoder.write_long(to_write.size)
 
           # write block contents
-          if meta['codec'] == 'null'
+          if meta['avro.codec'] == 'null'
             writer.write(to_write)
           else
-            msg = "#{meta['codec'].inspect} coded is not supported"
+            msg = "#{meta['avro.codec'].inspect} codec is not supported"
             raise DataFileError, msg
           end
 
@@ -161,14 +161,14 @@
         read_header
 
         # ensure the codec is valid
-        codec_from_file = meta['codec']
+        codec_from_file = meta['avro.codec']
         if codec_from_file && ! VALID_CODECS.include?(codec_from_file)
           raise DataFileError, "Unknown codec: #{codec_from_file}"
         end
 
         # get ready to read
         @block_count = 0
-        datum_reader.writers_schema = Schema.parse meta['schema']
+        datum_reader.writers_schema = Schema.parse meta['avro.schema']
       end
 
       # Iterates through each datum in this file
@@ -209,7 +209,7 @@
           msg = 'Not an Avro data file: shorter than the Avro magic block'
           raise DataFileError, msg
         elsif magic_in_file != MAGIC
-          msg = "Not an Avro data file: #{magic_in_file} doesn't match #{MAGIC}"
+          msg = "Not an Avro data file: #{magic_in_file.inspect} doesn't match #{MAGIC.inspect}"
           raise DataFileError, msg
         end
 

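The data_file.rb changes bump the container format VERSION from 0 to 1 and move the metadata keys into the reserved avro.* namespace ('codec' becomes 'avro.codec', 'schema' becomes 'avro.schema'). A minimal round-trip sketch using the classes touched above (the schema literal and file name are made up for illustration; iteration assumes Reader#each, per the "Iterates through each datum" comment in the diff):

    require 'rubygems'
    require 'avro'

    schema = Avro::Schema.parse('{"type": "record", "name": "User",
      "fields": [{"name": "name", "type": "string"}]}')

    # Writing stores avro.schema and avro.codec in the file header.
    file = File.open('users.avro', 'w')
    writer = Avro::DataFile::Writer.new(file, Avro::IO::DatumWriter.new(schema), schema)
    begin
      writer << {'name' => 'alyssa'}
      writer << {'name' => 'ben'}
    ensure
      writer.close
    end

    # Reading recovers the writer's schema from the avro.schema metadata entry.
    reader = Avro::DataFile::Reader.new(File.open('users.avro', 'r'),
                                        Avro::IO::DatumReader.new)
    reader.each { |user| puts user['name'] }
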
Added: hadoop/avro/trunk/lang/ruby/test/random_data.rb
URL: http://svn.apache.org/viewvc/hadoop/avro/trunk/lang/ruby/test/random_data.rb?rev=908170&view=auto
==============================================================================
--- hadoop/avro/trunk/lang/ruby/test/random_data.rb (added)
+++ hadoop/avro/trunk/lang/ruby/test/random_data.rb Tue Feb  9 19:36:42 2010
@@ -0,0 +1,73 @@
+class RandomData
+  def initialize(schm, seed=nil)
+    srand(seed) if seed
+    @seed = seed
+    @schm = schm
+  end
+
+  def next
+    nextdata(@schm)
+  end
+
+  def nextdata(schm, d=0)
+    case schm.type
+    when 'boolean'
+      rand > 0.5
+    when 'string'
+      randstr()
+    when 'int'
+      rand(Avro::Schema::INT_MAX_VALUE - Avro::Schema::INT_MIN_VALUE) + Avro::Schema::INT_MIN_VALUE
+    when 'long'
+      rand(Avro::Schema::LONG_MAX_VALUE - Avro::Schema::LONG_MIN_VALUE) + Avro::Schema::LONG_MIN_VALUE
+    when 'float'
+      (-1024 + 2048 * rand).round.to_f
+    when 'double'
+      Avro::Schema::LONG_MIN_VALUE + (Avro::Schema::LONG_MAX_VALUE - Avro::Schema::LONG_MIN_VALUE) * rand
+    when 'bytes'
+      randstr(BYTEPOOL)
+    when 'null'
+      nil
+    when 'array'
+      arr = []
+      len = rand(5) + 2 - d
+      len = 0 if len < 0
+      len.times{ arr << nextdata(schm.items, d+1) }
+      arr
+    when 'map'
+      map = {}
+      len = rand(5) + 2 - d
+      len = 0 if len < 0
+      len.times do
+        map[nextdata(Avro::Schema::PrimitiveSchema.new('string'))] = nextdata(schm.values, d+1)
+      end
+      map
+    when 'record'
+      m = {}
+      schm.fields.each do |field|
+        m[field.name] = nextdata(field.type, d+1)
+      end
+      m
+    when 'union'
+      types = schm.schemas
+      nextdata(types[rand(types.size)], d)
+    when 'enum'
+      symbols = schm.symbols
+      len = symbols.size
+      return nil if len == 0
+      symbols[rand(len)]
+    when 'fixed'
+      f = ""
+      schm.size.times { f << BYTEPOOL[rand(BYTEPOOL.size), 1] }
+      f
+    end
+  end
+
+  CHARPOOL = 'abcdefghjkmnpqrstuvwxyzABCDEFGHJKLMNPQRSTUVWXYZ23456789'
+  BYTEPOOL = '12345abcd'
+
+  def randstr(chars=CHARPOOL, length=20)
+    str = ''
+    rand(length+1).times { str << chars[rand(chars.size)] }
+    str
+  end
+end
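
RandomData walks a parsed schema and emits a random datum of the matching shape on each call to #next; passing a seed makes runs reproducible, which is what the Rakefile's SEED environment variable feeds through. A small usage sketch (the schema literal here is made up for illustration):

    require 'rubygems'
    require 'avro'
    require 'random_data'   # lang/ruby/test must be on the load path

    schema = Avro::Schema.parse('{"type": "array", "items": "long"}')
    r = RandomData.new(schema, 42)   # seeded, so output is repeatable
    3.times { p r.next }             # prints three random arrays of longs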

Modified: hadoop/avro/trunk/lang/ruby/test/test_help.rb
URL: http://svn.apache.org/viewvc/hadoop/avro/trunk/lang/ruby/test/test_help.rb?rev=908170&r1=908169&r2=908170&view=diff
==============================================================================
--- hadoop/avro/trunk/lang/ruby/test/test_help.rb (original)
+++ hadoop/avro/trunk/lang/ruby/test/test_help.rb Tue Feb  9 19:36:42 2010
@@ -16,80 +16,8 @@
 
 require 'rubygems'
 require 'test/unit'
-require 'avro'
 require 'stringio'
-
 require 'fileutils'
 FileUtils.mkdir_p('tmp')
-
-class RandomData
-  def initialize(schm, seed=nil)
-    srand(seed) if seed
-    @seed = seed
-    @schm = schm
-  end
-
-  def next
-    nextdata(@schm)
-  end
-
-  def nextdata(schm, d=0)
-    case schm.type
-    when 'boolean'
-      rand > 0.5
-    when 'string'
-      randstr()
-    when 'int'
-      rand(Avro::Schema::INT_MAX_VALUE - Avro::Schema::INT_MIN_VALUE) + Avro::Schema::INT_MIN_VALUE
-    when 'long'
-      rand(Avro::Schema::LONG_MAX_VALUE - Avro::Schema::LONG_MIN_VALUE) + Avro::Schema::LONG_MIN_VALUE
-    when 'float'
-      (-1024 + 2048 * rand).round.to_f
-    when 'double'
-      Avro::Schema::LONG_MIN_VALUE + (Avro::Schema::LONG_MAX_VALUE - Avro::Schema::LONG_MIN_VALUE) * rand
-    when 'bytes'
-      randstr(BYTEPOOL)
-    when 'null'
-      nil
-    when 'array'
-      arr = []
-      len = rand(5) + 2 - d
-      len = 0 if len < 0
-      len.times{ arr << nextdata(schm.items, d+1) }
-      arr
-    when 'map'
-      map = {}
-      len = rand(5) + 2 - d
-      len = 0 if len < 0
-      len.times do
-        map[nextdata(Avro::Schema::PrimitiveSchema.new('string'))] = nextdata(schm.values, d+1)
-      end
-      map
-    when 'record'
-      m = {}
-      schm.fields.each do |field|
-        m[field.name] = nextdata(field.type, d+1)
-      end
-      m
-    when 'union'
-      types = schm.schemas
-      nextdata(types[rand(types.size)], d)
-    when 'enum'
-      symbols = schm.symbols
-      len = symbols.size
-      return nil if len == 0
-      symbols[rand(len)]
-    when 'fixed'
-      BYTEPOOL[rand(BYTEPOOL.size), 1]
-    end
-  end
-
-  CHARPOOL = 'abcdefghjkmnpqrstuvwxyzABCDEFGHJKLMNPQRSTUVWXYZ23456789'
-  BYTEPOOL = '12345abcd'
-
-  def randstr(chars=CHARPOOL, length=20)
-    str = ''
-    rand(length+1).times { str << chars[rand(chars.size)] }
-    str
-  end
-end
+require 'avro'
+require 'random_data'