You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@avro.apache.org by fo...@apache.org on 2019/10/24 08:23:45 UTC

[avro] branch master updated: Add Single Object Encoding Support to Ruby library (#317)

This is an automated email from the ASF dual-hosted git repository.

fokko pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/avro.git


The following commit(s) were added to refs/heads/master by this push:
     new a36cdb3  Add Single Object Encoding Support to Ruby library (#317)
a36cdb3 is described below

commit a36cdb36b06541bcbb02345039390291d09e2080
Author: Jess Szmajda <je...@gmail.com>
AuthorDate: Thu Oct 24 04:23:38 2019 -0400

    Add Single Object Encoding Support to Ruby library (#317)
    
    * AVRO-1740 Add crc_64_avro_fingerprint method to Schema
    
    * AVRO-1740 Single Object Encoding header
    
    This commit adds the Single Object Encoding header as defined by the
    AVRO spec, but I've put it in what I imagine is _definitely_ the wrong
    place.
    
    Happy to move it given proper guidance!
---
 lang/ruby/lib/avro/schema.rb        | 43 +++++++++++++++++++++++++++++++++++++
 lang/ruby/test/test_fingerprints.rb | 19 ++++++++++++++++
 2 files changed, 62 insertions(+)

diff --git a/lang/ruby/lib/avro/schema.rb b/lang/ruby/lib/avro/schema.rb
index 75937b9..d57ea71 100644
--- a/lang/ruby/lib/avro/schema.rb
+++ b/lang/ruby/lib/avro/schema.rb
@@ -131,6 +131,49 @@ module Avro
       Digest::SHA256.hexdigest(parsing_form).to_i(16)
     end
 
+    CRC_EMPTY = 0xc15d213aa4d7a795
+
+    # The java library caches this value after initialized, so this pattern
+    # mimics that.
+    @@fp_table = nil
+    def initFPTable
+      @@fp_table = Array.new(256)
+      256.times do |i|
+        fp = i
+        8.times do |j|
+          fp = (fp >> 1) ^ ( CRC_EMPTY & -( fp & 1 ) )
+        end
+        @@fp_table[i] = fp
+      end
+    end
+
+    def crc_64_avro_fingerprint
+      parsing_form = Avro::SchemaNormalization.to_parsing_form(self)
+      data_bytes = parsing_form.unpack("C*")
+
+      initFPTable unless @@fp_table
+
+      fp = CRC_EMPTY
+      data_bytes.each do |b|
+        fp = (fp >> 8) ^ @@fp_table[ (fp ^ b) & 0xff ]
+      end
+      fp
+    end
+
+    SINGLE_OBJECT_MAGIC_NUMBER = [0xC3, 0x01]
+    def single_object_encoding_header
+      [SINGLE_OBJECT_MAGIC_NUMBER, single_object_schema_fingerprint].flatten
+    end
+    def single_object_schema_fingerprint
+      working = crc_64_avro_fingerprint
+      bytes = Array.new(8)
+      8.times do |i|
+        bytes[7 - i] = (working & 0xff)
+        working = working >> 8
+      end
+      bytes
+    end
+
     def read?(writers_schema)
       SchemaCompatibility.can_read?(writers_schema, self)
     end
diff --git a/lang/ruby/test/test_fingerprints.rb b/lang/ruby/test/test_fingerprints.rb
index 5343f7c..516a9a0 100644
--- a/lang/ruby/test/test_fingerprints.rb
+++ b/lang/ruby/test/test_fingerprints.rb
@@ -34,4 +34,23 @@ class TestFingerprints < Test::Unit::TestCase
     assert_equal 28572620203319713300323544804233350633246234624932075150020181448463213378117,
       schema.sha256_fingerprint
   end
+
+  def test_crc_64_avro_fingerprint
+    schema = Avro::Schema.parse <<-SCHEMA
+      { "type": "int" }
+    SCHEMA
+
+    assert_equal 8247732601305521295, # hex: 0x7275d51a3f395c8f
+      schema.crc_64_avro_fingerprint
+  end
+
+  # This definitely belongs somewhere else
+  def test_single_object_encoding_header
+    schema = Avro::Schema.parse <<-SCHEMA
+      { "type": "int" }
+    SCHEMA
+
+    assert_equal ["c3", "01", "72", "75", "d5", "1a", "3f", "39", "5c", "8f"].map{|e| e.to_i(16) },
+      schema.single_object_encoding_header
+  end
 end