You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@avro.apache.org by bu...@apache.org on 2015/09/14 02:07:08 UTC
svn commit: r1702840 - in /avro/branches/branch-1.8/lang/ruby: lib/avro.rb
lib/avro/schema.rb lib/avro/schema_normalization.rb test/case_finder.rb
test/test_fingerprints.rb test/test_schema_normalization.rb
Author: busbey
Date: Mon Sep 14 00:07:07 2015
New Revision: 1702840
URL: http://svn.apache.org/r1702840
Log:
AVRO-1694. Ruby: Schema normalization and fingerprints. Contributed by Daniel Schierbeck.
* Avro::SchemaNormalization.to_parsing_form converts a schema to Parsing
Canonical Form
* support for MD5 and SHA256 fingerprints
This closes #40
Added:
avro/branches/branch-1.8/lang/ruby/lib/avro/schema_normalization.rb
avro/branches/branch-1.8/lang/ruby/test/case_finder.rb
avro/branches/branch-1.8/lang/ruby/test/test_fingerprints.rb
avro/branches/branch-1.8/lang/ruby/test/test_schema_normalization.rb
Modified:
avro/branches/branch-1.8/lang/ruby/lib/avro.rb
avro/branches/branch-1.8/lang/ruby/lib/avro/schema.rb
Modified: avro/branches/branch-1.8/lang/ruby/lib/avro.rb
URL: http://svn.apache.org/viewvc/avro/branches/branch-1.8/lang/ruby/lib/avro.rb?rev=1702840&r1=1702839&r2=1702840&view=diff
==============================================================================
--- avro/branches/branch-1.8/lang/ruby/lib/avro.rb (original)
+++ avro/branches/branch-1.8/lang/ruby/lib/avro.rb Mon Sep 14 00:07:07 2015
@@ -39,3 +39,4 @@ require 'avro/io'
require 'avro/data_file'
require 'avro/protocol'
require 'avro/ipc'
+require 'avro/schema_normalization'
Modified: avro/branches/branch-1.8/lang/ruby/lib/avro/schema.rb
URL: http://svn.apache.org/viewvc/avro/branches/branch-1.8/lang/ruby/lib/avro/schema.rb?rev=1702840&r1=1702839&r2=1702840&view=diff
==============================================================================
--- avro/branches/branch-1.8/lang/ruby/lib/avro/schema.rb (original)
+++ avro/branches/branch-1.8/lang/ruby/lib/avro/schema.rb Mon Sep 14 00:07:07 2015
@@ -137,6 +137,18 @@ module Avro
# Deprecated in favor of {#type_sym}.
# Returns the String form of the type symbol.
def type
  @type_sym.to_s
end
+ # Returns the MD5 fingerprint of the schema as an Integer.
+ def md5_fingerprint
+ parsing_form = SchemaNormalization.to_parsing_form(self)
+ Digest::MD5.hexdigest(parsing_form).to_i(16)
+ end
+
+ # Returns the SHA-256 fingerprint of the schema as an Integer.
+ def sha256_fingerprint
+ parsing_form = SchemaNormalization.to_parsing_form(self)
+ Digest::SHA256.hexdigest(parsing_form).to_i(16)
+ end
+
# Two schemas compare equal here when +other+ is a Schema with the same
# type symbol. The +seen+ argument is accepted but not used by this method.
def ==(other, seen=nil)
  return false unless other.is_a?(Schema)

  type_sym == other.type_sym
end
Added: avro/branches/branch-1.8/lang/ruby/lib/avro/schema_normalization.rb
URL: http://svn.apache.org/viewvc/avro/branches/branch-1.8/lang/ruby/lib/avro/schema_normalization.rb?rev=1702840&view=auto
==============================================================================
--- avro/branches/branch-1.8/lang/ruby/lib/avro/schema_normalization.rb (added)
+++ avro/branches/branch-1.8/lang/ruby/lib/avro/schema_normalization.rb Mon Sep 14 00:07:07 2015
@@ -0,0 +1,83 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+module Avro
+ class SchemaNormalization
+ def self.to_parsing_form(schema)
+ new.to_parsing_form(schema)
+ end
+
+ def initialize
+ @processed_names = []
+ end
+
+ def to_parsing_form(schema)
+ JSON.dump(normalize_schema(schema))
+ end
+
+ private
+
+ def normalize_schema(schema)
+ type = schema.type_sym.to_s
+
+ if Schema::NAMED_TYPES.include?(type)
+ if @processed_names.include?(schema.name)
+ return schema.name
+ else
+ @processed_names << schema.name
+ end
+ end
+
+ case type
+ when *Schema::PRIMITIVE_TYPES
+ type
+ when "record"
+ fields = schema.fields.map {|field| normalize_field(field) }
+
+ normalize_named_type(schema, fields: fields)
+ when "enum"
+ normalize_named_type(schema, symbols: schema.symbols)
+ when "fixed"
+ normalize_named_type(schema, size: schema.size)
+ when "array"
+ { type: type, items: normalize_schema(schema.items) }
+ when "map"
+ { type: type, values: normalize_schema(schema.values) }
+ when "union"
+ if schema.schemas.nil?
+ []
+ else
+ schema.schemas.map {|s| normalize_schema(s) }
+ end
+ else
+ raise "unknown type #{type}"
+ end
+ end
+
+ def normalize_field(field)
+ {
+ name: field.name,
+ type: normalize_schema(field.type)
+ }
+ end
+
+ def normalize_named_type(schema, attributes = {})
+ name = Name.make_fullname(schema.name, schema.namespace)
+
+ { name: name, type: schema.type_sym.to_s }.merge(attributes)
+ end
+ end
+end
Added: avro/branches/branch-1.8/lang/ruby/test/case_finder.rb
URL: http://svn.apache.org/viewvc/avro/branches/branch-1.8/lang/ruby/test/case_finder.rb?rev=1702840&view=auto
==============================================================================
--- avro/branches/branch-1.8/lang/ruby/test/case_finder.rb (added)
+++ avro/branches/branch-1.8/lang/ruby/test/case_finder.rb Mon Sep 14 00:07:07 2015
@@ -0,0 +1,67 @@
+class CaseFinder
+ PATH = File.expand_path("../../../../share/test/data/schema-tests.txt", __FILE__)
+
+ Case = Struct.new(:id, :input, :canonical, :fingerprint)
+
+ def self.cases
+ new.cases
+ end
+
+ def initialize
+ @scanner = StringScanner.new(File.read(PATH))
+ @cases = []
+ end
+
+ def cases
+ until @scanner.eos?
+ test_case = scan_case
+ @cases << test_case if test_case
+ end
+
+ @cases
+ end
+
+ private
+
+ def scan_case
+ if id = @scanner.scan(/\/\/ \d+\n/)
+ while @scanner.skip(/\/\/ .*\n/); end
+
+ input = scan_input
+ canonical = scan_canonical
+ fingerprint = scan_fingerprint
+
+ Case.new(id, input, canonical, fingerprint)
+ else
+ @scanner.skip(/.*\n/)
+ nil
+ end
+ end
+
+ def scan_item(name)
+ if @scanner.scan(/<<#{name}\n/)
+ lines = []
+ while line = @scanner.scan(/.+\n/)
+ break if line.chomp == name
+ lines << line
+ end
+ lines.join
+ elsif @scanner.scan(/<<#{name} /)
+ input = @scanner.scan(/.+$/)
+ @scanner.skip(/\n/)
+ input
+ end
+ end
+
+ def scan_input
+ scan_item("INPUT")
+ end
+
+ def scan_canonical
+ scan_item("canonical")
+ end
+
+ def scan_fingerprint
+ scan_item("fingerprint")
+ end
+end
Added: avro/branches/branch-1.8/lang/ruby/test/test_fingerprints.rb
URL: http://svn.apache.org/viewvc/avro/branches/branch-1.8/lang/ruby/test/test_fingerprints.rb?rev=1702840&view=auto
==============================================================================
--- avro/branches/branch-1.8/lang/ruby/test/test_fingerprints.rb (added)
+++ avro/branches/branch-1.8/lang/ruby/test/test_fingerprints.rb Mon Sep 14 00:07:07 2015
@@ -0,0 +1,37 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+require 'test_help'
+
+class TestFingerprints < Test::Unit::TestCase
+ def test_md5_fingerprint
+ schema = Avro::Schema.parse <<-SCHEMA
+ { "type": "int" }
+ SCHEMA
+
+ assert_equal 318112854175969537208795771590915775282,
+ schema.md5_fingerprint
+ end
+
+ def test_sha256_fingerprint
+ schema = Avro::Schema.parse <<-SCHEMA
+ { "type": "int" }
+ SCHEMA
+
+ assert_equal 28572620203319713300323544804233350633246234624932075150020181448463213378117,
+ schema.sha256_fingerprint
+ end
+end
Added: avro/branches/branch-1.8/lang/ruby/test/test_schema_normalization.rb
URL: http://svn.apache.org/viewvc/avro/branches/branch-1.8/lang/ruby/test/test_schema_normalization.rb?rev=1702840&view=auto
==============================================================================
--- avro/branches/branch-1.8/lang/ruby/test/test_schema_normalization.rb (added)
+++ avro/branches/branch-1.8/lang/ruby/test/test_schema_normalization.rb Mon Sep 14 00:07:07 2015
@@ -0,0 +1,171 @@
+# -*- coding: utf-8 -*-
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+require 'test_help'
+require 'case_finder'
+
+class TestSchemaNormalization < Test::Unit::TestCase
+ def test_primitives
+ %w[null boolean string bytes int long float double].each do |type|
+ schema = Avro::Schema.parse(<<-JSON)
+ { "type": "#{type}" }
+ JSON
+
+ canonical_form = Avro::SchemaNormalization.to_parsing_form(schema)
+
+ assert_equal %("#{type}"), canonical_form
+ end
+ end
+
+ def test_records
+ schema = Avro::Schema.parse(<<-JSON)
+ {
+ "type": "record",
+ "name": "test",
+ "namespace": "random",
+ "doc": "some record",
+ "fields": [
+ { "name": "height", "type": "int", "doc": "the height" }
+ ]
+ }
+ JSON
+
+ expected_type = <<-JSON.strip
+ {"name":"random.test","type":"record","fields":[{"name":"height","type":"int"}]}
+ JSON
+
+ canonical_form = Avro::SchemaNormalization.to_parsing_form(schema)
+
+ assert_equal expected_type, canonical_form
+ end
+
+ def test_recursive_records
+ schema = Avro::Schema.parse(<<-JSON)
+ {
+ "type": "record",
+ "name": "item",
+ "fields": [
+ { "name": "next", "type": "item" }
+ ]
+ }
+ JSON
+
+ expected_type = <<-JSON.strip
+ {"name":"item","type":"record","fields":[{"name":"next","type":"item"}]}
+ JSON
+
+ canonical_form = Avro::SchemaNormalization.to_parsing_form(schema)
+
+ assert_equal expected_type, canonical_form
+ end
+
+ def test_enums
+ schema = Avro::Schema.parse(<<-JSON)
+ {
+ "type": "enum",
+ "name": "suit",
+ "namespace": "cards",
+ "doc": "the different suits of cards",
+ "symbols": ["club", "hearts", "diamond", "spades"]
+ }
+ JSON
+
+ expected_type = <<-JSON.strip
+ {"name":"cards.suit","type":"enum","symbols":["club","hearts","diamond","spades"]}
+ JSON
+
+ canonical_form = Avro::SchemaNormalization.to_parsing_form(schema)
+
+ assert_equal expected_type, canonical_form
+ end
+
+ def test_fixed
+ schema = Avro::Schema.parse(<<-JSON)
+ {
+ "type": "fixed",
+ "name": "id",
+ "namespace": "db",
+ "size": 64
+ }
+ JSON
+
+ expected_type = <<-JSON.strip
+ {"name":"db.id","type":"fixed","size":64}
+ JSON
+
+ canonical_form = Avro::SchemaNormalization.to_parsing_form(schema)
+
+ assert_equal expected_type, canonical_form
+ end
+
+ def test_arrays
+ schema = Avro::Schema.parse(<<-JSON)
+ {
+ "type": "array",
+ "doc": "the items",
+ "items": "int"
+ }
+ JSON
+
+ expected_type = <<-JSON.strip
+ {"type":"array","items":"int"}
+ JSON
+
+ canonical_form = Avro::SchemaNormalization.to_parsing_form(schema)
+
+ assert_equal expected_type, canonical_form
+ end
+
+ def test_maps
+ schema = Avro::Schema.parse(<<-JSON)
+ {
+ "type": "map",
+ "doc": "the items",
+ "values": "int"
+ }
+ JSON
+
+ expected_type = <<-JSON.strip
+ {"type":"map","values":"int"}
+ JSON
+
+ canonical_form = Avro::SchemaNormalization.to_parsing_form(schema)
+
+ assert_equal expected_type, canonical_form
+ end
+
+ def test_unions
+ schema = Avro::Schema.parse(<<-JSON)
+ ["int", "string"]
+ JSON
+
+ expected_type = <<-JSON.strip
+ ["int","string"]
+ JSON
+
+ canonical_form = Avro::SchemaNormalization.to_parsing_form(schema)
+
+ assert_equal expected_type, canonical_form
+ end
+
+ def test_shared_dataset
+ CaseFinder.cases.each do |test_case|
+ schema = Avro::Schema.parse(test_case.input)
+ assert_equal test_case.canonical, Avro::SchemaNormalization.to_parsing_form(schema)
+ end
+ end
+end