You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@avro.apache.org by fo...@apache.org on 2020/05/03 07:30:42 UTC
[avro] branch master updated: AVRO-2535: Add Ruby support for enum
defaults (#628)
This is an automated email from the ASF dual-hosted git repository.
fokko pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/avro.git
The following commit(s) were added to refs/heads/master by this push:
new 4019608 AVRO-2535: Add Ruby support for enum defaults (#628)
4019608 is described below
commit 40196080593557380d8e9ad5bc588a61dc5de93e
Author: Tim Perkins <tj...@users.noreply.github.com>
AuthorDate: Sun May 3 03:30:34 2020 -0400
AVRO-2535: Add Ruby support for enum defaults (#628)
Added Ruby support for enum type defaults introduced in v1.9.0.
The existing non-conforming Ruby behavior is preserved: when no enum
default is present, an unknown writer's symbol is returned unchanged.
When the reader's enum has a default, that default is returned instead
of an unknown writer's symbol.
Compatibility checking, unlike reading, still strictly enforces the specification: the reader's enum must contain all of the writer's symbols or declare a default.
---
lang/ruby/lib/avro/io.rb | 8 ++---
lang/ruby/lib/avro/schema.rb | 17 +++++++---
lang/ruby/lib/avro/schema_compatibility.rb | 4 +--
lang/ruby/test/test_io.rb | 51 +++++++++++++++++++++++++++++
lang/ruby/test/test_schema.rb | 34 +++++++++++++++++++
lang/ruby/test/test_schema_compatibility.rb | 5 +++
6 files changed, 109 insertions(+), 10 deletions(-)
diff --git a/lang/ruby/lib/avro/io.rb b/lang/ruby/lib/avro/io.rb
index 406fac4..48118f6 100644
--- a/lang/ruby/lib/avro/io.rb
+++ b/lang/ruby/lib/avro/io.rb
@@ -300,12 +300,12 @@ module Avro
index_of_symbol = decoder.read_int
read_symbol = writers_schema.symbols[index_of_symbol]
- # TODO(jmhodges): figure out what unset means for resolution
- # schema resolution
- unless readers_schema.symbols.include?(read_symbol)
- # 'unset' here
+ if !readers_schema.symbols.include?(read_symbol) && readers_schema.default
+ read_symbol = readers_schema.default
end
+ # This implementation deviates from the spec by always returning
+ # a symbol.
read_symbol
end
diff --git a/lang/ruby/lib/avro/schema.rb b/lang/ruby/lib/avro/schema.rb
index 24b6329..c31fdfb 100644
--- a/lang/ruby/lib/avro/schema.rb
+++ b/lang/ruby/lib/avro/schema.rb
@@ -76,7 +76,8 @@ module Avro
when :enum
symbols = json_obj['symbols']
doc = json_obj['doc']
- return EnumSchema.new(name, namespace, symbols, names, doc)
+ default = json_obj['default']
+ return EnumSchema.new(name, namespace, symbols, names, doc, default)
when :record, :error
fields = json_obj['fields']
doc = json_obj['doc']
@@ -367,20 +368,28 @@ module Avro
end
class EnumSchema < NamedSchema
- attr_reader :symbols, :doc
+ attr_reader :symbols, :doc, :default
- def initialize(name, space, symbols, names=nil, doc=nil)
+ def initialize(name, space, symbols, names=nil, doc=nil, default=nil)
if symbols.uniq.length < symbols.length
fail_msg = "Duplicate symbol: #{symbols}"
raise Avro::SchemaParseError, fail_msg
end
+ if default && !symbols.include?(default)
+ raise Avro::SchemaParseError, "Default '#{default}' is not a valid symbol for enum #{name}"
+ end
super(:enum, name, space, names, doc)
+ @default = default
@symbols = symbols
end
def to_avro(_names=Set.new)
avro = super
- avro.is_a?(Hash) ? avro.merge('symbols' => symbols) : avro
+ if avro.is_a?(Hash)
+ avro['symbols'] = symbols
+ avro['default'] = default if default
+ end
+ avro
end
end
diff --git a/lang/ruby/lib/avro/schema_compatibility.rb b/lang/ruby/lib/avro/schema_compatibility.rb
index c88d8b4..a4b763d 100644
--- a/lang/ruby/lib/avro/schema_compatibility.rb
+++ b/lang/ruby/lib/avro/schema_compatibility.rb
@@ -118,8 +118,8 @@ module Avro
when :union
match_union_schemas(writers_schema, readers_schema)
when :enum
- # reader's symbols must contain all writer's symbols
- (writers_schema.symbols - readers_schema.symbols).empty?
+ # reader's symbols must contain all writer's symbols or reader has default
+ (writers_schema.symbols - readers_schema.symbols).empty? || !readers_schema.default.nil?
else
if writers_schema.type_sym == :union && writers_schema.schemas.size == 1
full_match_schemas(writers_schema.schemas.first, readers_schema)
diff --git a/lang/ruby/test/test_io.rb b/lang/ruby/test/test_io.rb
index ca724b7..e364421 100644
--- a/lang/ruby/test/test_io.rb
+++ b/lang/ruby/test/test_io.rb
@@ -115,6 +115,13 @@ EOS
check_default(enum_schema, '"B"', "B")
end
+ def test_enum_with_default
+ enum_schema = '{"type": "enum", "name": "Test", "symbols": ["A", "B"], "default": "A"}'
+ check(enum_schema)
+ # Field default is used for missing field.
+ check_default(enum_schema, '"B"', "B")
+ end
+
def test_recursive
recursive_schema = <<EOS
{"type": "record",
@@ -404,6 +411,50 @@ EOS
assert_equal(incorrect, 0)
end
+ def test_unknown_enum_symbol
+ writers_schema = Avro::Schema.parse(<<-SCHEMA)
+ {
+ "type": "enum",
+ "name": "test",
+ "symbols": ["B", "C"]
+ }
+ SCHEMA
+ readers_schema = Avro::Schema.parse(<<-SCHEMA)
+ {
+ "type": "enum",
+ "name": "test",
+ "symbols": ["A", "B"]
+ }
+ SCHEMA
+ datum_to_write = "C"
+ writer, * = write_datum(datum_to_write, writers_schema)
+ datum_read = read_datum(writer, writers_schema, readers_schema)
+ # The Ruby implementation does not follow the spec and returns the writer's symbol here
+ assert_equal(datum_read, datum_to_write)
+ end
+
+ def test_unknown_enum_symbol_with_enum_default
+ writers_schema = Avro::Schema.parse(<<-SCHEMA)
+ {
+ "type": "enum",
+ "name": "test",
+ "symbols": ["B", "C"]
+ }
+ SCHEMA
+ readers_schema = Avro::Schema.parse(<<-SCHEMA)
+ {
+ "type": "enum",
+ "name": "test",
+ "symbols": ["A", "B", "UNKNOWN"],
+ "default": "UNKNOWN"
+ }
+ SCHEMA
+ datum_to_write = "C"
+ writer, * = write_datum(datum_to_write, writers_schema)
+ datum_read = read_datum(writer, writers_schema, readers_schema)
+ assert_equal(datum_read, "UNKNOWN")
+ end
+
def test_array_schema_promotion
writers_schema = Avro::Schema.parse('{"type":"array", "items":"int"}')
readers_schema = Avro::Schema.parse('{"type":"array", "items":"long"}')
diff --git a/lang/ruby/test/test_schema.rb b/lang/ruby/test/test_schema.rb
index e16765e..faa3fc2 100644
--- a/lang/ruby/test/test_schema.rb
+++ b/lang/ruby/test/test_schema.rb
@@ -314,6 +314,40 @@ class TestSchema < Test::Unit::TestCase
assert_equal enum_schema_hash, enum_schema_json.to_avro
end
+ def test_enum_default_attribute
+ enum_schema = Avro::Schema.parse <<-SCHEMA
+ {
+ "type": "enum",
+ "name": "fruit",
+ "default": "apples",
+ "symbols": ["apples", "oranges"]
+ }
+ SCHEMA
+
+ enum_schema_hash = {
+ 'type' => 'enum',
+ 'name' => 'fruit',
+ 'default' => 'apples',
+ 'symbols' => %w(apples oranges)
+ }
+
+ assert_equal(enum_schema.default, "apples")
+ assert_equal(enum_schema_hash, enum_schema.to_avro)
+ end
+
+ def test_validate_enum_default
+ exception = assert_raise(Avro::SchemaParseError) do
+ hash_to_schema(
+ type: 'enum',
+ name: 'fruit',
+ default: 'bananas',
+ symbols: %w(apples oranges)
+ )
+ end
+ assert_equal("Default 'bananas' is not a valid symbol for enum fruit",
+ exception.to_s)
+ end
+
def test_empty_record
schema = Avro::Schema.parse('{"type":"record", "name":"Empty"}')
assert_empty(schema.fields)
diff --git a/lang/ruby/test/test_schema_compatibility.rb b/lang/ruby/test/test_schema_compatibility.rb
index 1e822a0..e5134e5 100644
--- a/lang/ruby/test/test_schema_compatibility.rb
+++ b/lang/ruby/test/test_schema_compatibility.rb
@@ -40,6 +40,7 @@ class TestSchemaCompatibility < Test::Unit::TestCase
enum1_ab_schema, enum1_ab_schema,
enum1_abc_schema, enum1_ab_schema,
+ enum1_ab_default_schema, enum1_abc_schema,
string_schema, bytes_schema,
bytes_schema, string_schema,
@@ -373,6 +374,10 @@ class TestSchemaCompatibility < Test::Unit::TestCase
Avro::Schema.parse('{"type":"enum", "name":"Enum1", "symbols":["A","B"]}')
end
+ def enum1_ab_default_schema
+ Avro::Schema.parse('{"type":"enum", "name":"Enum1", "symbols":["A","B"], "default":"A"}')
+ end
+
def enum1_abc_schema
Avro::Schema.parse('{"type":"enum", "name":"Enum1", "symbols":["A","B","C"]}')
end