You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@avro.apache.org by fo...@apache.org on 2020/05/03 07:30:42 UTC

[avro] branch master updated: AVRO-2535: Add Ruby support for enum defaults (#628)

This is an automated email from the ASF dual-hosted git repository.

fokko pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/avro.git


The following commit(s) were added to refs/heads/master by this push:
     new 4019608  AVRO-2535: Add Ruby support for enum defaults (#628)
4019608 is described below

commit 40196080593557380d8e9ad5bc588a61dc5de93e
Author: Tim Perkins <tj...@users.noreply.github.com>
AuthorDate: Sun May 3 03:30:34 2020 -0400

    AVRO-2535: Add Ruby support for enum defaults (#628)
    
    Added Ruby support for enum type defaults introduced in v1.9.0.
    
    The existing non-conforming Ruby behavior is preserved: when no enum
    default is present, an unknown writer's symbol is returned as-is.
    
    When an enum default is present it is returned instead of an
    unknown writer's symbol.
    
    Compatibility checking still strictly enforces the specification.
---
 lang/ruby/lib/avro/io.rb                    |  8 ++---
 lang/ruby/lib/avro/schema.rb                | 17 +++++++---
 lang/ruby/lib/avro/schema_compatibility.rb  |  4 +--
 lang/ruby/test/test_io.rb                   | 51 +++++++++++++++++++++++++++++
 lang/ruby/test/test_schema.rb               | 34 +++++++++++++++++++
 lang/ruby/test/test_schema_compatibility.rb |  5 +++
 6 files changed, 109 insertions(+), 10 deletions(-)

diff --git a/lang/ruby/lib/avro/io.rb b/lang/ruby/lib/avro/io.rb
index 406fac4..48118f6 100644
--- a/lang/ruby/lib/avro/io.rb
+++ b/lang/ruby/lib/avro/io.rb
@@ -300,12 +300,12 @@ module Avro
         index_of_symbol = decoder.read_int
         read_symbol = writers_schema.symbols[index_of_symbol]
 
-        # TODO(jmhodges): figure out what unset means for resolution
-        # schema resolution
-        unless readers_schema.symbols.include?(read_symbol)
-          # 'unset' here
+        if !readers_schema.symbols.include?(read_symbol) && readers_schema.default
+          read_symbol = readers_schema.default
         end
 
+        # This implementation deviates from the spec by always returning
+        # a symbol.
         read_symbol
       end
 
diff --git a/lang/ruby/lib/avro/schema.rb b/lang/ruby/lib/avro/schema.rb
index 24b6329..c31fdfb 100644
--- a/lang/ruby/lib/avro/schema.rb
+++ b/lang/ruby/lib/avro/schema.rb
@@ -76,7 +76,8 @@ module Avro
           when :enum
             symbols = json_obj['symbols']
             doc     = json_obj['doc']
-            return EnumSchema.new(name, namespace, symbols, names, doc)
+            default = json_obj['default']
+            return EnumSchema.new(name, namespace, symbols, names, doc, default)
           when :record, :error
             fields = json_obj['fields']
             doc    = json_obj['doc']
@@ -367,20 +368,28 @@ module Avro
     end
 
     class EnumSchema < NamedSchema
-      attr_reader :symbols, :doc
+      attr_reader :symbols, :doc, :default
 
-      def initialize(name, space, symbols, names=nil, doc=nil)
+      def initialize(name, space, symbols, names=nil, doc=nil, default=nil)
         if symbols.uniq.length < symbols.length
           fail_msg = "Duplicate symbol: #{symbols}"
           raise Avro::SchemaParseError, fail_msg
         end
+        if default && !symbols.include?(default)
+          raise Avro::SchemaParseError, "Default '#{default}' is not a valid symbol for enum #{name}"
+        end
         super(:enum, name, space, names, doc)
+        @default = default
         @symbols = symbols
       end
 
       def to_avro(_names=Set.new)
         avro = super
-        avro.is_a?(Hash) ? avro.merge('symbols' => symbols) : avro
+        if avro.is_a?(Hash)
+          avro['symbols'] = symbols
+          avro['default'] = default if default
+        end
+        avro
       end
     end
 
diff --git a/lang/ruby/lib/avro/schema_compatibility.rb b/lang/ruby/lib/avro/schema_compatibility.rb
index c88d8b4..a4b763d 100644
--- a/lang/ruby/lib/avro/schema_compatibility.rb
+++ b/lang/ruby/lib/avro/schema_compatibility.rb
@@ -118,8 +118,8 @@ module Avro
         when :union
           match_union_schemas(writers_schema, readers_schema)
         when :enum
-          # reader's symbols must contain all writer's symbols
-          (writers_schema.symbols - readers_schema.symbols).empty?
+          # reader's symbols must contain all writer's symbols or reader has default
+          (writers_schema.symbols - readers_schema.symbols).empty? || !readers_schema.default.nil?
         else
           if writers_schema.type_sym == :union && writers_schema.schemas.size == 1
             full_match_schemas(writers_schema.schemas.first, readers_schema)
diff --git a/lang/ruby/test/test_io.rb b/lang/ruby/test/test_io.rb
index ca724b7..e364421 100644
--- a/lang/ruby/test/test_io.rb
+++ b/lang/ruby/test/test_io.rb
@@ -115,6 +115,13 @@ EOS
     check_default(enum_schema, '"B"', "B")
   end
 
+  def test_enum_with_default
+    enum_schema = '{"type": "enum", "name": "Test", "symbols": ["A", "B"], "default": "A"}'
+    check(enum_schema)
+    # Field default is used for missing field.
+    check_default(enum_schema, '"B"', "B")
+  end
+
   def test_recursive
     recursive_schema = <<EOS
       {"type": "record",
@@ -404,6 +411,50 @@ EOS
     assert_equal(incorrect, 0)
   end
 
+  def test_unknown_enum_symbol
+    writers_schema = Avro::Schema.parse(<<-SCHEMA)
+      {
+        "type": "enum",
+        "name": "test",
+        "symbols": ["B", "C"]
+      }
+    SCHEMA
+    readers_schema = Avro::Schema.parse(<<-SCHEMA)
+      {
+        "type": "enum",
+        "name": "test",
+        "symbols": ["A", "B"]
+      }
+    SCHEMA
+    datum_to_write = "C"
+    writer, * = write_datum(datum_to_write, writers_schema)
+    datum_read = read_datum(writer, writers_schema, readers_schema)
+    # Ruby implementation did not follow the spec and returns the writer's symbol here
+    assert_equal(datum_read, datum_to_write)
+  end
+
+  def test_unknown_enum_symbol_with_enum_default
+    writers_schema = Avro::Schema.parse(<<-SCHEMA)
+      {
+        "type": "enum",
+        "name": "test",
+        "symbols": ["B", "C"]
+      }
+    SCHEMA
+    readers_schema = Avro::Schema.parse(<<-SCHEMA)
+      {
+        "type": "enum",
+        "name": "test",
+        "symbols": ["A", "B", "UNKNOWN"],
+        "default": "UNKNOWN"
+      }
+    SCHEMA
+    datum_to_write = "C"
+    writer, * = write_datum(datum_to_write, writers_schema)
+    datum_read = read_datum(writer, writers_schema, readers_schema)
+    assert_equal(datum_read, "UNKNOWN")
+  end
+
   def test_array_schema_promotion
     writers_schema = Avro::Schema.parse('{"type":"array", "items":"int"}')
     readers_schema = Avro::Schema.parse('{"type":"array", "items":"long"}')
diff --git a/lang/ruby/test/test_schema.rb b/lang/ruby/test/test_schema.rb
index e16765e..faa3fc2 100644
--- a/lang/ruby/test/test_schema.rb
+++ b/lang/ruby/test/test_schema.rb
@@ -314,6 +314,40 @@ class TestSchema < Test::Unit::TestCase
     assert_equal enum_schema_hash, enum_schema_json.to_avro
   end
 
+  def test_enum_default_attribute
+    enum_schema = Avro::Schema.parse <<-SCHEMA
+      {
+        "type": "enum",
+        "name": "fruit",
+        "default": "apples",
+        "symbols": ["apples", "oranges"]
+      }
+    SCHEMA
+
+    enum_schema_hash = {
+      'type' => 'enum',
+      'name' => 'fruit',
+      'default' => 'apples',
+      'symbols' => %w(apples oranges)
+    }
+
+    assert_equal(enum_schema.default, "apples")
+    assert_equal(enum_schema_hash, enum_schema.to_avro)
+  end
+
+  def test_validate_enum_default
+    exception = assert_raise(Avro::SchemaParseError) do
+      hash_to_schema(
+        type: 'enum',
+        name: 'fruit',
+        default: 'bananas',
+        symbols: %w(apples oranges)
+      )
+    end
+    assert_equal("Default 'bananas' is not a valid symbol for enum fruit",
+                 exception.to_s)
+  end
+
   def test_empty_record
     schema = Avro::Schema.parse('{"type":"record", "name":"Empty"}')
     assert_empty(schema.fields)
diff --git a/lang/ruby/test/test_schema_compatibility.rb b/lang/ruby/test/test_schema_compatibility.rb
index 1e822a0..e5134e5 100644
--- a/lang/ruby/test/test_schema_compatibility.rb
+++ b/lang/ruby/test/test_schema_compatibility.rb
@@ -40,6 +40,7 @@ class TestSchemaCompatibility < Test::Unit::TestCase
 
       enum1_ab_schema, enum1_ab_schema,
       enum1_abc_schema, enum1_ab_schema,
+      enum1_ab_default_schema, enum1_abc_schema,
 
       string_schema, bytes_schema,
       bytes_schema, string_schema,
@@ -373,6 +374,10 @@ class TestSchemaCompatibility < Test::Unit::TestCase
     Avro::Schema.parse('{"type":"enum", "name":"Enum1", "symbols":["A","B"]}')
   end
 
+  def enum1_ab_default_schema
+    Avro::Schema.parse('{"type":"enum", "name":"Enum1", "symbols":["A","B"], "default":"A"}')
+  end
+
   def enum1_abc_schema
     Avro::Schema.parse('{"type":"enum", "name":"Enum1", "symbols":["A","B","C"]}')
   end