You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@avro.apache.org by fo...@apache.org on 2020/05/09 07:13:49 UTC

[avro] branch master updated: AVRO-2800: Validate enum symbol values (#855)

This is an automated email from the ASF dual-hosted git repository.

fokko pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/avro.git


The following commit(s) were added to refs/heads/master by this push:
     new 9c80782  AVRO-2800: Validate enum symbol values (#855)
9c80782 is described below

commit 9c80782e3df8f025726ef6d7ccbba4817482082d
Author: Chris Hoffman <ya...@gmail.com>
AuthorDate: Sat May 9 03:13:38 2020 -0400

    AVRO-2800: Validate enum symbol values (#855)
    
    * AVRO-2800 Actually validate enum symbol values
    
    Also added a test for duplicate symbols because there wasn't one.
    
    * AVRO-2800 Allow Ruby enum symbol validation to be disabled
    
    I also renamed Avro::Schema::EnumSchema::SYMBOL_PATTERN to SYMBOL_REGEX to
    better fit the existing naming conventions for regex constants.
---
 lang/ruby/lib/avro.rb         |  7 +++++-
 lang/ruby/lib/avro/schema.rb  | 13 ++++++++++
 lang/ruby/test/test_schema.rb | 55 +++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 74 insertions(+), 1 deletion(-)

diff --git a/lang/ruby/lib/avro.rb b/lang/ruby/lib/avro.rb
index c56c760..b9c44ec 100644
--- a/lang/ruby/lib/avro.rb
+++ b/lang/ruby/lib/avro.rb
@@ -34,9 +34,15 @@ module Avro
   end
 
   class << self
+    attr_writer :disable_enum_symbol_validation
     attr_writer :disable_field_default_validation
     attr_writer :disable_schema_name_validation
 
+    def disable_enum_symbol_validation
+      @disable_enum_symbol_validation ||=
+        ENV.fetch('AVRO_DISABLE_ENUM_SYMBOL_VALIDATION', '') != ''
+    end
+
     def disable_field_default_validation
       @disable_field_default_validation ||=
         ENV.fetch('AVRO_DISABLE_FIELD_DEFAULT_VALIDATION', '') != ''
@@ -46,7 +52,6 @@ module Avro
       @disable_schema_name_validation ||=
         ENV.fetch('AVRO_DISABLE_SCHEMA_NAME_VALIDATION', '') != ''
     end
-
   end
 end
 
diff --git a/lang/ruby/lib/avro/schema.rb b/lang/ruby/lib/avro/schema.rb
index c31fdfb..454c723 100644
--- a/lang/ruby/lib/avro/schema.rb
+++ b/lang/ruby/lib/avro/schema.rb
@@ -368,6 +368,8 @@ module Avro
     end
 
     class EnumSchema < NamedSchema
+      SYMBOL_REGEX = /^[A-Za-z_][A-Za-z0-9_]*$/
+
       attr_reader :symbols, :doc, :default
 
       def initialize(name, space, symbols, names=nil, doc=nil, default=nil)
@@ -375,9 +377,20 @@ module Avro
           fail_msg = "Duplicate symbol: #{symbols}"
           raise Avro::SchemaParseError, fail_msg
         end
+
+        if !Avro.disable_enum_symbol_validation
+          invalid_symbols = symbols.select { |symbol| symbol !~ SYMBOL_REGEX }
+
+          if invalid_symbols.any?
+            raise SchemaParseError,
+              "Invalid symbols for #{name}: #{invalid_symbols.join(', ')} don't match #{SYMBOL_REGEX.inspect}"
+          end
+        end
+
         if default && !symbols.include?(default)
           raise Avro::SchemaParseError, "Default '#{default}' is not a valid symbol for enum #{name}"
         end
+
         super(:enum, name, space, names, doc)
         @default = default
         @symbols = symbols
diff --git a/lang/ruby/test/test_schema.rb b/lang/ruby/test/test_schema.rb
index faa3fc2..b52ed5c 100644
--- a/lang/ruby/test/test_schema.rb
+++ b/lang/ruby/test/test_schema.rb
@@ -575,4 +575,59 @@ class TestSchema < Test::Unit::TestCase
     schema_str = 'bytes'
     assert_equal schema_str, schema.to_avro
   end
+
+  def test_validate_duplicate_symbols
+    exception = assert_raise(Avro::SchemaParseError) do
+      hash_to_schema(
+        type: 'enum',
+        name: 'name',
+        symbols: ['erica', 'erica']
+      )
+    end
+    assert_equal(
+      'Duplicate symbol: ["erica", "erica"]',
+      exception.to_s
+    )
+  end
+
+  def test_validate_enum_symbols
+    exception = assert_raise(Avro::SchemaParseError) do
+      hash_to_schema(
+        type: 'enum',
+        name: 'things',
+        symbols: ['good_symbol', '_GOOD_SYMBOL_2', '8ad_symbol', 'also-bad-symbol', '>=', '$']
+      )
+    end
+
+    assert_equal(
+      "Invalid symbols for things: 8ad_symbol, also-bad-symbol, >=, $ don't match #{Avro::Schema::EnumSchema::SYMBOL_REGEX.inspect}",
+      exception.to_s
+    )
+  end
+
+  def test_enum_symbol_validation_disabled_via_env
+    Avro.disable_enum_symbol_validation = nil
+    ENV['AVRO_DISABLE_ENUM_SYMBOL_VALIDATION'] = '1'
+
+    hash_to_schema(
+      type: 'enum',
+      name: 'things',
+      symbols: ['good_symbol', '_GOOD_SYMBOL_2', '8ad_symbol', 'also-bad-symbol', '>=', '$'],
+    )
+  ensure
+    ENV.delete('AVRO_DISABLE_ENUM_SYMBOL_VALIDATION')
+    Avro.disable_enum_symbol_validation = nil
+  end
+
+  def test_enum_symbol_validation_disabled_via_class_method
+    Avro.disable_enum_symbol_validation = true
+
+    hash_to_schema(
+      type: 'enum',
+      name: 'things',
+      symbols: ['good_symbol', '_GOOD_SYMBOL_2', '8ad_symbol', 'also-bad-symbol', '>=', '$'],
+    )
+  ensure
+    Avro.disable_enum_symbol_validation = nil
+  end
 end