You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@avro.apache.org by fo...@apache.org on 2020/05/09 07:13:49 UTC
[avro] branch master updated: AVRO-2800: Validate enum symbol values (#855)
This is an automated email from the ASF dual-hosted git repository.
fokko pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/avro.git
The following commit(s) were added to refs/heads/master by this push:
new 9c80782 AVRO-2800: Validate enum symbol values (#855)
9c80782 is described below
commit 9c80782e3df8f025726ef6d7ccbba4817482082d
Author: Chris Hoffman <ya...@gmail.com>
AuthorDate: Sat May 9 03:13:38 2020 -0400
AVRO-2800: Validate enum symbol values (#855)
* AVRO-2800 Actually validate enum symbol values
Also added a test for duplicate symbols because there wasn't one.
* AVRO-2800 Allow Ruby enum symbol validation to be disabled
I also renamed Avro::Schema::EnumSchema::SYMBOL_PATTERN to SYMBOL_REGEX to
better fit the existing naming conventions for regex constants.
---
lang/ruby/lib/avro.rb | 7 +++++-
lang/ruby/lib/avro/schema.rb | 13 ++++++++++
lang/ruby/test/test_schema.rb | 55 +++++++++++++++++++++++++++++++++++++++++++
3 files changed, 74 insertions(+), 1 deletion(-)
diff --git a/lang/ruby/lib/avro.rb b/lang/ruby/lib/avro.rb
index c56c760..b9c44ec 100644
--- a/lang/ruby/lib/avro.rb
+++ b/lang/ruby/lib/avro.rb
@@ -34,9 +34,15 @@ module Avro
end
class << self
+ attr_writer :disable_enum_symbol_validation
attr_writer :disable_field_default_validation
attr_writer :disable_schema_name_validation
+ def disable_enum_symbol_validation
+ @disable_enum_symbol_validation ||=
+ ENV.fetch('AVRO_DISABLE_ENUM_SYMBOL_VALIDATION', '') != ''
+ end
+
def disable_field_default_validation
@disable_field_default_validation ||=
ENV.fetch('AVRO_DISABLE_FIELD_DEFAULT_VALIDATION', '') != ''
@@ -46,7 +52,6 @@ module Avro
@disable_schema_name_validation ||=
ENV.fetch('AVRO_DISABLE_SCHEMA_NAME_VALIDATION', '') != ''
end
-
end
end
diff --git a/lang/ruby/lib/avro/schema.rb b/lang/ruby/lib/avro/schema.rb
index c31fdfb..454c723 100644
--- a/lang/ruby/lib/avro/schema.rb
+++ b/lang/ruby/lib/avro/schema.rb
@@ -368,6 +368,8 @@ module Avro
end
class EnumSchema < NamedSchema
+ SYMBOL_REGEX = /^[A-Za-z_][A-Za-z0-9_]*$/
+
attr_reader :symbols, :doc, :default
def initialize(name, space, symbols, names=nil, doc=nil, default=nil)
@@ -375,9 +377,20 @@ module Avro
fail_msg = "Duplicate symbol: #{symbols}"
raise Avro::SchemaParseError, fail_msg
end
+
+ if !Avro.disable_enum_symbol_validation
+ invalid_symbols = symbols.select { |symbol| symbol !~ SYMBOL_REGEX }
+
+ if invalid_symbols.any?
+ raise SchemaParseError,
+ "Invalid symbols for #{name}: #{invalid_symbols.join(', ')} don't match #{SYMBOL_REGEX.inspect}"
+ end
+ end
+
if default && !symbols.include?(default)
raise Avro::SchemaParseError, "Default '#{default}' is not a valid symbol for enum #{name}"
end
+
super(:enum, name, space, names, doc)
@default = default
@symbols = symbols
diff --git a/lang/ruby/test/test_schema.rb b/lang/ruby/test/test_schema.rb
index faa3fc2..b52ed5c 100644
--- a/lang/ruby/test/test_schema.rb
+++ b/lang/ruby/test/test_schema.rb
@@ -575,4 +575,59 @@ class TestSchema < Test::Unit::TestCase
schema_str = 'bytes'
assert_equal schema_str, schema.to_avro
end
+
+ def test_validate_duplicate_symbols
+ exception = assert_raise(Avro::SchemaParseError) do
+ hash_to_schema(
+ type: 'enum',
+ name: 'name',
+ symbols: ['erica', 'erica']
+ )
+ end
+ assert_equal(
+ 'Duplicate symbol: ["erica", "erica"]',
+ exception.to_s
+ )
+ end
+
+ def test_validate_enum_symbols
+ exception = assert_raise(Avro::SchemaParseError) do
+ hash_to_schema(
+ type: 'enum',
+ name: 'things',
+ symbols: ['good_symbol', '_GOOD_SYMBOL_2', '8ad_symbol', 'also-bad-symbol', '>=', '$']
+ )
+ end
+
+ assert_equal(
+ "Invalid symbols for things: 8ad_symbol, also-bad-symbol, >=, $ don't match #{Avro::Schema::EnumSchema::SYMBOL_REGEX.inspect}",
+ exception.to_s
+ )
+ end
+
+ def test_enum_symbol_validation_disabled_via_env
+ Avro.disable_enum_symbol_validation = nil
+ ENV['AVRO_DISABLE_ENUM_SYMBOL_VALIDATION'] = '1'
+
+ hash_to_schema(
+ type: 'enum',
+ name: 'things',
+ symbols: ['good_symbol', '_GOOD_SYMBOL_2', '8ad_symbol', 'also-bad-symbol', '>=', '$'],
+ )
+ ensure
+ ENV.delete('AVRO_DISABLE_ENUM_SYMBOL_VALIDATION')
+ Avro.disable_enum_symbol_validation = nil
+ end
+
+ def test_enum_symbol_validation_disabled_via_class_method
+ Avro.disable_enum_symbol_validation = true
+
+ hash_to_schema(
+ type: 'enum',
+ name: 'things',
+ symbols: ['good_symbol', '_GOOD_SYMBOL_2', '8ad_symbol', 'also-bad-symbol', '>=', '$'],
+ )
+ ensure
+ Avro.disable_enum_symbol_validation = nil
+ end
end