You are viewing a plain text version of this content. The canonical link for it is here.
Posted to issues@avro.apache.org by GitBox <gi...@apache.org> on 2021/05/15 14:23:11 UTC

[GitHub] [avro] tjwp commented on a change in pull request #1102: AVRO-3054: Fix / support decimal logical type in Ruby

tjwp commented on a change in pull request #1102:
URL: https://github.com/apache/avro/pull/1102#discussion_r632953810



##########
File path: lang/ruby/test/test_logical_types.rb
##########
@@ -99,6 +100,140 @@ def test_parse_fixed_duration
     assert_equal 'duration', schema.logical_type
   end
 
+  def test_bytes_decimal
+    schema = Avro::Schema.parse <<-SCHEMA
+      { "type": "bytes", "logicalType": "decimal", "precision": 9, "scale": 6 }
+    SCHEMA
+
+    assert_equal 'decimal', schema.logical_type
+    assert_equal 9, schema.precision
+    assert_equal 6, schema.scale
+
+    assert_encode_and_decode BigDecimal('-3.4562'), schema
+    assert_encode_and_decode BigDecimal('3.4562'), schema
+    assert_encode_and_decode 15.123, schema
+    assert_encode_and_decode 15, schema
+    assert_encode_and_decode BigDecimal('0.123456'), schema
+    assert_encode_and_decode BigDecimal('0'), schema
+    assert_encode_and_decode BigDecimal('1'), schema
+    assert_encode_and_decode BigDecimal('-1'), schema
+
+    assert_raise ArgumentError do
+      type = Avro::LogicalTypes::BytesDecimal.new(schema)
+      type.encode('1.23')
+    end
+  end
+
+  def test_bytes_decimal_range_errors
+    schema = Avro::Schema.parse <<-SCHEMA
+      { "type": "bytes", "logicalType": "decimal", "precision": 4, "scale": 2 }
+    SCHEMA
+
+    type = Avro::LogicalTypes::BytesDecimal.new(schema)
+
+    assert_raises RangeError do
+      type.encode(BigDecimal('345'))
+    end
+
+    assert_raises RangeError do
+      type.encode(BigDecimal('1.5342'))
+    end
+
+    assert_raises RangeError do
+      type.encode(BigDecimal('-1.5342'))
+    end
+
+    assert_raises RangeError do
+      type.encode(BigDecimal('-100.2'))
+    end
+
+    assert_raises RangeError do
+      type.encode(BigDecimal('-99.991'))
+    end
+  end
+
+  def test_bytes_decimal_conversion
+    schema = Avro::Schema.parse <<-SCHEMA
+      { "type": "bytes", "logicalType": "decimal", "precision": 12, "scale": 6 }
+    SCHEMA
+
+    type = Avro::LogicalTypes::BytesDecimal.new(schema)
+
+    enc = "\xcb\x43\x38".dup.force_encoding('BINARY')
+    assert_equal enc, type.encode(BigDecimal('-3.4562'))
+    assert_equal BigDecimal('-3.4562'), type.decode(enc)
+
+    assert_equal "\x34\xbc\xc8".dup.force_encoding('BINARY'), type.encode(BigDecimal('3.4562'))
+    assert_equal BigDecimal('3.4562'), type.decode("\x34\xbc\xc8".dup.force_encoding('BINARY'))
+
+    assert_equal "\x6a\x33\x0e\x87\x00".dup.force_encoding('BINARY'), type.encode(BigDecimal('456123.123456'))
+    assert_equal BigDecimal('456123.123456'), type.decode("\x6a\x33\x0e\x87\x00".dup.force_encoding('BINARY'))
+  end
+
+  def test_logical_type_with_schema
+    exception = assert_raises(ArgumentError) do
+      Avro::LogicalTypes::LogicalTypeWithSchema.new(nil)
+    end
+    assert_equal exception.to_s, 'schema is required'
+
+    schema = Avro::Schema.parse <<-SCHEMA
+      { "type": "bytes", "logicalType": "decimal", "precision": 12, "scale": 6 }
+    SCHEMA
+
+    assert_nothing_raised do
+      Avro::LogicalTypes::LogicalTypeWithSchema.new(schema)
+    end
+
+    assert_raises NotImplementedError do
+      Avro::LogicalTypes::LogicalTypeWithSchema.new(schema).encode(BigDecimal('2'))
+    end
+
+    assert_raises NotImplementedError do
+      Avro::LogicalTypes::LogicalTypeWithSchema.new(schema).decode('foo')
+    end
+  end
+
+  def test_bytes_decimal_object_allocations_encode
+    schema = Avro::Schema.parse <<-SCHEMA
+      { "type": "bytes", "logicalType": "decimal", "precision": 4, "scale": 2 }
+    SCHEMA
+
+    type = Avro::LogicalTypes::BytesDecimal.new(schema)
+
+    positive_value = BigDecimal('5.2')
+    negative_value = BigDecimal('-5.2')
+
+    [positive_value, negative_value].each do |value|
+      report = MemoryProfiler.report do
+        type.encode(value)
+      end
+
+      assert_equal 5, report.total_allocated
+      # Ruby 2.3, 2.4 and 2.7 do not retain anything. Ruby 2.5 and some 2.6 retain 1
+      assert_operator 1, :>=, report.total_retained
+    end
+  end
+
+  def test_bytes_decimal_object_allocations_decode
+    schema = Avro::Schema.parse <<-SCHEMA
+      { "type": "bytes", "logicalType": "decimal", "precision": 4, "scale": 2 }
+    SCHEMA
+
+    type = Avro::LogicalTypes::BytesDecimal.new(schema)
+
+    positive_enc = "\x02\b".dup.force_encoding('BINARY')
+    negative_enc = "\xFD\xF8".dup.force_encoding('BINARY')
+
+    [positive_enc, negative_enc].each do |encoded|
+      report = MemoryProfiler.report do
+        type.decode(encoded)
+      end
+
+      assert_equal 5, report.total_allocated
+      assert_equal 0, report.total_retained

Review comment:
       This line is failing with Ruby 2.6 where it is getting 1. This should probably be changed to match https://github.com/apache/avro/pull/1102/files#diff-5c5f6ae9be9e29a9812a5b49746e79d50c326fb32fc984064dc15c52a7b41385R212-R213 including a similar comment. 

##########
File path: lang/ruby/lib/avro/logical_types.rb
##########
@@ -17,9 +17,193 @@
 # limitations under the License.
 
 require 'date'
+require 'bigdecimal'
+require 'bigdecimal/util'
 
 module Avro
   module LogicalTypes
+    ##
+    # Base class for logical types requiring a schema to be present
+    class LogicalTypeWithSchema
+      ##
+      # @return [Avro::Schema] The schema this logical type is dealing with
+      attr_reader :schema
+
+      ##
+      # Build a new instance of a logical type using the provided schema
+      #
+      # @param schema [Avro::Schema]
+      #     The schema to use with this instance
+      #
+      # @raise [ArgumentError]
+      #     If the provided schema is nil
+      def initialize(schema)
+        raise ArgumentError, 'schema is required' if schema.nil?
+
+        @schema = schema
+      end
+
+      ##
+      # Encode the provided datum
+      #
+      # @param datum [Object] The datum to encode
+      #
+      # @raise [NotImplementedError]
+      #     Subclass will need to override this method
+      def encode(datum)
+        raise NotImplementedError
+      end
+
+      ##
+      # Decode the provided datum
+      #
+      # @param datum [Object] The datum to decode
+      #
+      # @raise [NotImplementedError]
+      #     Subclass will need to override this method
+      def decode(datum)
+        raise NotImplementedError
+      end
+    end
+
+    ##
+    # Logical type to handle arbitrary-precision decimals using byte array.
+    #
+    # The byte array contains the two's-complement representation of the unscaled integer
+    # value in big-endian byte order.
+    class BytesDecimal < LogicalTypeWithSchema
+      # Messages for exceptions
+      ERROR_INSUFFICIENT_PRECISION = 'Precision is too small'
+      ERROR_ROUNDING_NECESSARY     = 'Rounding necessary'
+      ERROR_VALUE_MUST_BE_NUMERIC  = 'value must be numeric'
+
+      # The pattern used to pack up the byte array (8 bit unsigned integer/char)
+      PACK_UNSIGNED_CHARS = 'C*'
+
+      # The number 10 as BigDecimal
+      TEN = BigDecimal(10).freeze
+
+      ##
+      # @return [Integer] The number of total digits supported by the decimal
+      attr_reader :precision
+
+      ##
+      # @return [Integer] The number of fractional digits
+      attr_reader :scale
+
+      ##
+      # Build a new decimal logical type
+      #
+      # @param schema [Avro::Schema]
+      #     The schema defining precision and scale for the conversion
+      #
+      # @raise [ArgumentError]

Review comment:
       Minor, but this initialize method no longer raises since the validation was moved to the `BytesSchema` class.
   
   Now that parsing and matching has been added for fixed decimal types the same validation should be performed there, and it would be nice to share the implementation. I think that can happen separately.

##########
File path: lang/ruby/test/test_logical_types.rb
##########
@@ -99,6 +100,140 @@ def test_parse_fixed_duration
     assert_equal 'duration', schema.logical_type
   end
 
+  def test_bytes_decimal
+    schema = Avro::Schema.parse <<-SCHEMA
+      { "type": "bytes", "logicalType": "decimal", "precision": 9, "scale": 6 }
+    SCHEMA
+
+    assert_equal 'decimal', schema.logical_type
+    assert_equal 9, schema.precision
+    assert_equal 6, schema.scale
+
+    assert_encode_and_decode BigDecimal('-3.4562'), schema
+    assert_encode_and_decode BigDecimal('3.4562'), schema
+    assert_encode_and_decode 15.123, schema
+    assert_encode_and_decode 15, schema
+    assert_encode_and_decode BigDecimal('0.123456'), schema
+    assert_encode_and_decode BigDecimal('0'), schema
+    assert_encode_and_decode BigDecimal('1'), schema
+    assert_encode_and_decode BigDecimal('-1'), schema
+
+    assert_raise ArgumentError do
+      type = Avro::LogicalTypes::BytesDecimal.new(schema)
+      type.encode('1.23')
+    end
+  end
+
+  def test_bytes_decimal_range_errors
+    schema = Avro::Schema.parse <<-SCHEMA
+      { "type": "bytes", "logicalType": "decimal", "precision": 4, "scale": 2 }
+    SCHEMA
+
+    type = Avro::LogicalTypes::BytesDecimal.new(schema)
+
+    assert_raises RangeError do
+      type.encode(BigDecimal('345'))
+    end
+
+    assert_raises RangeError do
+      type.encode(BigDecimal('1.5342'))
+    end
+
+    assert_raises RangeError do
+      type.encode(BigDecimal('-1.5342'))
+    end
+
+    assert_raises RangeError do
+      type.encode(BigDecimal('-100.2'))
+    end
+
+    assert_raises RangeError do
+      type.encode(BigDecimal('-99.991'))
+    end
+  end
+
+  def test_bytes_decimal_conversion
+    schema = Avro::Schema.parse <<-SCHEMA
+      { "type": "bytes", "logicalType": "decimal", "precision": 12, "scale": 6 }
+    SCHEMA
+
+    type = Avro::LogicalTypes::BytesDecimal.new(schema)
+
+    enc = "\xcb\x43\x38".dup.force_encoding('BINARY')
+    assert_equal enc, type.encode(BigDecimal('-3.4562'))
+    assert_equal BigDecimal('-3.4562'), type.decode(enc)
+
+    assert_equal "\x34\xbc\xc8".dup.force_encoding('BINARY'), type.encode(BigDecimal('3.4562'))
+    assert_equal BigDecimal('3.4562'), type.decode("\x34\xbc\xc8".dup.force_encoding('BINARY'))
+
+    assert_equal "\x6a\x33\x0e\x87\x00".dup.force_encoding('BINARY'), type.encode(BigDecimal('456123.123456'))
+    assert_equal BigDecimal('456123.123456'), type.decode("\x6a\x33\x0e\x87\x00".dup.force_encoding('BINARY'))
+  end
+
+  def test_logical_type_with_schema
+    exception = assert_raises(ArgumentError) do
+      Avro::LogicalTypes::LogicalTypeWithSchema.new(nil)
+    end
+    assert_equal exception.to_s, 'schema is required'
+
+    schema = Avro::Schema.parse <<-SCHEMA
+      { "type": "bytes", "logicalType": "decimal", "precision": 12, "scale": 6 }
+    SCHEMA
+
+    assert_nothing_raised do
+      Avro::LogicalTypes::LogicalTypeWithSchema.new(schema)
+    end
+
+    assert_raises NotImplementedError do
+      Avro::LogicalTypes::LogicalTypeWithSchema.new(schema).encode(BigDecimal('2'))
+    end
+
+    assert_raises NotImplementedError do
+      Avro::LogicalTypes::LogicalTypeWithSchema.new(schema).decode('foo')
+    end
+  end
+
+  def test_bytes_decimal_object_allocations_encode
+    schema = Avro::Schema.parse <<-SCHEMA
+      { "type": "bytes", "logicalType": "decimal", "precision": 4, "scale": 2 }
+    SCHEMA
+
+    type = Avro::LogicalTypes::BytesDecimal.new(schema)
+
+    positive_value = BigDecimal('5.2')
+    negative_value = BigDecimal('-5.2')
+
+    [positive_value, negative_value].each do |value|
+      report = MemoryProfiler.report do
+        type.encode(value)
+      end
+
+      assert_equal 5, report.total_allocated
+      # Ruby 2.3, 2.4 and 2.7 do not retain anything. Ruby 2.5 and some 2.6 retain 1

Review comment:
       Only Ruby 2.6 and later are supported now, so references to earlier versions could be dropped here. (optional)




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org