You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by ko...@apache.org on 2022/06/22 23:58:32 UTC

[arrow] branch master updated: ARROW-14518: [Ruby] Add support for Arrow::Array.new([BigDecimal]) (#13377)

This is an automated email from the ASF dual-hosted git repository.

kou pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/master by this push:
     new 6fd4d3423e ARROW-14518: [Ruby] Add support for Arrow::Array.new([BigDecimal]) (#13377)
6fd4d3423e is described below

commit 6fd4d3423e3f0d597fac547ee74f4ccba2812c8a
Author: Sutou Kouhei <ko...@clear-code.com>
AuthorDate: Thu Jun 23 08:58:26 2022 +0900

    ARROW-14518: [Ruby] Add support for Arrow::Array.new([BigDecimal]) (#13377)
    
    This requires bigdecimal 3.1.0 or later for BigDecimal#scale.
    
    Arrow::Array.new([BigDecimal]) detects the max precision and scale
    from the BigDecimals and creates a suitable
    Arrow::Decimal{128,256}DataType automatically.
    
    This also truncates the given BigDecimal when the specified
    Arrow::Decimal{128,256}DataType doesn't have enough scale. This
    still doesn't check precision. If a user specifies data that has too
    much precision, the data is used as-is.
    
    Authored-by: Sutou Kouhei <ko...@clear-code.com>
    Signed-off-by: Sutou Kouhei <ko...@clear-code.com>
---
 ruby/red-arrow/lib/arrow/array-builder.rb          | 46 +++++++++++++---
 .../lib/arrow/decimal128-array-builder.rb          | 22 +++++---
 ruby/red-arrow/lib/arrow/decimal128.rb             | 14 +++++
 .../lib/arrow/decimal256-array-builder.rb          | 22 +++++---
 ruby/red-arrow/lib/arrow/decimal256.rb             | 14 +++++
 ruby/red-arrow/lib/arrow/loader.rb                 |  1 +
 .../arrow/string-array-builder.rb}                 | 27 ++++------
 ruby/red-arrow/red-arrow.gemspec                   |  2 +-
 ruby/red-arrow/test/test-array-builder.rb          | 62 ++++++++++++++++++++++
 .../test/test-decimal128-array-builder.rb          | 14 +++++
 ruby/red-arrow/test/test-decimal128-array.rb       |  4 ++
 ruby/red-arrow/test/test-decimal128.rb             | 28 +++++++++-
 .../test/test-decimal256-array-builder.rb          | 14 +++++
 ruby/red-arrow/test/test-decimal256-array.rb       |  4 ++
 ruby/red-arrow/test/test-decimal256.rb             | 28 +++++++++-
 15 files changed, 262 insertions(+), 40 deletions(-)

diff --git a/ruby/red-arrow/lib/arrow/array-builder.rb b/ruby/red-arrow/lib/arrow/array-builder.rb
index 651aed9623..876fd71120 100644
--- a/ruby/red-arrow/lib/arrow/array-builder.rb
+++ b/ruby/red-arrow/lib/arrow/array-builder.rb
@@ -33,6 +33,11 @@ module Arrow
         end
         if builder_info
           builder = builder_info[:builder]
+          if builder.nil? and builder_info[:builder_type]
+            builder = create_builder(builder_info)
+          end
+        end
+        if builder
           builder.build(values)
         else
           Arrow::StringArray.new(values)
@@ -121,15 +126,28 @@ module Arrow
             detected: true,
           }
         when BigDecimal
-          if value.to_arrow.is_a?(Decimal128)
-            {
-              builder: Decimal128ArrayBuilder.new,
-            }
-          else
+          builder_info ||= {}
+          if builder_info[:builder] or value.nan? or value.infinite?
             {
-              builder: Decimal256ArrayBuilder.new,
+              builder: StringArrayBuilder.new,
               detected: true,
             }
+          else
+            precision = [builder_info[:precision] || 0, value.precision].max
+            scale = [builder_info[:scale] || 0, value.scale].max
+            if precision <= Decimal128DataType::MAX_PRECISION
+              {
+                builder_type: :decimal128,
+                precision: precision,
+                scale: scale,
+              }
+            else
+              {
+                builder_type: :decimal256,
+                precision: precision,
+                scale: scale,
+              }
+            end
           end
         when ::Array
           sub_builder_info = nil
@@ -154,6 +172,22 @@ module Arrow
           }
         end
       end
+
+      def create_builder(builder_info)
+        builder_type = builder_info[:builder_type]
+        case builder_type
+        when :decimal128
+          data_type = Decimal128DataType.new(builder_info[:precision],
+                                             builder_info[:scale])
+          Decimal128ArrayBuilder.new(data_type)
+        when :decimal256
+          data_type = Decimal256DataType.new(builder_info[:precision],
+                                             builder_info[:scale])
+          Decimal256ArrayBuilder.new(data_type)
+        else
+          nil
+        end
+      end
     end
 
     def build(values)
diff --git a/ruby/red-arrow/lib/arrow/decimal128-array-builder.rb b/ruby/red-arrow/lib/arrow/decimal128-array-builder.rb
index d380ce070d..ff245e48bc 100644
--- a/ruby/red-arrow/lib/arrow/decimal128-array-builder.rb
+++ b/ruby/red-arrow/lib/arrow/decimal128-array-builder.rb
@@ -42,16 +42,26 @@ module Arrow
     end
 
     private
+    def precision
+      @precision ||= value_data_type.precision
+    end
+
+    def scale
+      @scale ||= value_data_type.scale
+    end
+
     def normalize_value(value)
       case value
-      when String
-        Decimal128.new(value)
-      when Float
-        Decimal128.new(value.to_s)
       when BigDecimal
-        Decimal128.new(value.to_s)
+        if value.nan? or value.infinite?
+          message = "can't use #{value} as an Arrow::Decimal128Array value"
+          raise FloatDomainError, message
+        end
+        integer, decimal = value.to_s("f").split(".", 2)
+        decimal = decimal[0, scale].ljust(scale, "0")
+        Decimal128.new("#{integer}.#{decimal}")
       else
-        value
+        Decimal128.try_convert(value) || value
       end
     end
   end
diff --git a/ruby/red-arrow/lib/arrow/decimal128.rb b/ruby/red-arrow/lib/arrow/decimal128.rb
index bf853ae7f1..6b9e5008c0 100644
--- a/ruby/red-arrow/lib/arrow/decimal128.rb
+++ b/ruby/red-arrow/lib/arrow/decimal128.rb
@@ -17,6 +17,20 @@
 
 module Arrow
   class Decimal128
+    class << self
+      # @api private
+      def try_convert(value)
+        case value
+        when String
+          new(value)
+        when Float
+          new(value.to_s)
+        else
+          nil
+        end
+      end
+    end
+
     alias_method :to_s_raw, :to_s
 
     # @overload to_s
diff --git a/ruby/red-arrow/lib/arrow/decimal256-array-builder.rb b/ruby/red-arrow/lib/arrow/decimal256-array-builder.rb
index fb89ff00b6..0deb5a4fd7 100644
--- a/ruby/red-arrow/lib/arrow/decimal256-array-builder.rb
+++ b/ruby/red-arrow/lib/arrow/decimal256-array-builder.rb
@@ -45,16 +45,26 @@ module Arrow
     end
 
     private
+    def precision
+      @precision ||= value_data_type.precision
+    end
+
+    def scale
+      @scale ||= value_data_type.scale
+    end
+
     def normalize_value(value)
       case value
-      when String
-        Decimal256.new(value)
-      when Float
-        Decimal256.new(value.to_s)
       when BigDecimal
-        Decimal256.new(value.to_s)
+        if value.nan? or value.infinite?
+          message = "can't use #{value} as an Arrow::Decimal256Array value"
+          raise FloatDomainError, message
+        end
+        integer, decimal = value.to_s("f").split(".", 2)
+        decimal = decimal[0, scale].ljust(scale, "0")
+        Decimal256.new("#{integer}.#{decimal}")
       else
-        value
+        Decimal256.try_convert(value) || value
       end
     end
   end
diff --git a/ruby/red-arrow/lib/arrow/decimal256.rb b/ruby/red-arrow/lib/arrow/decimal256.rb
index 1a7097a4d3..d79558a73a 100644
--- a/ruby/red-arrow/lib/arrow/decimal256.rb
+++ b/ruby/red-arrow/lib/arrow/decimal256.rb
@@ -17,6 +17,20 @@
 
 module Arrow
   class Decimal256
+    class << self
+      # @api private
+      def try_convert(value)
+        case value
+        when String
+          new(value)
+        when Float
+          new(value.to_s)
+        else
+          nil
+        end
+      end
+    end
+
     alias_method :to_s_raw, :to_s
 
     # @overload to_s
diff --git a/ruby/red-arrow/lib/arrow/loader.rb b/ruby/red-arrow/lib/arrow/loader.rb
index 4cc6e63465..58b11e567f 100644
--- a/ruby/red-arrow/lib/arrow/loader.rb
+++ b/ruby/red-arrow/lib/arrow/loader.rb
@@ -107,6 +107,7 @@ module Arrow
       require "arrow/source-node-options"
       require "arrow/sparse-union-data-type"
       require "arrow/string-dictionary-array-builder"
+      require "arrow/string-array-builder"
       require "arrow/struct-array"
       require "arrow/struct-array-builder"
       require "arrow/struct-data-type"
diff --git a/ruby/red-arrow/test/test-decimal256-array.rb b/ruby/red-arrow/lib/arrow/string-array-builder.rb
similarity index 62%
copy from ruby/red-arrow/test/test-decimal256-array.rb
copy to ruby/red-arrow/lib/arrow/string-array-builder.rb
index d45015677e..0922d57388 100644
--- a/ruby/red-arrow/test/test-decimal256-array.rb
+++ b/ruby/red-arrow/lib/arrow/string-array-builder.rb
@@ -15,23 +15,16 @@
 # specific language governing permissions and limitations
 # under the License.
 
-class Decimal256ArrayTest < Test::Unit::TestCase
-  sub_test_case(".new") do
-    test("build") do
-      values = [
-        10.1,
-        nil,
-        "10.1",
-        BigDecimal("10.1"),
-      ]
-      array = Arrow::Decimal256Array.new({precision: 3, scale: 1}, values)
-      assert_equal([
-                     BigDecimal("10.1"),
-                     nil,
-                     BigDecimal("10.1"),
-                     BigDecimal("10.1"),
-                   ],
-                   array.to_a)
+module Arrow
+  class StringArrayBuilder
+    private
+    def convert_to_arrow_value(value)
+      case value
+      when GLib::Bytes, String
+        value
+      else
+        value.to_s
+      end
     end
   end
 end
diff --git a/ruby/red-arrow/red-arrow.gemspec b/ruby/red-arrow/red-arrow.gemspec
index 449969ee16..6be1f0b626 100644
--- a/ruby/red-arrow/red-arrow.gemspec
+++ b/ruby/red-arrow/red-arrow.gemspec
@@ -46,7 +46,7 @@ Gem::Specification.new do |spec|
   spec.test_files += Dir.glob("test/**/*")
   spec.extensions = ["ext/arrow/extconf.rb"]
 
-  spec.add_runtime_dependency("bigdecimal", ">= 2.0.3")
+  spec.add_runtime_dependency("bigdecimal", ">= 3.1.0")
   spec.add_runtime_dependency("extpp", ">= 0.0.7")
   spec.add_runtime_dependency("gio2", ">= 3.5.0")
   spec.add_runtime_dependency("native-package-installer")
diff --git a/ruby/red-arrow/test/test-array-builder.rb b/ruby/red-arrow/test/test-array-builder.rb
index 318167d511..fb48aba8a4 100644
--- a/ruby/red-arrow/test/test-array-builder.rb
+++ b/ruby/red-arrow/test/test-array-builder.rb
@@ -68,6 +68,68 @@ class ArrayBuilderTest < Test::Unit::TestCase
                      ])
       end
 
+      test("decimal + string") do
+        raw_array = [BigDecimal("10.1"), "10.1"]
+        array = Arrow::ArrayBuilder.build(raw_array)
+        assert_equal(raw_array.collect(&:to_s), array.to_a)
+      end
+
+      test("NaN") do
+        raw_array = [BigDecimal("10.1"), BigDecimal::NAN]
+        array = Arrow::ArrayBuilder.build(raw_array)
+        assert_equal(raw_array.collect(&:to_s), array.to_a)
+      end
+
+      test("Infinity") do
+        raw_array = [BigDecimal("10.1"), BigDecimal::INFINITY]
+        array = Arrow::ArrayBuilder.build(raw_array)
+        assert_equal(raw_array.collect(&:to_s), array.to_a)
+      end
+
+      test("decimal128") do
+        values = [
+          BigDecimal("10.1"),
+          BigDecimal("1.11"),
+          BigDecimal("1"),
+        ]
+        array = Arrow::Array.new(values)
+        data_type = Arrow::Decimal128DataType.new(3, 2)
+        assert_equal({
+                       data_type: data_type,
+                       values: [
+                         BigDecimal("10.1"),
+                         BigDecimal("1.11"),
+                         BigDecimal("1"),
+                       ],
+                     },
+                     {
+                       data_type: array.value_data_type,
+                       values: array.to_a,
+                     })
+      end
+
+      test("decimal256") do
+        values = [
+          BigDecimal("1" * 40 + ".1"),
+          BigDecimal("1" * 38 + ".11"),
+          BigDecimal("1" * 37),
+        ]
+        array = Arrow::Array.new(values)
+        data_type = Arrow::Decimal256DataType.new(41, 2)
+        assert_equal({
+                       data_type: data_type,
+                       values: [
+                         BigDecimal("1" * 40 + ".1"),
+                         BigDecimal("1" * 38 + ".11"),
+                         BigDecimal("1" * 37),
+                       ],
+                     },
+                     {
+                       data_type: array.value_data_type,
+                       values: array.to_a,
+                     })
+      end
+
       test("list<boolean>s") do
         assert_build(Arrow::ArrayBuilder,
                      [
diff --git a/ruby/red-arrow/test/test-decimal128-array-builder.rb b/ruby/red-arrow/test/test-decimal128-array-builder.rb
index 31d58bd581..6a22492703 100644
--- a/ruby/red-arrow/test/test-decimal128-array-builder.rb
+++ b/ruby/red-arrow/test/test-decimal128-array-builder.rb
@@ -55,6 +55,20 @@ class Decimal128ArrayBuilderTest < Test::Unit::TestCase
       assert_equal(BigDecimal("10.1"),
                    array[0])
     end
+
+    test("BigDecimal::NAN") do
+      message = "can't use NaN as an Arrow::Decimal128Array value"
+      assert_raise(FloatDomainError.new(message)) do
+        @builder.append_value(BigDecimal::NAN)
+      end
+    end
+
+    test("BigDecimal::INFINITY") do
+      message = "can't use Infinity as an Arrow::Decimal128Array value"
+      assert_raise(FloatDomainError.new(message)) do
+        @builder.append_value(BigDecimal::INFINITY)
+      end
+    end
   end
 
   sub_test_case("#append_values") do
diff --git a/ruby/red-arrow/test/test-decimal128-array.rb b/ruby/red-arrow/test/test-decimal128-array.rb
index 9cc64064e4..a50e2cf4a4 100644
--- a/ruby/red-arrow/test/test-decimal128-array.rb
+++ b/ruby/red-arrow/test/test-decimal128-array.rb
@@ -23,6 +23,8 @@ class Decimal128ArrayTest < Test::Unit::TestCase
         nil,
         "10.1",
         BigDecimal("10.1"),
+        BigDecimal("1.11"),
+        BigDecimal("1"),
       ]
       array = Arrow::Decimal128Array.new({precision: 3, scale: 1}, values)
       assert_equal([
@@ -30,6 +32,8 @@ class Decimal128ArrayTest < Test::Unit::TestCase
                      nil,
                      BigDecimal("10.1"),
                      BigDecimal("10.1"),
+                     BigDecimal("1.1"),
+                     BigDecimal("1"),
                    ],
                    array.to_a)
     end
diff --git a/ruby/red-arrow/test/test-decimal128.rb b/ruby/red-arrow/test/test-decimal128.rb
index 9e7f8792cf..7da2c55f2b 100644
--- a/ruby/red-arrow/test/test-decimal128.rb
+++ b/ruby/red-arrow/test/test-decimal128.rb
@@ -28,9 +28,21 @@ class Decimal128Test < Test::Unit::TestCase
         end
       end
 
+      test("String") do
+        assert do
+          @decimal128 == "10.1"
+        end
+      end
+
+      test("Float") do
+        assert do
+          @decimal128 == 10.1
+        end
+      end
+
       test("not Arrow::Decimal128") do
         assert do
-          not (@decimal128 == 10.1)
+          not (@decimal128 == :"10.1")
         end
       end
     end
@@ -42,9 +54,21 @@ class Decimal128Test < Test::Unit::TestCase
         end
       end
 
+      test("String") do
+        assert do
+          not (@decimal128 != "10.1")
+        end
+      end
+
+      test("Float") do
+        assert do
+          not (@decimal128 != 10.1)
+        end
+      end
+
       test("not Arrow::Decimal128") do
         assert do
-          @decimal128 != 10.1
+          @decimal128 != :"10.1"
         end
       end
     end
diff --git a/ruby/red-arrow/test/test-decimal256-array-builder.rb b/ruby/red-arrow/test/test-decimal256-array-builder.rb
index f0769b6623..7ca14a67bc 100644
--- a/ruby/red-arrow/test/test-decimal256-array-builder.rb
+++ b/ruby/red-arrow/test/test-decimal256-array-builder.rb
@@ -55,6 +55,20 @@ class Decimal256ArrayBuilderTest < Test::Unit::TestCase
       assert_equal(BigDecimal("10.1"),
                    array[0])
     end
+
+    test("BigDecimal::NAN") do
+      message = "can't use NaN as an Arrow::Decimal256Array value"
+      assert_raise(FloatDomainError.new(message)) do
+        @builder.append_value(BigDecimal::NAN)
+      end
+    end
+
+    test("BigDecimal::INFINITY") do
+      message = "can't use Infinity as an Arrow::Decimal256Array value"
+      assert_raise(FloatDomainError.new(message)) do
+        @builder.append_value(BigDecimal::INFINITY)
+      end
+    end
   end
 
   sub_test_case("#append_values") do
diff --git a/ruby/red-arrow/test/test-decimal256-array.rb b/ruby/red-arrow/test/test-decimal256-array.rb
index d45015677e..ed542f2d6c 100644
--- a/ruby/red-arrow/test/test-decimal256-array.rb
+++ b/ruby/red-arrow/test/test-decimal256-array.rb
@@ -23,6 +23,8 @@ class Decimal256ArrayTest < Test::Unit::TestCase
         nil,
         "10.1",
         BigDecimal("10.1"),
+        BigDecimal("1.11"),
+        BigDecimal("1"),
       ]
       array = Arrow::Decimal256Array.new({precision: 3, scale: 1}, values)
       assert_equal([
@@ -30,6 +32,8 @@ class Decimal256ArrayTest < Test::Unit::TestCase
                      nil,
                      BigDecimal("10.1"),
                      BigDecimal("10.1"),
+                     BigDecimal("1.1"),
+                     BigDecimal("1"),
                    ],
                    array.to_a)
     end
diff --git a/ruby/red-arrow/test/test-decimal256.rb b/ruby/red-arrow/test/test-decimal256.rb
index 422167f992..8170ae3c51 100644
--- a/ruby/red-arrow/test/test-decimal256.rb
+++ b/ruby/red-arrow/test/test-decimal256.rb
@@ -28,9 +28,21 @@ class Decimal256Test < Test::Unit::TestCase
         end
       end
 
+      test("String") do
+        assert do
+          @decimal256 == "10.1"
+        end
+      end
+
+      test("Float") do
+        assert do
+          @decimal256 == 10.1
+        end
+      end
+
       test("not Arrow::Decimal256") do
         assert do
-          not (@decimal256 == 10.1)
+          not (@decimal256 == :"10.1")
         end
       end
     end
@@ -42,9 +54,21 @@ class Decimal256Test < Test::Unit::TestCase
         end
       end
 
+      test("String") do
+        assert do
+          not (@decimal256 != "10.1")
+        end
+      end
+
+      test("Float") do
+        assert do
+          not (@decimal256 != 10.1)
+        end
+      end
+
       test("not Arrow::Decimal256") do
         assert do
-          @decimal256 != 10.1
+          @decimal256 != :"10.1"
         end
       end
     end