You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by ko...@apache.org on 2022/06/22 20:58:05 UTC

[arrow] branch master updated: ARROW-16874: [Ruby] Use more .try_convert for auto data type conversion (#13417)

This is an automated email from the ASF dual-hosted git repository.

kou pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/master by this push:
     new b8c3ad7c86 ARROW-16874: [Ruby] Use more .try_convert for auto data type conversion (#13417)
b8c3ad7c86 is described below

commit b8c3ad7c8643d15986dee51a58aec8b557fa334e
Author: Sutou Kouhei <ko...@clear-code.com>
AuthorDate: Thu Jun 23 05:57:57 2022 +0900

    ARROW-16874: [Ruby] Use more .try_convert for auto data type conversion (#13417)
    
    Authored-by: Sutou Kouhei <ko...@clear-code.com>
    Signed-off-by: Sutou Kouhei <ko...@clear-code.com>
---
 ruby/red-arrow/lib/arrow/data-type.rb              |  3 +-
 ruby/red-arrow/lib/arrow/field.rb                  | 16 ++++++++
 ruby/red-arrow/lib/arrow/list-data-type.rb         |  7 +---
 ruby/red-arrow/lib/arrow/loader.rb                 |  1 +
 .../arrow/time-unit.rb}                            | 29 +++++--------
 ruby/red-arrow/lib/arrow/time32-array-builder.rb   | 16 +-------
 ruby/red-arrow/lib/arrow/time32-data-type.rb       | 47 +++++-----------------
 ruby/red-arrow/lib/arrow/time64-array-builder.rb   | 16 +-------
 ruby/red-arrow/lib/arrow/time64-data-type.rb       | 47 +++++-----------------
 .../red-arrow/lib/arrow/timestamp-array-builder.rb | 16 +-------
 ruby/red-arrow/lib/arrow/timestamp-data-type.rb    | 43 +++++---------------
 ruby/red-arrow/test/test-decimal128-array.rb       |  3 +-
 ruby/red-arrow/test/test-decimal256-array.rb       |  3 +-
 13 files changed, 66 insertions(+), 181 deletions(-)

diff --git a/ruby/red-arrow/lib/arrow/data-type.rb b/ruby/red-arrow/lib/arrow/data-type.rb
index 07b4525215..1c3653120a 100644
--- a/ruby/red-arrow/lib/arrow/data-type.rb
+++ b/ruby/red-arrow/lib/arrow/data-type.rb
@@ -110,7 +110,7 @@ module Arrow
               description[key] = value
             end
           end
-          if type.nil?
+          if type.nil? and self == DataType
             message =
               "data type description must have :type value: #{data_type.inspect}"
             raise ArgumentError, message
@@ -152,6 +152,7 @@ module Arrow
 
       private
       def resolve_class(data_type)
+        return self if data_type.nil?
         components = data_type.to_s.split("_").collect(&:capitalize)
         data_type_name = components.join.gsub(/\AUint/, "UInt")
         data_type_class_name = "#{data_type_name}DataType"
diff --git a/ruby/red-arrow/lib/arrow/field.rb b/ruby/red-arrow/lib/arrow/field.rb
index e439cb960c..216b0a6e6c 100644
--- a/ruby/red-arrow/lib/arrow/field.rb
+++ b/ruby/red-arrow/lib/arrow/field.rb
@@ -17,6 +17,22 @@
 
 module Arrow
   class Field
+    class << self
+      # @api private
+      def try_convert(value)
+        case value
+        when Hash
+          begin
+            new(value)
+          rescue ArgumentError
+            nil
+          end
+        else
+          nil
+        end
+      end
+    end
+
     alias_method :initialize_raw, :initialize
     private :initialize_raw
 
diff --git a/ruby/red-arrow/lib/arrow/list-data-type.rb b/ruby/red-arrow/lib/arrow/list-data-type.rb
index cfcdd2a9e1..30c9cbdc70 100644
--- a/ruby/red-arrow/lib/arrow/list-data-type.rb
+++ b/ruby/red-arrow/lib/arrow/list-data-type.rb
@@ -107,12 +107,7 @@ module Arrow
         description = arg
         arg = description[:field]
       end
-      if arg.is_a?(Hash)
-        field_description = arg
-        Field.new(field_description)
-      else
-        arg
-      end
+      arg
     end
   end
 end
diff --git a/ruby/red-arrow/lib/arrow/loader.rb b/ruby/red-arrow/lib/arrow/loader.rb
index 71e1c42902..4cc6e63465 100644
--- a/ruby/red-arrow/lib/arrow/loader.rb
+++ b/ruby/red-arrow/lib/arrow/loader.rb
@@ -119,6 +119,7 @@ module Arrow
       require "arrow/table-saver"
       require "arrow/tensor"
       require "arrow/time"
+      require "arrow/time-unit"
       require "arrow/time32-array"
       require "arrow/time32-array-builder"
       require "arrow/time32-data-type"
diff --git a/ruby/red-arrow/test/test-decimal256-array.rb b/ruby/red-arrow/lib/arrow/time-unit.rb
similarity index 60%
copy from ruby/red-arrow/test/test-decimal256-array.rb
copy to ruby/red-arrow/lib/arrow/time-unit.rb
index 7049a45093..e48443ee0c 100644
--- a/ruby/red-arrow/test/test-decimal256-array.rb
+++ b/ruby/red-arrow/lib/arrow/time-unit.rb
@@ -15,24 +15,17 @@
 # specific language governing permissions and limitations
 # under the License.
 
-class Decimal256ArrayTest < Test::Unit::TestCase
-  sub_test_case(".new") do
-    test("build") do
-      data_type = Arrow::Decimal256DataType.new(3, 1)
-      values = [
-        10.1,
-        nil,
-        "10.1",
-        BigDecimal("10.1"),
-      ]
-      array = Arrow::Decimal256Array.new(data_type, values)
-      assert_equal([
-                     BigDecimal("10.1"),
-                     nil,
-                     BigDecimal("10.1"),
-                     BigDecimal("10.1"),
-                   ],
-                   array.to_a)
+module Arrow
+  class TimeUnit
+    class << self
+      # @api private
+      def try_convert(value)
+        if value.is_a?(Hash) and value.size == 1 and value[:unit]
+          super(value[:unit])
+        else
+          super
+        end
+      end
     end
   end
 end
diff --git a/ruby/red-arrow/lib/arrow/time32-array-builder.rb b/ruby/red-arrow/lib/arrow/time32-array-builder.rb
index 088f37c4e2..bf220ffef2 100644
--- a/ruby/red-arrow/lib/arrow/time32-array-builder.rb
+++ b/ruby/red-arrow/lib/arrow/time32-array-builder.rb
@@ -18,24 +18,12 @@
 module Arrow
   class Time32ArrayBuilder
     class << self
-      def build(unit_or_data_type, values)
-        builder = new(unit_or_data_type)
+      def build(data_type, values)
+        builder = new(data_type)
         builder.build(values)
       end
     end
 
-    alias_method :initialize_raw, :initialize
-    def initialize(unit_or_data_type)
-      case unit_or_data_type
-      when DataType
-        data_type = unit_or_data_type
-      else
-        unit = unit_or_data_type
-        data_type = Time32DataType.new(unit)
-      end
-      initialize_raw(data_type)
-    end
-
     def unit
       @unit ||= value_data_type.unit
     end
diff --git a/ruby/red-arrow/lib/arrow/time32-data-type.rb b/ruby/red-arrow/lib/arrow/time32-data-type.rb
index be1d04fa26..4042a2de4e 100644
--- a/ruby/red-arrow/lib/arrow/time32-data-type.rb
+++ b/ruby/red-arrow/lib/arrow/time32-data-type.rb
@@ -17,45 +17,16 @@
 
 module Arrow
   class Time32DataType
-    alias_method :initialize_raw, :initialize
-    private :initialize_raw
-
-    # Creates a new {Arrow::Time32DataType}.
-    #
-    # @overload initialize(unit)
-    #
-    #   @param unit [Arrow::TimeUnit, Symbol] The unit of the
-    #     time32 data type.
-    #
-    #     The unit must be second or millisecond.
-    #
-    #   @example Create a time32 data type with Arrow::TimeUnit
-    #     Arrow::Time32DataType.new(Arrow::TimeUnit::MILLI)
-    #
-    #   @example Create a time32 data type with Symbol
-    #     Arrow::Time32DataType.new(:milli)
-    #
-    # @overload initialize(description)
-    #
-    #   @param description [Hash] The description of the time32 data
-    #     type. It must have `:unit` value.
-    #
-    #   @option description [Arrow::TimeUnit, Symbol] :unit The unit of
-    #     the time32 data type.
-    #
-    #     The unit must be second or millisecond.
-    #
-    #   @example Create a time32 data type with Arrow::TimeUnit
-    #     Arrow::Time32DataType.new(unit: Arrow::TimeUnit::MILLI)
-    #
-    #   @example Create a time32 data type with Symbol
-    #     Arrow::Time32DataType.new(unit: :milli)
-    def initialize(unit)
-      if unit.is_a?(Hash)
-        description = unit
-        unit = description[:unit]
+    class << self
+      # @api private
+      def try_convert(value)
+        case value
+        when Symbol, Arrow::TimeUnit
+          new(value)
+        else
+          super
+        end
       end
-      initialize_raw(unit)
     end
   end
 end
diff --git a/ruby/red-arrow/lib/arrow/time64-array-builder.rb b/ruby/red-arrow/lib/arrow/time64-array-builder.rb
index dec15b8bfd..835a45a877 100644
--- a/ruby/red-arrow/lib/arrow/time64-array-builder.rb
+++ b/ruby/red-arrow/lib/arrow/time64-array-builder.rb
@@ -18,24 +18,12 @@
 module Arrow
   class Time64ArrayBuilder
     class << self
-      def build(unit_or_data_type, values)
-        builder = new(unit_or_data_type)
+      def build(data_type, values)
+        builder = new(data_type)
         builder.build(values)
       end
     end
 
-    alias_method :initialize_raw, :initialize
-    def initialize(unit_or_data_type)
-      case unit_or_data_type
-      when DataType
-        data_type = unit_or_data_type
-      else
-        unit = unit_or_data_type
-        data_type = Time64DataType.new(unit)
-      end
-      initialize_raw(data_type)
-    end
-
     def unit
       @unit ||= value_data_type.unit
     end
diff --git a/ruby/red-arrow/lib/arrow/time64-data-type.rb b/ruby/red-arrow/lib/arrow/time64-data-type.rb
index 13795aa83d..41d95e0ce5 100644
--- a/ruby/red-arrow/lib/arrow/time64-data-type.rb
+++ b/ruby/red-arrow/lib/arrow/time64-data-type.rb
@@ -17,45 +17,16 @@
 
 module Arrow
   class Time64DataType
-    alias_method :initialize_raw, :initialize
-    private :initialize_raw
-
-    # Creates a new {Arrow::Time64DataType}.
-    #
-    # @overload initialize(unit)
-    #
-    #   @param unit [Arrow::TimeUnit, Symbol] The unit of the
-    #     time64 data type.
-    #
-    #     The unit must be microsecond or nanosecond.
-    #
-    #   @example Create a time64 data type with Arrow::TimeUnit
-    #     Arrow::Time64DataType.new(Arrow::TimeUnit::NANO)
-    #
-    #   @example Create a time64 data type with Symbol
-    #     Arrow::Time64DataType.new(:nano)
-    #
-    # @overload initialize(description)
-    #
-    #   @param description [Hash] The description of the time64 data
-    #     type. It must have `:unit` value.
-    #
-    #   @option description [Arrow::TimeUnit, Symbol] :unit The unit of
-    #     the time64 data type.
-    #
-    #     The unit must be microsecond or nanosecond.
-    #
-    #   @example Create a time64 data type with Arrow::TimeUnit
-    #     Arrow::Time64DataType.new(unit: Arrow::TimeUnit::NANO)
-    #
-    #   @example Create a time64 data type with Symbol
-    #     Arrow::Time64DataType.new(unit: :nano)
-    def initialize(unit)
-      if unit.is_a?(Hash)
-        description = unit
-        unit = description[:unit]
+    class << self
+      # @api private
+      def try_convert(value)
+        case value
+        when Symbol, Arrow::TimeUnit
+          new(value)
+        else
+          super
+        end
       end
-      initialize_raw(unit)
     end
   end
 end
diff --git a/ruby/red-arrow/lib/arrow/timestamp-array-builder.rb b/ruby/red-arrow/lib/arrow/timestamp-array-builder.rb
index 68bcb0fec3..c1284aca3d 100644
--- a/ruby/red-arrow/lib/arrow/timestamp-array-builder.rb
+++ b/ruby/red-arrow/lib/arrow/timestamp-array-builder.rb
@@ -18,24 +18,12 @@
 module Arrow
   class TimestampArrayBuilder
     class << self
-      def build(unit_or_data_type, values)
-        builder = new(unit_or_data_type)
+      def build(data_type, values)
+        builder = new(data_type)
         builder.build(values)
       end
     end
 
-    alias_method :initialize_raw, :initialize
-    def initialize(unit_or_data_type)
-      case unit_or_data_type
-      when DataType
-        data_type = unit_or_data_type
-      else
-        unit = unit_or_data_type
-        data_type = TimestampDataType.new(unit)
-      end
-      initialize_raw(data_type)
-    end
-
     private
     def unit_id
       @unit_id ||= value_data_type.unit.nick.to_sym
diff --git a/ruby/red-arrow/lib/arrow/timestamp-data-type.rb b/ruby/red-arrow/lib/arrow/timestamp-data-type.rb
index cd91f567d1..b5696f8f75 100644
--- a/ruby/red-arrow/lib/arrow/timestamp-data-type.rb
+++ b/ruby/red-arrow/lib/arrow/timestamp-data-type.rb
@@ -17,41 +17,16 @@
 
 module Arrow
   class TimestampDataType
-    alias_method :initialize_raw, :initialize
-    private :initialize_raw
-
-    # Creates a new {Arrow::TimestampDataType}.
-    #
-    # @overload initialize(unit)
-    #
-    #   @param unit [Arrow::TimeUnit, Symbol] The unit of the
-    #     timestamp data type.
-    #
-    #   @example Create a timestamp data type with Arrow::TimeUnit
-    #     Arrow::TimestampDataType.new(Arrow::TimeUnit::MILLI)
-    #
-    #   @example Create a timestamp data type with Symbol
-    #     Arrow::TimestampDataType.new(:milli)
-    #
-    # @overload initialize(description)
-    #
-    #   @param description [Hash] The description of the timestamp data
-    #     type. It must have `:unit` value.
-    #
-    #   @option description [Arrow::TimeUnit, Symbol] :unit The unit of
-    #     the timestamp data type.
-    #
-    #   @example Create a timestamp data type with Arrow::TimeUnit
-    #     Arrow::TimestampDataType.new(unit: Arrow::TimeUnit::MILLI)
-    #
-    #   @example Create a timestamp data type with Symbol
-    #     Arrow::TimestampDataType.new(unit: :milli)
-    def initialize(unit)
-      if unit.is_a?(Hash)
-        description = unit
-        unit = description[:unit]
+    class << self
+      # @api private
+      def try_convert(value)
+        case value
+        when Symbol, Arrow::TimeUnit
+          new(value)
+        else
+          super
+        end
       end
-      initialize_raw(unit)
     end
   end
 end
diff --git a/ruby/red-arrow/test/test-decimal128-array.rb b/ruby/red-arrow/test/test-decimal128-array.rb
index 88ab1c26c7..9cc64064e4 100644
--- a/ruby/red-arrow/test/test-decimal128-array.rb
+++ b/ruby/red-arrow/test/test-decimal128-array.rb
@@ -18,14 +18,13 @@
 class Decimal128ArrayTest < Test::Unit::TestCase
   sub_test_case(".new") do
     test("build") do
-      data_type = Arrow::Decimal128DataType.new(3, 1)
       values = [
         10.1,
         nil,
         "10.1",
         BigDecimal("10.1"),
       ]
-      array = Arrow::Decimal128Array.new(data_type, values)
+      array = Arrow::Decimal128Array.new({precision: 3, scale: 1}, values)
       assert_equal([
                      BigDecimal("10.1"),
                      nil,
diff --git a/ruby/red-arrow/test/test-decimal256-array.rb b/ruby/red-arrow/test/test-decimal256-array.rb
index 7049a45093..d45015677e 100644
--- a/ruby/red-arrow/test/test-decimal256-array.rb
+++ b/ruby/red-arrow/test/test-decimal256-array.rb
@@ -18,14 +18,13 @@
 class Decimal256ArrayTest < Test::Unit::TestCase
   sub_test_case(".new") do
     test("build") do
-      data_type = Arrow::Decimal256DataType.new(3, 1)
       values = [
         10.1,
         nil,
         "10.1",
         BigDecimal("10.1"),
       ]
-      array = Arrow::Decimal256Array.new(data_type, values)
+      array = Arrow::Decimal256Array.new({precision: 3, scale: 1}, values)
       assert_equal([
                      BigDecimal("10.1"),
                      nil,