You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by ko...@apache.org on 2022/11/29 06:54:07 UTC

[arrow] branch master updated: ARROW-18405: [Ruby] Avoid rebuilding chunked arrays in Arrow::Table.new (#14738)

This is an automated email from the ASF dual-hosted git repository.

kou pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/master by this push:
     new be023c1576 ARROW-18405: [Ruby] Avoid rebuilding chunked arrays in Arrow::Table.new (#14738)
be023c1576 is described below

commit be023c157672275f326059ed98e234455083726c
Author: Sutou Kouhei <ko...@clear-code.com>
AuthorDate: Tue Nov 29 15:54:00 2022 +0900

    ARROW-18405: [Ruby] Avoid rebuilding chunked arrays in Arrow::Table.new (#14738)
    
    Authored-by: Sutou Kouhei <ko...@clear-code.com>
    Signed-off-by: Sutou Kouhei <ko...@clear-code.com>
---
 ruby/red-arrow/lib/arrow/array.rb               |  4 ++++
 ruby/red-arrow/lib/arrow/chunked-array.rb       | 12 ++++++++++++
 ruby/red-arrow/lib/arrow/raw-table-converter.rb | 12 +++++++-----
 ruby/red-arrow/lib/arrow/tensor.rb              |  4 ++++
 ruby/red-arrow/test/test-table.rb               | 22 ++++++++++++++++++++--
 5 files changed, 47 insertions(+), 7 deletions(-)

diff --git a/ruby/red-arrow/lib/arrow/array.rb b/ruby/red-arrow/lib/arrow/array.rb
index 90f5595f18..c9d741a5c9 100644
--- a/ruby/red-arrow/lib/arrow/array.rb
+++ b/ruby/red-arrow/lib/arrow/array.rb
@@ -115,6 +115,10 @@ module Arrow
       self
     end
 
+    def to_arrow_chunked_array
+      ChunkedArray.new([self])
+    end
+
     alias_method :value_data_type_raw, :value_data_type
     def value_data_type
       @value_data_type ||= value_data_type_raw
diff --git a/ruby/red-arrow/lib/arrow/chunked-array.rb b/ruby/red-arrow/lib/arrow/chunked-array.rb
index fb18a1baff..ce7308f08f 100644
--- a/ruby/red-arrow/lib/arrow/chunked-array.rb
+++ b/ruby/red-arrow/lib/arrow/chunked-array.rb
@@ -23,6 +23,18 @@ module Arrow
     include GenericFilterable
     include GenericTakeable
 
+    def to_arrow
+      self
+    end
+
+    def to_arrow_array
+      combine
+    end
+
+    def to_arrow_chunked_array
+      self
+    end
+
     alias_method :size, :n_rows
     unless method_defined?(:length)
       alias_method :length, :n_rows
diff --git a/ruby/red-arrow/lib/arrow/raw-table-converter.rb b/ruby/red-arrow/lib/arrow/raw-table-converter.rb
index f7d03aa782..e189dcde84 100644
--- a/ruby/red-arrow/lib/arrow/raw-table-converter.rb
+++ b/ruby/red-arrow/lib/arrow/raw-table-converter.rb
@@ -35,14 +35,16 @@ module Arrow
         fields = []
         @values = []
         @raw_table.each do |name, array|
-          if array.respond_to?(:to_arrow_array)
-            array = array.to_arrow_array
+          if array.respond_to?(:to_arrow_chunked_array)
+            chunked_array = array.to_arrow_chunked_array
+          elsif array.respond_to?(:to_arrow_array)
+            chunked_array = ChunkedArray.new([array.to_arrow_array])
           else
             array = array.to_ary if array.respond_to?(:to_ary)
-            array = ArrayBuilder.build(array)
+            chunked_array = ChunkedArray.new([ArrayBuilder.build(array)])
           end
-          fields << Field.new(name.to_s, array.value_data_type)
-          @values << array
+          fields << Field.new(name.to_s, chunked_array.value_data_type)
+          @values << chunked_array
         end
         @schema = Schema.new(fields)
       end
diff --git a/ruby/red-arrow/lib/arrow/tensor.rb b/ruby/red-arrow/lib/arrow/tensor.rb
index 203ecea93f..54ea729400 100644
--- a/ruby/red-arrow/lib/arrow/tensor.rb
+++ b/ruby/red-arrow/lib/arrow/tensor.rb
@@ -160,5 +160,9 @@ module Arrow
                                       nil,
                                       0)
     end
+
+    def to_arrow_chunked_array
+      ChunkedArray.new([to_arrow_array])
+    end
   end
 end
diff --git a/ruby/red-arrow/test/test-table.rb b/ruby/red-arrow/test/test-table.rb
index e30707d32b..8e1ba74df0 100644
--- a/ruby/red-arrow/test/test-table.rb
+++ b/ruby/red-arrow/test/test-table.rb
@@ -41,8 +41,25 @@ class TableTest < Test::Unit::TestCase
   end
 
   sub_test_case(".new") do
+    test("{Symbol: Arrow::Array}") do
+      schema = Arrow::Schema.new(numbers: :int64)
+      assert_equal(Arrow::Table.new(schema,
+                                    [Arrow::Int64Array.new([1, 2, 3])]),
+                   Arrow::Table.new(numbers: Arrow::Int64Array.new([1, 2, 3])))
+    end
+
+    test("{Symbol: Arrow::ChunkedArray}") do
+      chunked_array = Arrow::ChunkedArray.new([Arrow::Int64Array.new([1, 2, 3])])
+      schema = Arrow::Schema.new(numbers: :int64)
+      assert_equal(Arrow::Table.new(schema,
+                                    [Arrow::Int64Array.new([1, 2, 3])]),
+                   Arrow::Table.new(numbers: chunked_array))
+    end
+
     test("{Symbol: Arrow::Tensor}") do
-      assert_equal(Arrow::Table.new(numbers: Arrow::UInt8Array.new([1, 2, 3])),
+      schema = Arrow::Schema.new(numbers: :uint8)
+      assert_equal(Arrow::Table.new(schema,
+                                    [Arrow::UInt8Array.new([1, 2, 3])]),
                    Arrow::Table.new(numbers: Arrow::Tensor.new([1, 2, 3])))
     end
 
@@ -51,7 +68,8 @@ class TableTest < Test::Unit::TestCase
       def array_like.to_ary
         [1, 2, 3]
       end
-      assert_equal(Arrow::Table.new(numbers: Arrow::UInt8Array.new([1, 2, 3])),
+      schema = Arrow::Schema.new(numbers: :uint8)
+      assert_equal(Arrow::Table.new(schema, [Arrow::UInt8Array.new([1, 2, 3])]),
                    Arrow::Table.new(numbers: array_like))
     end
   end