You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by ko...@apache.org on 2022/11/29 06:54:07 UTC
[arrow] branch master updated: ARROW-18405: [Ruby] Avoid rebuilding chunked arrays in Arrow::Table.new (#14738)
This is an automated email from the ASF dual-hosted git repository.
kou pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/master by this push:
new be023c1576 ARROW-18405: [Ruby] Avoid rebuilding chunked arrays in Arrow::Table.new (#14738)
be023c1576 is described below
commit be023c157672275f326059ed98e234455083726c
Author: Sutou Kouhei <ko...@clear-code.com>
AuthorDate: Tue Nov 29 15:54:00 2022 +0900
ARROW-18405: [Ruby] Avoid rebuilding chunked arrays in Arrow::Table.new (#14738)
Authored-by: Sutou Kouhei <ko...@clear-code.com>
Signed-off-by: Sutou Kouhei <ko...@clear-code.com>
---
ruby/red-arrow/lib/arrow/array.rb | 4 ++++
ruby/red-arrow/lib/arrow/chunked-array.rb | 12 ++++++++++++
ruby/red-arrow/lib/arrow/raw-table-converter.rb | 12 +++++++-----
ruby/red-arrow/lib/arrow/tensor.rb | 4 ++++
ruby/red-arrow/test/test-table.rb | 22 ++++++++++++++++++++--
5 files changed, 47 insertions(+), 7 deletions(-)
diff --git a/ruby/red-arrow/lib/arrow/array.rb b/ruby/red-arrow/lib/arrow/array.rb
index 90f5595f18..c9d741a5c9 100644
--- a/ruby/red-arrow/lib/arrow/array.rb
+++ b/ruby/red-arrow/lib/arrow/array.rb
@@ -115,6 +115,10 @@ module Arrow
self
end
+ def to_arrow_chunked_array
+ ChunkedArray.new([self])
+ end
+
alias_method :value_data_type_raw, :value_data_type
def value_data_type
@value_data_type ||= value_data_type_raw
diff --git a/ruby/red-arrow/lib/arrow/chunked-array.rb b/ruby/red-arrow/lib/arrow/chunked-array.rb
index fb18a1baff..ce7308f08f 100644
--- a/ruby/red-arrow/lib/arrow/chunked-array.rb
+++ b/ruby/red-arrow/lib/arrow/chunked-array.rb
@@ -23,6 +23,18 @@ module Arrow
include GenericFilterable
include GenericTakeable
+ def to_arrow
+ self
+ end
+
+ def to_arrow_array
+ combine
+ end
+
+ def to_arrow_chunked_array
+ self
+ end
+
alias_method :size, :n_rows
unless method_defined?(:length)
alias_method :length, :n_rows
diff --git a/ruby/red-arrow/lib/arrow/raw-table-converter.rb b/ruby/red-arrow/lib/arrow/raw-table-converter.rb
index f7d03aa782..e189dcde84 100644
--- a/ruby/red-arrow/lib/arrow/raw-table-converter.rb
+++ b/ruby/red-arrow/lib/arrow/raw-table-converter.rb
@@ -35,14 +35,16 @@ module Arrow
fields = []
@values = []
@raw_table.each do |name, array|
- if array.respond_to?(:to_arrow_array)
- array = array.to_arrow_array
+ if array.respond_to?(:to_arrow_chunked_array)
+ chunked_array = array.to_arrow_chunked_array
+ elsif array.respond_to?(:to_arrow_array)
+ chunked_array = ChunkedArray.new([array.to_arrow_array])
else
array = array.to_ary if array.respond_to?(:to_ary)
- array = ArrayBuilder.build(array)
+ chunked_array = ChunkedArray.new([ArrayBuilder.build(array)])
end
- fields << Field.new(name.to_s, array.value_data_type)
- @values << array
+ fields << Field.new(name.to_s, chunked_array.value_data_type)
+ @values << chunked_array
end
@schema = Schema.new(fields)
end
diff --git a/ruby/red-arrow/lib/arrow/tensor.rb b/ruby/red-arrow/lib/arrow/tensor.rb
index 203ecea93f..54ea729400 100644
--- a/ruby/red-arrow/lib/arrow/tensor.rb
+++ b/ruby/red-arrow/lib/arrow/tensor.rb
@@ -160,5 +160,9 @@ module Arrow
nil,
0)
end
+
+ def to_arrow_chunked_array
+ ChunkedArray.new([to_arrow_array])
+ end
end
end
diff --git a/ruby/red-arrow/test/test-table.rb b/ruby/red-arrow/test/test-table.rb
index e30707d32b..8e1ba74df0 100644
--- a/ruby/red-arrow/test/test-table.rb
+++ b/ruby/red-arrow/test/test-table.rb
@@ -41,8 +41,25 @@ class TableTest < Test::Unit::TestCase
end
sub_test_case(".new") do
+ test("{Symbol: Arrow::Array}") do
+ schema = Arrow::Schema.new(numbers: :int64)
+ assert_equal(Arrow::Table.new(schema,
+ [Arrow::Int64Array.new([1, 2, 3])]),
+ Arrow::Table.new(numbers: Arrow::Int64Array.new([1, 2, 3])))
+ end
+
+ test("{Symbol: Arrow::ChunkedArray}") do
+ chunked_array = Arrow::ChunkedArray.new([Arrow::Int64Array.new([1, 2, 3])])
+ schema = Arrow::Schema.new(numbers: :int64)
+ assert_equal(Arrow::Table.new(schema,
+ [Arrow::Int64Array.new([1, 2, 3])]),
+ Arrow::Table.new(numbers: chunked_array))
+ end
+
test("{Symbol: Arrow::Tensor}") do
- assert_equal(Arrow::Table.new(numbers: Arrow::UInt8Array.new([1, 2, 3])),
+ schema = Arrow::Schema.new(numbers: :uint8)
+ assert_equal(Arrow::Table.new(schema,
+ [Arrow::UInt8Array.new([1, 2, 3])]),
Arrow::Table.new(numbers: Arrow::Tensor.new([1, 2, 3])))
end
@@ -51,7 +68,8 @@ class TableTest < Test::Unit::TestCase
def array_like.to_ary
[1, 2, 3]
end
- assert_equal(Arrow::Table.new(numbers: Arrow::UInt8Array.new([1, 2, 3])),
+ schema = Arrow::Schema.new(numbers: :uint8)
+ assert_equal(Arrow::Table.new(schema, [Arrow::UInt8Array.new([1, 2, 3])]),
Arrow::Table.new(numbers: array_like))
end
end