You are viewing a plain text version of this content. The hyperlink to the canonical version was not preserved in this plain text copy.
Posted to commits@arrow.apache.org by ko...@apache.org on 2023/01/02 12:44:24 UTC
[arrow] branch master updated: ARROW-15206: [Ruby] Add support for `Arrow::Table.load(uri, schema:)` (#15148)
This is an automated email from the ASF dual-hosted git repository.
kou pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/master by this push:
new 92f1dd0339 ARROW-15206: [Ruby] Add support for `Arrow::Table.load(uri, schema:)` (#15148)
92f1dd0339 is described below
commit 92f1dd03397110bbf8686164a9d54f94366c46a4
Author: Sutou Kouhei <ko...@clear-code.com>
AuthorDate: Mon Jan 2 21:43:28 2023 +0900
ARROW-15206: [Ruby] Add support for `Arrow::Table.load(uri, schema:)` (#15148)
Authored-by: Sutou Kouhei <ko...@clear-code.com>
Signed-off-by: Sutou Kouhei <ko...@clear-code.com>
---
.../lib/arrow-dataset/arrow-table-loadable.rb | 14 +++++++++++---
ruby/red-arrow-dataset/lib/arrow-dataset/dataset.rb | 7 +++++--
.../arrow-dataset/{dataset.rb => finish-options.rb} | 21 +++++++++++++++------
ruby/red-arrow-dataset/lib/arrow-dataset/loader.rb | 1 +
ruby/red-arrow-dataset/test/test-arrow-table.rb | 13 +++++++++++++
5 files changed, 45 insertions(+), 11 deletions(-)
diff --git a/ruby/red-arrow-dataset/lib/arrow-dataset/arrow-table-loadable.rb b/ruby/red-arrow-dataset/lib/arrow-dataset/arrow-table-loadable.rb
index 14c8dce6f5..b3e6b1a109 100644
--- a/ruby/red-arrow-dataset/lib/arrow-dataset/arrow-table-loadable.rb
+++ b/ruby/red-arrow-dataset/lib/arrow-dataset/arrow-table-loadable.rb
@@ -36,13 +36,21 @@ module ArrowDataset
end
def internal_load_from_uri(uri)
- format = FileFormat.resolve(@options[:format])
+ options = @options.dup
+ format = FileFormat.resolve(options.delete(:format))
dataset = FileSystemDataset.build(format) do |factory|
factory.file_system_uri = uri
+ finish_options = FinishOptions.new
+ FinishOptions.instance_methods(false).each do |method|
+ next unless method.end_with?("=")
+ value = options.delete(method[0..-2].to_sym)
+ next if value.nil?
+ finish_options.public_send(method, value)
+ end
+ finish_options
end
scanner_builder = dataset.begin_scan
- @options.each do |key, value|
- next if key == :format
+ options.each do |key, value|
next if value.nil?
setter = "#{key}="
next unless scanner_builder.respond_to?(setter)
diff --git a/ruby/red-arrow-dataset/lib/arrow-dataset/dataset.rb b/ruby/red-arrow-dataset/lib/arrow-dataset/dataset.rb
index a658fc3f2e..00d0546257 100644
--- a/ruby/red-arrow-dataset/lib/arrow-dataset/dataset.rb
+++ b/ruby/red-arrow-dataset/lib/arrow-dataset/dataset.rb
@@ -21,8 +21,11 @@ module ArrowDataset
def build(*args)
factory_class = ArrowDataset.const_get("#{name}Factory")
factory = factory_class.new(*args)
- yield(factory)
- factory.finish
+ options = yield(factory)
+ unless options.is_a?(FinishOptions)
+ options = FinishOptions.try_convert(options)
+ end
+ factory.finish(options)
end
end
end
diff --git a/ruby/red-arrow-dataset/lib/arrow-dataset/dataset.rb b/ruby/red-arrow-dataset/lib/arrow-dataset/finish-options.rb
similarity index 70%
copy from ruby/red-arrow-dataset/lib/arrow-dataset/dataset.rb
copy to ruby/red-arrow-dataset/lib/arrow-dataset/finish-options.rb
index a658fc3f2e..d26e4ba8cc 100644
--- a/ruby/red-arrow-dataset/lib/arrow-dataset/dataset.rb
+++ b/ruby/red-arrow-dataset/lib/arrow-dataset/finish-options.rb
@@ -16,13 +16,22 @@
# under the License.
module ArrowDataset
- class Dataset
+ class FinishOptions
class << self
- def build(*args)
- factory_class = ArrowDataset.const_get("#{name}Factory")
- factory = factory_class.new(*args)
- yield(factory)
- factory.finish
+ # @api private
+ def try_convert(value)
+ case value
+ when Hash
+ options = new
+ value.each do |k, v|
+ setter = "#{k}="
+ next unless options.respond_to?(setter)
+ options.public_send(setter, v)
+ end
+ options
+ else
+ nil
+ end
end
end
end
diff --git a/ruby/red-arrow-dataset/lib/arrow-dataset/loader.rb b/ruby/red-arrow-dataset/lib/arrow-dataset/loader.rb
index b1be000f7c..40748dcb49 100644
--- a/ruby/red-arrow-dataset/lib/arrow-dataset/loader.rb
+++ b/ruby/red-arrow-dataset/lib/arrow-dataset/loader.rb
@@ -34,6 +34,7 @@ module ArrowDataset
require "arrow-dataset/dataset"
require "arrow-dataset/file-format"
require "arrow-dataset/file-system-dataset-factory"
+ require "arrow-dataset/finish-options"
end
end
end
diff --git a/ruby/red-arrow-dataset/test/test-arrow-table.rb b/ruby/red-arrow-dataset/test/test-arrow-table.rb
index 1913063741..e875399b69 100644
--- a/ruby/red-arrow-dataset/test/test-arrow-table.rb
+++ b/ruby/red-arrow-dataset/test/test-arrow-table.rb
@@ -76,5 +76,18 @@ class TestArrowTable < Test::Unit::TestCase
Arrow::Table.load(@dir,
filter: ["equal", :visible, true]))
end
+
+ def test_schema
+ uri = build_file_uri(@path1)
+ @table1.save(uri)
+ schema = Arrow::Schema.new(visible: :boolean,
+ point: :int64)
+ assert_equal(Arrow::Table.new(schema,
+ [
+ @table1[:visible].data,
+ @table1[:point].cast(:int64),
+ ]),
+ Arrow::Table.load(uri, schema: schema))
+ end
end
end