You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by ko...@apache.org on 2023/01/02 12:44:24 UTC

[arrow] branch master updated: ARROW-15206: [Ruby] Add support for `Arrow::Table.load(uri, schema:)` (#15148)

This is an automated email from the ASF dual-hosted git repository.

kou pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/master by this push:
     new 92f1dd0339 ARROW-15206: [Ruby] Add support for `Arrow::Table.load(uri, schema:)` (#15148)
92f1dd0339 is described below

commit 92f1dd03397110bbf8686164a9d54f94366c46a4
Author: Sutou Kouhei <ko...@clear-code.com>
AuthorDate: Mon Jan 2 21:43:28 2023 +0900

    ARROW-15206: [Ruby] Add support for `Arrow::Table.load(uri, schema:)` (#15148)
    
    Authored-by: Sutou Kouhei <ko...@clear-code.com>
    Signed-off-by: Sutou Kouhei <ko...@clear-code.com>
---
 .../lib/arrow-dataset/arrow-table-loadable.rb       | 14 +++++++++++---
 ruby/red-arrow-dataset/lib/arrow-dataset/dataset.rb |  7 +++++--
 .../arrow-dataset/{dataset.rb => finish-options.rb} | 21 +++++++++++++++------
 ruby/red-arrow-dataset/lib/arrow-dataset/loader.rb  |  1 +
 ruby/red-arrow-dataset/test/test-arrow-table.rb     | 13 +++++++++++++
 5 files changed, 45 insertions(+), 11 deletions(-)

diff --git a/ruby/red-arrow-dataset/lib/arrow-dataset/arrow-table-loadable.rb b/ruby/red-arrow-dataset/lib/arrow-dataset/arrow-table-loadable.rb
index 14c8dce6f5..b3e6b1a109 100644
--- a/ruby/red-arrow-dataset/lib/arrow-dataset/arrow-table-loadable.rb
+++ b/ruby/red-arrow-dataset/lib/arrow-dataset/arrow-table-loadable.rb
@@ -36,13 +36,21 @@ module ArrowDataset
     end
 
     def internal_load_from_uri(uri)
-      format = FileFormat.resolve(@options[:format])
+      options = @options.dup
+      format = FileFormat.resolve(options.delete(:format))
       dataset = FileSystemDataset.build(format) do |factory|
         factory.file_system_uri = uri
+        finish_options = FinishOptions.new
+        FinishOptions.instance_methods(false).each do |method|
+          next unless method.end_with?("=")
+          value = options.delete(method[0..-2].to_sym)
+          next if value.nil?
+          finish_options.public_send(method, value)
+        end
+        finish_options
       end
       scanner_builder = dataset.begin_scan
-      @options.each do |key, value|
-        next if key == :format
+      options.each do |key, value|
         next if value.nil?
         setter = "#{key}="
         next unless scanner_builder.respond_to?(setter)
diff --git a/ruby/red-arrow-dataset/lib/arrow-dataset/dataset.rb b/ruby/red-arrow-dataset/lib/arrow-dataset/dataset.rb
index a658fc3f2e..00d0546257 100644
--- a/ruby/red-arrow-dataset/lib/arrow-dataset/dataset.rb
+++ b/ruby/red-arrow-dataset/lib/arrow-dataset/dataset.rb
@@ -21,8 +21,11 @@ module ArrowDataset
       def build(*args)
         factory_class = ArrowDataset.const_get("#{name}Factory")
         factory = factory_class.new(*args)
-        yield(factory)
-        factory.finish
+        options = yield(factory)
+        unless options.is_a?(FinishOptions)
+          options = FinishOptions.try_convert(options)
+        end
+        factory.finish(options)
       end
     end
   end
diff --git a/ruby/red-arrow-dataset/lib/arrow-dataset/dataset.rb b/ruby/red-arrow-dataset/lib/arrow-dataset/finish-options.rb
similarity index 70%
copy from ruby/red-arrow-dataset/lib/arrow-dataset/dataset.rb
copy to ruby/red-arrow-dataset/lib/arrow-dataset/finish-options.rb
index a658fc3f2e..d26e4ba8cc 100644
--- a/ruby/red-arrow-dataset/lib/arrow-dataset/dataset.rb
+++ b/ruby/red-arrow-dataset/lib/arrow-dataset/finish-options.rb
@@ -16,13 +16,22 @@
 # under the License.
 
 module ArrowDataset
-  class Dataset
+  class FinishOptions
     class << self
-      def build(*args)
-        factory_class = ArrowDataset.const_get("#{name}Factory")
-        factory = factory_class.new(*args)
-        yield(factory)
-        factory.finish
+      # @api private
+      def try_convert(value)
+        case value
+        when Hash
+          options = new
+          value.each do |k, v|
+            setter = "#{k}="
+            next unless options.respond_to?(setter)
+            options.public_send(setter, v)
+          end
+          options
+        else
+          nil
+        end
       end
     end
   end
diff --git a/ruby/red-arrow-dataset/lib/arrow-dataset/loader.rb b/ruby/red-arrow-dataset/lib/arrow-dataset/loader.rb
index b1be000f7c..40748dcb49 100644
--- a/ruby/red-arrow-dataset/lib/arrow-dataset/loader.rb
+++ b/ruby/red-arrow-dataset/lib/arrow-dataset/loader.rb
@@ -34,6 +34,7 @@ module ArrowDataset
       require "arrow-dataset/dataset"
       require "arrow-dataset/file-format"
       require "arrow-dataset/file-system-dataset-factory"
+      require "arrow-dataset/finish-options"
     end
   end
 end
diff --git a/ruby/red-arrow-dataset/test/test-arrow-table.rb b/ruby/red-arrow-dataset/test/test-arrow-table.rb
index 1913063741..e875399b69 100644
--- a/ruby/red-arrow-dataset/test/test-arrow-table.rb
+++ b/ruby/red-arrow-dataset/test/test-arrow-table.rb
@@ -76,5 +76,18 @@ class TestArrowTable < Test::Unit::TestCase
                    Arrow::Table.load(@dir,
                                      filter: ["equal", :visible, true]))
     end
+
+    def test_schema
+      uri = build_file_uri(@path1)
+      @table1.save(uri)
+      schema = Arrow::Schema.new(visible: :boolean,
+                                 point: :int64)
+      assert_equal(Arrow::Table.new(schema,
+                                    [
+                                      @table1[:visible].data,
+                                      @table1[:point].cast(:int64),
+                                    ]),
+                   Arrow::Table.load(uri, schema: schema))
+    end
   end
 end