You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by ko...@apache.org on 2022/06/22 23:58:32 UTC
[arrow] branch master updated: ARROW-14518: [Ruby] Add support for Arrow::Array.new([BigDecimal]) (#13377)
This is an automated email from the ASF dual-hosted git repository.
kou pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/master by this push:
new 6fd4d3423e ARROW-14518: [Ruby] Add support for Arrow::Array.new([BigDecimal]) (#13377)
6fd4d3423e is described below
commit 6fd4d3423e3f0d597fac547ee74f4ccba2812c8a
Author: Sutou Kouhei <ko...@clear-code.com>
AuthorDate: Thu Jun 23 08:58:26 2022 +0900
ARROW-14518: [Ruby] Add support for Arrow::Array.new([BigDecimal]) (#13377)
This requires bigdecimal 3.1.0 or later for BigDecimal#scale.
Arrow::Array.new([BigDecimal]) detects the max precision and scale
from BigDecimals and creates suitable Arrow::Decimal{128,256}DataType
automatically.
This also truncates the given BigDecimal when the specified
Arrow::Decimal{128,256}DataType doesn't have enough scale. This
still doesn't check precision. If a user specifies data that have too
much precision, the data are used as-is.
Authored-by: Sutou Kouhei <ko...@clear-code.com>
Signed-off-by: Sutou Kouhei <ko...@clear-code.com>
---
ruby/red-arrow/lib/arrow/array-builder.rb | 46 +++++++++++++---
.../lib/arrow/decimal128-array-builder.rb | 22 +++++---
ruby/red-arrow/lib/arrow/decimal128.rb | 14 +++++
.../lib/arrow/decimal256-array-builder.rb | 22 +++++---
ruby/red-arrow/lib/arrow/decimal256.rb | 14 +++++
ruby/red-arrow/lib/arrow/loader.rb | 1 +
.../arrow/string-array-builder.rb} | 27 ++++------
ruby/red-arrow/red-arrow.gemspec | 2 +-
ruby/red-arrow/test/test-array-builder.rb | 62 ++++++++++++++++++++++
.../test/test-decimal128-array-builder.rb | 14 +++++
ruby/red-arrow/test/test-decimal128-array.rb | 4 ++
ruby/red-arrow/test/test-decimal128.rb | 28 +++++++++-
.../test/test-decimal256-array-builder.rb | 14 +++++
ruby/red-arrow/test/test-decimal256-array.rb | 4 ++
ruby/red-arrow/test/test-decimal256.rb | 28 +++++++++-
15 files changed, 262 insertions(+), 40 deletions(-)
diff --git a/ruby/red-arrow/lib/arrow/array-builder.rb b/ruby/red-arrow/lib/arrow/array-builder.rb
index 651aed9623..876fd71120 100644
--- a/ruby/red-arrow/lib/arrow/array-builder.rb
+++ b/ruby/red-arrow/lib/arrow/array-builder.rb
@@ -33,6 +33,11 @@ module Arrow
end
if builder_info
builder = builder_info[:builder]
+ if builder.nil? and builder_info[:builder_type]
+ builder = create_builder(builder_info)
+ end
+ end
+ if builder
builder.build(values)
else
Arrow::StringArray.new(values)
@@ -121,15 +126,28 @@ module Arrow
detected: true,
}
when BigDecimal
- if value.to_arrow.is_a?(Decimal128)
- {
- builder: Decimal128ArrayBuilder.new,
- }
- else
+ builder_info ||= {}
+ if builder_info[:builder] or value.nan? or value.infinite?
{
- builder: Decimal256ArrayBuilder.new,
+ builder: StringArrayBuilder.new,
detected: true,
}
+ else
+ precision = [builder_info[:precision] || 0, value.precision].max
+ scale = [builder_info[:scale] || 0, value.scale].max
+ if precision <= Decimal128DataType::MAX_PRECISION
+ {
+ builder_type: :decimal128,
+ precision: precision,
+ scale: scale,
+ }
+ else
+ {
+ builder_type: :decimal256,
+ precision: precision,
+ scale: scale,
+ }
+ end
end
when ::Array
sub_builder_info = nil
@@ -154,6 +172,22 @@ module Arrow
}
end
end
+
+ def create_builder(builder_info)
+ builder_type = builder_info[:builder_type]
+ case builder_type
+ when :decimal128
+ data_type = Decimal128DataType.new(builder_info[:precision],
+ builder_info[:scale])
+ Decimal128ArrayBuilder.new(data_type)
+ when :decimal256
+ data_type = Decimal256DataType.new(builder_info[:precision],
+ builder_info[:scale])
+ Decimal256ArrayBuilder.new(data_type)
+ else
+ nil
+ end
+ end
end
def build(values)
diff --git a/ruby/red-arrow/lib/arrow/decimal128-array-builder.rb b/ruby/red-arrow/lib/arrow/decimal128-array-builder.rb
index d380ce070d..ff245e48bc 100644
--- a/ruby/red-arrow/lib/arrow/decimal128-array-builder.rb
+++ b/ruby/red-arrow/lib/arrow/decimal128-array-builder.rb
@@ -42,16 +42,26 @@ module Arrow
end
private
+ def precision
+ @precision ||= value_data_type.precision
+ end
+
+ def scale
+ @scale ||= value_data_type.scale
+ end
+
def normalize_value(value)
case value
- when String
- Decimal128.new(value)
- when Float
- Decimal128.new(value.to_s)
when BigDecimal
- Decimal128.new(value.to_s)
+ if value.nan? or value.infinite?
+ message = "can't use #{value} as an Arrow::Decimal128Array value"
+ raise FloatDomainError, message
+ end
+ integer, decimal = value.to_s("f").split(".", 2)
+ decimal = decimal[0, scale].ljust(scale, "0")
+ Decimal128.new("#{integer}.#{decimal}")
else
- value
+ Decimal128.try_convert(value) || value
end
end
end
diff --git a/ruby/red-arrow/lib/arrow/decimal128.rb b/ruby/red-arrow/lib/arrow/decimal128.rb
index bf853ae7f1..6b9e5008c0 100644
--- a/ruby/red-arrow/lib/arrow/decimal128.rb
+++ b/ruby/red-arrow/lib/arrow/decimal128.rb
@@ -17,6 +17,20 @@
module Arrow
class Decimal128
+ class << self
+ # @api private
+ def try_convert(value)
+ case value
+ when String
+ new(value)
+ when Float
+ new(value.to_s)
+ else
+ nil
+ end
+ end
+ end
+
alias_method :to_s_raw, :to_s
# @overload to_s
diff --git a/ruby/red-arrow/lib/arrow/decimal256-array-builder.rb b/ruby/red-arrow/lib/arrow/decimal256-array-builder.rb
index fb89ff00b6..0deb5a4fd7 100644
--- a/ruby/red-arrow/lib/arrow/decimal256-array-builder.rb
+++ b/ruby/red-arrow/lib/arrow/decimal256-array-builder.rb
@@ -45,16 +45,26 @@ module Arrow
end
private
+ def precision
+ @precision ||= value_data_type.precision
+ end
+
+ def scale
+ @scale ||= value_data_type.scale
+ end
+
def normalize_value(value)
case value
- when String
- Decimal256.new(value)
- when Float
- Decimal256.new(value.to_s)
when BigDecimal
- Decimal256.new(value.to_s)
+ if value.nan? or value.infinite?
+ message = "can't use #{value} as an Arrow::Decimal256Array value"
+ raise FloatDomainError, message
+ end
+ integer, decimal = value.to_s("f").split(".", 2)
+ decimal = decimal[0, scale].ljust(scale, "0")
+ Decimal256.new("#{integer}.#{decimal}")
else
- value
+ Decimal256.try_convert(value) || value
end
end
end
diff --git a/ruby/red-arrow/lib/arrow/decimal256.rb b/ruby/red-arrow/lib/arrow/decimal256.rb
index 1a7097a4d3..d79558a73a 100644
--- a/ruby/red-arrow/lib/arrow/decimal256.rb
+++ b/ruby/red-arrow/lib/arrow/decimal256.rb
@@ -17,6 +17,20 @@
module Arrow
class Decimal256
+ class << self
+ # @api private
+ def try_convert(value)
+ case value
+ when String
+ new(value)
+ when Float
+ new(value.to_s)
+ else
+ nil
+ end
+ end
+ end
+
alias_method :to_s_raw, :to_s
# @overload to_s
diff --git a/ruby/red-arrow/lib/arrow/loader.rb b/ruby/red-arrow/lib/arrow/loader.rb
index 4cc6e63465..58b11e567f 100644
--- a/ruby/red-arrow/lib/arrow/loader.rb
+++ b/ruby/red-arrow/lib/arrow/loader.rb
@@ -107,6 +107,7 @@ module Arrow
require "arrow/source-node-options"
require "arrow/sparse-union-data-type"
require "arrow/string-dictionary-array-builder"
+ require "arrow/string-array-builder"
require "arrow/struct-array"
require "arrow/struct-array-builder"
require "arrow/struct-data-type"
diff --git a/ruby/red-arrow/test/test-decimal256-array.rb b/ruby/red-arrow/lib/arrow/string-array-builder.rb
similarity index 62%
copy from ruby/red-arrow/test/test-decimal256-array.rb
copy to ruby/red-arrow/lib/arrow/string-array-builder.rb
index d45015677e..0922d57388 100644
--- a/ruby/red-arrow/test/test-decimal256-array.rb
+++ b/ruby/red-arrow/lib/arrow/string-array-builder.rb
@@ -15,23 +15,16 @@
# specific language governing permissions and limitations
# under the License.
-class Decimal256ArrayTest < Test::Unit::TestCase
- sub_test_case(".new") do
- test("build") do
- values = [
- 10.1,
- nil,
- "10.1",
- BigDecimal("10.1"),
- ]
- array = Arrow::Decimal256Array.new({precision: 3, scale: 1}, values)
- assert_equal([
- BigDecimal("10.1"),
- nil,
- BigDecimal("10.1"),
- BigDecimal("10.1"),
- ],
- array.to_a)
+module Arrow
+ class StringArrayBuilder
+ private
+ def convert_to_arrow_value(value)
+ case value
+ when GLib::Bytes, String
+ value
+ else
+ value.to_s
+ end
end
end
end
diff --git a/ruby/red-arrow/red-arrow.gemspec b/ruby/red-arrow/red-arrow.gemspec
index 449969ee16..6be1f0b626 100644
--- a/ruby/red-arrow/red-arrow.gemspec
+++ b/ruby/red-arrow/red-arrow.gemspec
@@ -46,7 +46,7 @@ Gem::Specification.new do |spec|
spec.test_files += Dir.glob("test/**/*")
spec.extensions = ["ext/arrow/extconf.rb"]
- spec.add_runtime_dependency("bigdecimal", ">= 2.0.3")
+ spec.add_runtime_dependency("bigdecimal", ">= 3.1.0")
spec.add_runtime_dependency("extpp", ">= 0.0.7")
spec.add_runtime_dependency("gio2", ">= 3.5.0")
spec.add_runtime_dependency("native-package-installer")
diff --git a/ruby/red-arrow/test/test-array-builder.rb b/ruby/red-arrow/test/test-array-builder.rb
index 318167d511..fb48aba8a4 100644
--- a/ruby/red-arrow/test/test-array-builder.rb
+++ b/ruby/red-arrow/test/test-array-builder.rb
@@ -68,6 +68,68 @@ class ArrayBuilderTest < Test::Unit::TestCase
])
end
+ test("decimal + string") do
+ raw_array = [BigDecimal("10.1"), "10.1"]
+ array = Arrow::ArrayBuilder.build(raw_array)
+ assert_equal(raw_array.collect(&:to_s), array.to_a)
+ end
+
+ test("NaN") do
+ raw_array = [BigDecimal("10.1"), BigDecimal::NAN]
+ array = Arrow::ArrayBuilder.build(raw_array)
+ assert_equal(raw_array.collect(&:to_s), array.to_a)
+ end
+
+ test("Infinity") do
+ raw_array = [BigDecimal("10.1"), BigDecimal::INFINITY]
+ array = Arrow::ArrayBuilder.build(raw_array)
+ assert_equal(raw_array.collect(&:to_s), array.to_a)
+ end
+
+ test("decimal128") do
+ values = [
+ BigDecimal("10.1"),
+ BigDecimal("1.11"),
+ BigDecimal("1"),
+ ]
+ array = Arrow::Array.new(values)
+ data_type = Arrow::Decimal128DataType.new(3, 2)
+ assert_equal({
+ data_type: data_type,
+ values: [
+ BigDecimal("10.1"),
+ BigDecimal("1.11"),
+ BigDecimal("1"),
+ ],
+ },
+ {
+ data_type: array.value_data_type,
+ values: array.to_a,
+ })
+ end
+
+ test("decimal256") do
+ values = [
+ BigDecimal("1" * 40 + ".1"),
+ BigDecimal("1" * 38 + ".11"),
+ BigDecimal("1" * 37),
+ ]
+ array = Arrow::Array.new(values)
+ data_type = Arrow::Decimal256DataType.new(41, 2)
+ assert_equal({
+ data_type: data_type,
+ values: [
+ BigDecimal("1" * 40 + ".1"),
+ BigDecimal("1" * 38 + ".11"),
+ BigDecimal("1" * 37),
+ ],
+ },
+ {
+ data_type: array.value_data_type,
+ values: array.to_a,
+ })
+ end
+
test("list<boolean>s") do
assert_build(Arrow::ArrayBuilder,
[
diff --git a/ruby/red-arrow/test/test-decimal128-array-builder.rb b/ruby/red-arrow/test/test-decimal128-array-builder.rb
index 31d58bd581..6a22492703 100644
--- a/ruby/red-arrow/test/test-decimal128-array-builder.rb
+++ b/ruby/red-arrow/test/test-decimal128-array-builder.rb
@@ -55,6 +55,20 @@ class Decimal128ArrayBuilderTest < Test::Unit::TestCase
assert_equal(BigDecimal("10.1"),
array[0])
end
+
+ test("BigDecimal::NAN") do
+ message = "can't use NaN as an Arrow::Decimal128Array value"
+ assert_raise(FloatDomainError.new(message)) do
+ @builder.append_value(BigDecimal::NAN)
+ end
+ end
+
+ test("BigDecimal::INFINITY") do
+ message = "can't use Infinity as an Arrow::Decimal128Array value"
+ assert_raise(FloatDomainError.new(message)) do
+ @builder.append_value(BigDecimal::INFINITY)
+ end
+ end
end
sub_test_case("#append_values") do
diff --git a/ruby/red-arrow/test/test-decimal128-array.rb b/ruby/red-arrow/test/test-decimal128-array.rb
index 9cc64064e4..a50e2cf4a4 100644
--- a/ruby/red-arrow/test/test-decimal128-array.rb
+++ b/ruby/red-arrow/test/test-decimal128-array.rb
@@ -23,6 +23,8 @@ class Decimal128ArrayTest < Test::Unit::TestCase
nil,
"10.1",
BigDecimal("10.1"),
+ BigDecimal("1.11"),
+ BigDecimal("1"),
]
array = Arrow::Decimal128Array.new({precision: 3, scale: 1}, values)
assert_equal([
@@ -30,6 +32,8 @@ class Decimal128ArrayTest < Test::Unit::TestCase
nil,
BigDecimal("10.1"),
BigDecimal("10.1"),
+ BigDecimal("1.1"),
+ BigDecimal("1"),
],
array.to_a)
end
diff --git a/ruby/red-arrow/test/test-decimal128.rb b/ruby/red-arrow/test/test-decimal128.rb
index 9e7f8792cf..7da2c55f2b 100644
--- a/ruby/red-arrow/test/test-decimal128.rb
+++ b/ruby/red-arrow/test/test-decimal128.rb
@@ -28,9 +28,21 @@ class Decimal128Test < Test::Unit::TestCase
end
end
+ test("String") do
+ assert do
+ @decimal128 == "10.1"
+ end
+ end
+
+ test("Float") do
+ assert do
+ @decimal128 == 10.1
+ end
+ end
+
test("not Arrow::Decimal128") do
assert do
- not (@decimal128 == 10.1)
+ not (@decimal128 == :"10.1")
end
end
end
@@ -42,9 +54,21 @@ class Decimal128Test < Test::Unit::TestCase
end
end
+ test("String") do
+ assert do
+ not (@decimal128 != "10.1")
+ end
+ end
+
+ test("Float") do
+ assert do
+ not (@decimal128 != 10.1)
+ end
+ end
+
test("not Arrow::Decimal128") do
assert do
- @decimal128 != 10.1
+ @decimal128 != :"10.1"
end
end
end
diff --git a/ruby/red-arrow/test/test-decimal256-array-builder.rb b/ruby/red-arrow/test/test-decimal256-array-builder.rb
index f0769b6623..7ca14a67bc 100644
--- a/ruby/red-arrow/test/test-decimal256-array-builder.rb
+++ b/ruby/red-arrow/test/test-decimal256-array-builder.rb
@@ -55,6 +55,20 @@ class Decimal256ArrayBuilderTest < Test::Unit::TestCase
assert_equal(BigDecimal("10.1"),
array[0])
end
+
+ test("BigDecimal::NAN") do
+ message = "can't use NaN as an Arrow::Decimal256Array value"
+ assert_raise(FloatDomainError.new(message)) do
+ @builder.append_value(BigDecimal::NAN)
+ end
+ end
+
+ test("BigDecimal::INFINITY") do
+ message = "can't use Infinity as an Arrow::Decimal256Array value"
+ assert_raise(FloatDomainError.new(message)) do
+ @builder.append_value(BigDecimal::INFINITY)
+ end
+ end
end
sub_test_case("#append_values") do
diff --git a/ruby/red-arrow/test/test-decimal256-array.rb b/ruby/red-arrow/test/test-decimal256-array.rb
index d45015677e..ed542f2d6c 100644
--- a/ruby/red-arrow/test/test-decimal256-array.rb
+++ b/ruby/red-arrow/test/test-decimal256-array.rb
@@ -23,6 +23,8 @@ class Decimal256ArrayTest < Test::Unit::TestCase
nil,
"10.1",
BigDecimal("10.1"),
+ BigDecimal("1.11"),
+ BigDecimal("1"),
]
array = Arrow::Decimal256Array.new({precision: 3, scale: 1}, values)
assert_equal([
@@ -30,6 +32,8 @@ class Decimal256ArrayTest < Test::Unit::TestCase
nil,
BigDecimal("10.1"),
BigDecimal("10.1"),
+ BigDecimal("1.1"),
+ BigDecimal("1"),
],
array.to_a)
end
diff --git a/ruby/red-arrow/test/test-decimal256.rb b/ruby/red-arrow/test/test-decimal256.rb
index 422167f992..8170ae3c51 100644
--- a/ruby/red-arrow/test/test-decimal256.rb
+++ b/ruby/red-arrow/test/test-decimal256.rb
@@ -28,9 +28,21 @@ class Decimal256Test < Test::Unit::TestCase
end
end
+ test("String") do
+ assert do
+ @decimal256 == "10.1"
+ end
+ end
+
+ test("Float") do
+ assert do
+ @decimal256 == 10.1
+ end
+ end
+
test("not Arrow::Decimal256") do
assert do
- not (@decimal256 == 10.1)
+ not (@decimal256 == :"10.1")
end
end
end
@@ -42,9 +54,21 @@ class Decimal256Test < Test::Unit::TestCase
end
end
+ test("String") do
+ assert do
+ not (@decimal256 != "10.1")
+ end
+ end
+
+ test("Float") do
+ assert do
+ not (@decimal256 != 10.1)
+ end
+ end
+
test("not Arrow::Decimal256") do
assert do
- @decimal256 != 10.1
+ @decimal256 != :"10.1"
end
end
end