You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by ko...@apache.org on 2023/01/06 05:16:40 UTC
[arrow] branch master updated: ARROW-18086: [Ruby] Add support for HalfFloat (#15204)
This is an automated email from the ASF dual-hosted git repository.
kou pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/master by this push:
new d2481a610f ARROW-18086: [Ruby] Add support for HalfFloat (#15204)
d2481a610f is described below
commit d2481a610f7653e1b965366461dd6be0c22c1fda
Author: Sutou Kouhei <ko...@clear-code.com>
AuthorDate: Fri Jan 6 14:16:28 2023 +0900
ARROW-18086: [Ruby] Add support for HalfFloat (#15204)
Authored-by: Sutou Kouhei <ko...@clear-code.com>
Signed-off-by: Sutou Kouhei <ko...@clear-code.com>
---
ruby/red-arrow/ext/arrow/converters.hpp | 47 +++++---
ruby/red-arrow/ext/arrow/raw-records.cpp | 3 +-
ruby/red-arrow/ext/arrow/values.cpp | 3 +-
.../lib/arrow/half-float-array-builder.rb | 32 +++++
ruby/red-arrow/lib/arrow/half-float-array.rb | 24 ++++
ruby/red-arrow/lib/arrow/half-float.rb | 118 +++++++++++++++++++
ruby/red-arrow/lib/arrow/loader.rb | 4 +
.../test/raw-records/test-basic-arrays.rb | 10 ++
ruby/red-arrow/test/test-half-float-array.rb | 43 +++++++
ruby/red-arrow/test/test-half-float.rb | 130 +++++++++++++++++++++
ruby/red-arrow/test/values/test-basic-arrays.rb | 10 ++
11 files changed, 406 insertions(+), 18 deletions(-)
diff --git a/ruby/red-arrow/ext/arrow/converters.hpp b/ruby/red-arrow/ext/arrow/converters.hpp
index 5a500574de..28955432a7 100644
--- a/ruby/red-arrow/ext/arrow/converters.hpp
+++ b/ruby/red-arrow/ext/arrow/converters.hpp
@@ -106,10 +106,34 @@ namespace red_arrow {
return ULL2NUM(array.Value(i));
}
- // TODO
- // inline VALUE convert(const arrow::HalfFloatArray& array,
- // const int64_t i) {
- // }
+ inline VALUE convert(const arrow::HalfFloatArray& array,
+ const int64_t i) {
+ const auto value = array.Value(i);
+ // | sign (1 bit) | exponent (5 bit) | fraction (10 bit) |
+ constexpr auto exponent_n_bits = 5;
+ static const auto exponent_mask =
+ static_cast<uint32_t>(std::pow(2.0, exponent_n_bits) - 1);
+ constexpr auto exponent_bias = 15;
+ constexpr auto fraction_n_bits = 10;
+ static const auto fraction_mask =
+ static_cast<uint32_t>(std::pow(2.0, fraction_n_bits)) - 1;
+ static const auto fraction_denominator = std::pow(2.0, fraction_n_bits);
+ const auto sign = value >> (exponent_n_bits + fraction_n_bits);
+ const auto exponent = (value >> fraction_n_bits) & exponent_mask;
+ const auto fraction = value & fraction_mask;
+ if (exponent == exponent_mask) {
+ if (sign == 0) {
+ return DBL2NUM(HUGE_VAL);
+ } else {
+ return DBL2NUM(-HUGE_VAL);
+ }
+ } else {
+ const auto implicit_fraction = (exponent == 0) ? 0 : 1;
+ return DBL2NUM(((sign == 0) ? 1 : -1) *
+ std::pow(2.0, exponent - exponent_bias) *
+ (implicit_fraction + fraction / fraction_denominator));
+ }
+ }
inline VALUE convert(const arrow::FloatArray& array,
const int64_t i) {
@@ -320,8 +344,7 @@ namespace red_arrow {
VISIT(UInt16)
VISIT(UInt32)
VISIT(UInt64)
- // TODO
- // VISIT(HalfFloat)
+ VISIT(HalfFloat)
VISIT(Float)
VISIT(Double)
VISIT(Binary)
@@ -427,8 +450,7 @@ namespace red_arrow {
VISIT(UInt16)
VISIT(UInt32)
VISIT(UInt64)
- // TODO
- // VISIT(HalfFloat)
+ VISIT(HalfFloat)
VISIT(Float)
VISIT(Double)
VISIT(Binary)
@@ -530,8 +552,7 @@ namespace red_arrow {
VISIT(UInt16)
VISIT(UInt32)
VISIT(UInt64)
- // TODO
- // VISIT(HalfFloat)
+ VISIT(HalfFloat)
VISIT(Float)
VISIT(Double)
VISIT(Binary)
@@ -634,8 +655,7 @@ namespace red_arrow {
VISIT(UInt16)
VISIT(UInt32)
VISIT(UInt64)
- // TODO
- // VISIT(HalfFloat)
+ VISIT(HalfFloat)
VISIT(Float)
VISIT(Double)
VISIT(Binary)
@@ -761,8 +781,7 @@ namespace red_arrow {
VISIT(UInt16)
VISIT(UInt32)
VISIT(UInt64)
- // TODO
- // VISIT(HalfFloat)
+ VISIT(HalfFloat)
VISIT(Float)
VISIT(Double)
VISIT(Binary)
diff --git a/ruby/red-arrow/ext/arrow/raw-records.cpp b/ruby/red-arrow/ext/arrow/raw-records.cpp
index e34ea2d3c8..e0326f9d2f 100644
--- a/ruby/red-arrow/ext/arrow/raw-records.cpp
+++ b/ruby/red-arrow/ext/arrow/raw-records.cpp
@@ -84,8 +84,7 @@ namespace red_arrow {
VISIT(UInt16)
VISIT(UInt32)
VISIT(UInt64)
- // TODO
- // VISIT(HalfFloat)
+ VISIT(HalfFloat)
VISIT(Float)
VISIT(Double)
VISIT(Binary)
diff --git a/ruby/red-arrow/ext/arrow/values.cpp b/ruby/red-arrow/ext/arrow/values.cpp
index 0fcb46e1bb..e412ce2273 100644
--- a/ruby/red-arrow/ext/arrow/values.cpp
+++ b/ruby/red-arrow/ext/arrow/values.cpp
@@ -65,8 +65,7 @@ namespace red_arrow {
VISIT(UInt16)
VISIT(UInt32)
VISIT(UInt64)
- // TODO
- // VISIT(HalfFloat)
+ VISIT(HalfFloat)
VISIT(Float)
VISIT(Double)
VISIT(Binary)
diff --git a/ruby/red-arrow/lib/arrow/half-float-array-builder.rb b/ruby/red-arrow/lib/arrow/half-float-array-builder.rb
new file mode 100644
index 0000000000..2b171e57a9
--- /dev/null
+++ b/ruby/red-arrow/lib/arrow/half-float-array-builder.rb
@@ -0,0 +1,32 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
module Arrow
  class HalfFloatArrayBuilder
    private

    # Normalizes one element before it is handed to the builder.
    #
    # A Float is encoded through HalfFloat and replaced by the result of
    # HalfFloat#to_uint16; a HalfFloat contributes its own #to_uint16 value.
    # Any other object (for example an Integer or nil) is returned untouched.
    def convert_to_arrow_value(value)
      return value.to_uint16 if value.is_a?(HalfFloat)
      return HalfFloat.new(value).to_uint16 if value.is_a?(Float)

      value
    end
  end
end
diff --git a/ruby/red-arrow/lib/arrow/half-float-array.rb b/ruby/red-arrow/lib/arrow/half-float-array.rb
new file mode 100644
index 0000000000..94b8ebd51a
--- /dev/null
+++ b/ruby/red-arrow/lib/arrow/half-float-array.rb
@@ -0,0 +1,24 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
module Arrow
  class HalfFloatArray
    # Returns the element at index +i+ as a Ruby Float.
    #
    # The value obtained from get_raw_value is decoded with HalfFloat
    # (presumably it is the element's raw 16-bit pattern; confirm against
    # the binding that provides get_raw_value).
    def get_value(i)
      raw_bits = get_raw_value(i)
      HalfFloat.new(raw_bits).to_f
    end
  end
end
diff --git a/ruby/red-arrow/lib/arrow/half-float.rb b/ruby/red-arrow/lib/arrow/half-float.rb
new file mode 100644
index 0000000000..e6fe976a29
--- /dev/null
+++ b/ruby/red-arrow/lib/arrow/half-float.rb
@@ -0,0 +1,118 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
module Arrow
  # Software model of a half-precision (binary16) floating point number.
  #
  # A value is stored as its three bit fields:
  #
  #   | sign (1 bit) | exponent (5 bit) | fraction (10 bit) |
  #
  # * exponent == EXPONENT_MASK, fraction == 0: +/- infinity
  # * exponent == EXPONENT_MASK, fraction != 0: NaN
  # * exponent == 0: zero or subnormal (no implicit leading 1)
  # * otherwise: normal number (implicit leading 1)
  class HalfFloat
    # Largest / smallest finite values representable in half precision.
    MAX = 65504
    MIN = -65504
    EXPONENT_N_BITS = 5
    EXPONENT_MASK = (2 ** EXPONENT_N_BITS) - 1
    EXPONENT_BIAS = 15
    FRACTION_N_BITS = 10
    FRACTION_MASK = (2 ** FRACTION_N_BITS) - 1
    FRACTION_DENOMINATOR = 2.0 ** FRACTION_N_BITS
    # Fraction used when a Float NaN is encoded (most significant
    # fraction bit set).
    QUIET_NAN_FRACTION = 1 << (FRACTION_N_BITS - 1)

    attr_reader :sign
    attr_reader :exponent
    attr_reader :fraction

    # Builds a half float from one of:
    #
    # * a Float: encoded to the nearest half float; magnitudes above MAX
    #   become infinity and NaN becomes a NaN pattern
    # * any other single object: treated as the raw 16-bit pattern
    # * three arguments: sign, exponent and fraction, stored as given
    #
    # Raises ArgumentError for any other number of arguments.
    def initialize(*args)
      n_args = args.size
      case n_args
      when 1
        if args[0].is_a?(Float)
          @sign, @exponent, @fraction = deconstruct_float(args[0])
        else
          @sign, @exponent, @fraction = deconstruct_uint16(args[0])
        end
      when 3
        @sign, @exponent, @fraction = *args
      else
        message = "wrong number of arguments (given #{n_args}, expected 1 or 3)"
        raise ArgumentError, message
      end
    end

    # Decodes the stored bit fields into a Ruby Float.
    def to_f
      if @exponent == EXPONENT_MASK
        # All-ones exponent: infinity only when the fraction is empty.
        return Float::NAN unless @fraction.zero?
        return @sign.zero? ? Float::INFINITY : -Float::INFINITY
      end

      if @exponent.zero?
        # Zero / subnormal: no implicit 1 and the exponent of the
        # smallest normal number.
        significand = @fraction / FRACTION_DENOMINATOR
        unbiased_exponent = 1 - EXPONENT_BIAS
      else
        significand = 1 + @fraction / FRACTION_DENOMINATOR
        unbiased_exponent = @exponent - EXPONENT_BIAS
      end
      magnitude = Math.ldexp(significand, unbiased_exponent)
      @sign.zero? ? magnitude : -magnitude
    end

    # Packs the three bit fields into a single 16-bit Integer.
    def to_uint16
      (@sign << (EXPONENT_N_BITS + FRACTION_N_BITS)) |
        (@exponent << FRACTION_N_BITS) |
        @fraction
    end

    # Returns the 16-bit pattern as a 2-byte String in native byte order.
    def pack
      [to_uint16].pack("S")
    end

    private

    # Splits a Float into [sign, exponent, fraction] for half precision.
    def deconstruct_float(float)
      return [0, EXPONENT_MASK, QUIET_NAN_FRACTION] if float.nan?

      sign = negative_sign?(float) ? 1 : 0
      magnitude = float.abs
      # Covers real infinities as well as finite values that are too large.
      return [sign, EXPONENT_MASK, 0] if magnitude > MAX
      return [sign, 0, 0] if magnitude.zero?

      # magnitude == significand * 2**binary_exponent, significand in [0.5, 1)
      significand, binary_exponent = Math.frexp(magnitude)
      exponent = binary_exponent - 1 + EXPONENT_BIAS
      if exponent <= 0
        # Subnormal range: count in units of the smallest subnormal.
        exponent = 0
        smallest_subnormal_shift = EXPONENT_BIAS - 1 + FRACTION_N_BITS
        fraction = Math.ldexp(magnitude, smallest_subnormal_shift).round
      else
        fraction = ((significand * 2 - 1) * FRACTION_DENOMINATOR).round
      end
      if fraction > FRACTION_MASK
        # Rounding reached the next power of two: carry into the exponent.
        # This cannot reach EXPONENT_MASK because magnitude <= MAX here.
        exponent += 1
        fraction = 0
      end
      [sign, exponent, fraction]
    end

    # True for negative numbers and for negative zero.
    def negative_sign?(float)
      float.negative? || (float.zero? && (1.0 / float).negative?)
    end

    # Splits a raw 16-bit pattern into [sign, exponent, fraction].
    def deconstruct_uint16(uint16)
      # | sign (1 bit) | exponent (5 bit) | fraction (10 bit) |
      sign = (uint16 >> (EXPONENT_N_BITS + FRACTION_N_BITS))
      exponent = ((uint16 >> FRACTION_N_BITS) & EXPONENT_MASK)
      fraction = (uint16 & FRACTION_MASK)
      [sign, exponent, fraction]
    end
  end
end
diff --git a/ruby/red-arrow/lib/arrow/loader.rb b/ruby/red-arrow/lib/arrow/loader.rb
index 58b11e567f..9c8300628a 100644
--- a/ruby/red-arrow/lib/arrow/loader.rb
+++ b/ruby/red-arrow/lib/arrow/loader.rb
@@ -81,6 +81,9 @@ module Arrow
require "arrow/fixed-size-binary-array-builder"
require "arrow/function"
require "arrow/group"
+ require "arrow/half-float"
+ require "arrow/half-float-array"
+ require "arrow/half-float-array-builder"
require "arrow/list-array-builder"
require "arrow/list-data-type"
require "arrow/map-array"
@@ -196,6 +199,7 @@ module Arrow
"Arrow::Date64Array",
"Arrow::Decimal128Array",
"Arrow::Decimal256Array",
+ "Arrow::HalfFloatArray",
"Arrow::Time32Array",
"Arrow::Time64Array",
"Arrow::TimestampArray"
diff --git a/ruby/red-arrow/test/raw-records/test-basic-arrays.rb b/ruby/red-arrow/test/raw-records/test-basic-arrays.rb
index 0180cb92b4..15cdee6820 100644
--- a/ruby/red-arrow/test/raw-records/test-basic-arrays.rb
+++ b/ruby/red-arrow/test/raw-records/test-basic-arrays.rb
@@ -117,6 +117,16 @@ module RawRecordsBasicArraysTests
assert_equal(records, target.raw_records)
end
+ def test_half_float
+ records = [
+ [-1.5],
+ [nil],
+ [1.5],
+ ]
+ target = build({column: :half_float}, records)
+ assert_equal(records, target.raw_records)
+ end
+
def test_float
records = [
[-1.0],
diff --git a/ruby/red-arrow/test/test-half-float-array.rb b/ruby/red-arrow/test/test-half-float-array.rb
new file mode 100644
index 0000000000..a13dcea2f9
--- /dev/null
+++ b/ruby/red-arrow/test/test-half-float-array.rb
@@ -0,0 +1,43 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
# Checks the element types accepted by Arrow::HalfFloatArray.new and
# element access through #[].
class HalfFloatArrayTest < Test::Unit::TestCase
  sub_test_case(".new") do
    test("Float") do
      assert_equal([1.5], Arrow::HalfFloatArray.new([1.5]).to_a)
    end

    test("Integer") do
      # An Integer element is the value returned by HalfFloat#to_uint16.
      half = Arrow::HalfFloat.new(1.5)
      bits = half.to_uint16
      assert_equal([half.to_f], Arrow::HalfFloatArray.new([bits]).to_a)
    end

    test("HalfFloat") do
      half = Arrow::HalfFloat.new(1.5)
      assert_equal([half.to_f], Arrow::HalfFloatArray.new([half]).to_a)
    end
  end

  test("#[]") do
    half = Arrow::HalfFloat.new(1.5)
    bits = half.to_uint16
    assert_equal(half.to_f, Arrow::HalfFloatArray.new([bits])[0])
  end
end
diff --git a/ruby/red-arrow/test/test-half-float.rb b/ruby/red-arrow/test/test-half-float.rb
new file mode 100644
index 0000000000..1b551a0333
--- /dev/null
+++ b/ruby/red-arrow/test/test-half-float.rb
@@ -0,0 +1,130 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
# Checks how Arrow::HalfFloat.new splits its argument(s) into the
# sign / exponent / fraction fields.
class HalfFloatTest < Test::Unit::TestCase
  # Returns the three fields of +half_float+ as [sign, exponent, fraction].
  def components(half_float)
    [half_float.sign, half_float.exponent, half_float.fraction]
  end

  sub_test_case(".new") do
    test("Array") do
      # Three arguments are taken as sign, exponent and fraction.
      negative_infinity = Arrow::HalfFloat.new(0b1, 0b11111, 0b0000000000)
      assert_equal([0b1, 0b11111, 0b0000000000],
                   components(negative_infinity))
    end

    test("Integer - 0") do
      assert_equal([0b0, 0b00000, 0b0000000000],
                   components(Arrow::HalfFloat.new(0)))
    end

    test("Integer - +infinity") do
      assert_equal([0b0, 0b11111, 0b0000000000],
                   components(Arrow::HalfFloat.new(0x7c00)))
    end

    test("Integer - -infinity") do
      assert_equal([0b1, 0b11111, 0b0000000000],
                   components(Arrow::HalfFloat.new(0xfc00)))
    end

    test("Integer - 1/3") do
      assert_equal([0b0, 0b01101, 0b0101010101],
                   components(Arrow::HalfFloat.new(0x3555)))
    end

    test("Float - 0") do
      assert_equal([0b0, 0b00000, 0b0000000000],
                   components(Arrow::HalfFloat.new(0.0)))
    end

    test("Float - too large") do
      # Just above the largest finite half float.
      assert_equal([0b0, 0b11111, 0b0000000000],
                   components(Arrow::HalfFloat.new(65504.1)))
    end

    test("Float - +infinity") do
      assert_equal([0b0, 0b11111, 0b0000000000],
                   components(Arrow::HalfFloat.new(Float::INFINITY)))
    end

    test("Float - too small") do
      # Just below the smallest finite half float.
      assert_equal([0b1, 0b11111, 0b0000000000],
                   components(Arrow::HalfFloat.new(-65504.1)))
    end

    test("Float - -infinity") do
      assert_equal([0b1, 0b11111, 0b0000000000],
                   components(Arrow::HalfFloat.new(-Float::INFINITY)))
    end

    test("Float - 1/3") do
      # The Float that the bit pattern 0x3555 stands for.
      nearest_one_third = (2 ** -2) * (1 + 341 / 1024.0)
      assert_equal([0b0, 0b01101, 0b0101010101],
                   components(Arrow::HalfFloat.new(nearest_one_third)))
    end
  end
end
diff --git a/ruby/red-arrow/test/values/test-basic-arrays.rb b/ruby/red-arrow/test/values/test-basic-arrays.rb
index 237385fa7b..ae469d1bf0 100644
--- a/ruby/red-arrow/test/values/test-basic-arrays.rb
+++ b/ruby/red-arrow/test/values/test-basic-arrays.rb
@@ -107,6 +107,16 @@ module ValuesBasicArraysTests
assert_equal(values, target.values)
end
+ def test_half_float
+ values = [
+ -1.5,
+ nil,
+ 1.5,
+ ]
+ target = build(Arrow::HalfFloatArray.new(values))
+ assert_equal(values, target.values)
+ end
+
def test_float
values = [
-1.0,