You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by tu...@apache.org on 2022/10/03 12:52:57 UTC
[arrow-rs] branch master updated: Add i256 (#2637) (#2781)
This is an automated email from the ASF dual-hosted git repository.
tustvold pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git
The following commit(s) were added to refs/heads/master by this push:
new 4df1f3c62 Add i256 (#2637) (#2781)
4df1f3c62 is described below
commit 4df1f3c62f6d089d8cf02892168b9179e4c8c6fd
Author: Raphael Taylor-Davies <17...@users.noreply.github.com>
AuthorDate: Mon Oct 3 13:52:52 2022 +0100
Add i256 (#2637) (#2781)
* Add i256 (#2637)
* RAT
* Fix doc comments
* Store as parts
* Custom multiply implementation
* Make from_parts public
* Document mulx
* Remove branch from to_i128
* Clippy
---
arrow-buffer/Cargo.toml | 1 +
arrow-buffer/src/bigint.rs | 369 +++++++++++++++++++++++++++++++++++++++++++++
arrow-buffer/src/lib.rs | 2 +
arrow-buffer/src/native.rs | 47 ++++++
4 files changed, 419 insertions(+)
diff --git a/arrow-buffer/Cargo.toml b/arrow-buffer/Cargo.toml
index 8ca95b956..af0bd9861 100644
--- a/arrow-buffer/Cargo.toml
+++ b/arrow-buffer/Cargo.toml
@@ -38,6 +38,7 @@ path = "src/lib.rs"
bench = false
[dependencies]
+num = { version = "0.4", default-features = false, features = ["std"] }
half = { version = "2.0", default-features = false }
[dev-dependencies]
diff --git a/arrow-buffer/src/bigint.rs b/arrow-buffer/src/bigint.rs
new file mode 100644
index 000000000..a08d280ca
--- /dev/null
+++ b/arrow-buffer/src/bigint.rs
@@ -0,0 +1,369 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+use num::BigInt;
+use std::cmp::Ordering;
+
+/// A signed 256-bit integer
+#[allow(non_camel_case_types)]
+#[derive(Copy, Clone, Default, Eq, PartialEq, Hash)]
+pub struct i256 {
+ low: u128,
+ high: i128,
+}
+
+impl std::fmt::Debug for i256 {
+ fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+ write!(f, "{}", self)
+ }
+}
+
+impl std::fmt::Display for i256 {
+ fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+ write!(f, "{}", BigInt::from_signed_bytes_le(&self.to_le_bytes()))
+ }
+}
+
+impl PartialOrd for i256 {
+ fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
+ Some(self.cmp(other))
+ }
+}
+
+impl Ord for i256 {
+ fn cmp(&self, other: &Self) -> Ordering {
+ // This is 25x faster than using a variable length encoding such
+ // as BigInt as it avoids allocation and branching
+ self.high.cmp(&other.high).then(self.low.cmp(&other.low))
+ }
+}
+
+impl i256 {
+ /// The additive identity for this integer type, i.e. `0`.
+ pub const ZERO: Self = i256 { low: 0, high: 0 };
+
+ /// The multiplicative identity for this integer type, i.e. `1`.
+ pub const ONE: Self = i256 { low: 1, high: 0 };
+
+ /// The multiplicative inverse for this integer type, i.e. `-1`.
+ pub const MINUS_ONE: Self = i256 {
+ low: u128::MAX,
+ high: -1,
+ };
+
+ /// Create an integer value from its representation as a byte array in little-endian.
+ #[inline]
+ pub fn from_le_bytes(b: [u8; 32]) -> Self {
+ Self {
+ high: i128::from_le_bytes(b[16..32].try_into().unwrap()),
+ low: u128::from_le_bytes(b[0..16].try_into().unwrap()),
+ }
+ }
+
+ /// Create an i256 from the provided low u128 and high i128
+ #[inline]
+ pub fn from_parts(low: u128, high: i128) -> Self {
+ Self { low, high }
+ }
+
+ /// Returns this `i256` as a low u128 and high i128
+ pub fn to_parts(self) -> (u128, i128) {
+ (self.low, self.high)
+ }
+
+ /// Converts this `i256` into an `i128` returning `None` if this would result
+ /// in truncation/overflow
+ pub fn to_i128(self) -> Option<i128> {
+ let as_i128 = self.low as i128;
+
+ let high_negative = self.high < 0;
+ let low_negative = as_i128 < 0;
+ let high_valid = self.high == -1 || self.high == 0;
+
+ (high_negative == low_negative && high_valid).then_some(self.low as i128)
+ }
+
+ /// Return the memory representation of this integer as a byte array in little-endian byte order.
+ #[inline]
+ pub fn to_le_bytes(self) -> [u8; 32] {
+ let mut t = [0; 32];
+ let t_low: &mut [u8; 16] = (&mut t[0..16]).try_into().unwrap();
+ *t_low = self.low.to_le_bytes();
+ let t_high: &mut [u8; 16] = (&mut t[16..32]).try_into().unwrap();
+ *t_high = self.high.to_le_bytes();
+ t
+ }
+
+ /// Create an i256 from the provided [`BigInt`] returning a bool indicating
+ /// if overflow occurred
+ fn from_bigint_with_overflow(v: BigInt) -> (Self, bool) {
+ let v_bytes = v.to_signed_bytes_le();
+ match v_bytes.len().cmp(&32) {
+ Ordering::Less => {
+ let mut bytes = if num::Signed::is_negative(&v) {
+ [255_u8; 32]
+ } else {
+ [0; 32]
+ };
+ bytes[0..v_bytes.len()].copy_from_slice(&v_bytes[..v_bytes.len()]);
+ (Self::from_le_bytes(bytes), false)
+ }
+ Ordering::Equal => (Self::from_le_bytes(v_bytes.try_into().unwrap()), false),
+ Ordering::Greater => {
+ (Self::from_le_bytes(v_bytes[..32].try_into().unwrap()), true)
+ }
+ }
+ }
+
+ /// Performs wrapping addition
+ #[inline]
+ pub fn wrapping_add(self, other: Self) -> Self {
+ let (low, carry) = self.low.overflowing_add(other.low);
+ let high = self.high.wrapping_add(other.high).wrapping_add(carry as _);
+ Self { low, high }
+ }
+
+ /// Performs checked addition
+ #[inline]
+ pub fn checked_add(self, other: Self) -> Option<Self> {
+ let (low, carry) = self.low.overflowing_add(other.low);
+ let high = self.high.checked_add(other.high)?.checked_add(carry as _)?;
+ Some(Self { low, high })
+ }
+
+ /// Performs wrapping subtraction
+ #[inline]
+ pub fn wrapping_sub(self, other: Self) -> Self {
+ let (low, carry) = self.low.overflowing_sub(other.low);
+ let high = self.high.wrapping_sub(other.high).wrapping_sub(carry as _);
+ Self { low, high }
+ }
+
+ /// Performs checked subtraction
+ #[inline]
+ pub fn checked_sub(self, other: Self) -> Option<Self> {
+ let (low, carry) = self.low.overflowing_sub(other.low);
+ let high = self.high.checked_sub(other.high)?.checked_sub(carry as _)?;
+ Some(Self { low, high })
+ }
+
+ /// Performs wrapping multiplication
+ #[inline]
+ pub fn wrapping_mul(self, other: Self) -> Self {
+ let (low, high) = mulx(self.low, other.low);
+
+ // Compute the high multiples, only impacting the high 128-bits
+ let hl = self.high.wrapping_mul(other.low as i128);
+ let lh = (self.low as i128).wrapping_mul(other.high);
+
+ Self {
+ low,
+ high: (high as i128).wrapping_add(hl).wrapping_add(lh),
+ }
+ }
+
+ /// Performs checked multiplication
+ #[inline]
+ pub fn checked_mul(self, other: Self) -> Option<Self> {
+ let (low, high) = mulx(self.low, other.low);
+
+ // Compute the high multiples, only impacting the high 128-bits
+ let hl = self.high.checked_mul(other.low as i128)?;
+ let lh = (self.low as i128).checked_mul(other.high)?;
+
+ Some(Self {
+ low,
+ high: (high as i128).checked_add(hl)?.checked_add(lh)?,
+ })
+ }
+
+ /// Performs wrapping division
+ #[inline]
+ pub fn wrapping_div(self, other: Self) -> Self {
+ let l = BigInt::from_signed_bytes_le(&self.to_le_bytes());
+ let r = BigInt::from_signed_bytes_le(&other.to_le_bytes());
+ Self::from_bigint_with_overflow(l / r).0
+ }
+
+ /// Performs checked division
+ #[inline]
+ pub fn checked_div(self, other: Self) -> Option<Self> {
+ let l = BigInt::from_signed_bytes_le(&self.to_le_bytes());
+ let r = BigInt::from_signed_bytes_le(&other.to_le_bytes());
+ let (val, overflow) = Self::from_bigint_with_overflow(l / r);
+ (!overflow).then_some(val)
+ }
+
+ /// Performs wrapping remainder
+ #[inline]
+ pub fn wrapping_rem(self, other: Self) -> Self {
+ let l = BigInt::from_signed_bytes_le(&self.to_le_bytes());
+ let r = BigInt::from_signed_bytes_le(&other.to_le_bytes());
+ Self::from_bigint_with_overflow(l % r).0
+ }
+
+ /// Performs checked remainder
+ #[inline]
+ pub fn checked_rem(self, other: Self) -> Option<Self> {
+ if other == Self::ZERO {
+ return None;
+ }
+
+ let l = BigInt::from_signed_bytes_le(&self.to_le_bytes());
+ let r = BigInt::from_signed_bytes_le(&other.to_le_bytes());
+ let (val, overflow) = Self::from_bigint_with_overflow(l % r);
+ (!overflow).then_some(val)
+ }
+}
+
+/// Performs an unsigned multiplication of `a * b` returning a tuple of
+/// `(low, high)` where `low` contains the lower 128-bits of the result
+/// and `high` the higher 128-bits
+///
+/// This mirrors the x86 mulx instruction but for 128-bit types
+#[inline]
+fn mulx(a: u128, b: u128) -> (u128, u128) {
+ let split = |a: u128| (a & (u64::MAX as u128), a >> 64);
+
+ const MASK: u128 = u64::MAX as _;
+
+ let (a_low, a_high) = split(a);
+ let (b_low, b_high) = split(b);
+
+ // Carry stores the upper 64-bits of low and lower 64-bits of high
+ let (mut low, mut carry) = split(a_low * b_low);
+ carry += a_high * b_low;
+
+ // Update low and high with corresponding parts of carry
+ low += carry << 64;
+ let mut high = carry >> 64;
+
+ // Update carry with overflow from low
+ carry = low >> 64;
+ low &= MASK;
+
+ // Perform multiply including overflow from low
+ carry += b_high * a_low;
+
+ // Update low and high with values from carry
+ low += carry << 64;
+ high += carry >> 64;
+
+ // Perform 4th multiplication
+ high += a_high * b_high;
+
+ (low, high)
+}
+
+#[cfg(test)]
+mod tests {
+ use super::*;
+ use num::{BigInt, FromPrimitive, ToPrimitive};
+ use rand::{thread_rng, Rng};
+
+ #[test]
+ fn test_signed_cmp() {
+ let a = i256::from_parts(i128::MAX as u128, 12);
+ let b = i256::from_parts(i128::MIN as u128, 12);
+ assert!(a < b);
+
+ let a = i256::from_parts(i128::MAX as u128, 12);
+ let b = i256::from_parts(i128::MIN as u128, -12);
+ assert!(a > b);
+ }
+
+ #[test]
+ fn test_to_i128() {
+ let vals = [
+ BigInt::from_i128(-1).unwrap(),
+ BigInt::from_i128(i128::MAX).unwrap(),
+ BigInt::from_i128(i128::MIN).unwrap(),
+ BigInt::from_u128(u128::MIN).unwrap(),
+ BigInt::from_u128(u128::MAX).unwrap(),
+ ];
+
+ for v in vals {
+ let (t, overflow) = i256::from_bigint_with_overflow(v.clone());
+ assert!(!overflow);
+ assert_eq!(t.to_i128(), v.to_i128(), "{} vs {}", v, t);
+ }
+ }
+
+ #[test]
+ fn test_i256() {
+ let mut rng = thread_rng();
+
+ for _ in 0..1000 {
+ let mut l = [0_u8; 32];
+ let len = rng.gen_range(0..32);
+ l.iter_mut().take(len).for_each(|x| *x = rng.gen());
+
+ let mut r = [0_u8; 32];
+ let len = rng.gen_range(0..32);
+ r.iter_mut().take(len).for_each(|x| *x = rng.gen());
+
+ let il = i256::from_le_bytes(l);
+ let ir = i256::from_le_bytes(r);
+
+ let bl = BigInt::from_signed_bytes_le(&l);
+ let br = BigInt::from_signed_bytes_le(&r);
+
+ // Comparison
+ assert_eq!(il.cmp(&ir), bl.cmp(&br), "{} cmp {}", bl, br);
+
+ // To i128
+ assert_eq!(il.to_i128(), bl.to_i128(), "{}", bl);
+ assert_eq!(ir.to_i128(), br.to_i128(), "{}", br);
+
+ // Addition
+ let actual = il.wrapping_add(ir);
+ let (expected, overflow) =
+ i256::from_bigint_with_overflow(bl.clone() + br.clone());
+ assert_eq!(actual, expected);
+
+ let checked = il.checked_add(ir);
+ match overflow {
+ true => assert!(checked.is_none()),
+ false => assert_eq!(checked.unwrap(), actual),
+ }
+
+ // Subtraction
+ let actual = il.wrapping_sub(ir);
+ let (expected, overflow) =
+ i256::from_bigint_with_overflow(bl.clone() - br.clone());
+ assert_eq!(actual.to_string(), expected.to_string());
+
+ let checked = il.checked_sub(ir);
+ match overflow {
+ true => assert!(checked.is_none()),
+ false => assert_eq!(checked.unwrap(), actual),
+ }
+
+ // Multiplication
+ let actual = il.wrapping_mul(ir);
+ let (expected, overflow) =
+ i256::from_bigint_with_overflow(bl.clone() * br.clone());
+ assert_eq!(actual.to_string(), expected.to_string());
+
+ let checked = il.checked_mul(ir);
+ match overflow {
+ true => assert!(checked.is_none()),
+ false => assert_eq!(checked.unwrap(), actual),
+ }
+ }
+ }
+}
diff --git a/arrow-buffer/src/lib.rs b/arrow-buffer/src/lib.rs
index 74d2bd5ec..13d44e4d5 100644
--- a/arrow-buffer/src/lib.rs
+++ b/arrow-buffer/src/lib.rs
@@ -21,8 +21,10 @@ pub mod alloc;
pub mod buffer;
pub use buffer::{Buffer, MutableBuffer};
+mod bigint;
mod bytes;
mod native;
+pub use bigint::i256;
pub use native::*;
mod util;
diff --git a/arrow-buffer/src/native.rs b/arrow-buffer/src/native.rs
index d371e7e11..6ac11a16f 100644
--- a/arrow-buffer/src/native.rs
+++ b/arrow-buffer/src/native.rs
@@ -15,6 +15,7 @@
// specific language governing permissions and limitations
// under the License.
+use crate::i256;
use half::f16;
mod private {
@@ -169,6 +170,29 @@ native_float!(f16, self, self.to_f32() as _);
native_float!(f32, self, self as _);
native_float!(f64, self, self as _);
+impl private::Sealed for i256 {}
+impl ArrowNativeType for i256 {
+ fn from_usize(u: usize) -> Option<Self> {
+ Some(Self::from_parts(u as u128, 0))
+ }
+
+ fn as_usize(self) -> usize {
+ self.to_parts().0 as usize
+ }
+
+ fn to_usize(self) -> Option<usize> {
+ let (low, high) = self.to_parts();
+ if high != 0 {
+ return None;
+ }
+ low.try_into().ok()
+ }
+
+ fn to_isize(self) -> Option<isize> {
+ self.to_i128()?.try_into().ok()
+ }
+}
+
/// Allows conversion from supported Arrow types to a byte slice.
pub trait ToByteSlice {
/// Converts this instance into a byte slice
@@ -192,3 +216,26 @@ impl<T: ArrowNativeType> ToByteSlice for T {
unsafe { std::slice::from_raw_parts(raw_ptr, std::mem::size_of::<T>()) }
}
}
+
+#[cfg(test)]
+mod tests {
+ use super::*;
+
+ #[test]
+ fn test_i256() {
+ let a = i256::from_parts(0, 0);
+ assert_eq!(a.as_usize(), 0);
+ assert_eq!(a.to_usize().unwrap(), 0);
+ assert_eq!(a.to_isize().unwrap(), 0);
+
+ let a = i256::from_parts(0, -1);
+ assert_eq!(a.as_usize(), 0);
+ assert!(a.to_usize().is_none());
+ assert!(a.to_usize().is_none());
+
+ let a = i256::from_parts(u128::MAX, -1);
+ assert_eq!(a.as_usize(), usize::MAX);
+ assert!(a.to_usize().is_none());
+ assert_eq!(a.to_isize().unwrap(), -1);
+ }
+}