You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by ap...@apache.org on 2018/05/22 12:24:41 UTC
[arrow] branch master updated: ARROW-2585: [C++] Add
Decimal::FromBigEndian, which was formerly a static method in
parquet-cpp/src/parquet/arrow/reader.cc
This is an automated email from the ASF dual-hosted git repository.
apitrou pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/master by this push:
new fcc13f5 ARROW-2585: [C++] Add Decimal::FromBigEndian, which was formerly a static method in parquet-cpp/src/parquet/arrow/reader.cc
fcc13f5 is described below
commit fcc13f5088323989d925270e7850b817bde19d48
Author: Joshua Storck <jo...@twosigma.com>
AuthorDate: Tue May 22 14:24:35 2018 +0200
ARROW-2585: [C++] Add Decimal::FromBigEndian, which was formerly a static method in parquet-cpp/src/parquet/arrow/reader.cc
Author: Joshua Storck <jo...@twosigma.com>
Closes #2036 from joshuastorck/decimal_from_big_endian and squashes the following commits:
e970c87 <Joshua Storck> Fixing lint errors
4cb4d89 <Joshua Storck> Adding Decimal::FromBigEndian, which was formerly a static method in parquet-cpp/src/parquet/arrow/reader.cc
---
cpp/src/arrow/util/decimal-test.cc | 56 ++++++++++++++++++++++++++++++++++
cpp/src/arrow/util/decimal.cc | 62 ++++++++++++++++++++++++++++++++++++++
cpp/src/arrow/util/decimal.h | 5 +++
3 files changed, 123 insertions(+)
diff --git a/cpp/src/arrow/util/decimal-test.cc b/cpp/src/arrow/util/decimal-test.cc
index 2829e4a..0877617 100644
--- a/cpp/src/arrow/util/decimal-test.cc
+++ b/cpp/src/arrow/util/decimal-test.cc
@@ -380,4 +380,60 @@ TEST(Decimal128Test, TestNoDecimalPointExponential) {
ASSERT_EQ(0, scale);
}
+TEST(Decimal128Test, TestFromBigEndian) {
+ // We test out a variety of scenarios:
+ //
+ // * Positive values that are left shifted
+ // and filled in with the same bit pattern
+ // * Negated of the positive values
+ // * Complement of the positive values
+ //
+ // For the positive values, we can call FromBigEndian
+ // with a length that is less than 16, whereas we must
+ // pass all 16 bytes for the negative and complement.
+ //
+ // We use a number of bit patterns to increase the coverage
+ // of scenarios
+ for (int32_t start : {1, 15, /* 00001111 */
+ 85, /* 01010101 */
+ 127 /* 01111111 */}) {
+ Decimal128 value(start);
+ for (int ii = 0; ii < 16; ++ii) {
+ auto little_endian = value.ToBytes();
+ std::reverse(little_endian.begin(), little_endian.end());
+ Decimal128 out;
+ // Limit the number of bytes we are passing to make
+ // sure that it works correctly. That's why all of the
+ // 'start' values don't have a 1 in the most significant
+ // bit place
+ ASSERT_OK(Decimal128::FromBigEndian(little_endian.data() + 15 - ii, ii + 1, &out));
+ ASSERT_EQ(value, out);
+
+ // Negate it and convert to big endian
+ auto negated = -value;
+ little_endian = negated.ToBytes();
+ std::reverse(little_endian.begin(), little_endian.end());
+ // Convert all of the bytes since we have to include the sign bit
+ ASSERT_OK(Decimal128::FromBigEndian(little_endian.data(), 16, &out));
+ ASSERT_EQ(negated, out);
+
+ // Take the complement and convert to big endian
+ auto complement = ~value;
+ little_endian = complement.ToBytes();
+ std::reverse(little_endian.begin(), little_endian.end());
+ ASSERT_OK(Decimal128::FromBigEndian(little_endian.data(), 16, &out));
+ ASSERT_EQ(complement, out);
+
+ value <<= 8;
+ value += Decimal128(start);
+ }
+ }
+}
+
+TEST(Decimal128Test, TestFromBigEndianBadLength) {
+ Decimal128 out;
+ ASSERT_RAISES(Invalid, Decimal128::FromBigEndian(0, -1, &out));
+ ASSERT_RAISES(Invalid, Decimal128::FromBigEndian(0, 17, &out));
+}
+
} // namespace arrow
diff --git a/cpp/src/arrow/util/decimal.cc b/cpp/src/arrow/util/decimal.cc
index 668da6f..5d83653 100644
--- a/cpp/src/arrow/util/decimal.cc
+++ b/cpp/src/arrow/util/decimal.cc
@@ -17,6 +17,7 @@
#include <algorithm>
#include <cctype>
+#include <climits>
#include <cmath>
#include <cstring>
#include <iomanip>
@@ -873,4 +874,65 @@ Status Decimal128::Rescale(int32_t original_scale, int32_t new_scale,
return Status::OK();
}
+// Helper function used by Decimal128::FromBigEndian
+static inline uint64_t FromBigEndian(const uint8_t* bytes, int32_t length) {
+ // We don't bounds check the length here because this is called by
+ // FromBigEndian that has a Decimal128 as its out parameters and
+ // that function is already checking the length of the bytes and only
+ // passes lengths between zero and eight.
+ uint64_t result = 0;
+ // Using memcpy instead of special casing for length
+ // and doing the conversion in 16, 32 parts, which could
+ // possibly create unaligned memory access on certain platforms
+ memcpy(reinterpret_cast<uint8_t*>(&result) + 8 - length, bytes, length);
+ return ::arrow::BitUtil::FromBigEndian(result);
+}
+
+Status Decimal128::FromBigEndian(const uint8_t* bytes, int32_t length, Decimal128* out) {
+ static constexpr int32_t kMinDecimalBytes = 1;
+ static constexpr int32_t kMaxDecimalBytes = 16;
+
+ int64_t high;
+ uint64_t low;
+
+ if (length < kMinDecimalBytes || length > kMaxDecimalBytes) {
+ std::ostringstream stream;
+ stream << "Length of byte array passed to Decimal128::FromBigEndian ";
+ stream << "was " << length << ", but must be between ";
+ stream << kMinDecimalBytes << " and " << kMaxDecimalBytes;
+ return Status::Invalid(stream.str());
+ }
+
+ /// Bytes are coming in big-endian, so the first byte is the MSB and therefore holds the
+ /// sign bit.
+ const bool is_negative = static_cast<int8_t>(bytes[0]) < 0;
+
+ /// Sign extend the low bits if necessary
+ low = UINT64_MAX * (is_negative && length < 8);
+ high = -1 * (is_negative && length < kMaxDecimalBytes);
+
+ /// Stop byte of the high bytes
+ const int32_t high_bits_offset = std::max(0, length - 8);
+
+ /// Shift left enough bits to make room for the incoming int64_t
+ high <<= high_bits_offset * CHAR_BIT;
+
+ /// Preserve the upper bits by inplace OR-ing the int64_t
+ uint64_t value = arrow::FromBigEndian(bytes, high_bits_offset);
+ high |= value;
+
+ /// Stop byte of the low bytes
+ const int32_t low_bits_offset = std::min(length, 8);
+
+ /// Shift left enough bits to make room for the incoming uint64_t
+ low <<= low_bits_offset * CHAR_BIT;
+
+ /// Preserve the upper bits by inplace OR-ing the uint64_t
+ value = arrow::FromBigEndian(bytes + high_bits_offset, length - high_bits_offset);
+ low |= value;
+
+ *out = Decimal128(high, low);
+ return Status::OK();
+}
+
} // namespace arrow
diff --git a/cpp/src/arrow/util/decimal.h b/cpp/src/arrow/util/decimal.h
index 79a99ba..b3180cb 100644
--- a/cpp/src/arrow/util/decimal.h
+++ b/cpp/src/arrow/util/decimal.h
@@ -126,6 +126,11 @@ class ARROW_EXPORT Decimal128 {
static Status FromString(const std::string& s, Decimal128* out,
int32_t* precision = NULLPTR, int32_t* scale = NULLPTR);
+ /// \brief Convert from a big endian byte representation. The length must be
+ /// between 1 and 16
+ /// \return error status if the length is an invalid value
+ static Status FromBigEndian(const uint8_t* data, int32_t length, Decimal128* out);
+
/// \brief Convert Decimal128 from one scale to another
Status Rescale(int32_t original_scale, int32_t new_scale, Decimal128* out) const;
--
To stop receiving notification emails like this one, please contact
apitrou@apache.org.