You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@orc.apache.org by xn...@apache.org on 2019/12/16 17:52:54 UTC
[orc] branch master updated: ORC-573: [C++] Fix several issues in
Int128 and Decimal (#456)
This is an automated email from the ASF dual-hosted git repository.
xndai pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/orc.git
The following commit(s) were added to refs/heads/master by this push:
new fffd910 ORC-573: [C++] Fix several issues in Int128 and Decimal (#456)
fffd910 is described below
commit fffd910892da260af727548ab59d19ccaad5d8f1
Author: Gang Wu <ga...@alibaba-inc.com>
AuthorDate: Tue Dec 17 01:52:46 2019 +0800
ORC-573: [C++] Fix several issues in Int128 and Decimal (#456)
1. Support Decimal::toString() to trim trailing zeros.
2. Fix Int128::toString() to handle scale correctly.
3. Apply DecimalColumnWriter to trim trailing zeros before writing to bloom filter.
---
c++/include/orc/Common.hh | 24 ++++++++++++++++++++++++
c++/include/orc/Int128.hh | 7 ++++++-
c++/include/orc/Vector.hh | 2 +-
c++/src/ColumnWriter.cc | 4 ++--
c++/src/Int128.cc | 40 +++++++++++++++++++++++++---------------
c++/src/Statistics.hh | 6 +++---
c++/src/Vector.cc | 4 ++--
c++/test/TestInt128.cc | 26 ++++++++++++++++++++++++++
8 files changed, 89 insertions(+), 24 deletions(-)
diff --git a/c++/include/orc/Common.hh b/c++/include/orc/Common.hh
index ca926cf..448f288 100644
--- a/c++/include/orc/Common.hh
+++ b/c++/include/orc/Common.hh
@@ -274,6 +274,30 @@ namespace orc {
FUTURE = INT32_MAX
};
+ inline bool operator<(const Decimal& lhs, const Decimal& rhs) {
+ return compare(lhs, rhs);
+ }
+
+ inline bool operator>(const Decimal& lhs, const Decimal& rhs) {
+ return rhs < lhs;
+ }
+
+ inline bool operator<=(const Decimal& lhs, const Decimal& rhs) {
+ return !(lhs > rhs);
+ }
+
+ inline bool operator>=(const Decimal& lhs, const Decimal& rhs) {
+ return !(lhs < rhs);
+ }
+
+ inline bool operator!=(const Decimal& lhs, const Decimal& rhs) {
+ return lhs < rhs || rhs < lhs;
+ }
+
+ inline bool operator==(const Decimal& lhs, const Decimal& rhs) {
+ return !(lhs != rhs);
+ }
+
}
#endif
diff --git a/c++/include/orc/Int128.hh b/c++/include/orc/Int128.hh
index f86d8f0..1f68b2b 100644
--- a/c++/include/orc/Int128.hh
+++ b/c++/include/orc/Int128.hh
@@ -311,8 +311,13 @@ namespace orc {
/**
* Return the base 10 string representation with a decimal point,
* the given number of places after the decimal.
+ *
+ * @param scale scale of the Int128 to be interpreted as a decimal value
+ * @param trimTrailingZeros whether or not to trim trailing zeros
+ * @return converted string representation
*/
- std::string toDecimalString(int32_t scale=0) const;
+ std::string toDecimalString(int32_t scale = 0,
+ bool trimTrailingZeros = false) const;
/**
* Return the base 16 string representation of the two's complement with
diff --git a/c++/include/orc/Vector.hh b/c++/include/orc/Vector.hh
index 629c0b7..4cb2450 100644
--- a/c++/include/orc/Vector.hh
+++ b/c++/include/orc/Vector.hh
@@ -240,7 +240,7 @@ namespace orc {
explicit Decimal(const std::string& value);
Decimal();
- std::string toString() const;
+ std::string toString(bool trimTrailingZeros = false) const;
Int128 value;
int32_t scale;
};
diff --git a/c++/src/ColumnWriter.cc b/c++/src/ColumnWriter.cc
index 30d96ac..4fafb94 100644
--- a/c++/src/ColumnWriter.cc
+++ b/c++/src/ColumnWriter.cc
@@ -2026,7 +2026,7 @@ namespace orc {
++count;
if (enableBloomFilter) {
std::string decimal = Decimal(
- values[i], static_cast<int32_t>(scale)).toString();
+ values[i], static_cast<int32_t>(scale)).toString(true);
bloomFilter->addBytes(
decimal.c_str(), static_cast<int64_t>(decimal.size()));
}
@@ -2160,7 +2160,7 @@ namespace orc {
++count;
if (enableBloomFilter) {
std::string decimal = Decimal(
- values[i], static_cast<int32_t>(scale)).toString();
+ values[i], static_cast<int32_t>(scale)).toString(true);
bloomFilter->addBytes(
decimal.c_str(), static_cast<int64_t>(decimal.size()));
}
diff --git a/c++/src/Int128.cc b/c++/src/Int128.cc
index 433e6fa..4ff500f 100644
--- a/c++/src/Int128.cc
+++ b/c++/src/Int128.cc
@@ -391,41 +391,51 @@ namespace orc {
return buf.str();
}
- std::string Int128::toDecimalString(int32_t scale) const {
+ std::string Int128::toDecimalString(int32_t scale, bool trimTrailingZeros) const {
std::string str = toString();
+ std::string result;
if (scale == 0) {
return str;
} else if (*this < 0) {
int32_t len = static_cast<int32_t>(str.length());
if (len - 1 > scale) {
- return str.substr(0, static_cast<size_t>(len - scale)) + "." +
- str.substr(static_cast<size_t>(len - scale),
- static_cast<size_t>(scale));
+ result = str.substr(0, static_cast<size_t>(len - scale)) + "." +
+ str.substr(static_cast<size_t>(len - scale),
+ static_cast<size_t>(len));
} else if (len - 1 == scale) {
- return "-0." + str.substr(1, std::string::npos);
+ result = "-0." + str.substr(1, std::string::npos);
} else {
- std::string result = "-0.";
- for(int32_t i=0; i < scale - len + 1; ++i) {
+ result = "-0.";
+ for (int32_t i = 0; i < scale - len + 1; ++i) {
result += "0";
}
- return result + str.substr(1, std::string::npos);
+ result += str.substr(1, std::string::npos);
}
} else {
int32_t len = static_cast<int32_t>(str.length());
if (len > scale) {
- return str.substr(0, static_cast<size_t>(len - scale)) + "." +
- str.substr(static_cast<size_t>(len - scale),
- static_cast<size_t>(scale));
+ result = str.substr(0, static_cast<size_t>(len - scale)) + "." +
+ str.substr(static_cast<size_t>(len - scale),
+ static_cast<size_t>(len));
} else if (len == scale) {
- return "0." + str;
+ result = "0." + str;
} else {
- std::string result = "0.";
- for(int32_t i=0; i < scale - len; ++i) {
+ result = "0.";
+ for (int32_t i = 0; i < scale - len; ++i) {
result += "0";
}
- return result + str;
+ result += str;
}
}
+ if (trimTrailingZeros) {
+ size_t pos = result.find_last_not_of('0');
+ if (result[pos] == '.') {
+ result = result.substr(0, pos);
+ } else {
+ result = result.substr(0, pos + 1);
+ }
+ }
+ return result;
}
std::string Int128::toHexString() const {
diff --git a/c++/src/Statistics.hh b/c++/src/Statistics.hh
index 633450f..426317f 100644
--- a/c++/src/Statistics.hh
+++ b/c++/src/Statistics.hh
@@ -665,14 +665,14 @@ namespace orc {
proto::DecimalStatistics* decStats = pbStats.mutable_decimalstatistics();
if (_stats.hasMinimum()) {
- decStats->set_minimum(_stats.getMinimum().toString());
- decStats->set_maximum(_stats.getMaximum().toString());
+ decStats->set_minimum(_stats.getMinimum().toString(true));
+ decStats->set_maximum(_stats.getMaximum().toString(true));
} else {
decStats->clear_minimum();
decStats->clear_maximum();
}
if (_stats.hasSum()) {
- decStats->set_sum(_stats.getSum().toString());
+ decStats->set_sum(_stats.getSum().toString(true));
} else {
decStats->clear_sum();
}
diff --git a/c++/src/Vector.cc b/c++/src/Vector.cc
index 14c0ded..5c0e973 100644
--- a/c++/src/Vector.cc
+++ b/c++/src/Vector.cc
@@ -475,8 +475,8 @@ namespace orc {
// PASS
}
- std::string Decimal::toString() const {
- return value.toDecimalString(scale);
+ std::string Decimal::toString(bool trimTrailingZeros) const {
+ return value.toDecimalString(scale, trimTrailingZeros);
}
TimestampVectorBatch::TimestampVectorBatch(uint64_t _capacity,
diff --git a/c++/test/TestInt128.cc b/c++/test/TestInt128.cc
index f61f772..f72eeee 100644
--- a/c++/test/TestInt128.cc
+++ b/c++/test/TestInt128.cc
@@ -625,4 +625,30 @@ namespace orc {
EXPECT_EQ(Int128(0), num);
}
+ TEST(Int128, testToDecimalStringTrimTrailingZeros) {
+ EXPECT_TRUE(Int128(0).toDecimalString(0, false) == "0");
+ EXPECT_TRUE(Int128(0).toDecimalString(0, true) == "0");
+
+ EXPECT_TRUE(Int128(0).toDecimalString(4, false) == "0.0000");
+ EXPECT_TRUE(Int128(0).toDecimalString(4, true) == "0");
+
+ EXPECT_TRUE(Int128(12340000).toDecimalString(3, false) == "12340.000");
+ EXPECT_TRUE(Int128(12340000).toDecimalString(3, true) == "12340");
+
+ EXPECT_TRUE(Int128(12340000).toDecimalString(10, false) == "0.0012340000");
+ EXPECT_TRUE(Int128(12340000).toDecimalString(10, true) == "0.001234");
+
+ EXPECT_TRUE(Int128(12340000).toDecimalString(8, false) == "0.12340000");
+ EXPECT_TRUE(Int128(12340000).toDecimalString(8, true) == "0.1234");
+
+ EXPECT_TRUE(Int128(-12340000).toDecimalString(3, false) == "-12340.000");
+ EXPECT_TRUE(Int128(-12340000).toDecimalString(3, true) == "-12340");
+
+ EXPECT_TRUE(Int128(-12340000).toDecimalString(10, false) == "-0.0012340000");
+ EXPECT_TRUE(Int128(-12340000).toDecimalString(10, true) == "-0.001234");
+
+ EXPECT_TRUE(Int128(-12340000).toDecimalString(8, false) == "-0.12340000");
+ EXPECT_TRUE(Int128(-12340000).toDecimalString(8, true) == "-0.1234");
+ }
+
} // namespace orc