You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@orc.apache.org by xn...@apache.org on 2019/12/16 17:52:54 UTC
[orc] branch master updated: ORC-573: [C++] Fix several issues in Int128 and Decimal (#456)

This is an automated email from the ASF dual-hosted git repository.

xndai pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/orc.git


The following commit(s) were added to refs/heads/master by this push:
     new fffd910  ORC-573: [C++] Fix several issues in Int128 and Decimal (#456)
fffd910 is described below

commit fffd910892da260af727548ab59d19ccaad5d8f1
Author: Gang Wu <ga...@alibaba-inc.com>
AuthorDate: Tue Dec 17 01:52:46 2019 +0800

    ORC-573: [C++] Fix several issues in Int128 and Decimal (#456)
    
    1. Support Decimal::toString() to trim trailing zeros.
    2. Fix Int128::toString() to handle scale correctly.
    3. Apply DecimalColumnWriter to trim trailing zeros before writing to bloom filter.
---
 c++/include/orc/Common.hh | 24 ++++++++++++++++++++++++
 c++/include/orc/Int128.hh |  7 ++++++-
 c++/include/orc/Vector.hh |  2 +-
 c++/src/ColumnWriter.cc   |  4 ++--
 c++/src/Int128.cc         | 40 +++++++++++++++++++++++++---------------
 c++/src/Statistics.hh     |  6 +++---
 c++/src/Vector.cc         |  4 ++--
 c++/test/TestInt128.cc    | 26 ++++++++++++++++++++++++++
 8 files changed, 89 insertions(+), 24 deletions(-)

diff --git a/c++/include/orc/Common.hh b/c++/include/orc/Common.hh
index ca926cf..448f288 100644
--- a/c++/include/orc/Common.hh
+++ b/c++/include/orc/Common.hh
@@ -274,6 +274,30 @@ namespace orc {
     FUTURE = INT32_MAX
   };
 
+  inline bool operator<(const Decimal& lhs, const Decimal& rhs) {
+    return compare(lhs, rhs);
+  }
+
+  inline bool operator>(const Decimal& lhs, const Decimal& rhs) {
+    return rhs < lhs;
+  }
+
+  inline bool operator<=(const Decimal& lhs, const Decimal& rhs) {
+    return !(lhs > rhs);
+  }
+
+  inline bool operator>=(const Decimal& lhs, const Decimal& rhs) {
+    return !(lhs < rhs);
+  }
+
+  inline bool operator!=(const Decimal& lhs, const Decimal& rhs) {
+    return lhs < rhs || rhs < lhs;
+  }
+
+  inline bool operator==(const Decimal& lhs, const Decimal& rhs) {
+    return !(lhs != rhs);
+  }
+
 }
 
 #endif
diff --git a/c++/include/orc/Int128.hh b/c++/include/orc/Int128.hh
index f86d8f0..1f68b2b 100644
--- a/c++/include/orc/Int128.hh
+++ b/c++/include/orc/Int128.hh
@@ -311,8 +311,13 @@ namespace orc {
     /**
      * Return the base 10 string representation with a decimal point,
      * the given number of places after the decimal.
+     *
+     * @param scale scale of the Int128 to be interpreted as a decimal value
+     * @param trimTrailingZeros whether or not to trim trailing zeros
+     * @return converted string representation
      */
-    std::string toDecimalString(int32_t scale=0) const;
+    std::string toDecimalString(int32_t scale = 0,
+                                bool trimTrailingZeros = false) const;
 
     /**
      * Return the base 16 string representation of the two's complement with
diff --git a/c++/include/orc/Vector.hh b/c++/include/orc/Vector.hh
index 629c0b7..4cb2450 100644
--- a/c++/include/orc/Vector.hh
+++ b/c++/include/orc/Vector.hh
@@ -240,7 +240,7 @@ namespace orc {
     explicit Decimal(const std::string& value);
     Decimal();
 
-    std::string toString() const;
+    std::string toString(bool trimTrailingZeros = false) const;
     Int128 value;
     int32_t scale;
   };
diff --git a/c++/src/ColumnWriter.cc b/c++/src/ColumnWriter.cc
index 30d96ac..4fafb94 100644
--- a/c++/src/ColumnWriter.cc
+++ b/c++/src/ColumnWriter.cc
@@ -2026,7 +2026,7 @@ namespace orc {
         ++count;
         if (enableBloomFilter) {
           std::string decimal = Decimal(
-            values[i], static_cast<int32_t>(scale)).toString();
+            values[i], static_cast<int32_t>(scale)).toString(true);
           bloomFilter->addBytes(
             decimal.c_str(), static_cast<int64_t>(decimal.size()));
         }
@@ -2160,7 +2160,7 @@ namespace orc {
         ++count;
         if (enableBloomFilter) {
           std::string decimal = Decimal(
-            values[i], static_cast<int32_t>(scale)).toString();
+            values[i], static_cast<int32_t>(scale)).toString(true);
           bloomFilter->addBytes(
             decimal.c_str(), static_cast<int64_t>(decimal.size()));
         }
diff --git a/c++/src/Int128.cc b/c++/src/Int128.cc
index 433e6fa..4ff500f 100644
--- a/c++/src/Int128.cc
+++ b/c++/src/Int128.cc
@@ -391,41 +391,51 @@ namespace orc {
     return buf.str();
   }
 
-  std::string Int128::toDecimalString(int32_t scale) const {
+  std::string Int128::toDecimalString(int32_t scale, bool trimTrailingZeros) const {
     std::string str = toString();
+    std::string result;
     if (scale == 0) {
       return str;
     } else if (*this < 0) {
       int32_t len = static_cast<int32_t>(str.length());
       if (len - 1 > scale) {
-        return str.substr(0, static_cast<size_t>(len - scale)) + "." +
-          str.substr(static_cast<size_t>(len - scale),
-                     static_cast<size_t>(scale));
+        result = str.substr(0, static_cast<size_t>(len - scale)) + "." +
+                 str.substr(static_cast<size_t>(len - scale),
+                            static_cast<size_t>(len));
       } else if (len - 1 == scale) {
-        return "-0." + str.substr(1, std::string::npos);
+        result = "-0." + str.substr(1, std::string::npos);
       } else {
-        std::string result = "-0.";
-        for(int32_t i=0; i < scale - len + 1; ++i) {
+        result = "-0.";
+        for (int32_t i = 0; i < scale - len + 1; ++i) {
           result += "0";
         }
-        return result + str.substr(1, std::string::npos);
+        result += str.substr(1, std::string::npos);
       }
     } else {
       int32_t len = static_cast<int32_t>(str.length());
       if (len > scale) {
-        return str.substr(0, static_cast<size_t>(len - scale)) + "." +
-          str.substr(static_cast<size_t>(len - scale),
-                     static_cast<size_t>(scale));
+        result = str.substr(0, static_cast<size_t>(len - scale)) + "." +
+                 str.substr(static_cast<size_t>(len - scale),
+                            static_cast<size_t>(len));
       } else if (len == scale) {
-        return "0." + str;
+        result = "0." + str;
       } else {
-        std::string result = "0.";
-        for(int32_t i=0; i < scale - len; ++i) {
+        result = "0.";
+        for (int32_t i = 0; i < scale - len; ++i) {
           result += "0";
         }
-        return result + str;
+        result += str;
       }
     }
+    if (trimTrailingZeros) {
+      size_t pos = result.find_last_not_of('0');
+      if (result[pos] == '.') {
+        result = result.substr(0, pos);
+      } else {
+        result = result.substr(0, pos + 1);
+      }
+    }
+    return result;
   }
 
   std::string Int128::toHexString() const {
diff --git a/c++/src/Statistics.hh b/c++/src/Statistics.hh
index 633450f..426317f 100644
--- a/c++/src/Statistics.hh
+++ b/c++/src/Statistics.hh
@@ -665,14 +665,14 @@ namespace orc {
 
       proto::DecimalStatistics* decStats = pbStats.mutable_decimalstatistics();
       if (_stats.hasMinimum()) {
-        decStats->set_minimum(_stats.getMinimum().toString());
-        decStats->set_maximum(_stats.getMaximum().toString());
+        decStats->set_minimum(_stats.getMinimum().toString(true));
+        decStats->set_maximum(_stats.getMaximum().toString(true));
       } else {
         decStats->clear_minimum();
         decStats->clear_maximum();
       }
       if (_stats.hasSum()) {
-        decStats->set_sum(_stats.getSum().toString());
+        decStats->set_sum(_stats.getSum().toString(true));
       } else {
         decStats->clear_sum();
       }
diff --git a/c++/src/Vector.cc b/c++/src/Vector.cc
index 14c0ded..5c0e973 100644
--- a/c++/src/Vector.cc
+++ b/c++/src/Vector.cc
@@ -475,8 +475,8 @@ namespace orc {
     // PASS
   }
 
-  std::string Decimal::toString() const {
-    return value.toDecimalString(scale);
+  std::string Decimal::toString(bool trimTrailingZeros) const {
+    return value.toDecimalString(scale, trimTrailingZeros);
   }
 
   TimestampVectorBatch::TimestampVectorBatch(uint64_t _capacity,
diff --git a/c++/test/TestInt128.cc b/c++/test/TestInt128.cc
index f61f772..f72eeee 100644
--- a/c++/test/TestInt128.cc
+++ b/c++/test/TestInt128.cc
@@ -625,4 +625,30 @@ namespace orc {
     EXPECT_EQ(Int128(0), num);
   }
 
+  TEST(Int128, testToDecimalStringTrimTrailingZeros) {
+    EXPECT_TRUE(Int128(0).toDecimalString(0, false) == "0");
+    EXPECT_TRUE(Int128(0).toDecimalString(0, true) == "0");
+
+    EXPECT_TRUE(Int128(0).toDecimalString(4, false) == "0.0000");
+    EXPECT_TRUE(Int128(0).toDecimalString(4, true) == "0");
+
+    EXPECT_TRUE(Int128(12340000).toDecimalString(3, false) == "12340.000");
+    EXPECT_TRUE(Int128(12340000).toDecimalString(3, true) == "12340");
+
+    EXPECT_TRUE(Int128(12340000).toDecimalString(10, false) == "0.0012340000");
+    EXPECT_TRUE(Int128(12340000).toDecimalString(10, true) == "0.001234");
+
+    EXPECT_TRUE(Int128(12340000).toDecimalString(8, false) == "0.12340000");
+    EXPECT_TRUE(Int128(12340000).toDecimalString(8, true) == "0.1234");
+
+    EXPECT_TRUE(Int128(-12340000).toDecimalString(3, false) == "-12340.000");
+    EXPECT_TRUE(Int128(-12340000).toDecimalString(3, true) == "-12340");
+
+    EXPECT_TRUE(Int128(-12340000).toDecimalString(10, false) == "-0.0012340000");
+    EXPECT_TRUE(Int128(-12340000).toDecimalString(10, true) == "-0.001234");
+
+    EXPECT_TRUE(Int128(-12340000).toDecimalString(8, false) == "-0.12340000");
+    EXPECT_TRUE(Int128(-12340000).toDecimalString(8, true) == "-0.1234");
+  }
+
 }  // namespace orc