You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by ta...@apache.org on 2017/12/22 15:47:08 UTC

[5/5] impala git commit: IMPALA-5014: Part 1: Round when casting string to decimal

IMPALA-5014: Part 1: Round when casting string to decimal

In this patch we implement rounding when casting string to decimal if
DECIMAL_V2 is enabled. The backend method that parses strings and
converts them to decimals is refactored to make it easier to understand.

Testing:
- Added some BE tests.

Change-Id: Icd8b92727fb384e6ff2d145e4aab7ae5d27db26d
Reviewed-on: http://gerrit.cloudera.org:8080/8774
Reviewed-by: Taras Bobrovytsky <tb...@cloudera.com>
Tested-by: Impala Public Jenkins


Project: http://git-wip-us.apache.org/repos/asf/impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/impala/commit/a16fe803
Tree: http://git-wip-us.apache.org/repos/asf/impala/tree/a16fe803
Diff: http://git-wip-us.apache.org/repos/asf/impala/diff/a16fe803

Branch: refs/heads/master
Commit: a16fe803caac16d4ba64aeaa4fc750388aeca203
Parents: 4ce23f7
Author: Taras Bobrovytsky <tb...@cloudera.com>
Authored: Tue Nov 7 20:03:11 2017 -0800
Committer: Impala Public Jenkins <im...@gerrit.cloudera.org>
Committed: Fri Dec 22 11:39:08 2017 +0000

----------------------------------------------------------------------
 be/src/benchmarks/atod-benchmark.cc      |   2 +-
 be/src/benchmarks/overflow-benchmark.cc  |   2 +-
 be/src/exec/hdfs-scanner-ir.cc           |  12 +--
 be/src/exec/text-converter.inline.h      |   6 +-
 be/src/exprs/decimal-operators-ir.cc     |  10 +-
 be/src/exprs/expr-test.cc                |   6 +-
 be/src/runtime/decimal-test.cc           | 115 +++++++++++++++++++++--
 be/src/runtime/decimal-value.inline.h    |  31 +------
 be/src/util/decimal-util.h               |  25 +++++
 be/src/util/string-parser.cc             |  12 +--
 be/src/util/string-parser.h              | 126 +++++++++++++++++---------
 tests/query_test/test_decimal_casting.py |   2 +-
 12 files changed, 245 insertions(+), 104 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/impala/blob/a16fe803/be/src/benchmarks/atod-benchmark.cc
----------------------------------------------------------------------
diff --git a/be/src/benchmarks/atod-benchmark.cc b/be/src/benchmarks/atod-benchmark.cc
index 9beb942..1e86844 100644
--- a/be/src/benchmarks/atod-benchmark.cc
+++ b/be/src/benchmarks/atod-benchmark.cc
@@ -83,7 +83,7 @@ void TestImpala(int batch_size, void* d) {
       const StringValue& str = data->data[j];
       StringParser::ParseResult dummy;
       val = StringParser::StringToDecimal<Storage>(
-          str.ptr, str.len, column_type, &dummy);
+          str.ptr, str.len, column_type, false, &dummy);
       data->result[j] = val;
     }
   }

http://git-wip-us.apache.org/repos/asf/impala/blob/a16fe803/be/src/benchmarks/overflow-benchmark.cc
----------------------------------------------------------------------
diff --git a/be/src/benchmarks/overflow-benchmark.cc b/be/src/benchmarks/overflow-benchmark.cc
index 1d4caf6..d59ab9d 100644
--- a/be/src/benchmarks/overflow-benchmark.cc
+++ b/be/src/benchmarks/overflow-benchmark.cc
@@ -84,7 +84,7 @@ void AddTestData(TestData* data, int n) {
     string str = ss.str();
     StringParser::ParseResult dummy;
     val = StringParser::StringToDecimal<int128_t>(
-        str.c_str(), str.length(), column_type, &dummy);
+        str.c_str(), str.length(), column_type, false, &dummy);
     data->values.push_back(val);
     data->int128_values.push_back(numeric_limits<int128_t>::max() * Rand());
   }

http://git-wip-us.apache.org/repos/asf/impala/blob/a16fe803/be/src/exec/hdfs-scanner-ir.cc
----------------------------------------------------------------------
diff --git a/be/src/exec/hdfs-scanner-ir.cc b/be/src/exec/hdfs-scanner-ir.cc
index d7c2d81..ec1d2a3 100644
--- a/be/src/exec/hdfs-scanner-ir.cc
+++ b/be/src/exec/hdfs-scanner-ir.cc
@@ -90,9 +90,9 @@ ScalarExprEvaluator* HdfsScanner::GetConjunctEval(int idx) const {
 
 void StringToDecimalSymbolDummy() {
   // Force linker to to link the object file containing these functions.
-  StringToDecimal4(nullptr, 0, 0, 0, nullptr);
-  StringToDecimal8(nullptr, 0, 0, 0, nullptr);
-  StringToDecimal16(nullptr, 0, 0, 0, nullptr);
+  StringToDecimal4(nullptr, 0, 0, 0, false, nullptr);
+  StringToDecimal8(nullptr, 0, 0, 0, false, nullptr);
+  StringToDecimal16(nullptr, 0, 0, 0, false, nullptr);
 }
 
 // Define the string parsing functions for llvm.  Stamp out the templated functions
@@ -142,7 +142,7 @@ void IrStringToTimestamp(TimestampValue* out, const char* s, int len,
 extern "C"
 Decimal4Value IrStringToDecimal4(const char* s, int len, int type_precision,
     int type_scale, ParseResult* result)  {
-  auto ret = StringToDecimal4(s, len, type_precision, type_scale, result);
+  auto ret = StringToDecimal4(s, len, type_precision, type_scale, false, result);
   if (*result != ParseResult::PARSE_SUCCESS) *result = ParseResult::PARSE_FAILURE;
   return ret;
 }
@@ -150,7 +150,7 @@ Decimal4Value IrStringToDecimal4(const char* s, int len, int type_precision,
 extern "C"
 Decimal8Value IrStringToDecimal8(const char* s, int len, int type_precision,
     int type_scale, ParseResult* result)  {
-  auto ret = StringToDecimal8(s, len, type_precision, type_scale, result);
+  auto ret = StringToDecimal8(s, len, type_precision, type_scale, false, result);
   if (*result != ParseResult::PARSE_SUCCESS) *result = ParseResult::PARSE_FAILURE;
   return ret;
 }
@@ -158,7 +158,7 @@ Decimal8Value IrStringToDecimal8(const char* s, int len, int type_precision,
 extern "C"
 Decimal16Value IrStringToDecimal16(const char* s, int len, int type_precision,
     int type_scale, ParseResult* result)  {
-  auto ret = StringToDecimal16(s, len, type_precision, type_scale, result);
+  auto ret = StringToDecimal16(s, len, type_precision, type_scale, false, result);
   if (*result != ParseResult::PARSE_SUCCESS) *result = ParseResult::PARSE_FAILURE;
   return ret;
 }

http://git-wip-us.apache.org/repos/asf/impala/blob/a16fe803/be/src/exec/text-converter.inline.h
----------------------------------------------------------------------
diff --git a/be/src/exec/text-converter.inline.h b/be/src/exec/text-converter.inline.h
index 49f4e8a..c56bcce 100644
--- a/be/src/exec/text-converter.inline.h
+++ b/be/src/exec/text-converter.inline.h
@@ -141,12 +141,12 @@ inline bool TextConverter::WriteSlot(const SlotDescriptor* slot_desc, Tuple* tup
         case 4:
           *reinterpret_cast<Decimal4Value*>(slot) =
               StringParser::StringToDecimal<int32_t>(
-                  data, len, slot_desc->type(), &parse_result);
+                  data, len, slot_desc->type(), false, &parse_result);
           break;
         case 8:
           *reinterpret_cast<Decimal8Value*>(slot) =
               StringParser::StringToDecimal<int64_t>(
-                  data, len, slot_desc->type(), &parse_result);
+                  data, len, slot_desc->type(), false, &parse_result);
           break;
         case 12:
           DCHECK(false) << "Planner should not generate this.";
@@ -154,7 +154,7 @@ inline bool TextConverter::WriteSlot(const SlotDescriptor* slot_desc, Tuple* tup
         case 16:
           *reinterpret_cast<Decimal16Value*>(slot) =
               StringParser::StringToDecimal<int128_t>(
-                  data, len, slot_desc->type(), &parse_result);
+                  data, len, slot_desc->type(), false, &parse_result);
           break;
         default:
           DCHECK(false) << "Decimal slots can't be this size.";

http://git-wip-us.apache.org/repos/asf/impala/blob/a16fe803/be/src/exprs/decimal-operators-ir.cc
----------------------------------------------------------------------
diff --git a/be/src/exprs/decimal-operators-ir.cc b/be/src/exprs/decimal-operators-ir.cc
index bb54a0f..8612561 100644
--- a/be/src/exprs/decimal-operators-ir.cc
+++ b/be/src/exprs/decimal-operators-ir.cc
@@ -534,22 +534,26 @@ IR_ALWAYS_INLINE DecimalVal DecimalOperators::CastToDecimalVal(
   DecimalVal dv;
   int precision = ctx->impl()->GetConstFnAttr(FunctionContextImpl::RETURN_TYPE_PRECISION);
   int scale = ctx->impl()->GetConstFnAttr(FunctionContextImpl::RETURN_TYPE_SCALE);
+  bool is_decimal_v2 = ctx->impl()->GetConstFnAttr(FunctionContextImpl::DECIMAL_V2);
   switch (ColumnType::GetDecimalByteSize(precision)) {
     case 4: {
       Decimal4Value dv4 = StringParser::StringToDecimal<int32_t>(
-          reinterpret_cast<char*>(val.ptr), val.len, precision, scale, &result);
+          reinterpret_cast<char*>(val.ptr), val.len, precision, scale,
+          is_decimal_v2, &result);
       dv = DecimalVal(dv4.value());
       break;
     }
     case 8: {
       Decimal8Value dv8 = StringParser::StringToDecimal<int64_t>(
-          reinterpret_cast<char*>(val.ptr), val.len, precision, scale, &result);
+          reinterpret_cast<char*>(val.ptr), val.len, precision, scale,
+          is_decimal_v2, &result);
       dv = DecimalVal(dv8.value());
       break;
     }
     case 16: {
       Decimal16Value dv16 = StringParser::StringToDecimal<int128_t>(
-          reinterpret_cast<char*>(val.ptr), val.len, precision, scale, &result);
+          reinterpret_cast<char*>(val.ptr), val.len, precision, scale,
+          is_decimal_v2, &result);
       dv = DecimalVal(dv16.value());
       break;
     }

http://git-wip-us.apache.org/repos/asf/impala/blob/a16fe803/be/src/exprs/expr-test.cc
----------------------------------------------------------------------
diff --git a/be/src/exprs/expr-test.cc b/be/src/exprs/expr-test.cc
index 35599c0..88d957d 100644
--- a/be/src/exprs/expr-test.cc
+++ b/be/src/exprs/expr-test.cc
@@ -494,15 +494,15 @@ class ExprTest : public testing::Test {
     switch (expected_type.GetByteSize()) {
       case 4:
         EXPECT_EQ(expected_result.value(), StringParser::StringToDecimal<int32_t>(
-            value.data(), value.size(), expected_type, &result).value()) << query;
+            value.data(), value.size(), expected_type, false, &result).value()) << query;
         break;
       case 8:
         EXPECT_EQ(expected_result.value(), StringParser::StringToDecimal<int64_t>(
-            value.data(), value.size(), expected_type, &result).value()) << query;
+            value.data(), value.size(), expected_type, false, &result).value()) << query;
         break;
       case 16:
         EXPECT_EQ(expected_result.value(), StringParser::StringToDecimal<int128_t>(
-            value.data(), value.size(), expected_type, &result).value()) << query;
+            value.data(), value.size(), expected_type, false, &result).value()) << query;
         break;
       default:
         EXPECT_TRUE(false) << expected_type << " " << expected_type.GetByteSize();

http://git-wip-us.apache.org/repos/asf/impala/blob/a16fe803/be/src/runtime/decimal-test.cc
----------------------------------------------------------------------
diff --git a/be/src/runtime/decimal-test.cc b/be/src/runtime/decimal-test.cc
index f84fdaf..25d2f65 100644
--- a/be/src/runtime/decimal-test.cc
+++ b/be/src/runtime/decimal-test.cc
@@ -45,11 +45,11 @@ void VerifyEquals(const DecimalValue<T>& t1, const DecimalValue<T>& t2) {
 }
 
 template <typename T>
-void VerifyParse(const string& s, int precision, int scale,
+void VerifyParse(const string& s, int precision, int scale, bool round,
     const DecimalValue<T>& expected_val, StringParser::ParseResult expected_result) {
   StringParser::ParseResult parse_result;
   DecimalValue<T> val = StringParser::StringToDecimal<T>(
-      s.c_str(), s.size(), precision, scale, &parse_result);
+      s.c_str(), s.size(), precision, scale, round, &parse_result);
   EXPECT_EQ(expected_result, parse_result) << "Failed test string: " << s;
   if (expected_result == StringParser::PARSE_SUCCESS ||
       expected_result == StringParser::PARSE_UNDERFLOW) {
@@ -57,6 +57,22 @@ void VerifyParse(const string& s, int precision, int scale,
   }
 }
 
+template <typename T>
+void VerifyParse(const string& s, int precision, int scale,
+    const DecimalValue<T>& expected_val, StringParser::ParseResult expected_result) {
+  VerifyParse(s, precision, scale, false, expected_val, expected_result);
+  VerifyParse(s, precision, scale, true, expected_val, expected_result);
+}
+
+template <typename T>
+void VerifyParse(const string& s, int precision, int scale,
+    const DecimalValue<T>& expected_val_v1, StringParser::ParseResult expected_result_v1,
+    const DecimalValue<T>& expected_val_v2, StringParser::ParseResult expected_result_v2)
+{
+  VerifyParse(s, precision, scale, false, expected_val_v1, expected_result_v1);
+  VerifyParse(s, precision, scale, true, expected_val_v2, expected_result_v2);
+}
+
 template<typename T>
 void VerifyToString(const T& decimal, int precision, int scale, const string& expected) {
   EXPECT_EQ(decimal.ToString(precision, scale), expected);
@@ -69,6 +85,17 @@ void StringToAllDecimals(const string& s, int precision, int scale, int32_t val,
   VerifyParse(s, precision, scale, Decimal16Value(val), result);
 }
 
+void StringToAllDecimals(const string& s, int precision, int scale,
+    int32_t val_v1, StringParser::ParseResult result_v1,
+    int32_t val_v2, StringParser::ParseResult result_v2) {
+  VerifyParse(s, precision, scale,
+      Decimal4Value(val_v1), result_v1, Decimal4Value(val_v2), result_v2);
+  VerifyParse(s, precision, scale,
+      Decimal8Value(val_v1), result_v1, Decimal8Value(val_v2), result_v2);
+  VerifyParse(s, precision, scale,
+      Decimal16Value(val_v1), result_v1, Decimal16Value(val_v2), result_v2);
+}
+
 TEST(IntToDecimal, Basic) {
   Decimal16Value d16;
   bool overflow = false;
@@ -271,13 +298,27 @@ TEST(StringToDecimal, Basic) {
   StringToAllDecimals(".1", 10, 2, 10, StringParser::PARSE_SUCCESS);
   StringToAllDecimals("00012.3", 10, 2, 1230, StringParser::PARSE_SUCCESS);
   StringToAllDecimals("-00012.3", 10, 2, -1230, StringParser::PARSE_SUCCESS);
+  StringToAllDecimals("0.00000", 6, 5, 0, StringParser::PARSE_SUCCESS);
+  StringToAllDecimals("1.00000", 6, 5, 100000, StringParser::PARSE_SUCCESS);
+  StringToAllDecimals("0.000000", 6, 5, 0, StringParser::PARSE_UNDERFLOW);
+  StringToAllDecimals("1.000000", 6, 5, 100000, StringParser::PARSE_UNDERFLOW);
+  StringToAllDecimals("1.000004", 6, 5, 100000, StringParser::PARSE_UNDERFLOW);
+  StringToAllDecimals("1.000005", 6, 5,
+      100000, StringParser::PARSE_UNDERFLOW,
+      100001, StringParser::PARSE_UNDERFLOW);
+  StringToAllDecimals("0.4", 5, 0, 0, StringParser::PARSE_UNDERFLOW);
+  StringToAllDecimals("0.5", 5, 0,
+      0, StringParser::PARSE_UNDERFLOW,
+      1, StringParser::PARSE_UNDERFLOW);
 
   StringToAllDecimals("123.45", 10, 2, 12345, StringParser::PARSE_SUCCESS);
   StringToAllDecimals(".45", 10, 2, 45, StringParser::PARSE_SUCCESS);
   StringToAllDecimals("-.45", 10, 2, -45, StringParser::PARSE_SUCCESS);
   StringToAllDecimals(" 123.4 ", 10, 5, 12340000, StringParser::PARSE_SUCCESS);
   StringToAllDecimals("-123.45", 10, 5, -12345000, StringParser::PARSE_SUCCESS);
-  StringToAllDecimals("-123.456", 10, 2, -12345, StringParser::PARSE_UNDERFLOW);
+  StringToAllDecimals("-123.456", 10, 2,
+      -12345, StringParser::PARSE_UNDERFLOW,
+      -12346, StringParser::PARSE_UNDERFLOW);
 
   StringToAllDecimals("e", 2, 0, 0, StringParser::PARSE_FAILURE);
   StringToAllDecimals("E", 2, 0, 0, StringParser::PARSE_FAILURE);
@@ -347,7 +388,8 @@ TEST(StringToDecimal, LargeDecimals) {
   VerifyParse("123456.78", 8, 3,
       Decimal8Value(12345678L), StringParser::PARSE_OVERFLOW);
   VerifyParse("1234.5678", 8, 3,
-      Decimal8Value(1234567L), StringParser::PARSE_UNDERFLOW);
+      Decimal8Value(1234567L), StringParser::PARSE_UNDERFLOW,
+      Decimal8Value(1234568L), StringParser::PARSE_UNDERFLOW);
   VerifyParse("12345.678", 8, 3,
       Decimal16Value(12345678L), StringParser::PARSE_SUCCESS);
   VerifyParse("-12345.678", 8, 3,
@@ -355,7 +397,8 @@ TEST(StringToDecimal, LargeDecimals) {
   VerifyParse("123456.78", 8, 3,
       Decimal16Value(12345678L), StringParser::PARSE_OVERFLOW);
   VerifyParse("1234.5678", 8, 3,
-      Decimal16Value(1234567L), StringParser::PARSE_UNDERFLOW);
+      Decimal16Value(1234567L), StringParser::PARSE_UNDERFLOW,
+      Decimal16Value(1234568L), StringParser::PARSE_UNDERFLOW);
 
   // Test max unscaled value for each of the decimal types.
   VerifyParse("999999999", 9, 0,
@@ -411,16 +454,68 @@ TEST(StringToDecimal, LargeDecimals) {
       38, 38, Decimal16Value(-result), StringParser::PARSE_SUCCESS);
   VerifyParse("-.99999999999999999999999999999999999999e1",
       38, 38, Decimal16Value(-result), StringParser::PARSE_OVERFLOW);
-  VerifyParse("-.999999999999999999999999999999999999990e-1",
-      38, 38, Decimal16Value(-result / 10), StringParser::PARSE_UNDERFLOW);
-  VerifyParse("-.999999999999999999999999999999999999990000000000000000e-20",
-      38, 38,
+  VerifyParse("-.999999999999999999999999999999999999990e-1", 38, 38,
+      Decimal16Value(-result / 10), StringParser::PARSE_UNDERFLOW,
+      Decimal16Value(-result / 10 - 1), StringParser::PARSE_UNDERFLOW);
+  VerifyParse("-.999999999999999999999999999999999999990000000000000000e-20", 38, 38,
       Decimal16Value(-result / DecimalUtil::GetScaleMultiplier<int128_t>(20)),
+      StringParser::PARSE_UNDERFLOW,
+      Decimal16Value(-result / DecimalUtil::GetScaleMultiplier<int128_t>(20) - 1),
       StringParser::PARSE_UNDERFLOW);
   VerifyParse("100000000000000000000000000000000000000",
       38, 0, Decimal16Value(0), StringParser::PARSE_OVERFLOW);
   VerifyParse("-100000000000000000000000000000000000000",
       38, 0, Decimal16Value(0), StringParser::PARSE_OVERFLOW);
+
+  // Rounding tests.
+  VerifyParse("555.554", 5, 2, Decimal4Value(55555), StringParser::PARSE_UNDERFLOW);
+  VerifyParse("555.555", 5, 2,
+      Decimal4Value(55555), StringParser::PARSE_UNDERFLOW,
+      Decimal4Value(55556), StringParser::PARSE_UNDERFLOW);
+  // Too many digits to the left of the dot so we overflow.
+  VerifyParse("555.555e1", 5, 2, Decimal4Value(0), StringParser::PARSE_OVERFLOW);
+  VerifyParse("-555.555e1", 5, 2, Decimal4Value(0), StringParser::PARSE_OVERFLOW);
+  VerifyParse("5555.555", 5, 2, Decimal16Value(0), StringParser::PARSE_OVERFLOW);
+  VerifyParse("-555.555", 5, 2,
+        Decimal4Value(-55555), StringParser::PARSE_UNDERFLOW,
+        Decimal4Value(-55556), StringParser::PARSE_UNDERFLOW);
+  VerifyParse("-5555.555", 5, 2, Decimal16Value(0), StringParser::PARSE_OVERFLOW);
+  VerifyParse("5555.555e-1", 5, 2,
+          Decimal4Value(55555), StringParser::PARSE_UNDERFLOW,
+          Decimal4Value(55556), StringParser::PARSE_UNDERFLOW);
+  VerifyParse("55555.555e-1", 5, 2, Decimal16Value(0), StringParser::PARSE_OVERFLOW);
+  // Too many digits to the right of the dot and not enough to the left. Rounding via
+  // ScaleDownAndRound().
+  VerifyParse("5.55444", 5, 2, Decimal4Value(555), StringParser::PARSE_UNDERFLOW);
+  VerifyParse("5.55555", 5, 2,
+        Decimal4Value(555), StringParser::PARSE_UNDERFLOW,
+        Decimal4Value(556), StringParser::PARSE_UNDERFLOW);
+  VerifyParse("5.555e-9", 5, 2, Decimal4Value(0), StringParser::PARSE_UNDERFLOW);
+  // The number of digits to the left of the dot equals to precision - scale. Rounding
+  // by adding 1 if the first truncated digit is greater or equal to 5.
+  VerifyParse("555.554", 5, 2, Decimal4Value(55555), StringParser::PARSE_UNDERFLOW);
+  VerifyParse("555.555", 5, 2,
+      Decimal4Value(55555), StringParser::PARSE_UNDERFLOW,
+      Decimal4Value(55556), StringParser::PARSE_UNDERFLOW);
+  VerifyParse("5.55554e2", 5, 2, Decimal4Value(55555), StringParser::PARSE_UNDERFLOW);
+  VerifyParse("5.55555e2", 5, 2,
+      Decimal4Value(55555), StringParser::PARSE_UNDERFLOW,
+      Decimal4Value(55556), StringParser::PARSE_UNDERFLOW);
+  VerifyParse("55555.4e-2", 5, 2, Decimal4Value(55555), StringParser::PARSE_UNDERFLOW);
+  VerifyParse("55555.5e-2", 5, 2,
+          Decimal4Value(55555), StringParser::PARSE_UNDERFLOW,
+          Decimal4Value(55556), StringParser::PARSE_UNDERFLOW);
+  // Rounding causes overflow.
+  VerifyParse("999.994", 5, 2, Decimal4Value(99999), StringParser::PARSE_UNDERFLOW);
+  VerifyParse("999.995", 5, 2,
+        Decimal4Value(99999), StringParser::PARSE_UNDERFLOW,
+        Decimal4Value(0), StringParser::PARSE_OVERFLOW);
+  VerifyParse("9.99995e2", 5, 2,
+          Decimal4Value(99999), StringParser::PARSE_UNDERFLOW,
+          Decimal4Value(0), StringParser::PARSE_OVERFLOW);
+  VerifyParse("99999.5e-2", 5, 2,
+            Decimal4Value(99999), StringParser::PARSE_UNDERFLOW,
+            Decimal4Value(0), StringParser::PARSE_OVERFLOW);
 }
 
 TEST(DecimalTest, Overflow) {
@@ -780,7 +875,7 @@ TEST(DecimalArithmetic, DivideLargeScales) {
   StringParser::ParseResult result;
   const char* data = "319391280635.61476055";
   Decimal16Value x =
-      StringParser::StringToDecimal<int128_t>(data, strlen(data), t1, &result);
+      StringParser::StringToDecimal<int128_t>(data, strlen(data), t1, false, &result);
   Decimal16Value y(10000);
   bool is_nan = false;
   bool is_overflow = false;

http://git-wip-us.apache.org/repos/asf/impala/blob/a16fe803/be/src/runtime/decimal-value.inline.h
----------------------------------------------------------------------
diff --git a/be/src/runtime/decimal-value.inline.h b/be/src/runtime/decimal-value.inline.h
index 8f76335..2097ac8 100644
--- a/be/src/runtime/decimal-value.inline.h
+++ b/be/src/runtime/decimal-value.inline.h
@@ -140,27 +140,6 @@ inline DecimalValue<T> DecimalValue<T>::ScaleTo(int src_scale, int dst_scale,
 
 namespace detail {
 
-// Helper function to scale down over multiplied values back into result type,
-// truncating if round is false or rounding otherwise.
-template<typename T, typename RESULT_T>
-inline RESULT_T ScaleDownAndRound(RESULT_T value, int delta_scale, bool round) {
-  DCHECK_GT(delta_scale, 0);
-  // Multiplier can always be computed in potentially smaller type T
-  T multiplier = DecimalUtil::GetScaleMultiplier<T>(delta_scale);
-  DCHECK(multiplier > 1 && multiplier % 2 == 0);
-  RESULT_T result = value / multiplier;
-  if (round) {
-    RESULT_T remainder = value % multiplier;
-    // In general, shifting down the multiplier is not safe, but we know
-    // here that it is a multiple of two.
-    if (abs(remainder) >= (multiplier >> 1)) {
-      // Bias at zero must be corrected by sign of dividend.
-      result += BitUtil::Sign(value);
-    }
-  }
-  return result;
-}
-
 // If we have a number with 'num_lz' leading zeros, and we scale it up by 10^scale_diff,
 // this function returns the minimum number of leading zeros the result would have.
 inline int MinLeadingZerosAfterScaling(int num_lz, int scale_diff) {
@@ -232,7 +211,7 @@ inline int128_t AddLarge(int128_t x, int x_scale, int128_t y, int y_scale,
     right = x_right + y_right;
   }
   if (result_scale_decrease > 0) {
-    right = detail::ScaleDownAndRound<int128_t, int128_t>(
+    right = DecimalUtil::ScaleDownAndRound<int128_t>(
         right, result_scale_decrease, round);
   }
   DCHECK(right >= 0);
@@ -288,7 +267,7 @@ inline int128_t SubtractLarge(int128_t x, int x_scale, int128_t y, int y_scale,
   if (result_scale_decrease > 0) {
     // At this point, the scale of the fractional part is either x_scale or y_scale,
     // whichever is greater. We scale down the fractional part to result_scale here.
-    right = detail::ScaleDownAndRound<int128_t, int128_t>(
+    right = DecimalUtil::ScaleDownAndRound<int128_t>(
         right, result_scale_decrease, round);
   }
 
@@ -351,7 +330,7 @@ inline DecimalValue<RESULT_T> DecimalValue<T>::Add(int this_scale,
     if (result_scale_decrease > 0) {
       // After first adjusting x and y to the same scale and adding them together, we now
       // need scale down the result to result_scale.
-      x = detail::ScaleDownAndRound<T, RESULT_T>(x, result_scale_decrease, round);
+      x = DecimalUtil::ScaleDownAndRound<RESULT_T>(x, result_scale_decrease, round);
     }
     return DecimalValue<RESULT_T>(x);
   }
@@ -418,7 +397,7 @@ DecimalValue<RESULT_T> DecimalValue<T>::Multiply(int this_scale,
       DCHECK(*overflow);
     } else {
       int256_t intermediate_result = ConvertToInt256(x) * ConvertToInt256(y);
-      intermediate_result = detail::ScaleDownAndRound<int256_t, int256_t>(
+      intermediate_result = DecimalUtil::ScaleDownAndRound<int256_t>(
           intermediate_result, delta_scale, round);
       result = ConvertToInt128(
           intermediate_result, DecimalUtil::MAX_UNSCALED_DECIMAL16, overflow);
@@ -436,7 +415,7 @@ DecimalValue<RESULT_T> DecimalValue<T>::Multiply(int this_scale,
       result = x * y;
       // The largest value that result can have here is (2^64 - 1) * (2^63 - 1), which is
       // greater than MAX_UNSCALED_DECIMAL16.
-      result = detail::ScaleDownAndRound<T, RESULT_T>(result, delta_scale, round);
+      result = DecimalUtil::ScaleDownAndRound<RESULT_T>(result, delta_scale, round);
       // Since delta_scale is greater than zero, result can now be at most
       // ((2^64 - 1) * (2^63 - 1)) / 10, which is less than MAX_UNSCALED_DECIMAL16, so
       // there is no need to check for overflow.

http://git-wip-us.apache.org/repos/asf/impala/blob/a16fe803/be/src/util/decimal-util.h
----------------------------------------------------------------------
diff --git a/be/src/util/decimal-util.h b/be/src/util/decimal-util.h
index 288caf3..c87b2c5 100644
--- a/be/src/util/decimal-util.h
+++ b/be/src/util/decimal-util.h
@@ -69,6 +69,31 @@ class DecimalUtil {
     return result;
   }
 
+  /// Helper function to scale down values by 10^delta_scale, truncating if
+  /// round is false or rounding otherwise.
+  template<typename T>
+  static inline T ScaleDownAndRound(T value, int delta_scale, bool round) {
+    DCHECK_GT(delta_scale, 0);
+    T divisor = DecimalUtil::GetScaleMultiplier<T>(delta_scale);
+    if (divisor > 0) {
+      DCHECK(divisor % 2 == 0);
+      T result = value / divisor;
+      if (round) {
+        T remainder = value % divisor;
+        // In general, shifting down the multiplier is not safe, but we know
+        // here that it is a multiple of two.
+        if (abs(remainder) >= (divisor >> 1)) {
+          // Bias at zero must be corrected by sign of dividend.
+          result += BitUtil::Sign(value);
+        }
+      }
+      return result;
+    } else {
+      DCHECK(divisor == -1);
+      return 0;
+    }
+  }
+
   /// Write decimals as big endian (byte comparable) in fixed_len_size bytes.
   template<typename T>
   static inline void EncodeToFixedLenByteArray(

http://git-wip-us.apache.org/repos/asf/impala/blob/a16fe803/be/src/util/string-parser.cc
----------------------------------------------------------------------
diff --git a/be/src/util/string-parser.cc b/be/src/util/string-parser.cc
index f59c6b88..1f03de1 100644
--- a/be/src/util/string-parser.cc
+++ b/be/src/util/string-parser.cc
@@ -21,20 +21,20 @@ namespace impala {
 
 using ParseResult = StringParser::ParseResult;
 Decimal4Value StringToDecimal4(const char* s, int len, int type_precision,
-    int type_scale, StringParser::ParseResult* result) {
+    int type_scale, bool round, StringParser::ParseResult* result) {
   return StringParser::StringToDecimal<int32_t>(s, len, type_precision,
-      type_scale, result);
+      type_scale, round, result);
 }
 
 Decimal8Value StringToDecimal8(const char* s, int len, int type_precision,
-    int type_scale, StringParser::ParseResult* result) {
+    int type_scale, bool round, StringParser::ParseResult* result) {
   return StringParser::StringToDecimal<int64_t>(s, len, type_precision,
-      type_scale, result);
+      type_scale, round, result);
 }
 
 Decimal16Value StringToDecimal16(const char* s, int len, int type_precision,
-    int type_scale, StringParser::ParseResult* result) {
+    int type_scale, bool round, StringParser::ParseResult* result) {
   return StringParser::StringToDecimal<int128_t>(s, len, type_precision,
-      type_scale, result);
+      type_scale, round, result);
 }
 }

http://git-wip-us.apache.org/repos/asf/impala/blob/a16fe803/be/src/util/string-parser.h
----------------------------------------------------------------------
diff --git a/be/src/util/string-parser.h b/be/src/util/string-parser.h
index 10c3fc7..606e5b8 100644
--- a/be/src/util/string-parser.h
+++ b/be/src/util/string-parser.h
@@ -112,13 +112,13 @@ class StringParser {
   /// On underflow, the truncated value is returned.
   template <typename T>
   static inline DecimalValue<T> StringToDecimal(const char* s, int len,
-      const ColumnType& type, StringParser::ParseResult* result) {
-    return StringToDecimal<T>(s, len, type.precision, type.scale, result);
+      const ColumnType& type, bool round, StringParser::ParseResult* result) {
+    return StringToDecimal<T>(s, len, type.precision, type.scale, round, result);
   }
 
   template <typename T>
   static inline DecimalValue<T> StringToDecimal(const char* s, int len,
-      int type_precision, int type_scale, StringParser::ParseResult* result) {
+      int type_precision, int type_scale, bool round, StringParser::ParseResult* result) {
     // Special cases:
     //   1) '' == Fail, an empty string fails to parse.
     //   2) '   #   ' == #, leading and trailing white space is ignored.
@@ -156,7 +156,7 @@ class StringParser {
     // Ignore leading zeros even after a dot. This allows for differentiating between
     // cases like 0.01e2, which would fit in a DECIMAL(1, 0), and 0.10e2, which would
     // overflow.
-    int scale = 0;
+    int digits_after_dot_count = 0;
     int found_dot = 0;
     if (len > 0 && *s == '.') {
       found_dot = 1;
@@ -164,30 +164,36 @@ class StringParser {
       --len;
       while (len > 0 && UNLIKELY(*s == '0')) {
         found_value = true;
-        ++scale;
+        ++digits_after_dot_count;
         ++s;
         --len;
       }
     }
 
-    int precision = 0;
+    int total_digits_count = 0;
     bool found_exponent = false;
     int8_t exponent = 0;
+    int first_truncated_digit = 0;
     T value = 0;
     for (int i = 0; i < len; ++i) {
-      const char& c = s[i];
+      const char c = s[i];
       if (LIKELY('0' <= c && c <= '9')) {
         found_value = true;
         // Ignore digits once the type's precision limit is reached. This avoids
         // overflowing the underlying storage while handling a string like
         // 10000000000e-10 into a DECIMAL(1, 0). Adjustments for ignored digits and
         // an exponent will be made later.
-        if (LIKELY(type_precision > precision)) {
-          value = (value * 10) + (c - '0');  // Benchmarks are faster with parenthesis...
+        if (LIKELY(total_digits_count < type_precision)) {
+          // Benchmarks are faster with parenthesis.
+          T new_value = (value * 10) + (c - '0');
+          DCHECK(new_value >= value);
+          value = new_value;
+        } else if (UNLIKELY(round && total_digits_count == type_precision)) {
+          first_truncated_digit = c - '0';
         }
-        DCHECK(value >= 0);  // For some reason DCHECK_GE doesn't work with int128_t.
-        ++precision;
-        scale += found_dot;
+        DCHECK(value >= 0); // DCHECK_GE does not work with int128_t
+        ++total_digits_count;
+        digits_after_dot_count += found_dot;
       } else if (c == '.' && LIKELY(!found_dot)) {
         found_dot = 1;
       } else if ((c == 'e' || c == 'E') && LIKELY(!found_exponent)) {
@@ -207,48 +213,56 @@ class StringParser {
     }
 
     // Find the number of truncated digits before adjusting the precision for an exponent.
-    int truncated_digit_count = precision - type_precision;
-    if (exponent > scale) {
-      // Ex: 0.1e3 (which at this point would have precision == 1 and scale == 1), the
-      //     scale must be set to 0 and the value set to 100 which means a precision of 3.
-      precision += exponent - scale;
-      value *= DecimalUtil::GetScaleMultiplier<T>(exponent - scale);
-      scale = 0;
-    } else {
-      // Ex: 100e-4, the scale must be set to 4 but no adjustment to the value is needed,
-      //     the precision must also be set to 4 but that will be done below for the
-      //     non-exponent case anyways.
-      scale -= exponent;
-    }
-    // Ex: 0.001, at this point would have precision 1 and scale 3 since leading zeros
-    //     were ignored during previous parsing.
-    if (scale > precision) precision = scale;
+    int truncated_digit_count = std::max(total_digits_count - type_precision, 0);
+    // 'scale' and 'precision' refer to the scale and precision of the number that
+    // is contained the string that we are parsing. The scale of 'value' may be
+    // different because some digits may have been truncated.
+    int scale, precision;
+    ApplyExponent(total_digits_count, digits_after_dot_count,
+        exponent, &value, &precision, &scale);
 
     // Microbenchmarks show that beyond this point, returning on parse failure is slower
     // than just letting the function run out.
     *result = StringParser::PARSE_SUCCESS;
     if (UNLIKELY(precision - scale > type_precision - type_scale)) {
+      // The number in the string has too many digits to the left of the dot,
+      // so we overflow.
       *result = StringParser::PARSE_OVERFLOW;
     } else if (UNLIKELY(scale > type_scale)) {
+      // There are too many digits to the right of the dot in the string we are parsing.
       *result = StringParser::PARSE_UNDERFLOW;
-      int shift = scale - type_scale;
-      if (UNLIKELY(truncated_digit_count > 0)) shift -= truncated_digit_count;
+      // The scale of 'value'.
+      int value_scale = scale - truncated_digit_count;
+      int shift = value_scale - type_scale;
       if (shift > 0) {
-        T divisor = DecimalUtil::GetScaleMultiplier<T>(shift);
-        if (LIKELY(divisor >= 0)) {
-          value /= divisor;
-        } else {
-          DCHECK(divisor == -1); // DCHECK_EQ doesn't work with int128_t.
-          value = 0;
+        // There are less than maximum number of digits to the left of the dot.
+        value = DecimalUtil::ScaleDownAndRound<T>(value, shift, round);
+        DCHECK(value >= 0);
+        DCHECK(value < DecimalUtil::GetScaleMultiplier<int128_t>(type_precision));
+      } else {
+        // There are a maximum number of digits to the left of the dot. We round by
+        // looking at the first truncated digit.
+        DCHECK_EQ(shift, 0);
+        DCHECK(0 <= first_truncated_digit && first_truncated_digit <= 9);
+        DCHECK(first_truncated_digit == 0 || truncated_digit_count != 0);
+        DCHECK(first_truncated_digit == 0 || round);
+        // Apply the rounding.
+        value += (first_truncated_digit >= 5);
+        DCHECK(value >= 0);
+        DCHECK(value <= DecimalUtil::GetScaleMultiplier<int128_t>(type_precision));
+        if (UNLIKELY(value == DecimalUtil::GetScaleMultiplier<T>(type_precision))) {
+          // Overflow due to rounding.
+          *result = StringParser::PARSE_OVERFLOW;
         }
       }
-      DCHECK(value >= 0); // DCHECK_GE doesn't work with int128_t.
     } else if (UNLIKELY(!found_value && !found_dot)) {
       *result = StringParser::PARSE_FAILURE;
-    }
-
-    if (type_scale > scale) {
+    } else if (type_scale > scale) {
+      // There were not enough digits after the dot, so we have scale up the value.
+      DCHECK_EQ(truncated_digit_count, 0);
       value *= DecimalUtil::GetScaleMultiplier<T>(type_scale - scale);
+      // Overflow should be impossible.
+      DCHECK(value < DecimalUtil::GetScaleMultiplier<int128_t>(type_precision));
     }
 
     return DecimalValue<T>(is_negative ? -value : value);
@@ -372,6 +386,30 @@ class StringParser {
     return static_cast<T>(negative ? -val : val);
   }
 
+  /// Helper function that applies the exponent to the value. Also computes and
+  /// sets the precision and scale.
+  template <typename T>
+  static inline void ApplyExponent(int total_digits_count,
+      int digits_after_dot_count,int8_t exponent, T* value, int* precision, int* scale) {
+    if (exponent > digits_after_dot_count) {
+      // Ex: 0.1e3 (which at this point would have precision == 1 and scale == 1), the
+      //     scale must be set to 0 and the value set to 100 which means a precision of 3.
+      *value *= DecimalUtil::GetScaleMultiplier<T>(exponent - digits_after_dot_count);
+      *precision = total_digits_count + (exponent - digits_after_dot_count);
+      *scale = 0;
+    } else {
+      // Ex: 100e-4, the scale must be set to 4 but no adjustment to the value is needed,
+      //     the precision must also be set to 4 but that will be done below for the
+      //     non-exponent case anyways.
+      *precision = total_digits_count;
+      *scale = digits_after_dot_count - exponent;
+      // Ex: 0.001, at this point would have precision 1 and scale 3 since leading zeros
+      //     were ignored during previous parsing.
+      if (*precision < *scale) *precision = *scale;
+    }
+    DCHECK_GE(*precision, *scale);
+  }
+
   /// Checks if "inf" or "infinity" matches 's' in a case-insensitive manner. The match
   /// has to start at the beginning of 's', leading whitespace is considered invalid.
   /// Trailing whitespace characters are allowed.
@@ -580,7 +618,7 @@ class StringParser {
     return val;
   }
 
-  static inline bool IsWhitespace(const char& c) {
+  static inline bool IsWhitespace(const char c) {
     return c == ' ' || UNLIKELY(c == '\t' || c == '\n' || c == '\v' || c == '\f'
         || c == '\r');
   }
@@ -600,13 +638,13 @@ inline int StringParser::StringParseTraits<int64_t>::max_ascii_len() { return 19
 
 // These functions are too large to benefit from inlining.
 Decimal4Value StringToDecimal4(const char* s, int len, int type_precision,
-    int type_scale, StringParser::ParseResult* result);
+    int type_scale, bool round, StringParser::ParseResult* result);
 
 Decimal8Value StringToDecimal8(const char* s, int len, int type_precision,
-    int type_scale, StringParser::ParseResult* result);
+    int type_scale, bool round, StringParser::ParseResult* result);
 
 Decimal16Value StringToDecimal16(const char* s, int len, int type_precision,
-    int type_scale, StringParser::ParseResult* result);
+    int type_scale, bool round, StringParser::ParseResult* result);
 
 }
 #endif

http://git-wip-us.apache.org/repos/asf/impala/blob/a16fe803/tests/query_test/test_decimal_casting.py
----------------------------------------------------------------------
diff --git a/tests/query_test/test_decimal_casting.py b/tests/query_test/test_decimal_casting.py
index ce8037d..445fe6b 100644
--- a/tests/query_test/test_decimal_casting.py
+++ b/tests/query_test/test_decimal_casting.py
@@ -159,7 +159,7 @@ class TestDecimalCasting(ImpalaTestSuite):
           .format(val, precision, scale)
       res = Decimal(self.execute_scalar(cast, vector.get_value('exec_option')))
       # TODO: Remove check for cast_from once string to decimal is supported in decimal_v2.
-      if is_decimal_v2 and cast_from != 'string':
+      if is_decimal_v2:
         expected_val = val.quantize(Decimal('0e-%s' % scale), rounding=ROUND_HALF_UP)
       else:
         expected_val = val.quantize(Decimal('0e-%s' % scale), rounding=ROUND_DOWN)