You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by ta...@apache.org on 2017/12/22 15:47:08 UTC
[5/5] impala git commit: IMPALA-5014: Part 1: Round when casting
string to decimal
IMPALA-5014: Part 1: Round when casting string to decimal
In this patch we implement rounding when casting string to decimal if
DECIMAL_V2 is enabled. The backend method that parses strings and
converts them to decimals is refactored to make it easier to understand.
Testing:
- Added some BE tests.
Change-Id: Icd8b92727fb384e6ff2d145e4aab7ae5d27db26d
Reviewed-on: http://gerrit.cloudera.org:8080/8774
Reviewed-by: Taras Bobrovytsky <tb...@cloudera.com>
Tested-by: Impala Public Jenkins
Project: http://git-wip-us.apache.org/repos/asf/impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/impala/commit/a16fe803
Tree: http://git-wip-us.apache.org/repos/asf/impala/tree/a16fe803
Diff: http://git-wip-us.apache.org/repos/asf/impala/diff/a16fe803
Branch: refs/heads/master
Commit: a16fe803caac16d4ba64aeaa4fc750388aeca203
Parents: 4ce23f7
Author: Taras Bobrovytsky <tb...@cloudera.com>
Authored: Tue Nov 7 20:03:11 2017 -0800
Committer: Impala Public Jenkins <im...@gerrit.cloudera.org>
Committed: Fri Dec 22 11:39:08 2017 +0000
----------------------------------------------------------------------
be/src/benchmarks/atod-benchmark.cc | 2 +-
be/src/benchmarks/overflow-benchmark.cc | 2 +-
be/src/exec/hdfs-scanner-ir.cc | 12 +--
be/src/exec/text-converter.inline.h | 6 +-
be/src/exprs/decimal-operators-ir.cc | 10 +-
be/src/exprs/expr-test.cc | 6 +-
be/src/runtime/decimal-test.cc | 115 +++++++++++++++++++++--
be/src/runtime/decimal-value.inline.h | 31 +------
be/src/util/decimal-util.h | 25 +++++
be/src/util/string-parser.cc | 12 +--
be/src/util/string-parser.h | 126 +++++++++++++++++---------
tests/query_test/test_decimal_casting.py | 2 +-
12 files changed, 245 insertions(+), 104 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/impala/blob/a16fe803/be/src/benchmarks/atod-benchmark.cc
----------------------------------------------------------------------
diff --git a/be/src/benchmarks/atod-benchmark.cc b/be/src/benchmarks/atod-benchmark.cc
index 9beb942..1e86844 100644
--- a/be/src/benchmarks/atod-benchmark.cc
+++ b/be/src/benchmarks/atod-benchmark.cc
@@ -83,7 +83,7 @@ void TestImpala(int batch_size, void* d) {
const StringValue& str = data->data[j];
StringParser::ParseResult dummy;
val = StringParser::StringToDecimal<Storage>(
- str.ptr, str.len, column_type, &dummy);
+ str.ptr, str.len, column_type, false, &dummy);
data->result[j] = val;
}
}
http://git-wip-us.apache.org/repos/asf/impala/blob/a16fe803/be/src/benchmarks/overflow-benchmark.cc
----------------------------------------------------------------------
diff --git a/be/src/benchmarks/overflow-benchmark.cc b/be/src/benchmarks/overflow-benchmark.cc
index 1d4caf6..d59ab9d 100644
--- a/be/src/benchmarks/overflow-benchmark.cc
+++ b/be/src/benchmarks/overflow-benchmark.cc
@@ -84,7 +84,7 @@ void AddTestData(TestData* data, int n) {
string str = ss.str();
StringParser::ParseResult dummy;
val = StringParser::StringToDecimal<int128_t>(
- str.c_str(), str.length(), column_type, &dummy);
+ str.c_str(), str.length(), column_type, false, &dummy);
data->values.push_back(val);
data->int128_values.push_back(numeric_limits<int128_t>::max() * Rand());
}
http://git-wip-us.apache.org/repos/asf/impala/blob/a16fe803/be/src/exec/hdfs-scanner-ir.cc
----------------------------------------------------------------------
diff --git a/be/src/exec/hdfs-scanner-ir.cc b/be/src/exec/hdfs-scanner-ir.cc
index d7c2d81..ec1d2a3 100644
--- a/be/src/exec/hdfs-scanner-ir.cc
+++ b/be/src/exec/hdfs-scanner-ir.cc
@@ -90,9 +90,9 @@ ScalarExprEvaluator* HdfsScanner::GetConjunctEval(int idx) const {
void StringToDecimalSymbolDummy() {
// Force linker to to link the object file containing these functions.
- StringToDecimal4(nullptr, 0, 0, 0, nullptr);
- StringToDecimal8(nullptr, 0, 0, 0, nullptr);
- StringToDecimal16(nullptr, 0, 0, 0, nullptr);
+ StringToDecimal4(nullptr, 0, 0, 0, false, nullptr);
+ StringToDecimal8(nullptr, 0, 0, 0, false, nullptr);
+ StringToDecimal16(nullptr, 0, 0, 0, false, nullptr);
}
// Define the string parsing functions for llvm. Stamp out the templated functions
@@ -142,7 +142,7 @@ void IrStringToTimestamp(TimestampValue* out, const char* s, int len,
extern "C"
Decimal4Value IrStringToDecimal4(const char* s, int len, int type_precision,
int type_scale, ParseResult* result) {
- auto ret = StringToDecimal4(s, len, type_precision, type_scale, result);
+ auto ret = StringToDecimal4(s, len, type_precision, type_scale, false, result);
if (*result != ParseResult::PARSE_SUCCESS) *result = ParseResult::PARSE_FAILURE;
return ret;
}
@@ -150,7 +150,7 @@ Decimal4Value IrStringToDecimal4(const char* s, int len, int type_precision,
extern "C"
Decimal8Value IrStringToDecimal8(const char* s, int len, int type_precision,
int type_scale, ParseResult* result) {
- auto ret = StringToDecimal8(s, len, type_precision, type_scale, result);
+ auto ret = StringToDecimal8(s, len, type_precision, type_scale, false, result);
if (*result != ParseResult::PARSE_SUCCESS) *result = ParseResult::PARSE_FAILURE;
return ret;
}
@@ -158,7 +158,7 @@ Decimal8Value IrStringToDecimal8(const char* s, int len, int type_precision,
extern "C"
Decimal16Value IrStringToDecimal16(const char* s, int len, int type_precision,
int type_scale, ParseResult* result) {
- auto ret = StringToDecimal16(s, len, type_precision, type_scale, result);
+ auto ret = StringToDecimal16(s, len, type_precision, type_scale, false, result);
if (*result != ParseResult::PARSE_SUCCESS) *result = ParseResult::PARSE_FAILURE;
return ret;
}
http://git-wip-us.apache.org/repos/asf/impala/blob/a16fe803/be/src/exec/text-converter.inline.h
----------------------------------------------------------------------
diff --git a/be/src/exec/text-converter.inline.h b/be/src/exec/text-converter.inline.h
index 49f4e8a..c56bcce 100644
--- a/be/src/exec/text-converter.inline.h
+++ b/be/src/exec/text-converter.inline.h
@@ -141,12 +141,12 @@ inline bool TextConverter::WriteSlot(const SlotDescriptor* slot_desc, Tuple* tup
case 4:
*reinterpret_cast<Decimal4Value*>(slot) =
StringParser::StringToDecimal<int32_t>(
- data, len, slot_desc->type(), &parse_result);
+ data, len, slot_desc->type(), false, &parse_result);
break;
case 8:
*reinterpret_cast<Decimal8Value*>(slot) =
StringParser::StringToDecimal<int64_t>(
- data, len, slot_desc->type(), &parse_result);
+ data, len, slot_desc->type(), false, &parse_result);
break;
case 12:
DCHECK(false) << "Planner should not generate this.";
@@ -154,7 +154,7 @@ inline bool TextConverter::WriteSlot(const SlotDescriptor* slot_desc, Tuple* tup
case 16:
*reinterpret_cast<Decimal16Value*>(slot) =
StringParser::StringToDecimal<int128_t>(
- data, len, slot_desc->type(), &parse_result);
+ data, len, slot_desc->type(), false, &parse_result);
break;
default:
DCHECK(false) << "Decimal slots can't be this size.";
http://git-wip-us.apache.org/repos/asf/impala/blob/a16fe803/be/src/exprs/decimal-operators-ir.cc
----------------------------------------------------------------------
diff --git a/be/src/exprs/decimal-operators-ir.cc b/be/src/exprs/decimal-operators-ir.cc
index bb54a0f..8612561 100644
--- a/be/src/exprs/decimal-operators-ir.cc
+++ b/be/src/exprs/decimal-operators-ir.cc
@@ -534,22 +534,26 @@ IR_ALWAYS_INLINE DecimalVal DecimalOperators::CastToDecimalVal(
DecimalVal dv;
int precision = ctx->impl()->GetConstFnAttr(FunctionContextImpl::RETURN_TYPE_PRECISION);
int scale = ctx->impl()->GetConstFnAttr(FunctionContextImpl::RETURN_TYPE_SCALE);
+ bool is_decimal_v2 = ctx->impl()->GetConstFnAttr(FunctionContextImpl::DECIMAL_V2);
switch (ColumnType::GetDecimalByteSize(precision)) {
case 4: {
Decimal4Value dv4 = StringParser::StringToDecimal<int32_t>(
- reinterpret_cast<char*>(val.ptr), val.len, precision, scale, &result);
+ reinterpret_cast<char*>(val.ptr), val.len, precision, scale,
+ is_decimal_v2, &result);
dv = DecimalVal(dv4.value());
break;
}
case 8: {
Decimal8Value dv8 = StringParser::StringToDecimal<int64_t>(
- reinterpret_cast<char*>(val.ptr), val.len, precision, scale, &result);
+ reinterpret_cast<char*>(val.ptr), val.len, precision, scale,
+ is_decimal_v2, &result);
dv = DecimalVal(dv8.value());
break;
}
case 16: {
Decimal16Value dv16 = StringParser::StringToDecimal<int128_t>(
- reinterpret_cast<char*>(val.ptr), val.len, precision, scale, &result);
+ reinterpret_cast<char*>(val.ptr), val.len, precision, scale,
+ is_decimal_v2, &result);
dv = DecimalVal(dv16.value());
break;
}
http://git-wip-us.apache.org/repos/asf/impala/blob/a16fe803/be/src/exprs/expr-test.cc
----------------------------------------------------------------------
diff --git a/be/src/exprs/expr-test.cc b/be/src/exprs/expr-test.cc
index 35599c0..88d957d 100644
--- a/be/src/exprs/expr-test.cc
+++ b/be/src/exprs/expr-test.cc
@@ -494,15 +494,15 @@ class ExprTest : public testing::Test {
switch (expected_type.GetByteSize()) {
case 4:
EXPECT_EQ(expected_result.value(), StringParser::StringToDecimal<int32_t>(
- value.data(), value.size(), expected_type, &result).value()) << query;
+ value.data(), value.size(), expected_type, false, &result).value()) << query;
break;
case 8:
EXPECT_EQ(expected_result.value(), StringParser::StringToDecimal<int64_t>(
- value.data(), value.size(), expected_type, &result).value()) << query;
+ value.data(), value.size(), expected_type, false, &result).value()) << query;
break;
case 16:
EXPECT_EQ(expected_result.value(), StringParser::StringToDecimal<int128_t>(
- value.data(), value.size(), expected_type, &result).value()) << query;
+ value.data(), value.size(), expected_type, false, &result).value()) << query;
break;
default:
EXPECT_TRUE(false) << expected_type << " " << expected_type.GetByteSize();
http://git-wip-us.apache.org/repos/asf/impala/blob/a16fe803/be/src/runtime/decimal-test.cc
----------------------------------------------------------------------
diff --git a/be/src/runtime/decimal-test.cc b/be/src/runtime/decimal-test.cc
index f84fdaf..25d2f65 100644
--- a/be/src/runtime/decimal-test.cc
+++ b/be/src/runtime/decimal-test.cc
@@ -45,11 +45,11 @@ void VerifyEquals(const DecimalValue<T>& t1, const DecimalValue<T>& t2) {
}
template <typename T>
-void VerifyParse(const string& s, int precision, int scale,
+void VerifyParse(const string& s, int precision, int scale, bool round,
const DecimalValue<T>& expected_val, StringParser::ParseResult expected_result) {
StringParser::ParseResult parse_result;
DecimalValue<T> val = StringParser::StringToDecimal<T>(
- s.c_str(), s.size(), precision, scale, &parse_result);
+ s.c_str(), s.size(), precision, scale, round, &parse_result);
EXPECT_EQ(expected_result, parse_result) << "Failed test string: " << s;
if (expected_result == StringParser::PARSE_SUCCESS ||
expected_result == StringParser::PARSE_UNDERFLOW) {
@@ -57,6 +57,22 @@ void VerifyParse(const string& s, int precision, int scale,
}
}
+template <typename T>
+void VerifyParse(const string& s, int precision, int scale,
+ const DecimalValue<T>& expected_val, StringParser::ParseResult expected_result) {
+ VerifyParse(s, precision, scale, false, expected_val, expected_result);
+ VerifyParse(s, precision, scale, true, expected_val, expected_result);
+}
+
+template <typename T>
+void VerifyParse(const string& s, int precision, int scale,
+ const DecimalValue<T>& expected_val_v1, StringParser::ParseResult expected_result_v1,
+ const DecimalValue<T>& expected_val_v2, StringParser::ParseResult expected_result_v2)
+{
+ VerifyParse(s, precision, scale, false, expected_val_v1, expected_result_v1);
+ VerifyParse(s, precision, scale, true, expected_val_v2, expected_result_v2);
+}
+
template<typename T>
void VerifyToString(const T& decimal, int precision, int scale, const string& expected) {
EXPECT_EQ(decimal.ToString(precision, scale), expected);
@@ -69,6 +85,17 @@ void StringToAllDecimals(const string& s, int precision, int scale, int32_t val,
VerifyParse(s, precision, scale, Decimal16Value(val), result);
}
+void StringToAllDecimals(const string& s, int precision, int scale,
+ int32_t val_v1, StringParser::ParseResult result_v1,
+ int32_t val_v2, StringParser::ParseResult result_v2) {
+ VerifyParse(s, precision, scale,
+ Decimal4Value(val_v1), result_v1, Decimal4Value(val_v2), result_v2);
+ VerifyParse(s, precision, scale,
+ Decimal8Value(val_v1), result_v1, Decimal8Value(val_v2), result_v2);
+ VerifyParse(s, precision, scale,
+ Decimal16Value(val_v1), result_v1, Decimal16Value(val_v2), result_v2);
+}
+
TEST(IntToDecimal, Basic) {
Decimal16Value d16;
bool overflow = false;
@@ -271,13 +298,27 @@ TEST(StringToDecimal, Basic) {
StringToAllDecimals(".1", 10, 2, 10, StringParser::PARSE_SUCCESS);
StringToAllDecimals("00012.3", 10, 2, 1230, StringParser::PARSE_SUCCESS);
StringToAllDecimals("-00012.3", 10, 2, -1230, StringParser::PARSE_SUCCESS);
+ StringToAllDecimals("0.00000", 6, 5, 0, StringParser::PARSE_SUCCESS);
+ StringToAllDecimals("1.00000", 6, 5, 100000, StringParser::PARSE_SUCCESS);
+ StringToAllDecimals("0.000000", 6, 5, 0, StringParser::PARSE_UNDERFLOW);
+ StringToAllDecimals("1.000000", 6, 5, 100000, StringParser::PARSE_UNDERFLOW);
+ StringToAllDecimals("1.000004", 6, 5, 100000, StringParser::PARSE_UNDERFLOW);
+ StringToAllDecimals("1.000005", 6, 5,
+ 100000, StringParser::PARSE_UNDERFLOW,
+ 100001, StringParser::PARSE_UNDERFLOW);
+ StringToAllDecimals("0.4", 5, 0, 0, StringParser::PARSE_UNDERFLOW);
+ StringToAllDecimals("0.5", 5, 0,
+ 0, StringParser::PARSE_UNDERFLOW,
+ 1, StringParser::PARSE_UNDERFLOW);
StringToAllDecimals("123.45", 10, 2, 12345, StringParser::PARSE_SUCCESS);
StringToAllDecimals(".45", 10, 2, 45, StringParser::PARSE_SUCCESS);
StringToAllDecimals("-.45", 10, 2, -45, StringParser::PARSE_SUCCESS);
StringToAllDecimals(" 123.4 ", 10, 5, 12340000, StringParser::PARSE_SUCCESS);
StringToAllDecimals("-123.45", 10, 5, -12345000, StringParser::PARSE_SUCCESS);
- StringToAllDecimals("-123.456", 10, 2, -12345, StringParser::PARSE_UNDERFLOW);
+ StringToAllDecimals("-123.456", 10, 2,
+ -12345, StringParser::PARSE_UNDERFLOW,
+ -12346, StringParser::PARSE_UNDERFLOW);
StringToAllDecimals("e", 2, 0, 0, StringParser::PARSE_FAILURE);
StringToAllDecimals("E", 2, 0, 0, StringParser::PARSE_FAILURE);
@@ -347,7 +388,8 @@ TEST(StringToDecimal, LargeDecimals) {
VerifyParse("123456.78", 8, 3,
Decimal8Value(12345678L), StringParser::PARSE_OVERFLOW);
VerifyParse("1234.5678", 8, 3,
- Decimal8Value(1234567L), StringParser::PARSE_UNDERFLOW);
+ Decimal8Value(1234567L), StringParser::PARSE_UNDERFLOW,
+ Decimal8Value(1234568L), StringParser::PARSE_UNDERFLOW);
VerifyParse("12345.678", 8, 3,
Decimal16Value(12345678L), StringParser::PARSE_SUCCESS);
VerifyParse("-12345.678", 8, 3,
@@ -355,7 +397,8 @@ TEST(StringToDecimal, LargeDecimals) {
VerifyParse("123456.78", 8, 3,
Decimal16Value(12345678L), StringParser::PARSE_OVERFLOW);
VerifyParse("1234.5678", 8, 3,
- Decimal16Value(1234567L), StringParser::PARSE_UNDERFLOW);
+ Decimal16Value(1234567L), StringParser::PARSE_UNDERFLOW,
+ Decimal16Value(1234568L), StringParser::PARSE_UNDERFLOW);
// Test max unscaled value for each of the decimal types.
VerifyParse("999999999", 9, 0,
@@ -411,16 +454,68 @@ TEST(StringToDecimal, LargeDecimals) {
38, 38, Decimal16Value(-result), StringParser::PARSE_SUCCESS);
VerifyParse("-.99999999999999999999999999999999999999e1",
38, 38, Decimal16Value(-result), StringParser::PARSE_OVERFLOW);
- VerifyParse("-.999999999999999999999999999999999999990e-1",
- 38, 38, Decimal16Value(-result / 10), StringParser::PARSE_UNDERFLOW);
- VerifyParse("-.999999999999999999999999999999999999990000000000000000e-20",
- 38, 38,
+ VerifyParse("-.999999999999999999999999999999999999990e-1", 38, 38,
+ Decimal16Value(-result / 10), StringParser::PARSE_UNDERFLOW,
+ Decimal16Value(-result / 10 - 1), StringParser::PARSE_UNDERFLOW);
+ VerifyParse("-.999999999999999999999999999999999999990000000000000000e-20", 38, 38,
Decimal16Value(-result / DecimalUtil::GetScaleMultiplier<int128_t>(20)),
+ StringParser::PARSE_UNDERFLOW,
+ Decimal16Value(-result / DecimalUtil::GetScaleMultiplier<int128_t>(20) - 1),
StringParser::PARSE_UNDERFLOW);
VerifyParse("100000000000000000000000000000000000000",
38, 0, Decimal16Value(0), StringParser::PARSE_OVERFLOW);
VerifyParse("-100000000000000000000000000000000000000",
38, 0, Decimal16Value(0), StringParser::PARSE_OVERFLOW);
+
+ // Rounding tests.
+ VerifyParse("555.554", 5, 2, Decimal4Value(55555), StringParser::PARSE_UNDERFLOW);
+ VerifyParse("555.555", 5, 2,
+ Decimal4Value(55555), StringParser::PARSE_UNDERFLOW,
+ Decimal4Value(55556), StringParser::PARSE_UNDERFLOW);
+ // Too many digits to the left of the dot so we overflow.
+ VerifyParse("555.555e1", 5, 2, Decimal4Value(0), StringParser::PARSE_OVERFLOW);
+ VerifyParse("-555.555e1", 5, 2, Decimal4Value(0), StringParser::PARSE_OVERFLOW);
+ VerifyParse("5555.555", 5, 2, Decimal16Value(0), StringParser::PARSE_OVERFLOW);
+ VerifyParse("-555.555", 5, 2,
+ Decimal4Value(-55555), StringParser::PARSE_UNDERFLOW,
+ Decimal4Value(-55556), StringParser::PARSE_UNDERFLOW);
+ VerifyParse("-5555.555", 5, 2, Decimal16Value(0), StringParser::PARSE_OVERFLOW);
+ VerifyParse("5555.555e-1", 5, 2,
+ Decimal4Value(55555), StringParser::PARSE_UNDERFLOW,
+ Decimal4Value(55556), StringParser::PARSE_UNDERFLOW);
+ VerifyParse("55555.555e-1", 5, 2, Decimal16Value(0), StringParser::PARSE_OVERFLOW);
+ // Too many digits to the right of the dot and not enough to the left. Rounding via
+ // ScaleDownAndRound().
+ VerifyParse("5.55444", 5, 2, Decimal4Value(555), StringParser::PARSE_UNDERFLOW);
+ VerifyParse("5.55555", 5, 2,
+ Decimal4Value(555), StringParser::PARSE_UNDERFLOW,
+ Decimal4Value(556), StringParser::PARSE_UNDERFLOW);
+ VerifyParse("5.555e-9", 5, 2, Decimal4Value(0), StringParser::PARSE_UNDERFLOW);
+ // The number of digits to the left of the dot equals to precision - scale. Rounding
+ // by adding 1 if the first truncated digit is greater or equal to 5.
+ VerifyParse("555.554", 5, 2, Decimal4Value(55555), StringParser::PARSE_UNDERFLOW);
+ VerifyParse("555.555", 5, 2,
+ Decimal4Value(55555), StringParser::PARSE_UNDERFLOW,
+ Decimal4Value(55556), StringParser::PARSE_UNDERFLOW);
+ VerifyParse("5.55554e2", 5, 2, Decimal4Value(55555), StringParser::PARSE_UNDERFLOW);
+ VerifyParse("5.55555e2", 5, 2,
+ Decimal4Value(55555), StringParser::PARSE_UNDERFLOW,
+ Decimal4Value(55556), StringParser::PARSE_UNDERFLOW);
+ VerifyParse("55555.4e-2", 5, 2, Decimal4Value(55555), StringParser::PARSE_UNDERFLOW);
+ VerifyParse("55555.5e-2", 5, 2,
+ Decimal4Value(55555), StringParser::PARSE_UNDERFLOW,
+ Decimal4Value(55556), StringParser::PARSE_UNDERFLOW);
+ // Rounding causes overflow.
+ VerifyParse("999.994", 5, 2, Decimal4Value(99999), StringParser::PARSE_UNDERFLOW);
+ VerifyParse("999.995", 5, 2,
+ Decimal4Value(99999), StringParser::PARSE_UNDERFLOW,
+ Decimal4Value(0), StringParser::PARSE_OVERFLOW);
+ VerifyParse("9.99995e2", 5, 2,
+ Decimal4Value(99999), StringParser::PARSE_UNDERFLOW,
+ Decimal4Value(0), StringParser::PARSE_OVERFLOW);
+ VerifyParse("99999.5e-2", 5, 2,
+ Decimal4Value(99999), StringParser::PARSE_UNDERFLOW,
+ Decimal4Value(0), StringParser::PARSE_OVERFLOW);
}
TEST(DecimalTest, Overflow) {
@@ -780,7 +875,7 @@ TEST(DecimalArithmetic, DivideLargeScales) {
StringParser::ParseResult result;
const char* data = "319391280635.61476055";
Decimal16Value x =
- StringParser::StringToDecimal<int128_t>(data, strlen(data), t1, &result);
+ StringParser::StringToDecimal<int128_t>(data, strlen(data), t1, false, &result);
Decimal16Value y(10000);
bool is_nan = false;
bool is_overflow = false;
http://git-wip-us.apache.org/repos/asf/impala/blob/a16fe803/be/src/runtime/decimal-value.inline.h
----------------------------------------------------------------------
diff --git a/be/src/runtime/decimal-value.inline.h b/be/src/runtime/decimal-value.inline.h
index 8f76335..2097ac8 100644
--- a/be/src/runtime/decimal-value.inline.h
+++ b/be/src/runtime/decimal-value.inline.h
@@ -140,27 +140,6 @@ inline DecimalValue<T> DecimalValue<T>::ScaleTo(int src_scale, int dst_scale,
namespace detail {
-// Helper function to scale down over multiplied values back into result type,
-// truncating if round is false or rounding otherwise.
-template<typename T, typename RESULT_T>
-inline RESULT_T ScaleDownAndRound(RESULT_T value, int delta_scale, bool round) {
- DCHECK_GT(delta_scale, 0);
- // Multiplier can always be computed in potentially smaller type T
- T multiplier = DecimalUtil::GetScaleMultiplier<T>(delta_scale);
- DCHECK(multiplier > 1 && multiplier % 2 == 0);
- RESULT_T result = value / multiplier;
- if (round) {
- RESULT_T remainder = value % multiplier;
- // In general, shifting down the multiplier is not safe, but we know
- // here that it is a multiple of two.
- if (abs(remainder) >= (multiplier >> 1)) {
- // Bias at zero must be corrected by sign of dividend.
- result += BitUtil::Sign(value);
- }
- }
- return result;
-}
-
// If we have a number with 'num_lz' leading zeros, and we scale it up by 10^scale_diff,
// this function returns the minimum number of leading zeros the result would have.
inline int MinLeadingZerosAfterScaling(int num_lz, int scale_diff) {
@@ -232,7 +211,7 @@ inline int128_t AddLarge(int128_t x, int x_scale, int128_t y, int y_scale,
right = x_right + y_right;
}
if (result_scale_decrease > 0) {
- right = detail::ScaleDownAndRound<int128_t, int128_t>(
+ right = DecimalUtil::ScaleDownAndRound<int128_t>(
right, result_scale_decrease, round);
}
DCHECK(right >= 0);
@@ -288,7 +267,7 @@ inline int128_t SubtractLarge(int128_t x, int x_scale, int128_t y, int y_scale,
if (result_scale_decrease > 0) {
// At this point, the scale of the fractional part is either x_scale or y_scale,
// whichever is greater. We scale down the fractional part to result_scale here.
- right = detail::ScaleDownAndRound<int128_t, int128_t>(
+ right = DecimalUtil::ScaleDownAndRound<int128_t>(
right, result_scale_decrease, round);
}
@@ -351,7 +330,7 @@ inline DecimalValue<RESULT_T> DecimalValue<T>::Add(int this_scale,
if (result_scale_decrease > 0) {
// After first adjusting x and y to the same scale and adding them together, we now
// need scale down the result to result_scale.
- x = detail::ScaleDownAndRound<T, RESULT_T>(x, result_scale_decrease, round);
+ x = DecimalUtil::ScaleDownAndRound<RESULT_T>(x, result_scale_decrease, round);
}
return DecimalValue<RESULT_T>(x);
}
@@ -418,7 +397,7 @@ DecimalValue<RESULT_T> DecimalValue<T>::Multiply(int this_scale,
DCHECK(*overflow);
} else {
int256_t intermediate_result = ConvertToInt256(x) * ConvertToInt256(y);
- intermediate_result = detail::ScaleDownAndRound<int256_t, int256_t>(
+ intermediate_result = DecimalUtil::ScaleDownAndRound<int256_t>(
intermediate_result, delta_scale, round);
result = ConvertToInt128(
intermediate_result, DecimalUtil::MAX_UNSCALED_DECIMAL16, overflow);
@@ -436,7 +415,7 @@ DecimalValue<RESULT_T> DecimalValue<T>::Multiply(int this_scale,
result = x * y;
// The largest value that result can have here is (2^64 - 1) * (2^63 - 1), which is
// greater than MAX_UNSCALED_DECIMAL16.
- result = detail::ScaleDownAndRound<T, RESULT_T>(result, delta_scale, round);
+ result = DecimalUtil::ScaleDownAndRound<RESULT_T>(result, delta_scale, round);
// Since delta_scale is greater than zero, result can now be at most
// ((2^64 - 1) * (2^63 - 1)) / 10, which is less than MAX_UNSCALED_DECIMAL16, so
// there is no need to check for overflow.
http://git-wip-us.apache.org/repos/asf/impala/blob/a16fe803/be/src/util/decimal-util.h
----------------------------------------------------------------------
diff --git a/be/src/util/decimal-util.h b/be/src/util/decimal-util.h
index 288caf3..c87b2c5 100644
--- a/be/src/util/decimal-util.h
+++ b/be/src/util/decimal-util.h
@@ -69,6 +69,31 @@ class DecimalUtil {
return result;
}
+ /// Helper function to scale down values by 10^delta_scale, truncating if
+ /// round is false or rounding otherwise.
+ template<typename T>
+ static inline T ScaleDownAndRound(T value, int delta_scale, bool round) {
+ DCHECK_GT(delta_scale, 0);
+ T divisor = DecimalUtil::GetScaleMultiplier<T>(delta_scale);
+ if (divisor > 0) {
+ DCHECK(divisor % 2 == 0);
+ T result = value / divisor;
+ if (round) {
+ T remainder = value % divisor;
+ // In general, shifting down the multiplier is not safe, but we know
+ // here that it is a multiple of two.
+ if (abs(remainder) >= (divisor >> 1)) {
+ // Bias at zero must be corrected by sign of dividend.
+ result += BitUtil::Sign(value);
+ }
+ }
+ return result;
+ } else {
+ DCHECK(divisor == -1);
+ return 0;
+ }
+ }
+
/// Write decimals as big endian (byte comparable) in fixed_len_size bytes.
template<typename T>
static inline void EncodeToFixedLenByteArray(
http://git-wip-us.apache.org/repos/asf/impala/blob/a16fe803/be/src/util/string-parser.cc
----------------------------------------------------------------------
diff --git a/be/src/util/string-parser.cc b/be/src/util/string-parser.cc
index f59c6b88..1f03de1 100644
--- a/be/src/util/string-parser.cc
+++ b/be/src/util/string-parser.cc
@@ -21,20 +21,20 @@ namespace impala {
using ParseResult = StringParser::ParseResult;
Decimal4Value StringToDecimal4(const char* s, int len, int type_precision,
- int type_scale, StringParser::ParseResult* result) {
+ int type_scale, bool round, StringParser::ParseResult* result) {
return StringParser::StringToDecimal<int32_t>(s, len, type_precision,
- type_scale, result);
+ type_scale, round, result);
}
Decimal8Value StringToDecimal8(const char* s, int len, int type_precision,
- int type_scale, StringParser::ParseResult* result) {
+ int type_scale, bool round, StringParser::ParseResult* result) {
return StringParser::StringToDecimal<int64_t>(s, len, type_precision,
- type_scale, result);
+ type_scale, round, result);
}
Decimal16Value StringToDecimal16(const char* s, int len, int type_precision,
- int type_scale, StringParser::ParseResult* result) {
+ int type_scale, bool round, StringParser::ParseResult* result) {
return StringParser::StringToDecimal<int128_t>(s, len, type_precision,
- type_scale, result);
+ type_scale, round, result);
}
}
http://git-wip-us.apache.org/repos/asf/impala/blob/a16fe803/be/src/util/string-parser.h
----------------------------------------------------------------------
diff --git a/be/src/util/string-parser.h b/be/src/util/string-parser.h
index 10c3fc7..606e5b8 100644
--- a/be/src/util/string-parser.h
+++ b/be/src/util/string-parser.h
@@ -112,13 +112,13 @@ class StringParser {
/// On underflow, the truncated value is returned.
template <typename T>
static inline DecimalValue<T> StringToDecimal(const char* s, int len,
- const ColumnType& type, StringParser::ParseResult* result) {
- return StringToDecimal<T>(s, len, type.precision, type.scale, result);
+ const ColumnType& type, bool round, StringParser::ParseResult* result) {
+ return StringToDecimal<T>(s, len, type.precision, type.scale, round, result);
}
template <typename T>
static inline DecimalValue<T> StringToDecimal(const char* s, int len,
- int type_precision, int type_scale, StringParser::ParseResult* result) {
+ int type_precision, int type_scale, bool round, StringParser::ParseResult* result) {
// Special cases:
// 1) '' == Fail, an empty string fails to parse.
// 2) ' # ' == #, leading and trailing white space is ignored.
@@ -156,7 +156,7 @@ class StringParser {
// Ignore leading zeros even after a dot. This allows for differentiating between
// cases like 0.01e2, which would fit in a DECIMAL(1, 0), and 0.10e2, which would
// overflow.
- int scale = 0;
+ int digits_after_dot_count = 0;
int found_dot = 0;
if (len > 0 && *s == '.') {
found_dot = 1;
@@ -164,30 +164,36 @@ class StringParser {
--len;
while (len > 0 && UNLIKELY(*s == '0')) {
found_value = true;
- ++scale;
+ ++digits_after_dot_count;
++s;
--len;
}
}
- int precision = 0;
+ int total_digits_count = 0;
bool found_exponent = false;
int8_t exponent = 0;
+ int first_truncated_digit = 0;
T value = 0;
for (int i = 0; i < len; ++i) {
- const char& c = s[i];
+ const char c = s[i];
if (LIKELY('0' <= c && c <= '9')) {
found_value = true;
// Ignore digits once the type's precision limit is reached. This avoids
// overflowing the underlying storage while handling a string like
// 10000000000e-10 into a DECIMAL(1, 0). Adjustments for ignored digits and
// an exponent will be made later.
- if (LIKELY(type_precision > precision)) {
- value = (value * 10) + (c - '0'); // Benchmarks are faster with parenthesis...
+ if (LIKELY(total_digits_count < type_precision)) {
+ // Benchmarks are faster with parenthesis.
+ T new_value = (value * 10) + (c - '0');
+ DCHECK(new_value >= value);
+ value = new_value;
+ } else if (UNLIKELY(round && total_digits_count == type_precision)) {
+ first_truncated_digit = c - '0';
}
- DCHECK(value >= 0); // For some reason DCHECK_GE doesn't work with int128_t.
- ++precision;
- scale += found_dot;
+ DCHECK(value >= 0); // DCHECK_GE does not work with int128_t
+ ++total_digits_count;
+ digits_after_dot_count += found_dot;
} else if (c == '.' && LIKELY(!found_dot)) {
found_dot = 1;
} else if ((c == 'e' || c == 'E') && LIKELY(!found_exponent)) {
@@ -207,48 +213,56 @@ class StringParser {
}
// Find the number of truncated digits before adjusting the precision for an exponent.
- int truncated_digit_count = precision - type_precision;
- if (exponent > scale) {
- // Ex: 0.1e3 (which at this point would have precision == 1 and scale == 1), the
- // scale must be set to 0 and the value set to 100 which means a precision of 3.
- precision += exponent - scale;
- value *= DecimalUtil::GetScaleMultiplier<T>(exponent - scale);
- scale = 0;
- } else {
- // Ex: 100e-4, the scale must be set to 4 but no adjustment to the value is needed,
- // the precision must also be set to 4 but that will be done below for the
- // non-exponent case anyways.
- scale -= exponent;
- }
- // Ex: 0.001, at this point would have precision 1 and scale 3 since leading zeros
- // were ignored during previous parsing.
- if (scale > precision) precision = scale;
+ int truncated_digit_count = std::max(total_digits_count - type_precision, 0);
+ // 'scale' and 'precision' refer to the scale and precision of the number that
+ // is contained the string that we are parsing. The scale of 'value' may be
+ // different because some digits may have been truncated.
+ int scale, precision;
+ ApplyExponent(total_digits_count, digits_after_dot_count,
+ exponent, &value, &precision, &scale);
// Microbenchmarks show that beyond this point, returning on parse failure is slower
// than just letting the function run out.
*result = StringParser::PARSE_SUCCESS;
if (UNLIKELY(precision - scale > type_precision - type_scale)) {
+ // The number in the string has too many digits to the left of the dot,
+ // so we overflow.
*result = StringParser::PARSE_OVERFLOW;
} else if (UNLIKELY(scale > type_scale)) {
+ // There are too many digits to the right of the dot in the string we are parsing.
*result = StringParser::PARSE_UNDERFLOW;
- int shift = scale - type_scale;
- if (UNLIKELY(truncated_digit_count > 0)) shift -= truncated_digit_count;
+ // The scale of 'value'.
+ int value_scale = scale - truncated_digit_count;
+ int shift = value_scale - type_scale;
if (shift > 0) {
- T divisor = DecimalUtil::GetScaleMultiplier<T>(shift);
- if (LIKELY(divisor >= 0)) {
- value /= divisor;
- } else {
- DCHECK(divisor == -1); // DCHECK_EQ doesn't work with int128_t.
- value = 0;
+ // There are less than maximum number of digits to the left of the dot.
+ value = DecimalUtil::ScaleDownAndRound<T>(value, shift, round);
+ DCHECK(value >= 0);
+ DCHECK(value < DecimalUtil::GetScaleMultiplier<int128_t>(type_precision));
+ } else {
+ // There are a maximum number of digits to the left of the dot. We round by
+ // looking at the first truncated digit.
+ DCHECK_EQ(shift, 0);
+ DCHECK(0 <= first_truncated_digit && first_truncated_digit <= 9);
+ DCHECK(first_truncated_digit == 0 || truncated_digit_count != 0);
+ DCHECK(first_truncated_digit == 0 || round);
+ // Apply the rounding.
+ value += (first_truncated_digit >= 5);
+ DCHECK(value >= 0);
+ DCHECK(value <= DecimalUtil::GetScaleMultiplier<int128_t>(type_precision));
+ if (UNLIKELY(value == DecimalUtil::GetScaleMultiplier<T>(type_precision))) {
+ // Overflow due to rounding.
+ *result = StringParser::PARSE_OVERFLOW;
}
}
- DCHECK(value >= 0); // DCHECK_GE doesn't work with int128_t.
} else if (UNLIKELY(!found_value && !found_dot)) {
*result = StringParser::PARSE_FAILURE;
- }
-
- if (type_scale > scale) {
+ } else if (type_scale > scale) {
+ // There were not enough digits after the dot, so we have scale up the value.
+ DCHECK_EQ(truncated_digit_count, 0);
value *= DecimalUtil::GetScaleMultiplier<T>(type_scale - scale);
+ // Overflow should be impossible.
+ DCHECK(value < DecimalUtil::GetScaleMultiplier<int128_t>(type_precision));
}
return DecimalValue<T>(is_negative ? -value : value);
@@ -372,6 +386,30 @@ class StringParser {
return static_cast<T>(negative ? -val : val);
}
+ /// Helper function that applies the exponent to the value. Also computes and
+ /// sets the precision and scale.
+ template <typename T>
+ static inline void ApplyExponent(int total_digits_count,
+ int digits_after_dot_count,int8_t exponent, T* value, int* precision, int* scale) {
+ if (exponent > digits_after_dot_count) {
+ // Ex: 0.1e3 (which at this point would have precision == 1 and scale == 1), the
+ // scale must be set to 0 and the value set to 100 which means a precision of 3.
+ *value *= DecimalUtil::GetScaleMultiplier<T>(exponent - digits_after_dot_count);
+ *precision = total_digits_count + (exponent - digits_after_dot_count);
+ *scale = 0;
+ } else {
+ // Ex: 100e-4, the scale must be set to 4 but no adjustment to the value is needed,
+ // the precision must also be set to 4 but that will be done below for the
+ // non-exponent case anyways.
+ *precision = total_digits_count;
+ *scale = digits_after_dot_count - exponent;
+ // Ex: 0.001, at this point would have precision 1 and scale 3 since leading zeros
+ // were ignored during previous parsing.
+ if (*precision < *scale) *precision = *scale;
+ }
+ DCHECK_GE(*precision, *scale);
+ }
+
/// Checks if "inf" or "infinity" matches 's' in a case-insensitive manner. The match
/// has to start at the beginning of 's', leading whitespace is considered invalid.
/// Trailing whitespace characters are allowed.
@@ -580,7 +618,7 @@ class StringParser {
return val;
}
- static inline bool IsWhitespace(const char& c) {
+ static inline bool IsWhitespace(const char c) {
return c == ' ' || UNLIKELY(c == '\t' || c == '\n' || c == '\v' || c == '\f'
|| c == '\r');
}
@@ -600,13 +638,13 @@ inline int StringParser::StringParseTraits<int64_t>::max_ascii_len() { return 19
// These functions are too large to benefit from inlining.
Decimal4Value StringToDecimal4(const char* s, int len, int type_precision,
- int type_scale, StringParser::ParseResult* result);
+ int type_scale, bool round, StringParser::ParseResult* result);
Decimal8Value StringToDecimal8(const char* s, int len, int type_precision,
- int type_scale, StringParser::ParseResult* result);
+ int type_scale, bool round, StringParser::ParseResult* result);
Decimal16Value StringToDecimal16(const char* s, int len, int type_precision,
- int type_scale, StringParser::ParseResult* result);
+ int type_scale, bool round, StringParser::ParseResult* result);
}
#endif
http://git-wip-us.apache.org/repos/asf/impala/blob/a16fe803/tests/query_test/test_decimal_casting.py
----------------------------------------------------------------------
diff --git a/tests/query_test/test_decimal_casting.py b/tests/query_test/test_decimal_casting.py
index ce8037d..445fe6b 100644
--- a/tests/query_test/test_decimal_casting.py
+++ b/tests/query_test/test_decimal_casting.py
@@ -159,7 +159,7 @@ class TestDecimalCasting(ImpalaTestSuite):
.format(val, precision, scale)
res = Decimal(self.execute_scalar(cast, vector.get_value('exec_option')))
# TODO: Remove check for cast_from once string to decimal is supported in decimal_v2.
- if is_decimal_v2 and cast_from != 'string':
+ if is_decimal_v2:
expected_val = val.quantize(Decimal('0e-%s' % scale), rounding=ROUND_HALF_UP)
else:
expected_val = val.quantize(Decimal('0e-%s' % scale), rounding=ROUND_DOWN)