You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by ta...@apache.org on 2017/11/21 22:46:29 UTC
[3/3] incubator-impala git commit: IMPALA-5019: Decimal V2 addition

IMPALA-5019: Decimal V2 addition

In this patch, we implement the new decimal return type rules for
addition expressions. These rules become active when the query option
DECIMAL_V2 is enabled. The algorithm for determining the type of the
result is described in the JIRA.

DECIMAL V1:
+----------------------------------------------------------------+
| typeof(cast(1 as decimal(38,0)) + cast(0.1 as decimal(38,38))) |
+----------------------------------------------------------------+
| DECIMAL(38,38)                                                 |
+----------------------------------------------------------------+

DECIMAL V2:
+----------------------------------------------------------------+
| typeof(cast(1 as decimal(38,0)) + cast(0.1 as decimal(38,38))) |
+----------------------------------------------------------------+
| DECIMAL(38,6)                                                  |
+----------------------------------------------------------------+

This patch required backend changes. We implement an algorithm where
we handle the whole and fractional parts separately, and then combine
them to get the final result. This is more complex and slower. We try
to avoid this by first checking if the result would fit into int128.

Testing:
- Added expr tests.
- Tested locally on my machine with a script that generates random
  decimal numbers and checks that Impala adds them correctly.

Performance:

For the common case, performance remains the same.
  select cast(2.2 as decimal(18, 1) + cast(2.2 as decimal(18, 1)

  BEFORE: 4.74s
  AFTER:  4.73s

In this case, we check if it is necessary to do the complex addition,
and it turns out to be not necessary. We see a slowdown because the
result needs to be scaled down by dividing.
  select cast(2.2 as decimal(38, 19) + cast(2.2 as decimal(38, 19)

  BEFORE: 1.63s
  AFTER:  13.57s

In following case, we take the most complex path and see the most
signification perfmance hit.
  select cast(7.5 as decimal(38,37)) + cast(2.2 as decimal(38,37))

  BEFORE: 1.63s
  AFTER: 20.57

Change-Id: I401049c56d910eb1546a178c909c923b01239336
Reviewed-on: http://gerrit.cloudera.org:8080/8309
Reviewed-by: Taras Bobrovytsky <tb...@cloudera.com>
Reviewed-by: Dan Hecht <dh...@cloudera.com>
Tested-by: Impala Public Jenkins


Project: http://git-wip-us.apache.org/repos/asf/incubator-impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-impala/commit/bc12a9eb
Tree: http://git-wip-us.apache.org/repos/asf/incubator-impala/tree/bc12a9eb
Diff: http://git-wip-us.apache.org/repos/asf/incubator-impala/diff/bc12a9eb

Branch: refs/heads/master
Commit: bc12a9eb35ff60d7a7e0f6732e9ab6a1d4538f2a
Parents: baa537f
Author: Taras Bobrovytsky <tb...@cloudera.com>
Authored: Tue Sep 19 16:23:24 2017 -0700
Committer: Impala Public Jenkins <im...@gerrit.cloudera.org>
Committed: Tue Nov 21 22:08:20 2017 +0000

----------------------------------------------------------------------
 be/src/exprs/expr-test.cc                       | 164 +++++++++++-
 be/src/runtime/decimal-value.h                  |   6 +-
 be/src/runtime/decimal-value.inline.h           | 259 +++++++++++++++----
 .../org/apache/impala/analysis/TypesUtil.java   |   9 +-
 .../impala/analysis/AnalyzeExprsTest.java       |  24 +-
 5 files changed, 386 insertions(+), 76 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/bc12a9eb/be/src/exprs/expr-test.cc
----------------------------------------------------------------------
diff --git a/be/src/exprs/expr-test.cc b/be/src/exprs/expr-test.cc
index d43f424..294dcc9 100644
--- a/be/src/exprs/expr-test.cc
+++ b/be/src/exprs/expr-test.cc
@@ -1564,12 +1564,164 @@ DecimalTestCase decimal_cases[] = {
   // Test add/subtract operators
   { "1.23 + cast(1 as decimal(4,3))", {{ false, false, 2230, 5, 3 }}},
   { "1.23 - cast(0.23 as decimal(10,3))", {{ false, false, 1000, 11, 3 }}},
-  { "cast(1.23 as decimal(8,2)) + cast(1 as decimal(20,3))",
-    {{ false, false, 2230, 21, 3 }}},
-  { "cast(1.23 as decimal(30,2)) - cast(1 as decimal(4,3))",
-    {{ false, false, 230, 32, 3 }}},
-  { "cast(1 as decimal(38,0)) + cast(.2 as decimal(38,1))",
-    {{ false, false, 12, 38, 1 }}},
+  { "cast(1.23 as decimal(8,2)) + cast(1 as decimal(20,3))", {{ false, false, 2230, 21, 3 }}},
+  { "cast(1.23 as decimal(30,2)) - cast(1 as decimal(4,3))", {{ false, false, 230, 32, 3 }}},
+  { "cast(1 as decimal(38,0)) + cast(0.2 as decimal(38,1))", {{ false, false, 12, 38, 1 }}},
+  { "cast(100000000000000000000000000000000 as decimal(38,0)) + cast(0 as decimal(38,38))",
+    {{ false, true, 0, 38, 38 },
+     { false, true, 0, 38, 6 }}},
+  { "cast(-100000000000000000000000000000000 as decimal(38,0)) + cast(0 as decimal(38,38))",
+    {{ false, true, 0, 38, 38 },
+     { false, true, 0, 38, 6 }}},
+  { "cast(99999999999999999999999999999999 as decimal(38,0)) + cast(0 as decimal(38,38))",
+    {{ false, true, 0, 38, 38 },
+     { false, false, StringToInt128("99999999999999999999999999999999000000"), 38, 6 }}},
+  { "cast(-99999999999999999999999999999999 as decimal(38,0)) + cast(0 as decimal(38,38))",
+    {{ false, true, 0, 38, 38 },
+     { false, false, StringToInt128("-99999999999999999999999999999999000000"), 38, 6 }}},
+  { "cast(99999999999999999999999999.9999999999 as decimal(36,10)) + "
+    "cast(99999999999999999999999999.9999999999 as decimal(36,10))",
+    {{ false, false, StringToInt128("1999999999999999999999999999999999998"), 37, 10 }}},
+  { "cast(99999999999999999999999999.9999999999 as decimal(36,10)) + "
+    "cast(-99999999999999999999999999.9999999999 as decimal(36,10))",
+    {{ false, false, 0, 37, 10 }}},
+  { "cast(999999999999999999999999999.9999999999 as decimal(37,10)) + "
+    "cast(999999999999999999999999999.9999999999 as decimal(37,10))",
+    {{ false, false, StringToInt128("19999999999999999999999999999999999998"), 38, 10 }}},
+  { "cast(999999999999999999999999999.9999999999 as decimal(37,10)) + "
+    "cast(-999999999999999999999999999.9999999999 as decimal(37,10))",
+    {{ false, false, 0, 38, 10 }}},
+  // Largest simple case where we don't call AddLarge() or AubtractLarge().
+  // We are adding (2^125 - 1) + (2^125 - 1) here.
+  { "cast(42535295865117307932921825928971026431 as decimal(38,0)) + "
+    "cast(42535295865117307932921825928971026431 as decimal(38,0))",
+    {{ false, false, StringToInt128("85070591730234615865843651857942052862"), 38, 0 }}},
+  { "cast(-42535295865117307932921825928971026431 as decimal(38,0)) + "
+    "cast(-42535295865117307932921825928971026431 as decimal(38,0))",
+    {{ false, false, StringToInt128("-85070591730234615865843651857942052862"), 38, 0 }}},
+  { "cast(42535295865117307932921825928971026431 as decimal(38,0)) + "
+    "cast(-42535295865117307932921825928971026431 as decimal(38,0))",
+    {{ false, false, 0, 38, 0 }}},
+  // Smallest case where we call AddLarge(). We are adding (2^125) + (2^125 - 1) here.
+  { "cast(42535295865117307932921825928971026432 as decimal(38,0)) + "
+    "cast(42535295865117307932921825928971026431 as decimal(38,0))",
+    {{ false, false, StringToInt128("85070591730234615865843651857942052863"), 38, 0 }}},
+  { "cast(-42535295865117307932921825928971026432 as decimal(38,0)) + "
+    "cast(-42535295865117307932921825928971026431 as decimal(38,0))",
+    {{ false, false, StringToInt128("-85070591730234615865843651857942052863"), 38, 0 }}},
+  { "cast(42535295865117307932921825928971026432 as decimal(38,0)) + "
+    "cast(-42535295865117307932921825928971026431 as decimal(38,0))",
+    {{ false, false, StringToInt128("1"), 38, 0 }}},
+  { "cast(99999999999999999999999999999999.999999 as decimal(38,6)) + "
+    "cast(8899999999999999999999999999999.999999 as decimal(38,6))",
+    {{ false, true, 0, 38, 6 }}},
+  { "cast(-99999999999999999999999999999999.999999 as decimal(38,6)) + "
+    "cast(-8899999999999999999999999999999.999999 as decimal(38,6))",
+    {{ false, true, 0, 38, 6 }}},
+  { "cast(-99999999999999999999999999999999.999999 as decimal(38,6)) + "
+    "cast(8899999999999999999999999999999.999999 as decimal(38,6))",
+    {{ false, false, StringToInt128("-91100000000000000000000000000000000000"), 38, 6 }}},
+  // Close to the maximum value.
+  { "cast(77777777777777777777777777777777.777777 as decimal(38,6)) + "
+    "cast(22222222222222222222222222222222.222222 as decimal(38,6))",
+    {{ false, false, StringToInt128("99999999999999999999999999999999999999"), 38, 6 }}},
+  { "cast(-77777777777777777777777777777777.777777 as decimal(38,6)) + "
+    "cast(22222222222222222222222222222222.222222 as decimal(38,6))",
+    {{ false, false, StringToInt128("-55555555555555555555555555555555555555"), 38, 6 }}},
+  { "cast(-77777777777777777777777777777777.777777 as decimal(38,6)) + "
+    "cast(-22222222222222222222222222222222.222222 as decimal(38,6))",
+    {{ false, false, StringToInt128("-99999999999999999999999999999999999999"), 38, 6 }}},
+  // Smallest result that overflows.
+  { "cast(77777777777777777777777777777777.777777 as decimal(38,6)) + "
+    "cast(22222222222222222222222222222222.222223 as decimal(38,6))",
+    {{ false, true, 0, 38, 6 }}},
+  { "cast(-77777777777777777777777777777777.777777 as decimal(38,6)) + "
+    "cast(-22222222222222222222222222222222.222223 as decimal(38,6))",
+    {{ false, true, 0, 38, 6 }}},
+  { "cast(11111111111111111111111111111111.777777 as decimal(38,6)) + "
+    "cast(11111111111111111111111111111111.555555 as decimal(38,6))",
+    {{ false, false, StringToInt128("22222222222222222222222222222223333332"), 38, 6 }}},
+  { "cast(-11111111111111111111111111111111.777777 as decimal(38,6)) + "
+    "cast(11111111111111111111111111111111.555555 as decimal(38,6))",
+    {{ false, false, StringToInt128("-222222"), 38, 6 }}},
+  { "cast(11111111111111111111111111111111.777777 as decimal(38,6)) + "
+    "cast(-11111111111111111111111111111111.555555 as decimal(38,6))",
+    {{ false, false, StringToInt128("222222"), 38, 6 }}},
+  { "cast(-11111111111111111111111111111111.777777 as decimal(38,6)) + "
+    "cast(-11111111111111111111111111111111.555555 as decimal(38,6))",
+    {{ false, false, StringToInt128("-22222222222222222222222222222223333332"), 38, 6 }}},
+  { "cast(3333333333333333333333333333333.8888884 as decimal(38,7)) + "
+    "cast(3333333333333333333333333333333.1111111 as decimal(38,7))",
+    {{ false, false, StringToInt128("66666666666666666666666666666669999995"), 38, 7 },
+     { false, false, StringToInt128("6666666666666666666666666666667000000"), 38, 6 }}},
+  { "cast(-3333333333333333333333333333333.8888884 as decimal(38,7)) + "
+    "cast(-3333333333333333333333333333333.1111111 as decimal(38,7))",
+    {{ false, false, StringToInt128("-66666666666666666666666666666669999995"), 38, 7 },
+     { false, false, StringToInt128("-6666666666666666666666666666667000000"), 38, 6 }}},
+  { "cast(3333333333333333333333333333333.9999995 as decimal(38,7)) + "
+    "cast(0 as decimal(38,7))",
+    {{ false, false, StringToInt128("33333333333333333333333333333339999995"), 38, 7 },
+     { false, false, StringToInt128("3333333333333333333333333333334000000"), 38, 6 }}},
+  { "cast(-3333333333333333333333333333333.9999995 as decimal(38,7)) + "
+    "cast(0 as decimal(38,7))",
+    {{ false, false, StringToInt128("-33333333333333333333333333333339999995"), 38, 7 },
+     { false, false, StringToInt128("-3333333333333333333333333333334000000"), 38, 6 }}},
+  // overflow due to rounding
+  { "cast(99999999999999999999999999999999.999999 as decimal(38,6)) + "
+    "cast(0.0000005 as decimal(38,7))",
+    {{ false, true, 0, 38, 7 },
+     { false, true, 0, 38, 6 }}},
+  { "cast(-99999999999999999999999999999999.999999 as decimal(38,6)) + "
+    "cast(-0.0000005 as decimal(38,7))",
+    {{ false, true, 0, 38, 7 },
+     { false, true, 0, 38, 6 }}},
+  { "cast(99999999999999999999999999999999.999999 as decimal(38,6)) + "
+    "cast(-0.0000006 as decimal(38,7))",
+    {{ false, true, 0, 38, 7 },
+     { false, false, StringToInt128("99999999999999999999999999999999999998"), 38, 6 }}},
+  { "cast(99999999999999999999999999999999.999999 as decimal(38,6)) + "
+    "cast(0.0000004 as decimal(38,7))",
+    {{ false, true, 0, 38, 7 },
+     { false, false, StringToInt128("99999999999999999999999999999999999999"), 38, 6 }}},
+  { "cast(-99999999999999999999999999999999.999999 as decimal(38,6)) + "
+    "cast(-0.0000004 as decimal(38,7))",
+    {{ false, true, 0, 38, 7 },
+     { false, false, StringToInt128("-99999999999999999999999999999999999999"), 38, 6 }}},
+  { "cast(99999999999999999999999999999999 as decimal(38,0)) + "
+    "cast(0.9999994 as decimal(38,38))",
+    {{ false, true, 0, 38, 38 },
+     { false, false, StringToInt128("99999999999999999999999999999999999999"), 38, 6 }}},
+  { "cast(-99999999999999999999999999999999 as decimal(38,0)) + "
+    "cast(-0.9999994 as decimal(38,38))",
+    {{ false, true, 0, 38, 38 },
+     { false, false, StringToInt128("-99999999999999999999999999999999999999"), 38, 6 }}},
+  { "cast(99999999999999999999999999999999 as decimal(38,0)) + "
+    "cast(0.9999995 as decimal(38,38))",
+    {{ false, true, 0, 38, 38 },
+     { false, true, 0, 38, 6 }}},
+  { "cast(-99999999999999999999999999999999 as decimal(38,0)) + "
+    "cast(-0.9999995 as decimal(38,38))",
+    {{ false, true, 0, 38, 38 },
+     { false, true, 0, 38, 6 }}},
+  { "cast(0.99999999999999999999999999999999999999 as decimal(38,38)) + "
+    "cast(-5e-38 as decimal(38,38))",
+    {{ false, false, StringToInt128("99999999999999999999999999999999999994"), 38, 38 },
+     { false, false, StringToInt128("9999999999999999999999999999999999999"), 38, 37 }}},
+  { "cast(0.99999999999999999999999999999999999999 as decimal(38,38)) + "
+    "cast(-4e-38 as decimal(38,38))",
+    {{ false, false, StringToInt128("99999999999999999999999999999999999995"), 38, 38 },
+     { false, false, StringToInt128("10000000000000000000000000000000000000"), 38, 37 }}},
+  { "cast(0.99999999999999999999999999999999999999 as decimal(38,38)) + "
+    "cast(0.99999999999999999999999999999999999999 as decimal(38,38))",
+    {{ false, true, 0, 38, 38 },
+     { false, false, StringToInt128("20000000000000000000000000000000000000"), 38, 37 }}},
+  { "cast(-0.99999999999999999999999999999999999999 as decimal(38,38)) + "
+    "cast(0.99999999999999999999999999999999999999 as decimal(38,38))",
+    {{ false, false, StringToInt128("0"), 38, 38 },
+     { false, false, StringToInt128("0"), 38, 37 }}},
+  { "cast(0 as decimal(38,38)) + cast(0 as decimal(38,38))",
+    {{ false, false, StringToInt128("0"), 38, 38 },
+     { false, false, StringToInt128("0"), 38, 37 }}},
   // Test multiply operator
   { "cast(1.23 as decimal(30,2)) * cast(1 as decimal(10,3))",
     {{ false, false, 123000, 38, 5 }}},

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/bc12a9eb/be/src/runtime/decimal-value.h
----------------------------------------------------------------------
diff --git a/be/src/runtime/decimal-value.h b/be/src/runtime/decimal-value.h
index 0432057..5f72b8d 100644
--- a/be/src/runtime/decimal-value.h
+++ b/be/src/runtime/decimal-value.h
@@ -195,9 +195,9 @@ class DecimalValue {
  private:
   T value_;
 
-  /// Returns in *x_val and *y_val, the adjusted values so that both
-  /// are at the same scale. The scale is the number of digits after the decimal.
-  /// Returns true if the adjusted causes overflow in which case the values in
+  /// Returns in *x_val and *y_val, the adjusted values so that both are at
+  /// max(x_scale, y_scale) scale. The scale is the number of digits after the decimal.
+  /// Returns true if the adjustment causes overflow in which case the values in
   /// x_scaled and y_scaled are unmodified.
   template <typename RESULT_T>
   static inline bool AdjustToSameScale(const DecimalValue& x, int x_scale,

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/bc12a9eb/be/src/runtime/decimal-value.inline.h
----------------------------------------------------------------------
diff --git a/be/src/runtime/decimal-value.inline.h b/be/src/runtime/decimal-value.inline.h
index 93b7311..3f14229 100644
--- a/be/src/runtime/decimal-value.inline.h
+++ b/be/src/runtime/decimal-value.inline.h
@@ -138,58 +138,6 @@ inline DecimalValue<T> DecimalValue<T>::ScaleTo(int src_scale, int dst_scale,
   return DecimalValue(result);
 }
 
-// Use __builtin_add_overflow on GCC if available.
-// Avoid using on Clang: it regresses performance.
-#if 5 <= __GNUC__
-template<typename T>
-template<typename RESULT_T>
-inline DecimalValue<RESULT_T> DecimalValue<T>::Add(int this_scale,
-    const DecimalValue& other, int other_scale, int result_precision, int result_scale,
-    bool round, bool* overflow) const {
-  DCHECK_EQ(result_scale, std::max(this_scale, other_scale));
-  RESULT_T x = 0;
-  RESULT_T y = 0;
-  *overflow |= AdjustToSameScale(*this, this_scale, other, other_scale,
-      result_precision, &x, &y);
-  if (result_precision == ColumnType::MAX_PRECISION) {
-    DCHECK_EQ(sizeof(RESULT_T), 16);
-    RESULT_T result = 0;
-    *overflow |= __builtin_add_overflow(x, y, &result);
-    *overflow |= abs(result) > DecimalUtil::MAX_UNSCALED_DECIMAL16;
-    return DecimalValue<RESULT_T>(result);
-  } else {
-    DCHECK(!*overflow) << "Cannot overflow unless result is Decimal16Value";
-  }
-  return DecimalValue<RESULT_T>(x + y);
-}
-#else
-template<typename T>
-template<typename RESULT_T>
-inline DecimalValue<RESULT_T> DecimalValue<T>::Add(int this_scale,
-    const DecimalValue& other, int other_scale, int result_precision, int result_scale,
-    bool round, bool* overflow) const {
-  DCHECK_EQ(result_scale, std::max(this_scale, other_scale));
-  RESULT_T x = 0;
-  RESULT_T y = 0;
-  *overflow |= AdjustToSameScale(*this, this_scale, other, other_scale,
-      result_precision, &x, &y);
-  if (sizeof(RESULT_T) == 16) {
-    // Check overflow.
-    if (!*overflow && is_negative() == other.is_negative() &&
-        result_precision == ColumnType::MAX_PRECISION) {
-      // Can only overflow if the signs are the same and result precision reaches
-      // max precision.
-      *overflow |= DecimalUtil::MAX_UNSCALED_DECIMAL16 - abs(x) < abs(y);
-      // TODO: faster to return here? We don't care at all about the perf on
-      // the overflow case but what makes the normal path faster?
-    }
-  } else {
-    DCHECK(!*overflow) << "Cannot overflow unless result is Decimal16Value";
-  }
-  return DecimalValue<RESULT_T>(x + y);
-}
-#endif
-
 namespace detail {
 
 // Helper function to scale down over multiplied values back into result type,
@@ -212,6 +160,213 @@ inline RESULT_T ScaleDownAndRound(RESULT_T value, int delta_scale, bool round) {
   }
   return result;
 }
+
+// If we have a number with 'num_lz' leading zeros, and we scale it up by 10^scale_diff,
+// this function returns the minimum number of leading zeros the result would have.
+inline int MinLeadingZerosAfterScaling(int num_lz, int scale_diff) {
+  DCHECK_GE(scale_diff, 0);
+  // We will rely on the following formula to estimate the number of leading zeros after
+  // scaling up: Lz(a*b) >= Lz(a) - floor(log2(b)) - 1
+  // We precompute floor(log2(10^b)) for b = 0, 1, 2, 3...
+  static const int floor_log2[] = {
+      0, 3, 6, 9, 13, 16, 19, 23, 26, 29, 33, 36, 39, 43, 46, 49, 53, 56, 59, 63, 66, 69,
+      73, 76, 79, 83, 86, 89, 93, 96, 99, 102, 106, 109, 112, 116, 119, 122, 126, 129 };
+  return num_lz - floor_log2[scale_diff] - 1;
+}
+
+// Separates x and y into into fractional and whole parts.
+inline void SeparateFractional(int128_t x, int x_scale, int128_t y, int y_scale,
+    int128_t* x_left, int128_t* x_right, int128_t* y_left, int128_t* y_right) {
+  // The whole part.
+  *x_left = x / DecimalUtil::GetScaleMultiplier<int128_t>(x_scale);
+  *y_left = y / DecimalUtil::GetScaleMultiplier<int128_t>(y_scale);
+  // The fractional part.
+  *x_right = x % DecimalUtil::GetScaleMultiplier<int128_t>(x_scale);
+  *y_right = y % DecimalUtil::GetScaleMultiplier<int128_t>(y_scale);
+  // Scale up the fractional part of the operand with the smaller scale so that
+  // the scales match match.
+  if (x_scale < y_scale) {
+    *x_right *= DecimalUtil::GetScaleMultiplier<int128_t>(y_scale - x_scale);
+  } else {
+    *y_right *= DecimalUtil::GetScaleMultiplier<int128_t>(x_scale - y_scale);
+  }
+}
+
+// Adds numbers that are large enough so they can't be added directly. Both
+// numbers must be either positive or zero.
+inline int128_t AddLarge(int128_t x, int x_scale, int128_t y, int y_scale,
+    int result_scale, bool round, bool *overflow) {
+  DCHECK(x >= 0 && y >= 0);
+
+  int128_t left, right, x_left, x_right, y_left, y_right;
+  SeparateFractional(x, x_scale, y, y_scale, &x_left, &x_right, &y_left, &y_right);
+  DCHECK(x_left >= 0 && y_left >= 0 && x_right >= 0 && y_right >=0);
+
+  int max_scale = std::max(x_scale, y_scale);
+  int result_scale_decrease = max_scale - result_scale;
+  DCHECK(result_scale_decrease >= 0);
+
+  // carry_to_left should be 1 if there is an overflow when adding the fractional parts.
+  int carry_to_left = 0;
+  if (UNLIKELY(x_right >=
+      DecimalUtil::GetScaleMultiplier<int128_t>(max_scale) - y_right)) {
+    // Case where adding the fractional parts results in an overflow.
+    carry_to_left = 1;
+    right = x_right - DecimalUtil::GetScaleMultiplier<int128_t>(max_scale) + y_right;
+  } else {
+    // Case where adding the fractional parts does not result in an overflow.
+    right = x_right + y_right;
+  }
+  if (result_scale_decrease > 0) {
+    right = detail::ScaleDownAndRound<int128_t, int128_t>(
+        right, result_scale_decrease, round);
+  }
+  DCHECK(right >= 0);
+  // It is possible that right gets rounded up after scaling down (and it would look like
+  // it overflowed). We could handle this case by subtracting 10^result_scale from right
+  // (which would make it equal to zero) and adding one to carry_to_left, but
+  // it is not necessary, because doing that is equivalent to doing nothing.
+  DCHECK(right <= DecimalUtil::GetScaleMultiplier<int128_t>(result_scale));
+
+  *overflow |= x_left > DecimalUtil::MAX_UNSCALED_DECIMAL16 - y_left - carry_to_left;
+  left = x_left + y_left + carry_to_left;
+
+  int128_t mult = DecimalUtil::GetScaleMultiplier<int128_t>(result_scale);
+  if (UNLIKELY(!*overflow &&
+      left > (DecimalUtil::MAX_UNSCALED_DECIMAL16 - right) / mult)) {
+    *overflow = true;
+  }
+  return (left * mult) + right;
+}
+
+// Subtracts numbers that are large enough so that we can't subtract directly. Neither
+// of the numbers can be zero and one must be positive and the other one negative.
+inline int128_t SubtractLarge(int128_t x, int x_scale, int128_t y, int y_scale,
+    int result_scale, bool round, bool *overflow) {
+  DCHECK(x != 0 && y != 0);
+  DCHECK((x > 0) != (y > 0));
+
+  int128_t left, right, x_left, x_right, y_left, y_right;
+  SeparateFractional(x, x_scale, y, y_scale, &x_left, &x_right, &y_left, &y_right);
+
+  int max_scale = std::max(x_scale, y_scale);
+  int result_scale_decrease = max_scale - result_scale;
+  DCHECK_GE(result_scale_decrease, 0);
+
+  right = x_right + y_right;
+  left = x_left + y_left;
+  // Overflow is not possible because one number is positive and the other one is
+  // negative.
+  DCHECK(abs(right) < DecimalUtil::MAX_UNSCALED_DECIMAL16);
+  DCHECK(abs(left) < DecimalUtil::MAX_UNSCALED_DECIMAL16);
+  // If the whole and fractional parts have different signs, then we need to make the
+  // fractional part have the same sign as the whole part. If either left or right is
+  // zero, then nothing needs to be done.
+  if (left < 0 && right > 0) {
+    left += 1;
+    right -= DecimalUtil::GetScaleMultiplier<int128_t>(max_scale);
+  } else if (left > 0 && right < 0) {
+    left -= 1;
+    right += DecimalUtil::GetScaleMultiplier<int128_t>(max_scale);
+  }
+  // The operation above brought left closer to zero.
+  DCHECK(abs(left) <= abs(x_left + y_left));
+  if (result_scale_decrease > 0) {
+    // At this point, the scale of the fractional part is either x_scale or y_scale,
+    // whichever is greater. We scale down the fractional part to result_scale here.
+    right = detail::ScaleDownAndRound<int128_t, int128_t>(
+        right, result_scale_decrease, round);
+  }
+
+  // Check that left and right have the same sign.
+  DCHECK(left == 0 || right == 0 || (left > 0) == (right > 0));
+  // It is possible that right gets rounded up after scaling down (and it would look like
+  // it overflowed). This does not need to be handled in a special way and will result
+  // in incrementing the whole part by one.
+  DCHECK(abs(right) <= DecimalUtil::GetScaleMultiplier<int128_t>(result_scale));
+
+  int128_t mult = DecimalUtil::GetScaleMultiplier<int128_t>(result_scale);
+  if (UNLIKELY(abs(left) > (DecimalUtil::MAX_UNSCALED_DECIMAL16 - abs(right)) / mult)) {
+    *overflow = true;
+  }
+  return (left * mult) + right;
+}
+
+}
+
+template<typename T>
+template<typename RESULT_T>
+inline DecimalValue<RESULT_T> DecimalValue<T>::Add(int this_scale,
+    const DecimalValue& other, int other_scale, int result_precision, int result_scale,
+    bool round, bool* overflow) const {
+
+  if (sizeof(RESULT_T) < 16 || result_precision < 38) {
+    // The following check is guaranteed by the frontend.
+    DCHECK_EQ(result_scale, std::max(this_scale, other_scale));
+    RESULT_T x = 0;
+    RESULT_T y = 0;
+    *overflow |= AdjustToSameScale(*this, this_scale, other, other_scale,
+        result_precision, &x, &y);
+    DCHECK(!*overflow) << "Cannot overflow unless result is Decimal16Value";
+    return DecimalValue<RESULT_T>(x + y);
+  }
+
+  // We compute how many leading zeros x and y would have after one of them gets scaled
+  // up to match the scale of the other one.
+  int x_lz = BitUtil::CountLeadingZeros(abs(value()));
+  int y_lz = BitUtil::CountLeadingZeros(abs(other.value()));
+
+  int result_scale_decrease = this_scale - result_scale;
+  int scale_diff = this_scale - other_scale;
+  if (scale_diff > 0) {
+    y_lz = detail::MinLeadingZerosAfterScaling(y_lz, scale_diff);
+  } else if (scale_diff < 0) {
+    result_scale_decrease = other_scale - result_scale;
+    x_lz = detail::MinLeadingZerosAfterScaling(x_lz, -scale_diff);
+  }
+  DCHECK_GE(result_scale_decrease, 0);
+  DCHECK_EQ(result_scale_decrease, std::max(this_scale, other_scale) - result_scale);
+
+  const int MIN_LZ = 3;
+  if ((x_lz >= MIN_LZ) && (y_lz >= MIN_LZ)) {
+    // If both numbers have at least MIN_LZ leading zeros, we can add them directly
+    // without the risk of overflow.
+    // We want the result to have at least 2 leading zeros, which ensures that it fits
+    // into the maximum decimal because 2^126 - 1 < 10^38 - 1. If both x and y have at
+    // least 3 leading zeros, then we are guaranteed that the result will have at lest 2
+    // leading zeros.
+    RESULT_T x = 0;
+    RESULT_T y = 0;
+    bool ovf = AdjustToSameScale(*this, this_scale, other, other_scale,
+        result_precision, &x, &y);
+    DCHECK(!ovf) << "Cannot overflow because the leading zero estimate passed";
+    DCHECK(abs(x) <= DecimalUtil::MAX_UNSCALED_DECIMAL16 - abs(y));
+    x += y;
+    if (result_scale_decrease > 0) {
+      // After first adjusting x and y to the same scale and adding them together, we now
+      // need scale down the result to result_scale.
+      x = detail::ScaleDownAndRound<T, RESULT_T>(x, result_scale_decrease, round);
+    }
+    return DecimalValue<RESULT_T>(x);
+  }
+
+  // If both numbers cannot be added directly, we have to resort to a more complex
+  // and slower algorithm.
+  int128_t x = value();
+  int128_t y = other.value();
+  int128_t result;
+
+  if (x >= 0 && y >= 0) {
+    result = detail::AddLarge(
+        x, this_scale, y, other_scale, result_scale, round, overflow);
+  } else if (x <= 0 && y <= 0) {
+    result = -detail::AddLarge(
+        -x, this_scale, -y, other_scale, result_scale, round, overflow);
+  } else {
+    result = detail::SubtractLarge(
+        x, this_scale, y, other_scale, result_scale, round, overflow);
+  }
+  return DecimalValue<RESULT_T>(result);
 }
 
 template<typename T>

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/bc12a9eb/fe/src/main/java/org/apache/impala/analysis/TypesUtil.java
----------------------------------------------------------------------
diff --git a/fe/src/main/java/org/apache/impala/analysis/TypesUtil.java b/fe/src/main/java/org/apache/impala/analysis/TypesUtil.java
index 1bd34ef..9effc33 100644
--- a/fe/src/main/java/org/apache/impala/analysis/TypesUtil.java
+++ b/fe/src/main/java/org/apache/impala/analysis/TypesUtil.java
@@ -225,8 +225,6 @@ public class TypesUtil {
    * http://blogs.msdn.com/b/sqlprogrammability/archive/2006/03/29/564110.aspx
    * https://msdn.microsoft.com/en-us/library/ms190476.aspx
    *
-   * TODO: implement V2 rules for ADD/SUB.
-   *
    * Changes:
    *  - There are slight difference with how precision/scale reduction occurs compared
    *    to SQL server when the desired precision is more than the maximum supported
@@ -261,9 +259,12 @@ public class TypesUtil {
         break;
       case ADD:
       case SUBTRACT:
+        resultScale = Math.max(s1, s2);
+        resultPrecision = Math.max(p1 - s1, p2 - s2) + resultScale + 1;
+        break;
       default:
-        // Not yet implemented - fall back to V1 rules.
-        return getDecimalArithmeticResultTypeV1(t1, t2, op);
+        Preconditions.checkState(false);
+        return null;
     }
     // Use the scale reduction technique when resultPrecision is too large.
     return ScalarType.createAdjustedDecimalType(resultPrecision, resultScale);

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/bc12a9eb/fe/src/test/java/org/apache/impala/analysis/AnalyzeExprsTest.java
----------------------------------------------------------------------
diff --git a/fe/src/test/java/org/apache/impala/analysis/AnalyzeExprsTest.java b/fe/src/test/java/org/apache/impala/analysis/AnalyzeExprsTest.java
index 724d437..93a4090 100644
--- a/fe/src/test/java/org/apache/impala/analysis/AnalyzeExprsTest.java
+++ b/fe/src/test/java/org/apache/impala/analysis/AnalyzeExprsTest.java
@@ -1429,9 +1429,9 @@ public class AnalyzeExprsTest extends AnalyzerTest {
     // DECIMAL_V1: floating point + any numeric literal = floating point
     // DECIMAL_V2: floating point + any expr of type decimal = decimal
     checkDecimalReturnType("select float_col + 1.1 from functional.alltypestiny",
-        Type.DOUBLE, ScalarType.createDecimalType(38, 9));
+        Type.DOUBLE, ScalarType.createDecimalType(38, 8));
     checkDecimalReturnType("select float_col - 1.1 from functional.alltypestiny",
-        Type.DOUBLE, ScalarType.createDecimalType(38, 9));
+        Type.DOUBLE, ScalarType.createDecimalType(38, 8));
     checkDecimalReturnType("select float_col / 1.1 from functional.alltypestiny",
         Type.DOUBLE, ScalarType.createDecimalType(38, 8));
     checkDecimalReturnType("select float_col % 1.1 from functional.alltypestiny",
@@ -1497,16 +1497,18 @@ public class AnalyzeExprsTest extends AnalyzerTest {
     // must be an integer.
     checkDecimalReturnType("select round(1.2345, 2) * pow(10, 10)", Type.DOUBLE);
     checkDecimalReturnType("select round(1.2345, 2) + pow(10, 10)",
-        Type.DOUBLE, ScalarType.createDecimalType(38, 17));
+        Type.DOUBLE, ScalarType.createDecimalType(38, 16));
 
     // Explicitly casting the literal to a decimal or float changes the type of the
     // literal. This is independent of the DECIMAL_V2 setting.
     checkDecimalReturnType("select int_col + cast(1.1 as decimal(2,1)) from "
         + " functional.alltypestiny", ScalarType.createDecimalType(12, 1));
     checkDecimalReturnType("select float_col + cast(1.1 as decimal(2,1)) from "
-        + " functional.alltypestiny", ScalarType.createDecimalType(38, 9));
+        + " functional.alltypestiny",
+        ScalarType.createDecimalType(38, 9), ScalarType.createDecimalType(38, 8));
     checkDecimalReturnType("select float_col + cast(1.1*1.2+1.3 as decimal(2,1)) from "
-        + " functional.alltypestiny", ScalarType.createDecimalType(38, 9));
+        + " functional.alltypestiny",
+        ScalarType.createDecimalType(38, 9), ScalarType.createDecimalType(38, 8));
     checkDecimalReturnType("select float_col + cast(1.1 as float) from "
         + " functional.alltypestiny", Type.DOUBLE);
     checkDecimalReturnType("select float_col + cast(1.1 as float) from "
@@ -1516,24 +1518,24 @@ public class AnalyzeExprsTest extends AnalyzerTest {
 
     // Test behavior of compound expressions with a single slot ref and many literals.
     checkDecimalReturnType("select 1.0 + float_col + 1.1 from functional.alltypestiny",
-        Type.DOUBLE, ScalarType.createDecimalType(38, 9));
+        Type.DOUBLE, ScalarType.createDecimalType(38, 7));
     checkDecimalReturnType("select 1.0 + 2.0 + float_col from functional.alltypestiny",
-        Type.DOUBLE, ScalarType.createDecimalType(38, 9));
+        Type.DOUBLE, ScalarType.createDecimalType(38, 8));
     checkDecimalReturnType("select 1.0 + 2.0 + pi() * float_col from functional.alltypestiny",
-        Type.DOUBLE, ScalarType.createDecimalType(38, 17));
+        Type.DOUBLE, ScalarType.createDecimalType(38, 16));
     checkDecimalReturnType("select 1.0 + d1 + 1.1 from functional.decimal_tbl",
         ScalarType.createDecimalType(12, 1));
     checkDecimalReturnType("select 1.0 + 2.0 + d1 from functional.decimal_tbl",
         ScalarType.createDecimalType(11, 1));
     checkDecimalReturnType("select 1.0 + 2.0 + pi() * d1 from functional.decimal_tbl",
-        Type.DOUBLE, ScalarType.createDecimalType(38, 17));
+        Type.DOUBLE, ScalarType.createDecimalType(38, 16));
 
     // Test behavior of compound expressions with multiple slot refs and literals.
     checkDecimalReturnType("select double_col + 1.23 + float_col + 1.0 " +
-        " from functional.alltypestiny", Type.DOUBLE, ScalarType.createDecimalType(38, 17));
+        " from functional.alltypestiny", Type.DOUBLE, ScalarType.createDecimalType(38, 7));
     checkDecimalReturnType("select double_col + 1.23 + float_col + 1.0 + int_col " +
         " + bigint_col from functional.alltypestiny", Type.DOUBLE,
-        ScalarType.createDecimalType(38, 17));
+        ScalarType.createDecimalType(38, 6));
     checkDecimalReturnType("select d1 + 1.23 + d2 + 1.0 " +
         " from functional.decimal_tbl", ScalarType.createDecimalType(14, 2));