You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by kw...@apache.org on 2017/12/08 08:06:51 UTC

[1/2] impala git commit: IMPALA-6285: Don't print stack trace on RPC errors.

Repository: impala
Updated Branches:
  refs/heads/master f3fa3e017 -> 542dc227c


IMPALA-6285: Don't print stack trace on RPC errors.

There is not much benefit in printing the stack trace when
Thrift RPC hits an error. As long as we print enough info about
the error and identify the caller, that should be sufficient.
In fact, it has been observed that stack crawl caused unnecessary
CPU spikes in the past. This change replaces Status() with
Status::Expected() in DoRpc(), RetryRpc(), RetryRpcRecv() and
Coordinator::BackendState::Exec() to avoid unnecessary stack crawls.

Testing done: private core build. Verified error strings with
test_rpc_timeout.py and test_rpc_exception.py

Change-Id: Ia83294494442ef21f7934f92ba9112e80d81fa58
Reviewed-on: http://gerrit.cloudera.org:8080/8788
Reviewed-by: Michael Ho <kw...@cloudera.com>
Tested-by: Impala Public Jenkins


Project: http://git-wip-us.apache.org/repos/asf/impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/impala/commit/d60eb192
Tree: http://git-wip-us.apache.org/repos/asf/impala/tree/d60eb192
Diff: http://git-wip-us.apache.org/repos/asf/impala/diff/d60eb192

Branch: refs/heads/master
Commit: d60eb192a959afd5e1a7062b360ade2ef8a8f4f4
Parents: f3fa3e0
Author: Michael Ho <kw...@cloudera.com>
Authored: Thu Dec 7 00:21:19 2017 -0800
Committer: Impala Public Jenkins <im...@gerrit.cloudera.org>
Committed: Thu Dec 7 23:58:40 2017 +0000

----------------------------------------------------------------------
 be/src/runtime/client-cache.h               | 43 ++++++++++++++++--------
 be/src/runtime/coordinator-backend-state.cc |  2 +-
 common/thrift/generate_error_codes.py       |  2 +-
 3 files changed, 31 insertions(+), 16 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/impala/blob/d60eb192/be/src/runtime/client-cache.h
----------------------------------------------------------------------
diff --git a/be/src/runtime/client-cache.h b/be/src/runtime/client-cache.h
index d0fd30e..500ad59 100644
--- a/be/src/runtime/client-cache.h
+++ b/be/src/runtime/client-cache.h
@@ -240,8 +240,7 @@ class ClientConnection {
       (client_->*f)(*response, request, &send_done);
     } catch (const apache::thrift::transport::TTransportException& e) {
       if (send_done && IsRecvTimeoutTException(e)) {
-        return Status(TErrorCode::RPC_RECV_TIMEOUT, strings::Substitute(
-            "Client $0 timed-out during recv call.", TNetworkAddressToString(address_)));
+        return RecvTimeoutStatus(typeid(*response).name());
       }
 
       // Client may have unexpectedly been closed, so re-open and retry once if we didn't
@@ -254,9 +253,9 @@ class ClientConnection {
       }
 
       // Payload was sent and failure wasn't a timeout waiting for response. Fail the RPC.
-      return Status(TErrorCode::RPC_GENERAL_ERROR, ExceptionMsg(e, send_done));
+      return GeneralErrorStatus(e, typeid(*response).name(), send_done);
     } catch (const apache::thrift::TException& e) {
-      return Status(TErrorCode::RPC_GENERAL_ERROR, ExceptionMsg(e, send_done));
+      return GeneralErrorStatus(e, typeid(*response).name(), send_done);
     }
     client_is_unrecoverable_ = false;
     return Status::OK();
@@ -273,13 +272,12 @@ class ClientConnection {
       (client_->*recv_func)(*response);
     } catch (const apache::thrift::transport::TTransportException& e) {
       if (IsRecvTimeoutTException(e)) {
-        return Status(TErrorCode::RPC_RECV_TIMEOUT, strings::Substitute(
-            "Client $0 timed-out during recv call.", TNetworkAddressToString(address_)));
+        return RecvTimeoutStatus(typeid(*response).name());
       }
       // If it's not timeout exception, then the connection is broken, stop retrying.
-      return Status(TErrorCode::RPC_GENERAL_ERROR, e.what());
+      return GeneralErrorStatus(e, typeid(*response).name(), true);
     } catch (const apache::thrift::TException& e) {
-      return Status(TErrorCode::RPC_GENERAL_ERROR, e.what());
+      return GeneralErrorStatus(e, typeid(*response).name(), true);
     }
     client_is_unrecoverable_ = false;
     return Status::OK();
@@ -295,10 +293,25 @@ class ClientConnection {
   /// recovered.
   bool client_is_unrecoverable_;
 
-  std::string ExceptionMsg(const apache::thrift::TException& e, bool send_done) {
-    return strings::Substitute("Client for $0 hits an unexpected exception: $1, type: $2"
-        " rpc send completed: $3", TNetworkAddressToString(address_), e.what(),
-        typeid(e).name(), send_done ? "true" : "false");
+  /// Returns an error Status for general RPC errors not due to recv timeout or
+  /// stale connections. Will also log some details about the error.
+  Status GeneralErrorStatus(
+      const apache::thrift::TException& e, const std::string& rpc_name, bool send_done) {
+    ErrorMsg msg(TErrorCode::RPC_GENERAL_ERROR, strings::Substitute("Client for $0 hit "
+        "an unexpected exception: $1, type: $2, rpc: $3, send: $4",
+        TNetworkAddressToString(address_), e.what(), typeid(e).name(), rpc_name,
+        send_done ? "done" : "not done"));
+    VLOG_QUERY << msg.msg();
+    return Status::Expected(msg);
+  }
+
+  /// Returns an error Status for RPC errors due to recv timeout.
+  /// Will also log some details about the error.
+  Status RecvTimeoutStatus(const std::string& rpc_name) {
+    ErrorMsg msg(TErrorCode::RPC_RECV_TIMEOUT, TNetworkAddressToString(address_),
+        rpc_name);
+    VLOG_QUERY << msg.msg();
+    return Status::Expected(msg);
   }
 
   /// Retry the RPC if TCP connection underpinning this client has been closed
@@ -312,7 +325,9 @@ class ClientConnection {
     // TODO: ThriftClient should return proper error codes.
     Status status = Reopen();
     if (!status.ok()) {
-      return Status(TErrorCode::RPC_CLIENT_CONNECT_FAILURE, status.GetDetail());
+      ErrorMsg msg(TErrorCode::RPC_CLIENT_CONNECT_FAILURE, status.GetDetail());
+      VLOG_QUERY << msg.msg() << " rpc: " << typeid(*response).name();
+      return Status::Expected(msg);
     }
     bool send_done = false;
     try {
@@ -321,7 +336,7 @@ class ClientConnection {
       // By this point the RPC really has failed.
       // TODO: Revisit this logic later. It's possible that the new connection
       // works but we hit timeout here.
-      return Status(TErrorCode::RPC_GENERAL_ERROR, ExceptionMsg(e, send_done));
+      return GeneralErrorStatus(e, typeid(*response).name(), send_done);
     }
     client_is_unrecoverable_ = false;
     return Status::OK();

http://git-wip-us.apache.org/repos/asf/impala/blob/d60eb192/be/src/runtime/coordinator-backend-state.cc
----------------------------------------------------------------------
diff --git a/be/src/runtime/coordinator-backend-state.cc b/be/src/runtime/coordinator-backend-state.cc
index 973aa25..9d89086 100644
--- a/be/src/runtime/coordinator-backend-state.cc
+++ b/be/src/runtime/coordinator-backend-state.cc
@@ -183,7 +183,7 @@ void Coordinator::BackendState::Exec(
     const string& err_msg =
         Substitute(ERR_TEMPLATE, PrintId(query_id_), rpc_status.msg().msg());
     VLOG_QUERY << err_msg;
-    status_ = Status(err_msg);
+    status_ = Status::Expected(err_msg);
     return;
   }
 

http://git-wip-us.apache.org/repos/asf/impala/blob/d60eb192/common/thrift/generate_error_codes.py
----------------------------------------------------------------------
diff --git a/common/thrift/generate_error_codes.py b/common/thrift/generate_error_codes.py
index 40739e0..5aec1ce 100755
--- a/common/thrift/generate_error_codes.py
+++ b/common/thrift/generate_error_codes.py
@@ -114,7 +114,7 @@ error_codes = (
    "Verify that all your impalads are the same version."),
 
   ("RPC_GENERAL_ERROR", 30, "RPC Error: $0"),
-  ("RPC_RECV_TIMEOUT", 31, "RPC recv timed out: $0"),
+  ("RPC_RECV_TIMEOUT", 31, "RPC recv timed out: dest address: $0, rpc: $1"),
 
   ("UDF_VERIFY_FAILED", 32,
    "Failed to verify function $0 from LLVM module $1, see log for more details."),


[2/2] impala git commit: IMPALA-6292: Fix incorrect DCHECK in decimal subtraction

Posted by kw...@apache.org.
IMPALA-6292: Fix incorrect DCHECK in decimal subtraction

When subtracting two decimals, and one of them is large, we incorrectly
hit a DCHECK if the intermediate result was equal to 10^39-1, which is
MAX_UNSCALED_DECIMAL16. We fix the problem by changing the condition in
the DCHECK from "less than" to "less than or equal to".

Testing:
- Added expr tests that reproduce the issue.

Change-Id: I42d42ad85efe32b7a0db0d2353385939fee09934
Reviewed-on: http://gerrit.cloudera.org:8080/8796
Reviewed-by: Tim Armstrong <ta...@cloudera.com>
Tested-by: Impala Public Jenkins


Project: http://git-wip-us.apache.org/repos/asf/impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/impala/commit/542dc227
Tree: http://git-wip-us.apache.org/repos/asf/impala/tree/542dc227
Diff: http://git-wip-us.apache.org/repos/asf/impala/diff/542dc227

Branch: refs/heads/master
Commit: 542dc227c613e2fbe2b9a4f98817f395b2d1238c
Parents: d60eb19
Author: Taras Bobrovytsky <tb...@cloudera.com>
Authored: Thu Dec 7 16:51:35 2017 -0800
Committer: Impala Public Jenkins <im...@gerrit.cloudera.org>
Committed: Fri Dec 8 05:02:08 2017 +0000

----------------------------------------------------------------------
 be/src/exprs/expr-test.cc             | 23 ++++++++++++++++-------
 be/src/runtime/decimal-value.inline.h |  6 +++---
 2 files changed, 19 insertions(+), 10 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/impala/blob/542dc227/be/src/exprs/expr-test.cc
----------------------------------------------------------------------
diff --git a/be/src/exprs/expr-test.cc b/be/src/exprs/expr-test.cc
index 4b7bcd0..5670bcf 100644
--- a/be/src/exprs/expr-test.cc
+++ b/be/src/exprs/expr-test.cc
@@ -1611,7 +1611,7 @@ DecimalTestCase decimal_cases[] = {
     {{ false, false, StringToInt128("-85070591730234615865843651857942052863"), 38, 0 }}},
   { "cast(42535295865117307932921825928971026432 as decimal(38,0)) + "
     "cast(-42535295865117307932921825928971026431 as decimal(38,0))",
-    {{ false, false, StringToInt128("1"), 38, 0 }}},
+    {{ false, false, 1, 38, 0 }}},
   { "cast(99999999999999999999999999999999.999999 as decimal(38,6)) + "
     "cast(8899999999999999999999999999999.999999 as decimal(38,6))",
     {{ false, true, 0, 38, 6 },
@@ -1647,10 +1647,10 @@ DecimalTestCase decimal_cases[] = {
     {{ false, false, StringToInt128("22222222222222222222222222222223333332"), 38, 6 }}},
   { "cast(-11111111111111111111111111111111.777777 as decimal(38,6)) + "
     "cast(11111111111111111111111111111111.555555 as decimal(38,6))",
-    {{ false, false, StringToInt128("-222222"), 38, 6 }}},
+    {{ false, false, -222222, 38, 6 }}},
   { "cast(11111111111111111111111111111111.777777 as decimal(38,6)) + "
     "cast(-11111111111111111111111111111111.555555 as decimal(38,6))",
-    {{ false, false, StringToInt128("222222"), 38, 6 }}},
+    {{ false, false, 222222, 38, 6 }}},
   { "cast(-11111111111111111111111111111111.777777 as decimal(38,6)) + "
     "cast(-11111111111111111111111111111111.555555 as decimal(38,6))",
     {{ false, false, StringToInt128("-22222222222222222222222222222223333332"), 38, 6 }}},
@@ -1721,11 +1721,20 @@ DecimalTestCase decimal_cases[] = {
      { false, false, StringToInt128("20000000000000000000000000000000000000"), 38, 37 }}},
   { "cast(-0.99999999999999999999999999999999999999 as decimal(38,38)) + "
     "cast(0.99999999999999999999999999999999999999 as decimal(38,38))",
-    {{ false, false, StringToInt128("0"), 38, 38 },
-     { false, false, StringToInt128("0"), 38, 37 }}},
+    {{ false, false, 0, 38, 38 },
+     { false, false, 0, 38, 37 }}},
   { "cast(0 as decimal(38,38)) + cast(0 as decimal(38,38))",
-    {{ false, false, StringToInt128("0"), 38, 38 },
-     { false, false, StringToInt128("0"), 38, 37 }}},
+    {{ false, false, 0, 38, 38 },
+     { false, false, 0, 38, 37 }}},
+  // IMPALA-6292
+  { "cast(1 as decimal(13,12)) - "
+    "cast(0.99999999999999999999999999999999999999 as decimal(38,38))",
+     {{ false, false, 1, 38, 38 },
+      { false, false, 0, 38, 36 }}},
+  { "cast(0.1 as decimal(13,12)) - "
+    "cast(99999999999999999999999999999999999999 as decimal(38,0))",
+     {{ false, true, 0, 38, 12 },
+      { true, false, 0, 38, 6 }}},
   // Test multiply operator
   { "cast(1.23 as decimal(30,2)) * cast(1 as decimal(10,3))",
     {{ false, false, 123000, 38, 5 }}},

http://git-wip-us.apache.org/repos/asf/impala/blob/542dc227/be/src/runtime/decimal-value.inline.h
----------------------------------------------------------------------
diff --git a/be/src/runtime/decimal-value.inline.h b/be/src/runtime/decimal-value.inline.h
index 2e17d96..8f76335 100644
--- a/be/src/runtime/decimal-value.inline.h
+++ b/be/src/runtime/decimal-value.inline.h
@@ -267,12 +267,12 @@ inline int128_t SubtractLarge(int128_t x, int x_scale, int128_t y, int y_scale,
   int result_scale_decrease = max_scale - result_scale;
   DCHECK_GE(result_scale_decrease, 0);
 
-  right = x_right + y_right;
   left = x_left + y_left;
+  right = x_right + y_right;
   // Overflow is not possible because one number is positive and the other one is
   // negative.
-  DCHECK(abs(right) < DecimalUtil::MAX_UNSCALED_DECIMAL16);
-  DCHECK(abs(left) < DecimalUtil::MAX_UNSCALED_DECIMAL16);
+  DCHECK(abs(left) <= DecimalUtil::MAX_UNSCALED_DECIMAL16);
+  DCHECK(abs(right) <= DecimalUtil::MAX_UNSCALED_DECIMAL16);
   // If the whole and fractional parts have different signs, then we need to make the
   // fractional part have the same sign as the whole part. If either left or right is
   // zero, then nothing needs to be done.