You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by ta...@apache.org on 2017/04/19 01:20:30 UTC

[3/6] incubator-impala git commit: IMPALA-4858: add more info to MemLimitExceeded errors

IMPALA-4858: add more info to MemLimitExceeded errors

This improves the usefulness of MemLimitExceeded errors:
- The host name is included.
- The memory left in the query and process limits is included.
- The approach for deciding whether to print the query or process
  MemTracker hierarchy is more robust: we show the query hierarchy
  only when we're closer to the query limit than the process limit.
  Previously if we were near but not over the process limit it
  printed the query output, which was often useless.
- The detailed output is included in the main error message, rather
  than in the "details" or merged errors. This reduces the impact
  of not logging those details - IMPALA-4697.

Note that the output still does not exactly reflect the state of
the world when the memory limit was exceeded because other threads
will concurrently allocate and release memory.

This also refactors the various related methods slightly to remove
RuntimeState::LogMemLimitExceeded() and reduce the number of
different methods that can log memory limit errors.

Sample output:
--------------
  Memory limit exceeded: Cannot perform aggregation at node with id 1. Failed to allocate 252 bytes for intermediate tuple.
  Fragment ab4765ecacca4d01:23edbb0800000002 could not allocate 252.00 B without exceeding limit.
  Error occurred on backend tarmstrong-box:22001 by fragment ab4765ecacca4d01:23edbb0800000002
  Memory left in process limit: 7.94 GB
  Memory left in query limit: 1.02 MB
  Query(ab4765ecacca4d01:23edbb0800000000): Limit=300.00 MB Total=298.98 MB Peak=299.98 MB
    Fragment ab4765ecacca4d01:23edbb0800000002: Total=105.32 MB Peak=114.14 MB
      AGGREGATION_NODE (id=1): Total=11.46 MB Peak=11.84 MB
      HDFS_SCAN_NODE (id=0): Total=93.73 MB Peak=102.09 MB
      DataStreamSender (dst_id=2): Total=70.02 KB Peak=86.02 KB
      CodeGen: Total=24.83 KB Peak=1.22 MB
    Block Manager: Limit=200.00 MB Total=185.00 MB Peak=186.00 MB
    Fragment ab4765ecacca4d01:23edbb0800000005: Total=185.66 MB Peak=194.66 MB
      AGGREGATION_NODE (id=3): Total=185.48 MB Peak=194.48 MB
      EXCHANGE_NODE (id=2): Total=0 Peak=0
      DataStreamRecvr: Total=160.24 KB Peak=435.45 KB
      DataStreamSender (dst_id=4): Total=688.00 B Peak=688.00 B
      CodeGen: Total=24.28 KB Peak=963.50 KB

Change-Id: Id4ab9d162cf7cd4508bce1efbfccfb4ba97e7355
Reviewed-on: http://gerrit.cloudera.org:8080/6537
Reviewed-by: Tim Armstrong <ta...@cloudera.com>
Tested-by: Impala Public Jenkins


Project: http://git-wip-us.apache.org/repos/asf/incubator-impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-impala/commit/c0e30604
Tree: http://git-wip-us.apache.org/repos/asf/incubator-impala/tree/c0e30604
Diff: http://git-wip-us.apache.org/repos/asf/incubator-impala/diff/c0e30604

Branch: refs/heads/master
Commit: c0e306047de57c39c43c9b7fbe06e64a4b824ca9
Parents: 96316e3
Author: Tim Armstrong <ta...@cloudera.com>
Authored: Mon Apr 3 15:42:59 2017 -0700
Committer: Impala Public Jenkins <im...@gerrit.cloudera.org>
Committed: Tue Apr 18 20:34:50 2017 +0000

----------------------------------------------------------------------
 be/src/common/status.cc                         |  5 +++
 be/src/common/status.h                          |  1 +
 be/src/runtime/collection-value-builder-test.cc |  3 ++
 be/src/runtime/mem-tracker.cc                   | 34 ++++++++++++++++--
 be/src/runtime/runtime-state.cc                 | 37 ++++----------------
 be/src/runtime/runtime-state.h                  |  9 ++---
 be/src/util/debug-util.cc                       |  7 ++++
 be/src/util/debug-util.h                        |  3 ++
 8 files changed, 58 insertions(+), 41 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/c0e30604/be/src/common/status.cc
----------------------------------------------------------------------
diff --git a/be/src/common/status.cc b/be/src/common/status.cc
index 6803103..3d3e6e7 100644
--- a/be/src/common/status.cc
+++ b/be/src/common/status.cc
@@ -37,6 +37,11 @@ Status Status::MemLimitExceeded() {
   return Status(TErrorCode::MEM_LIMIT_EXCEEDED, "Memory limit exceeded");
 }
 
+Status Status::MemLimitExceeded(const std::string& details) {
+  return Status(TErrorCode::MEM_LIMIT_EXCEEDED,
+        Substitute("Memory limit exceeded: $0", details));
+}
+
 Status::Status(TErrorCode::type code)
     : msg_(new ErrorMsg(code)) {
   VLOG(1) << msg_->msg() << "\n" << GetStackTrace();

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/c0e30604/be/src/common/status.h
----------------------------------------------------------------------
diff --git a/be/src/common/status.h b/be/src/common/status.h
index 6cd9035..ff54dc9 100644
--- a/be/src/common/status.h
+++ b/be/src/common/status.h
@@ -90,6 +90,7 @@ class Status {
 
   // Return a MEM_LIMIT_EXCEEDED error status.
   static Status MemLimitExceeded();
+  static Status MemLimitExceeded(const std::string& details);
 
   static const Status CANCELLED;
   static const Status DEPRECATED_RPC;

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/c0e30604/be/src/runtime/collection-value-builder-test.cc
----------------------------------------------------------------------
diff --git a/be/src/runtime/collection-value-builder-test.cc b/be/src/runtime/collection-value-builder-test.cc
index 613132a..af710ce 100644
--- a/be/src/runtime/collection-value-builder-test.cc
+++ b/be/src/runtime/collection-value-builder-test.cc
@@ -17,6 +17,7 @@
 
 #include "runtime/collection-value-builder.h"
 #include "runtime/mem-tracker.h"
+#include "runtime/test-env.h"
 #include "service/fe-support.h"
 #include "service/frontend.h"
 #include "testutil/desc-tbl-builder.h"
@@ -30,6 +31,8 @@ using namespace impala;
 static scoped_ptr<Frontend> fe;
 
 TEST(CollectionValueBuilderTest, MaxBufferSize) {
+  TestEnv test_env;
+  ASSERT_OK(test_env.Init());
   ObjectPool obj_pool;
   DescriptorTblBuilder builder(fe.get(), &obj_pool);
   builder.DeclareTuple() << TYPE_TINYINT << TYPE_TINYINT << TYPE_TINYINT;

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/c0e30604/be/src/runtime/mem-tracker.cc
----------------------------------------------------------------------
diff --git a/be/src/runtime/mem-tracker.cc b/be/src/runtime/mem-tracker.cc
index 9e3775d..cfd1881 100644
--- a/be/src/runtime/mem-tracker.cc
+++ b/be/src/runtime/mem-tracker.cc
@@ -286,9 +286,37 @@ string MemTracker::LogUsage(const string& prefix, const list<MemTracker*>& track
 
 Status MemTracker::MemLimitExceeded(RuntimeState* state, const std::string& details,
     int64_t failed_allocation_size) {
-  Status status = Status::MemLimitExceeded();
-  status.AddDetail(details);
-  if (state != NULL) state->LogMemLimitExceeded(this, failed_allocation_size);
+  DCHECK_GE(failed_allocation_size, 0);
+  stringstream ss;
+  if (details.size() != 0) ss << details << endl;
+  if (failed_allocation_size != 0) {
+    ss << label() << " could not allocate "
+       << PrettyPrinter::Print(failed_allocation_size, TUnit::BYTES)
+       << " without exceeding limit." << endl;
+  }
+  ss << "Error occurred on backend " << GetBackendString();
+  if (state != nullptr) ss << " by fragment " << state->fragment_instance_id();
+  ss << endl;
+  ExecEnv* exec_env = ExecEnv::GetInstance();
+  MemTracker* process_tracker = exec_env->process_mem_tracker();
+  const int64_t process_capacity = process_tracker->SpareCapacity();
+  ss << "Memory left in process limit: "
+     << PrettyPrinter::Print(process_capacity, TUnit::BYTES) << endl;
+
+  // Choose which tracker to log the usage of. Default to the process tracker so we can
+  // get the full view of memory consumption.
+  MemTracker* tracker_to_log = process_tracker;
+  if (state != nullptr && state->query_mem_tracker()->has_limit()) {
+    MemTracker* query_tracker = state->query_mem_tracker();
+    const int64_t query_capacity = query_tracker->limit() - query_tracker->consumption();
+    ss << "Memory left in query limit: "
+       << PrettyPrinter::Print(query_capacity, TUnit::BYTES) << endl;
+    // Log the query tracker only if the query limit was closer to being exceeded.
+    if (query_capacity < process_capacity) tracker_to_log = query_tracker;
+  }
+  ss << tracker_to_log->LogUsage();
+  Status status = Status::MemLimitExceeded(ss.str());
+  if (state != nullptr) state->LogError(status.msg());
   return status;
 }
 

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/c0e30604/be/src/runtime/runtime-state.cc
----------------------------------------------------------------------
diff --git a/be/src/runtime/runtime-state.cc b/be/src/runtime/runtime-state.cc
index c7b916b..62d0737 100644
--- a/be/src/runtime/runtime-state.cc
+++ b/be/src/runtime/runtime-state.cc
@@ -224,39 +224,15 @@ Status RuntimeState::LogOrReturnError(const ErrorMsg& message) {
   return Status::OK();
 }
 
-void RuntimeState::LogMemLimitExceeded(
-    const MemTracker* tracker, int64_t failed_allocation_size) {
-  DCHECK_GE(failed_allocation_size, 0);
-  DCHECK(query_mem_tracker_ != NULL);
-  stringstream ss;
-  ss << "Memory Limit Exceeded by fragment: " << fragment_instance_id() << endl;
-  if (failed_allocation_size != 0) {
-    DCHECK(tracker != NULL);
-    ss << "  " << tracker->label() << " could not allocate "
-       << PrettyPrinter::Print(failed_allocation_size, TUnit::BYTES)
-       << " without exceeding limit." << endl;
-  }
-
-  if (exec_env_->process_mem_tracker()->LimitExceeded()) {
-    ss << exec_env_->process_mem_tracker()->LogUsage();
-  } else {
-    ss << query_mem_tracker_->LogUsage();
-  }
-  LogError(ErrorMsg(TErrorCode::GENERAL, ss.str()));
-}
-
-Status RuntimeState::SetMemLimitExceeded(MemTracker* tracker,
+void RuntimeState::SetMemLimitExceeded(MemTracker* tracker,
     int64_t failed_allocation_size, const ErrorMsg* msg) {
+  Status status = tracker->MemLimitExceeded(this, msg == nullptr ? "" : msg->msg(),
+      failed_allocation_size);
   {
     lock_guard<SpinLock> l(query_status_lock_);
-    if (query_status_.ok()) {
-      query_status_ = Status::MemLimitExceeded();
-      if (msg != NULL) query_status_.MergeStatus(*msg);
-    } else {
-      return query_status_;
-    }
+    if (query_status_.ok()) query_status_ = status;
   }
-  LogMemLimitExceeded(tracker, failed_allocation_size);
+  LogError(status.msg());
   // Add warning about missing stats except for compute stats child queries.
   if (!query_ctx().__isset.parent_query_id &&
       query_ctx().__isset.tables_missing_stats &&
@@ -265,13 +241,12 @@ Status RuntimeState::SetMemLimitExceeded(MemTracker* tracker,
         GetTablesMissingStatsWarning(query_ctx().tables_missing_stats)));
   }
   DCHECK(query_status_.IsMemLimitExceeded());
-  return query_status_;
 }
 
 Status RuntimeState::CheckQueryState() {
   if (instance_mem_tracker_ != nullptr
       && UNLIKELY(instance_mem_tracker_->AnyLimitExceeded())) {
-    return SetMemLimitExceeded();
+    SetMemLimitExceeded(instance_mem_tracker_.get());
   }
   return GetQueryStatus();
 }

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/c0e30604/be/src/runtime/runtime-state.h
----------------------------------------------------------------------
diff --git a/be/src/runtime/runtime-state.h b/be/src/runtime/runtime-state.h
index 009fee5..db9948f 100644
--- a/be/src/runtime/runtime-state.h
+++ b/be/src/runtime/runtime-state.h
@@ -278,13 +278,8 @@ class RuntimeState {
     query_status_ = Status(err_msg);
   }
 
-  /// Function for logging memory usages to the error log when memory limit is exceeded.
-  /// If 'failed_allocation_size' is greater than zero, logs the allocation size. If
-  /// 'failed_allocation_size' is zero, nothing about the allocation size is logged.
-  void LogMemLimitExceeded(const MemTracker* tracker, int64_t failed_allocation_size);
-
   /// Sets query_status_ to MEM_LIMIT_EXCEEDED and logs all the registered trackers.
-  /// Subsequent calls to this will be no-ops. Returns query_status_.
+  /// Subsequent calls to this will be no-ops.
   /// If 'failed_allocation_size' is not 0, then it is the size of the allocation (in
   /// bytes) that would have exceeded the limit allocated for 'tracker'.
   /// This value and tracker are only used for error reporting.
@@ -292,7 +287,7 @@ class RuntimeState {
   /// generic "Memory limit exceeded" error.
   /// Note that this interface is deprecated and MemTracker::LimitExceeded() should be
   /// used and the error status should be returned.
-  Status SetMemLimitExceeded(MemTracker* tracker = NULL,
+  void SetMemLimitExceeded(MemTracker* tracker,
       int64_t failed_allocation_size = 0, const ErrorMsg* msg = NULL);
 
   /// Returns a non-OK status if query execution should stop (e.g., the query was

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/c0e30604/be/src/util/debug-util.cc
----------------------------------------------------------------------
diff --git a/be/src/util/debug-util.cc b/be/src/util/debug-util.cc
index 596e7d2..d04550c 100644
--- a/be/src/util/debug-util.cc
+++ b/be/src/util/debug-util.cc
@@ -45,6 +45,9 @@ using boost::tokenizer;
 using namespace beeswax;
 using namespace parquet;
 
+DECLARE_int32(be_port);
+DECLARE_string(hostname);
+
 namespace impala {
 
 #define THRIFT_ENUM_OUTPUT_FN_IMPL(E, MAP) \
@@ -322,4 +325,8 @@ string GetStackTrace() {
   return s;
 }
 
+string GetBackendString() {
+  return Substitute("$0:$1", FLAGS_hostname, FLAGS_be_port);
+}
+
 }

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/c0e30604/be/src/util/debug-util.h
----------------------------------------------------------------------
diff --git a/be/src/util/debug-util.h b/be/src/util/debug-util.h
index a02a288..fcfb9d6 100644
--- a/be/src/util/debug-util.h
+++ b/be/src/util/debug-util.h
@@ -104,6 +104,9 @@ std::string GetVersionString(bool compact = false);
 /// for recursive calls.
 std::string GetStackTrace();
 
+/// Returns the backend name in "host:port" form suitable for human consumption.
+std::string GetBackendString();
+
 // FILE_CHECKs are conditions that we expect to be true but could fail due to a malformed
 // input file. They differentiate these cases from DCHECKs, which indicate conditions that
 // are true unless there's a bug in Impala. We would ideally always return a bad Status