You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by ta...@apache.org on 2016/04/12 23:18:53 UTC

[19/50] incubator-impala git commit: IMPALA-3238: avoid log spam for very large hash tables

IMPALA-3238: avoid log spam for very large hash tables

Before this change, every time the limit was hit in ConsumeMemory() was
hit, a warning message with stack trace was logged. The streaming preagg
calls this once per partition per batch, resulting in an enormous volume
of logging.

The fix is to log the warning only once for each client (i.e. each exec
node).

We need a very large data set to reproduce this error. To test it
manually I lowered the threshold to 1 << 19, ran 'select distinct
l_orderkey from tpch_20_parquet', and confirmed that only two warning
messages per query appeared in the logs (one per agg node).

Change-Id: I366ce1148e83ac2eb71535d552bc68cb08388eec
Reviewed-on: http://gerrit.cloudera.org:8080/2659
Reviewed-by: Tim Armstrong <ta...@cloudera.com>
Tested-by: Tim Armstrong <ta...@cloudera.com>


Project: http://git-wip-us.apache.org/repos/asf/incubator-impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-impala/commit/ce928445
Tree: http://git-wip-us.apache.org/repos/asf/incubator-impala/tree/ce928445
Diff: http://git-wip-us.apache.org/repos/asf/incubator-impala/diff/ce928445

Branch: refs/heads/master
Commit: ce9284454b6c3ff382ca5f509f75697a528b14a4
Parents: 97b5233
Author: Tim Armstrong <ta...@cloudera.com>
Authored: Tue Mar 29 14:37:11 2016 -0700
Committer: Tim Armstrong <ta...@cloudera.com>
Committed: Wed Mar 30 15:42:42 2016 +0000

----------------------------------------------------------------------
 be/src/runtime/buffered-block-mgr.cc | 16 +++++++++++++---
 1 file changed, 13 insertions(+), 3 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/ce928445/be/src/runtime/buffered-block-mgr.cc
----------------------------------------------------------------------
diff --git a/be/src/runtime/buffered-block-mgr.cc b/be/src/runtime/buffered-block-mgr.cc
index 17d344b..aecd8a2 100644
--- a/be/src/runtime/buffered-block-mgr.cc
+++ b/be/src/runtime/buffered-block-mgr.cc
@@ -55,7 +55,8 @@ struct BufferedBlockMgr::Client {
         num_reserved_buffers_(num_reserved_buffers),
         tolerates_oversubscription_(tolerates_oversubscription),
         num_tmp_reserved_buffers_(0),
-        num_pinned_buffers_(0) {
+        num_pinned_buffers_(0),
+        logged_large_allocation_warning_(false) {
     DCHECK(tracker != NULL);
   }
 
@@ -95,6 +96,10 @@ struct BufferedBlockMgr::Client {
   /// Number of buffers pinned by this client.
   int num_pinned_buffers_;
 
+  /// Whether a warning about a large allocation has been made for this client. Used
+  /// to avoid producing excessive log messages.
+  bool logged_large_allocation_warning_;
+
   void PinBuffer(BufferDescriptor* buffer) {
     DCHECK(buffer != NULL);
     if (buffer->len == mgr_->max_block_size()) {
@@ -301,8 +306,13 @@ bool BufferedBlockMgr::ConsumeMemory(Client* client, int64_t size) {
   // Workaround IMPALA-1619. Return immediately if the allocation size will cause
   // an arithmetic overflow.
   if (UNLIKELY(size >= (1LL << 31))) {
-    LOG(WARNING) << "Trying to allocate memory >=2GB (" << size << ")B."
-                 << GetStackTrace();
+    // IMPALA-3238: don't repeatedly log warning when bumping up against this limit for
+    // large hash tables.
+    if (!client->logged_large_allocation_warning_) {
+      LOG(WARNING) << "Trying to allocate memory >=2GB (" << size << ")B."
+                   << GetStackTrace();
+      client->logged_large_allocation_warning_ = true;
+    }
     return false;
   }
   int buffers_needed = BitUtil::Ceil(size, max_block_size());