You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by ta...@apache.org on 2016/05/12 22:10:24 UTC

[49/50] [abbrv] incubator-impala git commit: IMPALA-3490: Add flag to reduce minidump size

IMPALA-3490: Add flag to reduce minidump size

IMPALA-2686 added the breakpad library to all impala daemons, thus enabling them
to write minidump files. This change introduces a flag
'minidump_size_limit_hint_kb', which causes breakpad to reduce the amount of
thread stack memory it includes in a minidump, aiming to reduce the minidump
size during crashes with a lot of threads. Once a minidump is expected to
exceed the configured value, breakpad will include the full stack memory for the
first 20 threads, and afterwards capture only 2KB of stack memory for each
additional thread.

Change-Id: I2f3aa0df51be9f0bf0755fb288702911cdb88052
Reviewed-on: http://gerrit.cloudera.org:8080/2990
Reviewed-by: Lars Volker <lv...@cloudera.com>
Tested-by: Internal Jenkins


Project: http://git-wip-us.apache.org/repos/asf/incubator-impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-impala/commit/df8bf3a9
Tree: http://git-wip-us.apache.org/repos/asf/incubator-impala/tree/df8bf3a9
Diff: http://git-wip-us.apache.org/repos/asf/incubator-impala/diff/df8bf3a9

Branch: refs/heads/master
Commit: df8bf3a96551b4844ec8152aa5d4c16c5d6b6bde
Parents: eaa3926
Author: Lars Volker <lv...@cloudera.com>
Authored: Fri May 6 23:29:07 2016 +0200
Committer: Tim Armstrong <ta...@cloudera.com>
Committed: Thu May 12 14:18:04 2016 -0700

----------------------------------------------------------------------
 be/src/common/global-flags.cc         |  5 +++
 be/src/util/minidump.cc               | 10 +++++-
 tests/custom_cluster/test_breakpad.py | 56 ++++++++++++++++++++++++++----
 3 files changed, 63 insertions(+), 8 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/df8bf3a9/be/src/common/global-flags.cc
----------------------------------------------------------------------
diff --git a/be/src/common/global-flags.cc b/be/src/common/global-flags.cc
index b3f83d9..82263e0 100644
--- a/be/src/common/global-flags.cc
+++ b/be/src/common/global-flags.cc
@@ -76,6 +76,11 @@ DEFINE_string(minidump_path, "/tmp/impala-minidumps", "Directory to write minidu
 DEFINE_int32(max_minidumps, 9, "Maximum number of minidump files to keep per daemon. "
     "Older files are removed first. Set to 0 to keep all minidump files.");
 
+DEFINE_int32(minidump_size_limit_hint_kb, 20480, "Size limit hint for minidump files in "
+    "KB. If a minidump exceeds this value, then breakpad will reduce the stack memory it "
+    "collects for each thread from 8KB to 2KB. However it will always include the full "
+    "stack memory for the first 20 threads, including the thread that crashed.");
+
 DEFINE_bool(load_auth_to_local_rules, false, "If true, load auth_to_local configuration "
     "from hdfs' core-site.xml. When enabled, impalad reads the rules from the property "
     "hadoop.security.auth_to_local and applies them to translate the Kerberos principal "

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/df8bf3a9/be/src/util/minidump.cc
----------------------------------------------------------------------
diff --git a/be/src/util/minidump.cc b/be/src/util/minidump.cc
index 713777a..861ed3c 100644
--- a/be/src/util/minidump.cc
+++ b/be/src/util/minidump.cc
@@ -39,8 +39,9 @@ using boost::filesystem::path;
 using boost::filesystem::remove;
 using boost::system::error_code;
 
-DECLARE_int32(max_minidumps);
 DECLARE_string(minidump_path);
+DECLARE_int32(max_minidumps);
+DECLARE_int32(minidump_size_limit_hint_kb);
 
 #define MINIDUMP_LOG_BUF_SIZE 256
 
@@ -186,6 +187,13 @@ Status RegisterMinidump(const char* cmd_line_path) {
 
   google_breakpad::MinidumpDescriptor desc(FLAGS_minidump_path.c_str());
 
+  // Limit filesize if configured.
+  if (FLAGS_minidump_size_limit_hint_kb > 0) {
+    size_t size_limit = 1024 * static_cast<int64_t>(FLAGS_minidump_size_limit_hint_kb);
+    LOG(INFO) << "Setting minidump size limit to " << size_limit << ".";
+    desc.set_size_limit(size_limit);
+  }
+
   // Intentionally leaked. We want this to have the lifetime of the process.
   google_breakpad::ExceptionHandler* eh =
       new google_breakpad::ExceptionHandler(desc, NULL, DumpCallback, NULL, true, -1);

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/df8bf3a9/tests/custom_cluster/test_breakpad.py
----------------------------------------------------------------------
diff --git a/tests/custom_cluster/test_breakpad.py b/tests/custom_cluster/test_breakpad.py
index 595bbff..4345ee9 100644
--- a/tests/custom_cluster/test_breakpad.py
+++ b/tests/custom_cluster/test_breakpad.py
@@ -19,7 +19,7 @@ import shutil
 import tempfile
 import time
 
-from signal import SIGSEGV
+from signal import SIGSEGV, SIGKILL
 
 from tests.common.custom_cluster_test_suite import CustomClusterTestSuite
 
@@ -56,17 +56,21 @@ class TestBreakpad(CustomClusterTestSuite):
     # Start default cluster for subsequent tests (verify_metrics).
     cls._start_impala_cluster([])
 
-  def start_cluster(self):
-    cluster_options = ["""--%s='-minidump_path=%s -max_minidumps=2'"""
-                       % (arg, self.tmp_dir) for arg in DAEMON_ARGS]
+  def start_cluster_with_args(self, **kwargs):
+    cluster_options = []
+    for daemon_arg in DAEMON_ARGS:
+      daemon_options = " ".join("-%s=%s" % i for i in kwargs.iteritems())
+      cluster_options.append("""--%s='%s'""" % (daemon_arg, daemon_options))
     self._start_impala_cluster(cluster_options)
 
+  def start_cluster(self):
+    self.start_cluster_with_args(minidump_path=self.tmp_dir, max_minidumps=2)
+
   def start_cluster_without_minidumps(self):
-    cluster_options = ["""--%s='-minidump_path= -max_minidumps=2'"""
-                       % arg for arg in DAEMON_ARGS]
-    self._start_impala_cluster(cluster_options)
+    self.start_cluster_with_args(minidump_path='', max_minidumps=2)
 
   def kill_cluster(self, signal):
+    self.cluster.refresh()
     cluster = self.cluster
     for impalad in cluster.impalads:
       impalad.kill(signal)
@@ -129,3 +133,41 @@ class TestBreakpad(CustomClusterTestSuite):
     self.start_cluster_without_minidumps()
     self.kill_cluster(SIGSEGV)
     self.assert_num_logfile_entries(0)
+
+  def trigger_single_minidump_and_get_size(self):
+    """Kill a single impalad with SIGSEGV to make it write a minidump. Kill the rest of
+    the cluster. Clean up the single minidump file and return its size.
+    """
+    assert len(self.cluster.impalads) > 0
+    # Make one impalad write a minidump.
+    self.cluster.impalads[0].kill(SIGSEGV)
+    # Wait for the minidump to be written before killing the rest of the cluster.
+    time.sleep(1)
+    # Kill the rest of the cluster.
+    self.kill_cluster(SIGKILL)
+    assert self.count_minidumps('impalad') == 1
+    # Get file size of that miniump.
+    path = os.path.join(self.tmp_dir, 'impalad')
+    minidump_file = glob.glob("%s/*.dmp" % path)[0]
+    minidump_size = os.path.getsize(minidump_file)
+    os.remove(minidump_file)
+    assert self.count_all_minidumps() == 0
+    return minidump_size
+
+  @pytest.mark.execute_serially
+  def test_limit_minidump_size(self):
+    """Check that setting the 'minidump_size_limit_hint_kb' to a small value will reduce
+    the minidump file size.
+    """
+    assert self.count_all_minidumps() == 0
+    # Generate minidump with default settings.
+    self.start_cluster()
+    full_minidump_size = self.trigger_single_minidump_and_get_size()
+    # Start cluster with limited minidump file size, we use a very small value, to ensure
+    # the resulting minidump will be as small as possible.
+    self.start_cluster_with_args(minidump_path=self.tmp_dir,
+        minidump_size_limit_hint_kb=1)
+    reduced_minidump_size = self.trigger_single_minidump_and_get_size()
+    # Check that the minidump file size has been reduced.
+    assert reduced_minidump_size < full_minidump_size
+