You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by he...@apache.org on 2017/08/16 16:18:43 UTC

[3/3] incubator-impala git commit: IMPALA-5769: Add periodic minidump cleanup

IMPALA-5769: Add periodic minidump cleanup

Minidumps can be written by sending SIGUSR1 to our daemon processes.
That way, an arbitrary number of minidump files can be created. This
change adds minidump cleanup to the periodic log file cleanup to
effectively bound the maximum number of minidumps we keep around.

Change-Id: Ie02ff2271412d814f84a4ff42ccbca51d91bf980
Reviewed-on: http://gerrit.cloudera.org:8080/7605
Reviewed-by: Lars Volker <lv...@cloudera.com>
Tested-by: Impala Public Jenkins


Project: http://git-wip-us.apache.org/repos/asf/incubator-impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-impala/commit/294d42ad
Tree: http://git-wip-us.apache.org/repos/asf/incubator-impala/tree/294d42ad
Diff: http://git-wip-us.apache.org/repos/asf/incubator-impala/diff/294d42ad

Branch: refs/heads/master
Commit: 294d42adc117046f975665834af03ddaa53ec27e
Parents: f74f665
Author: Lars Volker <lv...@cloudera.com>
Authored: Mon Aug 7 11:53:27 2017 -0700
Committer: Impala Public Jenkins <im...@gerrit.cloudera.org>
Committed: Wed Aug 16 08:41:16 2017 +0000

----------------------------------------------------------------------
 be/src/common/init.cc                 | 13 +++++++----
 be/src/util/minidump.cc               |  9 +-------
 be/src/util/minidump.h                |  4 ++++
 tests/custom_cluster/test_breakpad.py | 36 +++++++++++++++++++++++++-----
 4 files changed, 45 insertions(+), 17 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/294d42ad/be/src/common/init.cc
----------------------------------------------------------------------
diff --git a/be/src/common/init.cc b/be/src/common/init.cc
index 523796a..d778523 100644
--- a/be/src/common/init.cc
+++ b/be/src/common/init.cc
@@ -55,12 +55,13 @@
 
 using namespace impala;
 
+DECLARE_bool(enable_process_lifetime_heap_profiling);
+DECLARE_string(heap_profile_dir);
 DECLARE_string(hostname);
-DECLARE_string(redaction_rules_file);
-// TODO: renamed this to be more generic when we have a good CM release to do so.
+// TODO: rename this to be more generic when we have a good CM release to do so.
 DECLARE_int32(logbufsecs);
-DECLARE_string(heap_profile_dir);
-DECLARE_bool(enable_process_lifetime_heap_profiling);
+DECLARE_int32(max_minidumps);
+DECLARE_string(redaction_rules_file);
 
 DEFINE_int32(max_log_files, 10, "Maximum number of log files to retain per severity "
     "level. The most recent log files are retained. If set to 0, all log files are "
@@ -121,6 +122,10 @@ static scoped_ptr<impala::Thread> pause_monitor;
     impala::CheckAndRotateLogFiles(FLAGS_max_log_files);
     // Check for audit event log rotation in every interval of the maintenance thread
     impala::CheckAndRotateAuditEventLogFiles(FLAGS_max_audit_event_log_files);
+    // Check for minidump rotation in every interval of the maintenance thread. This is
+    // necessary since an arbitrary number of minidumps can be written by sending SIGUSR1
+    // to the process.
+    impala::CheckAndRotateMinidumps(FLAGS_max_minidumps);
   }
 }
 

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/294d42ad/be/src/util/minidump.cc
----------------------------------------------------------------------
diff --git a/be/src/util/minidump.cc b/be/src/util/minidump.cc
index f722b20..dbaa0cc 100644
--- a/be/src/util/minidump.cc
+++ b/be/src/util/minidump.cc
@@ -116,9 +116,7 @@ static void SetupSigUSR1Handler(bool minidumps_enabled) {
   sigaction(SIGUSR1, &sig_action, NULL);
 }
 
-/// Check the number of minidump files and removes the oldest ones to maintain an upper
-/// bound on the number of files.
-static void CheckAndRemoveMinidumps(int max_minidumps) {
+void CheckAndRotateMinidumps(int max_minidumps) {
   // Disable rotation if 0 or wrong input
   if (max_minidumps <= 0) return;
 
@@ -225,11 +223,6 @@ Status RegisterMinidump(const char* cmd_line_path) {
     return Status(ss.str());
   }
 
-  // Rotate old minidump files. We only need to do this on startup (in contrast to
-  // periodically) because only process crashes will trigger the creation of new minidump
-  // files.
-  CheckAndRemoveMinidumps(FLAGS_max_minidumps);
-
   google_breakpad::MinidumpDescriptor desc(FLAGS_minidump_path.c_str());
 
   // Limit filesize if configured.

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/294d42ad/be/src/util/minidump.h
----------------------------------------------------------------------
diff --git a/be/src/util/minidump.h b/be/src/util/minidump.h
index b2c7160..158e43e 100644
--- a/be/src/util/minidump.h
+++ b/be/src/util/minidump.h
@@ -30,6 +30,10 @@ Status RegisterMinidump(const char* cmd_line_path);
 /// tests that deliberately trigger DCHECKs. Returns true if minidumps were previously
 /// enabled or false otherwise.
 bool EnableMinidumpsForTest(bool enabled);
+
+/// Checks the number of minidump files and removes the oldest ones to maintain an upper
+/// bound on the number of files.
+void CheckAndRotateMinidumps(int max_minidumps);
 }
 
 #endif

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/294d42ad/tests/custom_cluster/test_breakpad.py
----------------------------------------------------------------------
diff --git a/tests/custom_cluster/test_breakpad.py b/tests/custom_cluster/test_breakpad.py
index 79ba70c..424e1c0 100644
--- a/tests/custom_cluster/test_breakpad.py
+++ b/tests/custom_cluster/test_breakpad.py
@@ -117,7 +117,7 @@ class TestBreakpad(CustomClusterTestSuite):
       return self.cluster.statestored and 1 or 0
     raise RuntimeError("Unknown daemon name: %s" % daemon)
 
-  def wait_for_num_processes(self, daemon, num_expected, timeout=60):
+  def wait_for_num_processes(self, daemon, num_expected, timeout=5):
     end = time.time() + timeout
     self.cluster.refresh()
     num_processes = self.get_num_processes(daemon)
@@ -146,7 +146,6 @@ class TestBreakpad(CustomClusterTestSuite):
     assert self.count_minidumps('statestored', base_dir) == 1
     assert self.count_minidumps('catalogd', base_dir) == 1
 
-
   def assert_num_logfile_entries(self, expected_count):
     self.assert_impalad_log_contains('INFO', 'Wrote minidump to ',
         expected_count=expected_count)
@@ -172,9 +171,9 @@ class TestBreakpad(CustomClusterTestSuite):
     cluster_size = self.get_num_processes('impalad')
     self.kill_cluster(SIGUSR1)
     # Breakpad forks to write its minidump files, wait for all the clones to terminate.
-    assert self.wait_for_num_processes('impalad', cluster_size, 5) == cluster_size
-    assert self.wait_for_num_processes('catalogd', 1, 5) == 1
-    assert self.wait_for_num_processes('statestored', 1, 5) == 1
+    assert self.wait_for_num_processes('impalad', cluster_size) == cluster_size
+    assert self.wait_for_num_processes('catalogd', 1) == 1
+    assert self.wait_for_num_processes('statestored', 1) == 1
     # Make sure impalad still answers queries.
     client = self.create_impala_client()
     self.execute_query_expect_success(client, "SELECT COUNT(*) FROM functional.alltypes")
@@ -229,6 +228,33 @@ class TestBreakpad(CustomClusterTestSuite):
     assert self.count_minidumps('catalogd') == 1
 
   @pytest.mark.execute_serially
+  def test_minidump_cleanup_thread(self):
+    """Check that periodic rotation preserves a limited number of minidumps."""
+    assert self.count_all_minidumps() == 0
+    # Maximum number of minidump that the impalads should keep for this test.
+    max_minidumps = 2
+    # Sleep interval for the log rotation thread.
+    rotation_interval = 1
+    self.start_cluster_with_args(minidump_path=self.tmp_dir,
+                                 max_minidumps=max_minidumps,
+                                 logbufsecs=rotation_interval)
+    cluster_size = self.get_num_processes('impalad')
+    # We trigger several rounds of minidump creation to make sure that all daemons wrote
+    # enough files to trigger rotation.
+    for i in xrange(max_minidumps + 1):
+      self.kill_cluster(SIGUSR1)
+      # Breakpad forks to write its minidump files, wait for all the clones to terminate.
+      assert self.wait_for_num_processes('impalad', cluster_size) == cluster_size
+      assert self.wait_for_num_processes('catalogd', 1) == 1
+      assert self.wait_for_num_processes('statestored', 1) == 1
+      self.assert_num_logfile_entries(i + 1)
+    # Sleep long enough for log cleaning to take effect.
+    time.sleep(rotation_interval + 1)
+    assert self.count_minidumps('impalad') == min(cluster_size, max_minidumps)
+    assert self.count_minidumps('statestored') == max_minidumps
+    assert self.count_minidumps('catalogd') == max_minidumps
+
+  @pytest.mark.execute_serially
   def test_disable_minidumps(self):
     """Check that setting enable_minidumps to false disables minidump creation."""
     assert self.count_all_minidumps() == 0