You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@kudu.apache.org by mp...@apache.org on 2017/01/31 20:44:05 UTC

[3/3] kudu git commit: Control mutex stack walking in DEBUG mode with a gflag

Control mutex stack walking in DEBUG mode with a gflag

This patch disables the Mutex owner stack trace collection on DEBUG
builds by default, only enabling it when a certain gflag is set.

In DEBUG mode, our Mutex implementation collects a stack trace of the
owning thread each time the Mutex is acquired. It does this by calling
google::GetStackTrace() from glog, which in the context of the Kudu
build environment calls into libunwind to collect the stack trace.

At the time of writing, google::GetStackTrace() only allows access by
one thread at a time. If more than one thread attempts to invoke this
function simultaneously, there is a CAS that determines exclusivity. The
"loser" of this contest gets a short-circuit return along with an empty
stack trace, indicating a failure to collect the stack trace.

NB: I have filed a glog issue about that behavior upstream. For more
information, see https://github.com/google/glog/issues/160

This situation becomes a problem when there are one or more Mutexes
constantly being acquired. When that happens, there is always a thread
collecting a stack trace, and so the probability of being able to
successfully collect a stack trace at any given moment is greatly
reduced.

One important caller of google::GetStackTrace() is the glog failure
function and SIGABRT signal handler that is called when a CHECK() fails
or a LOG(FATAL) call is invoked. I have observed that this crash handler
will often print an empty stack trace in DEBUG mode. Investigating this
issue led me to discover that we had a thread (our AsyncLogger thread)
constantly acquiring a Mutex and racing on the above-mentioned CAS check
inside google::GetStackTrace(). Depriving our DEBUG builds of stack
traces on LOG(FATAL) or CHECK failures, especially on Jenkins runs, is
counterproductive. One simple solution to this problem is to disable
this behavior by default.

Change-Id: Ie4593cf7173867ce2f6151e03df0be94f97d95d2
Reviewed-on: http://gerrit.cloudera.org:8080/5741
Tested-by: Mike Percy <mp...@apache.org>
Reviewed-by: Adar Dembo <ad...@cloudera.com>


Project: http://git-wip-us.apache.org/repos/asf/kudu/repo
Commit: http://git-wip-us.apache.org/repos/asf/kudu/commit/f994ee96
Tree: http://git-wip-us.apache.org/repos/asf/kudu/tree/f994ee96
Diff: http://git-wip-us.apache.org/repos/asf/kudu/diff/f994ee96

Branch: refs/heads/master
Commit: f994ee96c73fd16352d046e2ee085688c5840c83
Parents: 19e88d5
Author: Mike Percy <mp...@apache.org>
Authored: Thu Jan 12 19:52:45 2017 -0800
Committer: Mike Percy <mp...@apache.org>
Committed: Tue Jan 31 20:43:49 2017 +0000

----------------------------------------------------------------------
 src/kudu/util/mutex.cc | 39 ++++++++++++++++++++++++++++++++-------
 src/kudu/util/mutex.h  |  6 +++++-
 2 files changed, 37 insertions(+), 8 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/kudu/blob/f994ee96/src/kudu/util/mutex.cc
----------------------------------------------------------------------
diff --git a/src/kudu/util/mutex.cc b/src/kudu/util/mutex.cc
index 842a354..bd9539b 100644
--- a/src/kudu/util/mutex.cc
+++ b/src/kudu/util/mutex.cc
@@ -19,13 +19,27 @@
 
 #include "kudu/util/mutex.h"
 
+#include <gflags/gflags.h>
 #include <glog/logging.h>
 
+#include "kudu/gutil/strings/substitute.h"
 #include "kudu/gutil/walltime.h"
 #include "kudu/util/debug-util.h"
 #include "kudu/util/env.h"
+#include "kudu/util/flag_tags.h"
 #include "kudu/util/trace.h"
 
+using std::string;
+using strings::Substitute;
+using strings::SubstituteAndAppend;
+
+#ifndef NDEBUG
+DEFINE_bool(debug_mutex_collect_stacktrace, false,
+            "Whether to collect a stacktrace on Mutex contention in a DEBUG build");
+TAG_FLAG(debug_mutex_collect_stacktrace, advanced);
+TAG_FLAG(debug_mutex_collect_stacktrace, hidden);
+#endif
+
 namespace kudu {
 
 Mutex::Mutex()
@@ -59,9 +73,7 @@ Mutex::~Mutex() {
 bool Mutex::TryAcquire() {
   int rv = pthread_mutex_trylock(&native_handle_);
 #ifndef NDEBUG
-  DCHECK(rv == 0 || rv == EBUSY) << ". " << strerror(rv)
-      << ". Owner tid: " << owning_tid_ << "; Self tid: " << Env::Default()->gettid()
-      << "; Owner stack: " << std::endl << stack_trace_->Symbolize();;
+  DCHECK(rv == 0 || rv == EBUSY) << ". " << strerror(rv) << ". " << GetOwnerThreadInfo();
   if (rv == 0) {
     CheckUnheldAndMark();
   }
@@ -86,8 +98,7 @@ void Mutex::Acquire() {
   int rv = pthread_mutex_lock(&native_handle_);
   DCHECK_EQ(0, rv) << ". " << strerror(rv)
 #ifndef NDEBUG
-      << ". Owner tid: " << owning_tid_ << "; Self tid: " << Env::Default()->gettid()
-      << "; Owner stack: " << std::endl << stack_trace_->Symbolize()
+                   << ". " << GetOwnerThreadInfo()
 #endif
   ; // NOLINT(whitespace/semicolon)
   MicrosecondsInt64 end_time = GetMonoTimeMicros();
@@ -118,13 +129,27 @@ void Mutex::AssertAcquired() const {
 void Mutex::CheckHeldAndUnmark() {
   AssertAcquired();
   owning_tid_ = 0;
-  stack_trace_->Reset();
+  if (FLAGS_debug_mutex_collect_stacktrace) {
+    stack_trace_->Reset();
+  }
 }
 
 void Mutex::CheckUnheldAndMark() {
   DCHECK_EQ(0, owning_tid_);
   owning_tid_ = Env::Default()->gettid();
-  stack_trace_->Collect();
+  if (FLAGS_debug_mutex_collect_stacktrace) {
+    stack_trace_->Collect();
+  }
+}
+
+string Mutex::GetOwnerThreadInfo() const {
+  string str = Substitute("Owner tid: $0; Self tid: $1; ", owning_tid_, Env::Default()->gettid());
+  if (FLAGS_debug_mutex_collect_stacktrace) {
+    SubstituteAndAppend(&str, "Owner stack:\n$0", stack_trace_->Symbolize());
+  } else {
+    str += "To collect the owner stack trace, enable the flag --debug_mutex_collect_stacktrace";
+  }
+  return str;
 }
 
 #endif

http://git-wip-us.apache.org/repos/asf/kudu/blob/f994ee96/src/kudu/util/mutex.h
----------------------------------------------------------------------
diff --git a/src/kudu/util/mutex.h b/src/kudu/util/mutex.h
index fb6c53c..9277ac0 100644
--- a/src/kudu/util/mutex.h
+++ b/src/kudu/util/mutex.h
@@ -18,9 +18,12 @@
 #define KUDU_UTIL_MUTEX_H
 
 #include <pthread.h>
-#include <glog/logging.h>
 #include <sys/types.h>
 
+#include <string>
+
+#include <glog/logging.h>
+
 #include "kudu/gutil/gscoped_ptr.h"
 #include "kudu/gutil/macros.h"
 
@@ -62,6 +65,7 @@ class Mutex {
   // Members and routines taking care of locks assertions.
   void CheckHeldAndUnmark();
   void CheckUnheldAndMark();
+  std::string GetOwnerThreadInfo() const;
 
   // All private data is implicitly protected by native_handle_.
   // Be VERY careful to only access members under that lock.