You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@kudu.apache.org by al...@apache.org on 2018/02/13 21:07:54 UTC

[1/3] kudu git commit: KUDU-2291 (part 1): allow collecting a thread's stack without immediate symbolization

Repository: kudu
Updated Branches:
  refs/heads/master 136b8058f -> 5d10a56f9


KUDU-2291 (part 1): allow collecting a thread's stack without immediate symbolization

For the /stacks page we would like to be able to grab the stack from a
bunch of threads as quickly as possible to eliminate "skid" between the
collection times. Symbolization of stacks is much slower than
collection, so this patch adds a new API to collect the remote thread
stack without symbolizing it immediately.

This adds a simple benchmark for remote thread stack collection. The
initial results are not that different between symbolized and
unsymbolized because the stack collection performance itself is poor.
This will be improved in a later patch in this series.

I0207 19:33:20.960477 30799 debug-util-test.cc:175] Throughput: 99 dumps/second (symbolize=0)
I0207 19:33:21.963946 30799 debug-util-test.cc:175] Throughput: 84 dumps/second (symbolize=1)

Change-Id: I0a14fbb7d0cafcd7b966dac507e71bae32e2cc1a
Reviewed-on: http://gerrit.cloudera.org:8080/9252
Tested-by: Kudu Jenkins
Reviewed-by: Will Berkeley <wd...@gmail.com>


Project: http://git-wip-us.apache.org/repos/asf/kudu/repo
Commit: http://git-wip-us.apache.org/repos/asf/kudu/commit/f9b2ccc8
Tree: http://git-wip-us.apache.org/repos/asf/kudu/tree/f9b2ccc8
Diff: http://git-wip-us.apache.org/repos/asf/kudu/diff/f9b2ccc8

Branch: refs/heads/master
Commit: f9b2ccc874a9e643cf38966b3e25d927f4b19b1a
Parents: 136b805
Author: Todd Lipcon <to...@apache.org>
Authored: Wed Feb 7 18:27:54 2018 -0800
Committer: Todd Lipcon <to...@apache.org>
Committed: Tue Feb 13 20:33:44 2018 +0000

----------------------------------------------------------------------
 src/kudu/util/debug-util-test.cc | 27 +++++++++++++++++++++++++++
 src/kudu/util/debug-util.cc      | 30 +++++++++++++++++++++---------
 src/kudu/util/debug-util.h       |  4 ++++
 3 files changed, 52 insertions(+), 9 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/kudu/blob/f9b2ccc8/src/kudu/util/debug-util-test.cc
----------------------------------------------------------------------
diff --git a/src/kudu/util/debug-util-test.cc b/src/kudu/util/debug-util-test.cc
index 455f8cb..734b2d9 100644
--- a/src/kudu/util/debug-util-test.cc
+++ b/src/kudu/util/debug-util-test.cc
@@ -18,6 +18,7 @@
 #include <unistd.h>
 
 #include <csignal>
+#include <ostream>
 #include <string>
 #include <vector>
 
@@ -149,6 +150,32 @@ TEST_F(DebugUtilTest, TestDumpAllThreads) {
     LOG(INFO) << DumpThreadStack(tid);
   }
 }
+
+TEST_F(DebugUtilTest, Benchmark) {
+  CountDownLatch l(1);
+  scoped_refptr<Thread> t;
+  ASSERT_OK(Thread::Create("test", "test thread", &SleeperThread, &l, &t));
+  SCOPED_CLEANUP({
+      // Allow the thread to finish.
+      l.CountDown();
+      t->Join();
+    });
+
+  for (bool symbolize : {false, true}) {
+    MonoTime end_time = MonoTime::Now() + MonoDelta::FromSeconds(1);
+    int count = 0;
+    volatile int prevent_optimize = 0;
+    while (MonoTime::Now() < end_time) {
+      StackTrace trace;
+      GetThreadStack(t->tid(), &trace);
+      if (symbolize) {
+        prevent_optimize += trace.Symbolize().size();
+      }
+      count++;
+    }
+    LOG(INFO) << "Throughput: " << count << " dumps/second (symbolize=" << symbolize << ")";
+  }
+}
 #endif
 
 } // namespace kudu

http://git-wip-us.apache.org/repos/asf/kudu/blob/f9b2ccc8/src/kudu/util/debug-util.cc
----------------------------------------------------------------------
diff --git a/src/kudu/util/debug-util.cc b/src/kudu/util/debug-util.cc
index 1aea5c3..bd382a9 100644
--- a/src/kudu/util/debug-util.cc
+++ b/src/kudu/util/debug-util.cc
@@ -39,6 +39,7 @@
 #include "kudu/gutil/spinlock.h"
 #include "kudu/gutil/stringprintf.h"
 #include "kudu/gutil/strings/numbers.h"
+#include "kudu/gutil/strings/substitute.h"
 #include "kudu/util/debug/sanitizer_scopes.h"
 #include "kudu/util/errno.h"
 #include "kudu/util/monotime.h"
@@ -236,14 +237,14 @@ Status SetStackTraceSignal(int signum) {
   return Status::OK();
 }
 
-std::string DumpThreadStack(int64_t tid) {
+Status GetThreadStack(int64_t tid, StackTrace* stack) {
 #if defined(__linux__)
   base::SpinLockHolder h(&g_dumper_thread_lock);
 
   // Ensure that our signal handler is installed. We don't need any fancy GoogleOnce here
   // because of the mutex above.
   if (!InitSignalHandlerUnlocked(g_stack_trace_signum)) {
-    return "<unable to take thread stack: signal handler unavailable>";
+    return Status::NotSupported("unable to take thread stack: signal handler unavailable");
   }
 
   // Set the target TID in our communication structure, so if we end up with any
@@ -262,7 +263,7 @@ std::string DumpThreadStack(int64_t tid) {
       SignalCommunication::Lock l;
       g_comm.target_tid = 0;
     }
-    return "(unable to deliver signal: process may have exited)";
+    return Status::NotFound("unable to deliver signal: process may have exited");
   }
 
   // We give the thread ~1s to respond. In testing, threads typically respond within
@@ -282,21 +283,32 @@ std::string DumpThreadStack(int64_t tid) {
     SignalCommunication::Lock l;
     CHECK_EQ(tid, g_comm.target_tid);
 
+    g_comm.target_tid = 0;
     if (!g_comm.result_ready) {
-      ret = "(thread did not respond: maybe it is blocking signals)";
-    } else {
-      ret = g_comm.stack.Symbolize();
+      return Status::TimedOut("(thread did not respond: maybe it is blocking signals)");
     }
 
-    g_comm.target_tid = 0;
+    stack->CopyFrom(g_comm.stack);
+
     g_comm.result_ready = 0;
   }
-  return ret;
+  return Status::OK();
 #else // defined(__linux__)
-  return "(unsupported platform)";
+  return Status::NotSupported("unsupported platform");
 #endif
 }
 
+string DumpThreadStack(int64_t tid) {
+  StackTrace trace;
+  Status s = GetThreadStack(tid, &trace);
+  if (s.ok()) {
+    return trace.Symbolize();
+  }
+  return strings::Substitute("<$0>", s.ToString());
+}
+
+
+
 Status ListThreads(vector<pid_t> *tids) {
 #if defined(__linux__)
   DIR *dir = opendir("/proc/self/task/");

http://git-wip-us.apache.org/repos/asf/kudu/blob/f9b2ccc8/src/kudu/util/debug-util.h
----------------------------------------------------------------------
diff --git a/src/kudu/util/debug-util.h b/src/kudu/util/debug-util.h
index a84c04c..0d713b9 100644
--- a/src/kudu/util/debug-util.h
+++ b/src/kudu/util/debug-util.h
@@ -29,6 +29,8 @@
 
 namespace kudu {
 
+class StackTrace;
+
 // Return true if coverage is enabled.
 bool IsCoverageBuild();
 
@@ -59,6 +61,8 @@ Status SetStackTraceSignal(int signum);
 // may be active at a time.
 std::string DumpThreadStack(int64_t tid);
 
+Status GetThreadStack(int64_t tid, StackTrace* stack);
+
 // Return the current stack trace, stringified.
 std::string GetStackTrace();
 


[2/3] kudu git commit: [Java] Downgrade protobuf to 3.4.0

Posted by al...@apache.org.
[Java] Downgrade protobuf to 3.4.0

The recent bump in the Protobuf version breaks el6
compatibility due to a bug in the Maven delpoyed
protoc artifacts.

This patch downgrades to the latest compatible version
and adds the issue url for tracking.

https://github.com/google/protobuf/issues/4109

Change-Id: I1c23138bb030a3d5069dd8efa9e7231c3f2fd339
Reviewed-on: http://gerrit.cloudera.org:8080/9304
Reviewed-by: Todd Lipcon <to...@apache.org>
Tested-by: Kudu Jenkins


Project: http://git-wip-us.apache.org/repos/asf/kudu/repo
Commit: http://git-wip-us.apache.org/repos/asf/kudu/commit/6f30a311
Tree: http://git-wip-us.apache.org/repos/asf/kudu/tree/6f30a311
Diff: http://git-wip-us.apache.org/repos/asf/kudu/diff/6f30a311

Branch: refs/heads/master
Commit: 6f30a3118e351bcb11a8ca2aa3f0e2ee8d882cc6
Parents: f9b2ccc
Author: Grant Henke <gr...@gmail.com>
Authored: Tue Feb 13 14:20:39 2018 -0600
Committer: Grant Henke <gr...@gmail.com>
Committed: Tue Feb 13 21:04:11 2018 +0000

----------------------------------------------------------------------
 java/gradle/dependencies.gradle | 4 +++-
 java/pom.xml                    | 4 +++-
 2 files changed, 6 insertions(+), 2 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/kudu/blob/6f30a311/java/gradle/dependencies.gradle
----------------------------------------------------------------------
diff --git a/java/gradle/dependencies.gradle b/java/gradle/dependencies.gradle
index 1dd2afe..12635b5 100755
--- a/java/gradle/dependencies.gradle
+++ b/java/gradle/dependencies.gradle
@@ -46,7 +46,9 @@ versions += [
     netty          : "3.10.6.Final",
     parquet        : "1.9.0",
     pmd            : "5.8.1",
-    protobuf       : "3.5.1",
+    // Upgrading protobuf breaks el6 compatibility
+    // https://github.com/google/protobuf/issues/4109
+    protobuf       : "3.4.0",
     scala          : "2.11.12",
     scalatest      : "3.0.5",
     slf4j          : "1.7.25",

http://git-wip-us.apache.org/repos/asf/kudu/blob/6f30a311/java/pom.xml
----------------------------------------------------------------------
diff --git a/java/pom.xml b/java/pom.xml
index c798530..3355201 100644
--- a/java/pom.xml
+++ b/java/pom.xml
@@ -83,7 +83,9 @@
         <murmur.version>1.0.0</murmur.version>
         <netty.version>3.10.6.Final</netty.version>
         <parquet.version>1.9.0</parquet.version>
-        <protobuf.version>3.5.1</protobuf.version>
+        <!-- Upgrading protobuf breaks el6 compatibility
+             https://github.com/google/protobuf/issues/4109 -->
+        <protobuf.version>3.4.0</protobuf.version>
         <slf4j.version>1.7.25</slf4j.version>
         <sparkavro.version>3.2.0</sparkavro.version>
         <yetus.version>0.7.0</yetus.version>


[3/3] kudu git commit: [tablet] fix nullptr dereference while capturing iterators

Posted by al...@apache.org.
[tablet] fix nullptr dereference while capturing iterators

Added a check into Tablet::CaptureConsistentIterators() to make sure
the tablet is not stopped/shutdown.

Before this patch in one test scenario I saw stack traces
like below (built in DEBUG configuration):

kudu-tserver: src/kudu/gutil/ref_counted.h:284: T *scoped_refptr<kudu::tablet::TabletComponents>::operator->() const [T = kudu::tablet::TabletComponents]: Assertion `ptr_ != __null' failed.
*** Aborted at 1517534012 (unix time) try "date -d @1517534012" if you are using GNU date ***
PC: @     0x7ff9ad39cc37 gsignal
*** SIGABRT (@0x3e80000745f) received by PID 29791 (TID 0x7ff99a0bc700) from PID 29791; stack trace: ***
    @     0x7ff9b5129330 (unknown) at ??:0
    @     0x7ff9ad39cc37 gsignal at ??:0
    @     0x7ff9ad3a0028 abort at ??:0
    @     0x7ff9ad395bf6 (unknown) at ??:0
    @     0x7ff9ad395ca2 __assert_fail at ??:0
    @     0x7ff9b7f2ce52 scoped_refptr<>::operator->() at ??:0
    @     0x7ff9b7f1bf6d kudu::tablet::Tablet::CaptureConsistentIterators() at ??:0
    @     0x7ff9b7f225f6 kudu::tablet::Tablet::Iterator::Init() at ??:0
    @     0x7ff9b94372e3 kudu::tserver::TabletServiceImpl::HandleNewScanRequest() at ??:0
    @     0x7ff9b943a906 kudu::tserver::TabletServiceImpl::Checksum() at ??:0
    @     0x7ff9b3d3c83d kudu::tserver::TabletServerServiceIf::TabletServerServiceIf()::$_11::operator()() at ??:0
    @     0x7ff9b3d3c682 std::_Function_handler<>::_M_invoke() at ??:0
    @     0x7ff9b2ea026b std::function<>::operator()() at ??:0
    @     0x7ff9b2e9fb2d kudu::rpc::GeneratedServiceIf::Handle() at ??:0
    @     0x7ff9b2ea1ee6 kudu::rpc::ServicePool::RunThread() at ??:0
    @     0x7ff9b2ea4499 boost::_mfi::mf0<>::operator()() at ??:0
    @     0x7ff9b2ea4400 boost::_bi::list1<>::operator()<>() at ??:0
    @     0x7ff9b2ea43aa boost::_bi::bind_t<>::operator()() at ??:0
    @     0x7ff9b2ea418d boost::detail::function::void_function_obj_invoker0<>::invoke() at ??:0
    @     0x7ff9b2e45f68 boost::function0<>::operator()() at ??:0
    @     0x7ff9b115162d kudu::Thread::SuperviseThread() at ??:0
    @     0x7ff9b5121184 start_thread at ??:0
    @     0x7ff9ad463ffd clone at ??:0
    @                0x0 (unknown)

I used the following WIP stress test for the reproduction scenario:
  https://gerrit.cloudera.org/#/c/9255/

For DEBUG builds, without fix the issues appeared ~0.5% of cases.  After
the fix, the issue could not be reproduced:

Without fix:
  http://dist-test.cloudera.org//job?job_id=aserbin.1518492521.137030

With fix:
  http://dist-test.cloudera.org//job?job_id=aserbin.1518492937.141401

Change-Id: Ia7600f006c8df7f445cc2551e99390177378bcff
Reviewed-on: http://gerrit.cloudera.org:8080/9189
Tested-by: Kudu Jenkins
Reviewed-by: Mike Percy <mp...@apache.org>


Project: http://git-wip-us.apache.org/repos/asf/kudu/repo
Commit: http://git-wip-us.apache.org/repos/asf/kudu/commit/5d10a56f
Tree: http://git-wip-us.apache.org/repos/asf/kudu/tree/5d10a56f
Diff: http://git-wip-us.apache.org/repos/asf/kudu/diff/5d10a56f

Branch: refs/heads/master
Commit: 5d10a56f9d06dc695f2a4469edbabce978912eb4
Parents: 6f30a31
Author: Alexey Serbin <as...@cloudera.com>
Authored: Thu Feb 1 19:21:07 2018 -0800
Committer: Alexey Serbin <as...@cloudera.com>
Committed: Tue Feb 13 21:05:40 2018 +0000

----------------------------------------------------------------------
 src/kudu/tablet/tablet.cc | 20 +++++++++++---------
 1 file changed, 11 insertions(+), 9 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/kudu/blob/5d10a56f/src/kudu/tablet/tablet.cc
----------------------------------------------------------------------
diff --git a/src/kudu/tablet/tablet.cc b/src/kudu/tablet/tablet.cc
index 7c7298c..431de32 100644
--- a/src/kudu/tablet/tablet.cc
+++ b/src/kudu/tablet/tablet.cc
@@ -1718,21 +1718,23 @@ Status Tablet::DebugDump(vector<string> *lines) {
 }
 
 Status Tablet::CaptureConsistentIterators(
-  const Schema *projection,
-  const MvccSnapshot &snap,
-  const ScanSpec *spec,
-  OrderMode order,
-  vector<shared_ptr<RowwiseIterator> > *iters) const {
+    const Schema* projection,
+    const MvccSnapshot& snap,
+    const ScanSpec* spec,
+    OrderMode order,
+    vector<shared_ptr<RowwiseIterator>>* iters) const {
+
   shared_lock<rw_spinlock> l(component_lock_);
+  RETURN_IF_STOPPED_OR_CHECK_STATE(kOpen);
 
   // Construct all the iterators locally first, so that if we fail
   // in the middle, we don't modify the output arguments.
-  vector<shared_ptr<RowwiseIterator> > ret;
+  vector<shared_ptr<RowwiseIterator>> ret;
 
   // Grab the memrowset iterator.
   gscoped_ptr<RowwiseIterator> ms_iter;
   RETURN_NOT_OK(components_->memrowset->NewRowIterator(projection, snap, order, &ms_iter));
-  ret.push_back(shared_ptr<RowwiseIterator>(ms_iter.release()));
+  ret.emplace_back(ms_iter.release());
 
   // Cull row-sets in the case of key-range queries.
   if (spec != nullptr && spec->lower_bound_key() && spec->exclusive_upper_bound_key()) {
@@ -1750,7 +1752,7 @@ Status Tablet::CaptureConsistentIterators(
       RETURN_NOT_OK_PREPEND(rs->NewRowIterator(projection, snap, order, &row_it),
                             Substitute("Could not create iterator for rowset $0",
                                        rs->ToString()));
-      ret.push_back(shared_ptr<RowwiseIterator>(row_it.release()));
+      ret.emplace_back(row_it.release());
     }
     ret.swap(*iters);
     return Status::OK();
@@ -1763,7 +1765,7 @@ Status Tablet::CaptureConsistentIterators(
     RETURN_NOT_OK_PREPEND(rs->NewRowIterator(projection, snap, order, &row_it),
                           Substitute("Could not create iterator for rowset $0",
                                      rs->ToString()));
-    ret.push_back(shared_ptr<RowwiseIterator>(row_it.release()));
+    ret.emplace_back(row_it.release());
   }
 
   // Swap results into the parameters.