You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@kudu.apache.org by da...@apache.org on 2017/08/10 17:56:45 UTC
kudu git commit: [maintenance manager] fix op scheduling lock
contention, thread ID tweak
Repository: kudu
Updated Branches:
refs/heads/master dd68c6255 -> 7eb58663f
[maintenance manager] fix op scheduling lock contention, thread ID tweak
943b1ae26e62a0c82 introduced a lock acquisition on the MM global lock
when a worker thread begins running an op. This resulted in measurable
lock contention with many MM threads, for instance the dense_node-itest
which creates 100 MM threads. This commit introduces a finer grained
lock around the currently running ops container, which reduces the
contention on the global lock.
I've also snuck in a change to make the thread ID correspond to the
system thread ID, instead of a synthetic C++ stdlib ID.
Change-Id: I018ebe65c71344d8911ff66848478f75fef085af
Reviewed-on: http://gerrit.cloudera.org:8080/7621
Reviewed-by: Todd Lipcon <to...@apache.org>
Tested-by: Kudu Jenkins
Project: http://git-wip-us.apache.org/repos/asf/kudu/repo
Commit: http://git-wip-us.apache.org/repos/asf/kudu/commit/7eb58663
Tree: http://git-wip-us.apache.org/repos/asf/kudu/tree/7eb58663
Diff: http://git-wip-us.apache.org/repos/asf/kudu/diff/7eb58663
Branch: refs/heads/master
Commit: 7eb58663f2876415244f0f5f322135afedc98458
Parents: dd68c62
Author: Dan Burkert <da...@apache.org>
Authored: Tue Aug 8 15:20:50 2017 -0700
Committer: Dan Burkert <da...@apache.org>
Committed: Thu Aug 10 17:56:29 2017 +0000
----------------------------------------------------------------------
src/kudu/util/maintenance_manager.cc | 18 ++++++++++++------
src/kudu/util/maintenance_manager.h | 27 +++++++++++++++++----------
src/kudu/util/maintenance_manager.proto | 2 +-
3 files changed, 30 insertions(+), 17 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/kudu/blob/7eb58663/src/kudu/util/maintenance_manager.cc
----------------------------------------------------------------------
diff --git a/src/kudu/util/maintenance_manager.cc b/src/kudu/util/maintenance_manager.cc
index 747efad..dbe865f 100644
--- a/src/kudu/util/maintenance_manager.cc
+++ b/src/kudu/util/maintenance_manager.cc
@@ -431,22 +431,25 @@ MaintenanceOp* MaintenanceManager::FindBestOp() {
}
void MaintenanceManager::LaunchOp(MaintenanceOp* op) {
- std::thread::id thread_id = std::this_thread::get_id();
+ int64_t thread_id = Thread::CurrentThreadId();
OpInstance op_instance;
op_instance.thread_id = thread_id;
op_instance.name = op->name();
op_instance.start_mono_time = MonoTime::Now();
op->RunningGauge()->Increment();
{
- std::lock_guard<Mutex> lock(lock_);
+ std::lock_guard<Mutex> lock(running_instances_lock_);
InsertOrDie(&running_instances_, thread_id, &op_instance);
}
- auto cleanup = MakeScopedCleanup([&]{
+ auto cleanup = MakeScopedCleanup([&] {
op->RunningGauge()->Decrement();
std::lock_guard<Mutex> l(lock_);
- running_instances_.erase(thread_id);
+ {
+ std::lock_guard<Mutex> lock(running_instances_lock_);
+ running_instances_.erase(thread_id);
+ }
op_instance.duration = MonoTime::Now() - op_instance.start_mono_time;
completed_ops_[completed_ops_count_ % completed_ops_.size()] = op_instance;
completed_ops_count_++;
@@ -496,8 +499,11 @@ void MaintenanceManager::GetMaintenanceManagerStatusDump(MaintenanceManagerStatu
}
}
- for (const auto& running_instance : running_instances_) {
- *out_pb->add_running_operations() = running_instance.second->DumpToPB();
+ {
+ std::lock_guard<Mutex> lock(running_instances_lock_);
+ for (const auto& running_instance : running_instances_) {
+ *out_pb->add_running_operations() = running_instance.second->DumpToPB();
+ }
}
for (int n = 1; n <= completed_ops_.size(); n++) {
http://git-wip-us.apache.org/repos/asf/kudu/blob/7eb58663/src/kudu/util/maintenance_manager.h
----------------------------------------------------------------------
diff --git a/src/kudu/util/maintenance_manager.h b/src/kudu/util/maintenance_manager.h
index ef2fdf1..e939583 100644
--- a/src/kudu/util/maintenance_manager.h
+++ b/src/kudu/util/maintenance_manager.h
@@ -149,7 +149,7 @@ class MaintenanceOpStats {
// Represents an instance of a maintenance operation.
struct OpInstance {
// Id of thread the instance ran on.
- std::thread::id thread_id;
+ int64_t thread_id;
// Name of operation.
std::string name;
// Time the operation took to run. Value is unitialized if instance is still running.
@@ -158,9 +158,7 @@ struct OpInstance {
MaintenanceManagerStatusPB_OpInstancePB DumpToPB() const {
MaintenanceManagerStatusPB_OpInstancePB pb;
- std::stringstream ss;
- ss << thread_id;
- pb.set_thread_id(ss.str());
+ pb.set_thread_id(thread_id);
pb.set_name(name);
if (duration.Initialized()) {
pb.set_duration_millis(duration.ToMilliseconds());
@@ -329,12 +327,6 @@ class MaintenanceManager : public std::enable_shared_from_this<MaintenanceManage
ConditionVariable cond_;
bool shutdown_;
int32_t polling_interval_ms_;
- // Maps thread ids to instances of an op that they're running. Instances should be added
- // right before MaintenanceOp::Perform() is called, and should be removed right after
- // MaintenanceOp::Perform() completes. Any thread that adds an instance to this map
- // owns that instance, and the instance should exist until the same thread removes it.
- // Must acquire lock_ before accessing.
- std::map<std::thread::id, OpInstance*> running_instances_;
uint64_t running_ops_;
// Vector used as a circular buffer for recently completed ops. Elements need to be added at
// the completed_ops_count_ % the vector's size and then the count needs to be incremented.
@@ -347,6 +339,21 @@ class MaintenanceManager : public std::enable_shared_from_this<MaintenanceManage
// This is indirected for testing purposes.
std::function<bool(double*)> memory_pressure_func_;
+ // Running instances lock.
+ //
+ // This is separate of lock_ so that worker threads don't need to take the
+ // global MM lock when beginning operations. When taking both
+ // running_instances_lock_ and lock_, lock_ must be acquired first.
+ Mutex running_instances_lock_;
+
+ // Maps thread ids to instances of an op that they're running. Instances should be added
+ // right before MaintenanceOp::Perform() is called, and should be removed right after
+ // MaintenanceOp::Perform() completes. Any thread that adds an instance to this map
+ // owns that instance, and the instance should exist until the same thread removes it.
+ //
+ // Protected by running_instances_lock_;
+ std::unordered_map<int64_t, OpInstance*> running_instances_;
+
DISALLOW_COPY_AND_ASSIGN(MaintenanceManager);
};
http://git-wip-us.apache.org/repos/asf/kudu/blob/7eb58663/src/kudu/util/maintenance_manager.proto
----------------------------------------------------------------------
diff --git a/src/kudu/util/maintenance_manager.proto b/src/kudu/util/maintenance_manager.proto
index 5e63875..b6b1203 100644
--- a/src/kudu/util/maintenance_manager.proto
+++ b/src/kudu/util/maintenance_manager.proto
@@ -32,7 +32,7 @@ message MaintenanceManagerStatusPB {
}
message OpInstancePB {
- required string thread_id = 1;
+ required int64 thread_id = 1;
required string name = 2;
// How long the op took to run. Only present if the instance completed.
optional int32 duration_millis = 3;